Compare commits

...

1 Commits

Author SHA1 Message Date
Juergen Kunz
be574df599 feat(image): add progressive JPEG generation support
Some checks failed
Default (tags) / security (push) Failing after 24s
Default (tags) / test (push) Failing after 12s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
- Add convertPDFToJpegBytes method for progressive JPEG images
- Integrate @push.rocks/smartjimp for true progressive encoding
- Update readme with comprehensive documentation
- Update legal section to Task Venture Capital GmbH
2025-08-02 17:29:38 +00:00
7 changed files with 1358 additions and 384 deletions

View File

@@ -9,7 +9,7 @@
"author": "Lossless GmbH",
"license": "MIT",
"scripts": {
"test": "(tstest test/ --verbose --timeout 60)",
"test": "(tstest test/ --verbose --timeout 120)",
"build": "(tsbuild tsfolders --allowimplicitany)",
"buildDocs": "tsdoc"
},
@@ -24,6 +24,7 @@
"@push.rocks/smartbuffer": "^3.0.5",
"@push.rocks/smartdelay": "^3.0.5",
"@push.rocks/smartfile": "^11.2.5",
"@push.rocks/smartjimp": "^1.2.0",
"@push.rocks/smartnetwork": "^4.1.2",
"@push.rocks/smartpath": "^6.0.0",
"@push.rocks/smartpromise": "^4.2.3",

839
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

2
pnpm-workspace.yaml Normal file
View File

@@ -0,0 +1,2 @@
onlyBuiltDependencies:
- sharp

741
readme.md
View File

@@ -1,420 +1,409 @@
# @push.rocks/smartpdf
Create PDFs on the fly from HTML, websites, or existing PDFs with advanced features like text extraction, PDF merging, and PNG conversion.
# @push.rocks/smartpdf 📄✨
## Install
To install `@push.rocks/smartpdf`, use npm or yarn:
> **Transform HTML, websites, and PDFs into beautiful documents with just a few lines of code!**
[![npm version](https://img.shields.io/npm/v/@push.rocks/smartpdf.svg?style=flat-square)](https://www.npmjs.com/package/@push.rocks/smartpdf)
[![TypeScript](https://img.shields.io/badge/TypeScript-5.x-blue.svg?style=flat-square)](https://www.typescriptlang.org/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](./license)
## 🚀 Why SmartPDF?
SmartPDF is your Swiss Army knife for PDF operations in Node.js. Whether you're generating invoices, creating reports, or converting web pages to PDFs, we've got you covered with a simple, powerful API.
### ✨ Features at a Glance
- 📝 **HTML to PDF** - Transform HTML strings with full CSS support
- 🌐 **Website to PDF** - Capture any website as a perfectly formatted PDF
- 🔀 **PDF Merging** - Combine multiple PDFs into one
- 🖼️ **PDF to Images** - Convert PDFs to PNG, WebP, or progressive JPEG
- 📑 **Text Extraction** - Pull text content from existing PDFs
- 🎯 **Smart Port Management** - Automatic port allocation for concurrent instances
- 💪 **TypeScript First** - Full type safety and IntelliSense support
- ⚡ **High Performance** - Optimized for speed and reliability
## 📦 Installation
```bash
# Using npm
npm install @push.rocks/smartpdf --save
```
Or with yarn:
```bash
# Using yarn
yarn add @push.rocks/smartpdf
# Using pnpm (recommended)
pnpm add @push.rocks/smartpdf
```
## Requirements
This package requires a Chrome or Chromium installation to be available on the system, as it uses Puppeteer for rendering. The package will automatically detect and use the appropriate executable.
## Usage
`@push.rocks/smartpdf` provides a powerful interface for PDF generation and manipulation. All examples use ESM syntax and TypeScript.
### Getting Started
First, import the necessary classes:
## 🎯 Quick Start
```typescript
import { SmartPdf, IPdf } from '@push.rocks/smartpdf';
import { SmartPdf } from '@push.rocks/smartpdf';
// Create and start SmartPdf
const smartPdf = await SmartPdf.create();
await smartPdf.start();
// Generate a PDF from HTML
const pdf = await smartPdf.getA4PdfResultForHtmlString(`
<h1>Hello, PDF World! 🌍</h1>
<p>This is my first SmartPDF document.</p>
`);
// Save it
await fs.writeFile('my-first-pdf.pdf', pdf.buffer);
// Don't forget to clean up!
await smartPdf.stop();
```
### Basic Setup with Automatic Port Allocation
SmartPdf automatically finds an available port between 20000-30000 for its internal server:
## 📚 Core Concepts
### 🏗️ Instance Management
SmartPDF uses a client-server architecture for maximum performance. Always remember:
1. **Create** an instance
2. **Start** the server
3. **Do your PDF magic**
4. **Stop** the server
```typescript
async function setupSmartPdf() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
// Your PDF operations here
await smartPdf.stop();
}
const smartPdf = await SmartPdf.create();
await smartPdf.start();
// ... your PDF operations ...
await smartPdf.stop();
```
### Advanced Setup with Custom Port Configuration
You can specify custom port settings to avoid conflicts or meet specific requirements:
### 🔌 Smart Port Allocation
Run multiple instances without port conflicts:
```typescript
// Use a specific port
const smartPdf = await SmartPdf.create({ port: 3000 });
// Each instance automatically finds a free port
const instance1 = await SmartPdf.create(); // Port: 20000
const instance2 = await SmartPdf.create(); // Port: 20001
const instance3 = await SmartPdf.create(); // Port: 20002
// Use a custom port range
const smartPdf = await SmartPdf.create({
portRangeStart: 4000,
portRangeEnd: 5000
// Or specify custom settings
const customInstance = await SmartPdf.create({
port: 3000, // Use specific port
portRangeStart: 4000, // Or define a range
portRangeEnd: 5000
});
```
## 🎨 PDF Generation
### 📝 From HTML String
Create beautiful PDFs from HTML with full CSS support:
```typescript
const smartPdf = await SmartPdf.create();
await smartPdf.start();
const pdf = await smartPdf.getA4PdfResultForHtmlString(`
<!DOCTYPE html>
<html>
<head>
<style>
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap');
body {
font-family: 'Roboto', sans-serif;
margin: 40px;
color: #333;
}
.header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 30px;
border-radius: 10px;
text-align: center;
}
.content {
margin-top: 30px;
line-height: 1.6;
}
.highlight {
background-color: #ffd93d;
padding: 2px 6px;
border-radius: 3px;
}
</style>
</head>
<body>
<div class="header">
<h1>Invoice #2024-001</h1>
<p>Generated on ${new Date().toLocaleDateString()}</p>
</div>
<div class="content">
<h2>Bill To:</h2>
<p>Acme Corporation</p>
<p>Total: <span class="highlight">$1,234.56</span></p>
</div>
</body>
</html>
`);
await fs.writeFile('invoice.pdf', pdf.buffer);
await smartPdf.stop();
```
### 🌐 From Website
Capture any website as a PDF with two powerful methods:
#### Standard A4 Format
Perfect for articles and documents:
```typescript
const pdf = await smartPdf.getPdfResultForWebsite('https://example.com');
```
#### Full Page Capture
Capture the entire scrollable area:
```typescript
const fullPagePdf = await smartPdf.getFullWebsiteAsSinglePdf('https://example.com');
```
### 🔀 Merge Multiple PDFs
Combine PDFs like a pro:
```typescript
// Load your PDFs
const invoice = await smartPdf.readFileToPdfObject('./invoice.pdf');
const terms = await smartPdf.readFileToPdfObject('./terms.pdf');
const contract = await smartPdf.getA4PdfResultForHtmlString('<h1>Contract</h1>...');
// Merge them in order
const mergedPdf = await smartPdf.mergePdfs([
contract.buffer,
invoice.buffer,
terms.buffer
]);
await fs.writeFile('complete-document.pdf', mergedPdf);
```
## 🖼️ Image Generation
### 🎨 Convert PDF to Images
SmartPDF supports three image formats, each with its own strengths:
#### PNG - Crystal Clear Quality
```typescript
const pngImages = await smartPdf.convertPDFToPngBytes(pdf.buffer, {
scale: SmartPdf.SCALE_HIGH // 216 DPI - perfect for most uses
});
// The server will find an available port in your specified range
await smartPdf.start();
console.log(`Server running on port: ${smartPdf.serverPort}`);
// Save each page
pngImages.forEach((png, index) => {
fs.writeFileSync(`page-${index + 1}.png`, png);
});
```
### Creating PDFs from HTML Strings
Generate PDFs from HTML content with full CSS support:
#### WebP - Modern & Efficient
```typescript
async function createPdfFromHtml() {
const smartPdf = await SmartPdf.create();
const webpImages = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
quality: 90, // 0-100 quality scale
scale: 2.0 // 144 DPI - great for web
});
```
#### JPEG - Progressive Loading
```typescript
const jpegImages = await smartPdf.convertPDFToJpegBytes(pdf.buffer, {
quality: 85, // Balance between size and quality
scale: SmartPdf.SCALE_SCREEN, // 144 DPI
maxWidth: 1920 // Constrain dimensions
});
```
### 📏 DPI & Scale Guide
SmartPDF makes it easy to get the right resolution:
```typescript
// Built-in scale constants
SmartPdf.SCALE_SCREEN // 2.0 = ~144 DPI (web display)
SmartPdf.SCALE_HIGH // 3.0 = ~216 DPI (high quality, default)
SmartPdf.SCALE_PRINT // 6.0 = ~432 DPI (print quality)
// Or calculate your own
const scale = SmartPdf.getScaleForDPI(300); // Get scale for 300 DPI
```
### 🖼️ Thumbnail Generation
Create perfect thumbnails for document previews:
```typescript
const thumbnails = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
scale: 0.5, // Small but readable
quality: 70, // Lower quality for tiny files
maxWidth: 200, // Constrain to thumbnail size
maxHeight: 200
});
```
## 📊 Format Comparison
Choose the right format for your needs:
| Format | File Size | Best For | Special Features |
|--------|-----------|----------|------------------|
| **PNG** | Largest | Screenshots, diagrams, text | Lossless, transparency |
| **JPEG** | 30-50% of PNG | Photos, complex images | Progressive loading |
| **WebP** | 25-40% of PNG | Modern web apps | Best compression |
## 🛡️ Best Practices
### 1. Always Use Try-Finally
```typescript
let smartPdf: SmartPdf | undefined;
try {
smartPdf = await SmartPdf.create();
await smartPdf.start();
const htmlString = `
<!DOCTYPE html>
<html>
<head>
<style>
body { font-family: Arial, sans-serif; margin: 40px; }
h1 { color: #333; }
.highlight { background-color: yellow; }
</style>
</head>
<body>
<h1>Professional PDF Document</h1>
<p>This PDF was generated from <span class="highlight">HTML content</span>.</p>
</body>
</html>
`;
// Your PDF operations
const pdf: IPdf = await smartPdf.getA4PdfResultForHtmlString(htmlString);
// pdf.buffer contains the PDF data
// pdf.id contains a unique identifier
// pdf.name contains the filename
// pdf.metadata contains additional information like extracted text
await smartPdf.stop();
}
```
### Generating PDFs from Websites
Capture web pages as PDFs with two different approaches:
#### A4 Format PDF from Website
Captures the viewable area formatted for A4 paper:
```typescript
async function createA4PdfFromWebsite() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
const pdf: IPdf = await smartPdf.getPdfResultForWebsite('https://example.com');
// Save to file
await fs.writeFile('website-a4.pdf', pdf.buffer);
await smartPdf.stop();
}
```
#### Full Webpage as Single PDF
Captures the entire webpage in a single PDF, regardless of length:
```typescript
async function createFullPdfFromWebsite() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
const pdf: IPdf = await smartPdf.getFullWebsiteAsSinglePdf('https://example.com');
// This captures the entire scrollable area
await fs.writeFile('website-full.pdf', pdf.buffer);
await smartPdf.stop();
}
```
### Merging Multiple PDFs
Combine multiple PDF files into a single document:
```typescript
async function mergePdfs() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
// Create or load your PDFs
const pdf1 = await smartPdf.getA4PdfResultForHtmlString('<h1>Document 1</h1>');
const pdf2 = await smartPdf.getA4PdfResultForHtmlString('<h1>Document 2</h1>');
const pdf3 = await smartPdf.readFileToPdfObject('./existing-document.pdf');
// Merge PDFs - order matters!
const mergedPdf: Uint8Array = await smartPdf.mergePdfs([
pdf1.buffer,
pdf2.buffer,
pdf3.buffer
]);
// Save the merged PDF
await fs.writeFile('merged-document.pdf', mergedPdf);
await smartPdf.stop();
}
```
### Reading PDFs and Extracting Text
Extract text content from existing PDFs:
```typescript
async function extractTextFromPdf() {
const smartPdf = await SmartPdf.create();
// Read PDF from disk
const pdf: IPdf = await smartPdf.readFileToPdfObject('/path/to/document.pdf');
// Extract all text
const extractedText = await smartPdf.extractTextFromPdfBuffer(pdf.buffer);
console.log('Extracted text:', extractedText);
// The pdf object also contains metadata with text extraction
console.log('Metadata:', pdf.metadata);
}
```
### Converting PDF to PNG Images
Convert each page of a PDF into PNG images with configurable quality:
```typescript
async function convertPdfToPng() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
// Load a PDF
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
// Convert to PNG images with default high quality (216 DPI)
const pngImages: Uint8Array[] = await smartPdf.convertPDFToPngBytes(pdf.buffer);
// Or specify custom scale/DPI
const highResPngs = await smartPdf.convertPDFToPngBytes(pdf.buffer, {
scale: SmartPdf.SCALE_PRINT, // 6.0 scale = ~432 DPI
maxWidth: 3000, // Optional: limit maximum width
maxHeight: 4000 // Optional: limit maximum height
});
// Save each page as a PNG
pngImages.forEach((pngBuffer, index) => {
fs.writeFileSync(`page-${index + 1}.png`, pngBuffer);
});
await smartPdf.stop();
}
```
#### Understanding Scale and DPI
PDF.js renders at 72 DPI by default. Use these scale factors for different quality levels:
- `SmartPdf.SCALE_SCREEN` (2.0): ~144 DPI - Good for screen display
- `SmartPdf.SCALE_HIGH` (3.0): ~216 DPI - High quality (default)
- `SmartPdf.SCALE_PRINT` (6.0): ~432 DPI - Print quality
- Custom DPI: `scale = SmartPdf.getScaleForDPI(300)` for 300 DPI
### Converting PDF to WebP Images
Generate web-optimized images using WebP format. WebP provides 25-35% better compression than PNG/JPEG while maintaining quality:
```typescript
async function createWebPImages() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
// Load a PDF
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
// Create high-quality WebP images (default: 3.0 scale = 216 DPI, 85% quality)
const webpImages = await smartPdf.convertPDFToWebpBytes(pdf.buffer);
// Save WebP images
webpImages.forEach((webpBuffer, index) => {
fs.writeFileSync(`page-${index + 1}.webp`, webpBuffer);
});
await smartPdf.stop();
}
```
#### Creating Thumbnails
Generate small thumbnail images for PDF galleries or document lists:
```typescript
async function createThumbnails() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
// Create small thumbnails (0.5 scale = ~36 DPI, 70% quality)
const thumbnails = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
scale: 0.5, // Small readable thumbnails
quality: 70 // Lower quality for smaller files
});
// Save thumbnails
thumbnails.forEach((thumb, index) => {
fs.writeFileSync(`thumb-${index + 1}.webp`, thumb);
});
await smartPdf.stop();
}
```
#### Constrained Dimensions
Create previews with maximum width/height constraints, useful for responsive layouts:
```typescript
async function createConstrainedPreviews() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
// Create previews that fit within 800x600 pixels
const previews = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
scale: 1.0, // Start with full size
quality: 90, // High quality
maxWidth: 800, // Maximum 800px wide
maxHeight: 600 // Maximum 600px tall
});
// The method automatically scales down to fit within constraints
previews.forEach((preview, index) => {
fs.writeFileSync(`preview-constrained-${index + 1}.webp`, preview);
});
await smartPdf.stop();
}
```
#### WebP Options
The `convertPDFToWebpBytes` method accepts these options:
- `scale`: Scale factor for preview size (default: 3.0 for ~216 DPI)
- `quality`: WebP compression quality (default: 85, range: 0-100)
- `maxWidth`: Maximum width in pixels (optional)
- `maxHeight`: Maximum height in pixels (optional)
Common scale values:
- `0.5`: Thumbnails (~36 DPI)
- `2.0`: Screen display (~144 DPI)
- `3.0`: High quality (~216 DPI, default)
- `6.0`: Print quality (~432 DPI)
### Using External Browser Instance
For advanced use cases, you can provide your own Puppeteer browser instance:
```typescript
import puppeteer from 'puppeteer';
async function useExternalBrowser() {
// Create your own browser instance with custom options
const browser = await puppeteer.launch({
headless: true,
args: ['--no-sandbox', '--disable-setuid-sandbox']
});
const smartPdf = await SmartPdf.create();
await smartPdf.start(browser);
// Use SmartPdf normally
const pdf = await smartPdf.getA4PdfResultForHtmlString('<h1>Hello</h1>');
// SmartPdf will not close the browser when stopping
await smartPdf.stop();
// You control the browser lifecycle
await browser.close();
}
```
### Running Multiple Instances
Thanks to automatic port allocation, you can run multiple SmartPdf instances simultaneously:
```typescript
async function runMultipleInstances() {
// Each instance automatically finds its own free port
const instance1 = await SmartPdf.create();
const instance2 = await SmartPdf.create();
const instance3 = await SmartPdf.create();
// Start all instances
await Promise.all([
instance1.start(),
instance2.start(),
instance3.start()
]);
console.log(`Instance 1 running on port: ${instance1.serverPort}`);
console.log(`Instance 2 running on port: ${instance2.serverPort}`);
console.log(`Instance 3 running on port: ${instance3.serverPort}`);
// Use instances independently
const pdfs = await Promise.all([
instance1.getA4PdfResultForHtmlString('<h1>PDF 1</h1>'),
instance2.getA4PdfResultForHtmlString('<h1>PDF 2</h1>'),
instance3.getA4PdfResultForHtmlString('<h1>PDF 3</h1>')
]);
// Clean up all instances
await Promise.all([
instance1.stop(),
instance2.stop(),
instance3.stop()
]);
}
```
### Error Handling
Always wrap SmartPdf operations in try-catch blocks and ensure proper cleanup:
```typescript
async function safePdfGeneration() {
let smartPdf: SmartPdf;
try {
smartPdf = await SmartPdf.create();
await smartPdf.start();
const pdf = await smartPdf.getA4PdfResultForHtmlString('<h1>Hello</h1>');
// Process PDF...
} catch (error) {
console.error('PDF generation failed:', error);
// Handle error appropriately
} finally {
// Always cleanup
if (smartPdf) {
await smartPdf.stop();
}
} finally {
if (smartPdf) {
await smartPdf.stop(); // Always cleanup!
}
}
```
### IPdf Interface
The `IPdf` interface represents a PDF with its metadata:
### 2. Optimize HTML for PDFs
```typescript
const optimizedHtml = `
<style>
/* Use print-friendly styles */
@media print {
.no-print { display: none; }
}
/* Avoid page breaks in wrong places */
h1, h2, h3 { page-break-after: avoid; }
table { page-break-inside: avoid; }
</style>
${yourContent}
`;
```
### 3. Handle Large Documents
For documents with many pages:
```typescript
// Process in batches
const pages = await smartPdf.convertPDFToPngBytes(largePdf.buffer);
for (let i = 0; i < pages.length; i += 10) {
const batch = pages.slice(i, i + 10);
await processBatch(batch);
}
```
## 🎯 Advanced Usage
### 🌐 Custom Browser Instance
Bring your own Puppeteer instance:
```typescript
import puppeteer from 'puppeteer';
const browser = await puppeteer.launch({
headless: 'new',
args: ['--no-sandbox', '--disable-dev-shm-usage']
});
const smartPdf = await SmartPdf.create();
await smartPdf.start(browser);
// SmartPdf won't close your browser
await smartPdf.stop();
await browser.close(); // You manage it
```
### ⚡ Parallel Processing
Process multiple PDFs concurrently:
```typescript
const urls = ['https://example1.com', 'https://example2.com', 'https://example3.com'];
const pdfs = await Promise.all(
urls.map(url => smartPdf.getFullWebsiteAsSinglePdf(url))
);
// Or with multiple instances for maximum performance
const instances = await Promise.all(
Array(3).fill(null).map(() => SmartPdf.create())
);
await Promise.all(instances.map(i => i.start()));
// Process in parallel across instances
const results = await Promise.all(
urls.map((url, i) => instances[i % instances.length].getFullWebsiteAsSinglePdf(url))
);
// Cleanup all instances
await Promise.all(instances.map(i => i.stop()));
```
## 📝 API Reference
### Class: SmartPdf
#### Static Methods
- `create(options?: ISmartPdfOptions)` - Create a new SmartPdf instance
- `getScaleForDPI(dpi: number)` - Calculate scale factor for desired DPI
#### Instance Methods
- `start(browser?: Browser)` - Start the PDF server
- `stop()` - Stop the PDF server
- `getA4PdfResultForHtmlString(html: string)` - Generate A4 PDF from HTML
- `getPdfResultForWebsite(url: string)` - Generate A4 PDF from website
- `getFullWebsiteAsSinglePdf(url: string)` - Capture full webpage as PDF
- `mergePdfs(buffers: Uint8Array[])` - Merge multiple PDFs
- `readFileToPdfObject(path: string)` - Read PDF file from disk
- `extractTextFromPdfBuffer(buffer: Buffer)` - Extract text from PDF
- `convertPDFToPngBytes(buffer: Uint8Array, options?)` - Convert to PNG
- `convertPDFToWebpBytes(buffer: Uint8Array, options?)` - Convert to WebP
- `convertPDFToJpegBytes(buffer: Uint8Array, options?)` - Convert to JPEG
### Interface: IPdf
```typescript
interface IPdf {
name: string; // Filename of the PDF
buffer: Buffer; // PDF content as buffer
id: string | null; // Unique identifier
name: string; // Filename
buffer: Buffer; // PDF content
id: string | null; // Unique identifier
metadata?: {
textExtraction?: string; // Extracted text content
textExtraction?: string; // Extracted text
};
}
```
## Best Practices
## 🤝 Contributing
1. **Always start and stop**: Initialize with `start()` and cleanup with `stop()` to properly manage resources.
2. **Port management**: Use the automatic port allocation feature to avoid conflicts when running multiple instances.
3. **Error handling**: Always implement proper error handling as PDF generation can fail due to various reasons.
4. **Resource cleanup**: Ensure `stop()` is called even if an error occurs to prevent memory leaks.
5. **HTML optimization**: When creating PDFs from HTML, ensure your HTML is well-formed and CSS is embedded or inlined.
We love contributions! Please feel free to submit a Pull Request.
## License and Legal Information

View File

@@ -174,6 +174,113 @@ tap.test('should verify WebP files are smaller than PNG', async () => {
expect(totalWebpSize).toBeLessThan(totalPngSize);
});
// Smoke test: convert the sample PDF with default options and keep page 1 on disk.
tap.test('should create JPEG images from PDF', async () => {
  const { buffer: pdfBytes } = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
  const pages = await testSmartPdf.convertPDFToJpegBytes(pdfBytes);

  expect(pages.length).toBeGreaterThan(0);

  const byteCounts = pages.map((page) => page.length);
  console.log('JPEG image sizes:', byteCounts);

  // Persist only the first page so the result can be inspected manually.
  const outputFile = path.join(testResultsDir, 'jpeg_default_page1.jpg');
  fs.writeFileSync(outputFile, Buffer.from(pages[0]));
});
// Converts the sample PDF once per quality level, logs the size of page 1 and
// writes it to the results directory for manual inspection.
tap.test('should create JPEG images with different quality levels', async () => {
  const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');

  // Test different quality levels
  const qualityLevels = [50, 70, 85, 95];

  for (const quality of qualityLevels) {
    const jpegImages = await testSmartPdf.convertPDFToJpegBytes(pdfObject.buffer, {
      scale: smartpdf.SmartPdf.SCALE_HIGH,
      quality: quality
    });

    // Without this check the test had no assertion at all and would index
    // jpegImages[0] even when no page was produced.
    expect(jpegImages.length).toBeGreaterThan(0);

    console.log(`JPEG quality ${quality}: ${jpegImages[0].length} bytes`);

    // Save first page at each quality level
    fs.writeFileSync(
      path.join(testResultsDir, `jpeg_quality_${quality}_page1.jpg`),
      Buffer.from(jpegImages[0])
    );
  }
});
// Converts the sample PDF with a 1200x1200 bounding box and keeps page 1 on disk.
tap.test('should create JPEG images with max dimensions', async () => {
  const { buffer: pdfBytes } = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');

  const conversionOptions = {
    scale: smartpdf.SmartPdf.SCALE_HIGH,
    quality: 85,
    maxWidth: 1200,
    maxHeight: 1200
  };
  const boundedPages = await testSmartPdf.convertPDFToJpegBytes(pdfBytes, conversionOptions);

  expect(boundedPages.length).toBeGreaterThan(0);

  const byteCounts = boundedPages.map((page) => page.length);
  console.log('Constrained JPEG sizes:', byteCounts);

  // Persist only the first page so the result can be inspected manually.
  const outputFile = path.join(testResultsDir, 'jpeg_constrained_page1.jpg');
  fs.writeFileSync(outputFile, Buffer.from(boundedPages[0]));
});
// Renders the sample PDF as PNG, WebP and JPEG at the same scale, logs the
// per-page and total sizes, and expects both lossy formats to beat PNG overall.
tap.test('should compare file sizes between PNG, WebP, and JPEG', async () => {
  const { buffer: pdfBytes } = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');

  // Same scale for every format so the byte counts are comparable.
  const scale = smartpdf.SmartPdf.SCALE_HIGH; // 3.0 scale
  const lossyQuality = 85;

  const pngPages = await testSmartPdf.convertPDFToPngBytes(pdfBytes, { scale });
  const webpPages = await testSmartPdf.convertPDFToWebpBytes(pdfBytes, {
    scale,
    quality: lossyQuality
  });
  const jpegPages = await testSmartPdf.convertPDFToJpegBytes(pdfBytes, {
    scale,
    quality: lossyQuality
  });

  expect(pngPages.length).toEqual(webpPages.length);
  expect(pngPages.length).toEqual(jpegPages.length);

  // Percentage by which `candidate` is smaller than `baseline`, one decimal place.
  const percentSaved = (baseline: number, candidate: number): string =>
    ((baseline - candidate) / baseline * 100).toFixed(1);

  const totals = { png: 0, webp: 0, jpeg: 0 };

  for (let pageIndex = 0; pageIndex < pngPages.length; pageIndex++) {
    const pngBytes = pngPages[pageIndex].length;
    const webpBytes = webpPages[pageIndex].length;
    const jpegBytes = jpegPages[pageIndex].length;

    totals.png += pngBytes;
    totals.webp += webpBytes;
    totals.jpeg += jpegBytes;

    console.log(`Page ${pageIndex + 1}:`);
    console.log(` PNG: ${pngBytes} bytes`);
    console.log(` WebP: ${webpBytes} bytes (${percentSaved(pngBytes, webpBytes)}% smaller than PNG)`);
    console.log(` JPEG: ${jpegBytes} bytes (${percentSaved(pngBytes, jpegBytes)}% smaller than PNG)`);
  }

  const webpTotalSaved = percentSaved(totals.png, totals.webp);
  const jpegTotalSaved = percentSaved(totals.png, totals.jpeg);

  console.log('\nTotal size comparison:');
  console.log(`PNG: ${totals.png} bytes`);
  console.log(`WebP: ${totals.webp} bytes (${webpTotalSaved}% reduction)`);
  console.log(`JPEG: ${totals.jpeg} bytes (${jpegTotalSaved}% reduction)`);

  // JPEG and WebP should both be smaller than PNG
  expect(totals.jpeg).toBeLessThan(totals.png);
  expect(totals.webp).toBeLessThan(totals.png);
});
// Teardown: stop the shared SmartPdf instance used by the tests above.
tap.test('should close the SmartPdf instance properly', async () => {
  const shutdown = testSmartPdf.stop();
  await shutdown;
});

View File

@@ -538,4 +538,52 @@ export class SmartPdf {
await page.close();
return webpBuffers;
}
/**
 * Converts a PDF to progressive JPEG bytes, one image per page.
 *
 * Every page is first rendered to PNG through `convertPDFToPngBytes` (which
 * receives `scale`, `maxWidth` and `maxHeight` unchanged) and is then re-encoded
 * by SmartJimp in `sharp` mode with `format: 'jpeg'` and `progressive: true`.
 *
 * @param pdfBytes - Raw bytes of the PDF document to convert.
 * @param options - Optional rendering and encoding settings; see the inline field comments.
 * @returns The JPEG bytes for each PNG returned by `convertPDFToPngBytes`, in the same order.
 */
public async convertPDFToJpegBytes(
  pdfBytes: Uint8Array,
  options: {
    scale?: number; // Scale factor for output size (default: 3.0 for 216 DPI)
    quality?: number; // JPEG quality 0-100 (default: 85)
    maxWidth?: number; // Maximum width in pixels (optional)
    maxHeight?: number; // Maximum height in pixels (optional)
  } = {}
): Promise<Uint8Array[]> {
  // First, convert PDF to PNG using our existing method
  const pngBuffers = await this.convertPDFToPngBytes(pdfBytes, {
    scale: options.scale,
    maxWidth: options.maxWidth,
    maxHeight: options.maxHeight
  });

  // Initialize SmartJimp in sharp mode for progressive JPEG support
  const smartJimpInstance = new plugins.smartjimp.SmartJimp({ mode: 'sharp' });

  // `??` instead of `||`: an explicit `quality: 0` must not be replaced by the default.
  const defaultQuality = 85;
  const requestedQuality = options.quality ?? defaultQuality;

  // Non-finite input (e.g. NaN) falls back to the default; everything else is
  // kept inside 1-100 so a request for 0 yields the lowest quality.
  // NOTE(review): sharp documents JPEG quality as 1-100 - confirm that SmartJimp
  // forwards this value to the encoder unchanged.
  const quality = Number.isFinite(requestedQuality)
    ? Math.min(100, Math.max(1, requestedQuality))
    : defaultQuality;

  // Convert each PNG to progressive JPEG, one page at a time
  const jpegBuffers: Uint8Array[] = [];
  for (const pngBuffer of pngBuffers) {
    const jpegBuffer = await smartJimpInstance.computeAssetVariation(
      Buffer.from(pngBuffer),
      {
        format: 'jpeg',
        progressive: true,
        quality
      }
    );
    jpegBuffers.push(new Uint8Array(jpegBuffer));
  }

  return jpegBuffers;
}
}

View File

@@ -13,6 +13,7 @@ import * as smartpath from '@push.rocks/smartpath';
import * as smartpuppeteer from '@push.rocks/smartpuppeteer';
import * as smartnetwork from '@push.rocks/smartnetwork';
import * as smartunique from '@push.rocks/smartunique';
import * as smartjimp from '@push.rocks/smartjimp';
export {
smartbuffer,
@@ -23,6 +24,7 @@ export {
smartpuppeteer,
smartunique,
smartnetwork,
smartjimp,
};
// tsclass scope