Compare commits

...

1 Commits

Author SHA1 Message Date
Juergen Kunz
6a4aeed3e1 BREAKING CHANGE(smartpdf): improve image generation quality and API consistency
- Renamed convertPDFToWebpPreviews to convertPDFToWebpBytes for consistency
- Added configurable scale options with DPI support
- Changed default scale to 3.0 (216 DPI) for better quality
- Added DPI helper methods and scale constants
2025-08-02 12:37:48 +00:00
5 changed files with 372 additions and 14 deletions

View File

@@ -1,5 +1,17 @@
# Changelog
## 2025-08-02 - 4.0.0 - BREAKING CHANGE(smartpdf)
Improve image generation quality and API consistency
- BREAKING: Renamed `convertPDFToWebpPreviews` to `convertPDFToWebpBytes` for API consistency
- Added configurable scale options to `convertPDFToPngBytes` method
- Changed default scale from 1.0 to 3.0 for PNG generation (216 DPI)
- Changed default scale from 0.5 to 3.0 for WebP generation (216 DPI)
- Added DPI helper methods: `getScaleForDPI()` and scale constants (SCALE_SCREEN, SCALE_HIGH, SCALE_PRINT)
- Added maxWidth/maxHeight constraints for both PNG and WebP generation
- Improved test file organization with clear naming conventions
- Updated documentation with DPI/scale guidance and examples
## 2025-08-01 - 3.3.0 - feat(smartpdf)
Add automatic port allocation and multi-instance support

View File

@@ -1,6 +1,6 @@
{
"name": "@push.rocks/smartpdf",
"version": "3.3.0",
"version": "4.0.0",
"private": false,
"description": "A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.",
"main": "dist_ts/index.js",

107
readme.md
View File

@@ -179,7 +179,7 @@ async function extractTextFromPdf() {
```
### Converting PDF to PNG Images
Convert each page of a PDF into PNG images:
Convert each page of a PDF into a PNG image with configurable resolution (scale/DPI and optional size limits):
```typescript
async function convertPdfToPng() {
@@ -189,9 +189,16 @@ async function convertPdfToPng() {
// Load a PDF
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
// Convert to PNG images (one per page)
// Convert to PNG images with default high quality (216 DPI)
const pngImages: Uint8Array[] = await smartPdf.convertPDFToPngBytes(pdf.buffer);
// Or specify custom scale/DPI
const highResPngs = await smartPdf.convertPDFToPngBytes(pdf.buffer, {
scale: SmartPdf.SCALE_PRINT, // 6.0 scale = ~432 DPI
maxWidth: 3000, // Optional: limit maximum width
maxHeight: 4000 // Optional: limit maximum height
});
// Save each page as a PNG
pngImages.forEach((pngBuffer, index) => {
fs.writeFileSync(`page-${index + 1}.png`, pngBuffer);
@@ -201,6 +208,102 @@ async function convertPdfToPng() {
}
```
#### Understanding Scale and DPI
At a scale of 1.0, PDF.js renders at 72 DPI (one pixel per PDF point). Multiply 72 by the scale factor to get the effective DPI; use these scale factors for different quality levels:
- `SmartPdf.SCALE_SCREEN` (2.0): ~144 DPI - Good for screen display
- `SmartPdf.SCALE_HIGH` (3.0): ~216 DPI - High quality (default)
- `SmartPdf.SCALE_PRINT` (6.0): ~432 DPI - Print quality
- Custom DPI: `scale = SmartPdf.getScaleForDPI(300)` for 300 DPI
### Converting PDF to WebP Images
Generate web-optimized images using the WebP format. WebP files are typically 25-35% smaller than comparable PNG/JPEG files at similar visual quality:
```typescript
async function createWebPImages() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
// Load a PDF
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
// Create high-quality WebP images (default: 3.0 scale = 216 DPI, 85% quality)
const webpImages = await smartPdf.convertPDFToWebpBytes(pdf.buffer);
// Save WebP images
webpImages.forEach((webpBuffer, index) => {
fs.writeFileSync(`page-${index + 1}.webp`, webpBuffer);
});
await smartPdf.stop();
}
```
#### Creating Thumbnails
Generate small thumbnail images for PDF galleries or document lists:
```typescript
async function createThumbnails() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
// Create small thumbnails (0.5 scale = ~36 DPI, 70% quality)
const thumbnails = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
scale: 0.5, // Small readable thumbnails
quality: 70 // Lower quality for smaller files
});
// Save thumbnails
thumbnails.forEach((thumb, index) => {
fs.writeFileSync(`thumb-${index + 1}.webp`, thumb);
});
await smartPdf.stop();
}
```
#### Constrained Dimensions
Create previews with maximum width/height constraints, useful for responsive layouts:
```typescript
async function createConstrainedPreviews() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
// Create previews that fit within 800x600 pixels
const previews = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
scale: 1.0, // Start with full size
quality: 90, // High quality
maxWidth: 800, // Maximum 800px wide
maxHeight: 600 // Maximum 600px tall
});
// The method automatically scales down to fit within constraints
previews.forEach((preview, index) => {
fs.writeFileSync(`preview-constrained-${index + 1}.webp`, preview);
});
await smartPdf.stop();
}
```
#### WebP Options
The `convertPDFToWebpBytes` method accepts these options:
- `scale`: Scale factor for preview size (default: 3.0 for ~216 DPI)
- `quality`: WebP compression quality (default: 85, range: 0-100)
- `maxWidth`: Maximum width in pixels (optional)
- `maxHeight`: Maximum height in pixels (optional)
Common scale values:
- `0.5`: Thumbnails (~36 DPI)
- `2.0`: Screen display (~144 DPI)
- `3.0`: High quality (~216 DPI, default)
- `6.0`: Print quality (~432 DPI)
### Using External Browser Instance
For advanced use cases, you can provide your own Puppeteer browser instance:

View File

@@ -15,6 +15,13 @@ function ensureDir(dirPath: string): void {
}
}
// Start each run with a fresh results directory: delete whatever a previous
// run left behind, then hand the path to ensureDir (defined earlier in this file).
const testResultsDir = path.join('.nogit', 'testresults');
const hasStaleResults = fs.existsSync(testResultsDir);
if (hasStaleResults) {
  fs.rmSync(testResultsDir, { force: true, recursive: true });
}
ensureDir(testResultsDir);
tap.test('should create a valid instance of SmartPdf', async () => {
testSmartPdf = new smartpdf.SmartPdf();
expect(testSmartPdf).toBeInstanceOf(smartpdf.SmartPdf);
@@ -65,19 +72,108 @@ tap.test('should create PNG images from combined PDF using Puppeteer conversion'
});
// Renders .nogit/combined.pdf to PNG with the default options and writes one
// file per page into .nogit/testresults.
tap.test('should store PNG results from both conversion functions in .nogit/testresults', async () => {
  const testResultsDir = path.join('.nogit', 'testresults');
  ensureDir(testResultsDir);
  const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/combined.pdf');
  // Convert using Puppeteer-based function and store images
  const imagesPuppeteer = await testSmartPdf.convertPDFToPngBytes(pdfObject.buffer);
  imagesPuppeteer.forEach((img, index) => {
    // Exactly one declaration of filePath: the stale `puppeteer_method_page_*`
    // declaration collided with this one (duplicate `const` in the same scope).
    const filePath = path.join(testResultsDir, `png_combined_page${index + 1}.png`);
    fs.writeFileSync(filePath, Buffer.from(img));
  });
});
// Renders .nogit/3.pdf with default options as both WebP and PNG, logs the
// per-page byte sizes, and stores page 1 of each format for comparison.
tap.test('should create WebP preview images from PDF', async () => {
  const sourcePdf = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');

  const webpPages = await testSmartPdf.convertPDFToWebpBytes(sourcePdf.buffer);
  expect(webpPages.length).toBeGreaterThan(0);
  const webpByteCounts = webpPages.map((pageBytes) => pageBytes.length);
  console.log('WebP preview sizes:', webpByteCounts);

  // PNG rendering of the same document, purely for comparison output
  const pngPages = await testSmartPdf.convertPDFToPngBytes(sourcePdf.buffer);
  const pngByteCounts = pngPages.map((pageBytes) => pageBytes.length);
  console.log('PNG preview sizes:', pngByteCounts);

  // Persist the first page in both formats
  const firstWebpPath = path.join(testResultsDir, 'webp_default_page1.webp');
  fs.writeFileSync(firstWebpPath, Buffer.from(webpPages[0]));
  const firstPngPath = path.join(testResultsDir, 'png_default_page1.png');
  fs.writeFileSync(firstPngPath, Buffer.from(pngPages[0]));
});
// Renders .nogit/3.pdf as small, lower-quality WebP thumbnails
// (scale 0.5 = 0.5 x 72 = ~36 DPI, quality 70) and stores every page.
tap.test('should create WebP previews with custom scale and quality', async () => {
  const sourcePdf = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
  const thumbnailOptions = { scale: 0.5, quality: 70 };
  const thumbnailPages = await testSmartPdf.convertPDFToWebpBytes(sourcePdf.buffer, thumbnailOptions);

  expect(thumbnailPages.length).toBeGreaterThan(0);
  console.log('Thumbnail sizes:', thumbnailPages.map((pageBytes) => pageBytes.length));

  // One output file per page, numbered from 1
  for (const [pageIndex, pageBytes] of thumbnailPages.entries()) {
    const targetPath = path.join(testResultsDir, `webp_thumbnail_page${pageIndex + 1}.webp`);
    fs.writeFileSync(targetPath, Buffer.from(pageBytes));
  }
});
// Renders .nogit/3.pdf at SCALE_HIGH but bounded to 800x1000 pixels, then
// stores the first page.
tap.test('should create WebP previews with max dimensions', async () => {
  const sourcePdf = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');

  // High starting scale; maxWidth/maxHeight cap the rendered size
  const boundedOptions = {
    scale: smartpdf.SmartPdf.SCALE_HIGH,
    quality: 90,
    maxWidth: 800,
    maxHeight: 1000,
  };
  const boundedPages = await testSmartPdf.convertPDFToWebpBytes(sourcePdf.buffer, boundedOptions);

  expect(boundedPages.length).toBeGreaterThan(0);
  const boundedByteCounts = boundedPages.map((pageBytes) => pageBytes.length);
  console.log('Constrained preview sizes:', boundedByteCounts);

  const firstPagePath = path.join(testResultsDir, 'webp_constrained_page1.webp');
  fs.writeFileSync(firstPagePath, Buffer.from(boundedPages[0]));
});
// Renders .nogit/3.pdf as PNG and as WebP at the same scale, logs per-page and
// total size reductions, stores every page in both formats, and asserts that
// the WebP output is smaller in total.
tap.test('should verify WebP files are smaller than PNG', async () => {
  const sourcePdf = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');

  // Same scale for both formats so the size comparison is fair
  const sharedScale = smartpdf.SmartPdf.SCALE_HIGH;
  const pngPages = await testSmartPdf.convertPDFToPngBytes(sourcePdf.buffer, { scale: sharedScale });
  const webpPages = await testSmartPdf.convertPDFToWebpBytes(sourcePdf.buffer, {
    scale: sharedScale,
    quality: 85,
  });
  expect(pngPages.length).toEqual(webpPages.length);

  // Percentage by which the WebP size undercuts the PNG size, one decimal place
  const percentSaved = (pngBytes: number, webpBytes: number): string =>
    (((pngBytes - webpBytes) / pngBytes) * 100).toFixed(1);

  let pngTotal = 0;
  let webpTotal = 0;
  for (const [pageIndex, pngPage] of pngPages.entries()) {
    const webpPage = webpPages[pageIndex];
    const pngSize = pngPage.length;
    const webpSize = webpPage.length;
    pngTotal += pngSize;
    webpTotal += webpSize;
    console.log(`Page ${pageIndex + 1}: PNG=${pngSize} bytes, WebP=${webpSize} bytes, Reduction=${percentSaved(pngSize, webpSize)}%`);

    // Keep both renderings on disk for manual inspection
    fs.writeFileSync(path.join(testResultsDir, `comparison_png_page${pageIndex + 1}.png`), Buffer.from(pngPage));
    fs.writeFileSync(path.join(testResultsDir, `comparison_webp_page${pageIndex + 1}.webp`), Buffer.from(webpPage));
  }

  console.log(`Total size reduction: ${percentSaved(pngTotal, webpTotal)}% (PNG: ${pngTotal} bytes, WebP: ${webpTotal} bytes)`);

  // WebP should be smaller
  expect(webpTotal).toBeLessThan(pngTotal);
});
// Stops the shared SmartPdf instance that the preceding tests used.
tap.test('should close the SmartPdf instance properly', async (): Promise<void> => {
  await testSmartPdf.stop();
});

View File

@@ -14,6 +14,19 @@ export interface ISmartPdfOptions {
}
export class SmartPdf {
// STATIC SCALE CONSTANTS
// Each preset is a multiple of the 72 DPI baseline (scale 1.0).
public static readonly SCALE_SCREEN = 2.0; // 2 x 72 = ~144 DPI - Good for screen display
public static readonly SCALE_HIGH = 3.0; // 3 x 72 = ~216 DPI - High quality (default)
public static readonly SCALE_PRINT = 6.0; // 6 x 72 = ~432 DPI - Print quality

/**
 * Translates a target resolution into the scale factor that produces it.
 * A scale of 1.0 corresponds to the PDF.js default of 72 DPI, so for example
 * 144 DPI maps to 2.0.
 *
 * @param dpi - Desired output resolution in dots per inch.
 * @returns The scale factor, `dpi / 72`.
 */
public static getScaleForDPI(dpi: number): number {
  const baselineDpi = 72;
  return dpi / baselineDpi;
}
// STATIC
public static async create(optionsArg?: ISmartPdfOptions) {
const smartpdfInstance = new SmartPdf(optionsArg);
@@ -318,10 +331,14 @@ export class SmartPdf {
*/
public async convertPDFToPngBytes(
pdfBytes: Uint8Array,
options: { width?: number; height?: number; quality?: number } = {}
options: {
scale?: number; // Scale factor for output size (default: 3.0 for 216 DPI)
maxWidth?: number; // Maximum width in pixels (optional)
maxHeight?: number; // Maximum height in pixels (optional)
} = {}
): Promise<Uint8Array[]> {
// Note: options.width, options.height, and options.quality are not applied here,
// as the rendered canvas size is determined by the PDF page dimensions.
// Set default scale for higher quality output (3.0 = ~216 DPI)
const scale = options.scale || 3.0;
// Create a new page using the headless browser.
const page = await this.headlessBrowser.newPage();
@@ -354,12 +371,31 @@ export class SmartPdf {
const numPages = pdf.numPages;
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
const page = await pdf.getPage(pageNum);
const viewport = page.getViewport({ scale: 1.0 });
// Apply scale factor to viewport
const viewport = page.getViewport({ scale: ${scale} });
// Apply max width/height constraints if specified
let finalScale = ${scale};
${options.maxWidth ? `
if (viewport.width > ${options.maxWidth}) {
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
}` : ''}
${options.maxHeight ? `
if (viewport.height > ${options.maxHeight}) {
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
finalScale = Math.min(finalScale, heightScale);
}` : ''}
// Get final viewport with adjusted scale
const finalViewport = page.getViewport({ scale: finalScale });
const canvas = document.createElement('canvas');
const context = canvas.getContext('2d');
canvas.width = viewport.width;
canvas.height = viewport.height;
await page.render({ canvasContext: context, viewport: viewport }).promise;
canvas.width = finalViewport.width;
canvas.height = finalViewport.height;
canvas.setAttribute('data-page', pageNum);
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
document.body.appendChild(canvas);
}
window.renderComplete = true;
@@ -391,4 +427,115 @@ export class SmartPdf {
await page.close();
return pngBuffers;
}
/**
 * Converts every page of a PDF into a WebP image.
 *
 * The PDF is embedded as base64 in an HTML page that loads PDF.js from cdnjs,
 * renders each PDF page onto its own canvas, and every canvas is then captured
 * with an element screenshot in WebP format.
 *
 * @param pdfBytes - The PDF document to render.
 * @param options - Optional rendering settings:
 *   - `scale`: PDF.js viewport scale (default: 3.0, ~216 DPI). A missing or
 *     zero value falls back to the default.
 *   - `quality`: WebP quality 0-100 (default: 85). An explicit `0` is honoured.
 *   - `maxWidth` / `maxHeight`: upper bounds in pixels; a page whose scaled
 *     viewport exceeds a bound is rendered at a reduced scale so that it fits.
 * @returns One WebP buffer per rendered canvas, in DOM (page) order.
 * @throws When setting the page content, waiting for rendering (30 s timeout)
 *   or taking a screenshot fails. The browser page is closed in that case too.
 */
public async convertPDFToWebpBytes(
  pdfBytes: Uint8Array,
  options: {
    scale?: number; // Scale factor for preview size (default: 3.0 for 216 DPI)
    quality?: number; // WebP quality 0-100 (default: 85)
    maxWidth?: number; // Maximum width in pixels (optional)
    maxHeight?: number; // Maximum height in pixels (optional)
  } = {}
): Promise<Uint8Array[]> {
  // Set default options for higher quality output (3.0 = ~216 DPI).
  // `||` is kept for scale on purpose: a scale of 0 cannot produce an image,
  // so it is treated like "not set".
  const scale = options.scale || 3.0;
  // `??` instead of `||`: 0 is a valid WebP quality and must not become 85.
  const quality = options.quality ?? 85;

  // Create a new page using the headless browser
  const page = await this.headlessBrowser.newPage();

  try {
    // Prepare PDF data as a base64 string
    const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');

    // HTML template that loads PDF.js and renders the PDF with scaling.
    // The maxWidth/maxHeight checks are only emitted into the page script
    // when the corresponding option is set.
    const htmlTemplate: string = `
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>PDF to WebP Preview Converter</title>
<style>
body { margin: 0; }
canvas { display: block; margin: 10px auto; }
</style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
</head>
<body>
<script>
(async function() {
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
const pdfData = "__PDF_DATA__";
const raw = atob(pdfData);
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
const pdf = await loadingTask.promise;
const numPages = pdf.numPages;
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
const page = await pdf.getPage(pageNum);
// Apply scale factor to viewport
const viewport = page.getViewport({ scale: ${scale} });
// Apply max width/height constraints if specified
let finalScale = ${scale};
${options.maxWidth ? `
if (viewport.width > ${options.maxWidth}) {
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
}` : ''}
${options.maxHeight ? `
if (viewport.height > ${options.maxHeight}) {
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
finalScale = Math.min(finalScale, heightScale);
}` : ''}
// Get final viewport with adjusted scale
const finalViewport = page.getViewport({ scale: finalScale });
const canvas = document.createElement('canvas');
const context = canvas.getContext('2d');
canvas.width = finalViewport.width;
canvas.height = finalViewport.height;
canvas.setAttribute('data-page', pageNum);
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
document.body.appendChild(canvas);
}
window.renderComplete = true;
})();
</script>
</body>
</html>
`;

    // Replace the placeholder with the actual base64 PDF data
    const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);

    // Set the page content
    await page.setContent(htmlContent, { waitUntil: 'networkidle0' });

    // Wait until the PDF.js rendering is complete
    await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });

    // Query all canvas elements (each representing a rendered PDF page)
    const canvasElements = await page.$$('canvas');
    const webpBuffers: Uint8Array[] = [];
    for (const canvasElement of canvasElements) {
      // Screenshot the canvas element as WebP
      const screenshotBuffer = (await canvasElement.screenshot({
        type: 'webp',
        quality: quality,
        encoding: 'binary'
      })) as Buffer;
      webpBuffers.push(new Uint8Array(screenshotBuffer));
    }
    return webpBuffers;
  } finally {
    // Always release the browser page, including when rendering times out or
    // a screenshot fails; otherwise every failed conversion leaks a page.
    await page.close();
  }
}
}