feat(smartpdf): Improve dependency versions and optimize PDF to PNG conversion.
This commit is contained in:
parent
168527573c
commit
6b74301588
@ -1,5 +1,14 @@
|
||||
# Changelog
|
||||
|
||||
## 2025-02-25 - 3.2.0 - feat(smartpdf)
|
||||
Improve dependency versions and optimize PDF to PNG conversion.
|
||||
|
||||
- Update several dependencies to newer versions for better stability and performance.
|
||||
- Refactor tests to enhance readability and add directory creation validations.
|
||||
- Optimize PDF to PNG conversion by switching to a more efficient Puppeteer and PDF.js-based method.
|
||||
- Add checks for the presence of the required dependencies (GraphicsMagick and Ghostscript).
|
||||
- Fix media emulation issue by properly awaiting the emulateMediaType function.
|
||||
|
||||
## 2024-11-30 - 3.1.8 - fix(core)
|
||||
Fix candidate handling in PDF generation
|
||||
|
||||
|
23
package.json
23
package.json
@ -14,28 +14,27 @@
|
||||
"buildDocs": "tsdoc"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@git.zone/tsbuild": "^2.2.0",
|
||||
"@git.zone/tsdoc": "^1.4.2",
|
||||
"@git.zone/tsbuild": "^2.2.1",
|
||||
"@git.zone/tsdoc": "^1.4.3",
|
||||
"@git.zone/tsrun": "^1.3.3",
|
||||
"@git.zone/tstest": "^1.0.77",
|
||||
"@push.rocks/tapbundle": "^5.5.3",
|
||||
"@types/node": "^22.10.1"
|
||||
"@git.zone/tstest": "^1.0.96",
|
||||
"@push.rocks/tapbundle": "^5.5.6",
|
||||
"@types/node": "^22.13.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@push.rocks/smartbuffer": "^3.0.4",
|
||||
"@push.rocks/smartdelay": "^3.0.5",
|
||||
"@push.rocks/smartfile": "^11.0.21",
|
||||
"@push.rocks/smartfile": "^11.2.0",
|
||||
"@push.rocks/smartnetwork": "^3.0.0",
|
||||
"@push.rocks/smartpath": "^5.0.18",
|
||||
"@push.rocks/smartpromise": "^4.0.4",
|
||||
"@push.rocks/smartpuppeteer": "^2.0.2",
|
||||
"@push.rocks/smartpromise": "^4.2.3",
|
||||
"@push.rocks/smartpuppeteer": "^2.0.5",
|
||||
"@push.rocks/smartunique": "^3.0.9",
|
||||
"@tsclass/tsclass": "^4.1.2",
|
||||
"@tsclass/tsclass": "^4.4.0",
|
||||
"@types/express": "^5.0.0",
|
||||
"express": "^4.21.1",
|
||||
"express": "^4.21.2",
|
||||
"pdf-lib": "^1.17.1",
|
||||
"pdf2json": "3.1.4",
|
||||
"pdf2pic": "^3.1.3"
|
||||
"pdf2json": "3.1.5"
|
||||
},
|
||||
"files": [
|
||||
"ts/**/*",
|
||||
|
5417
pnpm-lock.yaml
generated
5417
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
87
test/test.ts
87
test/test.ts
@ -1,66 +1,85 @@
|
||||
import { expect, tap } from '@push.rocks/tapbundle';
|
||||
import * as smartpdf from '../ts/index.js';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
let testSmartPdf: smartpdf.SmartPdf;
|
||||
|
||||
tap.test('should create a valid instance of smartpdf', async () => {
|
||||
/**
 * Ensures that a directory exists, creating it and any missing parent directories.
 *
 * `mkdirSync` with `recursive: true` is already a no-op when the directory exists,
 * so no separate existence check is made. This avoids a check-then-create race and
 * surfaces an error when the path exists but is not a directory.
 *
 * @param dirPath - The directory path to ensure.
 * @throws If `dirPath` exists as a non-directory or the directory cannot be created.
 */
function ensureDir(dirPath: string): void {
  fs.mkdirSync(dirPath, { recursive: true });
}
|
||||
|
||||
tap.test('should create a valid instance of SmartPdf', async () => {
|
||||
testSmartPdf = new smartpdf.SmartPdf();
|
||||
expect(testSmartPdf).toBeInstanceOf(smartpdf.SmartPdf);
|
||||
});
|
||||
|
||||
tap.test('should start the instance', async () => {
|
||||
tap.test('should start the SmartPdf instance', async () => {
|
||||
await testSmartPdf.start();
|
||||
});
|
||||
|
||||
tap.test('should create a pdf from html string', async () => {
|
||||
await testSmartPdf.getA4PdfResultForHtmlString('hi');
|
||||
tap.test('should create PDFs from HTML string', async () => {
|
||||
const pdf1 = await testSmartPdf.getA4PdfResultForHtmlString('hi');
|
||||
const pdf2 = await testSmartPdf.getA4PdfResultForHtmlString('hello');
|
||||
expect(pdf1.buffer).toBeInstanceOf(Buffer);
|
||||
expect(pdf2.buffer).toBeInstanceOf(Buffer);
|
||||
});
|
||||
|
||||
tap.test('should create a pdf from html string', async () => {
|
||||
await testSmartPdf.getA4PdfResultForHtmlString('hi');
|
||||
tap.test('should create PDFs from websites', async () => {
|
||||
const pdfA4 = await testSmartPdf.getPdfResultForWebsite('https://www.wikipedia.org');
|
||||
const pdfSingle = await testSmartPdf.getFullWebsiteAsSinglePdf('https://www.wikipedia.org');
|
||||
expect(pdfA4.buffer).toBeInstanceOf(Buffer);
|
||||
expect(pdfSingle.buffer).toBeInstanceOf(Buffer);
|
||||
});
|
||||
|
||||
tap.test('should create a pdf from website as A4', async () => {
|
||||
await testSmartPdf.getPdfResultForWebsite('https://www.wikipedia.org');
|
||||
});
|
||||
|
||||
tap.test('should create a pdf from website as single page PDF', async () => {
|
||||
await testSmartPdf.getFullWebsiteAsSinglePdf('https://www.wikipedia.org');
|
||||
});
|
||||
|
||||
tap.test('should create a valid PDFResult', async () => {
|
||||
const writePDfToDisk = async (urlArg: string, fileName: string) => {
|
||||
tap.test('should create valid PDF results and write them to disk', async () => {
|
||||
const writePdfToDisk = async (urlArg: string, fileName: string) => {
|
||||
const pdfResult = await testSmartPdf.getFullWebsiteAsSinglePdf(urlArg);
|
||||
expect(pdfResult.buffer).toBeInstanceOf(Buffer);
|
||||
const fs = await import('fs');
|
||||
|
||||
if (!fs.existsSync('.nogit/')) {
|
||||
fs.mkdirSync('.nogit/');
|
||||
}
|
||||
fs.writeFileSync(`.nogit/${fileName}`, pdfResult.buffer as Buffer);
|
||||
ensureDir('.nogit');
|
||||
fs.writeFileSync(path.join('.nogit', fileName), pdfResult.buffer as Buffer);
|
||||
};
|
||||
await writePDfToDisk('https://lossless.com/', '1.pdf');
|
||||
await writePDfToDisk('https://layer.io', '2.pdf');
|
||||
await writePdfToDisk('https://lossless.com/', '1.pdf');
|
||||
await writePdfToDisk('https://layer.io', '2.pdf');
|
||||
});
|
||||
|
||||
tap.test('should merge pdfs', async () => {
|
||||
const fs = await import('fs');
|
||||
tap.test('should merge PDFs into a combined PDF', async () => {
|
||||
const pdf1 = await testSmartPdf.readFileToPdfObject('.nogit/1.pdf');
|
||||
const pdf2 = await testSmartPdf.readFileToPdfObject('.nogit/2.pdf');
|
||||
fs.writeFileSync(
|
||||
`.nogit/combined.pdf`,
|
||||
await testSmartPdf.mergePdfs([pdf1.buffer, pdf2.buffer])
|
||||
);
|
||||
const mergedBuffer = await testSmartPdf.mergePdfs([pdf1.buffer, pdf2.buffer]);
|
||||
ensureDir('.nogit');
|
||||
fs.writeFileSync(path.join('.nogit', 'combined.pdf'), mergedBuffer);
|
||||
});
|
||||
|
||||
tap.test('should create images from an pdf', async () => {
|
||||
tap.test('should create PNG images from combined PDF using Puppeteer conversion', async () => {
|
||||
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/combined.pdf');
|
||||
const images = await testSmartPdf.convertPDFToPngBytes(pdfObject.buffer);
|
||||
console.log(images.map((val) => val.length));
|
||||
expect(images.length).toBeGreaterThan(0);
|
||||
console.log('Puppeteer-based conversion image sizes:', images.map(img => img.length));
|
||||
});
|
||||
|
||||
tap.test('should be able to close properly', async () => {
|
||||
// Renders the merged PDF to PNG images with the Puppeteer/PDF.js conversion and keeps
// the images under .nogit/testresults so they can be inspected manually.
tap.test('should store PNG results from the Puppeteer conversion in .nogit/testresults', async () => {
  const testResultsDir = path.join('.nogit', 'testresults');
  ensureDir(testResultsDir);

  const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/combined.pdf');

  // Convert using the Puppeteer-based function.
  const imagesPuppeteer = await testSmartPdf.convertPDFToPngBytes(pdfObject.buffer);

  // Without this guard an empty result would write nothing and the test would still pass.
  expect(imagesPuppeteer.length).toBeGreaterThan(0);

  // Store one file per rendered page, numbered from 1.
  imagesPuppeteer.forEach((img, index) => {
    const filePath = path.join(testResultsDir, `puppeteer_method_page_${index + 1}.png`);
    fs.writeFileSync(filePath, Buffer.from(img));
  });
});
|
||||
|
||||
tap.test('should close the SmartPdf instance properly', async () => {
|
||||
await testSmartPdf.stop();
|
||||
});
|
||||
|
||||
tap.start();
|
||||
tap.start();
|
@ -3,6 +3,6 @@
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: '@push.rocks/smartpdf',
|
||||
version: '3.1.8',
|
||||
version: '3.2.0',
|
||||
description: 'A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.'
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ import * as paths from './smartpdf.paths.js';
|
||||
import { Server } from 'http';
|
||||
import { PdfCandidate } from './smartpdf.classes.pdfcandidate.js';
|
||||
import { type IPdf } from '@tsclass/tsclass/dist_ts/business/pdf.js';
|
||||
import { execFile } from 'child_process';
|
||||
|
||||
declare const document: any;
|
||||
|
||||
@ -75,7 +76,7 @@ export class SmartPdf {
|
||||
}
|
||||
|
||||
/**
|
||||
* returns a pdf for a given html string;
|
||||
* Returns a PDF for a given HTML string.
|
||||
*/
|
||||
async getA4PdfResultForHtmlString(htmlStringArg: string): Promise<plugins.tsclass.business.IPdf> {
|
||||
await this._readyDeferred.promise;
|
||||
@ -155,7 +156,7 @@ export class SmartPdf {
|
||||
width: 1920,
|
||||
height: 1200,
|
||||
});
|
||||
page.emulateMediaType('screen');
|
||||
await page.emulateMediaType('screen');
|
||||
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
||||
const pdfId = plugins.smartunique.shortId();
|
||||
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
||||
@ -200,9 +201,9 @@ export class SmartPdf {
|
||||
}
|
||||
|
||||
public async readFileToPdfObject(pathArg: string): Promise<plugins.tsclass.business.IPdf> {
|
||||
const path = plugins.smartpath.transform.makeAbsolute(pathArg);
|
||||
const parsedPath = plugins.path.parse(path);
|
||||
const buffer = await plugins.smartfile.fs.toBuffer(path);
|
||||
const absolutePath = plugins.smartpath.transform.makeAbsolute(pathArg);
|
||||
const parsedPath = plugins.path.parse(absolutePath);
|
||||
const buffer = await plugins.smartfile.fs.toBuffer(absolutePath);
|
||||
return {
|
||||
name: parsedPath.base,
|
||||
buffer,
|
||||
@ -229,40 +230,109 @@ export class SmartPdf {
|
||||
return deferred.promise;
|
||||
}
|
||||
|
||||
/**
 * Verifies that the GraphicsMagick (`gm`) and Ghostscript (`gs`) executables can be run.
 * Both probes are started together; the returned promise rejects if either probe fails.
 */
private async checkDependencies(): Promise<void> {
  const probes: Promise<void>[] = [
    this.checkCommandExists('gm', ['version']),
    this.checkCommandExists('gs', ['--version']),
  ];
  await Promise.all(probes);
}
|
||||
|
||||
/**
 * Probes for an executable by running it once with the given arguments.
 *
 * @param command - The executable to run.
 * @param args - Arguments passed to the executable for the probe run.
 * @returns A promise that resolves when `execFile` reports no error and rejects
 *          with a descriptive `Error` otherwise.
 */
private checkCommandExists(command: string, args: string[]): Promise<void> {
  return new Promise<void>((onFound, onMissing) => {
    execFile(command, args, (probeError) => {
      if (!probeError) {
        onFound();
        return;
      }
      onMissing(
        new Error(
          `Dependency check failed: ${command} is not installed or not in the PATH. ${probeError.message}`
        )
      );
    });
  });
}
|
||||
|
||||
/**
 * Converts a PDF to PNG bytes for each page using Puppeteer and PDF.js.
 *
 * A temporary page is opened in the headless browser and given an HTML document that
 * loads PDF.js from the cdnjs CDN, renders every PDF page onto its own canvas, and
 * sets `window.renderComplete` when finished. Each canvas is then screenshotted.
 *
 * @param pdfBytes - The raw bytes of the PDF document to render.
 * @param options - Accepted for backward compatibility; `width`, `height` and `quality`
 *                  are not applied because the canvas size follows the PDF page dimensions.
 * @returns One PNG-encoded byte array per rendered canvas, in the order the canvases
 *          appear in the page.
 * @throws If the page content cannot be set, rendering does not signal completion
 *         within 30 seconds, or a screenshot fails.
 */
public async convertPDFToPngBytes(
  pdfBytes: Uint8Array,
  options: { width?: number; height?: number; quality?: number } = {}
): Promise<Uint8Array[]> {
  // Note: options.width, options.height, and options.quality are not applied here,
  // as the rendered canvas size is determined by the PDF page dimensions.

  // Create a new page using the headless browser.
  const page = await this.headlessBrowser.newPage();

  // Everything after page creation runs inside try/finally so the page is closed
  // even when loading, rendering, or screenshotting throws (e.g. on the 30 s timeout).
  try {
    // Prepare PDF data as a base64 string so it can be embedded in the HTML document.
    const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');

    // HTML template that loads PDF.js and renders the PDF.
    const htmlTemplate: string = `
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>PDF to PNG Converter</title>
<style>
body { margin: 0; }
canvas { display: block; margin: 10px auto; }
</style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
</head>
<body>
<script>
(async function() {
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
const pdfData = "__PDF_DATA__";
const raw = atob(pdfData);
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
const pdf = await loadingTask.promise;
const numPages = pdf.numPages;
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
const page = await pdf.getPage(pageNum);
const viewport = page.getViewport({ scale: 1.0 });
const canvas = document.createElement('canvas');
const context = canvas.getContext('2d');
canvas.width = viewport.width;
canvas.height = viewport.height;
await page.render({ canvasContext: context, viewport: viewport }).promise;
document.body.appendChild(canvas);
}
window.renderComplete = true;
})();
</script>
</body>
</html>
`;

    // Replace the placeholder with the actual base64 PDF data.
    const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);

    // Set the page content.
    await page.setContent(htmlContent, { waitUntil: 'networkidle0' });

    // Wait until the PDF.js rendering is complete.
    await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });

    // Query all canvas elements (each representing a rendered PDF page).
    const canvasElements = await page.$$('canvas');
    const pngBuffers: Uint8Array[] = [];

    for (const canvasElement of canvasElements) {
      // Screenshot the canvas element. The screenshot will be a PNG buffer.
      const screenshotBuffer = (await canvasElement.screenshot({ encoding: 'binary' })) as Buffer;
      pngBuffers.push(new Uint8Array(screenshotBuffer));
    }

    return pngBuffers;
  } finally {
    await page.close();
  }
}
|
||||
}
|
||||
}
|
@ -33,7 +33,6 @@ export { tsclass };
|
||||
// thirdparty
|
||||
import express from 'express';
|
||||
import pdf2json from 'pdf2json';
|
||||
import pdf2pic from 'pdf2pic';
|
||||
import pdfLib from 'pdf-lib';
|
||||
|
||||
export { express, pdf2json, pdf2pic, pdfLib, };
|
||||
export { express, pdf2json, pdfLib, };
|
||||
|
Loading…
x
Reference in New Issue
Block a user