feat(smartpdf): Improve dependency versions and optimize PDF to PNG conversion.

This commit is contained in:
2025-02-25 18:03:27 +00:00
parent 168527573c
commit 6b74301588
7 changed files with 3391 additions and 2286 deletions

View File

@@ -3,6 +3,7 @@ import * as paths from './smartpdf.paths.js';
import { Server } from 'http';
import { PdfCandidate } from './smartpdf.classes.pdfcandidate.js';
import { type IPdf } from '@tsclass/tsclass/dist_ts/business/pdf.js';
import { execFile } from 'child_process';
declare const document: any;
@@ -75,7 +76,7 @@ export class SmartPdf {
}
/**
* returns a pdf for a given html string;
* Returns a PDF for a given HTML string.
*/
async getA4PdfResultForHtmlString(htmlStringArg: string): Promise<plugins.tsclass.business.IPdf> {
await this._readyDeferred.promise;
@@ -155,7 +156,7 @@ export class SmartPdf {
width: 1920,
height: 1200,
});
page.emulateMediaType('screen');
await page.emulateMediaType('screen');
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
const pdfId = plugins.smartunique.shortId();
const { documentHeight, documentWidth } = await page.evaluate(() => {
@@ -200,9 +201,9 @@ export class SmartPdf {
}
public async readFileToPdfObject(pathArg: string): Promise<plugins.tsclass.business.IPdf> {
const path = plugins.smartpath.transform.makeAbsolute(pathArg);
const parsedPath = plugins.path.parse(path);
const buffer = await plugins.smartfile.fs.toBuffer(path);
const absolutePath = plugins.smartpath.transform.makeAbsolute(pathArg);
const parsedPath = plugins.path.parse(absolutePath);
const buffer = await plugins.smartfile.fs.toBuffer(absolutePath);
return {
name: parsedPath.base,
buffer,
@@ -229,40 +230,109 @@ export class SmartPdf {
return deferred.promise;
}
/**
 * Verifies that the external tools this class relies on can be launched:
 * GraphicsMagick (`gm version`) and Ghostscript (`gs --version`).
 * Both probes are started together; the returned promise rejects as soon as
 * either probe rejects.
 */
private async checkDependencies(): Promise<void> {
  // Each entry is [executable, arguments used for the probe run].
  const probes: Array<[string, string[]]> = [
    ['gm', ['version']],
    ['gs', ['--version']],
  ];
  await Promise.all(
    probes.map(([executable, probeArgs]) => this.checkCommandExists(executable, probeArgs))
  );
}
/**
 * Probes for an executable by running it once with the given arguments.
 *
 * @param command - Name of the executable to launch.
 * @param args - Arguments passed to the executable for the probe run.
 * @returns A promise that resolves when `execFile` reports no error and
 *   rejects with a descriptive `Error` otherwise.
 */
private checkCommandExists(command: string, args: string[]): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    execFile(command, args, (execError) => {
      // Success path first: no error object means the process ran and exited cleanly.
      if (!execError) {
        resolve();
        return;
      }
      const reason = `Dependency check failed: ${command} is not installed or not in the PATH. ${execError.message}`;
      reject(new Error(reason));
    });
  });
}
/**
* Converts a PDF to PNG bytes for each page using Puppeteer and PDF.js.
* This method creates a temporary HTML page that loads PDF.js from a CDN,
* renders each PDF page to a canvas, and then screenshots each canvas element.
*
* @param pdfBytes - Raw bytes of the PDF document to convert.
* @param options - Optional width/height/quality hints (see the note below the
*   signature about how they are used).
* @returns One PNG byte array per canvas found on the rendering page.
*
* NOTE(review): this span appears to interleave two versions of the method
* (an older pdf2pic-based one and a newer Puppeteer/PDF.js-based one), as in a
* diff rendered without +/- markers: there are two `options` parameters, two
* signature tails and two return paths. Confirm against the real file.
*/
public async convertPDFToPngBytes(
pdfBytes: Uint8Array,
// NOTE(review): presumably the pre-change form of the `options` parameter and its defaults.
options: {
width?: number;
height?: number;
quality?: number;
} = {}
) {
const { width = 1024, height = 768, quality = 100 } = options;
// NOTE(review): presumably the post-change form of the `options` parameter and return type.
options: { width?: number; height?: number; quality?: number } = {}
): Promise<Uint8Array[]> {
// Note: options.width, options.height, and options.quality are not applied here,
// as the rendered canvas size is determined by the PDF page dimensions.
// Load the PDF document
const pdfDoc = await plugins.pdfLib.PDFDocument.load(pdfBytes);
// Create a new page using the headless browser.
const page = await this.headlessBrowser.newPage();
// pdf2pic converter configured for PNG output (used only by the pdf2pic loop further down).
const converter = plugins.pdf2pic.fromBuffer(Buffer.from(pdfBytes), {
density: 100, // Image density (DPI)
format: 'png', // Image format
width, // Output image width
height, // Output image height
quality, // Output image quality
});
// Prepare PDF data as a base64 string.
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
// Get array of promises that resolve to PNG buffers (the converter format above is 'png')
const imagePromises: Promise<Buffer>[] = [];
const numPages = pdfDoc.getPageCount();
// HTML template that loads PDF.js and renders the PDF.
// The inline script decodes the base64 payload, renders every page at scale 1.0
// into its own <canvas>, appends it to the body, and finally sets
// window.renderComplete = true. PDF.js and its worker are fetched from cdnjs,
// so the headless browser presumably needs network access — TODO confirm.
const htmlTemplate: string = `
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>PDF to PNG Converter</title>
<style>
body { margin: 0; }
canvas { display: block; margin: 10px auto; }
</style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
</head>
<body>
<script>
(async function() {
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
const pdfData = "__PDF_DATA__";
const raw = atob(pdfData);
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
const pdf = await loadingTask.promise;
const numPages = pdf.numPages;
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
const page = await pdf.getPage(pageNum);
const viewport = page.getViewport({ scale: 1.0 });
const canvas = document.createElement('canvas');
const context = canvas.getContext('2d');
canvas.width = viewport.width;
canvas.height = viewport.height;
await page.render({ canvasContext: context, viewport: viewport }).promise;
document.body.appendChild(canvas);
}
window.renderComplete = true;
})();
</script>
</body>
</html>
`;
// pdf2pic path: queue one conversion per page (pdf2pic page numbers start at 1, hence i + 1).
for (let i = 0; i < numPages; i++) {
imagePromises.push(converter(i + 1, {
responseType: 'buffer',
}).then((output) => output.buffer));
// Replace the placeholder with the actual base64 PDF data.
// The placeholder occurs exactly once in the template, so a single string replace suffices.
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
// Set the page content.
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
// Wait until the PDF.js rendering is complete.
// The flag is only set after the in-page render loop finishes; if that script
// fails, the flag is never set and this wait presumably ends with a timeout
// after 30000 ms — confirm against Puppeteer's waitForFunction behavior.
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
// Query all canvas elements (each representing a rendered PDF page).
const canvasElements = await page.$$('canvas');
const pngBuffers: Uint8Array[] = [];
for (const canvasElement of canvasElements) {
// Screenshot the canvas element. The screenshot will be a PNG buffer.
const screenshotBuffer = (await canvasElement.screenshot({ encoding: 'binary' })) as Buffer;
pngBuffers.push(new Uint8Array(screenshotBuffer));
}
// Resolve all promises and return the array of buffers
const imageBuffers = await Promise.all(imagePromises);
const imageUint8Arrays = imageBuffers.map((buffer) => buffer);
return imageUint8Arrays;
// NOTE(review): the page is closed only on the success path; no try/finally is
// visible here, so an error thrown earlier would leave the page open.
await page.close();
return pngBuffers;
}
}
}