feat(smartpdf): replace internal Express server with @push.rocks/smartserve, add PDF→WebP rendering, improve start/stop handling and bump dependencies
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
import * as plugins from './smartpdf.plugins.js';
|
||||
import * as paths from './smartpdf.paths.js';
|
||||
import { Server } from 'http';
|
||||
import { PdfCandidate } from './smartpdf.classes.pdfcandidate.js';
|
||||
import { type IPdf } from '@tsclass/tsclass/dist_ts/business/pdf.js';
|
||||
declare const document: any;
|
||||
@@ -16,7 +15,7 @@ export class SmartPdf {
|
||||
public static readonly SCALE_SCREEN = 2.0; // ~144 DPI - Good for screen display
|
||||
public static readonly SCALE_HIGH = 3.0; // ~216 DPI - High quality (default)
|
||||
public static readonly SCALE_PRINT = 6.0; // ~432 DPI - Print quality
|
||||
|
||||
|
||||
/**
|
||||
* Calculate scale factor for desired DPI
|
||||
* PDF.js default is 72 DPI, so scale = desiredDPI / 72
|
||||
@@ -24,7 +23,7 @@ export class SmartPdf {
|
||||
public static getScaleForDPI(dpi: number): number {
|
||||
// A scale of 1.0 corresponds to the 72 DPI baseline, so the factor is the ratio to 72.
return dpi / 72;
|
||||
}
|
||||
|
||||
|
||||
// STATIC
|
||||
public static async create(optionsArg?: ISmartPdfOptions) {
|
||||
const smartpdfInstance = new SmartPdf(optionsArg);
|
||||
@@ -32,13 +31,14 @@ export class SmartPdf {
|
||||
}
|
||||
|
||||
// INSTANCE
|
||||
htmlServerInstance: Server;
|
||||
private smartserveInstance: plugins.smartserve.SmartServe;
|
||||
serverPort: number;
|
||||
headlessBrowser: plugins.smartpuppeteer.puppeteer.Browser;
|
||||
externalBrowserBool: boolean = false;
|
||||
private _readyDeferred: plugins.smartpromise.Deferred<void>;
|
||||
private _candidates: { [key: string]: PdfCandidate } = {};
|
||||
private _options: ISmartPdfOptions;
|
||||
private _isRunning: boolean = false;
|
||||
|
||||
constructor(optionsArg?: ISmartPdfOptions) {
|
||||
this._readyDeferred = new plugins.smartpromise.Deferred();
|
||||
@@ -50,7 +50,13 @@ export class SmartPdf {
|
||||
}
|
||||
|
||||
async start(headlessBrowserArg?: plugins.smartpuppeteer.puppeteer.Browser) {
|
||||
const done = plugins.smartpromise.defer();
|
||||
if (this._isRunning) {
|
||||
throw new Error('SmartPdf is already running. Call stop() before starting again.');
|
||||
}
|
||||
|
||||
// Reset readiness deferred for this start cycle
|
||||
this._readyDeferred = new plugins.smartpromise.Deferred();
|
||||
|
||||
// let's set the external browser in case one is provided
|
||||
this.headlessBrowser = headlessBrowserArg;
|
||||
// setup puppeteer
|
||||
@@ -64,7 +70,7 @@ export class SmartPdf {
|
||||
|
||||
// Find an available port BEFORE creating server
|
||||
const smartnetworkInstance = new plugins.smartnetwork.SmartNetwork();
|
||||
|
||||
|
||||
if (this._options.port) {
|
||||
// If a specific port is requested, check if it's available
|
||||
const isPortAvailable = await smartnetworkInstance.isLocalPortUnused(this._options.port);
|
||||
@@ -74,6 +80,7 @@ export class SmartPdf {
|
||||
// Clean up browser if we created one
|
||||
if (!this.externalBrowserBool && this.headlessBrowser) {
|
||||
await this.headlessBrowser.close();
|
||||
this.headlessBrowser = null;
|
||||
}
|
||||
throw new Error(`Requested port ${this._options.port} is already in use`);
|
||||
}
|
||||
@@ -87,45 +94,62 @@ export class SmartPdf {
|
||||
// Clean up browser if we created one
|
||||
if (!this.externalBrowserBool && this.headlessBrowser) {
|
||||
await this.headlessBrowser.close();
|
||||
this.headlessBrowser = null;
|
||||
}
|
||||
throw new Error(`No free ports available in range ${this._options.portRangeStart}-${this._options.portRangeEnd}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Now setup server after we know we have a valid port
|
||||
const app = plugins.express();
|
||||
app.get('/:pdfId', (req, res) => {
|
||||
const wantedCandidate = this._candidates[req.params.pdfId];
|
||||
if (!wantedCandidate) {
|
||||
console.log(`${req.url} not attached to a candidate`);
|
||||
return;
|
||||
|
||||
// Now setup server using smartserve
|
||||
this.smartserveInstance = new plugins.smartserve.SmartServe({
|
||||
port: this.serverPort,
|
||||
hostname: 'localhost',
|
||||
});
|
||||
|
||||
this.smartserveInstance.setHandler(async (request) => {
|
||||
const url = new URL(request.url);
|
||||
const pdfId = url.pathname.slice(1); // Remove leading /
|
||||
const candidate = this._candidates[pdfId];
|
||||
if (!candidate) {
|
||||
console.log(`${url.pathname} not attached to a candidate`);
|
||||
return new Response('Not found', { status: 404 });
|
||||
}
|
||||
res.setHeader('pdf-id', wantedCandidate.pdfId);
|
||||
res.send(wantedCandidate.htmlString);
|
||||
return new Response(candidate.htmlString, {
|
||||
headers: {
|
||||
'Content-Type': 'text/html; charset=utf-8',
|
||||
'pdf-id': candidate.pdfId,
|
||||
},
|
||||
});
|
||||
});
|
||||
this.htmlServerInstance = plugins.http.createServer(app);
|
||||
|
||||
this.htmlServerInstance.listen(this.serverPort, 'localhost');
|
||||
this.htmlServerInstance.on('listening', () => {
|
||||
console.log(`SmartPdf server listening on port ${this.serverPort}`);
|
||||
this._readyDeferred.resolve();
|
||||
done.resolve();
|
||||
});
|
||||
await done.promise;
|
||||
|
||||
await this.smartserveInstance.start();
|
||||
console.log(`SmartPdf server listening on port ${this.serverPort}`);
|
||||
this._isRunning = true;
|
||||
this._readyDeferred.resolve();
|
||||
}
|
||||
|
||||
// stop
|
||||
async stop() {
|
||||
const done = plugins.smartpromise.defer<void>();
|
||||
this.htmlServerInstance.close(() => {
|
||||
done.resolve();
|
||||
});
|
||||
|
||||
if (!this.externalBrowserBool) {
|
||||
await this.headlessBrowser.close();
|
||||
if (!this._isRunning) {
|
||||
return;
|
||||
}
|
||||
|
||||
await done.promise;
|
||||
this._isRunning = false;
|
||||
|
||||
// Close browser first to cleanly terminate keepalive connections
|
||||
// before the server shuts down (prevents ECONNRESET errors)
|
||||
if (!this.externalBrowserBool && this.headlessBrowser) {
|
||||
await this.headlessBrowser.close();
|
||||
}
|
||||
this.headlessBrowser = null;
|
||||
|
||||
if (this.smartserveInstance) {
|
||||
await this.smartserveInstance.stop();
|
||||
this.smartserveInstance = null;
|
||||
}
|
||||
|
||||
// Clear any remaining candidates
|
||||
this._candidates = {};
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -135,124 +159,144 @@ export class SmartPdf {
|
||||
await this._readyDeferred.promise;
|
||||
const pdfCandidate = new PdfCandidate(htmlStringArg);
|
||||
this._candidates[pdfCandidate.pdfId] = pdfCandidate;
|
||||
const page = await this.headlessBrowser.newPage();
|
||||
await page.setViewport({
|
||||
width: 794,
|
||||
height: 1122,
|
||||
});
|
||||
const response = await page.goto(`http://localhost:${this.serverPort}/${pdfCandidate.pdfId}`, {
|
||||
waitUntil: 'networkidle2',
|
||||
});
|
||||
const headers = response.headers();
|
||||
if (headers['pdf-id'] !== pdfCandidate.pdfId) {
|
||||
console.log('Error! Headers do not match. For security reasons no pdf is being emitted!');
|
||||
return;
|
||||
} else {
|
||||
console.log(`id security check passed for ${pdfCandidate.pdfId}`);
|
||||
}
|
||||
let page: plugins.smartpuppeteer.puppeteer.Page;
|
||||
try {
|
||||
page = await this.headlessBrowser.newPage();
|
||||
await page.setViewport({
|
||||
width: 794,
|
||||
height: 1122,
|
||||
});
|
||||
const response = await page.goto(`http://localhost:${this.serverPort}/${pdfCandidate.pdfId}`, {
|
||||
waitUntil: 'networkidle2',
|
||||
});
|
||||
const headers = response.headers();
|
||||
if (headers['pdf-id'] !== pdfCandidate.pdfId) {
|
||||
console.log('Error! Headers do not match. For security reasons no pdf is being emitted!');
|
||||
return;
|
||||
} else {
|
||||
console.log(`id security check passed for ${pdfCandidate.pdfId}`);
|
||||
}
|
||||
|
||||
const pdfBuffer = await page.pdf({
|
||||
width: 794,
|
||||
height: 1122,
|
||||
printBackground: true,
|
||||
displayHeaderFooter: false,
|
||||
});
|
||||
// Convert Uint8Array to Node Buffer
|
||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||
await page.close();
|
||||
delete this._candidates[pdfCandidate.pdfId];
|
||||
pdfCandidate.doneDeferred.resolve();
|
||||
await pdfCandidate.doneDeferred.promise;
|
||||
return {
|
||||
id: pdfCandidate.pdfId,
|
||||
name: `${pdfCandidate.pdfId}.js`,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||
},
|
||||
buffer: nodePdfBuffer,
|
||||
};
|
||||
const pdfBuffer = await page.pdf({
|
||||
width: 794,
|
||||
height: 1122,
|
||||
printBackground: true,
|
||||
displayHeaderFooter: false,
|
||||
});
|
||||
// Convert Uint8Array to Node Buffer
|
||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||
await page.close();
|
||||
delete this._candidates[pdfCandidate.pdfId];
|
||||
pdfCandidate.doneDeferred.resolve();
|
||||
await pdfCandidate.doneDeferred.promise;
|
||||
return {
|
||||
id: pdfCandidate.pdfId,
|
||||
name: `${pdfCandidate.pdfId}.js`,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||
},
|
||||
buffer: nodePdfBuffer,
|
||||
};
|
||||
} catch (err) {
|
||||
// Clean up candidate on error
|
||||
delete this._candidates[pdfCandidate.pdfId];
|
||||
if (page) {
|
||||
await page.close().catch(() => {});
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
async getPdfResultForWebsite(websiteUrl: string): Promise<plugins.tsclass.business.IPdf> {
|
||||
const page = await this.headlessBrowser.newPage();
|
||||
await page.setViewport({
|
||||
width: 1980,
|
||||
height: 1200,
|
||||
});
|
||||
await page.emulateMediaType('screen');
|
||||
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
||||
const pdfId = plugins.smartunique.shortId();
|
||||
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
||||
try {
|
||||
await page.setViewport({
|
||||
width: 1980,
|
||||
height: 1200,
|
||||
});
|
||||
await page.emulateMediaType('screen');
|
||||
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
||||
const pdfId = plugins.smartunique.shortId();
|
||||
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
||||
return {
|
||||
documentHeight: document.height,
|
||||
documentWidth: document.width,
|
||||
};
|
||||
});
|
||||
const pdfBuffer = await page.pdf({
|
||||
height: documentHeight,
|
||||
width: documentWidth,
|
||||
printBackground: true,
|
||||
displayHeaderFooter: false,
|
||||
});
|
||||
// Convert Uint8Array to Node Buffer
|
||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||
await page.close();
|
||||
return {
|
||||
documentHeight: document.height,
|
||||
documentWidth: document.width,
|
||||
id: pdfId,
|
||||
name: `${pdfId}.js`,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||
},
|
||||
buffer: nodePdfBuffer,
|
||||
};
|
||||
});
|
||||
const pdfBuffer = await page.pdf({
|
||||
height: documentHeight,
|
||||
width: documentWidth,
|
||||
printBackground: true,
|
||||
displayHeaderFooter: false,
|
||||
});
|
||||
// Convert Uint8Array to Node Buffer
|
||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||
await page.close();
|
||||
return {
|
||||
id: pdfId,
|
||||
name: `${pdfId}.js`,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||
},
|
||||
buffer: nodePdfBuffer,
|
||||
};
|
||||
} catch (err) {
|
||||
await page.close().catch(() => {});
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
async getFullWebsiteAsSinglePdf(websiteUrl: string): Promise<plugins.tsclass.business.IPdf> {
|
||||
const page = await this.headlessBrowser.newPage();
|
||||
await page.setViewport({
|
||||
width: 1920,
|
||||
height: 1200,
|
||||
});
|
||||
await page.emulateMediaType('screen');
|
||||
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
||||
const pdfId = plugins.smartunique.shortId();
|
||||
// Use both document.body and document.documentElement to ensure we have a valid height and width.
|
||||
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
||||
try {
|
||||
await page.setViewport({
|
||||
width: 1920,
|
||||
height: 1200,
|
||||
});
|
||||
await page.emulateMediaType('screen');
|
||||
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
||||
const pdfId = plugins.smartunique.shortId();
|
||||
// Use both document.body and document.documentElement to ensure we have a valid height and width.
|
||||
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
||||
return {
|
||||
documentHeight: Math.max(
|
||||
document.body.scrollHeight,
|
||||
document.documentElement.scrollHeight
|
||||
) || 1200,
|
||||
documentWidth: Math.max(
|
||||
document.body.clientWidth,
|
||||
document.documentElement.clientWidth
|
||||
) || 1920,
|
||||
};
|
||||
});
|
||||
// Update viewport height to the full document height.
|
||||
await page.setViewport({
|
||||
width: 1920,
|
||||
height: documentHeight,
|
||||
});
|
||||
const pdfBuffer = await page.pdf({
|
||||
height: documentHeight,
|
||||
width: 1920,
|
||||
printBackground: true,
|
||||
displayHeaderFooter: false,
|
||||
scale: 1,
|
||||
pageRanges: '1',
|
||||
});
|
||||
// Convert Uint8Array to Node Buffer
|
||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||
await page.close();
|
||||
return {
|
||||
documentHeight: Math.max(
|
||||
document.body.scrollHeight,
|
||||
document.documentElement.scrollHeight
|
||||
) || 1200,
|
||||
documentWidth: Math.max(
|
||||
document.body.clientWidth,
|
||||
document.documentElement.clientWidth
|
||||
) || 1920,
|
||||
id: pdfId,
|
||||
name: `${pdfId}.js`,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||
},
|
||||
buffer: nodePdfBuffer,
|
||||
};
|
||||
});
|
||||
// Update viewport height to the full document height.
|
||||
await page.setViewport({
|
||||
width: 1920,
|
||||
height: documentHeight,
|
||||
});
|
||||
const pdfBuffer = await page.pdf({
|
||||
height: documentHeight,
|
||||
width: 1920,
|
||||
printBackground: true,
|
||||
displayHeaderFooter: false,
|
||||
scale: 1,
|
||||
pageRanges: '1',
|
||||
});
|
||||
// Convert Uint8Array to Node Buffer
|
||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||
await page.close();
|
||||
return {
|
||||
id: pdfId,
|
||||
name: `${pdfId}.js`,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||
},
|
||||
buffer: nodePdfBuffer,
|
||||
};
|
||||
} catch (err) {
|
||||
await page.close().catch(() => {});
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
public async mergePdfs(inputPdfBuffers: Uint8Array[]): Promise<Uint8Array> {
|
||||
@@ -318,89 +362,94 @@ export class SmartPdf {
|
||||
// Create a new page using the headless browser.
|
||||
const page = await this.headlessBrowser.newPage();
|
||||
|
||||
// Prepare PDF data as a base64 string.
|
||||
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
|
||||
try {
|
||||
// Prepare PDF data as a base64 string.
|
||||
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
|
||||
|
||||
// HTML template that loads PDF.js and renders the PDF.
|
||||
const htmlTemplate: string = `
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>PDF to PNG Converter</title>
|
||||
<style>
|
||||
body { margin: 0; }
|
||||
canvas { display: block; margin: 10px auto; }
|
||||
</style>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<script>
|
||||
(async function() {
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
|
||||
const pdfData = "__PDF_DATA__";
|
||||
const raw = atob(pdfData);
|
||||
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
|
||||
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
|
||||
const pdf = await loadingTask.promise;
|
||||
const numPages = pdf.numPages;
|
||||
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
|
||||
const page = await pdf.getPage(pageNum);
|
||||
// Apply scale factor to viewport
|
||||
const viewport = page.getViewport({ scale: ${scale} });
|
||||
|
||||
// Apply max width/height constraints if specified
|
||||
let finalScale = ${scale};
|
||||
${options.maxWidth ? `
|
||||
if (viewport.width > ${options.maxWidth}) {
|
||||
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
|
||||
}` : ''}
|
||||
${options.maxHeight ? `
|
||||
if (viewport.height > ${options.maxHeight}) {
|
||||
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
|
||||
finalScale = Math.min(finalScale, heightScale);
|
||||
}` : ''}
|
||||
|
||||
// Get final viewport with adjusted scale
|
||||
const finalViewport = page.getViewport({ scale: finalScale });
|
||||
|
||||
const canvas = document.createElement('canvas');
|
||||
const context = canvas.getContext('2d');
|
||||
canvas.width = finalViewport.width;
|
||||
canvas.height = finalViewport.height;
|
||||
canvas.setAttribute('data-page', pageNum);
|
||||
|
||||
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
|
||||
document.body.appendChild(canvas);
|
||||
}
|
||||
window.renderComplete = true;
|
||||
})();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
// HTML template that loads PDF.js and renders the PDF.
|
||||
const htmlTemplate: string = `
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>PDF to PNG Converter</title>
|
||||
<style>
|
||||
body { margin: 0; }
|
||||
canvas { display: block; margin: 10px auto; }
|
||||
</style>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<script>
|
||||
(async function() {
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
|
||||
const pdfData = "__PDF_DATA__";
|
||||
const raw = atob(pdfData);
|
||||
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
|
||||
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
|
||||
const pdf = await loadingTask.promise;
|
||||
const numPages = pdf.numPages;
|
||||
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
|
||||
const page = await pdf.getPage(pageNum);
|
||||
// Apply scale factor to viewport
|
||||
const viewport = page.getViewport({ scale: ${scale} });
|
||||
|
||||
// Replace the placeholder with the actual base64 PDF data.
|
||||
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
|
||||
// Apply max width/height constraints if specified
|
||||
let finalScale = ${scale};
|
||||
${options.maxWidth ? `
|
||||
if (viewport.width > ${options.maxWidth}) {
|
||||
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
|
||||
}` : ''}
|
||||
${options.maxHeight ? `
|
||||
if (viewport.height > ${options.maxHeight}) {
|
||||
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
|
||||
finalScale = Math.min(finalScale, heightScale);
|
||||
}` : ''}
|
||||
|
||||
// Set the page content.
|
||||
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
|
||||
// Get final viewport with adjusted scale
|
||||
const finalViewport = page.getViewport({ scale: finalScale });
|
||||
|
||||
// Wait until the PDF.js rendering is complete.
|
||||
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
|
||||
const canvas = document.createElement('canvas');
|
||||
const context = canvas.getContext('2d');
|
||||
canvas.width = finalViewport.width;
|
||||
canvas.height = finalViewport.height;
|
||||
canvas.setAttribute('data-page', pageNum);
|
||||
|
||||
// Query all canvas elements (each representing a rendered PDF page).
|
||||
const canvasElements = await page.$$('canvas');
|
||||
const pngBuffers: Uint8Array[] = [];
|
||||
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
|
||||
document.body.appendChild(canvas);
|
||||
}
|
||||
window.renderComplete = true;
|
||||
})();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
|
||||
for (const canvasElement of canvasElements) {
|
||||
// Screenshot the canvas element. The screenshot will be a PNG buffer.
|
||||
const screenshotBuffer = (await canvasElement.screenshot({ encoding: 'binary' })) as Buffer;
|
||||
pngBuffers.push(new Uint8Array(screenshotBuffer));
|
||||
// Replace the placeholder with the actual base64 PDF data.
|
||||
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
|
||||
|
||||
// Set the page content.
|
||||
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
|
||||
|
||||
// Wait until the PDF.js rendering is complete.
|
||||
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
|
||||
|
||||
// Query all canvas elements (each representing a rendered PDF page).
|
||||
const canvasElements = await page.$$('canvas');
|
||||
const pngBuffers: Uint8Array[] = [];
|
||||
|
||||
for (const canvasElement of canvasElements) {
|
||||
// Screenshot the canvas element. The screenshot will be a PNG buffer.
|
||||
const screenshotBuffer = (await canvasElement.screenshot({ encoding: 'binary' })) as Buffer;
|
||||
pngBuffers.push(new Uint8Array(screenshotBuffer));
|
||||
}
|
||||
|
||||
await page.close();
|
||||
return pngBuffers;
|
||||
} catch (err) {
|
||||
await page.close().catch(() => {});
|
||||
throw err;
|
||||
}
|
||||
|
||||
await page.close();
|
||||
return pngBuffers;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -424,94 +473,99 @@ export class SmartPdf {
|
||||
// Create a new page using the headless browser
|
||||
const page = await this.headlessBrowser.newPage();
|
||||
|
||||
// Prepare PDF data as a base64 string
|
||||
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
|
||||
try {
|
||||
// Prepare PDF data as a base64 string
|
||||
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
|
||||
|
||||
// HTML template that loads PDF.js and renders the PDF with scaling
|
||||
const htmlTemplate: string = `
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>PDF to WebP Preview Converter</title>
|
||||
<style>
|
||||
body { margin: 0; }
|
||||
canvas { display: block; margin: 10px auto; }
|
||||
</style>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<script>
|
||||
(async function() {
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
|
||||
const pdfData = "__PDF_DATA__";
|
||||
const raw = atob(pdfData);
|
||||
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
|
||||
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
|
||||
const pdf = await loadingTask.promise;
|
||||
const numPages = pdf.numPages;
|
||||
|
||||
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
|
||||
const page = await pdf.getPage(pageNum);
|
||||
// Apply scale factor to viewport
|
||||
const viewport = page.getViewport({ scale: ${scale} });
|
||||
|
||||
// Apply max width/height constraints if specified
|
||||
let finalScale = ${scale};
|
||||
${options.maxWidth ? `
|
||||
if (viewport.width > ${options.maxWidth}) {
|
||||
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
|
||||
}` : ''}
|
||||
${options.maxHeight ? `
|
||||
if (viewport.height > ${options.maxHeight}) {
|
||||
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
|
||||
finalScale = Math.min(finalScale, heightScale);
|
||||
}` : ''}
|
||||
|
||||
// Get final viewport with adjusted scale
|
||||
const finalViewport = page.getViewport({ scale: finalScale });
|
||||
|
||||
const canvas = document.createElement('canvas');
|
||||
const context = canvas.getContext('2d');
|
||||
canvas.width = finalViewport.width;
|
||||
canvas.height = finalViewport.height;
|
||||
canvas.setAttribute('data-page', pageNum);
|
||||
|
||||
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
|
||||
document.body.appendChild(canvas);
|
||||
}
|
||||
window.renderComplete = true;
|
||||
})();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
// HTML template that loads PDF.js and renders the PDF with scaling
|
||||
const htmlTemplate: string = `
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>PDF to WebP Preview Converter</title>
|
||||
<style>
|
||||
body { margin: 0; }
|
||||
canvas { display: block; margin: 10px auto; }
|
||||
</style>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<script>
|
||||
(async function() {
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
|
||||
const pdfData = "__PDF_DATA__";
|
||||
const raw = atob(pdfData);
|
||||
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
|
||||
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
|
||||
const pdf = await loadingTask.promise;
|
||||
const numPages = pdf.numPages;
|
||||
|
||||
// Replace the placeholder with the actual base64 PDF data
|
||||
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
|
||||
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
|
||||
const page = await pdf.getPage(pageNum);
|
||||
// Apply scale factor to viewport
|
||||
const viewport = page.getViewport({ scale: ${scale} });
|
||||
|
||||
// Set the page content
|
||||
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
|
||||
// Apply max width/height constraints if specified
|
||||
let finalScale = ${scale};
|
||||
${options.maxWidth ? `
|
||||
if (viewport.width > ${options.maxWidth}) {
|
||||
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
|
||||
}` : ''}
|
||||
${options.maxHeight ? `
|
||||
if (viewport.height > ${options.maxHeight}) {
|
||||
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
|
||||
finalScale = Math.min(finalScale, heightScale);
|
||||
}` : ''}
|
||||
|
||||
// Wait until the PDF.js rendering is complete
|
||||
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
|
||||
// Get final viewport with adjusted scale
|
||||
const finalViewport = page.getViewport({ scale: finalScale });
|
||||
|
||||
// Query all canvas elements (each representing a rendered PDF page)
|
||||
const canvasElements = await page.$$('canvas');
|
||||
const webpBuffers: Uint8Array[] = [];
|
||||
const canvas = document.createElement('canvas');
|
||||
const context = canvas.getContext('2d');
|
||||
canvas.width = finalViewport.width;
|
||||
canvas.height = finalViewport.height;
|
||||
canvas.setAttribute('data-page', pageNum);
|
||||
|
||||
for (const canvasElement of canvasElements) {
|
||||
// Screenshot the canvas element as WebP
|
||||
const screenshotBuffer = (await canvasElement.screenshot({
|
||||
type: 'webp',
|
||||
quality: quality,
|
||||
encoding: 'binary'
|
||||
})) as Buffer;
|
||||
webpBuffers.push(new Uint8Array(screenshotBuffer));
|
||||
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
|
||||
document.body.appendChild(canvas);
|
||||
}
|
||||
window.renderComplete = true;
|
||||
})();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
|
||||
// Replace the placeholder with the actual base64 PDF data
|
||||
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
|
||||
|
||||
// Set the page content
|
||||
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
|
||||
|
||||
// Wait until the PDF.js rendering is complete
|
||||
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
|
||||
|
||||
// Query all canvas elements (each representing a rendered PDF page)
|
||||
const canvasElements = await page.$$('canvas');
|
||||
const webpBuffers: Uint8Array[] = [];
|
||||
|
||||
for (const canvasElement of canvasElements) {
|
||||
// Screenshot the canvas element as WebP
|
||||
const screenshotBuffer = (await canvasElement.screenshot({
|
||||
type: 'webp',
|
||||
quality: quality,
|
||||
encoding: 'binary'
|
||||
})) as Buffer;
|
||||
webpBuffers.push(new Uint8Array(screenshotBuffer));
|
||||
}
|
||||
|
||||
await page.close();
|
||||
return webpBuffers;
|
||||
} catch (err) {
|
||||
await page.close().catch(() => {});
|
||||
throw err;
|
||||
}
|
||||
|
||||
await page.close();
|
||||
return webpBuffers;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -538,11 +592,11 @@ export class SmartPdf {
|
||||
|
||||
// Initialize SmartJimp in sharp mode for progressive JPEG support
|
||||
const smartJimpInstance = new plugins.smartjimp.SmartJimp({ mode: 'sharp' });
|
||||
|
||||
|
||||
// Convert each PNG to progressive JPEG
|
||||
const jpegBuffers: Uint8Array[] = [];
|
||||
const quality = options.quality || 85;
|
||||
|
||||
|
||||
for (const pngBuffer of pngBuffers) {
|
||||
// Convert PNG buffer to progressive JPEG
|
||||
const jpegBuffer = await smartJimpInstance.computeAssetVariation(
|
||||
@@ -550,15 +604,13 @@ export class SmartPdf {
|
||||
{
|
||||
format: 'jpeg',
|
||||
progressive: true,
|
||||
// SmartJimp uses a different quality scale, need to check if adjustment is needed
|
||||
// For now, pass through the quality value
|
||||
quality
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
jpegBuffers.push(new Uint8Array(jpegBuffer));
|
||||
}
|
||||
|
||||
|
||||
return jpegBuffers;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user