diff --git a/changelog.md b/changelog.md index 787fe99..9d33775 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,12 @@ # Changelog +## 2025-02-25 - 3.2.2 - fix(SmartPdf) +Fix buffer handling for PDF conversion and text extraction + +- Ensure Uint8Array is converted to Node Buffer for PDF conversion. +- Correct the PDF page viewport handling by using document dimensions. +- Fix extractTextFromPdfBuffer argument type from Uint8Array to Buffer. + ## 2025-02-25 - 3.2.1 - fix(SmartPdf) Fix type for extractTextFromPdfBuffer function diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index 08da412..d6db252 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: '@push.rocks/smartpdf', - version: '3.2.1', + version: '3.2.2', description: 'A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.' } diff --git a/ts/smartpdf.classes.smartpdf.ts b/ts/smartpdf.classes.smartpdf.ts index 9af8bb0..12213b2 100644 --- a/ts/smartpdf.classes.smartpdf.ts +++ b/ts/smartpdf.classes.smartpdf.ts @@ -35,7 +35,7 @@ export class SmartPdf { this.externalBrowserBool = true; } else { this.headlessBrowser = await plugins.smartpuppeteer.getEnvAwareBrowserInstance({ - forceNoSandbox: true, + forceNoSandbox: false, }); } @@ -104,6 +104,8 @@ export class SmartPdf { printBackground: true, displayHeaderFooter: false, }); + // Convert Uint8Array to Node Buffer + const nodePdfBuffer = Buffer.from(pdfBuffer); await page.close(); delete this._candidates[pdfCandidate.pdfId]; pdfCandidate.doneDeferred.resolve(); @@ -112,9 +114,9 @@ export class SmartPdf { id: pdfCandidate.pdfId, name: `${pdfCandidate.pdfId}.js`, metadata: { - textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer), + textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer), }, - buffer: pdfBuffer, + buffer: nodePdfBuffer, }; } @@ -139,14 +141,16 @@ export class SmartPdf { printBackground: true, displayHeaderFooter: false, }); + // Convert Uint8Array to Node Buffer + const nodePdfBuffer = Buffer.from(pdfBuffer); await page.close(); return { id: pdfId, name: `${pdfId}.js`, metadata: { - textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer), + textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer), }, - buffer: pdfBuffer, + buffer: nodePdfBuffer, }; } @@ -159,12 +163,20 @@ export class SmartPdf { await page.emulateMediaType('screen'); const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' }); const pdfId = plugins.smartunique.shortId(); + // Use both document.body and document.documentElement to ensure we have a valid height and width. const { documentHeight, documentWidth } = await page.evaluate(() => { return { - documentHeight: document.body.scrollHeight, - documentWidth: document.body.clientWidth, + documentHeight: Math.max( + document.body.scrollHeight, + document.documentElement.scrollHeight + ) || 1200, + documentWidth: Math.max( + document.body.clientWidth, + document.documentElement.clientWidth + ) || 1920, }; }); + // Update viewport height to the full document height. await page.setViewport({ width: 1920, height: documentHeight, @@ -177,14 +189,16 @@ export class SmartPdf { scale: 1, pageRanges: '1', }); + // Convert Uint8Array to Node Buffer + const nodePdfBuffer = Buffer.from(pdfBuffer); await page.close(); return { id: pdfId, name: `${pdfId}.js`, metadata: { - textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer), + textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer), }, - buffer: pdfBuffer, + buffer: nodePdfBuffer, }; } @@ -212,7 +226,7 @@ export class SmartPdf { }; } - public async extractTextFromPdfBuffer(pdfBufferArg: Uint8Array): Promise { + public async extractTextFromPdfBuffer(pdfBufferArg: Buffer): Promise { const deferred = plugins.smartpromise.defer(); const pdfParser: any = new plugins.pdf2json(); pdfParser.on('pdfParser_dataReady', (pdfData: any) => {