fix(SmartPdf): Fix buffer handling for PDF conversion and text extraction
This commit is contained in:
		| @@ -1,5 +1,12 @@ | ||||
| # Changelog | ||||
|  | ||||
| ## 2025-02-25 - 3.2.2 - fix(SmartPdf) | ||||
| Fix buffer handling for PDF conversion and text extraction | ||||
|  | ||||
| - Ensure Uint8Array is converted to Node Buffer for PDF conversion. | ||||
| - Correct the PDF page viewport handling by measuring both document.body and document.documentElement and falling back to default dimensions when neither yields a size. | ||||
| - Change the extractTextFromPdfBuffer argument type from Uint8Array to Buffer. | ||||
|  | ||||
| ## 2025-02-25 - 3.2.1 - fix(SmartPdf) | ||||
| Fix type for extractTextFromPdfBuffer function | ||||
|  | ||||
|   | ||||
| @@ -3,6 +3,6 @@ | ||||
|  */ | ||||
| export const commitinfo = { | ||||
|   name: '@push.rocks/smartpdf', | ||||
|   version: '3.2.1', | ||||
|   version: '3.2.2', | ||||
|   description: 'A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.' | ||||
| } | ||||
|   | ||||
| @@ -35,7 +35,7 @@ export class SmartPdf { | ||||
|       this.externalBrowserBool = true; | ||||
|     } else { | ||||
|       this.headlessBrowser = await plugins.smartpuppeteer.getEnvAwareBrowserInstance({ | ||||
|         forceNoSandbox: true, | ||||
|         forceNoSandbox: false, | ||||
|       }); | ||||
|     } | ||||
|  | ||||
| @@ -104,6 +104,8 @@ export class SmartPdf { | ||||
|       printBackground: true, | ||||
|       displayHeaderFooter: false, | ||||
|     }); | ||||
|     // Convert Uint8Array to Node Buffer | ||||
|     const nodePdfBuffer = Buffer.from(pdfBuffer); | ||||
|     await page.close(); | ||||
|     delete this._candidates[pdfCandidate.pdfId]; | ||||
|     pdfCandidate.doneDeferred.resolve(); | ||||
| @@ -112,9 +114,9 @@ export class SmartPdf { | ||||
|       id: pdfCandidate.pdfId, | ||||
|       name: `${pdfCandidate.pdfId}.js`, | ||||
|       metadata: { | ||||
|         textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer), | ||||
|         textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer), | ||||
|       }, | ||||
|       buffer: pdfBuffer, | ||||
|       buffer: nodePdfBuffer, | ||||
|     }; | ||||
|   } | ||||
|  | ||||
| @@ -139,14 +141,16 @@ export class SmartPdf { | ||||
|       printBackground: true, | ||||
|       displayHeaderFooter: false, | ||||
|     }); | ||||
|     // Convert Uint8Array to Node Buffer | ||||
|     const nodePdfBuffer = Buffer.from(pdfBuffer); | ||||
|     await page.close(); | ||||
|     return { | ||||
|       id: pdfId, | ||||
|       name: `${pdfId}.js`, | ||||
|       metadata: { | ||||
|         textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer), | ||||
|         textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer), | ||||
|       }, | ||||
|       buffer: pdfBuffer, | ||||
|       buffer: nodePdfBuffer, | ||||
|     }; | ||||
|   } | ||||
|  | ||||
| @@ -159,12 +163,20 @@ export class SmartPdf { | ||||
|     await page.emulateMediaType('screen'); | ||||
|     const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' }); | ||||
|     const pdfId = plugins.smartunique.shortId(); | ||||
|     // Use both document.body and document.documentElement to ensure we have a valid height and width. | ||||
|     const { documentHeight, documentWidth } = await page.evaluate(() => { | ||||
|       return { | ||||
|         documentHeight: document.body.scrollHeight, | ||||
|         documentWidth: document.body.clientWidth, | ||||
|         documentHeight: Math.max( | ||||
|           document.body.scrollHeight, | ||||
|           document.documentElement.scrollHeight | ||||
|         ) || 1200, | ||||
|         documentWidth: Math.max( | ||||
|           document.body.clientWidth, | ||||
|           document.documentElement.clientWidth | ||||
|         ) || 1920, | ||||
|       }; | ||||
|     }); | ||||
|     // Update viewport height to the full document height. | ||||
|     await page.setViewport({ | ||||
|       width: 1920, | ||||
|       height: documentHeight, | ||||
| @@ -177,14 +189,16 @@ export class SmartPdf { | ||||
|       scale: 1, | ||||
|       pageRanges: '1', | ||||
|     }); | ||||
|     // Convert Uint8Array to Node Buffer | ||||
|     const nodePdfBuffer = Buffer.from(pdfBuffer); | ||||
|     await page.close(); | ||||
|     return { | ||||
|       id: pdfId, | ||||
|       name: `${pdfId}.js`, | ||||
|       metadata: { | ||||
|         textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer), | ||||
|         textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer), | ||||
|       }, | ||||
|       buffer: pdfBuffer, | ||||
|       buffer: nodePdfBuffer, | ||||
|     }; | ||||
|   } | ||||
|  | ||||
| @@ -212,7 +226,7 @@ export class SmartPdf { | ||||
|     }; | ||||
|   } | ||||
|  | ||||
|   public async extractTextFromPdfBuffer(pdfBufferArg: Uint8Array): Promise<string> { | ||||
|   public async extractTextFromPdfBuffer(pdfBufferArg: Buffer): Promise<string> { | ||||
|     const deferred = plugins.smartpromise.defer<string>(); | ||||
|     const pdfParser: any = new plugins.pdf2json(); | ||||
|     pdfParser.on('pdfParser_dataReady', (pdfData: any) => { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user