fix(SmartPdf): Fix buffer handling for PDF conversion and text extraction
This commit is contained in:
parent
29d3cbb0b6
commit
9908897aa2
@ -1,5 +1,12 @@
|
||||
# Changelog
|
||||
|
||||
## 2025-02-25 - 3.2.2 - fix(SmartPdf)
|
||||
Fix buffer handling for PDF conversion and text extraction
|
||||
|
||||
- Ensure the Uint8Array produced by PDF generation is converted to a Node.js Buffer before it is returned or passed to text extraction.
|
||||
- Correct the PDF page viewport handling by using document dimensions.
|
||||
- Change the extractTextFromPdfBuffer argument type from Uint8Array to Buffer.
|
||||
|
||||
## 2025-02-25 - 3.2.1 - fix(SmartPdf)
|
||||
Fix type for extractTextFromPdfBuffer function
|
||||
|
||||
|
@ -3,6 +3,6 @@
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: '@push.rocks/smartpdf',
|
||||
version: '3.2.1',
|
||||
version: '3.2.2',
|
||||
description: 'A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.'
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ export class SmartPdf {
|
||||
this.externalBrowserBool = true;
|
||||
} else {
|
||||
this.headlessBrowser = await plugins.smartpuppeteer.getEnvAwareBrowserInstance({
|
||||
forceNoSandbox: true,
|
||||
forceNoSandbox: false,
|
||||
});
|
||||
}
|
||||
|
||||
@ -104,6 +104,8 @@ export class SmartPdf {
|
||||
printBackground: true,
|
||||
displayHeaderFooter: false,
|
||||
});
|
||||
// Convert Uint8Array to Node Buffer
|
||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||
await page.close();
|
||||
delete this._candidates[pdfCandidate.pdfId];
|
||||
pdfCandidate.doneDeferred.resolve();
|
||||
@ -112,9 +114,9 @@ export class SmartPdf {
|
||||
id: pdfCandidate.pdfId,
|
||||
name: `${pdfCandidate.pdfId}.js`,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||
},
|
||||
buffer: pdfBuffer,
|
||||
buffer: nodePdfBuffer,
|
||||
};
|
||||
}
|
||||
|
||||
@ -139,14 +141,16 @@ export class SmartPdf {
|
||||
printBackground: true,
|
||||
displayHeaderFooter: false,
|
||||
});
|
||||
// Convert Uint8Array to Node Buffer
|
||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||
await page.close();
|
||||
return {
|
||||
id: pdfId,
|
||||
name: `${pdfId}.js`,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||
},
|
||||
buffer: pdfBuffer,
|
||||
buffer: nodePdfBuffer,
|
||||
};
|
||||
}
|
||||
|
||||
@ -159,12 +163,20 @@ export class SmartPdf {
|
||||
await page.emulateMediaType('screen');
|
||||
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
||||
const pdfId = plugins.smartunique.shortId();
|
||||
// Use both document.body and document.documentElement to ensure we have a valid height and width.
|
||||
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
||||
return {
|
||||
documentHeight: document.body.scrollHeight,
|
||||
documentWidth: document.body.clientWidth,
|
||||
documentHeight: Math.max(
|
||||
document.body.scrollHeight,
|
||||
document.documentElement.scrollHeight
|
||||
) || 1200,
|
||||
documentWidth: Math.max(
|
||||
document.body.clientWidth,
|
||||
document.documentElement.clientWidth
|
||||
) || 1920,
|
||||
};
|
||||
});
|
||||
// Update viewport height to the full document height.
|
||||
await page.setViewport({
|
||||
width: 1920,
|
||||
height: documentHeight,
|
||||
@ -177,14 +189,16 @@ export class SmartPdf {
|
||||
scale: 1,
|
||||
pageRanges: '1',
|
||||
});
|
||||
// Convert Uint8Array to Node Buffer
|
||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||
await page.close();
|
||||
return {
|
||||
id: pdfId,
|
||||
name: `${pdfId}.js`,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||
},
|
||||
buffer: pdfBuffer,
|
||||
buffer: nodePdfBuffer,
|
||||
};
|
||||
}
|
||||
|
||||
@ -212,7 +226,7 @@ export class SmartPdf {
|
||||
};
|
||||
}
|
||||
|
||||
public async extractTextFromPdfBuffer(pdfBufferArg: Uint8Array): Promise<string> {
|
||||
public async extractTextFromPdfBuffer(pdfBufferArg: Buffer): Promise<string> {
|
||||
const deferred = plugins.smartpromise.defer<string>();
|
||||
const pdfParser: any = new plugins.pdf2json();
|
||||
pdfParser.on('pdfParser_dataReady', (pdfData: any) => {
|
||||
|
Loading…
x
Reference in New Issue
Block a user