fix(SmartPdf): Fix buffer handling for PDF conversion and text extraction
This commit is contained in:
parent
29d3cbb0b6
commit
9908897aa2
@ -1,5 +1,12 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 2025-02-25 - 3.2.2 - fix(SmartPdf)
|
||||||
|
Fix buffer handling for PDF conversion and text extraction
|
||||||
|
|
||||||
|
- Ensure the Uint8Array PDF buffer is converted to a Node.js Buffer before it is passed to text extraction and returned in the result.
|
||||||
|
- Correct the PDF page viewport handling by using document dimensions.
|
||||||
|
- Change the extractTextFromPdfBuffer argument type from Uint8Array to Buffer.
|
||||||
|
|
||||||
## 2025-02-25 - 3.2.1 - fix(SmartPdf)
|
## 2025-02-25 - 3.2.1 - fix(SmartPdf)
|
||||||
Fix type for extractTextFromPdfBuffer function
|
Fix type for extractTextFromPdfBuffer function
|
||||||
|
|
||||||
|
@ -3,6 +3,6 @@
|
|||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: '@push.rocks/smartpdf',
|
name: '@push.rocks/smartpdf',
|
||||||
version: '3.2.1',
|
version: '3.2.2',
|
||||||
description: 'A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.'
|
description: 'A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.'
|
||||||
}
|
}
|
||||||
|
@ -35,7 +35,7 @@ export class SmartPdf {
|
|||||||
this.externalBrowserBool = true;
|
this.externalBrowserBool = true;
|
||||||
} else {
|
} else {
|
||||||
this.headlessBrowser = await plugins.smartpuppeteer.getEnvAwareBrowserInstance({
|
this.headlessBrowser = await plugins.smartpuppeteer.getEnvAwareBrowserInstance({
|
||||||
forceNoSandbox: true,
|
forceNoSandbox: false,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -104,6 +104,8 @@ export class SmartPdf {
|
|||||||
printBackground: true,
|
printBackground: true,
|
||||||
displayHeaderFooter: false,
|
displayHeaderFooter: false,
|
||||||
});
|
});
|
||||||
|
// Convert Uint8Array to Node Buffer
|
||||||
|
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||||
await page.close();
|
await page.close();
|
||||||
delete this._candidates[pdfCandidate.pdfId];
|
delete this._candidates[pdfCandidate.pdfId];
|
||||||
pdfCandidate.doneDeferred.resolve();
|
pdfCandidate.doneDeferred.resolve();
|
||||||
@ -112,9 +114,9 @@ export class SmartPdf {
|
|||||||
id: pdfCandidate.pdfId,
|
id: pdfCandidate.pdfId,
|
||||||
name: `${pdfCandidate.pdfId}.js`,
|
name: `${pdfCandidate.pdfId}.js`,
|
||||||
metadata: {
|
metadata: {
|
||||||
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||||
},
|
},
|
||||||
buffer: pdfBuffer,
|
buffer: nodePdfBuffer,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -139,14 +141,16 @@ export class SmartPdf {
|
|||||||
printBackground: true,
|
printBackground: true,
|
||||||
displayHeaderFooter: false,
|
displayHeaderFooter: false,
|
||||||
});
|
});
|
||||||
|
// Convert Uint8Array to Node Buffer
|
||||||
|
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||||
await page.close();
|
await page.close();
|
||||||
return {
|
return {
|
||||||
id: pdfId,
|
id: pdfId,
|
||||||
name: `${pdfId}.js`,
|
name: `${pdfId}.js`,
|
||||||
metadata: {
|
metadata: {
|
||||||
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||||
},
|
},
|
||||||
buffer: pdfBuffer,
|
buffer: nodePdfBuffer,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -159,12 +163,20 @@ export class SmartPdf {
|
|||||||
await page.emulateMediaType('screen');
|
await page.emulateMediaType('screen');
|
||||||
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
||||||
const pdfId = plugins.smartunique.shortId();
|
const pdfId = plugins.smartunique.shortId();
|
||||||
|
// Use both document.body and document.documentElement to ensure we have a valid height and width.
|
||||||
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
||||||
return {
|
return {
|
||||||
documentHeight: document.body.scrollHeight,
|
documentHeight: Math.max(
|
||||||
documentWidth: document.body.clientWidth,
|
document.body.scrollHeight,
|
||||||
|
document.documentElement.scrollHeight
|
||||||
|
) || 1200,
|
||||||
|
documentWidth: Math.max(
|
||||||
|
document.body.clientWidth,
|
||||||
|
document.documentElement.clientWidth
|
||||||
|
) || 1920,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
// Update viewport height to the full document height.
|
||||||
await page.setViewport({
|
await page.setViewport({
|
||||||
width: 1920,
|
width: 1920,
|
||||||
height: documentHeight,
|
height: documentHeight,
|
||||||
@ -177,14 +189,16 @@ export class SmartPdf {
|
|||||||
scale: 1,
|
scale: 1,
|
||||||
pageRanges: '1',
|
pageRanges: '1',
|
||||||
});
|
});
|
||||||
|
// Convert Uint8Array to Node Buffer
|
||||||
|
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||||
await page.close();
|
await page.close();
|
||||||
return {
|
return {
|
||||||
id: pdfId,
|
id: pdfId,
|
||||||
name: `${pdfId}.js`,
|
name: `${pdfId}.js`,
|
||||||
metadata: {
|
metadata: {
|
||||||
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||||
},
|
},
|
||||||
buffer: pdfBuffer,
|
buffer: nodePdfBuffer,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -212,7 +226,7 @@ export class SmartPdf {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
public async extractTextFromPdfBuffer(pdfBufferArg: Uint8Array): Promise<string> {
|
public async extractTextFromPdfBuffer(pdfBufferArg: Buffer): Promise<string> {
|
||||||
const deferred = plugins.smartpromise.defer<string>();
|
const deferred = plugins.smartpromise.defer<string>();
|
||||||
const pdfParser: any = new plugins.pdf2json();
|
const pdfParser: any = new plugins.pdf2json();
|
||||||
pdfParser.on('pdfParser_dataReady', (pdfData: any) => {
|
pdfParser.on('pdfParser_dataReady', (pdfData: any) => {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user