fix(SmartPdf): Fix buffer handling for PDF conversion and text extraction

2025-02-25 18:22:06 +00:00
parent 29d3cbb0b6
commit 9908897aa2
3 changed files with 32 additions and 11 deletions
--- a/changelog.md
+++ b/changelog.md
@@ -1,5 +1,12 @@
 # Changelog

+## 2025-02-25 - 3.2.2 - fix(SmartPdf)
+Fix buffer handling for PDF conversion and text extraction
+
+- Ensure Uint8Array is converted to Node Buffer for PDF conversion.
+- Correct PDF page viewport handling by measuring both document.body and document.documentElement, with fallback dimensions.
+- Change the extractTextFromPdfBuffer parameter type from Uint8Array to Buffer.
+
 ## 2025-02-25 - 3.2.1 - fix(SmartPdf)
 Fix type for extractTextFromPdfBuffer function

--- a/ts/00_commitinfo_data.ts
+++ b/ts/00_commitinfo_data.ts
@@ -3,6 +3,6 @@
 */
 export const commitinfo = {
  name: '@push.rocks/smartpdf',
-  version: '3.2.1',
+  version: '3.2.2',
  description: 'A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.'
 }
--- a/ts/smartpdf.classes.smartpdf.ts
+++ b/ts/smartpdf.classes.smartpdf.ts
@@ -35,7 +35,7 @@ export class SmartPdf {
      this.externalBrowserBool = true;
    } else {
      this.headlessBrowser = await plugins.smartpuppeteer.getEnvAwareBrowserInstance({
-        forceNoSandbox: true,
+        forceNoSandbox: false,
      });
    }

@@ -104,6 +104,8 @@ export class SmartPdf {
      printBackground: true,
      displayHeaderFooter: false,
    });
+    // Convert Uint8Array to Node Buffer
+    const nodePdfBuffer = Buffer.from(pdfBuffer);
    await page.close();
    delete this._candidates[pdfCandidate.pdfId];
    pdfCandidate.doneDeferred.resolve();
@@ -112,9 +114,9 @@ export class SmartPdf {
      id: pdfCandidate.pdfId,
      name: `${pdfCandidate.pdfId}.js`,
      metadata: {
-        textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
+        textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
      },
-      buffer: pdfBuffer,
+      buffer: nodePdfBuffer,
    };
  }

@@ -139,14 +141,16 @@ export class SmartPdf {
      printBackground: true,
      displayHeaderFooter: false,
    });
+    // Convert Uint8Array to Node Buffer
+    const nodePdfBuffer = Buffer.from(pdfBuffer);
    await page.close();
    return {
      id: pdfId,
      name: `${pdfId}.js`,
      metadata: {
-        textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
+        textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
      },
-      buffer: pdfBuffer,
+      buffer: nodePdfBuffer,
    };
  }

@@ -159,12 +163,20 @@ export class SmartPdf {
    await page.emulateMediaType('screen');
    const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
    const pdfId = plugins.smartunique.shortId();
+    // Use both document.body and document.documentElement to ensure we have a valid height and width.
    const { documentHeight, documentWidth } = await page.evaluate(() => {
      return {
-        documentHeight: document.body.scrollHeight,
-        documentWidth: document.body.clientWidth,
+        documentHeight: Math.max(
+          document.body.scrollHeight,
+          document.documentElement.scrollHeight
+        ) || 1200,
+        documentWidth: Math.max(
+          document.body.clientWidth,
+          document.documentElement.clientWidth
+        ) || 1920,
      };
    });
+    // Update viewport height to the full document height.
    await page.setViewport({
      width: 1920,
      height: documentHeight,
@@ -177,14 +189,16 @@ export class SmartPdf {
      scale: 1,
      pageRanges: '1',
    });
+    // Convert Uint8Array to Node Buffer
+    const nodePdfBuffer = Buffer.from(pdfBuffer);
    await page.close();
    return {
      id: pdfId,
      name: `${pdfId}.js`,
      metadata: {
-        textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
+        textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
      },
-      buffer: pdfBuffer,
+      buffer: nodePdfBuffer,
    };
  }

@@ -212,7 +226,7 @@ export class SmartPdf {
    };
  }

-  public async extractTextFromPdfBuffer(pdfBufferArg: Uint8Array): Promise<string> {
+  public async extractTextFromPdfBuffer(pdfBufferArg: Buffer): Promise<string> {
    const deferred = plugins.smartpromise.defer<string>();
    const pdfParser: any = new plugins.pdf2json();
    pdfParser.on('pdfParser_dataReady', (pdfData: any) => {