import * as plugins from './smartpdf.plugins.js';
import * as paths from './smartpdf.paths.js';
import { Server } from 'http';
import { PdfCandidate } from './smartpdf.classes.pdfcandidate.js';
import { type IPdf } from '@tsclass/tsclass/dist_ts/business/pdf.js';
import { execFile } from 'child_process';

declare const document: any;

/** Page type as produced by the configured headless browser. */
type BrowserPage = Awaited<ReturnType<plugins.smartpuppeteer.puppeteer.Browser['newPage']>>;

/**
 * Renders HTML strings and websites to PDF through a headless browser and
 * offers helpers to merge PDFs, extract their text and rasterize them to PNG.
 *
 * HTML strings are served to the browser by a small local express server that
 * is started in `start()` and shut down in `stop()`.
 */
export class SmartPdf {
  // STATIC
  /**
   * Creates a new, not yet started, SmartPdf instance.
   */
  public static async create() {
    const smartpdfInstance = new SmartPdf();
    return smartpdfInstance;
  }

  // INSTANCE
  htmlServerInstance: Server;
  /** Port the local HTML server listens on; may be changed before calling `start()`. */
  serverPort: number = 3210;
  headlessBrowser: plugins.smartpuppeteer.puppeteer.Browser;
  /** True when the browser was handed in by the caller and must not be closed by `stop()`. */
  externalBrowserBool: boolean = false;
  private _readyDeferred: plugins.smartpromise.Deferred<void>;
  private _candidates: { [key: string]: PdfCandidate } = {};

  constructor() {
    this._readyDeferred = new plugins.smartpromise.Deferred<void>();
  }

  /**
   * Starts the headless browser (unless one is provided) and the local HTML server.
   *
   * @param headlessBrowserArg optional externally managed browser; it is used as-is
   *   and is not closed by `stop()`.
   * @throws if the local server cannot start listening (e.g. the port is in use).
   */
  async start(headlessBrowserArg?: plugins.smartpuppeteer.puppeteer.Browser) {
    const done = plugins.smartpromise.defer<void>();

    // lets set the external browser in case one is provided
    this.headlessBrowser = headlessBrowserArg;
    this.externalBrowserBool = Boolean(headlessBrowserArg);

    // setup puppeteer
    if (!this.externalBrowserBool) {
      this.headlessBrowser = await plugins.smartpuppeteer.getEnvAwareBrowserInstance({
        forceNoSandbox: true,
      });
    }

    // setup server
    const app = plugins.express();
    app.get('/:pdfId', (req, res) => {
      const wantedCandidate = this._candidates[req.params.pdfId];
      if (!wantedCandidate) {
        console.log(`${req.url} not attached to a candidate`);
        // Answer explicitly so that stray requests do not hang until they time out.
        res.status(404).send('not found');
        return;
      }
      res.setHeader('pdf-id', wantedCandidate.pdfId);
      res.send(wantedCandidate.htmlString);
    });
    this.htmlServerInstance = plugins.http.createServer(app);

    // A failing listen() is reported through the 'error' event; without a handler
    // start() would never settle.
    this.htmlServerInstance.once('error', (err) => {
      done.reject(err);
    });
    this.htmlServerInstance.once('listening', () => {
      this._readyDeferred.resolve();
      done.resolve();
    });
    this.htmlServerInstance.listen(this.serverPort, 'localhost');

    try {
      await done.promise;
    } catch (err) {
      // Do not leave a browser we launched ourselves running when startup fails.
      if (!this.externalBrowserBool) {
        await this.headlessBrowser.close();
      }
      throw err;
    }
  }

  // stop
  /**
   * Closes the local HTML server and, if it was launched by this instance,
   * the headless browser.
   */
  async stop() {
    const done = plugins.smartpromise.defer<void>();
    this.htmlServerInstance.close(() => {
      done.resolve();
    });

    if (!this.externalBrowserBool) {
      await this.headlessBrowser.close();
    }

    await done.promise;
  }

  /**
   * Opens a fresh browser page, runs `action` with it and always closes the page
   * afterwards, even when `action` throws.
   */
  private async withPage<T>(action: (page: BrowserPage) => Promise<T>): Promise<T> {
    const page = await this.headlessBrowser.newPage();
    try {
      return await action(page);
    } finally {
      await page.close();
    }
  }

  /**
   * Returns a PDF for a given HTML string.
   *
   * The HTML is registered as a candidate, loaded through the local server and
   * printed with a 794x1122 page size.
   *
   * @param htmlStringArg the HTML document to render.
   * @returns the PDF object, or `undefined` when the served `pdf-id` header does
   *   not match the candidate (no PDF is emitted in that case).
   */
  async getA4PdfResultForHtmlString(htmlStringArg: string): Promise<IPdf> {
    await this._readyDeferred.promise;
    const pdfCandidate = new PdfCandidate(htmlStringArg);
    this._candidates[pdfCandidate.pdfId] = pdfCandidate;

    let pdfBuffer;
    try {
      pdfBuffer = await this.withPage(async (page) => {
        await page.setViewport({
          width: 794,
          height: 1122,
        });
        const response = await page.goto(
          `http://localhost:${this.serverPort}/${pdfCandidate.pdfId}`,
          {
            waitUntil: 'networkidle2',
          }
        );
        // goto() may resolve without a response; treat that like a header mismatch.
        const headers: Record<string, string> = response ? response.headers() : {};
        if (headers['pdf-id'] !== pdfCandidate.pdfId) {
          console.log('Error! Headers do not match. For security reasons no pdf is being emitted!');
          return null;
        }
        console.log(`id security check passed for ${pdfCandidate.pdfId}`);

        return page.pdf({
          width: 794,
          height: 1122,
          printBackground: true,
          displayHeaderFooter: false,
        });
      });
    } finally {
      // The candidate must not stay registered, whatever the outcome was.
      delete this._candidates[pdfCandidate.pdfId];
    }

    if (!pdfBuffer) {
      return;
    }

    pdfCandidate.doneDeferred.resolve();
    await pdfCandidate.doneDeferred.promise;
    return {
      id: pdfCandidate.pdfId,
      // NOTE(review): the name ends in `.js` although the payload is a PDF —
      // confirm whether callers rely on this before changing it.
      name: `${pdfCandidate.pdfId}.js`,
      metadata: {
        textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
      },
      buffer: pdfBuffer,
    };
  }

  /**
   * Loads a website with screen media emulation and prints it to PDF.
   *
   * @param websiteUrl the URL to load.
   */
  async getPdfResultForWebsite(websiteUrl: string): Promise<IPdf> {
    const pdfId = plugins.smartunique.shortId();
    const pdfBuffer = await this.withPage(async (page) => {
      await page.setViewport({
        width: 1980,
        height: 1200,
      });
      await page.emulateMediaType('screen');
      await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
      // NOTE(review): `document.height` / `document.width` are non-standard and
      // presumably evaluate to undefined in current Chromium, which would leave the
      // PDF size to puppeteer's defaults — confirm the intended page size.
      const { documentHeight, documentWidth } = await page.evaluate(() => {
        return {
          documentHeight: document.height,
          documentWidth: document.width,
        };
      });
      return page.pdf({
        height: documentHeight,
        width: documentWidth,
        printBackground: true,
        displayHeaderFooter: false,
      });
    });

    return {
      id: pdfId,
      name: `${pdfId}.js`,
      metadata: {
        textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
      },
      buffer: pdfBuffer,
    };
  }

  /**
   * Loads a website and prints it as one single PDF page that is 1920 wide and
   * as tall as the document body's scroll height.
   *
   * @param websiteUrl the URL to load.
   */
  async getFullWebsiteAsSinglePdf(websiteUrl: string): Promise<IPdf> {
    const pdfId = plugins.smartunique.shortId();
    const pdfBuffer = await this.withPage(async (page) => {
      await page.setViewport({
        width: 1920,
        height: 1200,
      });
      await page.emulateMediaType('screen');
      await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
      const { documentHeight } = await page.evaluate(() => {
        return {
          documentHeight: document.body.scrollHeight,
          documentWidth: document.body.clientWidth,
        };
      });
      // Resize the viewport to the full document height before printing.
      await page.setViewport({
        width: 1920,
        height: documentHeight,
      });
      return page.pdf({
        height: documentHeight,
        width: 1920,
        printBackground: true,
        displayHeaderFooter: false,
        scale: 1,
        pageRanges: '1',
      });
    });

    return {
      id: pdfId,
      name: `${pdfId}.js`,
      metadata: {
        textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
      },
      buffer: pdfBuffer,
    };
  }

  /**
   * Concatenates the pages of all given PDFs, in input order, into one PDF.
   *
   * @param inputPdfBuffers the PDFs to merge.
   * @returns the bytes of the merged PDF.
   */
  public async mergePdfs(inputPdfBuffers: Uint8Array[]): Promise<Uint8Array> {
    const mergedPdf = await plugins.pdfLib.PDFDocument.create();
    for (const pdfBytes of inputPdfBuffers) {
      const pdfDoc = await plugins.pdfLib.PDFDocument.load(pdfBytes);
      const pages = await mergedPdf.copyPages(pdfDoc, pdfDoc.getPageIndices());
      for (const page of pages) {
        mergedPdf.addPage(page);
      }
    }
    return mergedPdf.save();
  }

  /**
   * Reads a file from disk into a PDF object. `id` and `metadata` are left `null`.
   *
   * @param pathArg path of the file; it is made absolute before reading.
   */
  public async readFileToPdfObject(pathArg: string): Promise<IPdf> {
    const absolutePath = plugins.smartpath.transform.makeAbsolute(pathArg);
    const parsedPath = plugins.path.parse(absolutePath);
    const buffer = await plugins.smartfile.fs.toBuffer(absolutePath);
    return {
      name: parsedPath.base,
      buffer,
      id: null,
      metadata: null,
    };
  }

  /**
   * Extracts the text of a PDF by concatenating every text run of every page.
   *
   * @param pdfBufferArg the PDF bytes.
   * @returns the concatenated text.
   * @throws if the parser reports a data error.
   */
  public async extractTextFromPdfBuffer(pdfBufferArg: Uint8Array): Promise<string> {
    const deferred = plugins.smartpromise.defer<string>();
    const pdfParser: any = new plugins.pdf2json();
    pdfParser.on('pdfParser_dataReady', (pdfData: any) => {
      const textParts: string[] = [];
      for (const page of pdfData.Pages) {
        for (const text of page.Texts) {
          for (const letter of text.R) {
            textParts.push(letter.T);
          }
        }
      }
      deferred.resolve(textParts.join(''));
    });
    // Without this handler a malformed PDF would leave the promise pending forever.
    pdfParser.on('pdfParser_dataError', (errData: any) => {
      const cause = errData && errData.parserError ? errData.parserError : errData;
      deferred.reject(cause instanceof Error ? cause : new Error(String(cause)));
    });
    pdfParser.parseBuffer(pdfBufferArg);
    return deferred.promise;
  }

  /**
   * Checks for the presence of required dependencies: GraphicsMagick and Ghostscript.
   *
   * @throws if `gm version` or `gs --version` cannot be executed.
   */
  private async checkDependencies(): Promise<void> {
    await Promise.all([
      this.checkCommandExists('gm', ['version']),
      this.checkCommandExists('gs', ['--version']),
    ]);
  }

  /**
   * Checks if a given command exists by trying to execute it.
   *
   * @param command the executable to run.
   * @param args arguments passed to the executable.
   * @returns a promise that rejects when executing the command fails.
   */
  private checkCommandExists(command: string, args: string[]): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      execFile(command, args, (error) => {
        if (error) {
          reject(
            new Error(
              `Dependency check failed: ${command} is not installed or not in the PATH. ${error.message}`
            )
          );
        } else {
          resolve();
        }
      });
    });
  }

  /**
   * Converts a PDF to PNG bytes for each page using Puppeteer and PDF.js.
   * This method creates a temporary HTML page that loads PDF.js from a CDN,
   * renders each PDF page to a canvas, and then screenshots each canvas element.
   *
   * @param pdfBytes the PDF to rasterize.
   * @param options currently not applied; the canvas size follows the PDF page dimensions.
   * @returns one PNG buffer per rendered canvas, in document order.
   */
  public async convertPDFToPngBytes(
    pdfBytes: Uint8Array,
    options: { width?: number; height?: number; quality?: number } = {}
  ): Promise<Uint8Array[]> {
    // Note: options.width, options.height, and options.quality are not applied here,
    // as the rendered canvas size is determined by the PDF page dimensions.

    // Prepare PDF data as a base64 string.
    const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');

    // HTML template that loads PDF.js and renders the PDF.
    // NOTE(review): CDN URL and PDF.js version are assumptions — confirm against the
    // version this project is meant to pin.
    const htmlTemplate: string = `
      <!DOCTYPE html>
      <html>
        <head>
          <meta charset="utf-8">
          <title>PDF to PNG Converter</title>
          <style>
            body { margin: 0; }
            canvas { display: block; margin: 10px auto; }
          </style>
          <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
        </head>
        <body>
          <script>
            (async function() {
              pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
              const pdfData = "__PDF_DATA__";
              const raw = atob(pdfData);
              const pdfArray = new Uint8Array(raw.length);
              for (let i = 0; i < raw.length; i++) {
                pdfArray[i] = raw.charCodeAt(i);
              }
              const pdf = await pdfjsLib.getDocument({ data: pdfArray }).promise;
              for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
                const page = await pdf.getPage(pageNum);
                const viewport = page.getViewport({ scale: 1.0 });
                const canvas = document.createElement('canvas');
                const context = canvas.getContext('2d');
                canvas.width = viewport.width;
                canvas.height = viewport.height;
                await page.render({ canvasContext: context, viewport: viewport }).promise;
                document.body.appendChild(canvas);
              }
              window.renderComplete = true;
            })();
          </script>
        </body>
      </html>
    `;

    // Replace the placeholder with the actual base64 PDF data. A replacer function
    // is used so the data is inserted literally.
    const htmlContent: string = htmlTemplate.replace('__PDF_DATA__', () => base64Pdf);

    return this.withPage(async (page) => {
      // Set the page content.
      await page.setContent(htmlContent, { waitUntil: 'networkidle0' });

      // Wait until the PDF.js rendering is complete.
      await page.waitForFunction(() => (window as any).renderComplete === true, {
        timeout: 30000,
      });

      // Query all canvas elements (each representing a rendered PDF page).
      const canvasElements = await page.$$('canvas');
      const pngBuffers: Uint8Array[] = [];

      for (const canvasElement of canvasElements) {
        // Screenshot the canvas element. The screenshot will be a PNG buffer.
        const screenshotBuffer = (await canvasElement.screenshot({
          encoding: 'binary',
        })) as Buffer;
        pngBuffers.push(new Uint8Array(screenshotBuffer));
      }

      return pngBuffers;
    });
  }
}