import * as plugins from './smartpdf.plugins.js';
import * as paths from './smartpdf.paths.js';
import { Server } from 'http';
import { PdfCandidate } from './smartpdf.classes.pdfcandidate.js';
import { type IPdf } from '@tsclass/tsclass/dist_ts/business/pdf.js';

// `document` only exists inside the browser page; it is referenced from
// callbacks handed to `page.evaluate`, never executed in Node itself.
declare const document: any;

/** The page type produced by the headless browser used by this class. */
type BrowserPage = Awaited<ReturnType<plugins.smartpuppeteer.puppeteer.Browser['newPage']>>;

/**
 * Renders HTML strings and websites to PDF through a headless browser and
 * offers a few PDF utilities (merge, text extraction, PNG conversion).
 *
 * HTML strings are served by a local express server so that the browser can
 * load them via a regular http URL.
 */
export class SmartPdf {
  // STATIC

  /**
   * Creates a new, not yet started instance. Call `start()` before rendering.
   */
  public static async create(): Promise<SmartPdf> {
    const smartpdfInstance = new SmartPdf();
    return smartpdfInstance;
  }

  /** Port the local html server listens on. */
  private static readonly defaultServerPort = 3210;

  // INSTANCE
  htmlServerInstance: Server;
  /** Port of the local html server; assigned in `start()`. */
  serverPort: number;
  headlessBrowser: plugins.smartpuppeteer.puppeteer.Browser;
  /** True when the browser was handed in by the caller and must not be closed by `stop()`. */
  externalBrowserBool: boolean = false;

  /** Resolved once the html server is listening. */
  private _readyDeferred: plugins.smartpromise.Deferred<void>;
  /** Candidates currently being rendered, keyed by their pdfId. */
  private _candidates: { [key: string]: PdfCandidate } = {};

  constructor() {
    this._readyDeferred = new plugins.smartpromise.Deferred<void>();
  }

  /**
   * Starts the headless browser (unless one is supplied) and the local html server.
   *
   * @param headlessBrowserArg optional externally managed browser; when given,
   *   `stop()` leaves it open.
   * @throws when the server port is already in use or the server fails to listen.
   */
  async start(headlessBrowserArg?: plugins.smartpuppeteer.puppeteer.Browser) {
    const done = plugins.smartpromise.defer<void>();
    this.serverPort = SmartPdf.defaultServerPort;

    // Fail early with a clear message instead of racing the port probe
    // against our own listen() call.
    const smartnetworkInstance = new plugins.smartnetwork.SmartNetwork();
    const portAvailable = await smartnetworkInstance.isLocalPortUnused(this.serverPort);
    if (!portAvailable) {
      throw new Error(`SmartPdf cannot start: local port ${this.serverPort} is already in use.`);
    }

    // lets set the external browser in case one is provided
    this.headlessBrowser = headlessBrowserArg;

    // setup puppeteer
    if (this.headlessBrowser) {
      this.externalBrowserBool = true;
    } else {
      this.headlessBrowser = await plugins.smartpuppeteer.getEnvAwareBrowserInstance({
        forceNoSandbox: true,
      });
    }

    // setup server
    const app = plugins.express();
    app.get('/:pdfId', (req, res) => {
      const candidate = this._candidates[req.params.pdfId];
      if (!candidate) {
        // Unknown or already finished candidate: answer cleanly instead of
        // throwing on an undefined lookup.
        res.sendStatus(404);
        return;
      }
      res.setHeader('PDF-ID', candidate.pdfId);
      res.send(candidate.htmlString);
    });
    this.htmlServerInstance = plugins.http.createServer(app);

    // Without an 'error' listener a failing listen() would surface as an
    // unhandled event while start() stays pending forever.
    const onListenError = (errorArg: Error) => {
      done.reject(errorArg);
    };
    this.htmlServerInstance.once('error', onListenError);
    this.htmlServerInstance.once('listening', () => {
      this.htmlServerInstance.off('error', onListenError);
      this._readyDeferred.resolve();
      done.resolve();
    });
    this.htmlServerInstance.listen(this.serverPort, 'localhost');
    await done.promise;
  }

  /**
   * Stops the html server and closes the browser if it was launched by `start()`.
   * Safe to call when `start()` was never run.
   */
  async stop() {
    const done = plugins.smartpromise.defer<void>();
    if (this.htmlServerInstance) {
      this.htmlServerInstance.close(() => {
        done.resolve();
      });
    } else {
      done.resolve();
    }

    // The browser is closed before waiting for the server, as in the original
    // ordering; an externally supplied browser is left untouched.
    if (!this.externalBrowserBool && this.headlessBrowser) {
      await this.headlessBrowser.close();
    }

    await done.promise;
  }

  /**
   * Opens a fresh browser page, runs the task with it and always closes the
   * page afterwards, even when the task throws.
   */
  private async withPage<T>(taskArg: (pageArg: BrowserPage) => Promise<T>): Promise<T> {
    const page = await this.headlessBrowser.newPage();
    try {
      return await taskArg(page);
    } finally {
      await page.close();
    }
  }

  /**
   * Returns a pdf for a given html string, rendered with a 794x1122 viewport and page size.
   *
   * Resolves to `undefined` (after logging) when the served page does not
   * carry the expected `PDF-ID` header.
   */
  async getA4PdfResultForHtmlString(htmlStringArg: string): Promise<IPdf> {
    await this._readyDeferred.promise;
    const pdfCandidate = new PdfCandidate(htmlStringArg);
    this._candidates[pdfCandidate.pdfId] = pdfCandidate;

    try {
      const pdfBuffer = await this.withPage(async (page) => {
        await page.setViewport({
          width: 794,
          height: 1122,
        });
        const response = await page.goto(
          `http://localhost:${this.serverPort}/${pdfCandidate.pdfId}`,
          {
            waitUntil: 'networkidle2',
          }
        );
        // goto may yield no response object; treat that like a failed check.
        const servedPdfId = response ? response.headers()['pdf-id'] : undefined;
        if (servedPdfId !== pdfCandidate.pdfId) {
          console.log('Error! Headers do not match. For security reasons no pdf is being emitted!');
          return null;
        }
        console.log(`id security check passed for ${pdfCandidate.pdfId}`);

        return page.pdf({
          width: 794,
          height: 1122,
          printBackground: true,
          displayHeaderFooter: false,
        });
      });

      if (!pdfBuffer) {
        return;
      }

      pdfCandidate.doneDeferred.resolve();
      await pdfCandidate.doneDeferred.promise;
      return {
        id: pdfCandidate.pdfId,
        // NOTE(review): the `.js` suffix looks unintended for a pdf, but it is
        // kept because callers may rely on the current name — confirm.
        name: `${pdfCandidate.pdfId}.js`,
        metadata: {
          textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
        },
        buffer: pdfBuffer,
      };
    } finally {
      // Always unregister the candidate so failed renders do not pile up.
      delete this._candidates[pdfCandidate.pdfId];
    }
  }

  /**
   * Loads the given url with screen media emulation and returns it as pdf.
   */
  async getPdfResultForWebsite(websiteUrl: string): Promise<IPdf> {
    const pdfId = plugins.smartunique.shortId();
    const pdfBuffer = await this.withPage(async (page) => {
      await page.setViewport({
        width: 1980,
        height: 1200,
      });
      await page.emulateMediaType('screen');
      await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
      // NOTE(review): `document.height` / `document.width` are non-standard
      // properties and are presumably undefined in current browsers, in which
      // case no explicit size reaches page.pdf(). Left unchanged because
      // switching to scroll dimensions would alter the resulting layout — confirm intent.
      const { documentHeight, documentWidth } = await page.evaluate(() => {
        return {
          documentHeight: document.height,
          documentWidth: document.width,
        };
      });
      return page.pdf({
        height: documentHeight,
        width: documentWidth,
        printBackground: true,
        displayHeaderFooter: false,
      });
    });

    return {
      id: pdfId,
      name: `${pdfId}.js`,
      metadata: {
        textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
      },
      buffer: pdfBuffer,
    };
  }

  /**
   * Loads the given url and returns the first pdf page, sized 1920 wide and as
   * tall as the document body's scroll height.
   */
  async getFullWebsiteAsSinglePdf(websiteUrl: string): Promise<IPdf> {
    const pdfId = plugins.smartunique.shortId();
    const pdfBuffer = await this.withPage(async (page) => {
      await page.setViewport({
        width: 1920,
        height: 1200,
      });
      // Awaited so the media type is applied before navigation starts.
      await page.emulateMediaType('screen');
      await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
      const { documentHeight } = await page.evaluate(() => {
        return {
          documentHeight: document.body.scrollHeight,
          documentWidth: document.body.clientWidth,
        };
      });
      // Grow the viewport to the measured body height before printing.
      await page.setViewport({
        width: 1920,
        height: documentHeight,
      });
      return page.pdf({
        height: documentHeight,
        width: 1920,
        printBackground: true,
        displayHeaderFooter: false,
        scale: 1,
        pageRanges: '1',
      });
    });

    return {
      id: pdfId,
      name: `${pdfId}.js`,
      metadata: {
        textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
      },
      buffer: pdfBuffer,
    };
  }

  /**
   * Concatenates the pages of all given pdfs, in input order, into one pdf.
   *
   * @param inputPdfBuffers raw bytes of the pdfs to merge
   * @returns the bytes of the merged pdf
   */
  public async mergePdfs(inputPdfBuffers: Uint8Array[]): Promise<Uint8Array> {
    const mergedPdf = await plugins.pdfLib.PDFDocument.create();
    for (const pdfBytes of inputPdfBuffers) {
      const sourcePdf = await plugins.pdfLib.PDFDocument.load(pdfBytes);
      const copiedPages = await mergedPdf.copyPages(sourcePdf, sourcePdf.getPageIndices());
      for (const copiedPage of copiedPages) {
        mergedPdf.addPage(copiedPage);
      }
    }
    return mergedPdf.save();
  }

  /**
   * Reads a pdf file from disk into a pdf object.
   * `id` and `metadata` are left as `null`; `name` is the file's base name.
   */
  public async readFileToPdfObject(pathArg: string): Promise<IPdf> {
    const path = plugins.smartpath.transform.makeAbsolute(pathArg);
    const parsedPath = plugins.path.parse(path);
    const buffer = await plugins.smartfile.fs.toBuffer(path);
    return {
      name: parsedPath.base,
      buffer,
      id: null,
      metadata: null,
    };
  }

  /**
   * Extracts the text runs of every page of a pdf and concatenates them
   * without separators.
   *
   * @throws (rejects) when the parser reports a data error.
   */
  public async extractTextFromPdfBuffer(pdfBufferArg: Buffer): Promise<string> {
    const deferred = plugins.smartpromise.defer<string>();
    const pdfParser: any = new plugins.pdf2json();
    // Without this listener a broken pdf would leave the promise pending forever.
    pdfParser.on('pdfParser_dataError', (errorData: any) => {
      deferred.reject(errorData && errorData.parserError ? errorData.parserError : errorData);
    });
    pdfParser.on('pdfParser_dataReady', (pdfData: any) => {
      const textRuns: string[] = [];
      for (const page of pdfData.Pages) {
        for (const text of page.Texts) {
          for (const letter of text.R) {
            textRuns.push(letter.T);
          }
        }
      }
      deferred.resolve(textRuns.join(''));
    });
    pdfParser.parseBuffer(pdfBufferArg);
    return deferred.promise;
  }

  /**
   * Converts every page of a pdf into a PNG image.
   *
   * @param pdfBytes raw bytes of the pdf
   * @param options output image size and quality; defaults to 1024x768 at quality 100
   * @returns one image buffer per page, in page order
   */
  public async convertPDFToPngBytes(
    pdfBytes: Uint8Array,
    options: {
      width?: number;
      height?: number;
      quality?: number;
    } = {}
  ) {
    const { width = 1024, height = 768, quality = 100 } = options;

    // The document is only loaded to learn how many pages need converting.
    const pdfDoc = await plugins.pdfLib.PDFDocument.load(pdfBytes);
    const numPages = pdfDoc.getPageCount();

    const converter = plugins.pdf2pic.fromBuffer(Buffer.from(pdfBytes), {
      density: 100, // Image density (DPI)
      format: 'png', // Image format
      width, // Output image width
      height, // Output image height
      quality, // Output image quality
    });

    // The converter is called with 1-based page numbers; all pages are converted concurrently.
    const imageBuffers = await Promise.all(
      Array.from({ length: numPages }, async (_unused, pageIndex) => {
        const output = await converter(pageIndex + 1, {
          responseType: 'buffer',
        });
        return output.buffer;
      })
    );
    return imageBuffers;
  }
}