Compare commits

..

16 Commits

Author SHA1 Message Date
796aa905d2 2.0.18 2022-01-06 13:23:05 +01:00
253fb95143 fix(core): update 2022-01-06 13:23:04 +01:00
f3ea075b72 2.0.17 2022-01-06 13:10:12 +01:00
af725a7f78 fix(core): update 2022-01-06 13:10:12 +01:00
016e0db797 2.0.16 2022-01-05 23:55:38 +01:00
4cf8b2e1f8 fix(core): update 2022-01-05 23:55:37 +01:00
67b0aa9d47 2.0.15 2022-01-05 17:20:28 +01:00
567c6eafea fix(core): update 2022-01-05 17:20:28 +01:00
ff890fb2af 2.0.14 2022-01-05 16:32:48 +01:00
a512fd64b5 fix(core): update 2022-01-05 16:32:47 +01:00
377318a62a 2.0.13 2022-01-05 14:19:40 +01:00
671c871304 fix(core): update 2022-01-05 14:19:39 +01:00
e0cc6b5655 2.0.12 2022-01-05 14:17:43 +01:00
e74b44b49c fix(core): update 2022-01-05 14:17:43 +01:00
d6f0d88d4a 2.0.11 2021-10-14 16:04:58 +02:00
9674e5b8dc fix(core): update 2021-10-14 16:04:58 +02:00
6 changed files with 3461 additions and 3101 deletions

6462
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
{ {
"name": "@pushrocks/smartpdf", "name": "@pushrocks/smartpdf",
"version": "2.0.10", "version": "2.0.18",
"private": false, "private": false,
"description": "create pdfs on the fly", "description": "create pdfs on the fly",
"main": "dist_ts/index.js", "main": "dist_ts/index.js",
@ -14,21 +14,22 @@
"devDependencies": { "devDependencies": {
"@gitzone/tsbuild": "^2.1.28", "@gitzone/tsbuild": "^2.1.28",
"@gitzone/tsrun": "^1.2.18", "@gitzone/tsrun": "^1.2.18",
"@gitzone/tstest": "^1.0.59", "@gitzone/tstest": "^1.0.60",
"@pushrocks/tapbundle": "^3.2.14", "@pushrocks/tapbundle": "^3.2.15",
"@types/node": "^16.10.5", "@types/node": "^17.0.8",
"tslint": "^6.1.3", "tslint": "^6.1.3",
"tslint-config-prettier": "^1.18.0" "tslint-config-prettier": "^1.18.0"
}, },
"dependencies": { "dependencies": {
"@pushrocks/smartfile": "^8.0.10", "@pushrocks/smartfile": "^9.0.5",
"@pushrocks/smartnetwork": "^2.0.10", "@pushrocks/smartnetwork": "^2.0.10",
"@pushrocks/smartpromise": "^3.1.6", "@pushrocks/smartpromise": "^3.1.6",
"@pushrocks/smartpuppeteer": "^1.0.27", "@pushrocks/smartpuppeteer": "^1.0.36",
"@pushrocks/smartunique": "^3.0.3", "@pushrocks/smartunique": "^3.0.3",
"@types/express": "^4.17.13", "@types/express": "^4.17.13",
"express": "^4.17.1", "express": "^4.17.2",
"pdf-merger-js": "^3.2.1" "pdf-merger-js": "^3.2.1",
"pdf2json": "^2.0.0"
}, },
"files": [ "files": [
"ts/**/*", "ts/**/*",

View File

@ -13,7 +13,11 @@ tap.test('should start the instance', async () => {
}); });
tap.test('should create a pdf from html string', async () => { tap.test('should create a pdf from html string', async () => {
await testSmartPdf.getPdfResultForHtmlString('hi'); await testSmartPdf.getA4PdfResultForHtmlString('hi');
});
tap.test('should create a pdf from html string', async () => {
await testSmartPdf.getA4PdfResultForHtmlString('hi');
}); });
tap.test('should create a pdf from website as A4', async () => { tap.test('should create a pdf from website as A4', async () => {
@ -36,7 +40,7 @@ tap.test('should create a valid PDFResult', async () => {
fs.writeFileSync(`.nogit/${fileName}`, pdfResult.buffer); fs.writeFileSync(`.nogit/${fileName}`, pdfResult.buffer);
}; };
await writePDfToDisk('https://maintainedby.lossless.com/', '1.pdf') await writePDfToDisk('https://maintainedby.lossless.com/', '1.pdf')
await writePDfToDisk('https://lossless.com/', '2.pdf') await writePDfToDisk('https://rendertron.lossless.one/render/https://lossless.com', '2.pdf')
}); });
tap.test('should combine pdfs', async () => { tap.test('should combine pdfs', async () => {

View File

@ -1,5 +1,8 @@
export interface IPdfResult { export interface IPdfResult {
name: string; name: string;
id: string; id: string;
metadata: {
textExtraction: string;
};
buffer: Buffer; buffer: Buffer;
} }

View File

@ -63,11 +63,15 @@ export class SmartPdf {
/** /**
* returns a pdf for a given html string; * returns a pdf for a given html string;
*/ */
async getPdfResultForHtmlString(htmlStringArg: string): Promise<interfaces.IPdfResult> { async getA4PdfResultForHtmlString(htmlStringArg: string): Promise<interfaces.IPdfResult> {
await this._readyDeferred.promise; await this._readyDeferred.promise;
const pdfCandidate = new PdfCandidate(htmlStringArg); const pdfCandidate = new PdfCandidate(htmlStringArg);
this._candidates[pdfCandidate.pdfId] = pdfCandidate; this._candidates[pdfCandidate.pdfId] = pdfCandidate;
const page = await this.headlessBrowser.newPage(); const page = await this.headlessBrowser.newPage();
await page.setViewport({
width: 794,
height: 1122,
});
const response = await page.goto(`http://localhost:3210/${pdfCandidate.pdfId}`, { const response = await page.goto(`http://localhost:3210/${pdfCandidate.pdfId}`, {
waitUntil: 'networkidle2', waitUntil: 'networkidle2',
}); });
@ -80,10 +84,10 @@ export class SmartPdf {
} }
const pdfBuffer = await page.pdf({ const pdfBuffer = await page.pdf({
format: 'a4', width: 794,
height: 1122,
printBackground: true, printBackground: true,
displayHeaderFooter: false, displayHeaderFooter: false,
preferCSSPageSize: true,
}); });
await page.close(); await page.close();
delete this._candidates[pdfCandidate.pdfId]; delete this._candidates[pdfCandidate.pdfId];
@ -92,12 +96,19 @@ export class SmartPdf {
return { return {
id: pdfCandidate.pdfId, id: pdfCandidate.pdfId,
name: `${pdfCandidate.pdfId}.js`, name: `${pdfCandidate.pdfId}.js`,
metadata: {
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
},
buffer: pdfBuffer, buffer: pdfBuffer,
}; };
} }
async getPdfResultForWebsite(websiteUrl: string): Promise<interfaces.IPdfResult> { async getPdfResultForWebsite(websiteUrl: string): Promise<interfaces.IPdfResult> {
const page = await this.headlessBrowser.newPage(); const page = await this.headlessBrowser.newPage();
await page.setViewport({
width: 1980,
height: 1200,
});
await page.emulateMediaType('screen'); await page.emulateMediaType('screen');
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' }); const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
const pdfId = plugins.smartunique.shortId(); const pdfId = plugins.smartunique.shortId();
@ -108,44 +119,56 @@ export class SmartPdf {
}; };
}); });
const pdfBuffer = await page.pdf({ const pdfBuffer = await page.pdf({
format: 'a4', height: documentHeight,
height: documentWidth,
width: documentWidth, width: documentWidth,
printBackground: true, printBackground: true,
displayHeaderFooter: false, displayHeaderFooter: false,
preferCSSPageSize: true,
}); });
await page.close(); await page.close();
return { return {
id: pdfId, id: pdfId,
name: `${pdfId}.js`, name: `${pdfId}.js`,
metadata: {
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
},
buffer: pdfBuffer, buffer: pdfBuffer,
}; };
} }
async getFullWebsiteAsSinglePdf(websiteUrl: string) { async getFullWebsiteAsSinglePdf(websiteUrl: string): Promise<interfaces.IPdfResult> {
const page = await this.headlessBrowser.newPage(); const page = await this.headlessBrowser.newPage();
await page.setViewport({
width: 1920,
height: 1200,
});
page.emulateMediaType('screen'); page.emulateMediaType('screen');
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' }); const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
const pdfId = plugins.smartunique.shortId(); const pdfId = plugins.smartunique.shortId();
const { documentHeight, documentWidth } = await page.evaluate(() => { const { documentHeight, documentWidth } = await page.evaluate(() => {
return { return {
documentHeight: document.height, documentHeight: document.body.scrollHeight,
documentWidth: document.width, documentWidth: document.body.clientWidth,
}; };
}); });
await page.setViewport({
width: 1920,
height: documentHeight,
});
const pdfBuffer = await page.pdf({ const pdfBuffer = await page.pdf({
format: 'a4', height: documentHeight,
height: documentWidth, width: 1920,
width: documentWidth,
printBackground: true, printBackground: true,
displayHeaderFooter: false, displayHeaderFooter: false,
preferCSSPageSize: true, scale: 1,
pageRanges: '1'
}); });
await page.close(); await page.close();
return { return {
id: pdfId, id: pdfId,
name: `${pdfId}.js`, name: `${pdfId}.js`,
metadata: {
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
},
buffer: pdfBuffer, buffer: pdfBuffer,
}; };
} }
@ -157,4 +180,23 @@ export class SmartPdf {
} }
return merger.saveAsBuffer(); return merger.saveAsBuffer();
} }
/**
 * Extracts the text content of a PDF by parsing the buffer with pdf2json and
 * concatenating every text run of every page, in document order.
 *
 * No separator is inserted between runs or pages; the runs are joined exactly
 * as the parser reports them.
 *
 * @param pdfBufferArg the raw PDF file contents
 * @returns the concatenated text of all pages
 * @throws rejects when pdf2json reports a parse error or yields an unexpected result shape
 */
public async extractTextFromPdfBuffer(pdfBufferArg: Buffer): Promise<string> {
  const deferred = plugins.smartpromise.defer<string>();
  const pdfParser: any = new plugins.pdf2json();

  // pdf2json 2.x hands out text runs URI-encoded (e.g. "Hello%20World").
  // Fall back to the raw value if a run is not a valid encoded sequence.
  const decodeRun = (encodedText: string): string => {
    try {
      return decodeURIComponent(encodedText);
    } catch (err) {
      return encodedText;
    }
  };

  // Without this handler a corrupt or unsupported PDF would leave the promise
  // pending forever and hang every caller awaiting the text extraction.
  pdfParser.on('pdfParser_dataError', (errData: any) => {
    const cause = errData && errData.parserError ? errData.parserError : errData;
    deferred.reject(
      cause instanceof Error
        ? cause
        : new Error(`pdf2json failed to parse the pdf buffer: ${cause}`)
    );
  });

  pdfParser.on('pdfParser_dataReady', (pdfData: any) => {
    try {
      let finalText = '';
      for (const page of pdfData.Pages) {
        for (const text of page.Texts) {
          for (const letter of text.R) {
            finalText = finalText + decodeRun(letter.T);
          }
        }
      }
      deferred.resolve(finalText);
    } catch (err) {
      // An unexpected result shape must settle the promise as well.
      deferred.reject(err);
    }
  });

  pdfParser.parseBuffer(pdfBufferArg);
  return deferred.promise;
}
} }

View File

@ -15,6 +15,8 @@ export { smartfile, smartpromise, smartpuppeteer, smartunique, smartnetwork };
// thirdparty // thirdparty
import pdfMerger from 'pdf-merger-js'; import pdfMerger from 'pdf-merger-js';
// @ts-ignore
import pdf2json from 'pdf2json';
import express from 'express'; import express from 'express';
export { pdfMerger, express }; export { pdfMerger, pdf2json, express };