Compare commits
12 Commits
Author | SHA1 | Date | |
---|---|---|---|
016e0db797 | |||
4cf8b2e1f8 | |||
67b0aa9d47 | |||
567c6eafea | |||
ff890fb2af | |||
a512fd64b5 | |||
377318a62a | |||
671c871304 | |||
e0cc6b5655 | |||
e74b44b49c | |||
d6f0d88d4a | |||
9674e5b8dc |
6404
package-lock.json
generated
6404
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
17
package.json
17
package.json
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@pushrocks/smartpdf",
|
"name": "@pushrocks/smartpdf",
|
||||||
"version": "2.0.10",
|
"version": "2.0.16",
|
||||||
"private": false,
|
"private": false,
|
||||||
"description": "create pdfs on the fly",
|
"description": "create pdfs on the fly",
|
||||||
"main": "dist_ts/index.js",
|
"main": "dist_ts/index.js",
|
||||||
@ -14,21 +14,22 @@
|
|||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@gitzone/tsbuild": "^2.1.28",
|
"@gitzone/tsbuild": "^2.1.28",
|
||||||
"@gitzone/tsrun": "^1.2.18",
|
"@gitzone/tsrun": "^1.2.18",
|
||||||
"@gitzone/tstest": "^1.0.59",
|
"@gitzone/tstest": "^1.0.60",
|
||||||
"@pushrocks/tapbundle": "^3.2.14",
|
"@pushrocks/tapbundle": "^3.2.15",
|
||||||
"@types/node": "^16.10.5",
|
"@types/node": "^17.0.8",
|
||||||
"tslint": "^6.1.3",
|
"tslint": "^6.1.3",
|
||||||
"tslint-config-prettier": "^1.18.0"
|
"tslint-config-prettier": "^1.18.0"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@pushrocks/smartfile": "^8.0.10",
|
"@pushrocks/smartfile": "^9.0.5",
|
||||||
"@pushrocks/smartnetwork": "^2.0.10",
|
"@pushrocks/smartnetwork": "^2.0.10",
|
||||||
"@pushrocks/smartpromise": "^3.1.6",
|
"@pushrocks/smartpromise": "^3.1.6",
|
||||||
"@pushrocks/smartpuppeteer": "^1.0.27",
|
"@pushrocks/smartpuppeteer": "^1.0.36",
|
||||||
"@pushrocks/smartunique": "^3.0.3",
|
"@pushrocks/smartunique": "^3.0.3",
|
||||||
"@types/express": "^4.17.13",
|
"@types/express": "^4.17.13",
|
||||||
"express": "^4.17.1",
|
"express": "^4.17.2",
|
||||||
"pdf-merger-js": "^3.2.1"
|
"pdf-merger-js": "^3.2.1",
|
||||||
|
"pdf2json": "^2.0.0"
|
||||||
},
|
},
|
||||||
"files": [
|
"files": [
|
||||||
"ts/**/*",
|
"ts/**/*",
|
||||||
|
@ -16,6 +16,10 @@ tap.test('should create a pdf from html string', async () => {
|
|||||||
await testSmartPdf.getPdfResultForHtmlString('hi');
|
await testSmartPdf.getPdfResultForHtmlString('hi');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
tap.test('should create a pdf from html string', async () => {
|
||||||
|
await testSmartPdf.getPdfResultForHtmlString('hi');
|
||||||
|
});
|
||||||
|
|
||||||
tap.test('should create a pdf from website as A4', async () => {
|
tap.test('should create a pdf from website as A4', async () => {
|
||||||
await testSmartPdf.getPdfResultForWebsite('https://www.wikipedia.org');
|
await testSmartPdf.getPdfResultForWebsite('https://www.wikipedia.org');
|
||||||
});
|
});
|
||||||
@ -36,7 +40,7 @@ tap.test('should create a valid PDFResult', async () => {
|
|||||||
fs.writeFileSync(`.nogit/${fileName}`, pdfResult.buffer);
|
fs.writeFileSync(`.nogit/${fileName}`, pdfResult.buffer);
|
||||||
};
|
};
|
||||||
await writePDfToDisk('https://maintainedby.lossless.com/', '1.pdf')
|
await writePDfToDisk('https://maintainedby.lossless.com/', '1.pdf')
|
||||||
await writePDfToDisk('https://lossless.com/', '2.pdf')
|
await writePDfToDisk('https://rendertron.lossless.one/render/https://lossless.com', '2.pdf')
|
||||||
});
|
});
|
||||||
|
|
||||||
tap.test('should combine pdfs', async () => {
|
tap.test('should combine pdfs', async () => {
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
export interface IPdfResult {
|
export interface IPdfResult {
|
||||||
name: string;
|
name: string;
|
||||||
id: string;
|
id: string;
|
||||||
|
metadata: {
|
||||||
|
textExtraction: string;
|
||||||
|
};
|
||||||
buffer: Buffer;
|
buffer: Buffer;
|
||||||
}
|
}
|
||||||
|
@ -68,6 +68,10 @@ export class SmartPdf {
|
|||||||
const pdfCandidate = new PdfCandidate(htmlStringArg);
|
const pdfCandidate = new PdfCandidate(htmlStringArg);
|
||||||
this._candidates[pdfCandidate.pdfId] = pdfCandidate;
|
this._candidates[pdfCandidate.pdfId] = pdfCandidate;
|
||||||
const page = await this.headlessBrowser.newPage();
|
const page = await this.headlessBrowser.newPage();
|
||||||
|
await page.setViewport({
|
||||||
|
width: 794,
|
||||||
|
height: 1122,
|
||||||
|
});
|
||||||
const response = await page.goto(`http://localhost:3210/${pdfCandidate.pdfId}`, {
|
const response = await page.goto(`http://localhost:3210/${pdfCandidate.pdfId}`, {
|
||||||
waitUntil: 'networkidle2',
|
waitUntil: 'networkidle2',
|
||||||
});
|
});
|
||||||
@ -92,6 +96,9 @@ export class SmartPdf {
|
|||||||
return {
|
return {
|
||||||
id: pdfCandidate.pdfId,
|
id: pdfCandidate.pdfId,
|
||||||
name: `${pdfCandidate.pdfId}.js`,
|
name: `${pdfCandidate.pdfId}.js`,
|
||||||
|
metadata: {
|
||||||
|
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
||||||
|
},
|
||||||
buffer: pdfBuffer,
|
buffer: pdfBuffer,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -119,11 +126,14 @@ export class SmartPdf {
|
|||||||
return {
|
return {
|
||||||
id: pdfId,
|
id: pdfId,
|
||||||
name: `${pdfId}.js`,
|
name: `${pdfId}.js`,
|
||||||
|
metadata: {
|
||||||
|
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
||||||
|
},
|
||||||
buffer: pdfBuffer,
|
buffer: pdfBuffer,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async getFullWebsiteAsSinglePdf(websiteUrl: string) {
|
async getFullWebsiteAsSinglePdf(websiteUrl: string): Promise<interfaces.IPdfResult> {
|
||||||
const page = await this.headlessBrowser.newPage();
|
const page = await this.headlessBrowser.newPage();
|
||||||
page.emulateMediaType('screen');
|
page.emulateMediaType('screen');
|
||||||
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
||||||
@ -146,6 +156,9 @@ export class SmartPdf {
|
|||||||
return {
|
return {
|
||||||
id: pdfId,
|
id: pdfId,
|
||||||
name: `${pdfId}.js`,
|
name: `${pdfId}.js`,
|
||||||
|
metadata: {
|
||||||
|
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
||||||
|
},
|
||||||
buffer: pdfBuffer,
|
buffer: pdfBuffer,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -157,4 +170,23 @@ export class SmartPdf {
|
|||||||
}
|
}
|
||||||
return merger.saveAsBuffer();
|
return merger.saveAsBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public async extractTextFromPdfBuffer(pdfBufferArg: Buffer): Promise<string> {
|
||||||
|
const deferred = plugins.smartpromise.defer<string>();
|
||||||
|
const pdfParser: any = new plugins.pdf2json();
|
||||||
|
pdfParser.on('pdfParser_dataReady', (pdfData: any) => {
|
||||||
|
let finalText = ''
|
||||||
|
for (const page of pdfData.Pages) {
|
||||||
|
for(const text of page.Texts) {
|
||||||
|
for (const letter of text.R) {
|
||||||
|
finalText = finalText + letter.T;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
deferred.resolve(finalText);
|
||||||
|
});
|
||||||
|
pdfParser.parseBuffer(pdfBufferArg);
|
||||||
|
return deferred.promise;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -15,6 +15,8 @@ export { smartfile, smartpromise, smartpuppeteer, smartunique, smartnetwork };
|
|||||||
|
|
||||||
// thirdparty
|
// thirdparty
|
||||||
import pdfMerger from 'pdf-merger-js';
|
import pdfMerger from 'pdf-merger-js';
|
||||||
|
// @ts-ignore
|
||||||
|
import pdf2json from 'pdf2json';
|
||||||
import express from 'express';
|
import express from 'express';
|
||||||
|
|
||||||
export { pdfMerger, express };
|
export { pdfMerger, pdf2json, express };
|
||||||
|
Reference in New Issue
Block a user