fix(core): update
This commit is contained in:
parent
ff890fb2af
commit
567c6eafea
44
package-lock.json
generated
44
package-lock.json
generated
@ -16,7 +16,8 @@
|
||||
"@pushrocks/smartunique": "^3.0.3",
|
||||
"@types/express": "^4.17.13",
|
||||
"express": "^4.17.2",
|
||||
"pdf-merger-js": "^3.2.1"
|
||||
"pdf-merger-js": "^3.2.1",
|
||||
"pdf2json": "^2.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@gitzone/tsbuild": "^2.1.28",
|
||||
@ -10978,6 +10979,33 @@
|
||||
"pdfjs": "^2.4.5"
|
||||
}
|
||||
},
|
||||
"node_modules/pdf2json": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://verdaccio.lossless.one/pdf2json/-/pdf2json-2.0.0.tgz",
|
||||
"integrity": "sha512-+FZy7GSvLOLc+zksg0SoMvXqIqcku5lBlEPuYJJkhMWB2x6yfthzEhhSbZc20UheClMPagH/+NXnMRbvQMQR1w==",
|
||||
"bundleDependencies": [
|
||||
"@xmldom/xmldom"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@xmldom/xmldom": "^0.7.5"
|
||||
},
|
||||
"bin": {
|
||||
"pdf2json": "bin/pdf2json"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.18.0",
|
||||
"npm": ">=6.14.15"
|
||||
}
|
||||
},
|
||||
"node_modules/pdf2json/node_modules/@xmldom/xmldom": {
|
||||
"version": "0.7.5",
|
||||
"inBundle": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/pdfjs": {
|
||||
"version": "2.4.6",
|
||||
"resolved": "https://verdaccio.lossless.one/pdfjs/-/pdfjs-2.4.6.tgz",
|
||||
@ -23633,6 +23661,20 @@
|
||||
"pdfjs": "^2.4.5"
|
||||
}
|
||||
},
|
||||
"pdf2json": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://verdaccio.lossless.one/pdf2json/-/pdf2json-2.0.0.tgz",
|
||||
"integrity": "sha512-+FZy7GSvLOLc+zksg0SoMvXqIqcku5lBlEPuYJJkhMWB2x6yfthzEhhSbZc20UheClMPagH/+NXnMRbvQMQR1w==",
|
||||
"requires": {
|
||||
"@xmldom/xmldom": "^0.7.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@xmldom/xmldom": {
|
||||
"version": "0.7.5",
|
||||
"bundled": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"pdfjs": {
|
||||
"version": "2.4.6",
|
||||
"resolved": "https://verdaccio.lossless.one/pdfjs/-/pdfjs-2.4.6.tgz",
|
||||
|
@ -28,7 +28,8 @@
|
||||
"@pushrocks/smartunique": "^3.0.3",
|
||||
"@types/express": "^4.17.13",
|
||||
"express": "^4.17.2",
|
||||
"pdf-merger-js": "^3.2.1"
|
||||
"pdf-merger-js": "^3.2.1",
|
||||
"pdf2json": "^2.0.0"
|
||||
},
|
||||
"files": [
|
||||
"ts/**/*",
|
||||
|
@ -16,6 +16,10 @@ tap.test('should create a pdf from html string', async () => {
|
||||
await testSmartPdf.getPdfResultForHtmlString('hi');
|
||||
});
|
||||
|
||||
tap.test('should create a pdf from html string', async () => {
|
||||
await testSmartPdf.getPdfResultForHtmlString('hi');
|
||||
});
|
||||
|
||||
tap.test('should create a pdf from website as A4', async () => {
|
||||
await testSmartPdf.getPdfResultForWebsite('https://www.wikipedia.org');
|
||||
});
|
||||
|
@ -1,5 +1,8 @@
|
||||
/**
 * Shape of a PDF handed back by the SmartPdf generation methods.
 */
export interface IPdfResult {
  /** Identifier of the generated PDF. */
  id: string;

  /** Name assigned to the generated PDF. */
  name: string;

  /** Raw bytes of the generated PDF. */
  buffer: Buffer;

  /** Information derived from the PDF after it was generated. */
  metadata: {
    /** Text content extracted from the PDF buffer. */
    textExtraction: string;
  };
}
|
||||
|
@ -70,8 +70,8 @@ export class SmartPdf {
|
||||
const page = await this.headlessBrowser.newPage();
|
||||
await page.setViewport({
|
||||
width: 794,
|
||||
height: 1122
|
||||
})
|
||||
height: 1122,
|
||||
});
|
||||
const response = await page.goto(`http://localhost:3210/${pdfCandidate.pdfId}`, {
|
||||
waitUntil: 'networkidle2',
|
||||
});
|
||||
@ -96,6 +96,9 @@ export class SmartPdf {
|
||||
return {
|
||||
id: pdfCandidate.pdfId,
|
||||
name: `${pdfCandidate.pdfId}.js`,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
||||
},
|
||||
buffer: pdfBuffer,
|
||||
};
|
||||
}
|
||||
@ -123,11 +126,14 @@ export class SmartPdf {
|
||||
return {
|
||||
id: pdfId,
|
||||
name: `${pdfId}.js`,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
||||
},
|
||||
buffer: pdfBuffer,
|
||||
};
|
||||
}
|
||||
|
||||
async getFullWebsiteAsSinglePdf(websiteUrl: string) {
|
||||
async getFullWebsiteAsSinglePdf(websiteUrl: string): Promise<interfaces.IPdfResult> {
|
||||
const page = await this.headlessBrowser.newPage();
|
||||
page.emulateMediaType('screen');
|
||||
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
||||
@ -150,6 +156,9 @@ export class SmartPdf {
|
||||
return {
|
||||
id: pdfId,
|
||||
name: `${pdfId}.js`,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
||||
},
|
||||
buffer: pdfBuffer,
|
||||
};
|
||||
}
|
||||
@ -161,4 +170,23 @@ export class SmartPdf {
|
||||
}
|
||||
return merger.saveAsBuffer();
|
||||
}
|
||||
|
||||
/**
 * Extracts the text content of a PDF that is held in memory.
 *
 * The buffer is handed to pdf2json; every text run of every page is then
 * concatenated in the order the parser reports it. No separator is inserted
 * between runs or pages.
 *
 * @param pdfBufferArg the raw PDF file content
 * @returns the concatenated text; an empty string when the parser reports no text
 * @throws rejects when pdf2json emits a parser error or the parsed data cannot be read
 */
public async extractTextFromPdfBuffer(pdfBufferArg: Buffer): Promise<string> {
  const deferred = plugins.smartpromise.defer<string>();
  const pdfParser: any = new plugins.pdf2json();

  // pdf2json 2.x reports text runs URI-encoded; keep the raw value if a run
  // is not valid percent-encoding rather than failing the whole extraction.
  const decodeRun = (encodedText: string): string => {
    try {
      return decodeURIComponent(encodedText);
    } catch {
      return encodedText;
    }
  };

  // Without this listener an unparsable PDF would leave the promise pending forever.
  pdfParser.on('pdfParser_dataError', (errorData: any) => {
    const cause = errorData?.parserError ?? errorData;
    deferred.reject(
      cause instanceof Error
        ? cause
        : new Error(typeof cause === 'string' ? cause : JSON.stringify(cause))
    );
  });

  pdfParser.on('pdfParser_dataReady', (pdfData: any) => {
    // Errors thrown inside an event listener would bypass the promise, so
    // route them into a rejection instead.
    try {
      let finalText = '';
      for (const page of pdfData?.Pages ?? []) {
        for (const text of page?.Texts ?? []) {
          for (const letter of text?.R ?? []) {
            finalText += decodeRun(String(letter?.T ?? ''));
          }
        }
      }
      deferred.resolve(finalText);
    } catch (error) {
      deferred.reject(error);
    }
  });

  pdfParser.parseBuffer(pdfBufferArg);
  return deferred.promise;
}
|
||||
}
|
||||
|
@ -15,6 +15,8 @@ export { smartfile, smartpromise, smartpuppeteer, smartunique, smartnetwork };
|
||||
|
||||
// thirdparty
|
||||
import pdfMerger from 'pdf-merger-js';
|
||||
// @ts-ignore
|
||||
import pdf2json from 'pdf2json';
|
||||
import express from 'express';
|
||||
|
||||
export { pdfMerger, express };
|
||||
export { pdfMerger, pdf2json, express };
|
||||
|
Loading…
Reference in New Issue
Block a user