fix(core): update

This commit is contained in:
2022-01-05 17:20:28 +01:00
parent ff890fb2af
commit 567c6eafea
6 changed files with 86 additions and 6 deletions

View File

@@ -1,5 +1,8 @@
/**
 * Shape of the result object handed back for a produced PDF.
 */
export interface IPdfResult {
  /** Identifier of the PDF this result belongs to. */
  id: string;
  /** Name associated with the result. */
  name: string;
  /** The binary content of the PDF. */
  buffer: Buffer;
  /** Additional data about the PDF; `textExtraction` carries text extracted from it. */
  metadata: { textExtraction: string };
}

View File

@@ -70,8 +70,8 @@ export class SmartPdf {
const page = await this.headlessBrowser.newPage();
await page.setViewport({
width: 794,
height: 1122
})
height: 1122,
});
const response = await page.goto(`http://localhost:3210/${pdfCandidate.pdfId}`, {
waitUntil: 'networkidle2',
});
@@ -96,6 +96,9 @@ export class SmartPdf {
return {
id: pdfCandidate.pdfId,
name: `${pdfCandidate.pdfId}.js`,
metadata: {
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
},
buffer: pdfBuffer,
};
}
@@ -123,11 +126,14 @@ export class SmartPdf {
return {
id: pdfId,
name: `${pdfId}.js`,
metadata: {
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
},
buffer: pdfBuffer,
};
}
async getFullWebsiteAsSinglePdf(websiteUrl: string) {
async getFullWebsiteAsSinglePdf(websiteUrl: string): Promise<interfaces.IPdfResult> {
const page = await this.headlessBrowser.newPage();
page.emulateMediaType('screen');
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
@@ -150,6 +156,9 @@ export class SmartPdf {
return {
id: pdfId,
name: `${pdfId}.js`,
metadata: {
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
},
buffer: pdfBuffer,
};
}
@@ -161,4 +170,23 @@ export class SmartPdf {
}
return merger.saveAsBuffer();
}
/**
 * Parses a PDF buffer with pdf2json and returns the concatenation of every
 * text run (`Pages[].Texts[].R[].T`) in document order, with no separators
 * inserted between runs, text blocks or pages.
 *
 * NOTE(review): pdf2json documents `T` as URI-encoded in at least some
 * versions; the value is passed through unchanged here, so callers may need
 * `decodeURIComponent` - confirm against the installed pdf2json version.
 *
 * @param pdfBufferArg the PDF content to parse
 * @returns the concatenated text of all runs
 * @throws rejects when the parser emits `pdfParser_dataError` or when the
 *   parsed payload does not have the expected Pages/Texts/R structure
 */
public async extractTextFromPdfBuffer(pdfBufferArg: Buffer): Promise<string> {
  const deferred = plugins.smartpromise.defer<string>();
  const pdfParser: any = new plugins.pdf2json();

  // Without an error listener a failed parse would leave the promise pending forever.
  pdfParser.on('pdfParser_dataError', (errData: any) => {
    // presumably the payload wraps the cause in `parserError`; fall back to the raw payload otherwise
    deferred.reject(errData && errData.parserError ? errData.parserError : errData);
  });

  pdfParser.on('pdfParser_dataReady', (pdfData: any) => {
    // Exceptions thrown inside an event callback would not reach the caller,
    // so they are converted into a rejection of the returned promise.
    try {
      const textRuns: string[] = [];
      for (const page of pdfData.Pages) {
        for (const text of page.Texts) {
          for (const run of text.R) {
            textRuns.push(run.T);
          }
        }
      }
      deferred.resolve(textRuns.join(''));
    } catch (err) {
      deferred.reject(err);
    }
  });

  pdfParser.parseBuffer(pdfBufferArg);
  return deferred.promise;
}
}

View File

@@ -15,6 +15,8 @@ export { smartfile, smartpromise, smartpuppeteer, smartunique, smartnetwork };
// thirdparty
import pdfMerger from 'pdf-merger-js';
// @ts-ignore
import pdf2json from 'pdf2json';
import express from 'express';
export { pdfMerger, express };
export { pdfMerger, pdf2json, express };