fix(core): update
This commit is contained in:
		
							
								
								
									
										44
									
								
								package-lock.json
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										44
									
								
								package-lock.json
									
									
									
										generated
									
									
									
								
							| @@ -16,7 +16,8 @@ | ||||
|         "@pushrocks/smartunique": "^3.0.3", | ||||
|         "@types/express": "^4.17.13", | ||||
|         "express": "^4.17.2", | ||||
|         "pdf-merger-js": "^3.2.1" | ||||
|         "pdf-merger-js": "^3.2.1", | ||||
|         "pdf2json": "^2.0.0" | ||||
|       }, | ||||
|       "devDependencies": { | ||||
|         "@gitzone/tsbuild": "^2.1.28", | ||||
| @@ -10978,6 +10979,33 @@ | ||||
|         "pdfjs": "^2.4.5" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/pdf2json": { | ||||
|       "version": "2.0.0", | ||||
|       "resolved": "https://verdaccio.lossless.one/pdf2json/-/pdf2json-2.0.0.tgz", | ||||
|       "integrity": "sha512-+FZy7GSvLOLc+zksg0SoMvXqIqcku5lBlEPuYJJkhMWB2x6yfthzEhhSbZc20UheClMPagH/+NXnMRbvQMQR1w==", | ||||
|       "bundleDependencies": [ | ||||
|         "@xmldom/xmldom" | ||||
|       ], | ||||
|       "license": "Apache-2.0", | ||||
|       "dependencies": { | ||||
|         "@xmldom/xmldom": "^0.7.5" | ||||
|       }, | ||||
|       "bin": { | ||||
|         "pdf2json": "bin/pdf2json" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": ">=14.18.0", | ||||
|         "npm": ">=6.14.15" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/pdf2json/node_modules/@xmldom/xmldom": { | ||||
|       "version": "0.7.5", | ||||
|       "inBundle": true, | ||||
|       "license": "MIT", | ||||
|       "engines": { | ||||
|         "node": ">=10.0.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/pdfjs": { | ||||
|       "version": "2.4.6", | ||||
|       "resolved": "https://verdaccio.lossless.one/pdfjs/-/pdfjs-2.4.6.tgz", | ||||
| @@ -23633,6 +23661,20 @@ | ||||
|         "pdfjs": "^2.4.5" | ||||
|       } | ||||
|     }, | ||||
|     "pdf2json": { | ||||
|       "version": "2.0.0", | ||||
|       "resolved": "https://verdaccio.lossless.one/pdf2json/-/pdf2json-2.0.0.tgz", | ||||
|       "integrity": "sha512-+FZy7GSvLOLc+zksg0SoMvXqIqcku5lBlEPuYJJkhMWB2x6yfthzEhhSbZc20UheClMPagH/+NXnMRbvQMQR1w==", | ||||
|       "requires": { | ||||
|         "@xmldom/xmldom": "^0.7.5" | ||||
|       }, | ||||
|       "dependencies": { | ||||
|         "@xmldom/xmldom": { | ||||
|           "version": "0.7.5", | ||||
|           "bundled": true | ||||
|         } | ||||
|       } | ||||
|     }, | ||||
|     "pdfjs": { | ||||
|       "version": "2.4.6", | ||||
|       "resolved": "https://verdaccio.lossless.one/pdfjs/-/pdfjs-2.4.6.tgz", | ||||
|   | ||||
| @@ -28,7 +28,8 @@ | ||||
|     "@pushrocks/smartunique": "^3.0.3", | ||||
|     "@types/express": "^4.17.13", | ||||
|     "express": "^4.17.2", | ||||
|     "pdf-merger-js": "^3.2.1" | ||||
|     "pdf-merger-js": "^3.2.1", | ||||
|     "pdf2json": "^2.0.0" | ||||
|   }, | ||||
|   "files": [ | ||||
|     "ts/**/*", | ||||
|   | ||||
| @@ -16,6 +16,10 @@ tap.test('should create a pdf from html string', async () => { | ||||
|   await testSmartPdf.getPdfResultForHtmlString('hi'); | ||||
| }); | ||||
|  | ||||
| // Covers the text extraction that is attached to pdf results as metadata. | ||||
| // Named differently from the plain html-string test so both show up separately in the report. | ||||
| tap.test('should extract text for a pdf created from html string', async () => { | ||||
|   const pdfResult = await testSmartPdf.getPdfResultForHtmlString('hi'); | ||||
|   if (typeof pdfResult.metadata.textExtraction !== 'string') { | ||||
|     throw new Error('expected metadata.textExtraction to be a string'); | ||||
|   } | ||||
| }); | ||||
|  | ||||
| // Renders a remote website to a pdf; the test only verifies that the call resolves, | ||||
| // it makes no assertion on the returned result. | ||||
| // NOTE(review): presumably needs network access to wikipedia.org — confirm for CI. | ||||
| tap.test('should create a pdf from website as A4', async () => { | ||||
|   await testSmartPdf.getPdfResultForWebsite('https://www.wikipedia.org'); | ||||
| }); | ||||
|   | ||||
| @@ -1,5 +1,8 @@ | ||||
| /** | ||||
|  * Result object returned by the pdf generation methods of SmartPdf. | ||||
|  */ | ||||
| export interface IPdfResult { | ||||
|   /** file name of the result; the producers shown here build it from the pdf id */ | ||||
|   name: string; | ||||
|   /** identifier of the generated pdf */ | ||||
|   id: string; | ||||
|   /** additional information derived from the generated pdf */ | ||||
|   metadata: { | ||||
|     /** text extracted from the pdf buffer via SmartPdf.extractTextFromPdfBuffer */ | ||||
|     textExtraction: string; | ||||
|   }; | ||||
|   /** the pdf file contents */ | ||||
|   buffer: Buffer; | ||||
| } | ||||
|   | ||||
| @@ -70,8 +70,8 @@ export class SmartPdf { | ||||
|     const page = await this.headlessBrowser.newPage(); | ||||
|     await page.setViewport({ | ||||
|       width: 794, | ||||
|       height: 1122 | ||||
|     }) | ||||
|       height: 1122, | ||||
|     }); | ||||
|     const response = await page.goto(`http://localhost:3210/${pdfCandidate.pdfId}`, { | ||||
|       waitUntil: 'networkidle2', | ||||
|     }); | ||||
| @@ -96,6 +96,9 @@ export class SmartPdf { | ||||
|     return { | ||||
|       id: pdfCandidate.pdfId, | ||||
|       name: `${pdfCandidate.pdfId}.js`, | ||||
|       metadata: { | ||||
|         textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer), | ||||
|       }, | ||||
|       buffer: pdfBuffer, | ||||
|     }; | ||||
|   } | ||||
| @@ -123,11 +126,14 @@ export class SmartPdf { | ||||
|     return { | ||||
|       id: pdfId, | ||||
|       name: `${pdfId}.js`, | ||||
|       metadata: { | ||||
|         textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer), | ||||
|       }, | ||||
|       buffer: pdfBuffer, | ||||
|     }; | ||||
|   } | ||||
|  | ||||
|   async getFullWebsiteAsSinglePdf(websiteUrl: string) { | ||||
|   async getFullWebsiteAsSinglePdf(websiteUrl: string): Promise<interfaces.IPdfResult> { | ||||
|     const page = await this.headlessBrowser.newPage(); | ||||
|     page.emulateMediaType('screen'); | ||||
|     const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' }); | ||||
| @@ -150,6 +156,9 @@ export class SmartPdf { | ||||
|     return { | ||||
|       id: pdfId, | ||||
|       name: `${pdfId}.js`, | ||||
|       metadata: { | ||||
|         textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer), | ||||
|       }, | ||||
|       buffer: pdfBuffer, | ||||
|     }; | ||||
|   } | ||||
| @@ -161,4 +170,23 @@ export class SmartPdf { | ||||
|     } | ||||
|     return merger.saveAsBuffer(); | ||||
|   } | ||||
|  | ||||
|   /** | ||||
|    * Extracts the plain text of a pdf by parsing it with pdf2json and | ||||
|    * concatenating every text run of every page in the order pdf2json reports them. | ||||
|    * | ||||
|    * @param pdfBufferArg the pdf file contents to parse | ||||
|    * @returns the concatenated, URI-decoded text of all pages ('' when there is no text) | ||||
|    * @throws rejects with an Error when pdf2json emits `pdfParser_dataError` | ||||
|    */ | ||||
|   public async extractTextFromPdfBuffer(pdfBufferArg: Buffer): Promise<string> { | ||||
|     const deferred = plugins.smartpromise.defer<string>(); | ||||
|     const pdfParser: any = new plugins.pdf2json(); | ||||
|  | ||||
|     // pdf2json 2.x emits text runs URI-encoded; keep the raw run if it is not valid encoding. | ||||
|     const decodeRun = (encodedText: string): string => { | ||||
|       try { | ||||
|         return decodeURIComponent(encodedText); | ||||
|       } catch (err) { | ||||
|         return encodedText; | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     // Without this listener a malformed pdf would leave the returned promise pending forever. | ||||
|     pdfParser.on('pdfParser_dataError', (errData: any) => { | ||||
|       const reason = errData?.parserError ?? errData; | ||||
|       deferred.reject(reason instanceof Error ? reason : new Error(String(reason))); | ||||
|     }); | ||||
|  | ||||
|     pdfParser.on('pdfParser_dataReady', (pdfData: any) => { | ||||
|       const textParts: string[] = []; | ||||
|       // Missing Pages/Texts/R arrays are treated as empty instead of throwing inside the listener. | ||||
|       for (const page of pdfData?.Pages ?? []) { | ||||
|         for (const text of page.Texts ?? []) { | ||||
|           for (const run of text.R ?? []) { | ||||
|             textParts.push(decodeRun(run.T ?? '')); | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|       deferred.resolve(textParts.join('')); | ||||
|     }); | ||||
|  | ||||
|     pdfParser.parseBuffer(pdfBufferArg); | ||||
|     return deferred.promise; | ||||
|   } | ||||
| } | ||||
|   | ||||
| @@ -15,6 +15,8 @@ export { smartfile, smartpromise, smartpuppeteer, smartunique, smartnetwork }; | ||||
|  | ||||
| // thirdparty | ||||
| import pdfMerger from 'pdf-merger-js'; | ||||
| // @ts-ignore | ||||
| import pdf2json from 'pdf2json'; | ||||
| import express from 'express'; | ||||
|  | ||||
| export { pdfMerger, express }; | ||||
| export { pdfMerger, pdf2json, express }; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user