Compare commits
No commits in common. "master" and "v3.1.4" have entirely different histories.
131
changelog.md
131
changelog.md
@ -1,131 +0,0 @@
|
|||||||
# Changelog
|
|
||||||
|
|
||||||
## 2025-02-25 - 3.2.2 - fix(SmartPdf)
|
|
||||||
Fix buffer handling for PDF conversion and text extraction
|
|
||||||
|
|
||||||
- Ensure Uint8Array is converted to Node Buffer for PDF conversion.
|
|
||||||
- Correct the PDF page viewport handling by using document dimensions.
|
|
||||||
- Fix extractTextFromPdfBuffer argument type from Uint8Array to Buffer.
|
|
||||||
|
|
||||||
## 2025-02-25 - 3.2.1 - fix(SmartPdf)
|
|
||||||
Fix type for extractTextFromPdfBuffer function
|
|
||||||
|
|
||||||
- Corrected the parameter type from Buffer to Uint8Array for extractTextFromPdfBuffer function.
|
|
||||||
|
|
||||||
## 2025-02-25 - 3.2.0 - feat(smartpdf)
|
|
||||||
Improve dependency versions and optimize PDF to PNG conversion.
|
|
||||||
|
|
||||||
- Update several dependencies to newer versions for better stability and performance.
|
|
||||||
- Refactor tests to enhance readability and add directory creation validations.
|
|
||||||
- Optimize PDF to PNG conversion by switching to a more efficient Puppeteer and PDF.js-based method.
|
|
||||||
- Add checks for presence of required dependencies (GraphicsMagick and Ghostscript).
|
|
||||||
- Fix media emulation issue by properly awaiting the emulateMediaType function.
|
|
||||||
|
|
||||||
## 2024-11-30 - 3.1.8 - fix(core)
|
|
||||||
Fix candidate handling in PDF generation
|
|
||||||
|
|
||||||
- Added error handling for missing PDF candidates in server requests.
|
|
||||||
- Updated devDependencies and dependencies to latest versions for better stability and new features.
|
|
||||||
- Patched header retrieval logic during PDF generation for security check.
|
|
||||||
|
|
||||||
## 2024-09-27 - 3.1.7 - fix(dependencies)
|
|
||||||
Update dependencies to latest versions
|
|
||||||
|
|
||||||
- Updated @git.zone/tsbuild to version ^2.1.84
|
|
||||||
- Updated @git.zone/tsdoc to version ^1.3.12
|
|
||||||
- Updated @git.zone/tsrun to version ^1.2.49
|
|
||||||
- Updated @push.rocks/tapbundle to version ^5.3.0
|
|
||||||
- Updated @types/node to version ^22.7.4
|
|
||||||
- Updated @push.rocks/smartfile to version ^11.0.21
|
|
||||||
- Updated @push.rocks/smartpromise to version ^4.0.4
|
|
||||||
- Updated @tsclass/tsclass to version ^4.1.2
|
|
||||||
- Updated express to version ^4.21.0
|
|
||||||
- Updated pdf2pic to version ^3.1.3
|
|
||||||
|
|
||||||
## 2024-05-29 - 3.1.6 - Core
|
|
||||||
Updated description
|
|
||||||
|
|
||||||
- Minor changes to documentation and internal text.
|
|
||||||
|
|
||||||
## 2024-04-25 to 2024-04-30 - 3.1.0 to 3.1.5 - Core
|
|
||||||
Fix updates in core functionality
|
|
||||||
|
|
||||||
- Fixes and updates in core function in versions 3.1.0 to 3.1.5.
|
|
||||||
|
|
||||||
## 2024-04-25 - 3.0.17 - Feature
|
|
||||||
Now supports PDF to JPG conversion
|
|
||||||
|
|
||||||
- Added support for converting PDF files to JPG format.
|
|
||||||
|
|
||||||
## 2024-03-19 to 2024-04-14 - 3.0.17 - Maintenance
|
|
||||||
Various updates to project configuration files
|
|
||||||
|
|
||||||
- Updated `tsconfig`.
|
|
||||||
- Updated `npmextra.json`.
|
|
||||||
|
|
||||||
## 2023-07-11 to 2024-03-19 - 3.0.15 to 3.0.16 - Organization
|
|
||||||
Switch to new organization scheme and core updates
|
|
||||||
|
|
||||||
- Switched to new organization scheme.
|
|
||||||
- Applied core updates and bug fixes.
|
|
||||||
|
|
||||||
## 2022-11-07 to 2023-07-10 - 3.0.13 to 3.0.14 - Core
|
|
||||||
Fixes and updates to core functionality
|
|
||||||
|
|
||||||
- Various minor bug fixes and updates to core components.
|
|
||||||
|
|
||||||
## 2022-09-13 to 2022-11-07 - 3.0.10 to 3.0.12 - Core
|
|
||||||
Ongoing core updates and maintenance
|
|
||||||
|
|
||||||
- Regular fixes and operational improvements in core functionalities.
|
|
||||||
|
|
||||||
## 2022-06-12 to 2022-09-13 - 3.0.7 to 3.0.9 - Core
|
|
||||||
Continued focus on high-priority bug fixes and core functionalities
|
|
||||||
|
|
||||||
- Regular fixes for critical bugs and enhancements.
|
|
||||||
|
|
||||||
## 2022-03-24 to 2022-06-29 - 3.0.3 to 3.0.6 - Core
|
|
||||||
Further optimization and maintenance releases
|
|
||||||
|
|
||||||
- Further improvements and refinements of issues in core functionalities.
|
|
||||||
|
|
||||||
## 2022-01-05 to 2022-03-25 - 3.0.0 to 3.0.2 - Major Version Release
|
|
||||||
Major release for version 3.0.x, including core fixes
|
|
||||||
|
|
||||||
- Increased version from 2.x to 3.0. New significant changes and fixes.
|
|
||||||
|
|
||||||
## 2022-01-05 to 2022-03-24 - 2.0.13 to 2.0.19 - Core
|
|
||||||
Routine core updates and bug fixes
|
|
||||||
|
|
||||||
- Regular bug fixes in core components.
|
|
||||||
|
|
||||||
## 2019-11-19 to 2022-01-06 - 2.0.0 to 2.0.11 - Core
|
|
||||||
Multiple core updates and a few performance improvements
|
|
||||||
|
|
||||||
- Some performance enhancements and multiple bug fixes.
|
|
||||||
|
|
||||||
## 2019-11-16 to 2019-11-19 - 1.0.27 to 1.0.29 - API
|
|
||||||
Breaking change in API
|
|
||||||
|
|
||||||
- Naming PDF results to better represent their content.
|
|
||||||
|
|
||||||
## 2019-05-29 to 2019-11-15 - 1.0.13 to 1.0.26 - Core
|
|
||||||
Core functional updates and some major restructuring
|
|
||||||
|
|
||||||
- Introduced multiple updates to the core, addressing bugs and improving stability.
|
|
||||||
|
|
||||||
## 2019-04-10 to 2019-05-28 - 1.0.4 to 1.0.12 - Core
|
|
||||||
Fixes and updates in the core
|
|
||||||
|
|
||||||
- Implementation of multiple essential fixes for core components.
|
|
||||||
|
|
||||||
## 2018-10-06 - 1.0.1 to 1.0.3 - Core and Typings
|
|
||||||
Initial implementation and core fixes
|
|
||||||
|
|
||||||
- Initial implementation of the project.
|
|
||||||
- Fixed compilation problems in typings.
|
|
||||||
|
|
||||||
## 2016-01-29 - unknown - Initial
|
|
||||||
Initial commit
|
|
||||||
|
|
||||||
- Initial commit for the project setup.
|
|
36
package.json
36
package.json
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@push.rocks/smartpdf",
|
"name": "@push.rocks/smartpdf",
|
||||||
"version": "3.2.2",
|
"version": "3.1.4",
|
||||||
"private": false,
|
"private": false,
|
||||||
"description": "A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.",
|
"description": "A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.",
|
||||||
"main": "dist_ts/index.js",
|
"main": "dist_ts/index.js",
|
||||||
@ -14,27 +14,28 @@
|
|||||||
"buildDocs": "tsdoc"
|
"buildDocs": "tsdoc"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@git.zone/tsbuild": "^2.2.1",
|
"@git.zone/tsbuild": "^2.1.66",
|
||||||
"@git.zone/tsdoc": "^1.4.3",
|
"@git.zone/tsdoc": "^1.1.12",
|
||||||
"@git.zone/tsrun": "^1.3.3",
|
"@git.zone/tsrun": "^1.2.44",
|
||||||
"@git.zone/tstest": "^1.0.96",
|
"@git.zone/tstest": "^1.0.77",
|
||||||
"@push.rocks/tapbundle": "^5.5.6",
|
"@push.rocks/tapbundle": "^5.0.23",
|
||||||
"@types/node": "^22.13.5"
|
"@types/node": "^20.12.7"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@push.rocks/smartbuffer": "^3.0.4",
|
"@push.rocks/smartbuffer": "^3.0.4",
|
||||||
"@push.rocks/smartdelay": "^3.0.5",
|
"@push.rocks/smartdelay": "^3.0.5",
|
||||||
"@push.rocks/smartfile": "^11.2.0",
|
"@push.rocks/smartfile": "^11.0.14",
|
||||||
"@push.rocks/smartnetwork": "^3.0.0",
|
"@push.rocks/smartnetwork": "^3.0.0",
|
||||||
"@push.rocks/smartpath": "^5.0.18",
|
"@push.rocks/smartpath": "^5.0.18",
|
||||||
"@push.rocks/smartpromise": "^4.2.3",
|
"@push.rocks/smartpromise": "^4.0.3",
|
||||||
"@push.rocks/smartpuppeteer": "^2.0.5",
|
"@push.rocks/smartpuppeteer": "^2.0.2",
|
||||||
"@push.rocks/smartunique": "^3.0.9",
|
"@push.rocks/smartunique": "^3.0.9",
|
||||||
"@tsclass/tsclass": "^4.4.0",
|
"@tsclass/tsclass": "^4.0.54",
|
||||||
"@types/express": "^5.0.0",
|
"@types/express": "^4.17.21",
|
||||||
"express": "^4.21.2",
|
"express": "^4.19.2",
|
||||||
"pdf-lib": "^1.17.1",
|
"pdf-lib": "^1.17.1",
|
||||||
"pdf2json": "3.1.5"
|
"pdf2json": "^3.0.5",
|
||||||
|
"pdf2pic": "^3.1.1"
|
||||||
},
|
},
|
||||||
"files": [
|
"files": [
|
||||||
"ts/**/*",
|
"ts/**/*",
|
||||||
@ -64,10 +65,5 @@
|
|||||||
"PDF merging",
|
"PDF merging",
|
||||||
"text extraction",
|
"text extraction",
|
||||||
"PDF management"
|
"PDF management"
|
||||||
],
|
]
|
||||||
"homepage": "https://code.foss.global/push.rocks/smartpdf",
|
|
||||||
"repository": {
|
|
||||||
"type": "git",
|
|
||||||
"url": "https://code.foss.global/push.rocks/smartpdf.git"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
12100
pnpm-lock.yaml
generated
12100
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
89
test/test.ts
89
test/test.ts
@ -1,85 +1,66 @@
|
|||||||
import { expect, tap } from '@push.rocks/tapbundle';
|
import { expect, tap } from '@push.rocks/tapbundle';
|
||||||
import * as smartpdf from '../ts/index.js';
|
import * as smartpdf from '../ts/index.js';
|
||||||
import * as fs from 'fs';
|
|
||||||
import * as path from 'path';
|
|
||||||
|
|
||||||
let testSmartPdf: smartpdf.SmartPdf;
|
let testSmartPdf: smartpdf.SmartPdf;
|
||||||
|
|
||||||
/**
|
tap.test('should create a valid instance of smartpdf', async () => {
|
||||||
* Ensures that a directory exists.
|
|
||||||
* @param dirPath - The directory path to ensure.
|
|
||||||
*/
|
|
||||||
function ensureDir(dirPath: string): void {
|
|
||||||
if (!fs.existsSync(dirPath)) {
|
|
||||||
fs.mkdirSync(dirPath, { recursive: true });
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
tap.test('should create a valid instance of SmartPdf', async () => {
|
|
||||||
testSmartPdf = new smartpdf.SmartPdf();
|
testSmartPdf = new smartpdf.SmartPdf();
|
||||||
expect(testSmartPdf).toBeInstanceOf(smartpdf.SmartPdf);
|
expect(testSmartPdf).toBeInstanceOf(smartpdf.SmartPdf);
|
||||||
});
|
});
|
||||||
|
|
||||||
tap.test('should start the SmartPdf instance', async () => {
|
tap.test('should start the instance', async () => {
|
||||||
await testSmartPdf.start();
|
await testSmartPdf.start();
|
||||||
});
|
});
|
||||||
|
|
||||||
tap.test('should create PDFs from HTML string', async () => {
|
tap.test('should create a pdf from html string', async () => {
|
||||||
const pdf1 = await testSmartPdf.getA4PdfResultForHtmlString('hi');
|
await testSmartPdf.getA4PdfResultForHtmlString('hi');
|
||||||
const pdf2 = await testSmartPdf.getA4PdfResultForHtmlString('hello');
|
|
||||||
expect(pdf1.buffer).toBeInstanceOf(Buffer);
|
|
||||||
expect(pdf2.buffer).toBeInstanceOf(Buffer);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
tap.test('should create PDFs from websites', async () => {
|
tap.test('should create a pdf from html string', async () => {
|
||||||
const pdfA4 = await testSmartPdf.getPdfResultForWebsite('https://www.wikipedia.org');
|
await testSmartPdf.getA4PdfResultForHtmlString('hi');
|
||||||
const pdfSingle = await testSmartPdf.getFullWebsiteAsSinglePdf('https://www.wikipedia.org');
|
|
||||||
expect(pdfA4.buffer).toBeInstanceOf(Buffer);
|
|
||||||
expect(pdfSingle.buffer).toBeInstanceOf(Buffer);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
tap.test('should create valid PDF results and write them to disk', async () => {
|
tap.test('should create a pdf from website as A4', async () => {
|
||||||
const writePdfToDisk = async (urlArg: string, fileName: string) => {
|
await testSmartPdf.getPdfResultForWebsite('https://www.wikipedia.org');
|
||||||
|
});
|
||||||
|
|
||||||
|
tap.test('should create a pdf from website as single page PDF', async () => {
|
||||||
|
await testSmartPdf.getFullWebsiteAsSinglePdf('https://www.wikipedia.org');
|
||||||
|
});
|
||||||
|
|
||||||
|
tap.test('should create a valid PDFResult', async () => {
|
||||||
|
const writePDfToDisk = async (urlArg: string, fileName: string) => {
|
||||||
const pdfResult = await testSmartPdf.getFullWebsiteAsSinglePdf(urlArg);
|
const pdfResult = await testSmartPdf.getFullWebsiteAsSinglePdf(urlArg);
|
||||||
expect(pdfResult.buffer).toBeInstanceOf(Buffer);
|
expect(pdfResult.buffer).toBeInstanceOf(Buffer);
|
||||||
ensureDir('.nogit');
|
const fs = await import('fs');
|
||||||
fs.writeFileSync(path.join('.nogit', fileName), pdfResult.buffer as Buffer);
|
|
||||||
|
if (!fs.existsSync('.nogit/')) {
|
||||||
|
fs.mkdirSync('.nogit/');
|
||||||
|
}
|
||||||
|
fs.writeFileSync(`.nogit/${fileName}`, pdfResult.buffer as Buffer);
|
||||||
};
|
};
|
||||||
await writePdfToDisk('https://lossless.com/', '1.pdf');
|
await writePDfToDisk('https://lossless.com/', '1.pdf');
|
||||||
await writePdfToDisk('https://layer.io', '2.pdf');
|
await writePDfToDisk('https://layer.io', '2.pdf');
|
||||||
});
|
});
|
||||||
|
|
||||||
tap.test('should merge PDFs into a combined PDF', async () => {
|
tap.test('should merge pdfs', async () => {
|
||||||
|
const fs = await import('fs');
|
||||||
const pdf1 = await testSmartPdf.readFileToPdfObject('.nogit/1.pdf');
|
const pdf1 = await testSmartPdf.readFileToPdfObject('.nogit/1.pdf');
|
||||||
const pdf2 = await testSmartPdf.readFileToPdfObject('.nogit/2.pdf');
|
const pdf2 = await testSmartPdf.readFileToPdfObject('.nogit/2.pdf');
|
||||||
const mergedBuffer = await testSmartPdf.mergePdfs([pdf1.buffer, pdf2.buffer]);
|
fs.writeFileSync(
|
||||||
ensureDir('.nogit');
|
`.nogit/combined.pdf`,
|
||||||
fs.writeFileSync(path.join('.nogit', 'combined.pdf'), mergedBuffer);
|
await testSmartPdf.mergePdfs([pdf1.buffer, pdf2.buffer])
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
tap.test('should create PNG images from combined PDF using Puppeteer conversion', async () => {
|
tap.test('should create images from an pdf', async () => {
|
||||||
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/combined.pdf');
|
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/combined.pdf');
|
||||||
const images = await testSmartPdf.convertPDFToPngBytes(pdfObject.buffer);
|
const images = await testSmartPdf.convertPDFToJPGBytes(pdfObject.buffer);
|
||||||
expect(images.length).toBeGreaterThan(0);
|
console.log(images);
|
||||||
console.log('Puppeteer-based conversion image sizes:', images.map(img => img.length));
|
|
||||||
});
|
});
|
||||||
|
|
||||||
tap.test('should store PNG results from both conversion functions in .nogit/testresults', async () => {
|
tap.test('should be able to close properly', async () => {
|
||||||
const testResultsDir = path.join('.nogit', 'testresults');
|
|
||||||
ensureDir(testResultsDir);
|
|
||||||
|
|
||||||
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/combined.pdf');
|
|
||||||
|
|
||||||
// Convert using Puppeteer-based function and store images
|
|
||||||
const imagesPuppeteer = await testSmartPdf.convertPDFToPngBytes(pdfObject.buffer);
|
|
||||||
imagesPuppeteer.forEach((img, index) => {
|
|
||||||
const filePath = path.join(testResultsDir, `puppeteer_method_page_${index + 1}.png`);
|
|
||||||
fs.writeFileSync(filePath, Buffer.from(img));
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
tap.test('should close the SmartPdf instance properly', async () => {
|
|
||||||
await testSmartPdf.stop();
|
await testSmartPdf.stop();
|
||||||
});
|
});
|
||||||
|
|
||||||
tap.start();
|
tap.start();
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/**
|
/**
|
||||||
* autocreated commitinfo by @push.rocks/commitinfo
|
* autocreated commitinfo by @pushrocks/commitinfo
|
||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: '@push.rocks/smartpdf',
|
name: '@push.rocks/smartpdf',
|
||||||
version: '3.2.2',
|
version: '3.1.4',
|
||||||
description: 'A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.'
|
description: 'A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.'
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,6 @@ import * as paths from './smartpdf.paths.js';
|
|||||||
import { Server } from 'http';
|
import { Server } from 'http';
|
||||||
import { PdfCandidate } from './smartpdf.classes.pdfcandidate.js';
|
import { PdfCandidate } from './smartpdf.classes.pdfcandidate.js';
|
||||||
import { type IPdf } from '@tsclass/tsclass/dist_ts/business/pdf.js';
|
import { type IPdf } from '@tsclass/tsclass/dist_ts/business/pdf.js';
|
||||||
import { execFile } from 'child_process';
|
|
||||||
|
|
||||||
declare const document: any;
|
declare const document: any;
|
||||||
|
|
||||||
@ -35,20 +34,15 @@ export class SmartPdf {
|
|||||||
this.externalBrowserBool = true;
|
this.externalBrowserBool = true;
|
||||||
} else {
|
} else {
|
||||||
this.headlessBrowser = await plugins.smartpuppeteer.getEnvAwareBrowserInstance({
|
this.headlessBrowser = await plugins.smartpuppeteer.getEnvAwareBrowserInstance({
|
||||||
forceNoSandbox: false,
|
forceNoSandbox: true,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// setup server
|
// setup server
|
||||||
const app = plugins.express();
|
const app = plugins.express();
|
||||||
app.get('/:pdfId', (req, res) => {
|
app.get('/:pdfId', (req, res) => {
|
||||||
const wantedCandidate = this._candidates[req.params.pdfId];
|
res.setHeader('PDF-ID', this._candidates[req.params.pdfId].pdfId);
|
||||||
if (!wantedCandidate) {
|
res.send(this._candidates[req.params.pdfId].htmlString);
|
||||||
console.log(`${req.url} not attached to a candidate`);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
res.setHeader('pdf-id', wantedCandidate.pdfId);
|
|
||||||
res.send(wantedCandidate.htmlString);
|
|
||||||
});
|
});
|
||||||
this.htmlServerInstance = plugins.http.createServer(app);
|
this.htmlServerInstance = plugins.http.createServer(app);
|
||||||
const smartnetworkInstance = new plugins.smartnetwork.SmartNetwork();
|
const smartnetworkInstance = new plugins.smartnetwork.SmartNetwork();
|
||||||
@ -76,7 +70,7 @@ export class SmartPdf {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a PDF for a given HTML string.
|
* returns a pdf for a given html string;
|
||||||
*/
|
*/
|
||||||
async getA4PdfResultForHtmlString(htmlStringArg: string): Promise<plugins.tsclass.business.IPdf> {
|
async getA4PdfResultForHtmlString(htmlStringArg: string): Promise<plugins.tsclass.business.IPdf> {
|
||||||
await this._readyDeferred.promise;
|
await this._readyDeferred.promise;
|
||||||
@ -90,6 +84,7 @@ export class SmartPdf {
|
|||||||
const response = await page.goto(`http://localhost:3210/${pdfCandidate.pdfId}`, {
|
const response = await page.goto(`http://localhost:3210/${pdfCandidate.pdfId}`, {
|
||||||
waitUntil: 'networkidle2',
|
waitUntil: 'networkidle2',
|
||||||
});
|
});
|
||||||
|
// await plugins.smartdelay.delayFor(1000);
|
||||||
const headers = response.headers();
|
const headers = response.headers();
|
||||||
if (headers['pdf-id'] !== pdfCandidate.pdfId) {
|
if (headers['pdf-id'] !== pdfCandidate.pdfId) {
|
||||||
console.log('Error! Headers do not match. For security reasons no pdf is being emitted!');
|
console.log('Error! Headers do not match. For security reasons no pdf is being emitted!');
|
||||||
@ -104,8 +99,6 @@ export class SmartPdf {
|
|||||||
printBackground: true,
|
printBackground: true,
|
||||||
displayHeaderFooter: false,
|
displayHeaderFooter: false,
|
||||||
});
|
});
|
||||||
// Convert Uint8Array to Node Buffer
|
|
||||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
|
||||||
await page.close();
|
await page.close();
|
||||||
delete this._candidates[pdfCandidate.pdfId];
|
delete this._candidates[pdfCandidate.pdfId];
|
||||||
pdfCandidate.doneDeferred.resolve();
|
pdfCandidate.doneDeferred.resolve();
|
||||||
@ -114,9 +107,9 @@ export class SmartPdf {
|
|||||||
id: pdfCandidate.pdfId,
|
id: pdfCandidate.pdfId,
|
||||||
name: `${pdfCandidate.pdfId}.js`,
|
name: `${pdfCandidate.pdfId}.js`,
|
||||||
metadata: {
|
metadata: {
|
||||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
||||||
},
|
},
|
||||||
buffer: nodePdfBuffer,
|
buffer: pdfBuffer,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -141,16 +134,14 @@ export class SmartPdf {
|
|||||||
printBackground: true,
|
printBackground: true,
|
||||||
displayHeaderFooter: false,
|
displayHeaderFooter: false,
|
||||||
});
|
});
|
||||||
// Convert Uint8Array to Node Buffer
|
|
||||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
|
||||||
await page.close();
|
await page.close();
|
||||||
return {
|
return {
|
||||||
id: pdfId,
|
id: pdfId,
|
||||||
name: `${pdfId}.js`,
|
name: `${pdfId}.js`,
|
||||||
metadata: {
|
metadata: {
|
||||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
||||||
},
|
},
|
||||||
buffer: nodePdfBuffer,
|
buffer: pdfBuffer,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -160,23 +151,15 @@ export class SmartPdf {
|
|||||||
width: 1920,
|
width: 1920,
|
||||||
height: 1200,
|
height: 1200,
|
||||||
});
|
});
|
||||||
await page.emulateMediaType('screen');
|
page.emulateMediaType('screen');
|
||||||
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
||||||
const pdfId = plugins.smartunique.shortId();
|
const pdfId = plugins.smartunique.shortId();
|
||||||
// Use both document.body and document.documentElement to ensure we have a valid height and width.
|
|
||||||
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
||||||
return {
|
return {
|
||||||
documentHeight: Math.max(
|
documentHeight: document.body.scrollHeight,
|
||||||
document.body.scrollHeight,
|
documentWidth: document.body.clientWidth,
|
||||||
document.documentElement.scrollHeight
|
|
||||||
) || 1200,
|
|
||||||
documentWidth: Math.max(
|
|
||||||
document.body.clientWidth,
|
|
||||||
document.documentElement.clientWidth
|
|
||||||
) || 1920,
|
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
// Update viewport height to the full document height.
|
|
||||||
await page.setViewport({
|
await page.setViewport({
|
||||||
width: 1920,
|
width: 1920,
|
||||||
height: documentHeight,
|
height: documentHeight,
|
||||||
@ -189,16 +172,14 @@ export class SmartPdf {
|
|||||||
scale: 1,
|
scale: 1,
|
||||||
pageRanges: '1',
|
pageRanges: '1',
|
||||||
});
|
});
|
||||||
// Convert Uint8Array to Node Buffer
|
|
||||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
|
||||||
await page.close();
|
await page.close();
|
||||||
return {
|
return {
|
||||||
id: pdfId,
|
id: pdfId,
|
||||||
name: `${pdfId}.js`,
|
name: `${pdfId}.js`,
|
||||||
metadata: {
|
metadata: {
|
||||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
textExtraction: await this.extractTextFromPdfBuffer(pdfBuffer),
|
||||||
},
|
},
|
||||||
buffer: nodePdfBuffer,
|
buffer: pdfBuffer,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -215,9 +196,9 @@ export class SmartPdf {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public async readFileToPdfObject(pathArg: string): Promise<plugins.tsclass.business.IPdf> {
|
public async readFileToPdfObject(pathArg: string): Promise<plugins.tsclass.business.IPdf> {
|
||||||
const absolutePath = plugins.smartpath.transform.makeAbsolute(pathArg);
|
const path = plugins.smartpath.transform.makeAbsolute(pathArg);
|
||||||
const parsedPath = plugins.path.parse(absolutePath);
|
const parsedPath = plugins.path.parse(path);
|
||||||
const buffer = await plugins.smartfile.fs.toBuffer(absolutePath);
|
const buffer = await plugins.smartfile.fs.toBuffer(path);
|
||||||
return {
|
return {
|
||||||
name: parsedPath.base,
|
name: parsedPath.base,
|
||||||
buffer,
|
buffer,
|
||||||
@ -244,109 +225,40 @@ export class SmartPdf {
|
|||||||
return deferred.promise;
|
return deferred.promise;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public async convertPDFToJPGBytes(
|
||||||
* Checks for the presence of required dependencies: GraphicsMagick and Ghostscript.
|
|
||||||
*/
|
|
||||||
private async checkDependencies(): Promise<void> {
|
|
||||||
await Promise.all([
|
|
||||||
this.checkCommandExists('gm', ['version']),
|
|
||||||
this.checkCommandExists('gs', ['--version']),
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks if a given command exists by trying to execute it.
|
|
||||||
*/
|
|
||||||
private checkCommandExists(command: string, args: string[]): Promise<void> {
|
|
||||||
return new Promise((resolve, reject) => {
|
|
||||||
execFile(command, args, (error, stdout, stderr) => {
|
|
||||||
if (error) {
|
|
||||||
reject(new Error(`Dependency check failed: ${command} is not installed or not in the PATH. ${error.message}`));
|
|
||||||
} else {
|
|
||||||
resolve();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Converts a PDF to PNG bytes for each page using Puppeteer and PDF.js.
|
|
||||||
* This method creates a temporary HTML page that loads PDF.js from a CDN,
|
|
||||||
* renders each PDF page to a canvas, and then screenshots each canvas element.
|
|
||||||
*/
|
|
||||||
public async convertPDFToPngBytes(
|
|
||||||
pdfBytes: Uint8Array,
|
pdfBytes: Uint8Array,
|
||||||
options: { width?: number; height?: number; quality?: number } = {}
|
options: {
|
||||||
): Promise<Uint8Array[]> {
|
width?: number;
|
||||||
// Note: options.width, options.height, and options.quality are not applied here,
|
height?: number;
|
||||||
// as the rendered canvas size is determined by the PDF page dimensions.
|
quality?: number;
|
||||||
|
} = {}
|
||||||
|
) {
|
||||||
|
const { width = 1024, height = 768, quality = 100 } = options;
|
||||||
|
|
||||||
// Create a new page using the headless browser.
|
// Load the PDF document
|
||||||
const page = await this.headlessBrowser.newPage();
|
const pdfDoc = await plugins.pdfLib.PDFDocument.load(pdfBytes);
|
||||||
|
|
||||||
// Prepare PDF data as a base64 string.
|
const converter = plugins.pdf2pic.fromBuffer(Buffer.from(pdfBytes), {
|
||||||
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
|
density: 100, // Image density (DPI)
|
||||||
|
format: 'jpg', // Image format
|
||||||
|
width, // Output image width
|
||||||
|
height, // Output image height
|
||||||
|
quality, // Output image quality
|
||||||
|
});
|
||||||
|
|
||||||
// HTML template that loads PDF.js and renders the PDF.
|
// Get array promises that resolve to JPG buffers
|
||||||
const htmlTemplate: string = `
|
const imagePromises: Promise<Buffer>[] = [];
|
||||||
<!DOCTYPE html>
|
const numPages = pdfDoc.getPageCount();
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8">
|
|
||||||
<title>PDF to PNG Converter</title>
|
|
||||||
<style>
|
|
||||||
body { margin: 0; }
|
|
||||||
canvas { display: block; margin: 10px auto; }
|
|
||||||
</style>
|
|
||||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<script>
|
|
||||||
(async function() {
|
|
||||||
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
|
|
||||||
const pdfData = "__PDF_DATA__";
|
|
||||||
const raw = atob(pdfData);
|
|
||||||
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
|
|
||||||
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
|
|
||||||
const pdf = await loadingTask.promise;
|
|
||||||
const numPages = pdf.numPages;
|
|
||||||
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
|
|
||||||
const page = await pdf.getPage(pageNum);
|
|
||||||
const viewport = page.getViewport({ scale: 1.0 });
|
|
||||||
const canvas = document.createElement('canvas');
|
|
||||||
const context = canvas.getContext('2d');
|
|
||||||
canvas.width = viewport.width;
|
|
||||||
canvas.height = viewport.height;
|
|
||||||
await page.render({ canvasContext: context, viewport: viewport }).promise;
|
|
||||||
document.body.appendChild(canvas);
|
|
||||||
}
|
|
||||||
window.renderComplete = true;
|
|
||||||
})();
|
|
||||||
</script>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
`;
|
|
||||||
|
|
||||||
// Replace the placeholder with the actual base64 PDF data.
|
for (let i = 0; i < numPages; i++) {
|
||||||
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
|
imagePromises.push(converter(i + 1, {
|
||||||
|
responseType: 'buffer',
|
||||||
// Set the page content.
|
}).then((output) => output.buffer));
|
||||||
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
|
|
||||||
|
|
||||||
// Wait until the PDF.js rendering is complete.
|
|
||||||
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
|
|
||||||
|
|
||||||
// Query all canvas elements (each representing a rendered PDF page).
|
|
||||||
const canvasElements = await page.$$('canvas');
|
|
||||||
const pngBuffers: Uint8Array[] = [];
|
|
||||||
|
|
||||||
for (const canvasElement of canvasElements) {
|
|
||||||
// Screenshot the canvas element. The screenshot will be a PNG buffer.
|
|
||||||
const screenshotBuffer = (await canvasElement.screenshot({ encoding: 'binary' })) as Buffer;
|
|
||||||
pngBuffers.push(new Uint8Array(screenshotBuffer));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
await page.close();
|
// Resolve all promises and return the array of buffers
|
||||||
return pngBuffers;
|
const imageBuffers = await Promise.all(imagePromises);
|
||||||
|
const imageUint8Arrays = imageBuffers.map((buffer) => new Uint8Array(buffer));
|
||||||
|
return imageUint8Arrays;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -33,6 +33,7 @@ export { tsclass };
|
|||||||
// thirdparty
|
// thirdparty
|
||||||
import express from 'express';
|
import express from 'express';
|
||||||
import pdf2json from 'pdf2json';
|
import pdf2json from 'pdf2json';
|
||||||
|
import pdf2pic from 'pdf2pic';
|
||||||
import pdfLib from 'pdf-lib';
|
import pdfLib from 'pdf-lib';
|
||||||
|
|
||||||
export { express, pdf2json, pdfLib, };
|
export { express, pdf2json, pdf2pic, pdfLib, };
|
||||||
|
Loading…
x
Reference in New Issue
Block a user