Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5b1615d359 | |||
| c1208b5216 | |||
| d0c5821f80 | |||
| bd6705ca4a | |||
| 5bc84ffaa0 | |||
| 6435d0f042 | |||
| 9fbd735088 | |||
| fe05713d57 | |||
| be574df599 |
38
changelog.md
38
changelog.md
@@ -1,5 +1,43 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 2026-03-09 - 4.2.0 - feat(smartpdf)
|
||||||
|
replace internal Express server with @push.rocks/smartserve, add PDF→WebP rendering, improve start/stop handling and bump dependencies
|
||||||
|
|
||||||
|
- Replace internal Express HTTP implementation with @push.rocks/smartserve and update README wording to reflect HTTP server usage
|
||||||
|
- Add PDF→WebP rendering: use pdf.js in-page rendering, capture canvases via Puppeteer to produce WebP buffers; added robust wait/timeout and error handling
|
||||||
|
- Add start/stop guards: _isRunning flag, reset readiness Deferred on start, and throw if start called while running
|
||||||
|
- Remove direct http/express exports from plugins and stop exporting express; export smartserve from plugins
|
||||||
|
- Improve JPEG conversion to produce progressive JPEGs via SmartJimp (sharp mode)
|
||||||
|
- Bump dependencies/devDependencies: @push.rocks/smartfs to ^1.5.0, add @push.rocks/smartserve ^2.0.1; devDeps @git.zone/tsbuild ^4.3.0, @git.zone/tstest ^3.3.0, @types/node ^25.3.5
|
||||||
|
|
||||||
|
## 2026-03-01 - 4.1.3 - fix(tests)
|
||||||
|
use example.com in image conversion test and relax JPEG size assertion
|
||||||
|
|
||||||
|
- Replaced https://www.wikipedia.org with https://example.com in test/test.ts for the third PDF generation test
|
||||||
|
- Removed the strict expectation that JPEG size must be smaller than PNG; now only asserts that WebP is smaller than PNG
|
||||||
|
- Updated test comment to note that JPEG may not be smaller for simple graphics pages
|
||||||
|
|
||||||
|
## 2026-03-01 - 4.1.2 - fix(smartfs)
|
||||||
|
replace smartfile with smartfs, update file reading to use SmartFs, remove GraphicsMagick/Ghostscript dependency checks, bump dev and runtime dependencies, update tests and docs, and adjust npmextra configuration
|
||||||
|
|
||||||
|
- Replace usage/export of @push.rocks/smartfile with @push.rocks/smartfs and update readFileToPdfObject to use SmartFs + SmartFsProviderNode
|
||||||
|
- Remove execFile import and the GraphicsMagick/Ghostscript dependency-checking helpers from smartpdf (no more gm/gs checks)
|
||||||
|
- Bump devDependencies: @git.zone/tsbuild ^4.1.2, @git.zone/tsdoc ^1.12.0, @git.zone/tsrun ^2.0.1, @git.zone/tstest ^3.1.8, @types/node ^25.3.2
|
||||||
|
- Bump runtime dependencies: @push.rocks/smartfs ^1.3.1, @push.rocks/smartnetwork ^4.4.0, @tsclass/tsclass ^9.3.0, @types/express ^5.0.6, express ^5.2.1, pdf2json ^4.0.2
|
||||||
|
- Tests updated: switched example URLs to example.com, added a third PDF generation test that writes .nogit/3.pdf, and exported tap.start() as default
|
||||||
|
- npmextra.json reorganized to namespaced keys, added release.registries and accessLevel, and adjusted tsdoc/legal entries
|
||||||
|
- Documentation/readme refreshed: added issue reporting/security section, feature table, and various wording/formatting updates
|
||||||
|
|
||||||
|
## 2025-08-02 - 4.1.0 - feat(smartpdf)
|
||||||
|
Add progressive JPEG generation support
|
||||||
|
|
||||||
|
- Added new convertPDFToJpegBytes method for progressive JPEG generation
|
||||||
|
- Integrated @push.rocks/smartjimp for true progressive JPEG encoding
|
||||||
|
- Progressive JPEGs load in multiple passes, showing low-quality preview first
|
||||||
|
- Supports quality and scale options like other image generation methods
|
||||||
|
- Updated readme with comprehensive documentation and modern styling
|
||||||
|
- Updated legal section to reflect Task Venture Capital GmbH ownership
|
||||||
|
|
||||||
## 2025-08-02 - 4.0.0 - BREAKING CHANGE(smartpdf)
|
## 2025-08-02 - 4.0.0 - BREAKING CHANGE(smartpdf)
|
||||||
Improve image generation quality and API consistency
|
Improve image generation quality and API consistency
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,5 @@
|
|||||||
{
|
{
|
||||||
"npmci": {
|
"@git.zone/cli": {
|
||||||
"npmGlobalTools": [],
|
|
||||||
"npmAccessLevel": "public"
|
|
||||||
},
|
|
||||||
"gitzone": {
|
|
||||||
"projectType": "npm",
|
"projectType": "npm",
|
||||||
"module": {
|
"module": {
|
||||||
"githost": "code.foss.global",
|
"githost": "code.foss.global",
|
||||||
@@ -26,9 +22,19 @@
|
|||||||
"text extraction",
|
"text extraction",
|
||||||
"PDF management"
|
"PDF management"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
"release": {
|
||||||
|
"registries": [
|
||||||
|
"https://verdaccio.lossless.digital",
|
||||||
|
"https://registry.npmjs.org"
|
||||||
|
],
|
||||||
|
"accessLevel": "public"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tsdoc": {
|
"@git.zone/tsdoc": {
|
||||||
"legal": "\n## License and Legal Information\n\nThis repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository. \n\n**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.\n\n### Trademarks\n\nThis project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.\n\n### Company Information\n\nTask Venture Capital GmbH \nRegistered at District court Bremen HRB 35230 HB, Germany\n\nFor any legal inquiries or if you require further information, please contact us via email at hello@task.vc.\n\nBy using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.\n"
|
"legal": "\n## License and Legal Information\n\nThis repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository. \n\n**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.\n\n### Trademarks\n\nThis project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.\n\n### Company Information\n\nTask Venture Capital GmbH \nRegistered at District court Bremen HRB 35230 HB, Germany\n\nFor any legal inquiries or if you require further information, please contact us via email at hello@task.vc.\n\nBy using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.\n"
|
||||||
|
},
|
||||||
|
"@ship.zone/szci": {
|
||||||
|
"npmGlobalTools": []
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
26
package.json
26
package.json
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@push.rocks/smartpdf",
|
"name": "@push.rocks/smartpdf",
|
||||||
"version": "4.0.0",
|
"version": "4.2.0",
|
||||||
"private": false,
|
"private": false,
|
||||||
"description": "A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.",
|
"description": "A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.",
|
||||||
"main": "dist_ts/index.js",
|
"main": "dist_ts/index.js",
|
||||||
@@ -9,31 +9,31 @@
|
|||||||
"author": "Lossless GmbH",
|
"author": "Lossless GmbH",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "(tstest test/ --verbose --timeout 60)",
|
"test": "(tstest test/ --verbose --timeout 120)",
|
||||||
"build": "(tsbuild tsfolders --allowimplicitany)",
|
"build": "(tsbuild tsfolders --allowimplicitany)",
|
||||||
"buildDocs": "tsdoc"
|
"buildDocs": "tsdoc"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@git.zone/tsbuild": "^2.6.4",
|
"@git.zone/tsbuild": "^4.3.0",
|
||||||
"@git.zone/tsdoc": "^1.5.0",
|
"@git.zone/tsdoc": "^1.12.0",
|
||||||
"@git.zone/tsrun": "^1.3.3",
|
"@git.zone/tsrun": "^2.0.1",
|
||||||
"@git.zone/tstest": "^2.3.2",
|
"@git.zone/tstest": "^3.3.0",
|
||||||
"@types/node": "^24.1.0"
|
"@types/node": "^25.3.5"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@push.rocks/smartbuffer": "^3.0.5",
|
"@push.rocks/smartbuffer": "^3.0.5",
|
||||||
"@push.rocks/smartdelay": "^3.0.5",
|
"@push.rocks/smartdelay": "^3.0.5",
|
||||||
"@push.rocks/smartfile": "^11.2.5",
|
"@push.rocks/smartfs": "^1.5.0",
|
||||||
"@push.rocks/smartnetwork": "^4.1.2",
|
"@push.rocks/smartjimp": "^1.2.0",
|
||||||
|
"@push.rocks/smartnetwork": "^4.4.0",
|
||||||
"@push.rocks/smartpath": "^6.0.0",
|
"@push.rocks/smartpath": "^6.0.0",
|
||||||
"@push.rocks/smartpromise": "^4.2.3",
|
"@push.rocks/smartpromise": "^4.2.3",
|
||||||
"@push.rocks/smartpuppeteer": "^2.0.5",
|
"@push.rocks/smartpuppeteer": "^2.0.5",
|
||||||
|
"@push.rocks/smartserve": "^2.0.1",
|
||||||
"@push.rocks/smartunique": "^3.0.9",
|
"@push.rocks/smartunique": "^3.0.9",
|
||||||
"@tsclass/tsclass": "^9.2.0",
|
"@tsclass/tsclass": "^9.3.0",
|
||||||
"@types/express": "^5.0.3",
|
|
||||||
"express": "^5.1.0",
|
|
||||||
"pdf-lib": "^1.17.1",
|
"pdf-lib": "^1.17.1",
|
||||||
"pdf2json": "3.2.0"
|
"pdf2json": "^4.0.2"
|
||||||
},
|
},
|
||||||
"files": [
|
"files": [
|
||||||
"ts/**/*",
|
"ts/**/*",
|
||||||
|
|||||||
5370
pnpm-lock.yaml
generated
5370
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
2
pnpm-workspace.yaml
Normal file
2
pnpm-workspace.yaml
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
onlyBuiltDependencies:
|
||||||
|
- sharp
|
||||||
791
readme.md
791
readme.md
@@ -1,436 +1,423 @@
|
|||||||
# @push.rocks/smartpdf
|
# @push.rocks/smartpdf 📄✨
|
||||||
Create PDFs on the fly from HTML, websites, or existing PDFs with advanced features like text extraction, PDF merging, and PNG conversion.
|
|
||||||
|
|
||||||
## Install
|
> **Transform HTML, websites, and PDFs into beautiful documents and images with just a few lines of code.**
|
||||||
To install `@push.rocks/smartpdf`, use npm or yarn:
|
|
||||||
|
[npm package](https://www.npmjs.com/package/@push.rocks/smartpdf)
|
||||||
|
[TypeScript](https://www.typescriptlang.org/)
|
||||||
|
[License: MIT](./license)
|
||||||
|
|
||||||
|
## Issue Reporting and Security
|
||||||
|
|
||||||
|
For reporting bugs, issues, or security vulnerabilities, please visit [community.foss.global/](https://community.foss.global/). This is the central community hub for all issue reporting. Developers who sign and comply with our contribution agreement and go through identification can also get a [code.foss.global/](https://code.foss.global/) account to submit Pull Requests directly.
|
||||||
|
|
||||||
|
## 🚀 Why SmartPDF?
|
||||||
|
|
||||||
|
SmartPDF is your Swiss Army knife for PDF operations in Node.js. Whether you're generating invoices from HTML, snapshotting web pages, merging documents, or converting PDF pages to images — SmartPDF handles it all through a clean, async-first TypeScript API backed by headless Chromium.
|
||||||
|
|
||||||
|
### ✨ Features at a Glance
|
||||||
|
|
||||||
|
| Feature | Description |
|
||||||
|
|---------|-------------|
|
||||||
|
| 📝 **HTML → PDF** | Render any HTML string (with full CSS) into an A4-sized PDF |
|
||||||
|
| 🌐 **Website → PDF** | Capture a live URL as a PDF — either A4 or full-page scroll |
|
||||||
|
| 🔀 **PDF Merging** | Combine multiple PDF buffers into a single document |
|
||||||
|
| 🖼️ **PDF → Images** | Convert PDF pages to **PNG**, **WebP**, or progressive **JPEG** |
|
||||||
|
| 📑 **Text Extraction** | Pull raw text content from any PDF buffer |
|
||||||
|
| 🔌 **Smart Port Management** | Automatic port allocation so multiple instances never collide |
|
||||||
|
| 🎛️ **DPI Control** | Built-in scale constants for screen, high-quality, and print resolutions |
|
||||||
|
| 🌐 **BYO Browser** | Optionally pass your own Puppeteer `Browser` instance |
|
||||||
|
|
||||||
|
## 📦 Installation
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
npm install @push.rocks/smartpdf --save
|
pnpm add @push.rocks/smartpdf
|
||||||
```
|
```
|
||||||
|
|
||||||
Or with yarn:
|
> **Prerequisites:** SmartPDF uses headless Chromium via Puppeteer under the hood. On most systems this is handled automatically. If you run into browser-launch issues (CI, Docker, etc.), make sure the required system libraries are installed — see the [Puppeteer troubleshooting guide](https://pptr.dev/troubleshooting).
|
||||||
|
|
||||||
```bash
|
## 🎯 Quick Start
|
||||||
yarn add @push.rocks/smartpdf
|
|
||||||
```
|
|
||||||
|
|
||||||
## Requirements
|
|
||||||
This package requires a Chrome or Chromium installation to be available on the system, as it uses Puppeteer for rendering. The package will automatically detect and use the appropriate executable.
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
`@push.rocks/smartpdf` provides a powerful interface for PDF generation and manipulation. All examples use ESM syntax and TypeScript.
|
|
||||||
|
|
||||||
### Getting Started
|
|
||||||
First, import the necessary classes:
|
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
import { SmartPdf, IPdf } from '@push.rocks/smartpdf';
|
import { SmartPdf } from '@push.rocks/smartpdf';
|
||||||
```
|
import * as fs from 'fs';
|
||||||
|
|
||||||
### Basic Setup with Automatic Port Allocation
|
// 1. Create and start
|
||||||
SmartPdf automatically finds an available port between 20000-30000 for its internal server:
|
const smartPdf = await SmartPdf.create();
|
||||||
|
|
||||||
```typescript
|
|
||||||
async function setupSmartPdf() {
|
|
||||||
const smartPdf = await SmartPdf.create();
|
|
||||||
await smartPdf.start();
|
|
||||||
|
|
||||||
// Your PDF operations here
|
|
||||||
|
|
||||||
await smartPdf.stop();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Advanced Setup with Custom Port Configuration
|
|
||||||
You can specify custom port settings to avoid conflicts or meet specific requirements:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
// Use a specific port
|
|
||||||
const smartPdf = await SmartPdf.create({ port: 3000 });
|
|
||||||
|
|
||||||
// Use a custom port range
|
|
||||||
const smartPdf = await SmartPdf.create({
|
|
||||||
portRangeStart: 4000,
|
|
||||||
portRangeEnd: 5000
|
|
||||||
});
|
|
||||||
|
|
||||||
// The server will find an available port in your specified range
|
|
||||||
await smartPdf.start();
|
await smartPdf.start();
|
||||||
console.log(`Server running on port: ${smartPdf.serverPort}`);
|
|
||||||
|
// 2. Generate a PDF from HTML
|
||||||
|
const pdf = await smartPdf.getA4PdfResultForHtmlString(`
|
||||||
|
<h1>Hello, PDF World! 🌍</h1>
|
||||||
|
<p>Generated with SmartPDF.</p>
|
||||||
|
`);
|
||||||
|
|
||||||
|
// 3. Write to disk
|
||||||
|
fs.writeFileSync('my-first.pdf', pdf.buffer);
|
||||||
|
|
||||||
|
// 4. Clean up
|
||||||
|
await smartPdf.stop();
|
||||||
```
|
```
|
||||||
|
|
||||||
### Creating PDFs from HTML Strings
|
Every method returns an `IPdf` object:
|
||||||
Generate PDFs from HTML content with full CSS support:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
async function createPdfFromHtml() {
|
|
||||||
const smartPdf = await SmartPdf.create();
|
|
||||||
await smartPdf.start();
|
|
||||||
|
|
||||||
const htmlString = `
|
|
||||||
<!DOCTYPE html>
|
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<style>
|
|
||||||
body { font-family: Arial, sans-serif; margin: 40px; }
|
|
||||||
h1 { color: #333; }
|
|
||||||
.highlight { background-color: yellow; }
|
|
||||||
</style>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<h1>Professional PDF Document</h1>
|
|
||||||
<p>This PDF was generated from <span class="highlight">HTML content</span>.</p>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
`;
|
|
||||||
|
|
||||||
const pdf: IPdf = await smartPdf.getA4PdfResultForHtmlString(htmlString);
|
|
||||||
|
|
||||||
// pdf.buffer contains the PDF data
|
|
||||||
// pdf.id contains a unique identifier
|
|
||||||
// pdf.name contains the filename
|
|
||||||
// pdf.metadata contains additional information like extracted text
|
|
||||||
|
|
||||||
await smartPdf.stop();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Generating PDFs from Websites
|
|
||||||
Capture web pages as PDFs with two different approaches:
|
|
||||||
|
|
||||||
#### A4 Format PDF from Website
|
|
||||||
Captures the viewable area formatted for A4 paper:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
async function createA4PdfFromWebsite() {
|
|
||||||
const smartPdf = await SmartPdf.create();
|
|
||||||
await smartPdf.start();
|
|
||||||
|
|
||||||
const pdf: IPdf = await smartPdf.getPdfResultForWebsite('https://example.com');
|
|
||||||
|
|
||||||
// Save to file
|
|
||||||
await fs.writeFile('website-a4.pdf', pdf.buffer);
|
|
||||||
|
|
||||||
await smartPdf.stop();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Full Webpage as Single PDF
|
|
||||||
Captures the entire webpage in a single PDF, regardless of length:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
async function createFullPdfFromWebsite() {
|
|
||||||
const smartPdf = await SmartPdf.create();
|
|
||||||
await smartPdf.start();
|
|
||||||
|
|
||||||
const pdf: IPdf = await smartPdf.getFullWebsiteAsSinglePdf('https://example.com');
|
|
||||||
|
|
||||||
// This captures the entire scrollable area
|
|
||||||
await fs.writeFile('website-full.pdf', pdf.buffer);
|
|
||||||
|
|
||||||
await smartPdf.stop();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Merging Multiple PDFs
|
|
||||||
Combine multiple PDF files into a single document:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
async function mergePdfs() {
|
|
||||||
const smartPdf = await SmartPdf.create();
|
|
||||||
await smartPdf.start();
|
|
||||||
|
|
||||||
// Create or load your PDFs
|
|
||||||
const pdf1 = await smartPdf.getA4PdfResultForHtmlString('<h1>Document 1</h1>');
|
|
||||||
const pdf2 = await smartPdf.getA4PdfResultForHtmlString('<h1>Document 2</h1>');
|
|
||||||
const pdf3 = await smartPdf.readFileToPdfObject('./existing-document.pdf');
|
|
||||||
|
|
||||||
// Merge PDFs - order matters!
|
|
||||||
const mergedPdf: Uint8Array = await smartPdf.mergePdfs([
|
|
||||||
pdf1.buffer,
|
|
||||||
pdf2.buffer,
|
|
||||||
pdf3.buffer
|
|
||||||
]);
|
|
||||||
|
|
||||||
// Save the merged PDF
|
|
||||||
await fs.writeFile('merged-document.pdf', mergedPdf);
|
|
||||||
|
|
||||||
await smartPdf.stop();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Reading PDFs and Extracting Text
|
|
||||||
Extract text content from existing PDFs:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
async function extractTextFromPdf() {
|
|
||||||
const smartPdf = await SmartPdf.create();
|
|
||||||
|
|
||||||
// Read PDF from disk
|
|
||||||
const pdf: IPdf = await smartPdf.readFileToPdfObject('/path/to/document.pdf');
|
|
||||||
|
|
||||||
// Extract all text
|
|
||||||
const extractedText = await smartPdf.extractTextFromPdfBuffer(pdf.buffer);
|
|
||||||
console.log('Extracted text:', extractedText);
|
|
||||||
|
|
||||||
// The pdf object also contains metadata with text extraction
|
|
||||||
console.log('Metadata:', pdf.metadata);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Converting PDF to PNG Images
|
|
||||||
Convert each page of a PDF into PNG images with configurable quality:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
async function convertPdfToPng() {
|
|
||||||
const smartPdf = await SmartPdf.create();
|
|
||||||
await smartPdf.start();
|
|
||||||
|
|
||||||
// Load a PDF
|
|
||||||
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
|
|
||||||
|
|
||||||
// Convert to PNG images with default high quality (216 DPI)
|
|
||||||
const pngImages: Uint8Array[] = await smartPdf.convertPDFToPngBytes(pdf.buffer);
|
|
||||||
|
|
||||||
// Or specify custom scale/DPI
|
|
||||||
const highResPngs = await smartPdf.convertPDFToPngBytes(pdf.buffer, {
|
|
||||||
scale: SmartPdf.SCALE_PRINT, // 6.0 scale = ~432 DPI
|
|
||||||
maxWidth: 3000, // Optional: limit maximum width
|
|
||||||
maxHeight: 4000 // Optional: limit maximum height
|
|
||||||
});
|
|
||||||
|
|
||||||
// Save each page as a PNG
|
|
||||||
pngImages.forEach((pngBuffer, index) => {
|
|
||||||
fs.writeFileSync(`page-${index + 1}.png`, pngBuffer);
|
|
||||||
});
|
|
||||||
|
|
||||||
await smartPdf.stop();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Understanding Scale and DPI
|
|
||||||
PDF.js renders at 72 DPI by default. Use these scale factors for different quality levels:
|
|
||||||
- `SmartPdf.SCALE_SCREEN` (2.0): ~144 DPI - Good for screen display
|
|
||||||
- `SmartPdf.SCALE_HIGH` (3.0): ~216 DPI - High quality (default)
|
|
||||||
- `SmartPdf.SCALE_PRINT` (6.0): ~432 DPI - Print quality
|
|
||||||
- Custom DPI: `scale = SmartPdf.getScaleForDPI(300)` for 300 DPI
|
|
||||||
|
|
||||||
### Converting PDF to WebP Images
|
|
||||||
Generate web-optimized images using WebP format. WebP provides 25-35% better compression than PNG/JPEG while maintaining quality:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
async function createWebPImages() {
|
|
||||||
const smartPdf = await SmartPdf.create();
|
|
||||||
await smartPdf.start();
|
|
||||||
|
|
||||||
// Load a PDF
|
|
||||||
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
|
|
||||||
|
|
||||||
// Create high-quality WebP images (default: 3.0 scale = 216 DPI, 85% quality)
|
|
||||||
const webpImages = await smartPdf.convertPDFToWebpBytes(pdf.buffer);
|
|
||||||
|
|
||||||
// Save WebP images
|
|
||||||
webpImages.forEach((webpBuffer, index) => {
|
|
||||||
fs.writeFileSync(`page-${index + 1}.webp`, webpBuffer);
|
|
||||||
});
|
|
||||||
|
|
||||||
await smartPdf.stop();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Creating Thumbnails
|
|
||||||
Generate small thumbnail images for PDF galleries or document lists:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
async function createThumbnails() {
|
|
||||||
const smartPdf = await SmartPdf.create();
|
|
||||||
await smartPdf.start();
|
|
||||||
|
|
||||||
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
|
|
||||||
|
|
||||||
// Create small thumbnails (0.5 scale = ~36 DPI, 70% quality)
|
|
||||||
const thumbnails = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
|
|
||||||
scale: 0.5, // Small readable thumbnails
|
|
||||||
quality: 70 // Lower quality for smaller files
|
|
||||||
});
|
|
||||||
|
|
||||||
// Save thumbnails
|
|
||||||
thumbnails.forEach((thumb, index) => {
|
|
||||||
fs.writeFileSync(`thumb-${index + 1}.webp`, thumb);
|
|
||||||
});
|
|
||||||
|
|
||||||
await smartPdf.stop();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Constrained Dimensions
|
|
||||||
Create previews with maximum width/height constraints, useful for responsive layouts:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
async function createConstrainedPreviews() {
|
|
||||||
const smartPdf = await SmartPdf.create();
|
|
||||||
await smartPdf.start();
|
|
||||||
|
|
||||||
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
|
|
||||||
|
|
||||||
// Create previews that fit within 800x600 pixels
|
|
||||||
const previews = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
|
|
||||||
scale: 1.0, // Start with full size
|
|
||||||
quality: 90, // High quality
|
|
||||||
maxWidth: 800, // Maximum 800px wide
|
|
||||||
maxHeight: 600 // Maximum 600px tall
|
|
||||||
});
|
|
||||||
|
|
||||||
// The method automatically scales down to fit within constraints
|
|
||||||
previews.forEach((preview, index) => {
|
|
||||||
fs.writeFileSync(`preview-constrained-${index + 1}.webp`, preview);
|
|
||||||
});
|
|
||||||
|
|
||||||
await smartPdf.stop();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### WebP Options
|
|
||||||
The `convertPDFToWebpBytes` method accepts these options:
|
|
||||||
|
|
||||||
- `scale`: Scale factor for preview size (default: 3.0 for ~216 DPI)
|
|
||||||
- `quality`: WebP compression quality (default: 85, range: 0-100)
|
|
||||||
- `maxWidth`: Maximum width in pixels (optional)
|
|
||||||
- `maxHeight`: Maximum height in pixels (optional)
|
|
||||||
|
|
||||||
Common scale values:
|
|
||||||
- `0.5`: Thumbnails (~36 DPI)
|
|
||||||
- `2.0`: Screen display (~144 DPI)
|
|
||||||
- `3.0`: High quality (~216 DPI, default)
|
|
||||||
- `6.0`: Print quality (~432 DPI)
|
|
||||||
|
|
||||||
### Using External Browser Instance
|
|
||||||
For advanced use cases, you can provide your own Puppeteer browser instance:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
import puppeteer from 'puppeteer';
|
|
||||||
|
|
||||||
async function useExternalBrowser() {
|
|
||||||
// Create your own browser instance with custom options
|
|
||||||
const browser = await puppeteer.launch({
|
|
||||||
headless: true,
|
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
|
||||||
});
|
|
||||||
|
|
||||||
const smartPdf = await SmartPdf.create();
|
|
||||||
await smartPdf.start(browser);
|
|
||||||
|
|
||||||
// Use SmartPdf normally
|
|
||||||
const pdf = await smartPdf.getA4PdfResultForHtmlString('<h1>Hello</h1>');
|
|
||||||
|
|
||||||
// SmartPdf will not close the browser when stopping
|
|
||||||
await smartPdf.stop();
|
|
||||||
|
|
||||||
// You control the browser lifecycle
|
|
||||||
await browser.close();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Running Multiple Instances
|
|
||||||
Thanks to automatic port allocation, you can run multiple SmartPdf instances simultaneously:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
async function runMultipleInstances() {
|
|
||||||
// Each instance automatically finds its own free port
|
|
||||||
const instance1 = await SmartPdf.create();
|
|
||||||
const instance2 = await SmartPdf.create();
|
|
||||||
const instance3 = await SmartPdf.create();
|
|
||||||
|
|
||||||
// Start all instances
|
|
||||||
await Promise.all([
|
|
||||||
instance1.start(),
|
|
||||||
instance2.start(),
|
|
||||||
instance3.start()
|
|
||||||
]);
|
|
||||||
|
|
||||||
console.log(`Instance 1 running on port: ${instance1.serverPort}`);
|
|
||||||
console.log(`Instance 2 running on port: ${instance2.serverPort}`);
|
|
||||||
console.log(`Instance 3 running on port: ${instance3.serverPort}`);
|
|
||||||
|
|
||||||
// Use instances independently
|
|
||||||
const pdfs = await Promise.all([
|
|
||||||
instance1.getA4PdfResultForHtmlString('<h1>PDF 1</h1>'),
|
|
||||||
instance2.getA4PdfResultForHtmlString('<h1>PDF 2</h1>'),
|
|
||||||
instance3.getA4PdfResultForHtmlString('<h1>PDF 3</h1>')
|
|
||||||
]);
|
|
||||||
|
|
||||||
// Clean up all instances
|
|
||||||
await Promise.all([
|
|
||||||
instance1.stop(),
|
|
||||||
instance2.stop(),
|
|
||||||
instance3.stop()
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Error Handling
|
|
||||||
Always wrap SmartPdf operations in try-catch blocks and ensure proper cleanup:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
async function safePdfGeneration() {
|
|
||||||
let smartPdf: SmartPdf;
|
|
||||||
|
|
||||||
try {
|
|
||||||
smartPdf = await SmartPdf.create();
|
|
||||||
await smartPdf.start();
|
|
||||||
|
|
||||||
const pdf = await smartPdf.getA4PdfResultForHtmlString('<h1>Hello</h1>');
|
|
||||||
// Process PDF...
|
|
||||||
|
|
||||||
} catch (error) {
|
|
||||||
console.error('PDF generation failed:', error);
|
|
||||||
// Handle error appropriately
|
|
||||||
} finally {
|
|
||||||
// Always cleanup
|
|
||||||
if (smartPdf) {
|
|
||||||
await smartPdf.stop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### IPdf Interface
|
|
||||||
The `IPdf` interface represents a PDF with its metadata:
|
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
interface IPdf {
|
interface IPdf {
|
||||||
name: string; // Filename of the PDF
|
id: string | null; // Unique identifier
|
||||||
buffer: Buffer; // PDF content as buffer
|
name: string; // Filename
|
||||||
id: string | null; // Unique identifier
|
buffer: Buffer; // Raw PDF bytes
|
||||||
metadata?: {
|
metadata?: {
|
||||||
textExtraction?: string; // Extracted text content
|
textExtraction?: string; // Extracted text (when available)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## Best Practices
|
## 📚 How It Works
|
||||||
|
|
||||||
1. **Always start and stop**: Initialize with `start()` and cleanup with `stop()` to properly manage resources.
|
SmartPDF spins up a lightweight HTTP server (via `@push.rocks/smartserve`) bound to `localhost` and a headless Chromium browser. When you call a generation method:
|
||||||
2. **Port management**: Use the automatic port allocation feature to avoid conflicts when running multiple instances.
|
|
||||||
3. **Error handling**: Always implement proper error handling as PDF generation can fail due to various reasons.
|
1. Your HTML is registered internally and served at `http://localhost:{port}/{id}`
|
||||||
4. **Resource cleanup**: Ensure `stop()` is called even if an error occurs to prevent memory leaks.
|
2. Puppeteer navigates to that URL, waits for the page to fully render, and captures a PDF
|
||||||
5. **HTML optimization**: When creating PDFs from HTML, ensure your HTML is well-formed and CSS is embedded or inlined.
|
3. A header-based security check ensures only the correct content is captured
|
||||||
|
4. The server and browser are torn down when you call `stop()`
|
||||||
|
|
||||||
|
This architecture means you get **pixel-perfect CSS rendering**, **web font support**, and **full JavaScript execution** — the same rendering engine that powers Chrome.
|
||||||
|
|
||||||
|
## 🏗️ Instance Management
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const smartPdf = await SmartPdf.create();
|
||||||
|
await smartPdf.start();
|
||||||
|
|
||||||
|
// ... your operations ...
|
||||||
|
|
||||||
|
await smartPdf.stop();
|
||||||
|
```
|
||||||
|
|
||||||
|
For production use, wrap in try/finally:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const smartPdf = await SmartPdf.create();
|
||||||
|
try {
|
||||||
|
await smartPdf.start();
|
||||||
|
// ... generate PDFs ...
|
||||||
|
} finally {
|
||||||
|
await smartPdf.stop();
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 🔌 Smart Port Allocation
|
||||||
|
|
||||||
|
Run multiple instances without conflicts:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Each instance auto-selects a free port (default range: 20000–30000)
|
||||||
|
const instance1 = new SmartPdf();
|
||||||
|
const instance2 = new SmartPdf();
|
||||||
|
await instance1.start(); // e.g. port 20000
|
||||||
|
await instance2.start(); // e.g. port 20001
|
||||||
|
|
||||||
|
console.log(instance1.serverPort); // 20000
|
||||||
|
console.log(instance2.serverPort); // 20001
|
||||||
|
|
||||||
|
// Custom range
|
||||||
|
const custom = new SmartPdf({ portRangeStart: 4000, portRangeEnd: 5000 });
|
||||||
|
|
||||||
|
// Or pin a specific port
|
||||||
|
const pinned = new SmartPdf({ port: 3000 });
|
||||||
|
```
|
||||||
|
|
||||||
|
If a specific port is already in use, `start()` throws an error immediately instead of silently failing.
|
||||||
|
|
||||||
|
### 🌐 Bring Your Own Browser
|
||||||
|
|
||||||
|
Pass an existing Puppeteer `Browser` instance — SmartPDF won't close it when you call `stop()`:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
const browser = await puppeteer.launch({
|
||||||
|
headless: 'new',
|
||||||
|
args: ['--no-sandbox'],
|
||||||
|
});
|
||||||
|
|
||||||
|
const smartPdf = await SmartPdf.create();
|
||||||
|
await smartPdf.start(browser); // uses your browser
|
||||||
|
|
||||||
|
await smartPdf.stop(); // server stops, browser stays open
|
||||||
|
await browser.close(); // you manage browser lifecycle
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🎨 PDF Generation
|
||||||
|
|
||||||
|
### 📝 HTML → A4 PDF
|
||||||
|
|
||||||
|
Renders at a 794×1122 viewport (A4 at 96 DPI) with full CSS support:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const pdf = await smartPdf.getA4PdfResultForHtmlString(`
|
||||||
|
<style>
|
||||||
|
body { font-family: 'Helvetica', sans-serif; margin: 40px; }
|
||||||
|
.header {
|
||||||
|
background: linear-gradient(135deg, #667eea, #764ba2);
|
||||||
|
color: white; padding: 30px; border-radius: 10px; text-align: center;
|
||||||
|
}
|
||||||
|
table { width: 100%; border-collapse: collapse; margin-top: 20px; }
|
||||||
|
th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
|
||||||
|
th { background: #f5f5f5; }
|
||||||
|
</style>
|
||||||
|
|
||||||
|
<div class="header">
|
||||||
|
<h1>Invoice #2024-001</h1>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr><th>Item</th><th>Qty</th><th>Price</th></tr>
|
||||||
|
<tr><td>Widget Pro</td><td>5</td><td>$49.99</td></tr>
|
||||||
|
<tr><td>Gizmo Ultra</td><td>2</td><td>$129.99</td></tr>
|
||||||
|
</table>
|
||||||
|
`);
|
||||||
|
|
||||||
|
fs.writeFileSync('invoice.pdf', pdf.buffer);
|
||||||
|
```
|
||||||
|
|
||||||
|
### 🌐 Website → PDF
|
||||||
|
|
||||||
|
Two methods depending on your needs:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Standard capture — uses the document's own dimensions
|
||||||
|
const pdf = await smartPdf.getPdfResultForWebsite('https://example.com');
|
||||||
|
|
||||||
|
// Full-page capture — scrolls to bottom, captures everything as a single page
|
||||||
|
const fullPdf = await smartPdf.getFullWebsiteAsSinglePdf('https://example.com');
|
||||||
|
```
|
||||||
|
|
||||||
|
`getPdfResultForWebsite` uses a 1980×1200 viewport and respects the page's own width/height. `getFullWebsiteAsSinglePdf` uses a 1920px-wide viewport and measures the full scroll height, producing a single tall page.
|
||||||
|
|
||||||
|
### 🔀 Merge Multiple PDFs
|
||||||
|
|
||||||
|
Combine any number of PDF buffers into one document using `pdf-lib`:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const invoice = await smartPdf.readFileToPdfObject('./invoice.pdf');
|
||||||
|
const terms = await smartPdf.readFileToPdfObject('./terms.pdf');
|
||||||
|
const appendix = await smartPdf.getA4PdfResultForHtmlString('<h1>Appendix</h1>...');
|
||||||
|
|
||||||
|
const merged = await smartPdf.mergePdfs([
|
||||||
|
invoice.buffer,
|
||||||
|
terms.buffer,
|
||||||
|
appendix.buffer,
|
||||||
|
]);
|
||||||
|
|
||||||
|
fs.writeFileSync('complete-package.pdf', merged);
|
||||||
|
```
|
||||||
|
|
||||||
|
### 📑 Read a PDF from Disk
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const pdfObject = await smartPdf.readFileToPdfObject('./document.pdf');
|
||||||
|
console.log(pdfObject.name); // "document.pdf"
|
||||||
|
console.log(pdfObject.buffer); // <Buffer ...>
|
||||||
|
```
|
||||||
|
|
||||||
|
### 📖 Extract Text
|
||||||
|
|
||||||
|
Pull raw text from any PDF buffer:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const text = await smartPdf.extractTextFromPdfBuffer(pdf.buffer);
|
||||||
|
console.log(text);
|
||||||
|
```
|
||||||
|
|
||||||
|
> Uses [pdf2json](https://github.com/modesty/pdf2json) under the hood. Works best with text-based PDFs; scanned documents may return limited results.
|
||||||
|
|
||||||
|
## 🖼️ PDF → Image Conversion
|
||||||
|
|
||||||
|
Convert PDF pages to raster images using Puppeteer + PDF.js. Each page becomes a separate image buffer.
|
||||||
|
|
||||||
|
### PNG — Lossless Quality
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const pngPages = await smartPdf.convertPDFToPngBytes(pdf.buffer, {
|
||||||
|
scale: SmartPdf.SCALE_HIGH, // 3.0 = ~216 DPI (default)
|
||||||
|
});
|
||||||
|
|
||||||
|
pngPages.forEach((png, i) => {
|
||||||
|
fs.writeFileSync(`page-${i + 1}.png`, Buffer.from(png));
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### WebP — Modern & Efficient
|
||||||
|
|
||||||
|
25–60% smaller than PNG at similar visual quality:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const webpPages = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
|
||||||
|
scale: 2.0, // ~144 DPI
|
||||||
|
quality: 90, // 0–100 (default: 85)
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### JPEG — Progressive Loading
|
||||||
|
|
||||||
|
Generates true progressive JPEGs (multi-pass rendering) via sharp:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const jpegPages = await smartPdf.convertPDFToJpegBytes(pdf.buffer, {
|
||||||
|
scale: SmartPdf.SCALE_HIGH,
|
||||||
|
quality: 85, // 0–100 (default: 85)
|
||||||
|
maxWidth: 1920, // optional dimension constraints
|
||||||
|
maxHeight: 1080,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### 📏 DPI & Scale Reference
|
||||||
|
|
||||||
|
All image methods accept a `scale` parameter. PDF.js renders at 72 DPI by default, so `scale` is a multiplier on that baseline (effective DPI ≈ `scale × 72`):
|
||||||
|
|
||||||
|
| Constant | Value | DPI | Use Case |
|
||||||
|
|----------|-------|-----|----------|
|
||||||
|
| `SmartPdf.SCALE_SCREEN` | 2.0 | ~144 | Web display, thumbnails |
|
||||||
|
| `SmartPdf.SCALE_HIGH` | 3.0 | ~216 | General purpose (default) |
|
||||||
|
| `SmartPdf.SCALE_PRINT` | 6.0 | ~432 | Print-quality output |
|
||||||
|
|
||||||
|
Or calculate a custom scale:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const scale = SmartPdf.getScaleForDPI(300); // → ≈ 4.167 (300 / 72)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 🖼️ Dimension Constraints
|
||||||
|
|
||||||
|
All image methods support `maxWidth` and `maxHeight` to cap output size while preserving aspect ratio:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// High-res render, but capped at 800×1000 px
|
||||||
|
const constrained = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
|
||||||
|
scale: SmartPdf.SCALE_HIGH,
|
||||||
|
quality: 90,
|
||||||
|
maxWidth: 800,
|
||||||
|
maxHeight: 1000,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### 📊 Format Comparison
|
||||||
|
|
||||||
|
| Format | Typical Size vs PNG | Lossy? | Transparency | Progressive | Best For |
|
||||||
|
|--------|-------------------|--------|--------------|-------------|----------|
|
||||||
|
| **PNG** | baseline | No | ✅ | — | Screenshots, diagrams, text-heavy docs |
|
||||||
|
| **WebP** | 40–75% | Yes | ✅ | — | Modern web apps, thumbnails |
|
||||||
|
| **JPEG** | 50–70% | Yes | ❌ | ✅ | Photos, complex graphics, email |
|
||||||
|
|
||||||
|
## ⚡ Parallel Processing
|
||||||
|
|
||||||
|
Process multiple URLs concurrently with separate instances:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const urls = [
|
||||||
|
'https://example.com/page1',
|
||||||
|
'https://example.com/page2',
|
||||||
|
'https://example.com/page3',
|
||||||
|
];
|
||||||
|
|
||||||
|
// Spin up parallel instances
|
||||||
|
const instances = await Promise.all(
|
||||||
|
urls.map(() => SmartPdf.create())
|
||||||
|
);
|
||||||
|
await Promise.all(instances.map(i => i.start()));
|
||||||
|
|
||||||
|
// Generate in parallel
|
||||||
|
const pdfs = await Promise.all(
|
||||||
|
urls.map((url, i) => instances[i].getFullWebsiteAsSinglePdf(url))
|
||||||
|
);
|
||||||
|
|
||||||
|
// Merge all results
|
||||||
|
const merged = await instances[0].mergePdfs(pdfs.map(p => p.buffer));
|
||||||
|
fs.writeFileSync('all-pages.pdf', merged);
|
||||||
|
|
||||||
|
// Clean up
|
||||||
|
await Promise.all(instances.map(i => i.stop()));
|
||||||
|
```
|
||||||
|
|
||||||
|
## 📝 Full API Reference
|
||||||
|
|
||||||
|
### `SmartPdf` Class
|
||||||
|
|
||||||
|
#### Static Properties
|
||||||
|
|
||||||
|
| Property | Type | Value | Description |
|
||||||
|
|----------|------|-------|-------------|
|
||||||
|
| `SCALE_SCREEN` | `number` | `2.0` | ~144 DPI scale factor |
|
||||||
|
| `SCALE_HIGH` | `number` | `3.0` | ~216 DPI scale factor (default) |
|
||||||
|
| `SCALE_PRINT` | `number` | `6.0` | ~432 DPI scale factor |
|
||||||
|
|
||||||
|
#### Static Methods
|
||||||
|
|
||||||
|
| Method | Returns | Description |
|
||||||
|
|--------|---------|-------------|
|
||||||
|
| `create(options?)` | `Promise<SmartPdf>` | Factory method to create an instance |
|
||||||
|
| `getScaleForDPI(dpi)` | `number` | Converts a DPI value to a scale factor (`dpi / 72`) |
|
||||||
|
|
||||||
|
#### Instance Properties
|
||||||
|
|
||||||
|
| Property | Type | Description |
|
||||||
|
|----------|------|-------------|
|
||||||
|
| `serverPort` | `number` | The port the internal HTTP server is listening on |
|
||||||
|
|
||||||
|
#### Instance Methods
|
||||||
|
|
||||||
|
| Method | Returns | Description |
|
||||||
|
|--------|---------|-------------|
|
||||||
|
| `start(browser?)` | `Promise<void>` | Starts internal server + browser. Optionally accepts an existing Puppeteer `Browser`. Throws if the instance is already running or if a specifically requested port is in use. |
|
||||||
|
| `stop()` | `Promise<void>` | Shuts down server and browser (unless external browser was provided). |
|
||||||
|
| `getA4PdfResultForHtmlString(html)` | `Promise<IPdf>` | Renders HTML at 794×1122 viewport → A4 PDF |
|
||||||
|
| `getPdfResultForWebsite(url)` | `Promise<IPdf>` | Captures website at 1980×1200 viewport → PDF |
|
||||||
|
| `getFullWebsiteAsSinglePdf(url)` | `Promise<IPdf>` | Captures full scrollable page at 1920px wide → single-page PDF |
|
||||||
|
| `mergePdfs(buffers)` | `Promise<Uint8Array>` | Merges an array of PDF `Uint8Array` buffers |
|
||||||
|
| `readFileToPdfObject(path)` | `Promise<IPdf>` | Reads a PDF file from disk into an `IPdf` object |
|
||||||
|
| `extractTextFromPdfBuffer(buffer)` | `Promise<string>` | Extracts raw text from a PDF buffer |
|
||||||
|
| `convertPDFToPngBytes(buffer, opts?)` | `Promise<Uint8Array[]>` | Converts each PDF page to a PNG buffer |
|
||||||
|
| `convertPDFToWebpBytes(buffer, opts?)` | `Promise<Uint8Array[]>` | Converts each PDF page to a WebP buffer |
|
||||||
|
| `convertPDFToJpegBytes(buffer, opts?)` | `Promise<Uint8Array[]>` | Converts each PDF page to a progressive JPEG buffer |
|
||||||
|
|
||||||
|
#### Image Conversion Options
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
{
|
||||||
|
scale?: number; // DPI multiplier (default: 3.0)
|
||||||
|
quality?: number; // 0–100, WebP/JPEG only (default: 85)
|
||||||
|
maxWidth?: number; // Max output width in pixels
|
||||||
|
maxHeight?: number; // Max output height in pixels
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### `ISmartPdfOptions` Interface
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
{
|
||||||
|
port?: number; // Use a specific port
|
||||||
|
portRangeStart?: number; // Auto-allocation range start (default: 20000)
|
||||||
|
portRangeEnd?: number; // Auto-allocation range end (default: 30000)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
## License and Legal Information
|
## License and Legal Information
|
||||||
|
|
||||||
This repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository.
|
This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [LICENSE](./LICENSE) file.
|
||||||
|
|
||||||
**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.
|
**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
### Trademarks
|
### Trademarks
|
||||||
|
|
||||||
This project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.
|
This project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH or third parties, and are not included within the scope of the MIT license granted herein.
|
||||||
|
|
||||||
|
Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines or the guidelines of the respective third-party owners, and any usage must be approved in writing. Third-party trademarks used herein are the property of their respective owners and used only in a descriptive manner, e.g. for an implementation of an API or similar.
|
||||||
|
|
||||||
### Company Information
|
### Company Information
|
||||||
|
|
||||||
Task Venture Capital GmbH
|
Task Venture Capital GmbH
|
||||||
Registered at District court Bremen HRB 35230 HB, Germany
|
Registered at District Court Bremen HRB 35230 HB, Germany
|
||||||
|
|
||||||
For any legal inquiries or if you require further information, please contact us via email at hello@task.vc.
|
For any legal inquiries or further information, please contact us via email at hello@task.vc.
|
||||||
|
|
||||||
By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.
|
By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.
|
||||||
|
|||||||
119
test/test.ts
119
test/test.ts
@@ -39,8 +39,8 @@ tap.test('should create PDFs from HTML string', async () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
tap.test('should create PDFs from websites', async () => {
|
tap.test('should create PDFs from websites', async () => {
|
||||||
const pdfA4 = await testSmartPdf.getPdfResultForWebsite('https://www.wikipedia.org');
|
const pdfA4 = await testSmartPdf.getPdfResultForWebsite('https://example.com');
|
||||||
const pdfSingle = await testSmartPdf.getFullWebsiteAsSinglePdf('https://www.wikipedia.org');
|
const pdfSingle = await testSmartPdf.getFullWebsiteAsSinglePdf('https://example.com');
|
||||||
expect(pdfA4.buffer).toBeInstanceOf(Buffer);
|
expect(pdfA4.buffer).toBeInstanceOf(Buffer);
|
||||||
expect(pdfSingle.buffer).toBeInstanceOf(Buffer);
|
expect(pdfSingle.buffer).toBeInstanceOf(Buffer);
|
||||||
});
|
});
|
||||||
@@ -82,6 +82,13 @@ tap.test('should store PNG results from both conversion functions in .nogit/test
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
tap.test('should create a third PDF for image conversion tests', async () => {
|
||||||
|
const pdfResult = await testSmartPdf.getFullWebsiteAsSinglePdf('https://example.com');
|
||||||
|
expect(pdfResult.buffer).toBeInstanceOf(Buffer);
|
||||||
|
ensureDir('.nogit');
|
||||||
|
fs.writeFileSync(path.join('.nogit', '3.pdf'), pdfResult.buffer as Buffer);
|
||||||
|
});
|
||||||
|
|
||||||
tap.test('should create WebP preview images from PDF', async () => {
|
tap.test('should create WebP preview images from PDF', async () => {
|
||||||
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
|
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
|
||||||
const webpPreviews = await testSmartPdf.convertPDFToWebpBytes(pdfObject.buffer);
|
const webpPreviews = await testSmartPdf.convertPDFToWebpBytes(pdfObject.buffer);
|
||||||
@@ -174,8 +181,114 @@ tap.test('should verify WebP files are smaller than PNG', async () => {
|
|||||||
expect(totalWebpSize).toBeLessThan(totalPngSize);
|
expect(totalWebpSize).toBeLessThan(totalPngSize);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
tap.test('should create JPEG images from PDF', async () => {
|
||||||
|
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
|
||||||
|
const jpegImages = await testSmartPdf.convertPDFToJpegBytes(pdfObject.buffer);
|
||||||
|
expect(jpegImages.length).toBeGreaterThan(0);
|
||||||
|
console.log('JPEG image sizes:', jpegImages.map(img => img.length));
|
||||||
|
|
||||||
|
// Save the first page as JPEG
|
||||||
|
fs.writeFileSync(path.join(testResultsDir, 'jpeg_default_page1.jpg'), Buffer.from(jpegImages[0]));
|
||||||
|
});
|
||||||
|
|
||||||
|
tap.test('should create JPEG images with different quality levels', async () => {
|
||||||
|
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
|
||||||
|
|
||||||
|
// Test different quality levels
|
||||||
|
const qualityLevels = [50, 70, 85, 95];
|
||||||
|
|
||||||
|
for (const quality of qualityLevels) {
|
||||||
|
const jpegImages = await testSmartPdf.convertPDFToJpegBytes(pdfObject.buffer, {
|
||||||
|
scale: smartpdf.SmartPdf.SCALE_HIGH,
|
||||||
|
quality: quality
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log(`JPEG quality ${quality}: ${jpegImages[0].length} bytes`);
|
||||||
|
|
||||||
|
// Save first page at each quality level
|
||||||
|
fs.writeFileSync(
|
||||||
|
path.join(testResultsDir, `jpeg_quality_${quality}_page1.jpg`),
|
||||||
|
Buffer.from(jpegImages[0])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
tap.test('should create JPEG images with max dimensions', async () => {
|
||||||
|
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
|
||||||
|
|
||||||
|
// Create constrained JPEG images
|
||||||
|
const constrainedJpegs = await testSmartPdf.convertPDFToJpegBytes(pdfObject.buffer, {
|
||||||
|
scale: smartpdf.SmartPdf.SCALE_HIGH,
|
||||||
|
quality: 85,
|
||||||
|
maxWidth: 1200,
|
||||||
|
maxHeight: 1200
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(constrainedJpegs.length).toBeGreaterThan(0);
|
||||||
|
console.log('Constrained JPEG sizes:', constrainedJpegs.map(img => img.length));
|
||||||
|
|
||||||
|
// Save constrained JPEG
|
||||||
|
fs.writeFileSync(path.join(testResultsDir, 'jpeg_constrained_page1.jpg'), Buffer.from(constrainedJpegs[0]));
|
||||||
|
});
|
||||||
|
|
||||||
|
tap.test('should compare file sizes between PNG, WebP, and JPEG', async () => {
|
||||||
|
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
|
||||||
|
|
||||||
|
// Generate all three formats at the same scale
|
||||||
|
const comparisonScale = smartpdf.SmartPdf.SCALE_HIGH; // 3.0 scale
|
||||||
|
|
||||||
|
const pngImages = await testSmartPdf.convertPDFToPngBytes(pdfObject.buffer, {
|
||||||
|
scale: comparisonScale
|
||||||
|
});
|
||||||
|
const webpImages = await testSmartPdf.convertPDFToWebpBytes(pdfObject.buffer, {
|
||||||
|
scale: comparisonScale,
|
||||||
|
quality: 85
|
||||||
|
});
|
||||||
|
const jpegImages = await testSmartPdf.convertPDFToJpegBytes(pdfObject.buffer, {
|
||||||
|
scale: comparisonScale,
|
||||||
|
quality: 85
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(pngImages.length).toEqual(webpImages.length);
|
||||||
|
expect(pngImages.length).toEqual(jpegImages.length);
|
||||||
|
|
||||||
|
// Compare sizes
|
||||||
|
let totalPngSize = 0;
|
||||||
|
let totalWebpSize = 0;
|
||||||
|
let totalJpegSize = 0;
|
||||||
|
|
||||||
|
pngImages.forEach((png, index) => {
|
||||||
|
const pngSize = png.length;
|
||||||
|
const webpSize = webpImages[index].length;
|
||||||
|
const jpegSize = jpegImages[index].length;
|
||||||
|
|
||||||
|
totalPngSize += pngSize;
|
||||||
|
totalWebpSize += webpSize;
|
||||||
|
totalJpegSize += jpegSize;
|
||||||
|
|
||||||
|
const webpReduction = ((pngSize - webpSize) / pngSize * 100).toFixed(1);
|
||||||
|
const jpegReduction = ((pngSize - jpegSize) / pngSize * 100).toFixed(1);
|
||||||
|
|
||||||
|
console.log(`Page ${index + 1}:`);
|
||||||
|
console.log(` PNG: ${pngSize} bytes`);
|
||||||
|
console.log(` WebP: ${webpSize} bytes (${webpReduction}% smaller than PNG)`);
|
||||||
|
console.log(` JPEG: ${jpegSize} bytes (${jpegReduction}% smaller than PNG)`);
|
||||||
|
});
|
||||||
|
|
||||||
|
const totalWebpReduction = ((totalPngSize - totalWebpSize) / totalPngSize * 100).toFixed(1);
|
||||||
|
const totalJpegReduction = ((totalPngSize - totalJpegSize) / totalPngSize * 100).toFixed(1);
|
||||||
|
|
||||||
|
console.log('\nTotal size comparison:');
|
||||||
|
console.log(`PNG: ${totalPngSize} bytes`);
|
||||||
|
console.log(`WebP: ${totalWebpSize} bytes (${totalWebpReduction}% reduction)`);
|
||||||
|
console.log(`JPEG: ${totalJpegSize} bytes (${totalJpegReduction}% reduction)`);
|
||||||
|
|
||||||
|
// WebP should be smaller than PNG; JPEG may not be for simple graphics pages
|
||||||
|
expect(totalWebpSize).toBeLessThan(totalPngSize);
|
||||||
|
});
|
||||||
|
|
||||||
tap.test('should close the SmartPdf instance properly', async () => {
|
tap.test('should close the SmartPdf instance properly', async () => {
|
||||||
await testSmartPdf.stop();
|
await testSmartPdf.stop();
|
||||||
});
|
});
|
||||||
|
|
||||||
tap.start();
|
export default tap.start();
|
||||||
@@ -3,6 +3,6 @@
|
|||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: '@push.rocks/smartpdf',
|
name: '@push.rocks/smartpdf',
|
||||||
version: '3.2.2',
|
version: '4.2.0',
|
||||||
description: 'A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.'
|
description: 'A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,10 +1,7 @@
|
|||||||
import * as plugins from './smartpdf.plugins.js';
|
import * as plugins from './smartpdf.plugins.js';
|
||||||
import * as paths from './smartpdf.paths.js';
|
import * as paths from './smartpdf.paths.js';
|
||||||
import { Server } from 'http';
|
|
||||||
import { PdfCandidate } from './smartpdf.classes.pdfcandidate.js';
|
import { PdfCandidate } from './smartpdf.classes.pdfcandidate.js';
|
||||||
import { type IPdf } from '@tsclass/tsclass/dist_ts/business/pdf.js';
|
import { type IPdf } from '@tsclass/tsclass/dist_ts/business/pdf.js';
|
||||||
import { execFile } from 'child_process';
|
|
||||||
|
|
||||||
declare const document: any;
|
declare const document: any;
|
||||||
|
|
||||||
export interface ISmartPdfOptions {
|
export interface ISmartPdfOptions {
|
||||||
@@ -18,7 +15,7 @@ export class SmartPdf {
|
|||||||
public static readonly SCALE_SCREEN = 2.0; // ~144 DPI - Good for screen display
|
public static readonly SCALE_SCREEN = 2.0; // ~144 DPI - Good for screen display
|
||||||
public static readonly SCALE_HIGH = 3.0; // ~216 DPI - High quality (default)
|
public static readonly SCALE_HIGH = 3.0; // ~216 DPI - High quality (default)
|
||||||
public static readonly SCALE_PRINT = 6.0; // ~432 DPI - Print quality
|
public static readonly SCALE_PRINT = 6.0; // ~432 DPI - Print quality
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate scale factor for desired DPI
|
* Calculate scale factor for desired DPI
|
||||||
* PDF.js default is 72 DPI, so scale = desiredDPI / 72
|
* PDF.js default is 72 DPI, so scale = desiredDPI / 72
|
||||||
@@ -26,7 +23,7 @@ export class SmartPdf {
|
|||||||
public static getScaleForDPI(dpi: number): number {
|
public static getScaleForDPI(dpi: number): number {
|
||||||
return dpi / 72;
|
return dpi / 72;
|
||||||
}
|
}
|
||||||
|
|
||||||
// STATIC
|
// STATIC
|
||||||
public static async create(optionsArg?: ISmartPdfOptions) {
|
public static async create(optionsArg?: ISmartPdfOptions) {
|
||||||
const smartpdfInstance = new SmartPdf(optionsArg);
|
const smartpdfInstance = new SmartPdf(optionsArg);
|
||||||
@@ -34,13 +31,14 @@ export class SmartPdf {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// INSTANCE
|
// INSTANCE
|
||||||
htmlServerInstance: Server;
|
private smartserveInstance: plugins.smartserve.SmartServe;
|
||||||
serverPort: number;
|
serverPort: number;
|
||||||
headlessBrowser: plugins.smartpuppeteer.puppeteer.Browser;
|
headlessBrowser: plugins.smartpuppeteer.puppeteer.Browser;
|
||||||
externalBrowserBool: boolean = false;
|
externalBrowserBool: boolean = false;
|
||||||
private _readyDeferred: plugins.smartpromise.Deferred<void>;
|
private _readyDeferred: plugins.smartpromise.Deferred<void>;
|
||||||
private _candidates: { [key: string]: PdfCandidate } = {};
|
private _candidates: { [key: string]: PdfCandidate } = {};
|
||||||
private _options: ISmartPdfOptions;
|
private _options: ISmartPdfOptions;
|
||||||
|
private _isRunning: boolean = false;
|
||||||
|
|
||||||
constructor(optionsArg?: ISmartPdfOptions) {
|
constructor(optionsArg?: ISmartPdfOptions) {
|
||||||
this._readyDeferred = new plugins.smartpromise.Deferred();
|
this._readyDeferred = new plugins.smartpromise.Deferred();
|
||||||
@@ -52,7 +50,13 @@ export class SmartPdf {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async start(headlessBrowserArg?: plugins.smartpuppeteer.puppeteer.Browser) {
|
async start(headlessBrowserArg?: plugins.smartpuppeteer.puppeteer.Browser) {
|
||||||
const done = plugins.smartpromise.defer();
|
if (this._isRunning) {
|
||||||
|
throw new Error('SmartPdf is already running. Call stop() before starting again.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset readiness deferred for this start cycle
|
||||||
|
this._readyDeferred = new plugins.smartpromise.Deferred();
|
||||||
|
|
||||||
// lets set the external browser in case one is provided
|
// lets set the external browser in case one is provided
|
||||||
this.headlessBrowser = headlessBrowserArg;
|
this.headlessBrowser = headlessBrowserArg;
|
||||||
// setup puppeteer
|
// setup puppeteer
|
||||||
@@ -66,7 +70,7 @@ export class SmartPdf {
|
|||||||
|
|
||||||
// Find an available port BEFORE creating server
|
// Find an available port BEFORE creating server
|
||||||
const smartnetworkInstance = new plugins.smartnetwork.SmartNetwork();
|
const smartnetworkInstance = new plugins.smartnetwork.SmartNetwork();
|
||||||
|
|
||||||
if (this._options.port) {
|
if (this._options.port) {
|
||||||
// If a specific port is requested, check if it's available
|
// If a specific port is requested, check if it's available
|
||||||
const isPortAvailable = await smartnetworkInstance.isLocalPortUnused(this._options.port);
|
const isPortAvailable = await smartnetworkInstance.isLocalPortUnused(this._options.port);
|
||||||
@@ -76,6 +80,7 @@ export class SmartPdf {
|
|||||||
// Clean up browser if we created one
|
// Clean up browser if we created one
|
||||||
if (!this.externalBrowserBool && this.headlessBrowser) {
|
if (!this.externalBrowserBool && this.headlessBrowser) {
|
||||||
await this.headlessBrowser.close();
|
await this.headlessBrowser.close();
|
||||||
|
this.headlessBrowser = null;
|
||||||
}
|
}
|
||||||
throw new Error(`Requested port ${this._options.port} is already in use`);
|
throw new Error(`Requested port ${this._options.port} is already in use`);
|
||||||
}
|
}
|
||||||
@@ -89,45 +94,62 @@ export class SmartPdf {
|
|||||||
// Clean up browser if we created one
|
// Clean up browser if we created one
|
||||||
if (!this.externalBrowserBool && this.headlessBrowser) {
|
if (!this.externalBrowserBool && this.headlessBrowser) {
|
||||||
await this.headlessBrowser.close();
|
await this.headlessBrowser.close();
|
||||||
|
this.headlessBrowser = null;
|
||||||
}
|
}
|
||||||
throw new Error(`No free ports available in range ${this._options.portRangeStart}-${this._options.portRangeEnd}`);
|
throw new Error(`No free ports available in range ${this._options.portRangeStart}-${this._options.portRangeEnd}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now setup server after we know we have a valid port
|
// Now setup server using smartserve
|
||||||
const app = plugins.express();
|
this.smartserveInstance = new plugins.smartserve.SmartServe({
|
||||||
app.get('/:pdfId', (req, res) => {
|
port: this.serverPort,
|
||||||
const wantedCandidate = this._candidates[req.params.pdfId];
|
hostname: 'localhost',
|
||||||
if (!wantedCandidate) {
|
});
|
||||||
console.log(`${req.url} not attached to a candidate`);
|
|
||||||
return;
|
this.smartserveInstance.setHandler(async (request) => {
|
||||||
|
const url = new URL(request.url);
|
||||||
|
const pdfId = url.pathname.slice(1); // Remove leading /
|
||||||
|
const candidate = this._candidates[pdfId];
|
||||||
|
if (!candidate) {
|
||||||
|
console.log(`${url.pathname} not attached to a candidate`);
|
||||||
|
return new Response('Not found', { status: 404 });
|
||||||
}
|
}
|
||||||
res.setHeader('pdf-id', wantedCandidate.pdfId);
|
return new Response(candidate.htmlString, {
|
||||||
res.send(wantedCandidate.htmlString);
|
headers: {
|
||||||
|
'Content-Type': 'text/html; charset=utf-8',
|
||||||
|
'pdf-id': candidate.pdfId,
|
||||||
|
},
|
||||||
|
});
|
||||||
});
|
});
|
||||||
this.htmlServerInstance = plugins.http.createServer(app);
|
|
||||||
|
await this.smartserveInstance.start();
|
||||||
this.htmlServerInstance.listen(this.serverPort, 'localhost');
|
console.log(`SmartPdf server listening on port ${this.serverPort}`);
|
||||||
this.htmlServerInstance.on('listening', () => {
|
this._isRunning = true;
|
||||||
console.log(`SmartPdf server listening on port ${this.serverPort}`);
|
this._readyDeferred.resolve();
|
||||||
this._readyDeferred.resolve();
|
|
||||||
done.resolve();
|
|
||||||
});
|
|
||||||
await done.promise;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// stop
|
// stop
|
||||||
async stop() {
|
async stop() {
|
||||||
const done = plugins.smartpromise.defer<void>();
|
if (!this._isRunning) {
|
||||||
this.htmlServerInstance.close(() => {
|
return;
|
||||||
done.resolve();
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!this.externalBrowserBool) {
|
|
||||||
await this.headlessBrowser.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
await done.promise;
|
this._isRunning = false;
|
||||||
|
|
||||||
|
// Close browser first to cleanly terminate keepalive connections
|
||||||
|
// before the server shuts down (prevents ECONNRESET errors)
|
||||||
|
if (!this.externalBrowserBool && this.headlessBrowser) {
|
||||||
|
await this.headlessBrowser.close();
|
||||||
|
}
|
||||||
|
this.headlessBrowser = null;
|
||||||
|
|
||||||
|
if (this.smartserveInstance) {
|
||||||
|
await this.smartserveInstance.stop();
|
||||||
|
this.smartserveInstance = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear any remaining candidates
|
||||||
|
this._candidates = {};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -137,124 +159,144 @@ export class SmartPdf {
|
|||||||
await this._readyDeferred.promise;
|
await this._readyDeferred.promise;
|
||||||
const pdfCandidate = new PdfCandidate(htmlStringArg);
|
const pdfCandidate = new PdfCandidate(htmlStringArg);
|
||||||
this._candidates[pdfCandidate.pdfId] = pdfCandidate;
|
this._candidates[pdfCandidate.pdfId] = pdfCandidate;
|
||||||
const page = await this.headlessBrowser.newPage();
|
let page: plugins.smartpuppeteer.puppeteer.Page;
|
||||||
await page.setViewport({
|
try {
|
||||||
width: 794,
|
page = await this.headlessBrowser.newPage();
|
||||||
height: 1122,
|
await page.setViewport({
|
||||||
});
|
width: 794,
|
||||||
const response = await page.goto(`http://localhost:${this.serverPort}/${pdfCandidate.pdfId}`, {
|
height: 1122,
|
||||||
waitUntil: 'networkidle2',
|
});
|
||||||
});
|
const response = await page.goto(`http://localhost:${this.serverPort}/${pdfCandidate.pdfId}`, {
|
||||||
const headers = response.headers();
|
waitUntil: 'networkidle2',
|
||||||
if (headers['pdf-id'] !== pdfCandidate.pdfId) {
|
});
|
||||||
console.log('Error! Headers do not match. For security reasons no pdf is being emitted!');
|
const headers = response.headers();
|
||||||
return;
|
if (headers['pdf-id'] !== pdfCandidate.pdfId) {
|
||||||
} else {
|
console.log('Error! Headers do not match. For security reasons no pdf is being emitted!');
|
||||||
console.log(`id security check passed for ${pdfCandidate.pdfId}`);
|
return;
|
||||||
}
|
} else {
|
||||||
|
console.log(`id security check passed for ${pdfCandidate.pdfId}`);
|
||||||
|
}
|
||||||
|
|
||||||
const pdfBuffer = await page.pdf({
|
const pdfBuffer = await page.pdf({
|
||||||
width: 794,
|
width: 794,
|
||||||
height: 1122,
|
height: 1122,
|
||||||
printBackground: true,
|
printBackground: true,
|
||||||
displayHeaderFooter: false,
|
displayHeaderFooter: false,
|
||||||
});
|
});
|
||||||
// Convert Uint8Array to Node Buffer
|
// Convert Uint8Array to Node Buffer
|
||||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||||
await page.close();
|
await page.close();
|
||||||
delete this._candidates[pdfCandidate.pdfId];
|
delete this._candidates[pdfCandidate.pdfId];
|
||||||
pdfCandidate.doneDeferred.resolve();
|
pdfCandidate.doneDeferred.resolve();
|
||||||
await pdfCandidate.doneDeferred.promise;
|
await pdfCandidate.doneDeferred.promise;
|
||||||
return {
|
return {
|
||||||
id: pdfCandidate.pdfId,
|
id: pdfCandidate.pdfId,
|
||||||
name: `${pdfCandidate.pdfId}.js`,
|
name: `${pdfCandidate.pdfId}.js`,
|
||||||
metadata: {
|
metadata: {
|
||||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||||
},
|
},
|
||||||
buffer: nodePdfBuffer,
|
buffer: nodePdfBuffer,
|
||||||
};
|
};
|
||||||
|
} catch (err) {
|
||||||
|
// Clean up candidate on error
|
||||||
|
delete this._candidates[pdfCandidate.pdfId];
|
||||||
|
if (page) {
|
||||||
|
await page.close().catch(() => {});
|
||||||
|
}
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async getPdfResultForWebsite(websiteUrl: string): Promise<plugins.tsclass.business.IPdf> {
|
async getPdfResultForWebsite(websiteUrl: string): Promise<plugins.tsclass.business.IPdf> {
|
||||||
const page = await this.headlessBrowser.newPage();
|
const page = await this.headlessBrowser.newPage();
|
||||||
await page.setViewport({
|
try {
|
||||||
width: 1980,
|
await page.setViewport({
|
||||||
height: 1200,
|
width: 1980,
|
||||||
});
|
height: 1200,
|
||||||
await page.emulateMediaType('screen');
|
});
|
||||||
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
await page.emulateMediaType('screen');
|
||||||
const pdfId = plugins.smartunique.shortId();
|
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
||||||
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
const pdfId = plugins.smartunique.shortId();
|
||||||
|
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
||||||
|
return {
|
||||||
|
documentHeight: document.height,
|
||||||
|
documentWidth: document.width,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
const pdfBuffer = await page.pdf({
|
||||||
|
height: documentHeight,
|
||||||
|
width: documentWidth,
|
||||||
|
printBackground: true,
|
||||||
|
displayHeaderFooter: false,
|
||||||
|
});
|
||||||
|
// Convert Uint8Array to Node Buffer
|
||||||
|
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||||
|
await page.close();
|
||||||
return {
|
return {
|
||||||
documentHeight: document.height,
|
id: pdfId,
|
||||||
documentWidth: document.width,
|
name: `${pdfId}.js`,
|
||||||
|
metadata: {
|
||||||
|
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||||
|
},
|
||||||
|
buffer: nodePdfBuffer,
|
||||||
};
|
};
|
||||||
});
|
} catch (err) {
|
||||||
const pdfBuffer = await page.pdf({
|
await page.close().catch(() => {});
|
||||||
height: documentHeight,
|
throw err;
|
||||||
width: documentWidth,
|
}
|
||||||
printBackground: true,
|
|
||||||
displayHeaderFooter: false,
|
|
||||||
});
|
|
||||||
// Convert Uint8Array to Node Buffer
|
|
||||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
|
||||||
await page.close();
|
|
||||||
return {
|
|
||||||
id: pdfId,
|
|
||||||
name: `${pdfId}.js`,
|
|
||||||
metadata: {
|
|
||||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
|
||||||
},
|
|
||||||
buffer: nodePdfBuffer,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async getFullWebsiteAsSinglePdf(websiteUrl: string): Promise<plugins.tsclass.business.IPdf> {
|
async getFullWebsiteAsSinglePdf(websiteUrl: string): Promise<plugins.tsclass.business.IPdf> {
|
||||||
const page = await this.headlessBrowser.newPage();
|
const page = await this.headlessBrowser.newPage();
|
||||||
await page.setViewport({
|
try {
|
||||||
width: 1920,
|
await page.setViewport({
|
||||||
height: 1200,
|
width: 1920,
|
||||||
});
|
height: 1200,
|
||||||
await page.emulateMediaType('screen');
|
});
|
||||||
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
await page.emulateMediaType('screen');
|
||||||
const pdfId = plugins.smartunique.shortId();
|
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
|
||||||
// Use both document.body and document.documentElement to ensure we have a valid height and width.
|
const pdfId = plugins.smartunique.shortId();
|
||||||
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
// Use both document.body and document.documentElement to ensure we have a valid height and width.
|
||||||
|
const { documentHeight, documentWidth } = await page.evaluate(() => {
|
||||||
|
return {
|
||||||
|
documentHeight: Math.max(
|
||||||
|
document.body.scrollHeight,
|
||||||
|
document.documentElement.scrollHeight
|
||||||
|
) || 1200,
|
||||||
|
documentWidth: Math.max(
|
||||||
|
document.body.clientWidth,
|
||||||
|
document.documentElement.clientWidth
|
||||||
|
) || 1920,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
// Update viewport height to the full document height.
|
||||||
|
await page.setViewport({
|
||||||
|
width: 1920,
|
||||||
|
height: documentHeight,
|
||||||
|
});
|
||||||
|
const pdfBuffer = await page.pdf({
|
||||||
|
height: documentHeight,
|
||||||
|
width: 1920,
|
||||||
|
printBackground: true,
|
||||||
|
displayHeaderFooter: false,
|
||||||
|
scale: 1,
|
||||||
|
pageRanges: '1',
|
||||||
|
});
|
||||||
|
// Convert Uint8Array to Node Buffer
|
||||||
|
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
||||||
|
await page.close();
|
||||||
return {
|
return {
|
||||||
documentHeight: Math.max(
|
id: pdfId,
|
||||||
document.body.scrollHeight,
|
name: `${pdfId}.js`,
|
||||||
document.documentElement.scrollHeight
|
metadata: {
|
||||||
) || 1200,
|
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
||||||
documentWidth: Math.max(
|
},
|
||||||
document.body.clientWidth,
|
buffer: nodePdfBuffer,
|
||||||
document.documentElement.clientWidth
|
|
||||||
) || 1920,
|
|
||||||
};
|
};
|
||||||
});
|
} catch (err) {
|
||||||
// Update viewport height to the full document height.
|
await page.close().catch(() => {});
|
||||||
await page.setViewport({
|
throw err;
|
||||||
width: 1920,
|
}
|
||||||
height: documentHeight,
|
|
||||||
});
|
|
||||||
const pdfBuffer = await page.pdf({
|
|
||||||
height: documentHeight,
|
|
||||||
width: 1920,
|
|
||||||
printBackground: true,
|
|
||||||
displayHeaderFooter: false,
|
|
||||||
scale: 1,
|
|
||||||
pageRanges: '1',
|
|
||||||
});
|
|
||||||
// Convert Uint8Array to Node Buffer
|
|
||||||
const nodePdfBuffer = Buffer.from(pdfBuffer);
|
|
||||||
await page.close();
|
|
||||||
return {
|
|
||||||
id: pdfId,
|
|
||||||
name: `${pdfId}.js`,
|
|
||||||
metadata: {
|
|
||||||
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
|
|
||||||
},
|
|
||||||
buffer: nodePdfBuffer,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public async mergePdfs(inputPdfBuffers: Uint8Array[]): Promise<Uint8Array> {
|
public async mergePdfs(inputPdfBuffers: Uint8Array[]): Promise<Uint8Array> {
|
||||||
@@ -272,7 +314,9 @@ export class SmartPdf {
|
|||||||
public async readFileToPdfObject(pathArg: string): Promise<plugins.tsclass.business.IPdf> {
|
public async readFileToPdfObject(pathArg: string): Promise<plugins.tsclass.business.IPdf> {
|
||||||
const absolutePath = plugins.smartpath.transform.makeAbsolute(pathArg);
|
const absolutePath = plugins.smartpath.transform.makeAbsolute(pathArg);
|
||||||
const parsedPath = plugins.path.parse(absolutePath);
|
const parsedPath = plugins.path.parse(absolutePath);
|
||||||
const buffer = await plugins.smartfile.fs.toBuffer(absolutePath);
|
const smartfsInstance = new plugins.smartfs.SmartFs(new plugins.smartfs.SmartFsProviderNode());
|
||||||
|
const fileContent = await smartfsInstance.file(absolutePath).read();
|
||||||
|
const buffer = Buffer.from(fileContent);
|
||||||
return {
|
return {
|
||||||
name: parsedPath.base,
|
name: parsedPath.base,
|
||||||
buffer,
|
buffer,
|
||||||
@@ -299,31 +343,6 @@ export class SmartPdf {
|
|||||||
return deferred.promise;
|
return deferred.promise;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks for the presence of required dependencies: GraphicsMagick and Ghostscript.
|
|
||||||
*/
|
|
||||||
private async checkDependencies(): Promise<void> {
|
|
||||||
await Promise.all([
|
|
||||||
this.checkCommandExists('gm', ['version']),
|
|
||||||
this.checkCommandExists('gs', ['--version']),
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks if a given command exists by trying to execute it.
|
|
||||||
*/
|
|
||||||
private checkCommandExists(command: string, args: string[]): Promise<void> {
|
|
||||||
return new Promise((resolve, reject) => {
|
|
||||||
execFile(command, args, (error, stdout, stderr) => {
|
|
||||||
if (error) {
|
|
||||||
reject(new Error(`Dependency check failed: ${command} is not installed or not in the PATH. ${error.message}`));
|
|
||||||
} else {
|
|
||||||
resolve();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts a PDF to PNG bytes for each page using Puppeteer and PDF.js.
|
* Converts a PDF to PNG bytes for each page using Puppeteer and PDF.js.
|
||||||
* This method creates a temporary HTML page that loads PDF.js from a CDN,
|
* This method creates a temporary HTML page that loads PDF.js from a CDN,
|
||||||
@@ -343,89 +362,94 @@ export class SmartPdf {
|
|||||||
// Create a new page using the headless browser.
|
// Create a new page using the headless browser.
|
||||||
const page = await this.headlessBrowser.newPage();
|
const page = await this.headlessBrowser.newPage();
|
||||||
|
|
||||||
// Prepare PDF data as a base64 string.
|
try {
|
||||||
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
|
// Prepare PDF data as a base64 string.
|
||||||
|
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
|
||||||
|
|
||||||
// HTML template that loads PDF.js and renders the PDF.
|
// HTML template that loads PDF.js and renders the PDF.
|
||||||
const htmlTemplate: string = `
|
const htmlTemplate: string = `
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
<title>PDF to PNG Converter</title>
|
<title>PDF to PNG Converter</title>
|
||||||
<style>
|
<style>
|
||||||
body { margin: 0; }
|
body { margin: 0; }
|
||||||
canvas { display: block; margin: 10px auto; }
|
canvas { display: block; margin: 10px auto; }
|
||||||
</style>
|
</style>
|
||||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<script>
|
<script>
|
||||||
(async function() {
|
(async function() {
|
||||||
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
|
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
|
||||||
const pdfData = "__PDF_DATA__";
|
const pdfData = "__PDF_DATA__";
|
||||||
const raw = atob(pdfData);
|
const raw = atob(pdfData);
|
||||||
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
|
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
|
||||||
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
|
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
|
||||||
const pdf = await loadingTask.promise;
|
const pdf = await loadingTask.promise;
|
||||||
const numPages = pdf.numPages;
|
const numPages = pdf.numPages;
|
||||||
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
|
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
|
||||||
const page = await pdf.getPage(pageNum);
|
const page = await pdf.getPage(pageNum);
|
||||||
// Apply scale factor to viewport
|
// Apply scale factor to viewport
|
||||||
const viewport = page.getViewport({ scale: ${scale} });
|
const viewport = page.getViewport({ scale: ${scale} });
|
||||||
|
|
||||||
// Apply max width/height constraints if specified
|
|
||||||
let finalScale = ${scale};
|
|
||||||
${options.maxWidth ? `
|
|
||||||
if (viewport.width > ${options.maxWidth}) {
|
|
||||||
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
|
|
||||||
}` : ''}
|
|
||||||
${options.maxHeight ? `
|
|
||||||
if (viewport.height > ${options.maxHeight}) {
|
|
||||||
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
|
|
||||||
finalScale = Math.min(finalScale, heightScale);
|
|
||||||
}` : ''}
|
|
||||||
|
|
||||||
// Get final viewport with adjusted scale
|
|
||||||
const finalViewport = page.getViewport({ scale: finalScale });
|
|
||||||
|
|
||||||
const canvas = document.createElement('canvas');
|
|
||||||
const context = canvas.getContext('2d');
|
|
||||||
canvas.width = finalViewport.width;
|
|
||||||
canvas.height = finalViewport.height;
|
|
||||||
canvas.setAttribute('data-page', pageNum);
|
|
||||||
|
|
||||||
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
|
|
||||||
document.body.appendChild(canvas);
|
|
||||||
}
|
|
||||||
window.renderComplete = true;
|
|
||||||
})();
|
|
||||||
</script>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
`;
|
|
||||||
|
|
||||||
// Replace the placeholder with the actual base64 PDF data.
|
// Apply max width/height constraints if specified
|
||||||
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
|
let finalScale = ${scale};
|
||||||
|
${options.maxWidth ? `
|
||||||
|
if (viewport.width > ${options.maxWidth}) {
|
||||||
|
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
|
||||||
|
}` : ''}
|
||||||
|
${options.maxHeight ? `
|
||||||
|
if (viewport.height > ${options.maxHeight}) {
|
||||||
|
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
|
||||||
|
finalScale = Math.min(finalScale, heightScale);
|
||||||
|
}` : ''}
|
||||||
|
|
||||||
// Set the page content.
|
// Get final viewport with adjusted scale
|
||||||
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
|
const finalViewport = page.getViewport({ scale: finalScale });
|
||||||
|
|
||||||
// Wait until the PDF.js rendering is complete.
|
const canvas = document.createElement('canvas');
|
||||||
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
|
const context = canvas.getContext('2d');
|
||||||
|
canvas.width = finalViewport.width;
|
||||||
|
canvas.height = finalViewport.height;
|
||||||
|
canvas.setAttribute('data-page', pageNum);
|
||||||
|
|
||||||
// Query all canvas elements (each representing a rendered PDF page).
|
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
|
||||||
const canvasElements = await page.$$('canvas');
|
document.body.appendChild(canvas);
|
||||||
const pngBuffers: Uint8Array[] = [];
|
}
|
||||||
|
window.renderComplete = true;
|
||||||
|
})();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
`;
|
||||||
|
|
||||||
for (const canvasElement of canvasElements) {
|
// Replace the placeholder with the actual base64 PDF data.
|
||||||
// Screenshot the canvas element. The screenshot will be a PNG buffer.
|
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
|
||||||
const screenshotBuffer = (await canvasElement.screenshot({ encoding: 'binary' })) as Buffer;
|
|
||||||
pngBuffers.push(new Uint8Array(screenshotBuffer));
|
// Set the page content.
|
||||||
|
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
|
||||||
|
|
||||||
|
// Wait until the PDF.js rendering is complete.
|
||||||
|
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
|
||||||
|
|
||||||
|
// Query all canvas elements (each representing a rendered PDF page).
|
||||||
|
const canvasElements = await page.$$('canvas');
|
||||||
|
const pngBuffers: Uint8Array[] = [];
|
||||||
|
|
||||||
|
for (const canvasElement of canvasElements) {
|
||||||
|
// Screenshot the canvas element. The screenshot will be a PNG buffer.
|
||||||
|
const screenshotBuffer = (await canvasElement.screenshot({ encoding: 'binary' })) as Buffer;
|
||||||
|
pngBuffers.push(new Uint8Array(screenshotBuffer));
|
||||||
|
}
|
||||||
|
|
||||||
|
await page.close();
|
||||||
|
return pngBuffers;
|
||||||
|
} catch (err) {
|
||||||
|
await page.close().catch(() => {});
|
||||||
|
throw err;
|
||||||
}
|
}
|
||||||
|
|
||||||
await page.close();
|
|
||||||
return pngBuffers;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -449,93 +473,144 @@ export class SmartPdf {
|
|||||||
// Create a new page using the headless browser
|
// Create a new page using the headless browser
|
||||||
const page = await this.headlessBrowser.newPage();
|
const page = await this.headlessBrowser.newPage();
|
||||||
|
|
||||||
// Prepare PDF data as a base64 string
|
try {
|
||||||
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
|
// Prepare PDF data as a base64 string
|
||||||
|
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
|
||||||
|
|
||||||
// HTML template that loads PDF.js and renders the PDF with scaling
|
// HTML template that loads PDF.js and renders the PDF with scaling
|
||||||
const htmlTemplate: string = `
|
const htmlTemplate: string = `
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
<title>PDF to WebP Preview Converter</title>
|
<title>PDF to WebP Preview Converter</title>
|
||||||
<style>
|
<style>
|
||||||
body { margin: 0; }
|
body { margin: 0; }
|
||||||
canvas { display: block; margin: 10px auto; }
|
canvas { display: block; margin: 10px auto; }
|
||||||
</style>
|
</style>
|
||||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<script>
|
<script>
|
||||||
(async function() {
|
(async function() {
|
||||||
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
|
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
|
||||||
const pdfData = "__PDF_DATA__";
|
const pdfData = "__PDF_DATA__";
|
||||||
const raw = atob(pdfData);
|
const raw = atob(pdfData);
|
||||||
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
|
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
|
||||||
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
|
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
|
||||||
const pdf = await loadingTask.promise;
|
const pdf = await loadingTask.promise;
|
||||||
const numPages = pdf.numPages;
|
const numPages = pdf.numPages;
|
||||||
|
|
||||||
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
|
|
||||||
const page = await pdf.getPage(pageNum);
|
|
||||||
// Apply scale factor to viewport
|
|
||||||
const viewport = page.getViewport({ scale: ${scale} });
|
|
||||||
|
|
||||||
// Apply max width/height constraints if specified
|
|
||||||
let finalScale = ${scale};
|
|
||||||
${options.maxWidth ? `
|
|
||||||
if (viewport.width > ${options.maxWidth}) {
|
|
||||||
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
|
|
||||||
}` : ''}
|
|
||||||
${options.maxHeight ? `
|
|
||||||
if (viewport.height > ${options.maxHeight}) {
|
|
||||||
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
|
|
||||||
finalScale = Math.min(finalScale, heightScale);
|
|
||||||
}` : ''}
|
|
||||||
|
|
||||||
// Get final viewport with adjusted scale
|
|
||||||
const finalViewport = page.getViewport({ scale: finalScale });
|
|
||||||
|
|
||||||
const canvas = document.createElement('canvas');
|
|
||||||
const context = canvas.getContext('2d');
|
|
||||||
canvas.width = finalViewport.width;
|
|
||||||
canvas.height = finalViewport.height;
|
|
||||||
canvas.setAttribute('data-page', pageNum);
|
|
||||||
|
|
||||||
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
|
|
||||||
document.body.appendChild(canvas);
|
|
||||||
}
|
|
||||||
window.renderComplete = true;
|
|
||||||
})();
|
|
||||||
</script>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
`;
|
|
||||||
|
|
||||||
// Replace the placeholder with the actual base64 PDF data
|
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
|
||||||
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
|
const page = await pdf.getPage(pageNum);
|
||||||
|
// Apply scale factor to viewport
|
||||||
|
const viewport = page.getViewport({ scale: ${scale} });
|
||||||
|
|
||||||
// Set the page content
|
// Apply max width/height constraints if specified
|
||||||
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
|
let finalScale = ${scale};
|
||||||
|
${options.maxWidth ? `
|
||||||
|
if (viewport.width > ${options.maxWidth}) {
|
||||||
|
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
|
||||||
|
}` : ''}
|
||||||
|
${options.maxHeight ? `
|
||||||
|
if (viewport.height > ${options.maxHeight}) {
|
||||||
|
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
|
||||||
|
finalScale = Math.min(finalScale, heightScale);
|
||||||
|
}` : ''}
|
||||||
|
|
||||||
// Wait until the PDF.js rendering is complete
|
// Get final viewport with adjusted scale
|
||||||
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
|
const finalViewport = page.getViewport({ scale: finalScale });
|
||||||
|
|
||||||
// Query all canvas elements (each representing a rendered PDF page)
|
const canvas = document.createElement('canvas');
|
||||||
const canvasElements = await page.$$('canvas');
|
const context = canvas.getContext('2d');
|
||||||
const webpBuffers: Uint8Array[] = [];
|
canvas.width = finalViewport.width;
|
||||||
|
canvas.height = finalViewport.height;
|
||||||
|
canvas.setAttribute('data-page', pageNum);
|
||||||
|
|
||||||
for (const canvasElement of canvasElements) {
|
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
|
||||||
// Screenshot the canvas element as WebP
|
document.body.appendChild(canvas);
|
||||||
const screenshotBuffer = (await canvasElement.screenshot({
|
}
|
||||||
type: 'webp',
|
window.renderComplete = true;
|
||||||
quality: quality,
|
})();
|
||||||
encoding: 'binary'
|
</script>
|
||||||
})) as Buffer;
|
</body>
|
||||||
webpBuffers.push(new Uint8Array(screenshotBuffer));
|
</html>
|
||||||
|
`;
|
||||||
|
|
||||||
|
// Replace the placeholder with the actual base64 PDF data
|
||||||
|
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
|
||||||
|
|
||||||
|
// Set the page content
|
||||||
|
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
|
||||||
|
|
||||||
|
// Wait until the PDF.js rendering is complete
|
||||||
|
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
|
||||||
|
|
||||||
|
// Query all canvas elements (each representing a rendered PDF page)
|
||||||
|
const canvasElements = await page.$$('canvas');
|
||||||
|
const webpBuffers: Uint8Array[] = [];
|
||||||
|
|
||||||
|
for (const canvasElement of canvasElements) {
|
||||||
|
// Screenshot the canvas element as WebP
|
||||||
|
const screenshotBuffer = (await canvasElement.screenshot({
|
||||||
|
type: 'webp',
|
||||||
|
quality: quality,
|
||||||
|
encoding: 'binary'
|
||||||
|
})) as Buffer;
|
||||||
|
webpBuffers.push(new Uint8Array(screenshotBuffer));
|
||||||
|
}
|
||||||
|
|
||||||
|
await page.close();
|
||||||
|
return webpBuffers;
|
||||||
|
} catch (err) {
|
||||||
|
await page.close().catch(() => {});
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a PDF to progressive JPEG bytes for each page.
|
||||||
|
* This method creates progressive JPEG images that load in multiple passes,
|
||||||
|
* showing a low-quality preview first, then progressively improving.
|
||||||
|
* Uses SmartJimp for true progressive JPEG encoding.
|
||||||
|
*/
|
||||||
|
public async convertPDFToJpegBytes(
|
||||||
|
pdfBytes: Uint8Array,
|
||||||
|
options: {
|
||||||
|
scale?: number; // Scale factor for output size (default: 3.0 for 216 DPI)
|
||||||
|
quality?: number; // JPEG quality 0-100 (default: 85)
|
||||||
|
maxWidth?: number; // Maximum width in pixels (optional)
|
||||||
|
maxHeight?: number; // Maximum height in pixels (optional)
|
||||||
|
} = {}
|
||||||
|
): Promise<Uint8Array[]> {
|
||||||
|
// First, convert PDF to PNG using our existing method
|
||||||
|
const pngBuffers = await this.convertPDFToPngBytes(pdfBytes, {
|
||||||
|
scale: options.scale,
|
||||||
|
maxWidth: options.maxWidth,
|
||||||
|
maxHeight: options.maxHeight
|
||||||
|
});
|
||||||
|
|
||||||
|
// Initialize SmartJimp in sharp mode for progressive JPEG support
|
||||||
|
const smartJimpInstance = new plugins.smartjimp.SmartJimp({ mode: 'sharp' });
|
||||||
|
|
||||||
|
// Convert each PNG to progressive JPEG
|
||||||
|
const jpegBuffers: Uint8Array[] = [];
|
||||||
|
const quality = options.quality || 85;
|
||||||
|
|
||||||
|
for (const pngBuffer of pngBuffers) {
|
||||||
|
// Convert PNG buffer to progressive JPEG
|
||||||
|
const jpegBuffer = await smartJimpInstance.computeAssetVariation(
|
||||||
|
Buffer.from(pngBuffer),
|
||||||
|
{
|
||||||
|
format: 'jpeg',
|
||||||
|
progressive: true,
|
||||||
|
quality
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
jpegBuffers.push(new Uint8Array(jpegBuffer));
|
||||||
}
|
}
|
||||||
|
|
||||||
await page.close();
|
return jpegBuffers;
|
||||||
return webpBuffers;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,28 +1,31 @@
|
|||||||
// native
|
// native
|
||||||
import * as http from 'http';
|
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
|
|
||||||
export { http, path };
|
export { path };
|
||||||
|
|
||||||
// @pushrocks
|
// @pushrocks
|
||||||
import * as smartbuffer from '@push.rocks/smartbuffer';
|
import * as smartbuffer from '@push.rocks/smartbuffer';
|
||||||
import * as smartfile from '@push.rocks/smartfile';
|
import * as smartfs from '@push.rocks/smartfs';
|
||||||
import * as smartdelay from '@push.rocks/smartdelay';
|
import * as smartdelay from '@push.rocks/smartdelay';
|
||||||
import * as smartpromise from '@push.rocks/smartpromise';
|
import * as smartpromise from '@push.rocks/smartpromise';
|
||||||
import * as smartpath from '@push.rocks/smartpath';
|
import * as smartpath from '@push.rocks/smartpath';
|
||||||
import * as smartpuppeteer from '@push.rocks/smartpuppeteer';
|
import * as smartpuppeteer from '@push.rocks/smartpuppeteer';
|
||||||
import * as smartnetwork from '@push.rocks/smartnetwork';
|
import * as smartnetwork from '@push.rocks/smartnetwork';
|
||||||
|
import * as smartserve from '@push.rocks/smartserve';
|
||||||
import * as smartunique from '@push.rocks/smartunique';
|
import * as smartunique from '@push.rocks/smartunique';
|
||||||
|
import * as smartjimp from '@push.rocks/smartjimp';
|
||||||
|
|
||||||
export {
|
export {
|
||||||
smartbuffer,
|
smartbuffer,
|
||||||
smartfile,
|
smartfs,
|
||||||
smartdelay,
|
smartdelay,
|
||||||
smartpromise,
|
smartpromise,
|
||||||
smartpath,
|
smartpath,
|
||||||
smartpuppeteer,
|
smartpuppeteer,
|
||||||
smartunique,
|
smartunique,
|
||||||
smartnetwork,
|
smartnetwork,
|
||||||
|
smartserve,
|
||||||
|
smartjimp,
|
||||||
};
|
};
|
||||||
|
|
||||||
// tsclass scope
|
// tsclass scope
|
||||||
@@ -31,8 +34,7 @@ import * as tsclass from '@tsclass/tsclass';
|
|||||||
export { tsclass };
|
export { tsclass };
|
||||||
|
|
||||||
// thirdparty
|
// thirdparty
|
||||||
import express from 'express';
|
|
||||||
import pdf2json from 'pdf2json';
|
import pdf2json from 'pdf2json';
|
||||||
import pdfLib from 'pdf-lib';
|
import pdfLib from 'pdf-lib';
|
||||||
|
|
||||||
export { express, pdf2json, pdfLib, };
|
export { pdf2json, pdfLib };
|
||||||
|
|||||||
Reference in New Issue
Block a user