Compare commits

..

6 Commits

Author SHA1 Message Date
5b1615d359 v4.2.0
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-09 17:05:19 +00:00
c1208b5216 feat(smartpdf): replace internal Express server with @push.rocks/smartserve, add PDF→WebP rendering, improve start/stop handling and bump dependencies 2026-03-09 17:05:19 +00:00
d0c5821f80 v4.1.3
Some checks failed
Default (tags) / security (push) Failing after 2s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-01 19:22:26 +00:00
bd6705ca4a fix(tests): use example.com in image conversion test and relax JPEG size assertion 2026-03-01 19:22:25 +00:00
5bc84ffaa0 v4.1.2
Some checks failed
Default (tags) / security (push) Failing after 1s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-01 18:32:03 +00:00
6435d0f042 fix(smartfs): replace smartfile with smartfs, update file reading to use SmartFs, remove GraphicsMagick/Ghostscript dependency checks, bump dev and runtime dependencies, update tests and docs, and adjust npmextra configuration 2026-03-01 18:32:03 +00:00
9 changed files with 2807 additions and 3168 deletions

View File

@@ -1,5 +1,33 @@
# Changelog
## 2026-03-09 - 4.2.0 - feat(smartpdf)
replace internal Express server with @push.rocks/smartserve, add PDF→WebP rendering, improve start/stop handling and bump dependencies
- Replace internal Express HTTP implementation with @push.rocks/smartserve and update README wording to reflect HTTP server usage
- Add PDF→WebP rendering: use pdf.js in-page rendering, capture canvases via Puppeteer to produce WebP buffers; added robust wait/timeout and error handling
- Add start/stop guards: _isRunning flag, reset readiness Deferred on start, and throw if start called while running
- Remove direct http/express exports from plugins and stop exporting express; export smartserve from plugins
- Improve JPEG conversion to produce progressive JPEGs via SmartJimp (sharp mode)
- Bump dependencies/devDependencies: @push.rocks/smartfs to ^1.5.0, add @push.rocks/smartserve ^2.0.1; devDeps @git.zone/tsbuild ^4.3.0, @git.zone/tstest ^3.3.0, @types/node ^25.3.5
## 2026-03-01 - 4.1.3 - fix(tests)
use example.com in image conversion test and relax JPEG size assertion
- Replaced https://www.wikipedia.org with https://example.com in test/test.ts for the third PDF generation test
- Removed the strict expectation that JPEG size must be smaller than PNG; now only asserts that WebP is smaller than PNG
- Updated test comment to note that JPEG may not be smaller for simple graphics pages
## 2026-03-01 - 4.1.2 - fix(smartfs)
replace smartfile with smartfs, update file reading to use SmartFs, remove GraphicsMagick/Ghostscript dependency checks, bump dev and runtime dependencies, update tests and docs, and adjust npmextra configuration
- Replace usage/export of @push.rocks/smartfile with @push.rocks/smartfs and update readFileToPdfObject to use SmartFs + SmartFsProviderNode
- Remove execFile import and the GraphicsMagick/Ghostscript dependency-checking helpers from smartpdf (no more gm/gs checks)
- Bump devDependencies: @git.zone/tsbuild ^4.1.2, @git.zone/tsdoc ^1.12.0, @git.zone/tsrun ^2.0.1, @git.zone/tstest ^3.1.8, @types/node ^25.3.2
- Bump runtime dependencies: @push.rocks/smartfs ^1.3.1, @push.rocks/smartnetwork ^4.4.0, @tsclass/tsclass ^9.3.0, @types/express ^5.0.6, express ^5.2.1, pdf2json ^4.0.2
- Tests updated: switched example URLs to example.com, added a third PDF generation test that writes .nogit/3.pdf, and exported tap.start() as default
- npmextra.json reorganized to namespaced keys, added release.registries and accessLevel, and adjusted tsdoc/legal entries
- Documentation/readme refreshed: added issue reporting/security section, feature table, and various wording/formatting updates
## 2025-08-02 - 4.1.0 - feat(smartpdf)
Add progressive JPEG generation support

View File

@@ -1,9 +1,5 @@
{
"npmci": {
"npmGlobalTools": [],
"npmAccessLevel": "public"
},
"gitzone": {
"@git.zone/cli": {
"projectType": "npm",
"module": {
"githost": "code.foss.global",
@@ -26,9 +22,19 @@
"text extraction",
"PDF management"
]
},
"release": {
"registries": [
"https://verdaccio.lossless.digital",
"https://registry.npmjs.org"
],
"accessLevel": "public"
}
},
"tsdoc": {
"@git.zone/tsdoc": {
"legal": "\n## License and Legal Information\n\nThis repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository. \n\n**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.\n\n### Trademarks\n\nThis project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.\n\n### Company Information\n\nTask Venture Capital GmbH \nRegistered at District court Bremen HRB 35230 HB, Germany\n\nFor any legal inquiries or if you require further information, please contact us via email at hello@task.vc.\n\nBy using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.\n"
},
"@ship.zone/szci": {
"npmGlobalTools": []
}
}

View File

@@ -1,6 +1,6 @@
{
"name": "@push.rocks/smartpdf",
"version": "4.1.1",
"version": "4.2.0",
"private": false,
"description": "A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.",
"main": "dist_ts/index.js",
@@ -14,27 +14,26 @@
"buildDocs": "tsdoc"
},
"devDependencies": {
"@git.zone/tsbuild": "^2.6.4",
"@git.zone/tsdoc": "^1.5.0",
"@git.zone/tsrun": "^1.3.3",
"@git.zone/tstest": "^2.3.2",
"@types/node": "^24.1.0"
"@git.zone/tsbuild": "^4.3.0",
"@git.zone/tsdoc": "^1.12.0",
"@git.zone/tsrun": "^2.0.1",
"@git.zone/tstest": "^3.3.0",
"@types/node": "^25.3.5"
},
"dependencies": {
"@push.rocks/smartbuffer": "^3.0.5",
"@push.rocks/smartdelay": "^3.0.5",
"@push.rocks/smartfile": "^11.2.5",
"@push.rocks/smartfs": "^1.5.0",
"@push.rocks/smartjimp": "^1.2.0",
"@push.rocks/smartnetwork": "^4.1.2",
"@push.rocks/smartnetwork": "^4.4.0",
"@push.rocks/smartpath": "^6.0.0",
"@push.rocks/smartpromise": "^4.2.3",
"@push.rocks/smartpuppeteer": "^2.0.5",
"@push.rocks/smartserve": "^2.0.1",
"@push.rocks/smartunique": "^3.0.9",
"@tsclass/tsclass": "^9.2.0",
"@types/express": "^5.0.3",
"express": "^5.1.0",
"@tsclass/tsclass": "^9.3.0",
"pdf-lib": "^1.17.1",
"pdf2json": "3.2.0"
"pdf2json": "^4.0.2"
},
"files": [
"ts/**/*",

4551
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

634
readme.md
View File

@@ -1,425 +1,423 @@
# @push.rocks/smartpdf 📄✨
> **Transform HTML, websites, and PDFs into beautiful documents with just a few lines of code!**
> **Transform HTML, websites, and PDFs into beautiful documents and images with just a few lines of code.**
[![npm version](https://img.shields.io/npm/v/@push.rocks/smartpdf.svg?style=flat-square)](https://www.npmjs.com/package/@push.rocks/smartpdf)
[![TypeScript](https://img.shields.io/badge/TypeScript-5.x-blue.svg?style=flat-square)](https://www.typescriptlang.org/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](./license)
## Issue Reporting and Security
For reporting bugs, issues, or security vulnerabilities, please visit [community.foss.global/](https://community.foss.global/). This is the central community hub for all issue reporting. Developers who sign and comply with our contribution agreement and go through identification can also get a [code.foss.global/](https://code.foss.global/) account to submit Pull Requests directly.
## 🚀 Why SmartPDF?
SmartPDF is your Swiss Army knife for PDF operations in Node.js. Whether you're generating invoices, creating reports, or converting web pages to PDFs, we've got you covered with a simple, powerful API.
SmartPDF is your Swiss Army knife for PDF operations in Node.js. Whether you're generating invoices from HTML, snapshotting web pages, merging documents, or converting PDF pages to images — SmartPDF handles it all through a clean, async-first TypeScript API backed by headless Chromium.
### ✨ Features at a Glance
- 📝 **HTML to PDF** - Transform HTML strings with full CSS support
- 🌐 **Website to PDF** - Capture any website as a perfectly formatted PDF
- 🔀 **PDF Merging** - Combine multiple PDFs into one
- 🖼️ **PDF to Images** - Convert PDFs to PNG, WebP, or progressive JPEG
- 📑 **Text Extraction** - Pull text content from existing PDFs
- 🎯 **Smart Port Management** - Automatic port allocation for concurrent instances
- 💪 **TypeScript First** - Full type safety and IntelliSense support
- ⚡ **High Performance** - Optimized for speed and reliability
| Feature | Description |
|---------|-------------|
| 📝 **HTML → PDF** | Render any HTML string (with full CSS) into an A4-sized PDF |
| 🌐 **Website → PDF** | Capture a live URL as a PDF — either A4 or full-page scroll |
| 🔀 **PDF Merging** | Combine multiple PDF buffers into a single document |
| 🖼️ **PDF → Images** | Convert PDF pages to **PNG**, **WebP**, or progressive **JPEG** |
| 📑 **Text Extraction** | Pull raw text content from any PDF buffer |
| 🔌 **Smart Port Management** | Automatic port allocation so multiple instances never collide |
| 🎛️ **DPI Control** | Built-in scale constants for screen, high-quality, and print resolutions |
| 🌐 **BYO Browser** | Optionally pass your own Puppeteer `Browser` instance |
## 📦 Installation
```bash
# Using npm
npm install @push.rocks/smartpdf --save
# Using yarn
yarn add @push.rocks/smartpdf
# Using pnpm (recommended)
pnpm add @push.rocks/smartpdf
```
> **Prerequisites:** SmartPDF uses headless Chromium via Puppeteer under the hood. On most systems this is handled automatically. If you run into browser-launch issues (CI, Docker, etc.), make sure the required system libraries are installed — see the [Puppeteer troubleshooting guide](https://pptr.dev/troubleshooting).
## 🎯 Quick Start
```typescript
import { SmartPdf } from '@push.rocks/smartpdf';
import * as fs from 'fs';
// Create and start SmartPdf
// 1. Create and start
const smartPdf = await SmartPdf.create();
await smartPdf.start();
// Generate a PDF from HTML
// 2. Generate a PDF from HTML
const pdf = await smartPdf.getA4PdfResultForHtmlString(`
<h1>Hello, PDF World! 🌍</h1>
<p>This is my first SmartPDF document.</p>
<p>Generated with SmartPDF.</p>
`);
// Save it
await fs.writeFile('my-first-pdf.pdf', pdf.buffer);
// 3. Write to disk
fs.writeFileSync('my-first.pdf', pdf.buffer);
// Don't forget to clean up!
// 4. Clean up
await smartPdf.stop();
```
## 📚 Core Concepts
Every method returns an `IPdf` object:
### 🏗️ Instance Management
```typescript
interface IPdf {
id: string | null; // Unique identifier
name: string; // Filename
buffer: Buffer; // Raw PDF bytes
metadata?: {
textExtraction?: string; // Extracted text (when available)
};
}
```
SmartPDF uses a client-server architecture for maximum performance. Always remember:
## 📚 How It Works
1. **Create** an instance
2. **Start** the server
3. **Do your PDF magic**
4. **Stop** the server
SmartPDF spins up a lightweight HTTP server (via `@push.rocks/smartserve`) bound to `localhost` and a headless Chromium browser. When you call a generation method:
1. Your HTML is registered internally and served at `http://localhost:{port}/{id}`
2. Puppeteer navigates to that URL, waits for the page to fully render, and captures a PDF
3. A header-based security check ensures only the correct content is captured
4. The server and browser are torn down when you call `stop()`
This architecture means you get **pixel-perfect CSS rendering**, **web font support**, and **full JavaScript execution** — the same rendering engine that powers Chrome.
## 🏗️ Instance Management
```typescript
const smartPdf = await SmartPdf.create();
await smartPdf.start();
// ... your PDF operations ...
// ... your operations ...
await smartPdf.stop();
```
For production use, wrap in try/finally:
```typescript
const smartPdf = await SmartPdf.create();
try {
await smartPdf.start();
// ... generate PDFs ...
} finally {
await smartPdf.stop();
}
```
### 🔌 Smart Port Allocation
Run multiple instances without port conflicts:
Run multiple instances without conflicts:
```typescript
// Each instance automatically finds a free port
const instance1 = await SmartPdf.create(); // Port: 20000
const instance2 = await SmartPdf.create(); // Port: 20001
const instance3 = await SmartPdf.create(); // Port: 20002
// Each instance auto-selects a free port (default range: 20000–30000)
const instance1 = new SmartPdf();
const instance2 = new SmartPdf();
await instance1.start(); // e.g. port 20000
await instance2.start(); // e.g. port 20001
// Or specify custom settings
const customInstance = await SmartPdf.create({
port: 3000, // Use specific port
portRangeStart: 4000, // Or define a range
portRangeEnd: 5000
});
console.log(instance1.serverPort); // 20000
console.log(instance2.serverPort); // 20001
// Custom range
const custom = new SmartPdf({ portRangeStart: 4000, portRangeEnd: 5000 });
// Or pin a specific port
const pinned = new SmartPdf({ port: 3000 });
```
## 🎨 PDF Generation
If a specific port is already in use, `start()` throws an error immediately instead of silently failing.
### 📝 From HTML String
### 🌐 Bring Your Own Browser
Create beautiful PDFs from HTML with full CSS support:
```typescript
const smartPdf = await SmartPdf.create();
await smartPdf.start();
const pdf = await smartPdf.getA4PdfResultForHtmlString(`
<!DOCTYPE html>
<html>
<head>
<style>
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap');
body {
font-family: 'Roboto', sans-serif;
margin: 40px;
color: #333;
}
.header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 30px;
border-radius: 10px;
text-align: center;
}
.content {
margin-top: 30px;
line-height: 1.6;
}
.highlight {
background-color: #ffd93d;
padding: 2px 6px;
border-radius: 3px;
}
</style>
</head>
<body>
<div class="header">
<h1>Invoice #2024-001</h1>
<p>Generated on ${new Date().toLocaleDateString()}</p>
</div>
<div class="content">
<h2>Bill To:</h2>
<p>Acme Corporation</p>
<p>Total: <span class="highlight">$1,234.56</span></p>
</div>
</body>
</html>
`);
await fs.writeFile('invoice.pdf', pdf.buffer);
await smartPdf.stop();
```
### 🌐 From Website
Capture any website as a PDF with two powerful methods:
#### Standard A4 Format
Perfect for articles and documents:
```typescript
const pdf = await smartPdf.getPdfResultForWebsite('https://example.com');
```
#### Full Page Capture
Capture the entire scrollable area:
```typescript
const fullPagePdf = await smartPdf.getFullWebsiteAsSinglePdf('https://example.com');
```
### 🔀 Merge Multiple PDFs
Combine PDFs like a pro:
```typescript
// Load your PDFs
const invoice = await smartPdf.readFileToPdfObject('./invoice.pdf');
const terms = await smartPdf.readFileToPdfObject('./terms.pdf');
const contract = await smartPdf.getA4PdfResultForHtmlString('<h1>Contract</h1>...');
// Merge them in order
const mergedPdf = await smartPdf.mergePdfs([
contract.buffer,
invoice.buffer,
terms.buffer
]);
await fs.writeFile('complete-document.pdf', mergedPdf);
```
## 🖼️ Image Generation
### 🎨 Convert PDF to Images
SmartPDF supports three image formats, each with its own strengths:
#### PNG - Crystal Clear Quality
```typescript
const pngImages = await smartPdf.convertPDFToPngBytes(pdf.buffer, {
scale: SmartPdf.SCALE_HIGH // 216 DPI - perfect for most uses
});
// Save each page
pngImages.forEach((png, index) => {
fs.writeFileSync(`page-${index + 1}.png`, png);
});
```
#### WebP - Modern & Efficient
```typescript
const webpImages = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
quality: 90, // 0-100 quality scale
scale: 2.0 // 144 DPI - great for web
});
```
#### JPEG - Progressive Loading
```typescript
const jpegImages = await smartPdf.convertPDFToJpegBytes(pdf.buffer, {
quality: 85, // Balance between size and quality
scale: SmartPdf.SCALE_SCREEN, // 144 DPI
maxWidth: 1920 // Constrain dimensions
});
```
### 📏 DPI & Scale Guide
SmartPDF makes it easy to get the right resolution:
```typescript
// Built-in scale constants
SmartPdf.SCALE_SCREEN // 2.0 = ~144 DPI (web display)
SmartPdf.SCALE_HIGH // 3.0 = ~216 DPI (high quality, default)
SmartPdf.SCALE_PRINT // 6.0 = ~432 DPI (print quality)
// Or calculate your own
const scale = SmartPdf.getScaleForDPI(300); // Get scale for 300 DPI
```
### 🖼️ Thumbnail Generation
Create perfect thumbnails for document previews:
```typescript
const thumbnails = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
scale: 0.5, // Small but readable
quality: 70, // Lower quality for tiny files
maxWidth: 200, // Constrain to thumbnail size
maxHeight: 200
});
```
## 📊 Format Comparison
Choose the right format for your needs:
| Format | File Size | Best For | Special Features |
|--------|-----------|----------|------------------|
| **PNG** | Largest | Screenshots, diagrams, text | Lossless, transparency |
| **JPEG** | 30-50% of PNG | Photos, complex images | Progressive loading |
| **WebP** | 25-40% of PNG | Modern web apps | Best compression |
## 🛡️ Best Practices
### 1. Always Use Try-Finally
```typescript
let smartPdf: SmartPdf;
try {
smartPdf = await SmartPdf.create();
await smartPdf.start();
// Your PDF operations
} finally {
if (smartPdf) {
await smartPdf.stop(); // Always cleanup!
}
}
```
### 2. Optimize HTML for PDFs
```typescript
const optimizedHtml = `
<style>
/* Use print-friendly styles */
@media print {
.no-print { display: none; }
}
/* Avoid page breaks in wrong places */
h1, h2, h3 { page-break-after: avoid; }
table { page-break-inside: avoid; }
</style>
${yourContent}
`;
```
### 3. Handle Large Documents
For documents with many pages:
```typescript
// Process in batches
const pages = await smartPdf.convertPDFToPngBytes(largePdf.buffer);
for (let i = 0; i < pages.length; i += 10) {
const batch = pages.slice(i, i + 10);
await processBatch(batch);
}
```
## 🎯 Advanced Usage
### 🌐 Custom Browser Instance
Bring your own Puppeteer instance:
Pass an existing Puppeteer `Browser` instance — SmartPDF won't close it when you call `stop()`:
```typescript
import puppeteer from 'puppeteer';
const browser = await puppeteer.launch({
headless: 'new',
args: ['--no-sandbox', '--disable-dev-shm-usage']
args: ['--no-sandbox'],
});
const smartPdf = await SmartPdf.create();
await smartPdf.start(browser);
await smartPdf.start(browser); // uses your browser
// SmartPdf won't close your browser
await smartPdf.stop();
await browser.close(); // You manage it
await smartPdf.stop(); // server stops, browser stays open
await browser.close(); // you manage browser lifecycle
```
### ⚡ Parallel Processing
## 🎨 PDF Generation
Process multiple PDFs concurrently:
### 📝 HTML → A4 PDF
Renders at a 794×1122 viewport (A4 at 96 DPI) with full CSS support:
```typescript
const urls = ['https://example1.com', 'https://example2.com', 'https://example3.com'];
const pdf = await smartPdf.getA4PdfResultForHtmlString(`
<style>
body { font-family: 'Helvetica', sans-serif; margin: 40px; }
.header {
background: linear-gradient(135deg, #667eea, #764ba2);
color: white; padding: 30px; border-radius: 10px; text-align: center;
}
table { width: 100%; border-collapse: collapse; margin-top: 20px; }
th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
th { background: #f5f5f5; }
</style>
const pdfs = await Promise.all(
urls.map(url => smartPdf.getFullWebsiteAsSinglePdf(url))
);
<div class="header">
<h1>Invoice #2024-001</h1>
</div>
// Or with multiple instances for maximum performance
<table>
<tr><th>Item</th><th>Qty</th><th>Price</th></tr>
<tr><td>Widget Pro</td><td>5</td><td>$49.99</td></tr>
<tr><td>Gizmo Ultra</td><td>2</td><td>$129.99</td></tr>
</table>
`);
fs.writeFileSync('invoice.pdf', pdf.buffer);
```
### 🌐 Website → PDF
Two methods depending on your needs:
```typescript
// Standard capture — uses the document's own dimensions
const pdf = await smartPdf.getPdfResultForWebsite('https://example.com');
// Full-page capture — scrolls to bottom, captures everything as a single page
const fullPdf = await smartPdf.getFullWebsiteAsSinglePdf('https://example.com');
```
`getPdfResultForWebsite` uses a 1980×1200 viewport and respects the page's own width/height. `getFullWebsiteAsSinglePdf` uses a 1920px-wide viewport and measures the full scroll height, producing a single tall page.
### 🔀 Merge Multiple PDFs
Combine any number of PDF buffers into one document using `pdf-lib`:
```typescript
const invoice = await smartPdf.readFileToPdfObject('./invoice.pdf');
const terms = await smartPdf.readFileToPdfObject('./terms.pdf');
const appendix = await smartPdf.getA4PdfResultForHtmlString('<h1>Appendix</h1>...');
const merged = await smartPdf.mergePdfs([
invoice.buffer,
terms.buffer,
appendix.buffer,
]);
fs.writeFileSync('complete-package.pdf', merged);
```
### 📑 Read a PDF from Disk
```typescript
const pdfObject = await smartPdf.readFileToPdfObject('./document.pdf');
console.log(pdfObject.name); // "document.pdf"
console.log(pdfObject.buffer); // <Buffer ...>
```
### 📖 Extract Text
Pull raw text from any PDF buffer:
```typescript
const text = await smartPdf.extractTextFromPdfBuffer(pdf.buffer);
console.log(text);
```
> Uses [pdf2json](https://github.com/modesty/pdf2json) under the hood. Works best with text-based PDFs; scanned documents may return limited results.
## 🖼️ PDF → Image Conversion
Convert PDF pages to raster images using Puppeteer + PDF.js. Each page becomes a separate image buffer.
### PNG — Lossless Quality
```typescript
const pngPages = await smartPdf.convertPDFToPngBytes(pdf.buffer, {
scale: SmartPdf.SCALE_HIGH, // 3.0 = ~216 DPI (default)
});
pngPages.forEach((png, i) => {
fs.writeFileSync(`page-${i + 1}.png`, Buffer.from(png));
});
```
### WebP — Modern & Efficient
25–60% smaller than PNG at similar visual quality:
```typescript
const webpPages = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
scale: 2.0, // ~144 DPI
quality: 90, // 0–100 (default: 85)
});
```
### JPEG — Progressive Loading
Generates true progressive JPEGs (multi-pass rendering) via sharp:
```typescript
const jpegPages = await smartPdf.convertPDFToJpegBytes(pdf.buffer, {
scale: SmartPdf.SCALE_HIGH,
quality: 85, // 0–100 (default: 85)
maxWidth: 1920, // optional dimension constraints
maxHeight: 1080,
});
```
### 📏 DPI & Scale Reference
All image methods accept a `scale` parameter. PDF.js renders at 72 DPI by default, so `scale` is a multiplier:
| Constant | Value | DPI | Use Case |
|----------|-------|-----|----------|
| `SmartPdf.SCALE_SCREEN` | 2.0 | ~144 | Web display, thumbnails |
| `SmartPdf.SCALE_HIGH` | 3.0 | ~216 | General purpose (default) |
| `SmartPdf.SCALE_PRINT` | 6.0 | ~432 | Print-quality output |
Or calculate a custom scale:
```typescript
const scale = SmartPdf.getScaleForDPI(300); // → 4.167
```
### 🖼️ Dimension Constraints
All image methods support `maxWidth` and `maxHeight` to cap output size while preserving aspect ratio:
```typescript
// High-res render, but capped at 800×1000 px
const constrained = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
scale: SmartPdf.SCALE_HIGH,
quality: 90,
maxWidth: 800,
maxHeight: 1000,
});
```
### 📊 Format Comparison
| Format | Typical Size vs PNG | Lossy? | Transparency | Progressive | Best For |
|--------|-------------------|--------|--------------|-------------|----------|
| **PNG** | baseline | No | ✅ | — | Screenshots, diagrams, text-heavy docs |
| **WebP** | 40–75% | Yes | ✅ | — | Modern web apps, thumbnails |
| **JPEG** | 50–70% | Yes | ❌ | ✅ | Photos, complex graphics, email |
## ⚡ Parallel Processing
Process multiple URLs concurrently with separate instances:
```typescript
const urls = [
'https://example.com/page1',
'https://example.com/page2',
'https://example.com/page3',
];
// Spin up parallel instances
const instances = await Promise.all(
Array(3).fill(null).map(() => SmartPdf.create())
urls.map(() => SmartPdf.create())
);
await Promise.all(instances.map(i => i.start()));
// Process in parallel across instances
const results = await Promise.all(
urls.map((url, i) => instances[i % instances.length].getFullWebsiteAsSinglePdf(url))
// Generate in parallel
const pdfs = await Promise.all(
urls.map((url, i) => instances[i].getFullWebsiteAsSinglePdf(url))
);
// Cleanup all instances
// Merge all results
const merged = await instances[0].mergePdfs(pdfs.map(p => p.buffer));
fs.writeFileSync('all-pages.pdf', merged);
// Clean up
await Promise.all(instances.map(i => i.stop()));
```
## 📝 API Reference
## 📝 Full API Reference
### Class: SmartPdf
### `SmartPdf` Class
#### Static Properties
| Property | Type | Value | Description |
|----------|------|-------|-------------|
| `SCALE_SCREEN` | `number` | `2.0` | ~144 DPI scale factor |
| `SCALE_HIGH` | `number` | `3.0` | ~216 DPI scale factor (default) |
| `SCALE_PRINT` | `number` | `6.0` | ~432 DPI scale factor |
#### Static Methods
- `create(options?: ISmartPdfOptions)` - Create a new SmartPdf instance
- `getScaleForDPI(dpi: number)` - Calculate scale factor for desired DPI
| Method | Returns | Description |
|--------|---------|-------------|
| `create(options?)` | `Promise<SmartPdf>` | Factory method to create an instance |
| `getScaleForDPI(dpi)` | `number` | Converts a DPI value to a scale factor (`dpi / 72`) |
#### Instance Properties
| Property | Type | Description |
|----------|------|-------------|
| `serverPort` | `number` | The port the internal HTTP server is listening on |
#### Instance Methods
- `start(browser?: Browser)` - Start the PDF server
- `stop()` - Stop the PDF server
- `getA4PdfResultForHtmlString(html: string)` - Generate A4 PDF from HTML
- `getPdfResultForWebsite(url: string)` - Generate A4 PDF from website
- `getFullWebsiteAsSinglePdf(url: string)` - Capture full webpage as PDF
- `mergePdfs(buffers: Uint8Array[])` - Merge multiple PDFs
- `readFileToPdfObject(path: string)` - Read PDF file from disk
- `extractTextFromPdfBuffer(buffer: Buffer)` - Extract text from PDF
- `convertPDFToPngBytes(buffer: Uint8Array, options?)` - Convert to PNG
- `convertPDFToWebpBytes(buffer: Uint8Array, options?)` - Convert to WebP
- `convertPDFToJpegBytes(buffer: Uint8Array, options?)` - Convert to JPEG
### Interface: IPdf
| Method | Returns | Description |
|--------|---------|-------------|
| `start(browser?)` | `Promise<void>` | Starts internal server + browser. Optionally accepts an existing Puppeteer `Browser`. |
| `stop()` | `Promise<void>` | Shuts down server and browser (unless external browser was provided). |
| `getA4PdfResultForHtmlString(html)` | `Promise<IPdf>` | Renders HTML at 794×1122 viewport → A4 PDF |
| `getPdfResultForWebsite(url)` | `Promise<IPdf>` | Captures website at 1980×1200 viewport → PDF |
| `getFullWebsiteAsSinglePdf(url)` | `Promise<IPdf>` | Captures full scrollable page at 1920px wide → single-page PDF |
| `mergePdfs(buffers)` | `Promise<Uint8Array>` | Merges an array of PDF `Uint8Array` buffers |
| `readFileToPdfObject(path)` | `Promise<IPdf>` | Reads a PDF file from disk into an `IPdf` object |
| `extractTextFromPdfBuffer(buffer)` | `Promise<string>` | Extracts raw text from a PDF buffer |
| `convertPDFToPngBytes(buffer, opts?)` | `Promise<Uint8Array[]>` | Converts each PDF page to a PNG buffer |
| `convertPDFToWebpBytes(buffer, opts?)` | `Promise<Uint8Array[]>` | Converts each PDF page to a WebP buffer |
| `convertPDFToJpegBytes(buffer, opts?)` | `Promise<Uint8Array[]>` | Converts each PDF page to a progressive JPEG buffer |
#### Image Conversion Options
```typescript
interface IPdf {
name: string; // Filename
buffer: Buffer; // PDF content
id: string | null; // Unique identifier
metadata?: {
textExtraction?: string; // Extracted text
};
{
scale?: number; // DPI multiplier (default: 3.0)
quality?: number; // 0–100, WebP/JPEG only (default: 85)
maxWidth?: number; // Max output width in pixels
maxHeight?: number; // Max output height in pixels
}
```
## 🤝 Contributing
### `ISmartPdfOptions` Interface
We love contributions! Please feel free to submit a Pull Request.
```typescript
{
port?: number; // Use a specific port
portRangeStart?: number; // Auto-allocation range start (default: 20000)
portRangeEnd?: number; // Auto-allocation range end (default: 30000)
}
```
## License and Legal Information
This repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository.
This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [LICENSE](./LICENSE) file.
**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.
### Trademarks
This project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.
This project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH or third parties, and are not included within the scope of the MIT license granted herein.
Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines or the guidelines of the respective third-party owners, and any usage must be approved in writing. Third-party trademarks used herein are the property of their respective owners and used only in a descriptive manner, e.g. for an implementation of an API or similar.
### Company Information
Task Venture Capital GmbH
Registered at District court Bremen HRB 35230 HB, Germany
Registered at District Court Bremen HRB 35230 HB, Germany
For any legal inquiries or if you require further information, please contact us via email at hello@task.vc.
For any legal inquiries or further information, please contact us via email at hello@task.vc.
By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.

View File

@@ -39,8 +39,8 @@ tap.test('should create PDFs from HTML string', async () => {
});
tap.test('should create PDFs from websites', async () => {
const pdfA4 = await testSmartPdf.getPdfResultForWebsite('https://www.wikipedia.org');
const pdfSingle = await testSmartPdf.getFullWebsiteAsSinglePdf('https://www.wikipedia.org');
const pdfA4 = await testSmartPdf.getPdfResultForWebsite('https://example.com');
const pdfSingle = await testSmartPdf.getFullWebsiteAsSinglePdf('https://example.com');
expect(pdfA4.buffer).toBeInstanceOf(Buffer);
expect(pdfSingle.buffer).toBeInstanceOf(Buffer);
});
@@ -82,6 +82,13 @@ tap.test('should store PNG results from both conversion functions in .nogit/test
});
});
tap.test('should create a third PDF for image conversion tests', async () => {
const pdfResult = await testSmartPdf.getFullWebsiteAsSinglePdf('https://example.com');
expect(pdfResult.buffer).toBeInstanceOf(Buffer);
ensureDir('.nogit');
fs.writeFileSync(path.join('.nogit', '3.pdf'), pdfResult.buffer as Buffer);
});
tap.test('should create WebP preview images from PDF', async () => {
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
const webpPreviews = await testSmartPdf.convertPDFToWebpBytes(pdfObject.buffer);
@@ -276,8 +283,7 @@ tap.test('should compare file sizes between PNG, WebP, and JPEG', async () => {
console.log(`WebP: ${totalWebpSize} bytes (${totalWebpReduction}% reduction)`);
console.log(`JPEG: ${totalJpegSize} bytes (${totalJpegReduction}% reduction)`);
// JPEG and WebP should both be smaller than PNG
expect(totalJpegSize).toBeLessThan(totalPngSize);
// WebP should be smaller than PNG; JPEG may not be for simple graphics pages
expect(totalWebpSize).toBeLessThan(totalPngSize);
});
@@ -285,4 +291,4 @@ tap.test('should close the SmartPdf instance properly', async () => {
await testSmartPdf.stop();
});
tap.start();
export default tap.start();

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@push.rocks/smartpdf',
version: '3.2.2',
version: '4.2.0',
description: 'A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.'
}

View File

@@ -1,10 +1,7 @@
import * as plugins from './smartpdf.plugins.js';
import * as paths from './smartpdf.paths.js';
import { Server } from 'http';
import { PdfCandidate } from './smartpdf.classes.pdfcandidate.js';
import { type IPdf } from '@tsclass/tsclass/dist_ts/business/pdf.js';
import { execFile } from 'child_process';
declare const document: any;
export interface ISmartPdfOptions {
@@ -34,13 +31,14 @@ export class SmartPdf {
}
// INSTANCE
htmlServerInstance: Server;
private smartserveInstance: plugins.smartserve.SmartServe;
serverPort: number;
headlessBrowser: plugins.smartpuppeteer.puppeteer.Browser;
externalBrowserBool: boolean = false;
private _readyDeferred: plugins.smartpromise.Deferred<void>;
private _candidates: { [key: string]: PdfCandidate } = {};
private _options: ISmartPdfOptions;
private _isRunning: boolean = false;
constructor(optionsArg?: ISmartPdfOptions) {
this._readyDeferred = new plugins.smartpromise.Deferred();
@@ -52,7 +50,13 @@ export class SmartPdf {
}
async start(headlessBrowserArg?: plugins.smartpuppeteer.puppeteer.Browser) {
const done = plugins.smartpromise.defer();
if (this._isRunning) {
throw new Error('SmartPdf is already running. Call stop() before starting again.');
}
// Reset readiness deferred for this start cycle
this._readyDeferred = new plugins.smartpromise.Deferred();
// lets set the external browser in case one is provided
this.headlessBrowser = headlessBrowserArg;
// setup puppeteer
@@ -76,6 +80,7 @@ export class SmartPdf {
// Clean up browser if we created one
if (!this.externalBrowserBool && this.headlessBrowser) {
await this.headlessBrowser.close();
this.headlessBrowser = null;
}
throw new Error(`Requested port ${this._options.port} is already in use`);
}
@@ -89,45 +94,62 @@ export class SmartPdf {
// Clean up browser if we created one
if (!this.externalBrowserBool && this.headlessBrowser) {
await this.headlessBrowser.close();
this.headlessBrowser = null;
}
throw new Error(`No free ports available in range ${this._options.portRangeStart}-${this._options.portRangeEnd}`);
}
}
// Now setup server after we know we have a valid port
const app = plugins.express();
app.get('/:pdfId', (req, res) => {
const wantedCandidate = this._candidates[req.params.pdfId];
if (!wantedCandidate) {
console.log(`${req.url} not attached to a candidate`);
return;
}
res.setHeader('pdf-id', wantedCandidate.pdfId);
res.send(wantedCandidate.htmlString);
// Now setup server using smartserve
this.smartserveInstance = new plugins.smartserve.SmartServe({
port: this.serverPort,
hostname: 'localhost',
});
this.htmlServerInstance = plugins.http.createServer(app);
this.htmlServerInstance.listen(this.serverPort, 'localhost');
this.htmlServerInstance.on('listening', () => {
console.log(`SmartPdf server listening on port ${this.serverPort}`);
this._readyDeferred.resolve();
done.resolve();
this.smartserveInstance.setHandler(async (request) => {
const url = new URL(request.url);
const pdfId = url.pathname.slice(1); // Remove leading /
const candidate = this._candidates[pdfId];
if (!candidate) {
console.log(`${url.pathname} not attached to a candidate`);
return new Response('Not found', { status: 404 });
}
return new Response(candidate.htmlString, {
headers: {
'Content-Type': 'text/html; charset=utf-8',
'pdf-id': candidate.pdfId,
},
});
});
await done.promise;
await this.smartserveInstance.start();
console.log(`SmartPdf server listening on port ${this.serverPort}`);
this._isRunning = true;
this._readyDeferred.resolve();
}
// stop
async stop() {
const done = plugins.smartpromise.defer<void>();
this.htmlServerInstance.close(() => {
done.resolve();
});
if (!this.externalBrowserBool) {
await this.headlessBrowser.close();
if (!this._isRunning) {
return;
}
await done.promise;
this._isRunning = false;
// Close browser first to cleanly terminate keepalive connections
// before the server shuts down (prevents ECONNRESET errors)
if (!this.externalBrowserBool && this.headlessBrowser) {
await this.headlessBrowser.close();
}
this.headlessBrowser = null;
if (this.smartserveInstance) {
await this.smartserveInstance.stop();
this.smartserveInstance = null;
}
// Clear any remaining candidates
this._candidates = {};
}
/**
@@ -137,124 +159,144 @@ export class SmartPdf {
await this._readyDeferred.promise;
const pdfCandidate = new PdfCandidate(htmlStringArg);
this._candidates[pdfCandidate.pdfId] = pdfCandidate;
const page = await this.headlessBrowser.newPage();
await page.setViewport({
width: 794,
height: 1122,
});
const response = await page.goto(`http://localhost:${this.serverPort}/${pdfCandidate.pdfId}`, {
waitUntil: 'networkidle2',
});
const headers = response.headers();
if (headers['pdf-id'] !== pdfCandidate.pdfId) {
console.log('Error! Headers do not match. For security reasons no pdf is being emitted!');
return;
} else {
console.log(`id security check passed for ${pdfCandidate.pdfId}`);
}
let page: plugins.smartpuppeteer.puppeteer.Page;
try {
page = await this.headlessBrowser.newPage();
await page.setViewport({
width: 794,
height: 1122,
});
const response = await page.goto(`http://localhost:${this.serverPort}/${pdfCandidate.pdfId}`, {
waitUntil: 'networkidle2',
});
const headers = response.headers();
if (headers['pdf-id'] !== pdfCandidate.pdfId) {
console.log('Error! Headers do not match. For security reasons no pdf is being emitted!');
return;
} else {
console.log(`id security check passed for ${pdfCandidate.pdfId}`);
}
const pdfBuffer = await page.pdf({
width: 794,
height: 1122,
printBackground: true,
displayHeaderFooter: false,
});
// Convert Uint8Array to Node Buffer
const nodePdfBuffer = Buffer.from(pdfBuffer);
await page.close();
delete this._candidates[pdfCandidate.pdfId];
pdfCandidate.doneDeferred.resolve();
await pdfCandidate.doneDeferred.promise;
return {
id: pdfCandidate.pdfId,
name: `${pdfCandidate.pdfId}.js`,
metadata: {
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
},
buffer: nodePdfBuffer,
};
const pdfBuffer = await page.pdf({
width: 794,
height: 1122,
printBackground: true,
displayHeaderFooter: false,
});
// Convert Uint8Array to Node Buffer
const nodePdfBuffer = Buffer.from(pdfBuffer);
await page.close();
delete this._candidates[pdfCandidate.pdfId];
pdfCandidate.doneDeferred.resolve();
await pdfCandidate.doneDeferred.promise;
return {
id: pdfCandidate.pdfId,
name: `${pdfCandidate.pdfId}.js`,
metadata: {
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
},
buffer: nodePdfBuffer,
};
} catch (err) {
// Clean up candidate on error
delete this._candidates[pdfCandidate.pdfId];
if (page) {
await page.close().catch(() => {});
}
throw err;
}
}
async getPdfResultForWebsite(websiteUrl: string): Promise<plugins.tsclass.business.IPdf> {
const page = await this.headlessBrowser.newPage();
await page.setViewport({
width: 1980,
height: 1200,
});
await page.emulateMediaType('screen');
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
const pdfId = plugins.smartunique.shortId();
const { documentHeight, documentWidth } = await page.evaluate(() => {
try {
await page.setViewport({
width: 1980,
height: 1200,
});
await page.emulateMediaType('screen');
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
const pdfId = plugins.smartunique.shortId();
const { documentHeight, documentWidth } = await page.evaluate(() => {
return {
documentHeight: document.height,
documentWidth: document.width,
};
});
const pdfBuffer = await page.pdf({
height: documentHeight,
width: documentWidth,
printBackground: true,
displayHeaderFooter: false,
});
// Convert Uint8Array to Node Buffer
const nodePdfBuffer = Buffer.from(pdfBuffer);
await page.close();
return {
documentHeight: document.height,
documentWidth: document.width,
id: pdfId,
name: `${pdfId}.js`,
metadata: {
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
},
buffer: nodePdfBuffer,
};
});
const pdfBuffer = await page.pdf({
height: documentHeight,
width: documentWidth,
printBackground: true,
displayHeaderFooter: false,
});
// Convert Uint8Array to Node Buffer
const nodePdfBuffer = Buffer.from(pdfBuffer);
await page.close();
return {
id: pdfId,
name: `${pdfId}.js`,
metadata: {
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
},
buffer: nodePdfBuffer,
};
} catch (err) {
await page.close().catch(() => {});
throw err;
}
}
async getFullWebsiteAsSinglePdf(websiteUrl: string): Promise<plugins.tsclass.business.IPdf> {
const page = await this.headlessBrowser.newPage();
await page.setViewport({
width: 1920,
height: 1200,
});
await page.emulateMediaType('screen');
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
const pdfId = plugins.smartunique.shortId();
// Use both document.body and document.documentElement to ensure we have a valid height and width.
const { documentHeight, documentWidth } = await page.evaluate(() => {
try {
await page.setViewport({
width: 1920,
height: 1200,
});
await page.emulateMediaType('screen');
const response = await page.goto(websiteUrl, { waitUntil: 'networkidle2' });
const pdfId = plugins.smartunique.shortId();
// Use both document.body and document.documentElement to ensure we have a valid height and width.
const { documentHeight, documentWidth } = await page.evaluate(() => {
return {
documentHeight: Math.max(
document.body.scrollHeight,
document.documentElement.scrollHeight
) || 1200,
documentWidth: Math.max(
document.body.clientWidth,
document.documentElement.clientWidth
) || 1920,
};
});
// Update viewport height to the full document height.
await page.setViewport({
width: 1920,
height: documentHeight,
});
const pdfBuffer = await page.pdf({
height: documentHeight,
width: 1920,
printBackground: true,
displayHeaderFooter: false,
scale: 1,
pageRanges: '1',
});
// Convert Uint8Array to Node Buffer
const nodePdfBuffer = Buffer.from(pdfBuffer);
await page.close();
return {
documentHeight: Math.max(
document.body.scrollHeight,
document.documentElement.scrollHeight
) || 1200,
documentWidth: Math.max(
document.body.clientWidth,
document.documentElement.clientWidth
) || 1920,
id: pdfId,
name: `${pdfId}.js`,
metadata: {
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
},
buffer: nodePdfBuffer,
};
});
// Update viewport height to the full document height.
await page.setViewport({
width: 1920,
height: documentHeight,
});
const pdfBuffer = await page.pdf({
height: documentHeight,
width: 1920,
printBackground: true,
displayHeaderFooter: false,
scale: 1,
pageRanges: '1',
});
// Convert Uint8Array to Node Buffer
const nodePdfBuffer = Buffer.from(pdfBuffer);
await page.close();
return {
id: pdfId,
name: `${pdfId}.js`,
metadata: {
textExtraction: await this.extractTextFromPdfBuffer(nodePdfBuffer),
},
buffer: nodePdfBuffer,
};
} catch (err) {
await page.close().catch(() => {});
throw err;
}
}
public async mergePdfs(inputPdfBuffers: Uint8Array[]): Promise<Uint8Array> {
@@ -272,7 +314,9 @@ export class SmartPdf {
public async readFileToPdfObject(pathArg: string): Promise<plugins.tsclass.business.IPdf> {
const absolutePath = plugins.smartpath.transform.makeAbsolute(pathArg);
const parsedPath = plugins.path.parse(absolutePath);
const buffer = await plugins.smartfile.fs.toBuffer(absolutePath);
const smartfsInstance = new plugins.smartfs.SmartFs(new plugins.smartfs.SmartFsProviderNode());
const fileContent = await smartfsInstance.file(absolutePath).read();
const buffer = Buffer.from(fileContent);
return {
name: parsedPath.base,
buffer,
@@ -299,31 +343,6 @@ export class SmartPdf {
return deferred.promise;
}
/**
* Checks for the presence of required dependencies: GraphicsMagick and Ghostscript.
*/
private async checkDependencies(): Promise<void> {
await Promise.all([
this.checkCommandExists('gm', ['version']),
this.checkCommandExists('gs', ['--version']),
]);
}
/**
* Checks if a given command exists by trying to execute it.
*/
private checkCommandExists(command: string, args: string[]): Promise<void> {
return new Promise((resolve, reject) => {
execFile(command, args, (error, stdout, stderr) => {
if (error) {
reject(new Error(`Dependency check failed: ${command} is not installed or not in the PATH. ${error.message}`));
} else {
resolve();
}
});
});
}
/**
* Converts a PDF to PNG bytes for each page using Puppeteer and PDF.js.
* This method creates a temporary HTML page that loads PDF.js from a CDN,
@@ -343,89 +362,94 @@ export class SmartPdf {
// Create a new page using the headless browser.
const page = await this.headlessBrowser.newPage();
// Prepare PDF data as a base64 string.
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
try {
// Prepare PDF data as a base64 string.
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
// HTML template that loads PDF.js and renders the PDF.
const htmlTemplate: string = `
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>PDF to PNG Converter</title>
<style>
body { margin: 0; }
canvas { display: block; margin: 10px auto; }
</style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
</head>
<body>
<script>
(async function() {
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
const pdfData = "__PDF_DATA__";
const raw = atob(pdfData);
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
const pdf = await loadingTask.promise;
const numPages = pdf.numPages;
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
const page = await pdf.getPage(pageNum);
// Apply scale factor to viewport
const viewport = page.getViewport({ scale: ${scale} });
// HTML template that loads PDF.js and renders the PDF.
const htmlTemplate: string = `
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>PDF to PNG Converter</title>
<style>
body { margin: 0; }
canvas { display: block; margin: 10px auto; }
</style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
</head>
<body>
<script>
(async function() {
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
const pdfData = "__PDF_DATA__";
const raw = atob(pdfData);
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
const pdf = await loadingTask.promise;
const numPages = pdf.numPages;
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
const page = await pdf.getPage(pageNum);
// Apply scale factor to viewport
const viewport = page.getViewport({ scale: ${scale} });
// Apply max width/height constraints if specified
let finalScale = ${scale};
${options.maxWidth ? `
if (viewport.width > ${options.maxWidth}) {
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
}` : ''}
${options.maxHeight ? `
if (viewport.height > ${options.maxHeight}) {
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
finalScale = Math.min(finalScale, heightScale);
}` : ''}
// Apply max width/height constraints if specified
let finalScale = ${scale};
${options.maxWidth ? `
if (viewport.width > ${options.maxWidth}) {
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
}` : ''}
${options.maxHeight ? `
if (viewport.height > ${options.maxHeight}) {
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
finalScale = Math.min(finalScale, heightScale);
}` : ''}
// Get final viewport with adjusted scale
const finalViewport = page.getViewport({ scale: finalScale });
// Get final viewport with adjusted scale
const finalViewport = page.getViewport({ scale: finalScale });
const canvas = document.createElement('canvas');
const context = canvas.getContext('2d');
canvas.width = finalViewport.width;
canvas.height = finalViewport.height;
canvas.setAttribute('data-page', pageNum);
const canvas = document.createElement('canvas');
const context = canvas.getContext('2d');
canvas.width = finalViewport.width;
canvas.height = finalViewport.height;
canvas.setAttribute('data-page', pageNum);
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
document.body.appendChild(canvas);
}
window.renderComplete = true;
})();
</script>
</body>
</html>
`;
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
document.body.appendChild(canvas);
}
window.renderComplete = true;
})();
</script>
</body>
</html>
`;
// Replace the placeholder with the actual base64 PDF data.
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
// Replace the placeholder with the actual base64 PDF data.
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
// Set the page content.
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
// Set the page content.
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
// Wait until the PDF.js rendering is complete.
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
// Wait until the PDF.js rendering is complete.
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
// Query all canvas elements (each representing a rendered PDF page).
const canvasElements = await page.$$('canvas');
const pngBuffers: Uint8Array[] = [];
// Query all canvas elements (each representing a rendered PDF page).
const canvasElements = await page.$$('canvas');
const pngBuffers: Uint8Array[] = [];
for (const canvasElement of canvasElements) {
// Screenshot the canvas element. The screenshot will be a PNG buffer.
const screenshotBuffer = (await canvasElement.screenshot({ encoding: 'binary' })) as Buffer;
pngBuffers.push(new Uint8Array(screenshotBuffer));
for (const canvasElement of canvasElements) {
// Screenshot the canvas element. The screenshot will be a PNG buffer.
const screenshotBuffer = (await canvasElement.screenshot({ encoding: 'binary' })) as Buffer;
pngBuffers.push(new Uint8Array(screenshotBuffer));
}
await page.close();
return pngBuffers;
} catch (err) {
await page.close().catch(() => {});
throw err;
}
await page.close();
return pngBuffers;
}
/**
@@ -449,94 +473,99 @@ export class SmartPdf {
// Create a new page using the headless browser
const page = await this.headlessBrowser.newPage();
// Prepare PDF data as a base64 string
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
try {
// Prepare PDF data as a base64 string
const base64Pdf: string = Buffer.from(pdfBytes).toString('base64');
// HTML template that loads PDF.js and renders the PDF with scaling
const htmlTemplate: string = `
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>PDF to WebP Preview Converter</title>
<style>
body { margin: 0; }
canvas { display: block; margin: 10px auto; }
</style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
</head>
<body>
<script>
(async function() {
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
const pdfData = "__PDF_DATA__";
const raw = atob(pdfData);
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
const pdf = await loadingTask.promise;
const numPages = pdf.numPages;
// HTML template that loads PDF.js and renders the PDF with scaling
const htmlTemplate: string = `
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>PDF to WebP Preview Converter</title>
<style>
body { margin: 0; }
canvas { display: block; margin: 10px auto; }
</style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
</head>
<body>
<script>
(async function() {
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.worker.min.js';
const pdfData = "__PDF_DATA__";
const raw = atob(pdfData);
const pdfArray = new Uint8Array([...raw].map(c => c.charCodeAt(0)));
const loadingTask = pdfjsLib.getDocument({data: pdfArray});
const pdf = await loadingTask.promise;
const numPages = pdf.numPages;
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
const page = await pdf.getPage(pageNum);
// Apply scale factor to viewport
const viewport = page.getViewport({ scale: ${scale} });
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
const page = await pdf.getPage(pageNum);
// Apply scale factor to viewport
const viewport = page.getViewport({ scale: ${scale} });
// Apply max width/height constraints if specified
let finalScale = ${scale};
${options.maxWidth ? `
if (viewport.width > ${options.maxWidth}) {
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
}` : ''}
${options.maxHeight ? `
if (viewport.height > ${options.maxHeight}) {
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
finalScale = Math.min(finalScale, heightScale);
}` : ''}
// Apply max width/height constraints if specified
let finalScale = ${scale};
${options.maxWidth ? `
if (viewport.width > ${options.maxWidth}) {
finalScale = ${options.maxWidth} / (viewport.width / ${scale});
}` : ''}
${options.maxHeight ? `
if (viewport.height > ${options.maxHeight}) {
const heightScale = ${options.maxHeight} / (viewport.height / ${scale});
finalScale = Math.min(finalScale, heightScale);
}` : ''}
// Get final viewport with adjusted scale
const finalViewport = page.getViewport({ scale: finalScale });
// Get final viewport with adjusted scale
const finalViewport = page.getViewport({ scale: finalScale });
const canvas = document.createElement('canvas');
const context = canvas.getContext('2d');
canvas.width = finalViewport.width;
canvas.height = finalViewport.height;
canvas.setAttribute('data-page', pageNum);
const canvas = document.createElement('canvas');
const context = canvas.getContext('2d');
canvas.width = finalViewport.width;
canvas.height = finalViewport.height;
canvas.setAttribute('data-page', pageNum);
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
document.body.appendChild(canvas);
}
window.renderComplete = true;
})();
</script>
</body>
</html>
`;
await page.render({ canvasContext: context, viewport: finalViewport }).promise;
document.body.appendChild(canvas);
}
window.renderComplete = true;
})();
</script>
</body>
</html>
`;
// Replace the placeholder with the actual base64 PDF data
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
// Replace the placeholder with the actual base64 PDF data
const htmlContent: string = htmlTemplate.replace("__PDF_DATA__", base64Pdf);
// Set the page content
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
// Set the page content
await page.setContent(htmlContent, { waitUntil: 'networkidle0' });
// Wait until the PDF.js rendering is complete
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
// Wait until the PDF.js rendering is complete
await page.waitForFunction(() => (window as any).renderComplete === true, { timeout: 30000 });
// Query all canvas elements (each representing a rendered PDF page)
const canvasElements = await page.$$('canvas');
const webpBuffers: Uint8Array[] = [];
// Query all canvas elements (each representing a rendered PDF page)
const canvasElements = await page.$$('canvas');
const webpBuffers: Uint8Array[] = [];
for (const canvasElement of canvasElements) {
// Screenshot the canvas element as WebP
const screenshotBuffer = (await canvasElement.screenshot({
type: 'webp',
quality: quality,
encoding: 'binary'
})) as Buffer;
webpBuffers.push(new Uint8Array(screenshotBuffer));
for (const canvasElement of canvasElements) {
// Screenshot the canvas element as WebP
const screenshotBuffer = (await canvasElement.screenshot({
type: 'webp',
quality: quality,
encoding: 'binary'
})) as Buffer;
webpBuffers.push(new Uint8Array(screenshotBuffer));
}
await page.close();
return webpBuffers;
} catch (err) {
await page.close().catch(() => {});
throw err;
}
await page.close();
return webpBuffers;
}
/**
@@ -575,8 +604,6 @@ export class SmartPdf {
{
format: 'jpeg',
progressive: true,
// SmartJimp uses a different quality scale, need to check if adjustment is needed
// For now, pass through the quality value
quality
}
);

View File

@@ -1,29 +1,30 @@
// native
import * as http from 'http';
import * as path from 'path';
export { http, path };
export { path };
// @pushrocks
import * as smartbuffer from '@push.rocks/smartbuffer';
import * as smartfile from '@push.rocks/smartfile';
import * as smartfs from '@push.rocks/smartfs';
import * as smartdelay from '@push.rocks/smartdelay';
import * as smartpromise from '@push.rocks/smartpromise';
import * as smartpath from '@push.rocks/smartpath';
import * as smartpuppeteer from '@push.rocks/smartpuppeteer';
import * as smartnetwork from '@push.rocks/smartnetwork';
import * as smartserve from '@push.rocks/smartserve';
import * as smartunique from '@push.rocks/smartunique';
import * as smartjimp from '@push.rocks/smartjimp';
export {
smartbuffer,
smartfile,
smartfs,
smartdelay,
smartpromise,
smartpath,
smartpuppeteer,
smartunique,
smartnetwork,
smartserve,
smartjimp,
};
@@ -33,8 +34,7 @@ import * as tsclass from '@tsclass/tsclass';
export { tsclass };
// thirdparty
import express from 'express';
import pdf2json from 'pdf2json';
import pdfLib from 'pdf-lib';
export { express, pdf2json, pdfLib, };
export { pdf2json, pdfLib };