Compare commits
1 Commit
Author | SHA1 | Date | |
---|---|---|---|
|
be574df599 |
@@ -9,7 +9,7 @@
|
||||
"author": "Lossless GmbH",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
"test": "(tstest test/ --verbose --timeout 60)",
|
||||
"test": "(tstest test/ --verbose --timeout 120)",
|
||||
"build": "(tsbuild tsfolders --allowimplicitany)",
|
||||
"buildDocs": "tsdoc"
|
||||
},
|
||||
@@ -24,6 +24,7 @@
|
||||
"@push.rocks/smartbuffer": "^3.0.5",
|
||||
"@push.rocks/smartdelay": "^3.0.5",
|
||||
"@push.rocks/smartfile": "^11.2.5",
|
||||
"@push.rocks/smartjimp": "^1.2.0",
|
||||
"@push.rocks/smartnetwork": "^4.1.2",
|
||||
"@push.rocks/smartpath": "^6.0.0",
|
||||
"@push.rocks/smartpromise": "^4.2.3",
|
||||
|
839
pnpm-lock.yaml
generated
839
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
2
pnpm-workspace.yaml
Normal file
2
pnpm-workspace.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
onlyBuiltDependencies:
|
||||
- sharp
|
741
readme.md
741
readme.md
@@ -1,420 +1,409 @@
|
||||
# @push.rocks/smartpdf
|
||||
Create PDFs on the fly from HTML, websites, or existing PDFs with advanced features like text extraction, PDF merging, and PNG conversion.
|
||||
# @push.rocks/smartpdf 📄✨
|
||||
|
||||
## Install
|
||||
To install `@push.rocks/smartpdf`, use npm or yarn:
|
||||
> **Transform HTML, websites, and PDFs into beautiful documents with just a few lines of code!**
|
||||
|
||||
[npm package](https://www.npmjs.com/package/@push.rocks/smartpdf)
|
||||
[TypeScript](https://www.typescriptlang.org/)
|
||||
[License](./license)
|
||||
|
||||
## 🚀 Why SmartPDF?
|
||||
|
||||
SmartPDF is your Swiss Army knife for PDF operations in Node.js. Whether you're generating invoices, creating reports, or converting web pages to PDFs, we've got you covered with a simple, powerful API.
|
||||
|
||||
### ✨ Features at a Glance
|
||||
|
||||
- 📝 **HTML to PDF** - Transform HTML strings with full CSS support
|
||||
- 🌐 **Website to PDF** - Capture any website as a perfectly formatted PDF
|
||||
- 🔀 **PDF Merging** - Combine multiple PDFs into one
|
||||
- 🖼️ **PDF to Images** - Convert PDFs to PNG, WebP, or progressive JPEG
|
||||
- 📑 **Text Extraction** - Pull text content from existing PDFs
|
||||
- 🎯 **Smart Port Management** - Automatic port allocation for concurrent instances
|
||||
- 💪 **TypeScript First** - Full type safety and IntelliSense support
|
||||
- ⚡ **High Performance** - Optimized for speed and reliability
|
||||
|
||||
## 📦 Installation
|
||||
|
||||
```bash
|
||||
# Using npm
|
||||
npm install @push.rocks/smartpdf --save
|
||||
```
|
||||
|
||||
Or with yarn:
|
||||
|
||||
```bash
|
||||
# Using yarn
|
||||
yarn add @push.rocks/smartpdf
|
||||
|
||||
# Using pnpm (recommended)
|
||||
pnpm add @push.rocks/smartpdf
|
||||
```
|
||||
|
||||
## Requirements
|
||||
This package requires a Chrome or Chromium installation to be available on the system, as it uses Puppeteer for rendering. The package will automatically detect and use the appropriate executable.
|
||||
|
||||
## Usage
|
||||
`@push.rocks/smartpdf` provides a powerful interface for PDF generation and manipulation. All examples use ESM syntax and TypeScript.
|
||||
|
||||
### Getting Started
|
||||
First, import the necessary classes:
|
||||
## 🎯 Quick Start
|
||||
|
||||
```typescript
|
||||
import { SmartPdf, IPdf } from '@push.rocks/smartpdf';
|
||||
import { SmartPdf } from '@push.rocks/smartpdf';
|
||||
|
||||
// Create and start SmartPdf
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
|
||||
// Generate a PDF from HTML
|
||||
const pdf = await smartPdf.getA4PdfResultForHtmlString(`
|
||||
<h1>Hello, PDF World! 🌍</h1>
|
||||
<p>This is my first SmartPDF document.</p>
|
||||
`);
|
||||
|
||||
// Save it
|
||||
await fs.writeFile('my-first-pdf.pdf', pdf.buffer);
|
||||
|
||||
// Don't forget to clean up!
|
||||
await smartPdf.stop();
|
||||
```
|
||||
|
||||
### Basic Setup with Automatic Port Allocation
|
||||
SmartPdf automatically finds an available port between 20000-30000 for its internal server:
|
||||
## 📚 Core Concepts
|
||||
|
||||
### 🏗️ Instance Management
|
||||
|
||||
SmartPDF uses a client-server architecture for maximum performance. Always remember:
|
||||
|
||||
1. **Create** an instance
|
||||
2. **Start** the server
|
||||
3. **Do your PDF magic**
|
||||
4. **Stop** the server
|
||||
|
||||
```typescript
|
||||
async function setupSmartPdf() {
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
|
||||
// Your PDF operations here
|
||||
|
||||
await smartPdf.stop();
|
||||
}
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
// ... your PDF operations ...
|
||||
await smartPdf.stop();
|
||||
```
|
||||
|
||||
### Advanced Setup with Custom Port Configuration
|
||||
You can specify custom port settings to avoid conflicts or meet specific requirements:
|
||||
### 🔌 Smart Port Allocation
|
||||
|
||||
Run multiple instances without port conflicts:
|
||||
|
||||
```typescript
|
||||
// Use a specific port
|
||||
const smartPdf = await SmartPdf.create({ port: 3000 });
|
||||
// Each instance automatically finds a free port
|
||||
const instance1 = await SmartPdf.create(); // Port: 20000
|
||||
const instance2 = await SmartPdf.create(); // Port: 20001
|
||||
const instance3 = await SmartPdf.create(); // Port: 20002
|
||||
|
||||
// Use a custom port range
|
||||
const smartPdf = await SmartPdf.create({
|
||||
portRangeStart: 4000,
|
||||
portRangeEnd: 5000
|
||||
// Or specify custom settings
|
||||
const customInstance = await SmartPdf.create({
|
||||
port: 3000, // Use specific port
|
||||
portRangeStart: 4000, // Or define a range
|
||||
portRangeEnd: 5000
|
||||
});
|
||||
```
|
||||
|
||||
## 🎨 PDF Generation
|
||||
|
||||
### 📝 From HTML String
|
||||
|
||||
Create beautiful PDFs from HTML with full CSS support:
|
||||
|
||||
```typescript
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
|
||||
const pdf = await smartPdf.getA4PdfResultForHtmlString(`
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<style>
|
||||
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap');
|
||||
|
||||
body {
|
||||
font-family: 'Roboto', sans-serif;
|
||||
margin: 40px;
|
||||
color: #333;
|
||||
}
|
||||
|
||||
.header {
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
color: white;
|
||||
padding: 30px;
|
||||
border-radius: 10px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.content {
|
||||
margin-top: 30px;
|
||||
line-height: 1.6;
|
||||
}
|
||||
|
||||
.highlight {
|
||||
background-color: #ffd93d;
|
||||
padding: 2px 6px;
|
||||
border-radius: 3px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="header">
|
||||
<h1>Invoice #2024-001</h1>
|
||||
<p>Generated on ${new Date().toLocaleDateString()}</p>
|
||||
</div>
|
||||
<div class="content">
|
||||
<h2>Bill To:</h2>
|
||||
<p>Acme Corporation</p>
|
||||
<p>Total: <span class="highlight">$1,234.56</span></p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
`);
|
||||
|
||||
await fs.writeFile('invoice.pdf', pdf.buffer);
|
||||
await smartPdf.stop();
|
||||
```
|
||||
|
||||
### 🌐 From Website
|
||||
|
||||
Capture any website as a PDF with two powerful methods:
|
||||
|
||||
#### Standard A4 Format
|
||||
Perfect for articles and documents:
|
||||
|
||||
```typescript
|
||||
const pdf = await smartPdf.getPdfResultForWebsite('https://example.com');
|
||||
```
|
||||
|
||||
#### Full Page Capture
|
||||
Capture the entire scrollable area:
|
||||
|
||||
```typescript
|
||||
const fullPagePdf = await smartPdf.getFullWebsiteAsSinglePdf('https://example.com');
|
||||
```
|
||||
|
||||
### 🔀 Merge Multiple PDFs
|
||||
|
||||
Combine PDFs like a pro:
|
||||
|
||||
```typescript
|
||||
// Load your PDFs
|
||||
const invoice = await smartPdf.readFileToPdfObject('./invoice.pdf');
|
||||
const terms = await smartPdf.readFileToPdfObject('./terms.pdf');
|
||||
const contract = await smartPdf.getA4PdfResultForHtmlString('<h1>Contract</h1>...');
|
||||
|
||||
// Merge them in order
|
||||
const mergedPdf = await smartPdf.mergePdfs([
|
||||
contract.buffer,
|
||||
invoice.buffer,
|
||||
terms.buffer
|
||||
]);
|
||||
|
||||
await fs.writeFile('complete-document.pdf', mergedPdf);
|
||||
```
|
||||
|
||||
## 🖼️ Image Generation
|
||||
|
||||
### 🎨 Convert PDF to Images
|
||||
|
||||
SmartPDF supports three image formats, each with its own strengths:
|
||||
|
||||
#### PNG - Crystal Clear Quality
|
||||
|
||||
```typescript
|
||||
const pngImages = await smartPdf.convertPDFToPngBytes(pdf.buffer, {
|
||||
scale: SmartPdf.SCALE_HIGH // 216 DPI - perfect for most uses
|
||||
});
|
||||
|
||||
// The server will find an available port in your specified range
|
||||
await smartPdf.start();
|
||||
console.log(`Server running on port: ${smartPdf.serverPort}`);
|
||||
// Save each page
|
||||
pngImages.forEach((png, index) => {
|
||||
fs.writeFileSync(`page-${index + 1}.png`, png);
|
||||
});
|
||||
```
|
||||
|
||||
### Creating PDFs from HTML Strings
|
||||
Generate PDFs from HTML content with full CSS support:
|
||||
#### WebP - Modern & Efficient
|
||||
|
||||
```typescript
|
||||
async function createPdfFromHtml() {
|
||||
const smartPdf = await SmartPdf.create();
|
||||
const webpImages = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
|
||||
quality: 90, // 0-100 quality scale
|
||||
scale: 2.0 // 144 DPI - great for web
|
||||
});
|
||||
```
|
||||
|
||||
#### JPEG - Progressive Loading
|
||||
|
||||
```typescript
|
||||
const jpegImages = await smartPdf.convertPDFToJpegBytes(pdf.buffer, {
|
||||
quality: 85, // Balance between size and quality
|
||||
scale: SmartPdf.SCALE_SCREEN, // 144 DPI
|
||||
maxWidth: 1920 // Constrain dimensions
|
||||
});
|
||||
```
|
||||
|
||||
### 📏 DPI & Scale Guide
|
||||
|
||||
SmartPDF makes it easy to get the right resolution:
|
||||
|
||||
```typescript
|
||||
// Built-in scale constants
|
||||
SmartPdf.SCALE_SCREEN // 2.0 = ~144 DPI (web display)
|
||||
SmartPdf.SCALE_HIGH // 3.0 = ~216 DPI (high quality, default)
|
||||
SmartPdf.SCALE_PRINT // 6.0 = ~432 DPI (print quality)
|
||||
|
||||
// Or calculate your own
|
||||
const scale = SmartPdf.getScaleForDPI(300); // Get scale for 300 DPI
|
||||
```
|
||||
|
||||
### 🖼️ Thumbnail Generation
|
||||
|
||||
Create perfect thumbnails for document previews:
|
||||
|
||||
```typescript
|
||||
const thumbnails = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
|
||||
scale: 0.5, // Small but readable
|
||||
quality: 70, // Lower quality for tiny files
|
||||
maxWidth: 200, // Constrain to thumbnail size
|
||||
maxHeight: 200
|
||||
});
|
||||
```
|
||||
|
||||
## 📊 Format Comparison
|
||||
|
||||
Choose the right format for your needs:
|
||||
|
||||
| Format | File Size | Best For | Special Features |
|
||||
|--------|-----------|----------|------------------|
|
||||
| **PNG** | Largest | Screenshots, diagrams, text | Lossless, transparency |
|
||||
| **JPEG** | 30-50% of PNG | Photos, complex images | Progressive loading |
|
||||
| **WebP** | 25-40% of PNG | Modern web apps | Best compression |
|
||||
|
||||
## 🛡️ Best Practices
|
||||
|
||||
### 1. Always Use Try-Finally
|
||||
|
||||
```typescript
|
||||
let smartPdf: SmartPdf;
|
||||
|
||||
try {
|
||||
smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
|
||||
const htmlString = `
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<style>
|
||||
body { font-family: Arial, sans-serif; margin: 40px; }
|
||||
h1 { color: #333; }
|
||||
.highlight { background-color: yellow; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Professional PDF Document</h1>
|
||||
<p>This PDF was generated from <span class="highlight">HTML content</span>.</p>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
// Your PDF operations
|
||||
|
||||
const pdf: IPdf = await smartPdf.getA4PdfResultForHtmlString(htmlString);
|
||||
|
||||
// pdf.buffer contains the PDF data
|
||||
// pdf.id contains a unique identifier
|
||||
// pdf.name contains the filename
|
||||
// pdf.metadata contains additional information like extracted text
|
||||
|
||||
await smartPdf.stop();
|
||||
}
|
||||
```
|
||||
|
||||
### Generating PDFs from Websites
|
||||
Capture web pages as PDFs with two different approaches:
|
||||
|
||||
#### A4 Format PDF from Website
|
||||
Captures the viewable area formatted for A4 paper:
|
||||
|
||||
```typescript
|
||||
async function createA4PdfFromWebsite() {
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
|
||||
const pdf: IPdf = await smartPdf.getPdfResultForWebsite('https://example.com');
|
||||
|
||||
// Save to file
|
||||
await fs.writeFile('website-a4.pdf', pdf.buffer);
|
||||
|
||||
await smartPdf.stop();
|
||||
}
|
||||
```
|
||||
|
||||
#### Full Webpage as Single PDF
|
||||
Captures the entire webpage in a single PDF, regardless of length:
|
||||
|
||||
```typescript
|
||||
async function createFullPdfFromWebsite() {
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
|
||||
const pdf: IPdf = await smartPdf.getFullWebsiteAsSinglePdf('https://example.com');
|
||||
|
||||
// This captures the entire scrollable area
|
||||
await fs.writeFile('website-full.pdf', pdf.buffer);
|
||||
|
||||
await smartPdf.stop();
|
||||
}
|
||||
```
|
||||
|
||||
### Merging Multiple PDFs
|
||||
Combine multiple PDF files into a single document:
|
||||
|
||||
```typescript
|
||||
async function mergePdfs() {
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
|
||||
// Create or load your PDFs
|
||||
const pdf1 = await smartPdf.getA4PdfResultForHtmlString('<h1>Document 1</h1>');
|
||||
const pdf2 = await smartPdf.getA4PdfResultForHtmlString('<h1>Document 2</h1>');
|
||||
const pdf3 = await smartPdf.readFileToPdfObject('./existing-document.pdf');
|
||||
|
||||
// Merge PDFs - order matters!
|
||||
const mergedPdf: Uint8Array = await smartPdf.mergePdfs([
|
||||
pdf1.buffer,
|
||||
pdf2.buffer,
|
||||
pdf3.buffer
|
||||
]);
|
||||
|
||||
// Save the merged PDF
|
||||
await fs.writeFile('merged-document.pdf', mergedPdf);
|
||||
|
||||
await smartPdf.stop();
|
||||
}
|
||||
```
|
||||
|
||||
### Reading PDFs and Extracting Text
|
||||
Extract text content from existing PDFs:
|
||||
|
||||
```typescript
|
||||
async function extractTextFromPdf() {
|
||||
const smartPdf = await SmartPdf.create();
|
||||
|
||||
// Read PDF from disk
|
||||
const pdf: IPdf = await smartPdf.readFileToPdfObject('/path/to/document.pdf');
|
||||
|
||||
// Extract all text
|
||||
const extractedText = await smartPdf.extractTextFromPdfBuffer(pdf.buffer);
|
||||
console.log('Extracted text:', extractedText);
|
||||
|
||||
// The pdf object also contains metadata with text extraction
|
||||
console.log('Metadata:', pdf.metadata);
|
||||
}
|
||||
```
|
||||
|
||||
### Converting PDF to PNG Images
|
||||
Convert each page of a PDF into PNG images with configurable quality:
|
||||
|
||||
```typescript
|
||||
async function convertPdfToPng() {
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
|
||||
// Load a PDF
|
||||
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
|
||||
|
||||
// Convert to PNG images with default high quality (216 DPI)
|
||||
const pngImages: Uint8Array[] = await smartPdf.convertPDFToPngBytes(pdf.buffer);
|
||||
|
||||
// Or specify custom scale/DPI
|
||||
const highResPngs = await smartPdf.convertPDFToPngBytes(pdf.buffer, {
|
||||
scale: SmartPdf.SCALE_PRINT, // 6.0 scale = ~432 DPI
|
||||
maxWidth: 3000, // Optional: limit maximum width
|
||||
maxHeight: 4000 // Optional: limit maximum height
|
||||
});
|
||||
|
||||
// Save each page as a PNG
|
||||
pngImages.forEach((pngBuffer, index) => {
|
||||
fs.writeFileSync(`page-${index + 1}.png`, pngBuffer);
|
||||
});
|
||||
|
||||
await smartPdf.stop();
|
||||
}
|
||||
```
|
||||
|
||||
#### Understanding Scale and DPI
|
||||
PDF.js renders at 72 DPI by default. Use these scale factors for different quality levels:
|
||||
- `SmartPdf.SCALE_SCREEN` (2.0): ~144 DPI - Good for screen display
|
||||
- `SmartPdf.SCALE_HIGH` (3.0): ~216 DPI - High quality (default)
|
||||
- `SmartPdf.SCALE_PRINT` (6.0): ~432 DPI - Print quality
|
||||
- Custom DPI: `scale = SmartPdf.getScaleForDPI(300)` for 300 DPI
|
||||
|
||||
### Converting PDF to WebP Images
|
||||
Generate web-optimized images using WebP format. WebP provides 25-35% better compression than PNG/JPEG while maintaining quality:
|
||||
|
||||
```typescript
|
||||
async function createWebPImages() {
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
|
||||
// Load a PDF
|
||||
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
|
||||
|
||||
// Create high-quality WebP images (default: 3.0 scale = 216 DPI, 85% quality)
|
||||
const webpImages = await smartPdf.convertPDFToWebpBytes(pdf.buffer);
|
||||
|
||||
// Save WebP images
|
||||
webpImages.forEach((webpBuffer, index) => {
|
||||
fs.writeFileSync(`page-${index + 1}.webp`, webpBuffer);
|
||||
});
|
||||
|
||||
await smartPdf.stop();
|
||||
}
|
||||
```
|
||||
|
||||
#### Creating Thumbnails
|
||||
Generate small thumbnail images for PDF galleries or document lists:
|
||||
|
||||
```typescript
|
||||
async function createThumbnails() {
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
|
||||
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
|
||||
|
||||
// Create small thumbnails (0.5 scale = ~36 DPI, 70% quality)
|
||||
const thumbnails = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
|
||||
scale: 0.5, // Small readable thumbnails
|
||||
quality: 70 // Lower quality for smaller files
|
||||
});
|
||||
|
||||
// Save thumbnails
|
||||
thumbnails.forEach((thumb, index) => {
|
||||
fs.writeFileSync(`thumb-${index + 1}.webp`, thumb);
|
||||
});
|
||||
|
||||
await smartPdf.stop();
|
||||
}
|
||||
```
|
||||
|
||||
#### Constrained Dimensions
|
||||
Create previews with maximum width/height constraints, useful for responsive layouts:
|
||||
|
||||
```typescript
|
||||
async function createConstrainedPreviews() {
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
|
||||
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
|
||||
|
||||
// Create previews that fit within 800x600 pixels
|
||||
const previews = await smartPdf.convertPDFToWebpBytes(pdf.buffer, {
|
||||
scale: 1.0, // Start with full size
|
||||
quality: 90, // High quality
|
||||
maxWidth: 800, // Maximum 800px wide
|
||||
maxHeight: 600 // Maximum 600px tall
|
||||
});
|
||||
|
||||
// The method automatically scales down to fit within constraints
|
||||
previews.forEach((preview, index) => {
|
||||
fs.writeFileSync(`preview-constrained-${index + 1}.webp`, preview);
|
||||
});
|
||||
|
||||
await smartPdf.stop();
|
||||
}
|
||||
```
|
||||
|
||||
#### WebP Options
|
||||
The `convertPDFToWebpBytes` method accepts these options:
|
||||
|
||||
- `scale`: Scale factor for preview size (default: 3.0 for ~216 DPI)
|
||||
- `quality`: WebP compression quality (default: 85, range: 0-100)
|
||||
- `maxWidth`: Maximum width in pixels (optional)
|
||||
- `maxHeight`: Maximum height in pixels (optional)
|
||||
|
||||
Common scale values:
|
||||
- `0.5`: Thumbnails (~36 DPI)
|
||||
- `2.0`: Screen display (~144 DPI)
|
||||
- `3.0`: High quality (~216 DPI, default)
|
||||
- `6.0`: Print quality (~432 DPI)
|
||||
|
||||
### Using External Browser Instance
|
||||
For advanced use cases, you can provide your own Puppeteer browser instance:
|
||||
|
||||
```typescript
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
async function useExternalBrowser() {
|
||||
// Create your own browser instance with custom options
|
||||
const browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
||||
});
|
||||
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start(browser);
|
||||
|
||||
// Use SmartPdf normally
|
||||
const pdf = await smartPdf.getA4PdfResultForHtmlString('<h1>Hello</h1>');
|
||||
|
||||
// SmartPdf will not close the browser when stopping
|
||||
await smartPdf.stop();
|
||||
|
||||
// You control the browser lifecycle
|
||||
await browser.close();
|
||||
}
|
||||
```
|
||||
|
||||
### Running Multiple Instances
|
||||
Thanks to automatic port allocation, you can run multiple SmartPdf instances simultaneously:
|
||||
|
||||
```typescript
|
||||
async function runMultipleInstances() {
|
||||
// Each instance automatically finds its own free port
|
||||
const instance1 = await SmartPdf.create();
|
||||
const instance2 = await SmartPdf.create();
|
||||
const instance3 = await SmartPdf.create();
|
||||
|
||||
// Start all instances
|
||||
await Promise.all([
|
||||
instance1.start(),
|
||||
instance2.start(),
|
||||
instance3.start()
|
||||
]);
|
||||
|
||||
console.log(`Instance 1 running on port: ${instance1.serverPort}`);
|
||||
console.log(`Instance 2 running on port: ${instance2.serverPort}`);
|
||||
console.log(`Instance 3 running on port: ${instance3.serverPort}`);
|
||||
|
||||
// Use instances independently
|
||||
const pdfs = await Promise.all([
|
||||
instance1.getA4PdfResultForHtmlString('<h1>PDF 1</h1>'),
|
||||
instance2.getA4PdfResultForHtmlString('<h1>PDF 2</h1>'),
|
||||
instance3.getA4PdfResultForHtmlString('<h1>PDF 3</h1>')
|
||||
]);
|
||||
|
||||
// Clean up all instances
|
||||
await Promise.all([
|
||||
instance1.stop(),
|
||||
instance2.stop(),
|
||||
instance3.stop()
|
||||
]);
|
||||
}
|
||||
```
|
||||
|
||||
### Error Handling
|
||||
Always wrap SmartPdf operations in try-catch blocks and ensure proper cleanup:
|
||||
|
||||
```typescript
|
||||
async function safePdfGeneration() {
|
||||
let smartPdf: SmartPdf;
|
||||
|
||||
try {
|
||||
smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start();
|
||||
|
||||
const pdf = await smartPdf.getA4PdfResultForHtmlString('<h1>Hello</h1>');
|
||||
// Process PDF...
|
||||
|
||||
} catch (error) {
|
||||
console.error('PDF generation failed:', error);
|
||||
// Handle error appropriately
|
||||
} finally {
|
||||
// Always cleanup
|
||||
if (smartPdf) {
|
||||
await smartPdf.stop();
|
||||
}
|
||||
} finally {
|
||||
if (smartPdf) {
|
||||
await smartPdf.stop(); // Always cleanup!
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### IPdf Interface
|
||||
The `IPdf` interface represents a PDF with its metadata:
|
||||
### 2. Optimize HTML for PDFs
|
||||
|
||||
```typescript
|
||||
const optimizedHtml = `
|
||||
<style>
|
||||
/* Use print-friendly styles */
|
||||
@media print {
|
||||
.no-print { display: none; }
|
||||
}
|
||||
|
||||
/* Avoid page breaks in wrong places */
|
||||
h1, h2, h3 { page-break-after: avoid; }
|
||||
table { page-break-inside: avoid; }
|
||||
</style>
|
||||
${yourContent}
|
||||
`;
|
||||
```
|
||||
|
||||
### 3. Handle Large Documents
|
||||
|
||||
For documents with many pages:
|
||||
|
||||
```typescript
|
||||
// Process in batches
|
||||
const pages = await smartPdf.convertPDFToPngBytes(largePdf.buffer);
|
||||
|
||||
for (let i = 0; i < pages.length; i += 10) {
|
||||
const batch = pages.slice(i, i + 10);
|
||||
await processBatch(batch);
|
||||
}
|
||||
```
|
||||
|
||||
## 🎯 Advanced Usage
|
||||
|
||||
### 🌐 Custom Browser Instance
|
||||
|
||||
Bring your own Puppeteer instance:
|
||||
|
||||
```typescript
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
const browser = await puppeteer.launch({
|
||||
headless: 'new',
|
||||
args: ['--no-sandbox', '--disable-dev-shm-usage']
|
||||
});
|
||||
|
||||
const smartPdf = await SmartPdf.create();
|
||||
await smartPdf.start(browser);
|
||||
|
||||
// SmartPdf won't close your browser
|
||||
await smartPdf.stop();
|
||||
await browser.close(); // You manage it
|
||||
```
|
||||
|
||||
### ⚡ Parallel Processing
|
||||
|
||||
Process multiple PDFs concurrently:
|
||||
|
||||
```typescript
|
||||
const urls = ['https://example1.com', 'https://example2.com', 'https://example3.com'];
|
||||
|
||||
const pdfs = await Promise.all(
|
||||
urls.map(url => smartPdf.getFullWebsiteAsSinglePdf(url))
|
||||
);
|
||||
|
||||
// Or with multiple instances for maximum performance
|
||||
const instances = await Promise.all(
|
||||
Array(3).fill(null).map(() => SmartPdf.create())
|
||||
);
|
||||
|
||||
await Promise.all(instances.map(i => i.start()));
|
||||
|
||||
// Process in parallel across instances
|
||||
const results = await Promise.all(
|
||||
urls.map((url, i) => instances[i % instances.length].getFullWebsiteAsSinglePdf(url))
|
||||
);
|
||||
|
||||
// Cleanup all instances
|
||||
await Promise.all(instances.map(i => i.stop()));
|
||||
```
|
||||
|
||||
## 📝 API Reference
|
||||
|
||||
### Class: SmartPdf
|
||||
|
||||
#### Static Methods
|
||||
- `create(options?: ISmartPdfOptions)` - Create a new SmartPdf instance
|
||||
- `getScaleForDPI(dpi: number)` - Calculate scale factor for desired DPI
|
||||
|
||||
#### Instance Methods
|
||||
- `start(browser?: Browser)` - Start the PDF server
|
||||
- `stop()` - Stop the PDF server
|
||||
- `getA4PdfResultForHtmlString(html: string)` - Generate A4 PDF from HTML
|
||||
- `getPdfResultForWebsite(url: string)` - Generate A4 PDF from website
|
||||
- `getFullWebsiteAsSinglePdf(url: string)` - Capture full webpage as PDF
|
||||
- `mergePdfs(buffers: Uint8Array[])` - Merge multiple PDFs
|
||||
- `readFileToPdfObject(path: string)` - Read PDF file from disk
|
||||
- `extractTextFromPdfBuffer(buffer: Buffer)` - Extract text from PDF
|
||||
- `convertPDFToPngBytes(buffer: Uint8Array, options?)` - Convert to PNG
|
||||
- `convertPDFToWebpBytes(buffer: Uint8Array, options?)` - Convert to WebP
|
||||
- `convertPDFToJpegBytes(buffer: Uint8Array, options?)` - Convert to JPEG
|
||||
|
||||
### Interface: IPdf
|
||||
|
||||
```typescript
|
||||
interface IPdf {
|
||||
name: string; // Filename of the PDF
|
||||
buffer: Buffer; // PDF content as buffer
|
||||
id: string | null; // Unique identifier
|
||||
name: string; // Filename
|
||||
buffer: Buffer; // PDF content
|
||||
id: string | null; // Unique identifier
|
||||
metadata?: {
|
||||
textExtraction?: string; // Extracted text content
|
||||
textExtraction?: string; // Extracted text
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
## 🤝 Contributing
|
||||
|
||||
1. **Always start and stop**: Initialize with `start()` and cleanup with `stop()` to properly manage resources.
|
||||
2. **Port management**: Use the automatic port allocation feature to avoid conflicts when running multiple instances.
|
||||
3. **Error handling**: Always implement proper error handling as PDF generation can fail due to various reasons.
|
||||
4. **Resource cleanup**: Ensure `stop()` is called even if an error occurs to prevent memory leaks.
|
||||
5. **HTML optimization**: When creating PDFs from HTML, ensure your HTML is well-formed and CSS is embedded or inlined.
|
||||
We love contributions! Please feel free to submit a Pull Request.
|
||||
|
||||
## License and Legal Information
|
||||
|
||||
|
107
test/test.ts
107
test/test.ts
@@ -174,6 +174,113 @@ tap.test('should verify WebP files are smaller than PNG', async () => {
|
||||
expect(totalWebpSize).toBeLessThan(totalPngSize);
|
||||
});
|
||||
|
||||
tap.test('should create JPEG images from PDF', async () => {
|
||||
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
|
||||
const jpegImages = await testSmartPdf.convertPDFToJpegBytes(pdfObject.buffer);
|
||||
expect(jpegImages.length).toBeGreaterThan(0);
|
||||
console.log('JPEG image sizes:', jpegImages.map(img => img.length));
|
||||
|
||||
// Save the first page as JPEG
|
||||
fs.writeFileSync(path.join(testResultsDir, 'jpeg_default_page1.jpg'), Buffer.from(jpegImages[0]));
|
||||
});
|
||||
|
||||
tap.test('should create JPEG images with different quality levels', async () => {
|
||||
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
|
||||
|
||||
// Test different quality levels
|
||||
const qualityLevels = [50, 70, 85, 95];
|
||||
|
||||
for (const quality of qualityLevels) {
|
||||
const jpegImages = await testSmartPdf.convertPDFToJpegBytes(pdfObject.buffer, {
|
||||
scale: smartpdf.SmartPdf.SCALE_HIGH,
|
||||
quality: quality
|
||||
});
|
||||
|
||||
console.log(`JPEG quality ${quality}: ${jpegImages[0].length} bytes`);
|
||||
|
||||
// Save first page at each quality level
|
||||
fs.writeFileSync(
|
||||
path.join(testResultsDir, `jpeg_quality_${quality}_page1.jpg`),
|
||||
Buffer.from(jpegImages[0])
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('should create JPEG images with max dimensions', async () => {
|
||||
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
|
||||
|
||||
// Create constrained JPEG images
|
||||
const constrainedJpegs = await testSmartPdf.convertPDFToJpegBytes(pdfObject.buffer, {
|
||||
scale: smartpdf.SmartPdf.SCALE_HIGH,
|
||||
quality: 85,
|
||||
maxWidth: 1200,
|
||||
maxHeight: 1200
|
||||
});
|
||||
|
||||
expect(constrainedJpegs.length).toBeGreaterThan(0);
|
||||
console.log('Constrained JPEG sizes:', constrainedJpegs.map(img => img.length));
|
||||
|
||||
// Save constrained JPEG
|
||||
fs.writeFileSync(path.join(testResultsDir, 'jpeg_constrained_page1.jpg'), Buffer.from(constrainedJpegs[0]));
|
||||
});
|
||||
|
||||
tap.test('should compare file sizes between PNG, WebP, and JPEG', async () => {
|
||||
const pdfObject = await testSmartPdf.readFileToPdfObject('.nogit/3.pdf');
|
||||
|
||||
// Generate all three formats at the same scale
|
||||
const comparisonScale = smartpdf.SmartPdf.SCALE_HIGH; // 3.0 scale
|
||||
|
||||
const pngImages = await testSmartPdf.convertPDFToPngBytes(pdfObject.buffer, {
|
||||
scale: comparisonScale
|
||||
});
|
||||
const webpImages = await testSmartPdf.convertPDFToWebpBytes(pdfObject.buffer, {
|
||||
scale: comparisonScale,
|
||||
quality: 85
|
||||
});
|
||||
const jpegImages = await testSmartPdf.convertPDFToJpegBytes(pdfObject.buffer, {
|
||||
scale: comparisonScale,
|
||||
quality: 85
|
||||
});
|
||||
|
||||
expect(pngImages.length).toEqual(webpImages.length);
|
||||
expect(pngImages.length).toEqual(jpegImages.length);
|
||||
|
||||
// Compare sizes
|
||||
let totalPngSize = 0;
|
||||
let totalWebpSize = 0;
|
||||
let totalJpegSize = 0;
|
||||
|
||||
pngImages.forEach((png, index) => {
|
||||
const pngSize = png.length;
|
||||
const webpSize = webpImages[index].length;
|
||||
const jpegSize = jpegImages[index].length;
|
||||
|
||||
totalPngSize += pngSize;
|
||||
totalWebpSize += webpSize;
|
||||
totalJpegSize += jpegSize;
|
||||
|
||||
const webpReduction = ((pngSize - webpSize) / pngSize * 100).toFixed(1);
|
||||
const jpegReduction = ((pngSize - jpegSize) / pngSize * 100).toFixed(1);
|
||||
|
||||
console.log(`Page ${index + 1}:`);
|
||||
console.log(` PNG: ${pngSize} bytes`);
|
||||
console.log(` WebP: ${webpSize} bytes (${webpReduction}% smaller than PNG)`);
|
||||
console.log(` JPEG: ${jpegSize} bytes (${jpegReduction}% smaller than PNG)`);
|
||||
});
|
||||
|
||||
const totalWebpReduction = ((totalPngSize - totalWebpSize) / totalPngSize * 100).toFixed(1);
|
||||
const totalJpegReduction = ((totalPngSize - totalJpegSize) / totalPngSize * 100).toFixed(1);
|
||||
|
||||
console.log('\nTotal size comparison:');
|
||||
console.log(`PNG: ${totalPngSize} bytes`);
|
||||
console.log(`WebP: ${totalWebpSize} bytes (${totalWebpReduction}% reduction)`);
|
||||
console.log(`JPEG: ${totalJpegSize} bytes (${totalJpegReduction}% reduction)`);
|
||||
|
||||
// JPEG and WebP should both be smaller than PNG
|
||||
expect(totalJpegSize).toBeLessThan(totalPngSize);
|
||||
expect(totalWebpSize).toBeLessThan(totalPngSize);
|
||||
});
|
||||
|
||||
tap.test('should close the SmartPdf instance properly', async () => {
|
||||
await testSmartPdf.stop();
|
||||
});
|
||||
|
@@ -538,4 +538,52 @@ export class SmartPdf {
|
||||
await page.close();
|
||||
return webpBuffers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a PDF to progressive JPEG bytes for each page.
|
||||
* This method creates progressive JPEG images that load in multiple passes,
|
||||
* showing a low-quality preview first, then progressively improving.
|
||||
* Uses SmartJimp for true progressive JPEG encoding.
|
||||
*/
|
||||
public async convertPDFToJpegBytes(
|
||||
pdfBytes: Uint8Array,
|
||||
options: {
|
||||
scale?: number; // Scale factor for output size (default: 3.0 for 216 DPI)
|
||||
quality?: number; // JPEG quality 0-100 (default: 85)
|
||||
maxWidth?: number; // Maximum width in pixels (optional)
|
||||
maxHeight?: number; // Maximum height in pixels (optional)
|
||||
} = {}
|
||||
): Promise<Uint8Array[]> {
|
||||
// First, convert PDF to PNG using our existing method
|
||||
const pngBuffers = await this.convertPDFToPngBytes(pdfBytes, {
|
||||
scale: options.scale,
|
||||
maxWidth: options.maxWidth,
|
||||
maxHeight: options.maxHeight
|
||||
});
|
||||
|
||||
// Initialize SmartJimp in sharp mode for progressive JPEG support
|
||||
const smartJimpInstance = new plugins.smartjimp.SmartJimp({ mode: 'sharp' });
|
||||
|
||||
// Convert each PNG to progressive JPEG
|
||||
const jpegBuffers: Uint8Array[] = [];
|
||||
const quality = options.quality || 85;
|
||||
|
||||
for (const pngBuffer of pngBuffers) {
|
||||
// Convert PNG buffer to progressive JPEG
|
||||
const jpegBuffer = await smartJimpInstance.computeAssetVariation(
|
||||
Buffer.from(pngBuffer),
|
||||
{
|
||||
format: 'jpeg',
|
||||
progressive: true,
|
||||
// SmartJimp uses a different quality scale, need to check if adjustment is needed
|
||||
// For now, pass through the quality value
|
||||
quality
|
||||
}
|
||||
);
|
||||
|
||||
jpegBuffers.push(new Uint8Array(jpegBuffer));
|
||||
}
|
||||
|
||||
return jpegBuffers;
|
||||
}
|
||||
}
|
@@ -13,6 +13,7 @@ import * as smartpath from '@push.rocks/smartpath';
|
||||
import * as smartpuppeteer from '@push.rocks/smartpuppeteer';
|
||||
import * as smartnetwork from '@push.rocks/smartnetwork';
|
||||
import * as smartunique from '@push.rocks/smartunique';
|
||||
import * as smartjimp from '@push.rocks/smartjimp';
|
||||
|
||||
export {
|
||||
smartbuffer,
|
||||
@@ -23,6 +24,7 @@ export {
|
||||
smartpuppeteer,
|
||||
smartunique,
|
||||
smartnetwork,
|
||||
smartjimp,
|
||||
};
|
||||
|
||||
// tsclass scope
|
||||
|
Reference in New Issue
Block a user