Compare commits

...

1 Commits

Author SHA1 Message Date
Juergen Kunz
a4c3415838 feat(smartpdf): add automatic port allocation and multi-instance support 2025-08-01 16:09:17 +00:00
7 changed files with 2102 additions and 2176 deletions

View File

@@ -1,5 +1,16 @@
# Changelog # Changelog
## 2025-08-01 - 3.3.0 - feat(smartpdf)
Add automatic port allocation and multi-instance support
- Added ISmartPdfOptions interface with port configuration options
- Implemented automatic port allocation between 20000-30000 by default
- Added support for custom port ranges via portRangeStart/portRangeEnd options
- Added support for specific port assignment via port option
- Fixed resource cleanup when port allocation fails
- Multiple SmartPdf instances can now run simultaneously without port conflicts
- Updated readme with comprehensive documentation for all features
## 2025-02-25 - 3.2.2 - fix(SmartPdf) ## 2025-02-25 - 3.2.2 - fix(SmartPdf)
Fix buffer handling for PDF conversion and text extraction Fix buffer handling for PDF conversion and text extraction

View File

@@ -1,6 +1,6 @@
{ {
"name": "@push.rocks/smartpdf", "name": "@push.rocks/smartpdf",
"version": "3.2.2", "version": "3.3.0",
"private": false, "private": false,
"description": "A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.", "description": "A library for creating PDFs dynamically from HTML or websites with additional features like merging PDFs.",
"main": "dist_ts/index.js", "main": "dist_ts/index.js",
@@ -9,32 +9,31 @@
"author": "Lossless GmbH", "author": "Lossless GmbH",
"license": "MIT", "license": "MIT",
"scripts": { "scripts": {
"test": "(tstest test/ --web)", "test": "(tstest test/ --verbose --timeout 60)",
"build": "(tsbuild --web --allowimplicitany)", "build": "(tsbuild tsfolders --allowimplicitany)",
"buildDocs": "tsdoc" "buildDocs": "tsdoc"
}, },
"devDependencies": { "devDependencies": {
"@git.zone/tsbuild": "^2.2.1", "@git.zone/tsbuild": "^2.6.4",
"@git.zone/tsdoc": "^1.4.3", "@git.zone/tsdoc": "^1.5.0",
"@git.zone/tsrun": "^1.3.3", "@git.zone/tsrun": "^1.3.3",
"@git.zone/tstest": "^1.0.96", "@git.zone/tstest": "^2.3.2",
"@push.rocks/tapbundle": "^5.5.6", "@types/node": "^24.1.0"
"@types/node": "^22.13.5"
}, },
"dependencies": { "dependencies": {
"@push.rocks/smartbuffer": "^3.0.4", "@push.rocks/smartbuffer": "^3.0.5",
"@push.rocks/smartdelay": "^3.0.5", "@push.rocks/smartdelay": "^3.0.5",
"@push.rocks/smartfile": "^11.2.0", "@push.rocks/smartfile": "^11.2.5",
"@push.rocks/smartnetwork": "^3.0.0", "@push.rocks/smartnetwork": "^4.1.2",
"@push.rocks/smartpath": "^5.0.18", "@push.rocks/smartpath": "^6.0.0",
"@push.rocks/smartpromise": "^4.2.3", "@push.rocks/smartpromise": "^4.2.3",
"@push.rocks/smartpuppeteer": "^2.0.5", "@push.rocks/smartpuppeteer": "^2.0.5",
"@push.rocks/smartunique": "^3.0.9", "@push.rocks/smartunique": "^3.0.9",
"@tsclass/tsclass": "^4.4.0", "@tsclass/tsclass": "^9.2.0",
"@types/express": "^5.0.0", "@types/express": "^5.0.3",
"express": "^4.21.2", "express": "^5.1.0",
"pdf-lib": "^1.17.1", "pdf-lib": "^1.17.1",
"pdf2json": "3.1.5" "pdf2json": "3.2.0"
}, },
"files": [ "files": [
"ts/**/*", "ts/**/*",
@@ -69,5 +68,6 @@
"repository": { "repository": {
"type": "git", "type": "git",
"url": "https://code.foss.global/push.rocks/smartpdf.git" "url": "https://code.foss.global/push.rocks/smartpdf.git"
} },
"packageManager": "pnpm@10.11.0+sha512.6540583f41cc5f628eb3d9773ecee802f4f9ef9923cc45b69890fb47991d4b092964694ec3a4f738a420c918a333062c8b925d312f42e4f0c263eb603551f977"
} }

3799
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

277
readme.md
View File

@@ -1,8 +1,8 @@
# @push.rocks/smartpdf # @push.rocks/smartpdf
Create PDFs on the fly Create PDFs on the fly from HTML, websites, or existing PDFs with advanced features like text extraction, PDF merging, and PNG conversion.
## Install ## Install
To install `@push.rocks/smartpdf`, use the following command with npm: To install `@push.rocks/smartpdf`, use npm or yarn:
```bash ```bash
npm install @push.rocks/smartpdf --save npm install @push.rocks/smartpdf --save
@@ -14,87 +14,304 @@ Or with yarn:
yarn add @push.rocks/smartpdf yarn add @push.rocks/smartpdf
``` ```
## Requirements
This package requires a Chrome or Chromium installation to be available on the system, as it uses Puppeteer for rendering. The package will automatically detect and use the appropriate executable.
## Usage ## Usage
This documentation will guide you through using `@push.rocks/smartpdf` to create PDFs in various ways, such as from HTML strings or full web pages, and provides examples on how to merge multiple PDFs into one. Remember, all examples provided here use ESM syntax and TypeScript. `@push.rocks/smartpdf` provides a powerful interface for PDF generation and manipulation. All examples use ESM syntax and TypeScript.
### Getting Started ### Getting Started
First, ensure you have the package installed and you can import it into your TypeScript project: First, import the necessary classes:
```typescript ```typescript
import { SmartPdf, IPdf } from '@push.rocks/smartpdf'; import { SmartPdf, IPdf } from '@push.rocks/smartpdf';
``` ```
### Creating a PDF from an HTML String ### Basic Setup with Automatic Port Allocation
To create a PDF from a simple HTML string, you'll need to instantiate `SmartPdf` and call `getA4PdfResultForHtmlString`. SmartPdf automatically finds an available port between 20000-30000 for its internal server:
```typescript
async function setupSmartPdf() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
// Your PDF operations here
await smartPdf.stop();
}
```
### Advanced Setup with Custom Port Configuration
You can specify custom port settings to avoid conflicts or meet specific requirements:
```typescript
// Use a specific port
// (named differently from the instance below so both declarations can live in one scope)
const fixedPortPdf = await SmartPdf.create({ port: 3000 });

// Use a custom port range
const smartPdf = await SmartPdf.create({
  portRangeStart: 4000,
  portRangeEnd: 5000
});

// The server will find an available port in your specified range
await smartPdf.start();
console.log(`Server running on port: ${smartPdf.serverPort}`);
```
### Creating PDFs from HTML Strings
Generate PDFs from HTML content with full CSS support:
```typescript ```typescript
async function createPdfFromHtml() { async function createPdfFromHtml() {
const smartPdf = await SmartPdf.create(); const smartPdf = await SmartPdf.create();
await smartPdf.start(); await smartPdf.start();
const htmlString = `<h1>Hello World</h1>`;
const htmlString = `
<!DOCTYPE html>
<html>
<head>
<style>
body { font-family: Arial, sans-serif; margin: 40px; }
h1 { color: #333; }
.highlight { background-color: yellow; }
</style>
</head>
<body>
<h1>Professional PDF Document</h1>
<p>This PDF was generated from <span class="highlight">HTML content</span>.</p>
</body>
</html>
`;
const pdf: IPdf = await smartPdf.getA4PdfResultForHtmlString(htmlString); const pdf: IPdf = await smartPdf.getA4PdfResultForHtmlString(htmlString);
console.log(pdf.buffer); // This is your PDF buffer
// pdf.buffer contains the PDF data
// pdf.id contains a unique identifier
// pdf.name contains the filename
// pdf.metadata contains additional information like extracted text
await smartPdf.stop(); await smartPdf.stop();
} }
createPdfFromHtml();
``` ```
### Generating a PDF from a Website ### Generating PDFs from Websites
You may want to capture a full webpage as a PDF. `SmartPdf` provides two methods to accomplish this. One captures the viewable area as an A4 pdf, and the other captures the entire webpage. Capture web pages as PDFs with two different approaches:
#### A4 PDF from a Website #### A4 Format PDF from Website
Captures the viewable area formatted for A4 paper:
```typescript ```typescript
async function createA4PdfFromWebsite() { async function createA4PdfFromWebsite() {
const smartPdf = await SmartPdf.create(); const smartPdf = await SmartPdf.create();
await smartPdf.start(); await smartPdf.start();
const pdf: IPdf = await smartPdf.getPdfResultForWebsite('https://example.com'); const pdf: IPdf = await smartPdf.getPdfResultForWebsite('https://example.com');
console.log(pdf.buffer); // PDF buffer of the webpage
// Save to file
await fs.writeFile('website-a4.pdf', pdf.buffer);
await smartPdf.stop(); await smartPdf.stop();
} }
createA4PdfFromWebsite();
``` ```
#### Full Webpage as a Single PDF #### Full Webpage as Single PDF
Captures the entire webpage in a single PDF, regardless of length:
```typescript ```typescript
async function createFullPdfFromWebsite() { async function createFullPdfFromWebsite() {
const smartPdf = await SmartPdf.create(); const smartPdf = await SmartPdf.create();
await smartPdf.start(); await smartPdf.start();
const pdf: IPdf = await smartPdf.getFullWebsiteAsSinglePdf('https://example.com'); const pdf: IPdf = await smartPdf.getFullWebsiteAsSinglePdf('https://example.com');
console.log(pdf.buffer); // PDF buffer with the full webpage
// This captures the entire scrollable area
await fs.writeFile('website-full.pdf', pdf.buffer);
await smartPdf.stop(); await smartPdf.stop();
} }
createFullPdfFromWebsite();
``` ```
### Merging Multiple PDFs ### Merging Multiple PDFs
If you have multiple PDF objects (`IPdf`) that you wish to merge into a single PDF file, you can use the `mergePdfs` method. Combine multiple PDF files into a single document:
```typescript ```typescript
async function mergePdfs() { async function mergePdfs() {
const smartPdf = await SmartPdf.create(); const smartPdf = await SmartPdf.create();
// Assume pdf1 and pdf2 are objects of type IPdf that you want to merge await smartPdf.start();
const mergedPdf: IPdf = await smartPdf.mergePdfs([pdf1, pdf2]);
console.log(mergedPdf.buffer); // Buffer of the merged PDF // Create or load your PDFs
const pdf1 = await smartPdf.getA4PdfResultForHtmlString('<h1>Document 1</h1>');
const pdf2 = await smartPdf.getA4PdfResultForHtmlString('<h1>Document 2</h1>');
const pdf3 = await smartPdf.readFileToPdfObject('./existing-document.pdf');
// Merge PDFs - order matters!
const mergedPdf: Uint8Array = await smartPdf.mergePdfs([
pdf1.buffer,
pdf2.buffer,
pdf3.buffer
]);
// Save the merged PDF
await fs.writeFile('merged-document.pdf', mergedPdf);
await smartPdf.stop();
} }
mergePdfs();
``` ```
### Reading PDF from Disk and Extracting Text ### Reading PDFs and Extracting Text
To read a PDF from the disk and extract its text content: Extract text content from existing PDFs:
```typescript ```typescript
async function readAndExtractFromPdf() { async function extractTextFromPdf() {
const smartPdf = await SmartPdf.create(); const smartPdf = await SmartPdf.create();
const pdf: IPdf = await smartPdf.readFileToPdfObject('/path/to/your/pdf/file.pdf');
// Read PDF from disk
const pdf: IPdf = await smartPdf.readFileToPdfObject('/path/to/document.pdf');
// Extract all text
const extractedText = await smartPdf.extractTextFromPdfBuffer(pdf.buffer); const extractedText = await smartPdf.extractTextFromPdfBuffer(pdf.buffer);
console.log(extractedText); // Extracted text from the PDF console.log('Extracted text:', extractedText);
// The pdf object also contains metadata with text extraction
console.log('Metadata:', pdf.metadata);
} }
readAndExtractFromPdf();
``` ```
This guide provides a comprehensive overview of generating PDFs using `@push.rocks/smartpdf`. Remember to start and stop your `SmartPdf` instance to properly initialize and clean up resources, especially when working with server-side rendering or capturing web pages. ### Converting PDF to PNG Images
Convert each page of a PDF into PNG images:
```typescript
async function convertPdfToPng() {
const smartPdf = await SmartPdf.create();
await smartPdf.start();
// Load a PDF
const pdf = await smartPdf.readFileToPdfObject('./document.pdf');
// Convert to PNG images (one per page)
const pngImages: Uint8Array[] = await smartPdf.convertPDFToPngBytes(pdf.buffer);
// Save each page as a PNG
pngImages.forEach((pngBuffer, index) => {
fs.writeFileSync(`page-${index + 1}.png`, pngBuffer);
});
await smartPdf.stop();
}
```
### Using External Browser Instance
For advanced use cases, you can provide your own Puppeteer browser instance:
```typescript
import puppeteer from 'puppeteer';
async function useExternalBrowser() {
// Create your own browser instance with custom options
const browser = await puppeteer.launch({
headless: true,
args: ['--no-sandbox', '--disable-setuid-sandbox']
});
const smartPdf = await SmartPdf.create();
await smartPdf.start(browser);
// Use SmartPdf normally
const pdf = await smartPdf.getA4PdfResultForHtmlString('<h1>Hello</h1>');
// SmartPdf will not close the browser when stopping
await smartPdf.stop();
// You control the browser lifecycle
await browser.close();
}
```
### Running Multiple Instances
Thanks to automatic port allocation, you can run multiple SmartPdf instances simultaneously:
```typescript
async function runMultipleInstances() {
// Each instance automatically finds its own free port
const instance1 = await SmartPdf.create();
const instance2 = await SmartPdf.create();
const instance3 = await SmartPdf.create();
// Start all instances
await Promise.all([
instance1.start(),
instance2.start(),
instance3.start()
]);
console.log(`Instance 1 running on port: ${instance1.serverPort}`);
console.log(`Instance 2 running on port: ${instance2.serverPort}`);
console.log(`Instance 3 running on port: ${instance3.serverPort}`);
// Use instances independently
const pdfs = await Promise.all([
instance1.getA4PdfResultForHtmlString('<h1>PDF 1</h1>'),
instance2.getA4PdfResultForHtmlString('<h1>PDF 2</h1>'),
instance3.getA4PdfResultForHtmlString('<h1>PDF 3</h1>')
]);
// Clean up all instances
await Promise.all([
instance1.stop(),
instance2.stop(),
instance3.stop()
]);
}
```
### Error Handling
Always wrap SmartPdf operations in try-catch blocks and ensure proper cleanup:
```typescript
// Example: generate a PDF with error handling and guaranteed cleanup.
async function safePdfGeneration() {
  // Typed as possibly undefined: if SmartPdf.create() throws, the variable is
  // never assigned and the finally block must be able to check for that.
  let smartPdf: SmartPdf | undefined;

  try {
    smartPdf = await SmartPdf.create();
    await smartPdf.start();

    const pdf = await smartPdf.getA4PdfResultForHtmlString('<h1>Hello</h1>');
    // Process PDF...
  } catch (error) {
    console.error('PDF generation failed:', error);
    // Handle error appropriately
  } finally {
    // Always cleanup
    if (smartPdf) {
      await smartPdf.stop();
    }
  }
}
```
### IPdf Interface
The `IPdf` interface represents a PDF with its metadata:
```typescript
interface IPdf {
name: string; // Filename of the PDF
buffer: Buffer; // PDF content as buffer
id: string | null; // Unique identifier
metadata?: {
textExtraction?: string; // Extracted text content
};
}
```
## Best Practices
1. **Always start and stop**: Initialize with `start()` and cleanup with `stop()` to properly manage resources.
2. **Port management**: Use the automatic port allocation feature to avoid conflicts when running multiple instances.
3. **Error handling**: Always implement proper error handling as PDF generation can fail due to various reasons.
4. **Resource cleanup**: Ensure `stop()` is called even if an error occurs to prevent memory leaks.
5. **HTML optimization**: When creating PDFs from HTML, ensure your HTML is well-formed and CSS is embedded or inlined.
## License and Legal Information ## License and Legal Information
@@ -113,4 +330,4 @@ Registered at District court Bremen HRB 35230 HB, Germany
For any legal inquiries or if you require further information, please contact us via email at hello@task.vc. For any legal inquiries or if you require further information, please contact us via email at hello@task.vc.
By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works. By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.

97
test/test.port.ts Normal file
View File

@@ -0,0 +1,97 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as smartpdf from '../ts/index.js';
// Starts three default-configured instances side by side and checks that each
// one receives its own port from the default 20000-30000 range and can render a PDF.
tap.test('should create multiple SmartPdf instances with automatic port allocation', async () => {
  const instances = [
    new smartpdf.SmartPdf(),
    new smartpdf.SmartPdf(),
    new smartpdf.SmartPdf(),
  ];
  // Only instances whose start() resolved are stopped during cleanup.
  const startedInstances: smartpdf.SmartPdf[] = [];

  try {
    // Start all instances, one after another
    for (const instance of instances) {
      await instance.start();
      startedInstances.push(instance);
    }
    const [instance1, instance2, instance3] = instances;

    // Verify every port lies inside the default range
    expect(instance1.serverPort).toBeGreaterThanOrEqual(20000);
    expect(instance1.serverPort).toBeLessThanOrEqual(30000);
    expect(instance2.serverPort).toBeGreaterThanOrEqual(20000);
    expect(instance2.serverPort).toBeLessThanOrEqual(30000);
    expect(instance3.serverPort).toBeGreaterThanOrEqual(20000);
    expect(instance3.serverPort).toBeLessThanOrEqual(30000);

    // Ensure all ports are different
    expect(instance1.serverPort).not.toEqual(instance2.serverPort);
    expect(instance1.serverPort).not.toEqual(instance3.serverPort);
    expect(instance2.serverPort).not.toEqual(instance3.serverPort);

    console.log(`Instance 1 port: ${instance1.serverPort}`);
    console.log(`Instance 2 port: ${instance2.serverPort}`);
    console.log(`Instance 3 port: ${instance3.serverPort}`);

    // Test that all instances work correctly
    const pdf1 = await instance1.getA4PdfResultForHtmlString('<h1>Instance 1</h1>');
    const pdf2 = await instance2.getA4PdfResultForHtmlString('<h1>Instance 2</h1>');
    const pdf3 = await instance3.getA4PdfResultForHtmlString('<h1>Instance 3</h1>');

    expect(pdf1.buffer).toBeInstanceOf(Buffer);
    expect(pdf2.buffer).toBeInstanceOf(Buffer);
    expect(pdf3.buffer).toBeInstanceOf(Buffer);
  } finally {
    // Clean up even when an assertion above failed, so no browser or server
    // is left running after the test.
    for (const instance of startedInstances) {
      await instance.stop();
    }
  }
});
// Checks that an instance configured with portRangeStart/portRangeEnd
// ends up on a port inside that range.
tap.test('should create SmartPdf instance with custom port range', async () => {
  const customInstance = new smartpdf.SmartPdf({
    portRangeStart: 25000,
    portRangeEnd: 26000
  });

  await customInstance.start();
  try {
    expect(customInstance.serverPort).toBeGreaterThanOrEqual(25000);
    expect(customInstance.serverPort).toBeLessThanOrEqual(26000);

    console.log(`Custom range instance port: ${customInstance.serverPort}`);
  } finally {
    // Stop the instance even if an expectation failed.
    await customInstance.stop();
  }
});
// Checks that an explicitly requested port is the one the instance reports.
tap.test('should create SmartPdf instance with specific port', async () => {
  const specificPortInstance = new smartpdf.SmartPdf({
    port: 28888
  });

  await specificPortInstance.start();
  try {
    expect(specificPortInstance.serverPort).toEqual(28888);

    console.log(`Specific port instance: ${specificPortInstance.serverPort}`);
  } finally {
    // Stop the instance even if the expectation failed.
    await specificPortInstance.stop();
  }
});
// Checks that a second instance requesting a port already held by a running
// instance fails to start with an "already in use" error.
tap.test('should throw error when specific port is already in use', async () => {
  const instance1 = new smartpdf.SmartPdf({ port: 29999 });
  await instance1.start();

  const instance2 = new smartpdf.SmartPdf({ port: 29999 });
  let errorThrown = false;
  // Tracks an unexpected successful start so the second instance can be stopped too.
  let instance2Started = false;

  try {
    try {
      await instance2.start();
      instance2Started = true;
    } catch (error) {
      errorThrown = true;
      // Narrow the caught value before reading `.message`.
      const message = error instanceof Error ? error.message : String(error);
      expect(message).toInclude('already in use');
    }

    expect(errorThrown).toBeTrue();
  } finally {
    // Always release the port, even when one of the expectations above failed.
    if (instance2Started) {
      await instance2.stop();
    }
    await instance1.stop();
  }
});
export default tap.start();

View File

@@ -1,4 +1,4 @@
import { expect, tap } from '@push.rocks/tapbundle'; import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as smartpdf from '../ts/index.js'; import * as smartpdf from '../ts/index.js';
import * as fs from 'fs'; import * as fs from 'fs';
import * as path from 'path'; import * as path from 'path';

View File

@@ -7,10 +7,16 @@ import { execFile } from 'child_process';
declare const document: any; declare const document: any;
/**
 * Options accepted by the `SmartPdf` constructor and `SmartPdf.create()`.
 * They control which port the instance's internal HTML server listens on.
 */
export interface ISmartPdfOptions {
  /**
   * Specific port to listen on. When set (truthy), `start()` uses exactly this
   * port and throws if it is already in use; the port range is then not consulted.
   */
  port?: number;
  /**
   * Start of the range searched for a free port when `port` is not set.
   * Defaults to 20000.
   */
  portRangeStart?: number;
  /**
   * End of the range searched for a free port when `port` is not set.
   * Defaults to 30000. `start()` throws if no free port is found in the range.
   */
  portRangeEnd?: number;
}
export class SmartPdf { export class SmartPdf {
// STATIC // STATIC
public static async create() { public static async create(optionsArg?: ISmartPdfOptions) {
const smartpdfInstance = new SmartPdf(); const smartpdfInstance = new SmartPdf(optionsArg);
return smartpdfInstance; return smartpdfInstance;
} }
@@ -21,9 +27,15 @@ export class SmartPdf {
externalBrowserBool: boolean = false; externalBrowserBool: boolean = false;
private _readyDeferred: plugins.smartpromise.Deferred<void>; private _readyDeferred: plugins.smartpromise.Deferred<void>;
private _candidates: { [key: string]: PdfCandidate } = {}; private _candidates: { [key: string]: PdfCandidate } = {};
private _options: ISmartPdfOptions;
constructor() { constructor(optionsArg?: ISmartPdfOptions) {
this._readyDeferred = new plugins.smartpromise.Deferred(); this._readyDeferred = new plugins.smartpromise.Deferred();
this._options = {
portRangeStart: 20000,
portRangeEnd: 30000,
...optionsArg
};
} }
async start(headlessBrowserArg?: plugins.smartpuppeteer.puppeteer.Browser) { async start(headlessBrowserArg?: plugins.smartpuppeteer.puppeteer.Browser) {
@@ -39,7 +51,37 @@ export class SmartPdf {
}); });
} }
// setup server // Find an available port BEFORE creating server
const smartnetworkInstance = new plugins.smartnetwork.SmartNetwork();
if (this._options.port) {
// If a specific port is requested, check if it's available
const isPortAvailable = await smartnetworkInstance.isLocalPortUnused(this._options.port);
if (isPortAvailable) {
this.serverPort = this._options.port;
} else {
// Clean up browser if we created one
if (!this.externalBrowserBool && this.headlessBrowser) {
await this.headlessBrowser.close();
}
throw new Error(`Requested port ${this._options.port} is already in use`);
}
} else {
// Find a free port in the specified range
this.serverPort = await smartnetworkInstance.findFreePort(
this._options.portRangeStart,
this._options.portRangeEnd
);
if (!this.serverPort) {
// Clean up browser if we created one
if (!this.externalBrowserBool && this.headlessBrowser) {
await this.headlessBrowser.close();
}
throw new Error(`No free ports available in range ${this._options.portRangeStart}-${this._options.portRangeEnd}`);
}
}
// Now setup server after we know we have a valid port
const app = plugins.express(); const app = plugins.express();
app.get('/:pdfId', (req, res) => { app.get('/:pdfId', (req, res) => {
const wantedCandidate = this._candidates[req.params.pdfId]; const wantedCandidate = this._candidates[req.params.pdfId];
@@ -51,10 +93,10 @@ export class SmartPdf {
res.send(wantedCandidate.htmlString); res.send(wantedCandidate.htmlString);
}); });
this.htmlServerInstance = plugins.http.createServer(app); this.htmlServerInstance = plugins.http.createServer(app);
const smartnetworkInstance = new plugins.smartnetwork.SmartNetwork();
const portAvailable = smartnetworkInstance.isLocalPortUnused(3210); this.htmlServerInstance.listen(this.serverPort, 'localhost');
this.htmlServerInstance.listen(3210, 'localhost');
this.htmlServerInstance.on('listening', () => { this.htmlServerInstance.on('listening', () => {
console.log(`SmartPdf server listening on port ${this.serverPort}`);
this._readyDeferred.resolve(); this._readyDeferred.resolve();
done.resolve(); done.resolve();
}); });
@@ -87,7 +129,7 @@ export class SmartPdf {
width: 794, width: 794,
height: 1122, height: 1122,
}); });
const response = await page.goto(`http://localhost:3210/${pdfCandidate.pdfId}`, { const response = await page.goto(`http://localhost:${this.serverPort}/${pdfCandidate.pdfId}`, {
waitUntil: 'networkidle2', waitUntil: 'networkidle2',
}); });
const headers = response.headers(); const headers = response.headers();