Files
smartpreview/ts_web/pdfprocessor.ts
Juergen Kunz bc1c7edd35 feat(initial): add comprehensive PDF to JPEG preview library with dual-environment support
- Add Node.js implementation using @push.rocks/smartpdf
- Add browser implementation with PDF.js and Web Workers
- Support configurable quality, dimensions, and page selection
- Include comprehensive TypeScript definitions and error handling
- Provide extensive test coverage for both environments
- Add download functionality and browser compatibility checking
2025-08-03 21:44:01 +00:00

423 lines
12 KiB
TypeScript

import type {
IWebPdfProcessor,
IPreviewOptions,
IPreviewResult,
TWebInputType,
IWorkerMessage,
IPdfProcessRequest,
IPdfProcessResponse,
IWebPreviewOptions
} from './interfaces.js';
import { PreviewError } from './interfaces.js';
/**
 * Browser-side PDF processor that renders one page of a PDF document to a
 * JPEG image.
 *
 * Rendering is delegated to a dedicated Web Worker whose script is built at
 * runtime from an inline source string (see `createWorkerBlob`). That script
 * loads PDF.js from the cdnjs CDN and draws the page on an `OffscreenCanvas`.
 *
 * Lifecycle: call `init()` once, then `processPreview()` any number of times,
 * and finally `cleanup()` to terminate the worker.
 */
export class WebPdfProcessor implements IWebPdfProcessor {
  public readonly inputFormat = 'pdf' as const;
  public readonly outputFormat = 'jpeg' as const;

  /** The rendering worker, or `null` before `init()` / after `cleanup()`. */
  private worker: Worker | null = null;
  private isInitialized = false;
  /** In-flight initialization, shared so overlapping `init()` calls start only one worker. */
  private initPromise: Promise<void> | null = null;
  /** Requests posted to the worker that have not been answered yet, keyed by request id. */
  private pendingRequests = new Map<string, {
    resolve: (result: IPdfProcessResponse) => void;
    reject: (error: Error) => void;
    timeout?: ReturnType<typeof setTimeout>;
  }>();
  private requestIdCounter = 0;

  /**
   * Initialize the PDF processor by starting the worker and waiting until it
   * reports that it is ready.
   *
   * Calling this again after a successful initialization is a no-op, and
   * overlapping calls share the same in-flight initialization.
   *
   * @throws PreviewError with code `PROCESSING_FAILED` if the worker cannot be started.
   */
  public async init(): Promise<void> {
    if (this.isInitialized) {
      return;
    }
    if (this.initPromise === null) {
      this.initPromise = this.startWorker().finally(() => {
        this.initPromise = null;
      });
    }
    await this.initPromise;
  }

  /**
   * Process a PDF and generate a JPEG preview of one page.
   *
   * @param input - PDF data as `ArrayBuffer`, `Uint8Array`, `Blob`/`File` or data URL string.
   * @param options - Preview options; defaults are quality 80, width 800,
   *   height 600, page 1 and scale 1.0. A data URL is generated unless
   *   `generateDataUrl` is explicitly `false`.
   * @returns The JPEG blob, its rendered dimensions, byte size, MIME type and
   *   data URL (an empty string when data URL generation is disabled).
   * @throws PreviewError - errors that already are `PreviewError`s are rethrown
   *   unchanged; anything else is wrapped with code `PROCESSING_FAILED`.
   */
  public async processPreview(input: TWebInputType, options: IWebPreviewOptions = {}): Promise<IPreviewResult> {
    if (!this.isInitialized || !this.worker) {
      throw new PreviewError('PROCESSING_FAILED', 'PDF processor not initialized');
    }
    try {
      // Normalize the input and make sure it at least looks like a PDF
      const arrayBuffer = await this.inputToArrayBuffer(input);
      this.validatePdfBuffer(arrayBuffer);

      // Fill in defaults, then validate the effective option set
      const processOptions = {
        quality: options.quality ?? 80,
        width: options.width ?? 800,
        height: options.height ?? 600,
        page: options.page ?? 1,
        scale: options.scale ?? 1.0,
      };
      this.validateOptions(processOptions);

      // Render in the worker and wrap the returned bytes
      const response = await this.processWithWorker(arrayBuffer, processOptions, options.timeout);
      const blob = new Blob([response.imageData], { type: 'image/jpeg' });

      let dataUrl = '';
      if (options.generateDataUrl !== false) {
        dataUrl = await this.blobToDataUrl(blob);
      }

      return {
        blob,
        dimensions: {
          width: response.width,
          height: response.height,
        },
        size: response.imageData.byteLength,
        mimeType: 'image/jpeg',
        dataUrl,
      };
    } catch (error) {
      if (error instanceof PreviewError) {
        throw error;
      }
      throw new PreviewError(
        'PROCESSING_FAILED',
        'Failed to process PDF',
        error instanceof Error ? error : new Error(String(error))
      );
    }
  }

  /**
   * Clean up resources: reject every request that is still pending,
   * terminate the worker and mark the processor as uninitialized.
   */
  public async cleanup(): Promise<void> {
    if (this.worker) {
      this.rejectAllPending(new Error('Worker cleanup'));
      this.worker.terminate();
      this.worker = null;
    }
    this.isInitialized = false;
  }

  /**
   * Create the worker, wire up its handlers and wait for its ready signal.
   * On failure the half-started worker is terminated so it does not leak.
   */
  private async startWorker(): Promise<void> {
    let workerUrl: string | null = null;
    try {
      // Create worker from blob URL to avoid CORS issues
      const workerBlob = await this.createWorkerBlob();
      workerUrl = URL.createObjectURL(workerBlob);
      this.worker = new Worker(workerUrl);
      this.setupWorkerEventHandlers();
      await this.waitForWorkerReady();
      this.isInitialized = true;
    } catch (error) {
      if (this.worker) {
        this.worker.terminate();
        this.worker = null;
      }
      throw new PreviewError(
        'PROCESSING_FAILED',
        'Failed to initialize PDF processor',
        error instanceof Error ? error : new Error(String(error))
      );
    } finally {
      // By now the worker has either loaded its script or failed for good,
      // so the blob URL is no longer needed and can be released.
      if (workerUrl !== null) {
        URL.revokeObjectURL(workerUrl);
      }
    }
  }

  /**
   * Convert the supported input types to an `ArrayBuffer`.
   *
   * An `ArrayBuffer` is returned as-is; a `Uint8Array` is copied so that only
   * the bytes covered by the view are returned; a `Blob` (which includes
   * `File`) is read completely; a string must be a data URL whose payload is
   * base64.
   *
   * @throws PreviewError with code `INVALID_INPUT` for unsupported or malformed input.
   */
  private async inputToArrayBuffer(input: TWebInputType): Promise<ArrayBuffer> {
    if (input instanceof ArrayBuffer) {
      return input;
    }
    if (input instanceof Uint8Array) {
      // Copy into a fresh buffer: honours byteOffset/byteLength and always
      // yields a plain ArrayBuffer, whatever backs the view.
      const copy = new Uint8Array(input.byteLength);
      copy.set(input);
      return copy.buffer;
    }
    if (typeof input === 'string') {
      if (!input.startsWith('data:')) {
        throw new PreviewError('INVALID_INPUT', 'String input must be a data URL');
      }
      const commaIndex = input.indexOf(',');
      if (commaIndex === -1) {
        throw new PreviewError('INVALID_INPUT', 'Data URL is missing its data section');
      }
      let binaryString: string;
      try {
        binaryString = atob(input.slice(commaIndex + 1));
      } catch (error) {
        throw new PreviewError(
          'INVALID_INPUT',
          'Data URL does not contain valid base64 data',
          error instanceof Error ? error : new Error(String(error))
        );
      }
      const bytes = new Uint8Array(binaryString.length);
      for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i);
      }
      return bytes.buffer;
    }
    // File extends Blob, so this single check covers both.
    if (input instanceof Blob) {
      return await input.arrayBuffer();
    }
    throw new PreviewError('INVALID_INPUT', 'Unsupported input type');
  }

  /**
   * Validate that the buffer starts with the `%PDF` magic bytes.
   *
   * @throws PreviewError with code `INVALID_INPUT` if the buffer is shorter
   *   than four bytes or does not start with `%PDF`.
   */
  private validatePdfBuffer(buffer: ArrayBuffer): void {
    if (buffer.byteLength < 4) {
      throw new PreviewError('INVALID_INPUT', 'Input is too small to be a valid PDF');
    }
    const header = new Uint8Array(buffer, 0, 4);
    const pdfMagic = [37, 80, 68, 70]; // %PDF
    if (!pdfMagic.every((byte, index) => header[index] === byte)) {
      throw new PreviewError('INVALID_INPUT', 'Input is not a valid PDF file');
    }
  }

  /**
   * Validate processing options.
   *
   * The comparisons are written in their positive form and negated so that
   * `NaN` is rejected as well (every comparison against `NaN` is false).
   *
   * @throws PreviewError with code `INVALID_OPTIONS` if any option is out of range.
   */
  private validateOptions(options: Required<IPreviewOptions>): void {
    const { quality, width, height, page, scale } = options;
    if (!(quality >= 1 && quality <= 100)) {
      throw new PreviewError('INVALID_OPTIONS', 'Quality must be between 1 and 100');
    }
    if (!(width > 0 && height > 0)) {
      throw new PreviewError('INVALID_OPTIONS', 'Width and height must be positive numbers');
    }
    if (!(page >= 1)) {
      throw new PreviewError('INVALID_OPTIONS', 'Page number must be 1 or greater');
    }
    if (!Number.isInteger(page)) {
      throw new PreviewError('INVALID_OPTIONS', 'Page number must be an integer');
    }
    if (!(scale > 0) || !Number.isFinite(scale)) {
      throw new PreviewError('INVALID_OPTIONS', 'Scale must be a positive number');
    }
  }

  /**
   * Post a render request to the worker and wait for the matching response.
   *
   * @param pdfData - The PDF bytes to render.
   * @param options - Fully populated render options.
   * @param timeout - Milliseconds to wait before rejecting with `WORKER_TIMEOUT`.
   */
  private async processWithWorker(
    pdfData: ArrayBuffer,
    options: Required<IPreviewOptions>,
    timeout = 30000
  ): Promise<IPdfProcessResponse> {
    const worker = this.worker;
    if (!worker) {
      throw new PreviewError('PROCESSING_FAILED', 'PDF processor not initialized');
    }
    return new Promise<IPdfProcessResponse>((resolve, reject) => {
      const requestId = `req_${++this.requestIdCounter}`;

      // Give up on the request if the worker does not answer in time
      const timeoutHandle = setTimeout(() => {
        this.pendingRequests.delete(requestId);
        reject(new PreviewError('WORKER_TIMEOUT', `Worker timeout after ${timeout}ms`));
      }, timeout);

      // Register before posting so the response handler can always find it
      this.pendingRequests.set(requestId, {
        resolve,
        reject,
        timeout: timeoutHandle,
      });

      const request: IPdfProcessRequest = {
        pdfData,
        options,
      };
      try {
        worker.postMessage({
          type: 'PROCESS_PDF',
          id: requestId,
          data: request,
        } as IWorkerMessage);
      } catch (error) {
        // postMessage can throw synchronously (e.g. when the payload cannot
        // be cloned); undo the bookkeeping so neither entry nor timer lingers.
        clearTimeout(timeoutHandle);
        this.pendingRequests.delete(requestId);
        reject(error instanceof Error ? error : new Error(String(error)));
      }
    });
  }

  /**
   * Create the worker script blob from inline source code.
   *
   * The script imports PDF.js 3.11.174 from cdnjs, renders the requested page
   * on an `OffscreenCanvas`, encodes it as JPEG and posts the bytes back. It
   * answers with `PROCESS_COMPLETE` or `PROCESS_ERROR`, and announces itself
   * once with `WORKER_READY`.
   */
  private async createWorkerBlob(): Promise<Blob> {
    // In a real implementation, you would bundle the worker code
    // For now, we'll create a minimal worker that loads PDF.js from CDN
    const workerCode = `
      // Import PDF.js from CDN
      importScripts('https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js');
      // Configure PDF.js
      pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
      let isInitialized = false;
      // Initialize
      async function initialize() {
        if (isInitialized) return;
        isInitialized = true;
        postMessage({ type: 'WORKER_READY', id: 'init' });
      }
      // Process PDF
      async function processPdf(requestId, request) {
        let pdf = null;
        try {
          pdf = await pdfjsLib.getDocument({ data: request.pdfData }).promise;
          const page = await pdf.getPage(request.options.page);
          const viewport = page.getViewport({ scale: request.options.scale });
          let { width, height } = viewport;
          if (request.options.width && width > request.options.width) {
            const scale = request.options.width / width;
            width = request.options.width;
            height = height * scale;
          }
          if (request.options.height && height > request.options.height) {
            const scale = request.options.height / height;
            height = request.options.height;
            width = width * scale;
          }
          const scaledViewport = page.getViewport({
            scale: Math.min(width / viewport.width, height / viewport.height) * request.options.scale
          });
          const canvas = new OffscreenCanvas(scaledViewport.width, scaledViewport.height);
          const context = canvas.getContext('2d');
          await page.render({ canvasContext: context, viewport: scaledViewport }).promise;
          const blob = await canvas.convertToBlob({
            type: 'image/jpeg',
            quality: request.options.quality / 100,
          });
          const arrayBuffer = await blob.arrayBuffer();
          postMessage({
            type: 'PROCESS_COMPLETE',
            id: requestId,
            data: {
              imageData: arrayBuffer,
              width: scaledViewport.width,
              height: scaledViewport.height,
            }
          });
        } catch (error) {
          postMessage({
            type: 'PROCESS_ERROR',
            id: requestId,
            error: error && error.message ? error.message : String(error)
          });
        } finally {
          // Release the document so memory does not grow with every request.
          // Best effort: the response has already been posted at this point.
          if (pdf) {
            try {
              await pdf.destroy();
            } catch (destroyError) {
              // ignored on purpose
            }
          }
        }
      }
      // Message handler
      self.addEventListener('message', async (event) => {
        const { type, id, data } = event.data;
        switch (type) {
          case 'INIT':
            await initialize();
            break;
          case 'PROCESS_PDF':
            await processPdf(id, data);
            break;
        }
      });
      // Auto-initialize
      initialize();
    `;
    return new Blob([workerCode], { type: 'application/javascript' });
  }

  /**
   * Set up the long-lived worker event handlers: responses settle the
   * matching pending request, and a worker `error` event rejects all of them.
   */
  private setupWorkerEventHandlers(): void {
    const worker = this.worker;
    if (!worker) return;

    worker.addEventListener('message', (event: MessageEvent<IWorkerMessage>) => {
      const { type, id, data, error } = event.data;
      // Only response messages settle a request. Anything else (such as the
      // ready signal) must leave pending entries and their timers untouched.
      if (type !== 'PROCESS_COMPLETE' && type !== 'PROCESS_ERROR') {
        return;
      }
      const request = this.takePendingRequest(id);
      if (!request) return;
      if (type === 'PROCESS_COMPLETE') {
        request.resolve(data as IPdfProcessResponse);
      } else {
        request.reject(new PreviewError('WORKER_ERROR', error || 'Unknown worker error'));
      }
    });

    worker.addEventListener('error', (event) => {
      this.rejectAllPending(new PreviewError('WORKER_ERROR', `Worker error: ${event.message}`));
    });
  }

  /**
   * Remove a pending request from the map and cancel its timeout.
   *
   * @returns The removed entry, or `undefined` if no request has that id.
   */
  private takePendingRequest(id: string) {
    const request = this.pendingRequests.get(id);
    if (!request) {
      return undefined;
    }
    if (request.timeout !== undefined) {
      clearTimeout(request.timeout);
    }
    this.pendingRequests.delete(id);
    return request;
  }

  /** Reject every pending request with `reason`, cancel the timers and empty the map. */
  private rejectAllPending(reason: Error): void {
    for (const request of this.pendingRequests.values()) {
      request.reject(reason);
      if (request.timeout !== undefined) {
        clearTimeout(request.timeout);
      }
    }
    this.pendingRequests.clear();
  }

  /**
   * Wait for the worker to post `WORKER_READY`.
   *
   * Rejects as soon as the worker raises an `error` event instead of waiting
   * for the timeout, and always detaches its temporary listeners.
   *
   * @param timeout - Milliseconds to wait before rejecting with `WORKER_TIMEOUT`.
   */
  private async waitForWorkerReady(timeout = 10000): Promise<void> {
    const worker = this.worker;
    if (!worker) {
      throw new PreviewError('PROCESSING_FAILED', 'PDF processor not initialized');
    }
    return new Promise<void>((resolve, reject) => {
      const onMessage = (event: MessageEvent<IWorkerMessage>) => {
        if (event.data.type !== 'WORKER_READY') return;
        detach();
        resolve();
      };
      const onError = (event: { message: string }) => {
        detach();
        reject(new PreviewError('WORKER_ERROR', `Worker error: ${event.message}`));
      };
      const timeoutHandle = setTimeout(() => {
        detach();
        reject(new PreviewError('WORKER_TIMEOUT', 'Worker initialization timeout'));
      }, timeout);
      const detach = () => {
        clearTimeout(timeoutHandle);
        worker.removeEventListener('message', onMessage);
        worker.removeEventListener('error', onError);
      };

      worker.addEventListener('message', onMessage);
      worker.addEventListener('error', onError);
      worker.postMessage({ type: 'INIT', id: 'init' });
    });
  }

  /**
   * Convert a blob to a data URL using `FileReader`.
   */
  private async blobToDataUrl(blob: Blob): Promise<string> {
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(reader.result as string);
      reader.onerror = () => reject(new Error('Failed to read blob as data URL'));
      reader.readAsDataURL(blob);
    });
  }
}