/*
 * Change summary:
 * - Add Node.js implementation using @push.rocks/smartpdf
 * - Add browser implementation with PDF.js and Web Workers
 * - Support configurable quality, dimensions, and page selection
 * - Include comprehensive TypeScript definitions and error handling
 * - Provide extensive test coverage for both environments
 * - Add download functionality and browser compatibility checking
 *
 * File listing: 423 lines, 12 KiB, TypeScript
 */
import type {
|
|
IWebPdfProcessor,
|
|
IPreviewOptions,
|
|
IPreviewResult,
|
|
TWebInputType,
|
|
IWorkerMessage,
|
|
IPdfProcessRequest,
|
|
IPdfProcessResponse,
|
|
IWebPreviewOptions
|
|
} from './interfaces.js';
|
|
import { PreviewError } from './interfaces.js';
|
|
|
|
/**
 * PDF processor implementation for the browser.
 *
 * Renders a single PDF page to a JPEG inside a dedicated Web Worker whose
 * script is generated at runtime and loads PDF.js from a CDN. Requests are
 * correlated with worker replies through a per-request id, and every request
 * is guarded by a timeout.
 */
export class WebPdfProcessor implements IWebPdfProcessor {
  public readonly inputFormat = 'pdf' as const;
  public readonly outputFormat = 'jpeg' as const;

  private worker: Worker | null = null;
  private isInitialized = false;

  /** In-flight worker requests, keyed by request id. */
  private pendingRequests = new Map<
    string,
    {
      resolve: (result: IPdfProcessResponse) => void;
      reject: (error: Error) => void;
      timeout?: ReturnType<typeof setTimeout>;
    }
  >();
  private requestIdCounter = 0;

  /**
   * Initialize the PDF processor by spawning the worker and waiting for its
   * ready handshake. Calling it again after a successful init is a no-op.
   *
   * @throws PreviewError with code `PROCESSING_FAILED` (wrapping the cause)
   *         when the worker cannot be created or does not become ready.
   */
  public async init(): Promise<void> {
    if (this.isInitialized) {
      return;
    }

    let workerUrl: string | undefined;
    try {
      // Create worker from blob URL to avoid CORS issues
      const workerBlob = await this.createWorkerBlob();
      workerUrl = URL.createObjectURL(workerBlob);

      this.worker = new Worker(workerUrl);
      this.setupWorkerEventHandlers();

      // Wait for worker to be ready
      await this.waitForWorkerReady();

      this.isInitialized = true;
    } catch (error) {
      // Do not leave a half-started worker running after a failed init.
      this.worker?.terminate();
      this.worker = null;

      throw new PreviewError(
        'PROCESSING_FAILED',
        'Failed to initialize PDF processor',
        error instanceof Error ? error : new Error(String(error))
      );
    } finally {
      // The worker script has been loaded (or loading failed) by now, so the
      // blob URL is no longer needed; revoking it releases the blob.
      if (workerUrl !== undefined) {
        URL.revokeObjectURL(workerUrl);
      }
    }
  }

  /**
   * Process a PDF and generate a JPEG preview of one page.
   *
   * Defaults: quality 80, width 800, height 600, page 1, scale 1.0. A data
   * URL is generated unless `options.generateDataUrl` is explicitly `false`.
   *
   * @param input PDF content (ArrayBuffer, Uint8Array, Blob/File or data URL string).
   * @param options Rendering options and optional worker timeout.
   * @returns The JPEG blob, its dimensions, byte size, MIME type and data URL.
   * @throws PreviewError when the processor is not initialized, the input or
   *         options are invalid, or the worker fails or times out.
   */
  public async processPreview(input: TWebInputType, options: IWebPreviewOptions = {}): Promise<IPreviewResult> {
    if (!this.isInitialized || !this.worker) {
      throw new PreviewError('PROCESSING_FAILED', 'PDF processor not initialized');
    }

    try {
      // Convert input to ArrayBuffer
      const arrayBuffer = await this.inputToArrayBuffer(input);

      // Validate PDF
      this.validatePdfBuffer(arrayBuffer);

      // Set default options
      const processOptions = {
        quality: options.quality ?? 80,
        width: options.width ?? 800,
        height: options.height ?? 600,
        page: options.page ?? 1,
        scale: options.scale ?? 1.0,
      };

      // Validate options
      this.validateOptions(processOptions);

      // Process with worker
      const response = await this.processWithWorker(arrayBuffer, processOptions, options.timeout);

      // Create blob from response
      const blob = new Blob([response.imageData], { type: 'image/jpeg' });

      // Generate data URL if requested
      let dataUrl = '';
      if (options.generateDataUrl !== false) {
        dataUrl = await this.blobToDataUrl(blob);
      }

      return {
        blob,
        dimensions: {
          width: response.width,
          height: response.height,
        },
        size: response.imageData.byteLength,
        mimeType: 'image/jpeg',
        dataUrl,
      };
    } catch (error) {
      if (error instanceof PreviewError) {
        throw error;
      }
      throw new PreviewError(
        'PROCESSING_FAILED',
        'Failed to process PDF',
        error instanceof Error ? error : new Error(String(error))
      );
    }
  }

  /**
   * Clean up resources: reject all in-flight requests, terminate the worker
   * and mark the processor as uninitialized.
   */
  public async cleanup(): Promise<void> {
    if (this.worker) {
      // Cancel pending requests
      this.rejectAllPending(new Error('Worker cleanup'));

      // Terminate worker
      this.worker.terminate();
      this.worker = null;
    }
    this.isInitialized = false;
  }

  /**
   * Convert the supported input types to an ArrayBuffer.
   *
   * @throws PreviewError with code `INVALID_INPUT` for unsupported types,
   *         non-data-URL strings, and malformed or undecodable data URLs.
   */
  private async inputToArrayBuffer(input: TWebInputType): Promise<ArrayBuffer> {
    if (input instanceof ArrayBuffer) {
      return input;
    }

    if (input instanceof Uint8Array) {
      // Copy only the bytes covered by the view, not the whole backing buffer.
      return input.slice().buffer;
    }

    // File extends Blob, so a single check covers both.
    if (input instanceof Blob) {
      return await input.arrayBuffer();
    }

    if (typeof input === 'string') {
      if (!input.startsWith('data:')) {
        throw new PreviewError('INVALID_INPUT', 'String input must be a data URL');
      }

      const commaIndex = input.indexOf(',');
      if (commaIndex === -1) {
        throw new PreviewError('INVALID_INPUT', 'Malformed data URL: missing data payload');
      }

      let binaryString: string;
      try {
        binaryString = atob(input.slice(commaIndex + 1));
      } catch (error) {
        throw new PreviewError(
          'INVALID_INPUT',
          'Data URL payload is not valid base64',
          error instanceof Error ? error : new Error(String(error))
        );
      }

      // atob yields one char per byte, so char codes are the byte values.
      return Uint8Array.from(binaryString, (char) => char.charCodeAt(0)).buffer;
    }

    throw new PreviewError('INVALID_INPUT', 'Unsupported input type');
  }

  /**
   * Validate that the buffer starts with the `%PDF` magic bytes.
   *
   * @throws PreviewError with code `INVALID_INPUT` when it does not.
   */
  private validatePdfBuffer(buffer: ArrayBuffer): void {
    const pdfMagic = [37, 80, 68, 70]; // %PDF

    if (buffer.byteLength < pdfMagic.length) {
      throw new PreviewError('INVALID_INPUT', 'Input is too small to be a valid PDF');
    }

    const header = new Uint8Array(buffer, 0, pdfMagic.length);
    if (!pdfMagic.every((byte, index) => header[index] === byte)) {
      throw new PreviewError('INVALID_INPUT', 'Input is not a valid PDF file');
    }
  }

  /**
   * Validate processing options.
   *
   * The checks are written as negated positive conditions so that `NaN`
   * (which fails every comparison) is rejected instead of slipping through.
   *
   * @throws PreviewError with code `INVALID_OPTIONS` on the first violation.
   */
  private validateOptions(options: Required<IPreviewOptions>): void {
    if (!(options.quality >= 1 && options.quality <= 100)) {
      throw new PreviewError('INVALID_OPTIONS', 'Quality must be between 1 and 100');
    }

    if (!(options.width > 0) || !(options.height > 0)) {
      throw new PreviewError('INVALID_OPTIONS', 'Width and height must be positive numbers');
    }

    if (!Number.isInteger(options.page) || options.page < 1) {
      throw new PreviewError('INVALID_OPTIONS', 'Page number must be an integer of 1 or greater');
    }

    if (!Number.isFinite(options.scale) || options.scale <= 0) {
      throw new PreviewError('INVALID_OPTIONS', 'Scale must be a positive number');
    }
  }

  /**
   * Send one render request to the worker and wait for its reply.
   *
   * @param pdfData Raw PDF bytes.
   * @param options Fully populated rendering options.
   * @param timeout Milliseconds to wait before rejecting with `WORKER_TIMEOUT`.
   */
  private async processWithWorker(
    pdfData: ArrayBuffer,
    options: Required<IPreviewOptions>,
    timeout = 30000
  ): Promise<IPdfProcessResponse> {
    const worker = this.worker;
    if (!worker) {
      throw new PreviewError('PROCESSING_FAILED', 'PDF processor not initialized');
    }

    return new Promise<IPdfProcessResponse>((resolve, reject) => {
      const requestId = `req_${++this.requestIdCounter}`;

      // Set up timeout
      const timeoutHandle = setTimeout(() => {
        this.pendingRequests.delete(requestId);
        reject(new PreviewError('WORKER_TIMEOUT', `Worker timeout after ${timeout}ms`));
      }, timeout);

      // Store request
      this.pendingRequests.set(requestId, {
        resolve,
        reject,
        timeout: timeoutHandle,
      });

      // Send request to worker
      const request: IPdfProcessRequest = {
        pdfData,
        options,
      };

      try {
        worker.postMessage({
          type: 'PROCESS_PDF',
          id: requestId,
          data: request,
        } as IWorkerMessage);
      } catch (error) {
        // postMessage can throw (e.g. the payload cannot be cloned); make
        // sure the timer and bookkeeping entry do not outlive the request.
        clearTimeout(timeoutHandle);
        this.pendingRequests.delete(requestId);
        reject(error instanceof Error ? error : new Error(String(error)));
      }
    });
  }

  /**
   * Create the worker script as a Blob.
   *
   * The script loads PDF.js from a CDN, answers `INIT` with `WORKER_READY`
   * and answers `PROCESS_PDF` with `PROCESS_COMPLETE` or `PROCESS_ERROR`.
   */
  private async createWorkerBlob(): Promise<Blob> {
    // In a real implementation, you would bundle the worker code
    // For now, we'll create a minimal worker that loads PDF.js from CDN
    const workerCode = `
      // Import PDF.js from CDN
      importScripts('https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js');

      // Configure PDF.js
      pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';

      // Tell the main thread that the script (and PDF.js) loaded successfully
      function announceReady() {
        postMessage({ type: 'WORKER_READY', id: 'init' });
      }

      // Process PDF
      async function processPdf(requestId, request) {
        let loadingTask = null;
        try {
          const options = request.options;

          loadingTask = pdfjsLib.getDocument({ data: request.pdfData });
          const pdf = await loadingTask.promise;
          const page = await pdf.getPage(options.page);

          const viewport = page.getViewport({ scale: options.scale });

          // Shrink (never enlarge) so the page fits inside width x height
          // while keeping its aspect ratio.
          let fit = 1;
          if (options.width && viewport.width > options.width) {
            fit = Math.min(fit, options.width / viewport.width);
          }
          if (options.height && viewport.height > options.height) {
            fit = Math.min(fit, options.height / viewport.height);
          }

          const scaledViewport = page.getViewport({ scale: options.scale * fit });

          // Canvas dimensions are integers; keep them at least 1px.
          const canvas = new OffscreenCanvas(
            Math.max(1, Math.floor(scaledViewport.width)),
            Math.max(1, Math.floor(scaledViewport.height))
          );
          const context = canvas.getContext('2d');
          if (!context) {
            throw new Error('Unable to acquire 2D canvas context');
          }

          await page.render({ canvasContext: context, viewport: scaledViewport }).promise;

          const blob = await canvas.convertToBlob({
            type: 'image/jpeg',
            quality: options.quality / 100,
          });

          const imageData = await blob.arrayBuffer();

          // Transfer the buffer instead of copying it, and report the real
          // pixel size of the rendered canvas.
          postMessage({
            type: 'PROCESS_COMPLETE',
            id: requestId,
            data: {
              imageData: imageData,
              width: canvas.width,
              height: canvas.height,
            }
          }, [imageData]);
        } catch (error) {
          postMessage({
            type: 'PROCESS_ERROR',
            id: requestId,
            error: error && error.message ? error.message : String(error)
          });
        } finally {
          // Release the document so repeated requests do not accumulate memory.
          if (loadingTask) {
            try {
              await loadingTask.destroy();
            } catch (destroyError) {
              // Best-effort cleanup; the request has already been answered.
            }
          }
        }
      }

      // Message handler
      self.addEventListener('message', (event) => {
        const { type, id, data } = event.data;

        switch (type) {
          case 'INIT':
            announceReady();
            break;
          case 'PROCESS_PDF':
            // processPdf reports its own failures via PROCESS_ERROR
            processPdf(id, data);
            break;
        }
      });

      // Announce readiness as soon as the script has been evaluated
      announceReady();
    `;

    return new Blob([workerCode], { type: 'application/javascript' });
  }

  /**
   * Attach the long-lived worker listeners that settle pending requests.
   */
  private setupWorkerEventHandlers(): void {
    if (!this.worker) return;

    this.worker.addEventListener('message', (event: MessageEvent<IWorkerMessage>) => {
      const { type, id, data, error } = event.data;

      // Only result messages settle a request. Anything else (for example
      // WORKER_READY) must leave the pending entry and its timeout intact.
      if (type !== 'PROCESS_COMPLETE' && type !== 'PROCESS_ERROR') return;

      const request = this.pendingRequests.get(id);
      if (!request) return;

      clearTimeout(request.timeout);
      this.pendingRequests.delete(id);

      if (type === 'PROCESS_COMPLETE') {
        request.resolve(data as IPdfProcessResponse);
      } else {
        request.reject(new PreviewError('WORKER_ERROR', error || 'Unknown worker error'));
      }
    });

    this.worker.addEventListener('error', (event) => {
      // An uncaught worker error cannot be attributed to one request, so
      // every in-flight request is failed.
      this.rejectAllPending(new PreviewError('WORKER_ERROR', `Worker error: ${event.message}`));
    });
  }

  /**
   * Reject every pending request with the given error, clear their timers
   * and empty the pending map.
   */
  private rejectAllPending(error: Error): void {
    for (const request of this.pendingRequests.values()) {
      clearTimeout(request.timeout);
      request.reject(error);
    }
    this.pendingRequests.clear();
  }

  /**
   * Wait for the worker's `WORKER_READY` message.
   *
   * Rejects with `WORKER_ERROR` as soon as the worker raises an error (for
   * example when its script fails to load) and with `WORKER_TIMEOUT` when no
   * ready message arrives within `timeout` milliseconds.
   */
  private async waitForWorkerReady(timeout = 10000): Promise<void> {
    const worker = this.worker;
    if (!worker) {
      throw new PreviewError('PROCESSING_FAILED', 'PDF processor not initialized');
    }

    return new Promise<void>((resolve, reject) => {
      // Remove the temporary listeners and timer on every exit path.
      const detach = (): void => {
        clearTimeout(timeoutHandle);
        worker.removeEventListener('message', onMessage);
        worker.removeEventListener('error', onError);
      };

      const onMessage = (event: MessageEvent<IWorkerMessage>): void => {
        if (event.data.type !== 'WORKER_READY') return;
        detach();
        resolve();
      };

      const onError = (event: ErrorEvent): void => {
        detach();
        reject(new PreviewError('WORKER_ERROR', `Worker error: ${event.message}`));
      };

      const timeoutHandle = setTimeout(() => {
        detach();
        reject(new PreviewError('WORKER_TIMEOUT', 'Worker initialization timeout'));
      }, timeout);

      worker.addEventListener('message', onMessage);
      worker.addEventListener('error', onError);
      worker.postMessage({ type: 'INIT', id: 'init' });
    });
  }

  /**
   * Convert a blob to a data URL using FileReader.
   */
  private async blobToDataUrl(blob: Blob): Promise<string> {
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(reader.result as string);
      reader.onerror = () => reject(new Error('Failed to read blob as data URL'));
      reader.readAsDataURL(blob);
    });
  }
}