/**
 * @file test.perf-03.pdf-extraction.ts
 * @description Performance tests for PDF extraction operations
 */

import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { PDFDocument, rgb } from 'pdf-lib';

/** Aggregate statistics for one measurement series; all values are in milliseconds. */
interface PerformanceStats {
  avg: number;
  min: number;
  max: number;
  p95: number;
}

/**
 * Minimal in-memory collector of timing samples, grouped by a free-form key.
 */
class SimplePerformanceTracker {
  private readonly measurements = new Map<string, number[]>();
  private readonly name: string;

  /**
   * @param name Title printed at the top of the summary.
   */
  constructor(name: string) {
    this.name = name;
  }

  /**
   * Appends one sample to the series stored under `key`, creating the series on first use.
   *
   * @param key Series identifier.
   * @param time Sample value in milliseconds.
   */
  addMeasurement(key: string, time: number): void {
    const series = this.measurements.get(key);
    if (series) {
      series.push(time);
    } else {
      this.measurements.set(key, [time]);
    }
  }

  /**
   * Computes average, minimum, maximum and 95th percentile for a series.
   *
   * @param key Series identifier.
   * @returns The statistics, or `null` when no sample was recorded under `key`.
   */
  getStats(key: string): PerformanceStats | null {
    const times = this.measurements.get(key) ?? [];
    if (times.length === 0) return null;

    const sorted = [...times].sort((a, b) => a - b);
    // Nearest-rank percentile: ceil(0.95 * n) - 1. Integer arithmetic avoids
    // floating-point drift, and unlike floor(0.95 * n) this does not collapse
    // to the maximum for every series of 20 samples or fewer.
    const p95Index = Math.max(0, Math.ceil((sorted.length * 95) / 100) - 1);

    return {
      avg: average(times),
      min: sorted[0],
      max: sorted[sorted.length - 1],
      p95: sorted[p95Index]
    };
  }

  /**
   * Prints one line of statistics per recorded series to the console.
   */
  printSummary(): void {
    console.log(`\n${this.name} - Performance Summary:`);
    for (const key of this.measurements.keys()) {
      const stats = this.getStats(key);
      if (stats) {
        console.log(` ${key}: avg=${stats.avg.toFixed(2)}ms, min=${stats.min.toFixed(2)}ms, max=${stats.max.toFixed(2)}ms, p95=${stats.p95.toFixed(2)}ms`);
      }
    }
  }
}

const performanceTracker = new SimplePerformanceTracker('PERF-03: PDF Extraction Speed');

/**
 * Arithmetic mean of `values`. Yields `NaN` for an empty array (0 / 0).
 */
function average(values: number[]): number {
  return values.reduce((sum, value) => sum + value, 0) / values.length;
}

/**
 * Awaits `operation` and reports how long it took.
 *
 * @param operation Async work to measure.
 * @returns The resolved value together with the elapsed wall-clock time in milliseconds.
 */
async function timeAsync<T>(operation: () => Promise<T>): Promise<{ result: T; duration: number }> {
  const startTime = performance.now();
  const result = await operation();
  const duration = performance.now() - startTime;
  return { result, duration };
}

/**
 * Builds a PDF with `pages` A4 pages and the given XML attached as `invoice.xml`.
 *
 * @param name Label drawn on every page and used in the attachment description.
 * @param xmlContent XML text to embed as the attachment.
 * @param pages Number of pages to generate.
 * @returns The serialized PDF.
 */
async function createTestPdf(name: string, xmlContent: string, pages: number = 1): Promise<Buffer> {
  const pdfDoc = await PDFDocument.create();

  // Add pages
  for (let i = 0; i < pages; i++) {
    const page = pdfDoc.addPage([595, 842]); // A4
    page.drawText(`Test Invoice ${name} - Page ${i + 1}`, {
      x: 50,
      y: 750,
      size: 20
    });

    // Add some content
    page.drawRectangle({
      x: 50,
      y: 600,
      width: 495,
      height: 100,
      borderColor: rgb(0, 0, 0),
      borderWidth: 1
    });
  }

  // Attach the XML
  await pdfDoc.attach(
    Buffer.from(xmlContent, 'utf8'),
    'invoice.xml',
    {
      mimeType: 'application/xml',
      description: `Invoice ${name}`
    }
  );

  return Buffer.from(await pdfDoc.save());
}

/**
 * Builds an invoice XML document with `lineItems` identical lines of 100.00 EUR each.
 *
 * NOTE(review): the element markup was missing from this template (only the text
 * values were present, in UBL element order). It has been reconstructed as a
 * UBL 2.1 Invoice — confirm element names and namespaces against the fixture the
 * suite is expected to use.
 *
 * @param id Invoice identifier written into the document.
 * @param lineItems Number of invoice lines to generate.
 * @returns The XML text.
 */
function createTestXml(id: string, lineItems: number = 10): string {
  const lines = Array.from({ length: lineItems }, (_, i) => `
  <cac:InvoiceLine>
    <cbc:ID>${i + 1}</cbc:ID>
    <cbc:InvoicedQuantity unitCode="C62">1</cbc:InvoicedQuantity>
    <cbc:LineExtensionAmount currencyID="EUR">100.00</cbc:LineExtensionAmount>
    <cac:Item>
      <cbc:Name>Product ${i + 1}</cbc:Name>
    </cac:Item>
    <cac:Price>
      <cbc:PriceAmount currencyID="EUR">100.00</cbc:PriceAmount>
    </cac:Price>
  </cac:InvoiceLine>`).join('');

  return `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
         xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
         xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:ID>${id}</cbc:ID>
  <cbc:IssueDate>2025-01-25</cbc:IssueDate>
  <cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
  <cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>
  <cac:AccountingSupplierParty>
    <cac:Party>
      <cac:PartyName>
        <cbc:Name>Test Supplier</cbc:Name>
      </cac:PartyName>
      <cac:PostalAddress>
        <cbc:CityName>Berlin</cbc:CityName>
        <cbc:PostalZone>10115</cbc:PostalZone>
        <cac:Country>
          <cbc:IdentificationCode>DE</cbc:IdentificationCode>
        </cac:Country>
      </cac:PostalAddress>
    </cac:Party>
  </cac:AccountingSupplierParty>
  <cac:AccountingCustomerParty>
    <cac:Party>
      <cac:PartyName>
        <cbc:Name>Test Customer</cbc:Name>
      </cac:PartyName>
      <cac:PostalAddress>
        <cbc:CityName>Munich</cbc:CityName>
        <cbc:PostalZone>80331</cbc:PostalZone>
        <cac:Country>
          <cbc:IdentificationCode>DE</cbc:IdentificationCode>
        </cac:Country>
      </cac:PostalAddress>
    </cac:Party>
  </cac:AccountingCustomerParty>
  <cac:LegalMonetaryTotal>
    <cbc:PayableAmount currencyID="EUR">${100 * lineItems}.00</cbc:PayableAmount>
  </cac:LegalMonetaryTotal>${lines}
</Invoice>`;
}

// Measures repeated extraction for PDFs of increasing page and line-item counts.
tap.test('PERF-03: Basic PDF extraction performance', async () => {
  const testCases = [
    { name: 'Small PDF', pages: 1, lineItems: 10 },
    { name: 'Medium PDF', pages: 10, lineItems: 50 },
    { name: 'Large PDF', pages: 50, lineItems: 200 }
  ];

  const iterations = 20;

  for (const testCase of testCases) {
    const xmlContent = createTestXml(`PDF-${testCase.name}`, testCase.lineItems);
    const pdfBuffer = await createTestPdf(testCase.name, xmlContent, testCase.pages);
    const times: number[] = [];

    console.log(`Testing ${testCase.name}: ${(pdfBuffer.length / 1024).toFixed(2)} KB`);

    for (let i = 0; i < iterations; i++) {
      const { result: einvoice, duration } = await timeAsync(() => EInvoice.fromPdf(pdfBuffer));

      times.push(duration);
      performanceTracker.addMeasurement(`extract-${testCase.name}`, duration);

      if (i === 0) {
        // Verify extraction worked
        expect(einvoice.id).toContain(testCase.name);
      }
    }

    const avg = average(times);
    const bytesPerMs = pdfBuffer.length / avg;

    console.log(` Average extraction time: ${avg.toFixed(3)}ms`);
    console.log(` Throughput: ${(bytesPerMs / 1024).toFixed(2)} KB/ms`);

    // Performance expectations
    expect(avg).toBeLessThan(testCase.pages * 10 + 100); // Allow 10ms per page + 100ms base
  }
});

// Compares extraction time across differently structured attachments.
tap.test('PERF-03: Different attachment methods performance', async () => {
  const xmlContent = createTestXml('ATTACHMENT-TEST', 20);

  // Test different PDF structures
  const testCases = [
    {
      name: 'Standard attachment',
      create: async () => {
        const pdfDoc = await PDFDocument.create();
        pdfDoc.addPage();
        await pdfDoc.attach(Buffer.from(xmlContent), 'invoice.xml', {
          mimeType: 'application/xml'
        });
        return Buffer.from(await pdfDoc.save());
      }
    },
    {
      name: 'With AFRelationship',
      create: async () => {
        const pdfDoc = await PDFDocument.create();
        pdfDoc.addPage();
        await pdfDoc.attach(Buffer.from(xmlContent), 'invoice.xml', {
          mimeType: 'application/xml',
          afRelationship: plugins.AFRelationship.Data
        });
        return Buffer.from(await pdfDoc.save());
      }
    },
    {
      name: 'Multiple attachments',
      create: async () => {
        const pdfDoc = await PDFDocument.create();
        pdfDoc.addPage();
        // Main invoice
        await pdfDoc.attach(Buffer.from(xmlContent), 'invoice.xml', {
          mimeType: 'application/xml'
        });
        // Additional files
        await pdfDoc.attach(Buffer.from('data'), 'extra.xml', {
          mimeType: 'application/xml'
        });
        return Buffer.from(await pdfDoc.save());
      }
    }
  ];

  for (const testCase of testCases) {
    const pdfBuffer = await testCase.create();
    const times: number[] = [];

    for (let i = 0; i < 30; i++) {
      const { result: einvoice, duration } = await timeAsync(() => EInvoice.fromPdf(pdfBuffer));
      times.push(duration);

      if (i === 0) {
        expect(einvoice.id).toEqual('ATTACHMENT-TEST');
      }
    }

    const avg = average(times);
    console.log(`${testCase.name}: avg=${avg.toFixed(3)}ms`);
    performanceTracker.addMeasurement(`attachment-${testCase.name}`, avg);

    // All methods should be reasonably fast
    expect(avg).toBeLessThan(50);
  }
});

// Checks how extraction time grows with the number of invoice lines in the XML.
tap.test('PERF-03: XML size impact on extraction', async () => {
  const sizes = [1, 10, 50, 100, 500];

  for (const size of sizes) {
    const xmlContent = createTestXml(`SIZE-${size}`, size);
    const pdfBuffer = await createTestPdf(`Size test ${size} items`, xmlContent);
    const times: number[] = [];

    for (let i = 0; i < 20; i++) {
      const { duration } = await timeAsync(() => EInvoice.fromPdf(pdfBuffer));
      times.push(duration);
    }

    const avg = average(times);
    const xmlSizeKB = (xmlContent.length / 1024).toFixed(2);

    console.log(`XML with ${size} items (${xmlSizeKB} KB): avg=${avg.toFixed(3)}ms`);
    performanceTracker.addMeasurement(`xml-size-${size}`, avg);

    // Extraction time should scale reasonably with XML size
    expect(avg).toBeLessThan(size * 0.5 + 30);
  }
});

// Starts several extractions of the same buffer at once and awaits them together.
tap.test('PERF-03: Concurrent PDF extraction', async () => {
  const xmlContent = createTestXml('CONCURRENT', 20);
  const pdfBuffer = await createTestPdf('Concurrent test', xmlContent);
  const concurrentCounts = [1, 5, 10];

  for (const count of concurrentCounts) {
    const startTime = performance.now();
    const promises = Array.from({ length: count }, () => EInvoice.fromPdf(pdfBuffer));
    const results = await Promise.all(promises);
    const endTime = performance.now();

    const totalTime = endTime - startTime;
    const avgTimePerExtraction = totalTime / count;

    console.log(`Concurrent extractions (${count}): total=${totalTime.toFixed(2)}ms, avg per extraction=${avgTimePerExtraction.toFixed(2)}ms`);

    // Verify all extractions succeeded
    expect(results.every(e => e.id === 'CONCURRENT')).toEqual(true);

    // Concurrent operations should be efficient
    expect(avgTimePerExtraction).toBeLessThan(100);
  }
});

// Times extraction attempts on inputs that are expected to be rejected.
tap.test('PERF-03: Error handling performance', async () => {
  const errorCases = [
    {
      name: 'PDF without XML',
      create: async () => {
        const pdfDoc = await PDFDocument.create();
        pdfDoc.addPage();
        // No XML attachment
        return Buffer.from(await pdfDoc.save());
      }
    },
    {
      name: 'Invalid PDF',
      create: async () => Buffer.from('Not a PDF')
    },
    {
      name: 'Corrupted attachment',
      create: async () => {
        const pdfDoc = await PDFDocument.create();
        pdfDoc.addPage();
        await pdfDoc.attach(Buffer.from('<<>>'), 'invoice.xml', {
          mimeType: 'application/xml'
        });
        return Buffer.from(await pdfDoc.save());
      }
    }
  ];

  const iterations = 20;

  for (const errorCase of errorCases) {
    const pdfBuffer = await errorCase.create();
    const times: number[] = [];
    let rejected = 0;

    for (let i = 0; i < iterations; i++) {
      const startTime = performance.now();
      try {
        await EInvoice.fromPdf(pdfBuffer);
      } catch {
        // Expected error; counted so a case that stops throwing is visible in the log.
        rejected++;
      }
      const endTime = performance.now();
      times.push(endTime - startTime);
    }

    const avg = average(times);
    console.log(`${errorCase.name} - Error handling: avg=${avg.toFixed(3)}ms`);
    console.log(`${errorCase.name} - Rejected ${rejected} of ${iterations} attempts`);

    // Error cases should fail fast
    expect(avg).toBeLessThan(10);
  }
});

// Repeats extraction of a 100-page PDF and compares heap usage before and after.
tap.test('PERF-03: Memory efficiency during extraction', async () => {
  // Create a large PDF with many pages
  const xmlContent = createTestXml('MEMORY-TEST', 100);
  const largePdf = await createTestPdf('Memory test', xmlContent, 100);

  console.log(`Large PDF size: ${(largePdf.length / 1024 / 1024).toFixed(2)} MB`);

  const initialMemory = process.memoryUsage();
  const extractionTimes: number[] = [];

  // Extract multiple times to check for memory leaks
  for (let i = 0; i < 10; i++) {
    const { result: einvoice, duration } = await timeAsync(() => EInvoice.fromPdf(largePdf));
    extractionTimes.push(duration);
    expect(einvoice.id).toEqual('MEMORY-TEST');
  }

  const finalMemory = process.memoryUsage();
  const memoryIncrease = (finalMemory.heapUsed - initialMemory.heapUsed) / 1024 / 1024;

  console.log(`Memory increase after 10 extractions: ${memoryIncrease.toFixed(2)} MB`);
  console.log(`Average extraction time: ${average(extractionTimes).toFixed(2)}ms`);

  // Memory increase should be reasonable
  expect(memoryIncrease).toBeLessThan(100); // Less than 100MB increase
});

// Prints the collected statistics; the check only runs if the first test recorded samples.
tap.test('PERF-03: Performance Summary', async () => {
  performanceTracker.printSummary();

  // Overall performance check
  const stats = performanceTracker.getStats('extract-Small PDF');
  if (stats) {
    console.log(`\nSmall PDF extraction performance: avg=${stats.avg.toFixed(2)}ms`);
    expect(stats.avg).toBeLessThan(50); // Small PDFs should extract very quickly
  }

  console.log('\nPDF extraction performance tests completed successfully');
});

tap.start();