import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';

/**
 * Formats a byte count as megabytes with two decimal places.
 *
 * @param bytes - size in bytes
 * @returns the size in MB as a fixed-point string, e.g. `"1.50"`
 */
function formatMB(bytes: number): string {
  return (bytes / 1024 / 1024).toFixed(2);
}

/**
 * Extracts a printable message from a caught value.
 *
 * Caught values are `unknown`: anything can be thrown, so `.message` is only
 * read after confirming the value is an `Error`.
 *
 * @param error - the value received by a `catch` clause
 * @returns the error message, or the stringified value for non-Error throws
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

tap.test('PDF-08: Large PDF Performance - should handle large PDFs efficiently', async (t) => {
  // PDF-08: Verify performance with large PDF files
  // This test ensures the system can handle large PDFs without memory issues
  const performanceTracker = new PerformanceTracker('PDF-08: Large PDF Performance');
  const corpusLoader = new CorpusLoader();

  t.test('Process PDFs of increasing size', async () => {
    const startTime = performance.now();
    const { PDFDocument } = plugins;

    // Test different PDF sizes; expectedTime is the extraction budget in milliseconds
    const sizes = [
      { pages: 1, name: '1-page', expectedTime: 100 },
      { pages: 10, name: '10-page', expectedTime: 200 },
      { pages: 50, name: '50-page', expectedTime: 500 },
      { pages: 100, name: '100-page', expectedTime: 1000 }
    ];

    for (const sizeTest of sizes) {
      const sizeStartTime = performance.now();
      const pdfDoc = await PDFDocument.create();

      // Create multiple pages
      for (let i = 0; i < sizeTest.pages; i++) {
        const page = pdfDoc.addPage([595, 842]); // A4

        // Add content to each page
        page.drawText(`Invoice Page ${i + 1} of ${sizeTest.pages}`, {
          x: 50,
          y: 750,
          size: 20
        });

        // Add some graphics to increase file size
        // NOTE(review): colors are passed as plain { red, green, blue } objects here and
        // below — confirm the PDF library accepts this shape rather than a color helper.
        page.drawRectangle({
          x: 50,
          y: 600,
          width: 495,
          height: 100,
          borderColor: { red: 0, green: 0, blue: 0 },
          borderWidth: 1
        });

        // Add text content
        for (let j = 0; j < 20; j++) {
          page.drawText(`Line item ${j + 1}: Product description with details`, {
            x: 60,
            y: 580 - (j * 20),
            size: 10
          });
        }
      }

      // Add invoice XML
      // NOTE(review): this payload contains no XML markup as written — confirm it matches
      // the intended fixture.
      const xmlContent = ` LARGE-PDF-${sizeTest.name} 2025-01-25 Test invoice for ${sizeTest.pages} page PDF ${sizeTest.pages * 20} `;

      await pdfDoc.attach(
        Buffer.from(xmlContent, 'utf8'),
        'invoice.xml',
        {
          mimeType: 'application/xml',
          description: `Invoice for ${sizeTest.pages} page document`
        }
      );

      const pdfBytes = await pdfDoc.save();
      const sizeMB = formatMB(pdfBytes.length);

      // Test extraction performance
      const extractStartTime = performance.now();
      const einvoice = new EInvoice();

      // NOTE(review): anything thrown inside this try — including by the expect calls —
      // is only logged below, so a failed expectation does not fail the test. Kept as-is
      // because it looks like deliberate best-effort; confirm.
      try {
        await einvoice.loadFromPdfBuffer(pdfBytes);
        const xmlString = einvoice.getXmlString();
        expect(xmlString).toContain(`LARGE-PDF-${sizeTest.name}`);

        const extractTime = performance.now() - extractStartTime;
        console.log(`${sizeTest.name} (${sizeMB} MB): Extraction took ${extractTime.toFixed(2)}ms`);

        // Check if extraction time is reasonable
        expect(extractTime).toBeLessThan(sizeTest.expectedTime);
      } catch (error: unknown) {
        console.log(`${sizeTest.name} extraction error:`, describeError(error));
      }

      const sizeElapsed = performance.now() - sizeStartTime;
      performanceTracker.addMeasurement(`size-${sizeTest.name}`, sizeElapsed);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('increasing-sizes', elapsed);
  });

  t.test('Memory usage with large PDFs', async () => {
    const startTime = performance.now();

    // Monitor memory usage
    const initialMemory = process.memoryUsage();
    console.log('Initial memory (MB):', {
      rss: formatMB(initialMemory.rss),
      heapUsed: formatMB(initialMemory.heapUsed)
    });

    const { PDFDocument } = plugins;
    const pdfDoc = await PDFDocument.create();

    // Create a large PDF with many objects
    const pageCount = 200;
    for (let i = 0; i < pageCount; i++) {
      const page = pdfDoc.addPage();

      // Add many small objects to increase complexity (10 columns x 5 rows per page)
      for (let j = 0; j < 50; j++) {
        page.drawText(`Item ${i}-${j}`, {
          x: 50 + (j % 10) * 50,
          y: 700 - Math.floor(j / 10) * 20,
          size: 8
        });
      }
    }

    // Add large XML attachment
    // NOTE(review): the pieces below contain no XML markup as written — confirm they match
    // the intended fixture.
    let xmlContent = '\n\n';
    for (let i = 0; i < 1000; i++) {
      xmlContent += ` Product item with long description text that increases file size 10 99.99 \n`;
    }
    xmlContent += '';

    await pdfDoc.attach(
      Buffer.from(xmlContent, 'utf8'),
      'large-invoice.xml',
      {
        mimeType: 'application/xml',
        description: 'Large invoice with many line items'
      }
    );

    const pdfBytes = await pdfDoc.save();
    const sizeMB = formatMB(pdfBytes.length);
    console.log(`Created large PDF: ${sizeMB} MB`);

    // Test memory usage during processing
    const einvoice = new EInvoice();
    await einvoice.loadFromPdfBuffer(pdfBytes);

    const afterMemory = process.memoryUsage();
    console.log('After processing memory (MB):', {
      rss: formatMB(afterMemory.rss),
      heapUsed: formatMB(afterMemory.heapUsed)
    });

    const memoryIncrease = afterMemory.heapUsed - initialMemory.heapUsed;
    console.log(`Memory increase: ${formatMB(memoryIncrease)} MB`);

    // Force garbage collection if available
    if (global.gc) {
      global.gc();
      const gcMemory = process.memoryUsage();
      console.log('After GC memory (MB):', {
        heapUsed: formatMB(gcMemory.heapUsed)
      });
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('memory-usage', elapsed);
  });

  t.test('Streaming vs loading performance', async () => {
    const startTime = performance.now();
    const { PDFDocument } = plugins;

    // Create a moderately large PDF
    const pdfDoc = await PDFDocument.create();
    for (let i = 0; i < 50; i++) {
      const page = pdfDoc.addPage();
      page.drawText(`Page ${i + 1}`, { x: 50, y: 700, size: 20 });
    }

    const xmlContent = ` STREAM-TEST`;
    await pdfDoc.attach(
      Buffer.from(xmlContent, 'utf8'),
      'invoice.xml',
      { mimeType: 'application/xml' }
    );

    const pdfBytes = await pdfDoc.save();

    // Test full loading
    const loadStartTime = performance.now();
    const einvoice1 = new EInvoice();
    await einvoice1.loadFromPdfBuffer(pdfBytes);
    const loadTime = performance.now() - loadStartTime;
    console.log(`Full loading time: ${loadTime.toFixed(2)}ms`);

    // Note: Actual streaming would require stream API support
    // This is a placeholder for streaming performance comparison
    console.log('Streaming API would potentially reduce memory usage for large files');

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('streaming-comparison', elapsed);
  });

  t.test('Concurrent large PDF processing', async () => {
    const startTime = performance.now();
    const { PDFDocument } = plugins;

    // Create multiple PDFs for concurrent processing
    const createPdf = async (id: string, pages: number) => {
      const pdfDoc = await PDFDocument.create();
      for (let i = 0; i < pages; i++) {
        const page = pdfDoc.addPage();
        page.drawText(`Document ${id} - Page ${i + 1}`, { x: 50, y: 700, size: 16 });
      }

      await pdfDoc.attach(
        Buffer.from(`${id}`, 'utf8'),
        'invoice.xml',
        { mimeType: 'application/xml' }
      );

      return pdfDoc.save();
    };

    // Create PDFs
    const pdfPromises = [
      createPdf('PDF-A', 30),
      createPdf('PDF-B', 40),
      createPdf('PDF-C', 50),
      createPdf('PDF-D', 60)
    ];
    const pdfs = await Promise.all(pdfPromises);

    // Process concurrently
    const concurrentStartTime = performance.now();
    const processPromises = pdfs.map(async (pdfBytes) => {
      const einvoice = new EInvoice();
      await einvoice.loadFromPdfBuffer(pdfBytes);
      return einvoice.getXmlString();
    });
    const results = await Promise.all(processPromises);
    const concurrentTime = performance.now() - concurrentStartTime;

    expect(results.length).toBe(4);
    results.forEach((xml, index) => {
      // 65 is the char code of 'A', so index 0..3 maps to PDF-A..PDF-D
      expect(xml).toContain(`PDF-${String.fromCharCode(65 + index)}`);
    });

    console.log(`Concurrent processing of 4 PDFs: ${concurrentTime.toFixed(2)}ms`);

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('concurrent-processing', elapsed);
  });

  t.test('Large PDF with complex structure', async () => {
    const startTime = performance.now();
    const { PDFDocument } = plugins;
    const pdfDoc = await PDFDocument.create();

    // Create complex structure with forms, annotations, etc.
    const formPage = pdfDoc.addPage();

    // Add form fields (simplified - actual forms require more setup)
    formPage.drawText('Invoice Form', { x: 50, y: 750, size: 24 });
    formPage.drawRectangle({
      x: 50,
      y: 700,
      width: 200,
      height: 30,
      borderColor: { red: 0, green: 0, blue: 0.5 },
      borderWidth: 1
    });
    formPage.drawText('Invoice Number:', { x: 55, y: 710, size: 12 });

    // Add multiple embedded files
    const attachments = [
      { name: 'invoice.xml', size: 10000 },
      { name: 'terms.pdf', size: 50000 },
      { name: 'logo.png', size: 20000 }
    ];

    for (const att of attachments) {
      const content = Buffer.alloc(att.size, 'A'); // Dummy content
      await pdfDoc.attach(content, att.name, {
        mimeType: att.name.endsWith('.xml') ? 'application/xml' : 'application/octet-stream',
        description: `Attachment: ${att.name}`
      });
    }

    // Add many pages with different content types
    for (let i = 0; i < 25; i++) {
      const page = pdfDoc.addPage();

      // Alternate between text-heavy and graphic-heavy pages
      if (i % 2 === 0) {
        // Text-heavy page
        for (let j = 0; j < 40; j++) {
          page.drawText(`Line ${j + 1}: Lorem ipsum dolor sit amet, consectetur adipiscing elit.`, {
            x: 50,
            y: 750 - (j * 18),
            size: 10
          });
        }
      } else {
        // Graphic-heavy page: 10 x 10 grid of randomly colored squares
        for (let j = 0; j < 10; j++) {
          for (let k = 0; k < 10; k++) {
            page.drawRectangle({
              x: 50 + (k * 50),
              y: 700 - (j * 50),
              width: 45,
              height: 45,
              color: { red: Math.random(), green: Math.random(), blue: Math.random() }
            });
          }
        }
      }
    }

    const pdfBytes = await pdfDoc.save();
    const sizeMB = formatMB(pdfBytes.length);
    console.log(`Complex PDF size: ${sizeMB} MB`);

    // Test processing; a failure here is logged rather than failing the test
    const processStartTime = performance.now();
    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromPdfBuffer(pdfBytes);
      const processTime = performance.now() - processStartTime;
      console.log(`Complex PDF processed in: ${processTime.toFixed(2)}ms`);
    } catch (error: unknown) {
      console.log('Complex PDF processing error:', describeError(error));
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('complex-structure', elapsed);
  });

  t.test('Corpus large PDF analysis', async () => {
    const startTime = performance.now();
    let largeFileCount = 0;
    let totalSize = 0;
    let processedCount = 0;
    const sizeDistribution = {
      small: 0, // < 100KB
      medium: 0, // 100KB - 1MB
      large: 0, // 1MB - 10MB
      veryLarge: 0 // > 10MB
    };

    const files = await corpusLoader.getAllFiles();
    const pdfFiles = files.filter(f => f.endsWith('.pdf'));

    for (const file of pdfFiles) {
      try {
        const content = await corpusLoader.readFile(file);
        const sizeMB = content.length / 1024 / 1024;
        totalSize += content.length;

        if (content.length < 100 * 1024) {
          sizeDistribution.small++;
        } else if (content.length < 1024 * 1024) {
          sizeDistribution.medium++;
        } else if (content.length < 10 * 1024 * 1024) {
          sizeDistribution.large++;
          largeFileCount++;
        } else {
          sizeDistribution.veryLarge++;
          largeFileCount++;
        }

        // Test large file processing
        if (sizeMB > 1) {
          const testStartTime = performance.now();
          const einvoice = new EInvoice();
          try {
            await einvoice.loadFromPdfBuffer(content);
            const testTime = performance.now() - testStartTime;
            console.log(`Large file ${file} (${sizeMB.toFixed(2)} MB) processed in ${testTime.toFixed(2)}ms`);
          } catch (error: unknown) {
            console.log(`Large file ${file} processing failed:`, describeError(error));
          }
        }

        processedCount++;
      } catch (error: unknown) {
        console.log(`Error reading ${file}:`, describeError(error));
      }
    }

    // Guard against an empty (or entirely unreadable) corpus, which would otherwise
    // divide by zero and report NaN.
    const avgSize = processedCount > 0 ? totalSize / processedCount / 1024 : 0;
    console.log(`Corpus PDF analysis (${processedCount} files):`);
    console.log(`- Average size: ${avgSize.toFixed(2)} KB`);
    console.log(`- Large files (>1MB): ${largeFileCount}`);
    console.log('Size distribution:', sizeDistribution);

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('corpus-large-pdfs', elapsed);
  });

  t.test('Performance degradation test', async () => {
    const startTime = performance.now();
    const { PDFDocument } = plugins;
    const processingTimes: number[] = [];

    // Test if performance degrades with repeated operations
    for (let iteration = 0; iteration < 5; iteration++) {
      const iterStartTime = performance.now();

      // Create PDF
      const pdfDoc = await PDFDocument.create();
      for (let i = 0; i < 20; i++) {
        const page = pdfDoc.addPage();
        page.drawText(`Iteration ${iteration + 1} - Page ${i + 1}`, { x: 50, y: 700, size: 16 });
      }

      await pdfDoc.attach(
        Buffer.from(`PERF-${iteration}`, 'utf8'),
        'invoice.xml',
        { mimeType: 'application/xml' }
      );

      const pdfBytes = await pdfDoc.save();

      // Process PDF
      const einvoice = new EInvoice();
      await einvoice.loadFromPdfBuffer(pdfBytes);
      einvoice.getXmlString();

      const iterTime = performance.now() - iterStartTime;
      processingTimes.push(iterTime);
      console.log(`Iteration ${iteration + 1}: ${iterTime.toFixed(2)}ms`);
    }

    // Check for performance degradation: relative change between first and last iteration
    const firstTime = processingTimes[0];
    const lastTime = processingTimes[processingTimes.length - 1];
    const degradation = ((lastTime - firstTime) / firstTime) * 100;
    console.log(`Performance degradation: ${degradation.toFixed(2)}%`);
    expect(Math.abs(degradation)).toBeLessThan(50); // Allow up to 50% variation

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('degradation-test', elapsed);
  });

  // Print performance summary
  performanceTracker.printSummary();

  // Performance assertions
  const avgTime = performanceTracker.getAverageTime();
  expect(avgTime).toBeLessThan(2000); // Large PDFs may take longer
});

tap.start();