/**
 * @file test.perf-08.large-files.ts
 * @description Performance tests for large file processing
 */

import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../suite/corpus.loader.js';
import { PerformanceTracker } from '../../suite/performance.tracker.js';
import { FormatDetector } from '../../../ts/formats/utils/format.detector.js';

const performanceTracker = new PerformanceTracker('PERF-08: Large File Processing');

const BYTES_PER_KB = 1024;
const BYTES_PER_MB = 1024 * 1024;

/**
 * Loosely typed report record. Rows are only collected here and printed in the
 * summary, so the file keeps its existing `any`-based row convention.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type ReportRow = Record<string, any>;

/** Result of fitting the synthetic-invoice timings. */
interface ScalingAnalysis {
  type: 'Sub-linear' | 'Linear' | 'Super-linear';
  formula: string;
  msPerItem: string;
  exponent: string;
}

/** Comparison between the smallest and the largest chunk size. */
interface MemoryEfficiency {
  smallChunkMemory: string;
  largeChunkMemory: string;
  memoryScaling: string;
  recommendation: string;
}

/** First failure observed by the stress test. */
interface FailurePoint {
  scenario: string;
  size: number;
  error: string;
}

/** One stress-test scenario: how to build an invoice and how to label its sizes. */
interface StressScenario {
  name: string;
  /** True when `size` is a line-item count (feeds `limits.maxItemsProcessed`). */
  countsItems: boolean;
  sizes: number[];
  label: (size: number) => string;
  generator: (size: number) => { format: 'ubl'; data: unknown };
}

/** Converts a byte count to kilobytes. */
function toKB(bytes: number): number {
  return bytes / BYTES_PER_KB;
}

/** Converts a byte count to megabytes. */
function toMB(bytes: number): number {
  return bytes / BYTES_PER_MB;
}

/** Extracts a printable message from whatever a `catch` clause received. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Triggers a garbage collection when node exposes one (`--expose-gc`); otherwise a no-op. */
function collectGarbage(): void {
  if (global.gc) global.gc();
}

/**
 * Rate of `amount` per second over `elapsedMs`.
 * `Date.now()` has millisecond resolution, so very fast runs report 0 ms; the
 * elapsed time is clamped to 1 ms to avoid an `Infinity` rate.
 */
function perSecond(amount: number, elapsedMs: number): number {
  return amount / (Math.max(elapsedMs, 1) / 1000);
}

/** Escapes the five XML special characters; `null`/`undefined` become an empty string. */
function escapeXml(value: unknown): string {
  return String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}

/**
 * Helper function to create UBL invoice XML.
 *
 * Reads `invoiceNumber`, `issueDate`, optional `dueDate`/`currency`, the
 * `seller`/`buyer` parties (`name`, `address`, optional `city`/`postalCode`,
 * `country`, `taxId`), `totals` (`vatAmount`, `netAmount`, `grossAmount`) and
 * `items` (`quantity`, `lineTotal`, `description`, `unitPrice`).
 *
 * NOTE(review): the element names follow the OASIS UBL 2.1 Invoice schema and
 * were laid out around the existing placeholders in their original order;
 * confirm against the fixtures the parser is expected to accept.
 *
 * @param data Plain invoice data as built by the tests in this file.
 * @returns A UBL 2.1 `Invoice` document as a string.
 */
function createUBLInvoiceXML(data: any): string {
  const currency = escapeXml(data.currency || 'EUR');

  const items = data.items.map((item: any, idx: number) => `
  <cac:InvoiceLine>
    <cbc:ID>${idx + 1}</cbc:ID>
    <cbc:InvoicedQuantity unitCode="C62">${item.quantity}</cbc:InvoicedQuantity>
    <cbc:LineExtensionAmount currencyID="${currency}">${item.lineTotal}</cbc:LineExtensionAmount>
    <cac:Item>
      <cbc:Description>${escapeXml(item.description)}</cbc:Description>
    </cac:Item>
    <cac:Price>
      <cbc:PriceAmount currencyID="${currency}">${item.unitPrice}</cbc:PriceAmount>
    </cac:Price>
  </cac:InvoiceLine>`).join('');

  // Seller and buyer share the same party layout.
  const party = (p: any): string => `
    <cac:Party>
      <cac:PartyName>
        <cbc:Name>${escapeXml(p.name)}</cbc:Name>
      </cac:PartyName>
      <cac:PostalAddress>
        <cbc:StreetName>${escapeXml(p.address)}</cbc:StreetName>
        <cbc:CityName>${escapeXml(p.city || '')}</cbc:CityName>
        <cbc:PostalZone>${escapeXml(p.postalCode || '')}</cbc:PostalZone>
        <cac:Country>
          <cbc:IdentificationCode>${escapeXml(p.country)}</cbc:IdentificationCode>
        </cac:Country>
      </cac:PostalAddress>
      <cac:PartyTaxScheme>
        <cbc:CompanyID>${escapeXml(p.taxId)}</cbc:CompanyID>
        <cac:TaxScheme>
          <cbc:ID>VAT</cbc:ID>
        </cac:TaxScheme>
      </cac:PartyTaxScheme>
    </cac:Party>`;

  return `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
         xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
         xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:UBLVersionID>2.1</cbc:UBLVersionID>
  <cbc:ID>${escapeXml(data.invoiceNumber)}</cbc:ID>
  <cbc:IssueDate>${escapeXml(data.issueDate)}</cbc:IssueDate>
  <cbc:DueDate>${escapeXml(data.dueDate || data.issueDate)}</cbc:DueDate>
  <cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
  <cbc:DocumentCurrencyCode>${currency}</cbc:DocumentCurrencyCode>
  <cac:AccountingSupplierParty>${party(data.seller)}
  </cac:AccountingSupplierParty>
  <cac:AccountingCustomerParty>${party(data.buyer)}
  </cac:AccountingCustomerParty>
  <cac:TaxTotal>
    <cbc:TaxAmount currencyID="${currency}">${data.totals.vatAmount}</cbc:TaxAmount>
  </cac:TaxTotal>
  <cac:LegalMonetaryTotal>
    <cbc:TaxExclusiveAmount currencyID="${currency}">${data.totals.netAmount}</cbc:TaxExclusiveAmount>
    <cbc:TaxInclusiveAmount currencyID="${currency}">${data.totals.grossAmount}</cbc:TaxInclusiveAmount>
    <cbc:PayableAmount currencyID="${currency}">${data.totals.grossAmount}</cbc:PayableAmount>
  </cac:LegalMonetaryTotal>${items}
</Invoice>`;
}

/**
 * Ordinary least-squares fit of `y = slope * x + intercept`.
 * Returns a zero slope when all x values coincide (the fit is undefined there).
 */
function leastSquares(
  points: ReadonlyArray<{ x: number; y: number }>
): { slope: number; intercept: number } {
  const n = points.length;
  let sumX = 0;
  let sumY = 0;
  let sumXY = 0;
  let sumX2 = 0;
  for (const { x, y } of points) {
    sumX += x;
    sumY += y;
    sumXY += x * y;
    sumX2 += x * x;
  }
  const denominator = n * sumX2 - sumX * sumX;
  const slope = denominator === 0 ? 0 : (n * sumXY - sumX * sumY) / denominator;
  return { slope, intercept: (sumY - slope * sumX) / n };
}

/**
 * Describes how processing time grows with the number of line items.
 *
 * The linear fit supplies the reported formula and ms-per-item figure. The
 * growth class is taken from the slope of a log-log fit (the exponent `k` in
 * `time ~ items^k`), because the raw ms/item slope depends on how fast the
 * machine is and says nothing about the growth order.
 *
 * @param points `x` = item count (must be > 0), `y` = total time in ms.
 */
function analyzeScaling(points: ReadonlyArray<{ x: number; y: number }>): ScalingAnalysis {
  const { slope, intercept } = leastSquares(points);

  // Clamp to 1 ms so a 0 ms measurement does not produce log(0) = -Infinity.
  const { slope: exponent } = leastSquares(
    points.map(({ x, y }) => ({ x: Math.log(x), y: Math.log(Math.max(y, 1)) }))
  );

  let type: ScalingAnalysis['type'];
  if (exponent < 0.5) {
    type = 'Sub-linear';
  } else if (exponent <= 1.5) {
    type = 'Linear';
  } else {
    type = 'Super-linear';
  }

  return {
    type,
    formula: `Time(ms) = ${slope.toFixed(3)} * items + ${intercept.toFixed(2)}`,
    msPerItem: slope.toFixed(3),
    exponent: exponent.toFixed(2)
  };
}

/**
 * Test 1: Large PEPPOL file processing.
 * Reads, parses and validates every corpus file matching the PEPPOL pattern and
 * records timings plus heap deltas per file. A failing file becomes an error row.
 */
async function processLargePeppolFiles() {
  const corpusFiles = await CorpusLoader.loadPattern('**/PEPPOL/**/*.xml');
  const fileRows: ReportRow[] = [];
  const increments: number[] = [];
  // All memoryProfile figures are in MB.
  const memoryProfile = { baseline: 0, peak: 0, increments };

  // Get baseline memory
  collectGarbage();
  memoryProfile.baseline = toMB(process.memoryUsage().heapUsed);

  // Process PEPPOL files (known to be large)
  for (const file of corpusFiles) {
    try {
      const startTime = Date.now();
      const startMemory = process.memoryUsage();

      // Read file
      const content = await plugins.fs.readFile(file.path, 'utf-8');
      const fileSize = Buffer.byteLength(content, 'utf-8');

      // Process file
      const format = FormatDetector.detectFormat(content);

      const parseStart = Date.now();
      const einvoice = await EInvoice.fromXml(content);
      const parseEnd = Date.now();

      const validationStart = Date.now();
      const validationResult = await einvoice.validate();
      const validationEnd = Date.now();

      const endMemory = process.memoryUsage();
      const totalTime = Date.now() - startTime;
      const memoryUsed = toMB(endMemory.heapUsed - startMemory.heapUsed);

      // Compare like with like: peak is stored in MB, heapUsed is in bytes.
      const endHeapMB = toMB(endMemory.heapUsed);
      if (endHeapMB > memoryProfile.peak) {
        memoryProfile.peak = endHeapMB;
      }

      fileRows.push({
        // Store the path string; the summary prints its basename.
        path: file.path,
        sizeKB: toKB(fileSize).toFixed(2),
        sizeMB: toMB(fileSize).toFixed(2),
        format,
        processingTime: totalTime,
        parseTime: parseEnd - parseStart,
        validationTime: validationEnd - validationStart,
        memoryUsedMB: memoryUsed.toFixed(2),
        throughputMBps: perSecond(toMB(fileSize), totalTime).toFixed(2),
        itemCount: einvoice.data.items?.length || 0,
        valid: validationResult.valid
      });
      increments.push(memoryUsed);
    } catch (error: unknown) {
      fileRows.push({ path: file.path, error: errorMessage(error) });
    }
  }

  return { files: fileRows, memoryProfile };
}

/** Item counts used for the synthetic scaling run. */
const SYNTHETIC_SIZES = [
  { items: 100, name: '100 items' },
  { items: 500, name: '500 items' },
  { items: 1000, name: '1K items' },
  { items: 5000, name: '5K items' },
  { items: 10000, name: '10K items' }
];

/**
 * Builds a synthetic invoice with `itemCount` randomised line items and
 * totals derived from them.
 */
function buildLargeInvoice(itemCount: number) {
  const invoice = {
    format: 'ubl' as const,
    data: {
      documentType: 'INVOICE',
      invoiceNumber: `LARGE-${itemCount}`,
      issueDate: '2024-02-25',
      dueDate: '2024-03-25',
      currency: 'EUR',
      seller: {
        name: 'Large File Test Seller Corporation International GmbH',
        address: 'Hauptstraße 123-125, Building A, Floor 5',
        city: 'Berlin',
        postalCode: '10115',
        country: 'DE',
        taxId: 'DE123456789',
        registrationNumber: 'HRB123456',
        email: 'invoicing@largetest.de',
        phone: '+49 30 123456789',
        bankAccount: {
          iban: 'DE89370400440532013000',
          bic: 'COBADEFFXXX',
          bankName: 'Commerzbank AG'
        }
      },
      buyer: {
        name: 'Large File Test Buyer Enterprises Ltd.',
        address: '456 Commerce Boulevard, Suite 789',
        city: 'Munich',
        postalCode: '80331',
        country: 'DE',
        taxId: 'DE987654321',
        registrationNumber: 'HRB654321',
        email: 'ap@largebuyer.de',
        phone: '+49 89 987654321'
      },
      items: Array.from({ length: itemCount }, (_, i) => ({
        itemId: `ITEM-${String(i + 1).padStart(6, '0')}`,
        description: `Product Item Number ${i + 1} - Detailed description with technical specifications, compliance information, country of origin, weight, dimensions, and special handling instructions. This is a very detailed description to simulate real-world invoice data with comprehensive product information.`,
        quantity: Math.floor(Math.random() * 100) + 1,
        unitPrice: Math.random() * 1000,
        vatRate: [0, 7, 19][Math.floor(Math.random() * 3)],
        lineTotal: 0,
        additionalInfo: {
          weight: `${(Math.random() * 50).toFixed(2)}kg`,
          dimensions: `${Math.floor(Math.random() * 100)}x${Math.floor(Math.random() * 100)}x${Math.floor(Math.random() * 100)}cm`,
          countryOfOrigin: ['DE', 'FR', 'IT', 'CN', 'US'][Math.floor(Math.random() * 5)],
          customsCode: `${Math.floor(Math.random() * 9000000000) + 1000000000}`,
          serialNumber: `SN-${Date.now()}-${i}`,
          batchNumber: `BATCH-${Math.floor(i / 100)}`
        }
      })),
      totals: {
        netAmount: 0,
        vatAmount: 0,
        grossAmount: 0
      },
      notes: 'This is a large invoice generated for performance testing purposes. ' +
        'It contains a significant number of line items to test the system\'s ability ' +
        'to handle large documents efficiently.'
    }
  };

  // Calculate totals
  const { items, totals } = invoice.data;
  for (const item of items) {
    item.lineTotal = item.quantity * item.unitPrice;
    totals.netAmount += item.lineTotal;
    totals.vatAmount += item.lineTotal * (item.vatRate / 100);
  }
  totals.grossAmount = totals.netAmount + totals.vatAmount;

  return invoice;
}

/**
 * Test 2: Synthetic large file generation and processing.
 * Generates invoices of increasing size, times XML generation, parsing,
 * validation and CII conversion, then fits the totals to estimate scaling.
 */
async function runSyntheticLargeFiles() {
  const tests: ReportRow[] = [];

  for (const size of SYNTHETIC_SIZES) {
    const invoice = buildLargeInvoice(size.items);

    // Measure processing
    collectGarbage();
    const startMemory = process.memoryUsage();
    const startTime = Date.now();

    // Generate XML
    const xmlStart = Date.now();
    const xml = createUBLInvoiceXML(invoice.data);
    const xmlEnd = Date.now();
    const xmlSize = Buffer.byteLength(xml, 'utf-8');

    // Parse back
    const parseStart = Date.now();
    const parsed = await EInvoice.fromXml(xml);
    const parseEnd = Date.now();

    // Validate
    const validateStart = Date.now();
    const validation = await parsed.validate();
    const validateEnd = Date.now();

    // Convert
    const convertStart = Date.now();
    await parsed.toXmlString('cii'); // Test conversion performance
    const convertEnd = Date.now();

    const endTime = Date.now();
    const endMemory = process.memoryUsage();
    const heapDelta = endMemory.heapUsed - startMemory.heapUsed;
    const totalTime = endTime - startTime;

    tests.push({
      size: size.name,
      items: size.items,
      xmlSizeMB: toMB(xmlSize).toFixed(2),
      totalTime,
      xmlGeneration: xmlEnd - xmlStart,
      parsing: parseEnd - parseStart,
      validation: validateEnd - validateStart,
      conversion: convertEnd - convertStart,
      memoryUsedMB: toMB(heapDelta).toFixed(2),
      memoryPerItemKB: (toKB(heapDelta) / size.items).toFixed(2),
      throughputMBps: perSecond(toMB(xmlSize), totalTime).toFixed(2),
      valid: validation.valid
    });
  }

  // Analyze scaling (needs at least three points to be meaningful)
  const scalingAnalysis: ScalingAnalysis | null =
    tests.length >= 3
      ? analyzeScaling(tests.map((row) => ({ x: row.items, y: row.totalTime })))
      : null;

  return { tests, scalingAnalysis };
}

/**
 * Test 3: Memory-efficient large file streaming.
 * Simulates streaming by splitting a fixed number of items into chunk invoices
 * of several sizes and tracking duration and peak heap per chunk size.
 */
async function runChunkedProcessing() {
  const chunkProcessing: ReportRow[] = [];

  // Simulate large file processing in chunks
  const totalItems = 10000;
  const chunkSizes = [100, 500, 1000, 2000];

  for (const chunkSize of chunkSizes) {
    const chunks = Math.ceil(totalItems / chunkSize);
    const startTime = Date.now();
    const startMemory = process.memoryUsage();
    let peakMemory = startMemory.heapUsed;

    // Process in chunks
    const chunkResults: Array<{ chunk: number; items: number; duration: number }> = [];

    for (let chunk = 0; chunk < chunks; chunk++) {
      const startItem = chunk * chunkSize;
      const endItem = Math.min(startItem + chunkSize, totalItems);
      const itemsInChunk = endItem - startItem;

      // Create chunk invoice
      const chunkInvoice = {
        format: 'ubl' as const,
        data: {
          documentType: 'INVOICE',
          invoiceNumber: `CHUNK-${chunk}`,
          issueDate: '2024-02-25',
          seller: { name: 'Chunk Seller', address: 'Address', country: 'US', taxId: 'US123' },
          buyer: { name: 'Chunk Buyer', address: 'Address', country: 'US', taxId: 'US456' },
          items: Array.from({ length: itemsInChunk }, (_, i) => ({
            description: `Chunk ${chunk} Item ${i + 1}`,
            quantity: 1,
            unitPrice: 100,
            vatRate: 19,
            lineTotal: 100
          })),
          totals: {
            netAmount: itemsInChunk * 100,
            vatAmount: itemsInChunk * 19,
            grossAmount: itemsInChunk * 119
          }
        }
      };

      // Process chunk
      const chunkStart = Date.now();
      const chunkXml = createUBLInvoiceXML(chunkInvoice.data);
      const chunkEInvoice = await EInvoice.fromXml(chunkXml);
      await chunkEInvoice.validate();
      const chunkEnd = Date.now();

      chunkResults.push({ chunk, items: itemsInChunk, duration: chunkEnd - chunkStart });

      // Track peak memory
      peakMemory = Math.max(peakMemory, process.memoryUsage().heapUsed);

      // Simulate cleanup between chunks
      collectGarbage();
    }

    const totalDuration = Date.now() - startTime;
    const memoryIncrease = toMB(peakMemory - startMemory.heapUsed);
    const summedChunkTime = chunkResults.reduce((sum, r) => sum + r.duration, 0);

    chunkProcessing.push({
      chunkSize,
      chunks,
      totalItems,
      totalDuration,
      avgChunkTime: summedChunkTime / chunkResults.length,
      throughput: perSecond(totalItems, totalDuration).toFixed(2),
      peakMemoryMB: toMB(peakMemory).toFixed(2),
      memoryIncreaseMB: memoryIncrease.toFixed(2),
      memoryPerItemKB: ((memoryIncrease * 1024) / totalItems).toFixed(3)
    });
  }

  // Analyze memory efficiency
  let memoryEfficiency: MemoryEfficiency | null = null;
  if (chunkProcessing.length > 0) {
    const smallChunk = chunkProcessing[0];
    const largeChunk = chunkProcessing[chunkProcessing.length - 1];
    const smallMB = parseFloat(smallChunk.memoryIncreaseMB);
    const largeMB = parseFloat(largeChunk.memoryIncreaseMB);

    memoryEfficiency = {
      smallChunkMemory: smallChunk.memoryIncreaseMB,
      largeChunkMemory: largeChunk.memoryIncreaseMB,
      // A zero baseline would make the ratio NaN/Infinity; report it as unavailable.
      memoryScaling: smallMB > 0 ? (largeMB / smallMB).toFixed(2) : 'n/a',
      recommendation: largeMB < smallMB * 2
        ? 'Use larger chunks for better memory efficiency'
        : 'Use smaller chunks to reduce memory usage'
    };
  }

  return { streamingSupported: false, chunkProcessing, memoryEfficiency };
}

/**
 * Test 4: Corpus large file analysis.
 * Buckets every corpus XML file by size and, for files above 100KB, measures
 * parse + validate time and heap delta. Files that cannot be read or processed
 * are skipped (best effort) and counted in `skippedFiles`.
 */
async function analyzeCorpusLargeFiles() {
  const corpusFiles = await CorpusLoader.loadPattern('**/*.xml');
  const largeFiles: ReportRow[] = [];
  const results = {
    totalFiles: 0,
    skippedFiles: 0,
    largeFiles,
    sizeDistribution: {
      tiny: { count: 0, maxSize: 10 * 1024 }, // < 10KB
      small: { count: 0, maxSize: 100 * 1024 }, // < 100KB
      medium: { count: 0, maxSize: 1024 * 1024 }, // < 1MB
      large: { count: 0, maxSize: 10 * 1024 * 1024 }, // < 10MB
      huge: { count: 0, maxSize: Infinity } // >= 10MB
    },
    processingStats: {
      avgTimePerKB: 0,
      avgMemoryPerKB: 0
    }
  };

  // Buckets in ascending order; the last one is unbounded, so a match always exists.
  const buckets = [
    results.sizeDistribution.tiny,
    results.sizeDistribution.small,
    results.sizeDistribution.medium,
    results.sizeDistribution.large,
    results.sizeDistribution.huge
  ];

  // Analyze all files
  const fileSizes: number[] = [];
  const processingMetrics: Array<{ size: number; time: number; memory: number }> = [];

  for (const file of corpusFiles) {
    try {
      const stats = await plugins.fs.stat(file.path);
      const fileSize = stats.size;
      results.totalFiles++;

      // Categorize by size
      const bucket = buckets.find((candidate) => fileSize < candidate.maxSize) ?? results.sizeDistribution.huge;
      bucket.count++;

      // Process large files
      if (fileSize > 100 * 1024) { // Process files > 100KB
        const content = await plugins.fs.readFile(file.path, 'utf-8');

        const startTime = Date.now();
        const startMemory = process.memoryUsage();

        const format = FormatDetector.detectFormat(content);
        if (format && format !== 'unknown') {
          const invoice = await EInvoice.fromXml(content);
          await invoice.validate();
        }

        const processingTime = Date.now() - startTime;
        const endMemory = process.memoryUsage();
        const memoryUsed = toKB(endMemory.heapUsed - startMemory.heapUsed); // KB
        const sizeKB = toKB(fileSize);

        largeFiles.push({
          // Store the path string; the summary prints its basename.
          path: file.path,
          sizeKB: sizeKB.toFixed(2),
          format,
          processingTime,
          memoryUsedKB: memoryUsed.toFixed(2),
          timePerKB: (processingTime / sizeKB).toFixed(3),
          throughputKBps: perSecond(sizeKB, processingTime).toFixed(2)
        });

        processingMetrics.push({ size: fileSize, time: processingTime, memory: memoryUsed });
      }

      fileSizes.push(fileSize);
    } catch (error: unknown) {
      // Skip files that can't be processed, but keep the count visible.
      results.skippedFiles++;
    }
  }

  // Calculate statistics
  if (processingMetrics.length > 0) {
    const totalSizeKB = toKB(processingMetrics.reduce((sum, m) => sum + m.size, 0));
    const totalTime = processingMetrics.reduce((sum, m) => sum + m.time, 0);
    const totalMemory = processingMetrics.reduce((sum, m) => sum + m.memory, 0);

    results.processingStats.avgTimePerKB = parseFloat((totalTime / totalSizeKB).toFixed(3));
    results.processingStats.avgMemoryPerKB = parseFloat((totalMemory / totalSizeKB).toFixed(3));
  }

  // Sort large files by size, largest first
  largeFiles.sort((a, b) => parseFloat(b.sizeKB) - parseFloat(a.sizeKB));

  const totalBytes = fileSizes.reduce((a, b) => a + b, 0);

  return {
    ...results,
    largeFiles: largeFiles.slice(0, 10), // Top 10 largest
    // Always a formatted string, also when no file could be measured.
    avgFileSizeKB: fileSizes.length > 0 ? toKB(totalBytes / fileSizes.length).toFixed(2) : '0.00'
  };
}

/** Scenarios for the stress test; sizes grow until the first failure. */
const EXTREME_SCENARIOS: StressScenario[] = [
  {
    name: 'Wide invoice (many items)',
    countsItems: true,
    sizes: [1000, 5000, 10000, 20000, 50000],
    label: (size) => `${size} items`,
    generator: (count: number) => ({
      format: 'ubl' as const,
      data: {
        documentType: 'INVOICE',
        invoiceNumber: `EXTREME-WIDE-${count}`,
        issueDate: '2024-02-25',
        seller: { name: 'Seller', address: 'Address', country: 'US', taxId: 'US123' },
        buyer: { name: 'Buyer', address: 'Address', country: 'US', taxId: 'US456' },
        items: Array.from({ length: count }, (_, i) => ({
          description: `Item ${i + 1}`,
          quantity: 1,
          unitPrice: 10,
          vatRate: 10,
          lineTotal: 10
        })),
        totals: { netAmount: count * 10, vatAmount: count, grossAmount: count * 11 }
      }
    })
  },
  {
    name: 'Deep invoice (long descriptions)',
    countsItems: false,
    sizes: [100, 500, 1000, 2000, 5000], // KB
    label: (size) => `${size}KB text`,
    generator: (size: number) => ({
      format: 'ubl' as const,
      data: {
        documentType: 'INVOICE',
        invoiceNumber: `EXTREME-DEEP-${size}`,
        issueDate: '2024-02-25',
        seller: { name: 'Seller', address: 'Address', country: 'US', taxId: 'US123' },
        buyer: { name: 'Buyer', address: 'Address', country: 'US', taxId: 'US456' },
        items: [{
          description: 'A'.repeat(size * 1024), // Size in KB
          quantity: 1,
          unitPrice: 100,
          vatRate: 10,
          lineTotal: 100
        }],
        totals: { netAmount: 100, vatAmount: 10, grossAmount: 110 }
      }
    })
  }
];

/**
 * Test 5: Stress test with extreme sizes.
 * Runs each scenario at increasing sizes and stops a scenario at its first
 * failure; the first failure overall is recorded as `limits.failurePoint`.
 */
async function runExtremeSizeStressTest() {
  const scenarioRows: Array<{ scenario: string; tests: ReportRow[] }> = [];
  const limits: { maxItemsProcessed: number; maxSizeProcessedMB: number; failurePoint: FailurePoint | null } = {
    maxItemsProcessed: 0,
    maxSizeProcessedMB: 0,
    failurePoint: null
  };

  for (const scenario of EXTREME_SCENARIOS) {
    const testResults = { scenario: scenario.name, tests: [] as ReportRow[] };

    // Test increasing sizes
    for (const size of scenario.sizes) {
      try {
        const invoice = scenario.generator(size);

        const startTime = Date.now();
        const startMemory = process.memoryUsage();

        // Serialise the generated invoice data to UBL, then parse and validate it.
        const xml = createUBLInvoiceXML(invoice.data);
        const xmlSize = toMB(Buffer.byteLength(xml, 'utf-8')); // MB

        const parsed = await EInvoice.fromXml(xml);
        await parsed.validate();

        const endTime = Date.now();
        const endMemory = process.memoryUsage();

        testResults.tests.push({
          size: scenario.label(size),
          success: true,
          time: endTime - startTime,
          memoryMB: toMB(endMemory.heapUsed - startMemory.heapUsed).toFixed(2),
          xmlSizeMB: xmlSize.toFixed(2)
        });

        // Update limits
        if (scenario.countsItems && size > limits.maxItemsProcessed) {
          limits.maxItemsProcessed = size;
        }
        if (xmlSize > limits.maxSizeProcessedMB) {
          limits.maxSizeProcessedMB = xmlSize;
        }
      } catch (error: unknown) {
        const message = errorMessage(error);
        testResults.tests.push({ size: scenario.label(size), success: false, error: message });

        if (!limits.failurePoint) {
          limits.failurePoint = { scenario: scenario.name, size, error: message };
        }

        break; // Stop testing larger sizes after failure
      }
    }

    scenarioRows.push(testResults);
  }

  return { tests: scenarioRows, limits };
}

tap.test('PERF-08: Large File Processing - should handle large files efficiently', async (t) => {
  // Test 1: Large PEPPOL file processing
  const largePEPPOLProcessing = await performanceTracker.measureAsync(
    'large-peppol-processing',
    processLargePeppolFiles
  );

  // Test 2: Synthetic large file generation and processing
  const syntheticLargeFiles = await performanceTracker.measureAsync(
    'synthetic-large-files',
    runSyntheticLargeFiles
  );

  // Test 3: Memory-efficient large file streaming
  const streamingLargeFiles = await performanceTracker.measureAsync(
    'streaming-large-files',
    runChunkedProcessing
  );

  // Test 4: Corpus large file analysis
  const corpusLargeFiles = await performanceTracker.measureAsync(
    'corpus-large-file-analysis',
    analyzeCorpusLargeFiles
  );

  // Test 5: Stress test with extreme sizes
  const extremeSizeStressTest = await performanceTracker.measureAsync(
    'extreme-size-stress-test',
    runExtremeSizeStressTest
  );

  // Summary
  console.log('\n=== PERF-08: Large File Processing Test Summary ===');

  if (largePEPPOLProcessing.files.length > 0) {
    console.log('\nLarge PEPPOL File Processing:');
    for (const file of largePEPPOLProcessing.files) {
      if (file.error) continue;
      console.log(` ${file.path.split('/').pop()}:`);
      console.log(` - Size: ${file.sizeMB}MB, Items: ${file.itemCount}`);
      console.log(` - Processing: ${file.processingTime}ms (parse: ${file.parseTime}ms, validate: ${file.validationTime}ms)`);
      console.log(` - Throughput: ${file.throughputMBps}MB/s`);
      console.log(` - Memory used: ${file.memoryUsedMB}MB`);
    }
    console.log(` Peak memory: ${largePEPPOLProcessing.memoryProfile.peak.toFixed(2)}MB`);
  }

  console.log('\nSynthetic Large File Scaling:');
  console.log(' Size | XML Size | Total Time | Parse | Validate | Convert | Memory | Throughput');
  console.log(' ----------|----------|------------|--------|----------|---------|--------|----------');
  syntheticLargeFiles.tests.forEach((test: any) => {
    console.log(` ${test.size.padEnd(9)} | ${test.xmlSizeMB.padEnd(8)}MB | ${String(test.totalTime + 'ms').padEnd(10)} | ${String(test.parsing + 'ms').padEnd(6)} | ${String(test.validation + 'ms').padEnd(8)} | ${String(test.conversion + 'ms').padEnd(7)} | ${test.memoryUsedMB.padEnd(6)}MB | ${test.throughputMBps}MB/s`);
  });
  if (syntheticLargeFiles.scalingAnalysis) {
    console.log(` Scaling: ${syntheticLargeFiles.scalingAnalysis.type}`);
    console.log(` Formula: ${syntheticLargeFiles.scalingAnalysis.formula}`);
  }

  console.log('\nChunked Processing Efficiency:');
  console.log(' Chunk Size | Chunks | Duration | Throughput | Peak Memory | Memory/Item');
  console.log(' -----------|--------|----------|------------|-------------|------------');
  streamingLargeFiles.chunkProcessing.forEach((chunk: any) => {
    console.log(` ${String(chunk.chunkSize).padEnd(10)} | ${String(chunk.chunks).padEnd(6)} | ${String(chunk.totalDuration + 'ms').padEnd(8)} | ${chunk.throughput.padEnd(10)}/s | ${chunk.peakMemoryMB.padEnd(11)}MB | ${chunk.memoryPerItemKB}KB`);
  });
  if (streamingLargeFiles.memoryEfficiency) {
    console.log(` Recommendation: ${streamingLargeFiles.memoryEfficiency.recommendation}`);
  }

  console.log('\nCorpus Large File Analysis:');
  console.log(` Total files: ${corpusLargeFiles.totalFiles}`);
  if (corpusLargeFiles.skippedFiles > 0) {
    console.log(` Skipped files: ${corpusLargeFiles.skippedFiles}`);
  }
  console.log(` Size distribution:`);
  Object.entries(corpusLargeFiles.sizeDistribution).forEach(([size, data]: [string, any]) => {
    console.log(` - ${size}: ${data.count} files`);
  });
  console.log(` Largest processed files:`);
  for (const file of corpusLargeFiles.largeFiles.slice(0, 5)) {
    console.log(` - ${file.path.split('/').pop()}: ${file.sizeKB}KB, ${file.processingTime}ms, ${file.throughputKBps}KB/s`);
  }
  console.log(` Average processing: ${corpusLargeFiles.processingStats.avgTimePerKB}ms/KB`);

  console.log('\nExtreme Size Stress Test:');
  for (const scenario of extremeSizeStressTest.tests) {
    console.log(` ${scenario.scenario}:`);
    scenario.tests.forEach((test: any) => {
      console.log(` - ${test.size}: ${test.success ? `✅ ${test.time}ms, ${test.xmlSizeMB}MB XML` : `❌ ${test.error}`}`);
    });
  }
  console.log(` Limits:`);
  console.log(` - Max items processed: ${extremeSizeStressTest.limits.maxItemsProcessed}`);
  console.log(` - Max size processed: ${extremeSizeStressTest.limits.maxSizeProcessedMB.toFixed(2)}MB`);
  if (extremeSizeStressTest.limits.failurePoint) {
    console.log(` - Failure point: ${extremeSizeStressTest.limits.failurePoint.scenario} at ${extremeSizeStressTest.limits.failurePoint.size}`);
  }

  // Performance targets check
  console.log('\n=== Performance Targets Check ===');
  const syntheticRows = syntheticLargeFiles.tests;
  const largeFileThroughput = syntheticRows.length > 0
    ? parseFloat(syntheticRows[syntheticRows.length - 1].throughputMBps)
    : 0;
  const targetThroughput = 1; // Target: >1MB/s for large files

  console.log(`Large file throughput: ${largeFileThroughput}MB/s ${largeFileThroughput > targetThroughput ? '✅' : '⚠️'} (target: >${targetThroughput}MB/s)`);

  // Overall performance summary
  console.log('\n=== Overall Performance Summary ===');
  console.log(performanceTracker.getSummary());

  t.pass('Large file processing tests completed');
});

tap.start();