/**
 * @file test.perf-08.large-files.ts
 * @description Performance tests for large file processing
 */

import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../suite/corpus.loader.js';
import { PerformanceTracker } from '../../suite/performance.tracker.js';

const corpusLoader = new CorpusLoader();
const performanceTracker = new PerformanceTracker('PERF-08: Large File Processing');

tap.test('PERF-08: Large File Processing - should handle large files efficiently', async (t) => {
  // Test 1: Large PEPPOL file processing
  const largePEPPOLProcessing = await performanceTracker.measureAsync(
    'large-peppol-processing',
    async () => {
      const files = await corpusLoader.getFilesByPattern('**/PEPPOL/**/*.xml');
      const einvoice = new EInvoice();
      const results = {
        files: [] as any[],
        memoryProfile: {
          baseline: 0,
          peak: 0,
          increments: [] as number[]
        }
      };

      // Get baseline memory
      if (global.gc) global.gc();
      const baselineMemory = process.memoryUsage();
      results.memoryProfile.baseline = baselineMemory.heapUsed / 1024 / 1024;

      // Process PEPPOL files (known to be large)
      for (const file of files) {
        try {
          const startTime = Date.now();
          const startMemory = process.memoryUsage();

          // Read file
          const content = await plugins.fs.readFile(file, 'utf-8');
          const fileSize = Buffer.byteLength(content, 'utf-8');

          // Process file
          const format = await einvoice.detectFormat(content);
          const parseStart = Date.now();
          const invoice = await einvoice.parseInvoice(content, format || 'ubl');
          const parseEnd = Date.now();

          const validationStart = Date.now();
          const validationResult = await einvoice.validateInvoice(invoice);
          const validationEnd = Date.now();

          const endMemory = process.memoryUsage();
          const totalTime = Date.now() - startTime;
          const memoryUsed = (endMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024;

          if (endMemory.heapUsed > results.memoryProfile.peak) {
            results.memoryProfile.peak = endMemory.heapUsed / 1024 / 1024;
          }

          results.files.push({
            path: file,
            sizeKB: (fileSize / 1024).toFixed(2),
            sizeMB: (fileSize / 1024 / 1024).toFixed(2),
            format,
            processingTime: totalTime,
            parseTime: parseEnd - parseStart,
            validationTime: validationEnd - validationStart,
            memoryUsedMB: memoryUsed.toFixed(2),
            throughputMBps: ((fileSize / 1024 / 1024) / (totalTime / 1000)).toFixed(2),
            itemCount: invoice.data.items?.length || 0,
            valid: validationResult.isValid
          });

          results.memoryProfile.increments.push(memoryUsed);
        } catch (error) {
          results.files.push({
            path: file,
            error: (error as Error).message
          });
        }
      }

      return results;
    }
  );
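  // Note: heap deltas from process.memoryUsage() are only indicative unless the
  // garbage collector runs deterministically. The `if (global.gc) global.gc()`
  // calls in this file are no-ops unless Node.js is launched with the
  // --expose-gc flag passed to the process running the tests.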
  // Test 2: Synthetic large file generation and processing
  const syntheticLargeFiles = await performanceTracker.measureAsync(
    'synthetic-large-files',
    async () => {
      const einvoice = new EInvoice();
      const results = {
        tests: [] as any[],
        scalingAnalysis: null as any
      };

      // Generate invoices of increasing size
      const sizes = [
        { items: 100, name: '100 items' },
        { items: 500, name: '500 items' },
        { items: 1000, name: '1K items' },
        { items: 5000, name: '5K items' },
        { items: 10000, name: '10K items' }
      ];

      for (const size of sizes) {
        // Generate large invoice
        const invoice = {
          format: 'ubl' as const,
          data: {
            documentType: 'INVOICE',
            invoiceNumber: `LARGE-${size.items}`,
            issueDate: '2024-02-25',
            dueDate: '2024-03-25',
            currency: 'EUR',
            seller: {
              name: 'Large File Test Seller Corporation International GmbH',
              address: 'Hauptstraße 123-125, Building A, Floor 5',
              city: 'Berlin',
              postalCode: '10115',
              country: 'DE',
              taxId: 'DE123456789',
              registrationNumber: 'HRB123456',
              email: 'invoicing@largetest.de',
              phone: '+49 30 123456789',
              bankAccount: {
                iban: 'DE89370400440532013000',
                bic: 'COBADEFFXXX',
                bankName: 'Commerzbank AG'
              }
            },
            buyer: {
              name: 'Large File Test Buyer Enterprises Ltd.',
              address: '456 Commerce Boulevard, Suite 789',
              city: 'Munich',
              postalCode: '80331',
              country: 'DE',
              taxId: 'DE987654321',
              registrationNumber: 'HRB654321',
              email: 'ap@largebuyer.de',
              phone: '+49 89 987654321'
            },
            items: Array.from({ length: size.items }, (_, i) => ({
              itemId: `ITEM-${String(i + 1).padStart(6, '0')}`,
              description: `Product Item Number ${i + 1} - Detailed description with technical specifications, compliance information, country of origin, weight, dimensions, and special handling instructions. This is a very detailed description to simulate real-world invoice data with comprehensive product information.`,
              quantity: Math.floor(Math.random() * 100) + 1,
              unitPrice: Math.random() * 1000,
              vatRate: [0, 7, 19][Math.floor(Math.random() * 3)],
              lineTotal: 0,
              additionalInfo: {
                weight: `${(Math.random() * 50).toFixed(2)}kg`,
                dimensions: `${Math.floor(Math.random() * 100)}x${Math.floor(Math.random() * 100)}x${Math.floor(Math.random() * 100)}cm`,
                countryOfOrigin: ['DE', 'FR', 'IT', 'CN', 'US'][Math.floor(Math.random() * 5)],
                customsCode: `${Math.floor(Math.random() * 9000000000) + 1000000000}`,
                serialNumber: `SN-${Date.now()}-${i}`,
                batchNumber: `BATCH-${Math.floor(i / 100)}`
              }
            })),
            totals: {
              netAmount: 0,
              vatAmount: 0,
              grossAmount: 0
            },
            notes: 'This is a large invoice generated for performance testing purposes. ' +
              'It contains a significant number of line items to test the system\'s ability ' +
              'to handle large documents efficiently.'
          }
        };

        // Calculate totals
        invoice.data.items.forEach(item => {
          item.lineTotal = item.quantity * item.unitPrice;
          invoice.data.totals.netAmount += item.lineTotal;
          invoice.data.totals.vatAmount += item.lineTotal * (item.vatRate / 100);
        });
        invoice.data.totals.grossAmount = invoice.data.totals.netAmount + invoice.data.totals.vatAmount;

        // Measure processing
        if (global.gc) global.gc();
        const startMemory = process.memoryUsage();
        const startTime = Date.now();

        // Generate XML
        const xmlStart = Date.now();
        const xml = await einvoice.generateXML(invoice);
        const xmlEnd = Date.now();
        const xmlSize = Buffer.byteLength(xml, 'utf-8');

        // Parse back
        const parseStart = Date.now();
        const parsed = await einvoice.parseInvoice(xml, 'ubl');
        const parseEnd = Date.now();

        // Validate
        const validateStart = Date.now();
        const validation = await einvoice.validateInvoice(parsed);
        const validateEnd = Date.now();

        // Convert
        const convertStart = Date.now();
        const converted = await einvoice.convertFormat(parsed, 'cii');
        const convertEnd = Date.now();

        const endTime = Date.now();
        const endMemory = process.memoryUsage();

        results.tests.push({
          size: size.name,
          items: size.items,
          xmlSizeMB: (xmlSize / 1024 / 1024).toFixed(2),
          totalTime: endTime - startTime,
          xmlGeneration: xmlEnd - xmlStart,
          parsing: parseEnd - parseStart,
          validation: validateEnd - validateStart,
          conversion: convertEnd - convertStart,
          memoryUsedMB: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024).toFixed(2),
          memoryPerItemKB: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / size.items).toFixed(2),
          throughputMBps: ((xmlSize / 1024 / 1024) / ((endTime - startTime) / 1000)).toFixed(2),
          valid: validation.isValid
        });
      }
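      // The scaling analysis below fits Time(ms) = slope * items + intercept by
      // ordinary least squares over the measurements above:
      //   slope = (n*ΣXY − ΣX*ΣY) / (n*ΣX² − (ΣX)²),  intercept = (ΣY − slope*ΣX) / n
      // The Sub-/Super-linear labels are a heuristic read of the slope magnitude
      // in ms per item, not a fitted growth exponent.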
      // Analyze scaling
      if (results.tests.length >= 3) {
        const points = results.tests.map(t => ({ x: t.items, y: t.totalTime }));

        // Simple linear regression
        const n = points.length;
        const sumX = points.reduce((sum, p) => sum + p.x, 0);
        const sumY = points.reduce((sum, p) => sum + p.y, 0);
        const sumXY = points.reduce((sum, p) => sum + p.x * p.y, 0);
        const sumX2 = points.reduce((sum, p) => sum + p.x * p.x, 0);

        const slope = (n * sumXY - sumX * sumY) / (n * sumX2 - sumX * sumX);
        const intercept = (sumY - slope * sumX) / n;

        results.scalingAnalysis = {
          type: slope < 0.5 ? 'Sub-linear' : slope <= 1.5 ? 'Linear' : 'Super-linear',
          formula: `Time(ms) = ${slope.toFixed(3)} * items + ${intercept.toFixed(2)}`,
          msPerItem: slope.toFixed(3)
        };
      }

      return results;
    }
  );

  // Test 3: Memory-efficient large file streaming
  const streamingLargeFiles = await performanceTracker.measureAsync(
    'streaming-large-files',
    async () => {
      const einvoice = new EInvoice();
      const results = {
        streamingSupported: false,
        chunkProcessing: [] as any[],
        memoryEfficiency: null as any
      };

      // Simulate large file processing in chunks
      const totalItems = 10000;
      const chunkSizes = [100, 500, 1000, 2000];

      for (const chunkSize of chunkSizes) {
        const chunks = Math.ceil(totalItems / chunkSize);
        const startTime = Date.now();
        const startMemory = process.memoryUsage();
        let peakMemory = startMemory.heapUsed;

        // Process in chunks
        const chunkResults = [] as any[];

        for (let chunk = 0; chunk < chunks; chunk++) {
          const startItem = chunk * chunkSize;
          const endItem = Math.min(startItem + chunkSize, totalItems);

          // Create chunk invoice
          const chunkInvoice = {
            format: 'ubl' as const,
            data: {
              documentType: 'INVOICE',
              invoiceNumber: `CHUNK-${chunk}`,
              issueDate: '2024-02-25',
              seller: { name: 'Chunk Seller', address: 'Address', country: 'US', taxId: 'US123' },
              buyer: { name: 'Chunk Buyer', address: 'Address', country: 'US', taxId: 'US456' },
              items: Array.from({ length: endItem - startItem }, (_, i) => ({
                description: `Chunk ${chunk} Item ${i + 1}`,
                quantity: 1,
                unitPrice: 100,
                vatRate: 19,
                lineTotal: 100
              })),
              totals: {
                netAmount: (endItem - startItem) * 100,
                vatAmount: (endItem - startItem) * 19,
                grossAmount: (endItem - startItem) * 119
              }
            }
          };

          // Process chunk
          const chunkStart = Date.now();
          await einvoice.validateInvoice(chunkInvoice);
          const chunkEnd = Date.now();

          chunkResults.push({
            chunk,
            items: endItem - startItem,
            duration: chunkEnd - chunkStart
          });

          // Track peak memory
          const currentMemory = process.memoryUsage();
          if (currentMemory.heapUsed > peakMemory) {
            peakMemory = currentMemory.heapUsed;
          }

          // Simulate cleanup between chunks
          if (global.gc) global.gc();
        }

        const totalDuration = Date.now() - startTime;
        const memoryIncrease = (peakMemory - startMemory.heapUsed) / 1024 / 1024;

        results.chunkProcessing.push({
          chunkSize,
          chunks,
          totalItems,
          totalDuration,
          avgChunkTime: chunkResults.reduce((sum, r) => sum + r.duration, 0) / chunkResults.length,
          throughput: (totalItems / (totalDuration / 1000)).toFixed(2),
          peakMemoryMB: (peakMemory / 1024 / 1024).toFixed(2),
          memoryIncreaseMB: memoryIncrease.toFixed(2),
          memoryPerItemKB: ((memoryIncrease * 1024) / totalItems).toFixed(3)
        });
      }

      // Analyze memory efficiency
      if (results.chunkProcessing.length > 0) {
        const smallChunk = results.chunkProcessing[0];
        const largeChunk = results.chunkProcessing[results.chunkProcessing.length - 1];

        results.memoryEfficiency = {
          smallChunkMemory: smallChunk.memoryIncreaseMB,
          largeChunkMemory: largeChunk.memoryIncreaseMB,
          memoryScaling: (parseFloat(largeChunk.memoryIncreaseMB) / parseFloat(smallChunk.memoryIncreaseMB)).toFixed(2),
          recommendation: parseFloat(largeChunk.memoryIncreaseMB) < parseFloat(smallChunk.memoryIncreaseMB) * 2
            ? 'Use larger chunks for better memory efficiency'
            : 'Use smaller chunks to reduce memory usage'
        };
      }

      return results;
    }
  );
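  // Note: no true streaming parser is exercised above (results.streamingSupported
  // stays false). The chunked test approximates streaming by splitting one
  // logical document into independent smaller invoices, which bounds peak heap
  // usage at the cost of per-chunk validation overhead.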
  // Test 4: Corpus large file analysis
  const corpusLargeFiles = await performanceTracker.measureAsync(
    'corpus-large-file-analysis',
    async () => {
      const files = await corpusLoader.getFilesByPattern('**/*.xml');
      const einvoice = new EInvoice();
      const results = {
        totalFiles: 0,
        largeFiles: [] as any[],
        sizeDistribution: {
          tiny: { count: 0, maxSize: 10 * 1024 },          // < 10KB
          small: { count: 0, maxSize: 100 * 1024 },        // < 100KB
          medium: { count: 0, maxSize: 1024 * 1024 },      // < 1MB
          large: { count: 0, maxSize: 10 * 1024 * 1024 },  // < 10MB
          huge: { count: 0, maxSize: Infinity }            // >= 10MB
        },
        processingStats: {
          avgTimePerKB: 0 as number | string,
          avgMemoryPerKB: 0 as number | string
        }
      };

      // Analyze all files
      const fileSizes = [] as number[];
      const processingMetrics = [] as any[];

      for (const file of files) {
        try {
          const stats = await plugins.fs.stat(file);
          const fileSize = stats.size;
          results.totalFiles++;

          // Categorize by size
          if (fileSize < results.sizeDistribution.tiny.maxSize) {
            results.sizeDistribution.tiny.count++;
          } else if (fileSize < results.sizeDistribution.small.maxSize) {
            results.sizeDistribution.small.count++;
          } else if (fileSize < results.sizeDistribution.medium.maxSize) {
            results.sizeDistribution.medium.count++;
          } else if (fileSize < results.sizeDistribution.large.maxSize) {
            results.sizeDistribution.large.count++;
          } else {
            results.sizeDistribution.huge.count++;
          }

          // Process large files (> 100KB)
          if (fileSize > 100 * 1024) {
            const content = await plugins.fs.readFile(file, 'utf-8');
            const startTime = Date.now();
            const startMemory = process.memoryUsage();

            const format = await einvoice.detectFormat(content);
            if (format && format !== 'unknown') {
              const invoice = await einvoice.parseInvoice(content, format);
              await einvoice.validateInvoice(invoice);
            }

            const endTime = Date.now();
            const endMemory = process.memoryUsage();

            const processingTime = endTime - startTime;
            const memoryUsed = (endMemory.heapUsed - startMemory.heapUsed) / 1024; // KB

            results.largeFiles.push({
              path: file,
              sizeKB: (fileSize / 1024).toFixed(2),
              format,
              processingTime,
              memoryUsedKB: memoryUsed.toFixed(2),
              timePerKB: (processingTime / (fileSize / 1024)).toFixed(3),
              throughputKBps: ((fileSize / 1024) / (processingTime / 1000)).toFixed(2)
            });

            processingMetrics.push({
              size: fileSize,
              time: processingTime,
              memory: memoryUsed
            });
          }

          fileSizes.push(fileSize);
        } catch (error) {
          // Skip files that can't be processed
        }
      }

      // Calculate statistics
      if (processingMetrics.length > 0) {
        const totalSize = processingMetrics.reduce((sum, m) => sum + m.size, 0);
        const totalTime = processingMetrics.reduce((sum, m) => sum + m.time, 0);
        const totalMemory = processingMetrics.reduce((sum, m) => sum + m.memory, 0);

        results.processingStats.avgTimePerKB = (totalTime / (totalSize / 1024)).toFixed(3);
        results.processingStats.avgMemoryPerKB = (totalMemory / (totalSize / 1024)).toFixed(3);
      }

      // Sort large files by size (descending)
      results.largeFiles.sort((a, b) => parseFloat(b.sizeKB) - parseFloat(a.sizeKB));

      return {
        ...results,
        largeFiles: results.largeFiles.slice(0, 10), // Top 10 largest
        avgFileSizeKB: fileSizes.length > 0
          ? (fileSizes.reduce((a, b) => a + b, 0) / fileSizes.length / 1024).toFixed(2)
          : 0
      };
    }
  );
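  // The stress test below probes two orthogonal growth axes: "wide" invoices
  // (item count grows, structure stays shallow) and "deep" invoices (a single
  // item whose description text grows). Wide documents stress per-line parser
  // and validator overhead; deep documents stress string and buffer handling.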
  // Test 5: Stress test with extreme sizes
  const extremeSizeStressTest = await performanceTracker.measureAsync(
    'extreme-size-stress-test',
    async () => {
      const einvoice = new EInvoice();
      const results = {
        tests: [] as any[],
        limits: {
          maxItemsProcessed: 0,
          maxSizeProcessedMB: 0,
          failurePoint: null as any
        }
      };

      // Test extreme scenarios
      const extremeScenarios = [
        {
          name: 'Wide invoice (many items)',
          generator: (count: number) => ({
            format: 'ubl' as const,
            data: {
              documentType: 'INVOICE',
              invoiceNumber: `EXTREME-WIDE-${count}`,
              issueDate: '2024-02-25',
              seller: { name: 'Seller', address: 'Address', country: 'US', taxId: 'US123' },
              buyer: { name: 'Buyer', address: 'Address', country: 'US', taxId: 'US456' },
              items: Array.from({ length: count }, (_, i) => ({
                description: `Item ${i + 1}`,
                quantity: 1,
                unitPrice: 10,
                vatRate: 10,
                lineTotal: 10
              })),
              totals: { netAmount: count * 10, vatAmount: count, grossAmount: count * 11 }
            }
          })
        },
        {
          name: 'Deep invoice (long descriptions)',
          generator: (size: number) => ({
            format: 'ubl' as const,
            data: {
              documentType: 'INVOICE',
              invoiceNumber: `EXTREME-DEEP-${size}`,
              issueDate: '2024-02-25',
              seller: { name: 'Seller', address: 'Address', country: 'US', taxId: 'US123' },
              buyer: { name: 'Buyer', address: 'Address', country: 'US', taxId: 'US456' },
              items: [{
                description: 'A'.repeat(size * 1024), // Size in KB
                quantity: 1,
                unitPrice: 100,
                vatRate: 10,
                lineTotal: 100
              }],
              totals: { netAmount: 100, vatAmount: 10, grossAmount: 110 }
            }
          })
        }
      ];

      // Test each scenario
      for (const scenario of extremeScenarios) {
        const testResults = {
          scenario: scenario.name,
          tests: [] as any[]
        };

        // Test increasing sizes
        const sizes = scenario.name.includes('Wide')
          ? [1000, 5000, 10000, 20000, 50000]  // item counts
          : [100, 500, 1000, 2000, 5000];      // KB of description text

        for (const size of sizes) {
          try {
            const invoice = scenario.generator(size);
            const startTime = Date.now();
            const startMemory = process.memoryUsage();

            // Try to process
            const xml = await einvoice.generateXML(invoice);
            const xmlSize = Buffer.byteLength(xml, 'utf-8') / 1024 / 1024; // MB

            const parsed = await einvoice.parseInvoice(xml, invoice.format);
            await einvoice.validateInvoice(parsed);

            const endTime = Date.now();
            const endMemory = process.memoryUsage();

            testResults.tests.push({
              size: scenario.name.includes('Wide') ? `${size} items` : `${size}KB text`,
              success: true,
              time: endTime - startTime,
              memoryMB: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024).toFixed(2),
              xmlSizeMB: xmlSize.toFixed(2)
            });

            // Update limits
            if (scenario.name.includes('Wide') && size > results.limits.maxItemsProcessed) {
              results.limits.maxItemsProcessed = size;
            }
            if (xmlSize > results.limits.maxSizeProcessedMB) {
              results.limits.maxSizeProcessedMB = xmlSize;
            }
          } catch (error) {
            testResults.tests.push({
              size: scenario.name.includes('Wide') ? `${size} items` : `${size}KB text`,
              success: false,
              error: (error as Error).message
            });

            if (!results.limits.failurePoint) {
              results.limits.failurePoint = {
                scenario: scenario.name,
                size,
                error: (error as Error).message
              };
            }

            break; // Stop testing larger sizes after failure
          }
        }

        results.tests.push(testResults);
      }

      return results;
    }
  );
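  // Reporting note: PerformanceTracker.measureAsync is assumed to resolve to an
  // object whose `result` property carries the measured callback's return value;
  // the summary below reads `<measurement>.result.*` accordingly.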
  // Summary
  t.comment('\n=== PERF-08: Large File Processing Test Summary ===');

  if (largePEPPOLProcessing.result.files.length > 0) {
    t.comment('\nLarge PEPPOL File Processing:');
    largePEPPOLProcessing.result.files.forEach(file => {
      if (!file.error) {
        t.comment(`  ${file.path.split('/').pop()}:`);
        t.comment(`    - Size: ${file.sizeMB}MB, Items: ${file.itemCount}`);
        t.comment(`    - Processing: ${file.processingTime}ms (parse: ${file.parseTime}ms, validate: ${file.validationTime}ms)`);
        t.comment(`    - Throughput: ${file.throughputMBps}MB/s`);
        t.comment(`    - Memory used: ${file.memoryUsedMB}MB`);
      }
    });
    t.comment(`  Peak memory: ${largePEPPOLProcessing.result.memoryProfile.peak.toFixed(2)}MB`);
  }

  t.comment('\nSynthetic Large File Scaling:');
  t.comment('  Size      | XML Size | Total Time | Parse  | Validate | Convert | Memory | Throughput');
  t.comment('  ----------|----------|------------|--------|----------|---------|--------|----------');
  syntheticLargeFiles.result.tests.forEach(test => {
    t.comment(`  ${test.size.padEnd(9)} | ${test.xmlSizeMB.padEnd(8)}MB | ${String(test.totalTime + 'ms').padEnd(10)} | ${String(test.parsing + 'ms').padEnd(6)} | ${String(test.validation + 'ms').padEnd(8)} | ${String(test.conversion + 'ms').padEnd(7)} | ${test.memoryUsedMB.padEnd(6)}MB | ${test.throughputMBps}MB/s`);
  });

  if (syntheticLargeFiles.result.scalingAnalysis) {
    t.comment(`  Scaling: ${syntheticLargeFiles.result.scalingAnalysis.type}`);
    t.comment(`  Formula: ${syntheticLargeFiles.result.scalingAnalysis.formula}`);
  }

  t.comment('\nChunked Processing Efficiency:');
  t.comment('  Chunk Size | Chunks | Duration | Throughput | Peak Memory | Memory/Item');
  t.comment('  -----------|--------|----------|------------|-------------|------------');
  streamingLargeFiles.result.chunkProcessing.forEach(chunk => {
    t.comment(`  ${String(chunk.chunkSize).padEnd(10)} | ${String(chunk.chunks).padEnd(6)} | ${String(chunk.totalDuration + 'ms').padEnd(8)} | ${chunk.throughput.padEnd(10)}/s | ${chunk.peakMemoryMB.padEnd(11)}MB | ${chunk.memoryPerItemKB}KB`);
  });

  if (streamingLargeFiles.result.memoryEfficiency) {
    t.comment(`  Recommendation: ${streamingLargeFiles.result.memoryEfficiency.recommendation}`);
  }

  t.comment('\nCorpus Large File Analysis:');
  t.comment(`  Total files: ${corpusLargeFiles.result.totalFiles}`);
  t.comment(`  Size distribution:`);
  Object.entries(corpusLargeFiles.result.sizeDistribution).forEach(([size, data]: [string, any]) => {
    t.comment(`    - ${size}: ${data.count} files`);
  });
  t.comment(`  Largest processed files:`);
  corpusLargeFiles.result.largeFiles.slice(0, 5).forEach(file => {
    t.comment(`    - ${file.path.split('/').pop()}: ${file.sizeKB}KB, ${file.processingTime}ms, ${file.throughputKBps}KB/s`);
  });
  t.comment(`  Average processing: ${corpusLargeFiles.result.processingStats.avgTimePerKB}ms/KB`);

  t.comment('\nExtreme Size Stress Test:');
  extremeSizeStressTest.result.tests.forEach(scenario => {
    t.comment(`  ${scenario.scenario}:`);
    scenario.tests.forEach(test => {
      t.comment(`    - ${test.size}: ${test.success ? `✅ ${test.time}ms, ${test.xmlSizeMB}MB XML` : `❌ ${test.error}`}`);
    });
  });
  t.comment(`  Limits:`);
  t.comment(`    - Max items processed: ${extremeSizeStressTest.result.limits.maxItemsProcessed}`);
  t.comment(`    - Max size processed: ${extremeSizeStressTest.result.limits.maxSizeProcessedMB.toFixed(2)}MB`);
  if (extremeSizeStressTest.result.limits.failurePoint) {
    t.comment(`    - Failure point: ${extremeSizeStressTest.result.limits.failurePoint.scenario} at ${extremeSizeStressTest.result.limits.failurePoint.size}`);
  }
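  // The throughput check below uses the largest synthetic invoice (last row of
  // the scaling table) as the representative figure, since smaller documents
  // tend to be dominated by fixed per-document overhead rather than throughput.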
  // Performance targets check
  t.comment('\n=== Performance Targets Check ===');
  const largeFileThroughput = syntheticLargeFiles.result.tests.length > 0
    ? parseFloat(syntheticLargeFiles.result.tests[syntheticLargeFiles.result.tests.length - 1].throughputMBps)
    : 0;
  const targetThroughput = 1; // Target: >1MB/s for large files
  t.comment(`Large file throughput: ${largeFileThroughput}MB/s ${largeFileThroughput > targetThroughput ? '✅' : '⚠️'} (target: >${targetThroughput}MB/s)`);

  // Overall performance summary
  t.comment('\n=== Overall Performance Summary ===');
  performanceTracker.logSummary();

  t.end();
});

tap.start();