/**
 * @file test.perf-11.batch-processing.ts
 * @description Performance tests for batch processing operations
 */

import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../suite/corpus.loader.js';
import { PerformanceTracker } from '../../suite/performance.tracker.js';
import * as os from 'os';
import { Worker, isMainThread, parentPort, workerData } from 'worker_threads';

const corpusLoader = new CorpusLoader();
const performanceTracker = new PerformanceTracker('PERF-11: Batch Processing');

// ---------------------------------------------------------------------------
// Shared helpers and result shapes
// ---------------------------------------------------------------------------

// Fixture items are deliberately loose: some batches contain invalid invoices
// on purpose, so they cannot be given one precise shape.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type LooseBatchItem = any;

/** One row of the batch-size table (Test 1). */
interface BatchSizeRow {
  batchSize: number;
  totalTime: number;
  processed: number;
  errors: number;
  throughput: string;
  avgTimePerInvoice: string;
  avgTimePerBatch: string;
}

interface BatchSizeOptimizationResults {
  batchSizes: BatchSizeRow[];
  optimalBatchSize: number;
  maxThroughput: number;
}

/** Timing summary of one batch operation (Test 2). */
interface OperationRow {
  name: string;
  batchSize: number;
  avgTime: string;
  minTime: number;
  maxTime: number;
  throughput: string;
  avgPerItem: string;
}

interface BatchOperationResults {
  operations: OperationRow[];
}

/** Per-item outcome recorded by an error handling strategy (Test 3). */
interface ItemOutcome {
  id: number;
  success: boolean;
  error?: string;
}

/** Raw outcome of running one error handling strategy over a batch. */
interface StrategyOutcome {
  time: number;
  processed: number;
  failed: number;
  results: ItemOutcome[];
}

/** One row of the error handling strategy table (Test 3). */
interface StrategyRow {
  name: string;
  time: number;
  processed: number;
  failed: number;
  successRate: string;
  throughput: string;
}

interface BatchErrorHandlingResults {
  strategies: StrategyRow[];
  recommendation: string | null;
}

/** Measurements of one memory approach (Test 4). Memory values are in MB / KB. */
interface MemoryApproachRow {
  approach: string;
  time: number;
  peakMemory: number;
  processed: number;
  memoryPerItem: string;
}

interface MemoryProfile {
  mostMemoryEfficient: string;
  fastest: string;
  recommendation: string;
}

interface MemoryEfficientBatchResults {
  approaches: MemoryApproachRow[];
  memoryProfile: MemoryProfile;
}

/** One row of the corpus batch table (Test 5). */
interface CorpusBatchRow {
  batchNumber: number;
  filesInBatch: number;
  processed: number;
  formats: [string, number][];
  errors: number;
  batchTime: number;
  throughput: string;
}

interface CorpusBatchResults {
  totalFiles: number;
  batchResults: CorpusBatchRow[];
  overallStats: {
    totalProcessed: number;
    totalTime: number;
    failures: number;
    avgBatchTime: number;
  };
}

/**
 * Divides two numbers, returning 0 when the quotient is NaN or infinite.
 *
 * Date.now() has millisecond resolution, so a very fast run can report an
 * elapsed time of 0; without this guard the tables would show NaN/Infinity
 * and an infinite throughput could be picked as the "optimal" result.
 */
function safeDivide(numerator: number, denominator: number): number {
  const quotient = numerator / denominator;
  return Number.isFinite(quotient) ? quotient : 0;
}

/** Items per second for `count` items handled in `elapsedMs` milliseconds. */
function perSecond(count: number, elapsedMs: number): number {
  return safeDivide(count, elapsedMs / 1000);
}

/** Extracts a printable message from whatever value was thrown. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Runs the garbage collector when node exposes it (e.g. started with --expose-gc). */
function collectGarbage(): void {
  if (global.gc) global.gc();
}

// ---------------------------------------------------------------------------
// Test 1: Batch size optimization
// ---------------------------------------------------------------------------

/**
 * Validates and converts 500 generated invoices using several batch sizes and
 * records which batch size gives the highest throughput.
 */
async function runBatchSizeOptimization(): Promise<BatchSizeOptimizationResults> {
  const einvoice = new EInvoice();
  const results: BatchSizeOptimizationResults = {
    batchSizes: [],
    optimalBatchSize: 0,
    maxThroughput: 0
  };

  // Create test invoices
  const totalInvoices = 500;
  const testInvoices = Array.from({ length: totalInvoices }, (_, i) => ({
    format: 'ubl' as const,
    data: {
      documentType: 'INVOICE',
      invoiceNumber: `BATCH-${i + 1}`,
      issueDate: '2024-03-10',
      seller: { name: `Seller ${i + 1}`, address: 'Address', country: 'US', taxId: `US${i}` },
      buyer: { name: `Buyer ${i + 1}`, address: 'Address', country: 'US', taxId: `US${i + 1000}` },
      items: Array.from({ length: 10 }, (_, j) => ({
        description: `Item ${j + 1}`,
        quantity: 1,
        unitPrice: 100,
        vatRate: 10,
        lineTotal: 100
      })),
      totals: { netAmount: 1000, vatAmount: 100, grossAmount: 1100 }
    }
  }));

  // Test different batch sizes
  const batchSizes = [1, 5, 10, 20, 50, 100, 200];

  for (const batchSize of batchSizes) {
    const startTime = Date.now();
    let processed = 0;
    let errors = 0;

    // Process in batches: items inside a batch run concurrently, batches run one after another
    for (let i = 0; i < testInvoices.length; i += batchSize) {
      const batch = testInvoices.slice(i, Math.min(i + batchSize, testInvoices.length));

      const batchPromises = batch.map(async (invoice) => {
        try {
          await einvoice.validateInvoice(invoice);
          await einvoice.convertFormat(invoice, 'cii');
          processed++;
          return true;
        } catch (error) {
          // Failures are only counted; this test measures throughput, not correctness
          errors++;
          return false;
        }
      });

      await Promise.all(batchPromises);
    }

    const totalTime = Date.now() - startTime;
    const throughput = perSecond(processed, totalTime);

    results.batchSizes.push({
      batchSize,
      totalTime,
      processed,
      errors,
      throughput: throughput.toFixed(2),
      avgTimePerInvoice: safeDivide(totalTime, processed).toFixed(2),
      avgTimePerBatch: safeDivide(totalTime, Math.ceil(totalInvoices / batchSize)).toFixed(2)
    });

    if (throughput > results.maxThroughput) {
      results.maxThroughput = throughput;
      results.optimalBatchSize = batchSize;
    }
  }

  return results;
}

// ---------------------------------------------------------------------------
// Test 2: Batch operation types
// ---------------------------------------------------------------------------

/**
 * Times detection, parsing, validation, conversion and the full pipeline over
 * a batch of 50 items, 10 iterations each.
 */
async function runBatchOperationTypes(): Promise<BatchOperationResults> {
  const einvoice = new EInvoice();
  const results: BatchOperationResults = { operations: [] };

  // Create test data
  const batchSize = 50;
  const testBatch = Array.from({ length: batchSize }, (_, i) => ({
    // NOTE(review): this literal is not well-formed XML even though it is fed to
    // detectFormat/parseInvoice; presumably the markup was lost at some point — confirm.
    xml: `BATCH-OP-${i}2024-03-10`,
    invoice: {
      format: 'ubl' as const,
      data: {
        documentType: 'INVOICE',
        invoiceNumber: `BATCH-OP-${i}`,
        issueDate: '2024-03-10',
        seller: { name: 'Batch Seller', address: 'Address', country: 'US', taxId: 'US123' },
        buyer: { name: 'Batch Buyer', address: 'Address', country: 'US', taxId: 'US456' },
        items: [{ description: 'Item', quantity: 1, unitPrice: 100, vatRate: 10, lineTotal: 100 }],
        totals: { netAmount: 100, vatAmount: 10, grossAmount: 110 }
      }
    }
  }));

  // Test different batch operations
  const operations: { name: string; fn: (batch: LooseBatchItem[]) => Promise<unknown> }[] = [
    {
      name: 'Batch format detection',
      fn: async (batch) => Promise.all(batch.map((item) => einvoice.detectFormat(item.xml)))
    },
    {
      name: 'Batch parsing',
      fn: async (batch) => Promise.all(batch.map((item) => einvoice.parseInvoice(item.xml, 'ubl')))
    },
    {
      name: 'Batch validation',
      fn: async (batch) => Promise.all(batch.map((item) => einvoice.validateInvoice(item.invoice)))
    },
    {
      name: 'Batch conversion',
      fn: async (batch) => Promise.all(batch.map((item) => einvoice.convertFormat(item.invoice, 'cii')))
    },
    {
      name: 'Batch pipeline',
      fn: async (batch) =>
        Promise.all(
          batch.map(async (item) => {
            const format = await einvoice.detectFormat(item.xml);
            const parsed = await einvoice.parseInvoice(item.xml, format || 'ubl');
            const validated = await einvoice.validateInvoice(parsed);
            const converted = await einvoice.convertFormat(parsed, 'cii');
            return { format, validated: validated.isValid, converted: !!converted };
          })
        )
    }
  ];

  for (const operation of operations) {
    const iterations = 10;
    const times: number[] = [];

    for (let i = 0; i < iterations; i++) {
      const startTime = Date.now();
      await operation.fn(testBatch);
      const endTime = Date.now();
      times.push(endTime - startTime);
    }

    const avgTime = times.reduce((a, b) => a + b, 0) / times.length;
    const minTime = Math.min(...times);
    const maxTime = Math.max(...times);

    results.operations.push({
      name: operation.name,
      batchSize,
      avgTime: avgTime.toFixed(2),
      minTime,
      maxTime,
      throughput: perSecond(batchSize, avgTime).toFixed(2),
      avgPerItem: (avgTime / batchSize).toFixed(2)
    });
  }

  return results;
}

// ---------------------------------------------------------------------------
// Test 3: Batch error handling
// ---------------------------------------------------------------------------

/**
 * Runs three error handling strategies over a batch in which roughly 20% of
 * the invoices are invalid, and recommends the one with the best
 * success-rate x throughput score.
 */
async function runBatchErrorHandling(): Promise<BatchErrorHandlingResults> {
  const einvoice = new EInvoice();
  const results: BatchErrorHandlingResults = { strategies: [], recommendation: null };

  // Create batch with some invalid invoices
  const batchSize = 100;
  const errorRate = 0.2; // 20% errors

  const testBatch = Array.from({ length: batchSize }, (_, i) => {
    const hasError = Math.random() < errorRate;

    if (hasError) {
      return {
        id: i,
        invoice: {
          format: 'ubl' as const,
          data: {
            // Invalid invoice - missing required fields
            invoiceNumber: `ERROR-${i}`,
            items: []
          }
        }
      };
    }

    return {
      id: i,
      invoice: {
        format: 'ubl' as const,
        data: {
          documentType: 'INVOICE',
          invoiceNumber: `VALID-${i}`,
          issueDate: '2024-03-10',
          seller: { name: 'Seller', address: 'Address', country: 'US', taxId: 'US123' },
          buyer: { name: 'Buyer', address: 'Address', country: 'US', taxId: 'US456' },
          items: [{ description: 'Item', quantity: 1, unitPrice: 100, vatRate: 10, lineTotal: 100 }],
          totals: { netAmount: 100, vatAmount: 10, grossAmount: 110 }
        }
      }
    };
  });

  // Test different error handling strategies
  const strategies: { name: string; fn: (batch: LooseBatchItem[]) => Promise<StrategyOutcome> }[] = [
    {
      name: 'Fail fast',
      fn: async (batch) => {
        const startTime = Date.now();
        const outcomes: ItemOutcome[] = [];

        try {
          for (const item of batch) {
            const result = await einvoice.validateInvoice(item.invoice);
            if (!result.isValid) {
              throw new Error(`Validation failed for invoice ${item.id}`);
            }
            outcomes.push({ id: item.id, success: true });
          }
        } catch (error) {
          // Abort on the first failure: everything not validated so far counts as failed
          return {
            time: Date.now() - startTime,
            processed: outcomes.length,
            failed: batch.length - outcomes.length,
            results: outcomes
          };
        }

        return {
          time: Date.now() - startTime,
          processed: outcomes.length,
          failed: 0,
          results: outcomes
        };
      }
    },
    {
      name: 'Continue on error',
      fn: async (batch) => {
        const startTime = Date.now();
        const outcomes: ItemOutcome[] = [];
        let failed = 0;

        for (const item of batch) {
          try {
            const result = await einvoice.validateInvoice(item.invoice);
            outcomes.push({ id: item.id, success: result.isValid });
            if (!result.isValid) failed++;
          } catch (error: unknown) {
            outcomes.push({ id: item.id, success: false, error: describeError(error) });
            failed++;
          }
        }

        return {
          time: Date.now() - startTime,
          processed: outcomes.length,
          failed,
          results: outcomes
        };
      }
    },
    {
      name: 'Parallel with error collection',
      fn: async (batch) => {
        const startTime = Date.now();

        const promises = batch.map(async (item): Promise<ItemOutcome> => {
          try {
            const result = await einvoice.validateInvoice(item.invoice);
            return { id: item.id, success: result.isValid };
          } catch (error: unknown) {
            return { id: item.id, success: false, error: describeError(error) };
          }
        });

        const settled = await Promise.allSettled(promises);
        const outcomes: ItemOutcome[] = [];
        for (const entry of settled) {
          if (entry.status === 'fulfilled') outcomes.push(entry.value);
        }
        const failed = outcomes.filter((outcome) => !outcome.success).length;

        return {
          time: Date.now() - startTime,
          processed: outcomes.length,
          failed,
          results: outcomes
        };
      }
    }
  ];

  for (const strategy of strategies) {
    const outcome = await strategy.fn(testBatch);
    // Success rate is measured against the whole batch. Deriving it from
    // processed/failed breaks for "Fail fast", where `processed` holds only the
    // successes and `failed` holds everything else (the rate went negative).
    const succeeded = outcome.results.filter((item) => item.success).length;

    results.strategies.push({
      name: strategy.name,
      time: outcome.time,
      processed: outcome.processed,
      failed: outcome.failed,
      successRate: (safeDivide(succeeded, testBatch.length) * 100).toFixed(2),
      throughput: perSecond(outcome.processed, outcome.time).toFixed(2)
    });
  }

  // Determine best strategy: balance between completion and speed.
  // The reduce works on the rows themselves (not on their names) so that the
  // score of the current best candidate can actually be computed.
  const scoreOf = (row: StrategyRow): number =>
    parseFloat(row.successRate) * parseFloat(row.throughput);

  if (results.strategies.length > 0) {
    const bestStrategy = results.strategies.reduce((best, current) =>
      scoreOf(current) > scoreOf(best) ? current : best
    );
    results.recommendation = bestStrategy.name;
  }

  return results;
}

// ---------------------------------------------------------------------------
// Test 4: Memory-efficient batch processing
// ---------------------------------------------------------------------------

/**
 * Validates 1000 generated invoices with three approaches (all in memory,
 * chunked, generator-based) and compares elapsed time and heap growth.
 */
async function runMemoryEfficientBatch(): Promise<MemoryEfficientBatchResults> {
  const einvoice = new EInvoice();
  const approaches: MemoryApproachRow[] = [];

  // Create large dataset
  const totalItems = 1000;
  const createInvoice = (id: number) => ({
    format: 'ubl' as const,
    data: {
      documentType: 'INVOICE',
      invoiceNumber: `MEM-BATCH-${id}`,
      issueDate: '2024-03-10',
      seller: { name: `Memory Test Seller ${id}`, address: 'Long Address '.repeat(10), country: 'US', taxId: `US${id}` },
      buyer: { name: `Memory Test Buyer ${id}`, address: 'Long Address '.repeat(10), country: 'US', taxId: `US${id + 10000}` },
      items: Array.from({ length: 20 }, (_, j) => ({
        description: `Detailed product description for item ${j + 1} with lots of text `.repeat(5),
        quantity: j + 1,
        unitPrice: 100 + j,
        vatRate: 19,
        lineTotal: (j + 1) * (100 + j)
      })),
      totals: { netAmount: 0, vatAmount: 0, grossAmount: 0 }
    }
  });

  // Approach 1: Load all in memory
  const approach1 = async (): Promise<MemoryApproachRow> => {
    collectGarbage();
    const startMemory = process.memoryUsage();
    const startTime = Date.now();

    // Create all invoices
    const allInvoices = Array.from({ length: totalItems }, (_, i) => createInvoice(i));

    // Process all
    const validations = await Promise.all(
      allInvoices.map((invoice) => einvoice.validateInvoice(invoice))
    );

    const endTime = Date.now();
    const endMemory = process.memoryUsage();
    // Heap growth between start and end; reported in the "peak memory" column
    const heapGrowth = endMemory.heapUsed - startMemory.heapUsed;

    return {
      approach: 'Load all in memory',
      time: endTime - startTime,
      peakMemory: heapGrowth / 1024 / 1024,
      processed: validations.length,
      memoryPerItem: (heapGrowth / 1024 / totalItems).toFixed(2)
    };
  };

  // Approach 2: Streaming with chunks
  const approach2 = async (): Promise<MemoryApproachRow> => {
    collectGarbage();
    const startMemory = process.memoryUsage();
    const startTime = Date.now();
    const chunkSize = 50;
    let processed = 0;
    let peakMemory = 0;

    for (let i = 0; i < totalItems; i += chunkSize) {
      // Create chunk on demand
      const chunk = Array.from(
        { length: Math.min(chunkSize, totalItems - i) },
        (_, j) => createInvoice(i + j)
      );

      // Process chunk
      await Promise.all(chunk.map((invoice) => einvoice.validateInvoice(invoice)));
      processed += chunk.length;

      // Track memory
      const memoryUsed = process.memoryUsage().heapUsed - startMemory.heapUsed;
      if (memoryUsed > peakMemory) {
        peakMemory = memoryUsed;
      }

      // Allow GC between chunks
      if (global.gc && i % 200 === 0) global.gc();
    }

    const endTime = Date.now();

    return {
      approach: 'Streaming chunks',
      time: endTime - startTime,
      peakMemory: peakMemory / 1024 / 1024,
      processed,
      memoryPerItem: safeDivide(peakMemory / 1024, processed).toFixed(2)
    };
  };

  // Approach 3: Generator-based processing
  const approach3 = async (): Promise<MemoryApproachRow> => {
    collectGarbage();
    const startMemory = process.memoryUsage();
    const startTime = Date.now();
    let processed = 0;
    let peakMemory = 0;

    // Invoice generator
    function* invoiceGenerator() {
      for (let i = 0; i < totalItems; i++) {
        yield createInvoice(i);
      }
    }

    // Process using generator: collect in-flight validations and flush every `batchSize`
    const batchSize = 20;
    const batch: Promise<unknown>[] = [];

    for (const invoice of invoiceGenerator()) {
      batch.push(einvoice.validateInvoice(invoice));

      if (batch.length >= batchSize) {
        await Promise.all(batch);
        processed += batch.length;
        batch.length = 0; // reuse the same array for the next batch

        // Track memory
        const memoryUsed = process.memoryUsage().heapUsed - startMemory.heapUsed;
        if (memoryUsed > peakMemory) {
          peakMemory = memoryUsed;
        }
      }
    }

    // Process remaining
    if (batch.length > 0) {
      await Promise.all(batch);
      processed += batch.length;
    }

    const endTime = Date.now();

    return {
      approach: 'Generator-based',
      time: endTime - startTime,
      peakMemory: peakMemory / 1024 / 1024,
      processed,
      memoryPerItem: safeDivide(peakMemory / 1024, processed).toFixed(2)
    };
  };

  // Execute approaches one after another so their memory readings do not overlap
  approaches.push(await approach1());
  approaches.push(await approach2());
  approaches.push(await approach3());

  // Analyze memory efficiency
  const sortedByMemory = [...approaches].sort((a, b) => a.peakMemory - b.peakMemory);
  const sortedBySpeed = [...approaches].sort((a, b) => a.time - b.time);

  const memoryProfile: MemoryProfile = {
    mostMemoryEfficient: sortedByMemory[0].approach,
    fastest: sortedBySpeed[0].approach,
    recommendation: sortedByMemory[0].peakMemory < sortedBySpeed[0].peakMemory * 0.5
      ? 'Use memory-efficient approach for large datasets'
      : 'Use fastest approach if memory is not constrained'
  };

  return { approaches, memoryProfile };
}

// ---------------------------------------------------------------------------
// Test 5: Corpus batch processing
// ---------------------------------------------------------------------------

/**
 * Reads up to 5 batches of 20 corpus XML files and runs format detection,
 * parsing and validation on each file, collecting per-batch statistics.
 */
async function runCorpusBatchProcessing(): Promise<CorpusBatchResults> {
  const files = await corpusLoader.getFilesByPattern('**/*.xml');
  const einvoice = new EInvoice();
  const results: CorpusBatchResults = {
    totalFiles: files.length,
    batchResults: [],
    overallStats: {
      totalProcessed: 0,
      totalTime: 0,
      failures: 0,
      avgBatchTime: 0
    }
  };

  // Process corpus in batches
  const batchSize = 20;
  const maxBatches = 5; // Limit for testing
  const startTime = Date.now();

  for (let batchNum = 0; batchNum < maxBatches && batchNum * batchSize < files.length; batchNum++) {
    const batchStart = batchNum * batchSize;
    const batchFiles = files.slice(batchStart, batchStart + batchSize);
    const batchStartTime = Date.now();

    let processed = 0;
    let errors = 0;
    const formats = new Map<string, number>();

    // Process batch in parallel
    const promises = batchFiles.map(async (file) => {
      try {
        const content = await plugins.fs.readFile(file, 'utf-8');
        const format = await einvoice.detectFormat(content);

        if (format && format !== 'unknown') {
          formats.set(format, (formats.get(format) || 0) + 1);

          const invoice = await einvoice.parseInvoice(content, format);
          await einvoice.validateInvoice(invoice);

          processed++;
        } else {
          // Undetectable format counts as an error
          errors++;
        }
      } catch (error) {
        errors++;
      }
    });

    await Promise.all(promises);

    const batchTime = Date.now() - batchStartTime;

    results.batchResults.push({
      batchNumber: batchNum + 1,
      filesInBatch: batchFiles.length,
      processed,
      formats: Array.from(formats.entries()),
      errors,
      batchTime,
      throughput: perSecond(processed, batchTime).toFixed(2)
    });

    results.overallStats.totalProcessed += processed;
    results.overallStats.failures += errors;
  }

  results.overallStats.totalTime = Date.now() - startTime;
  results.overallStats.avgBatchTime = results.batchResults.length > 0
    ? results.batchResults.reduce((sum, b) => sum + b.batchTime, 0) / results.batchResults.length
    : 0;

  return results;
}

// ---------------------------------------------------------------------------
// Test entry point
// ---------------------------------------------------------------------------

tap.test('PERF-11: Batch Processing - should handle batch operations efficiently', async (t) => {
  // Test 1: Batch size optimization
  const batchSizeOptimization = await performanceTracker.measureAsync(
    'batch-size-optimization',
    runBatchSizeOptimization
  );

  // Test 2: Batch operation types
  const batchOperationTypes = await performanceTracker.measureAsync(
    'batch-operation-types',
    runBatchOperationTypes
  );

  // Test 3: Batch error handling
  const batchErrorHandling = await performanceTracker.measureAsync(
    'batch-error-handling',
    runBatchErrorHandling
  );

  // Test 4: Memory-efficient batch processing
  const memoryEfficientBatch = await performanceTracker.measureAsync(
    'memory-efficient-batch',
    runMemoryEfficientBatch
  );

  // Test 5: Corpus batch processing
  const corpusBatchProcessing = await performanceTracker.measureAsync(
    'corpus-batch-processing',
    runCorpusBatchProcessing
  );

  // Summary
  t.comment('\n=== PERF-11: Batch Processing Test Summary ===');

  t.comment('\nBatch Size Optimization:');
  t.comment(' Batch Size | Total Time | Processed | Throughput | Avg/Invoice | Avg/Batch');
  t.comment(' -----------|------------|-----------|------------|-------------|----------');
  batchSizeOptimization.result.batchSizes.forEach(size => {
    t.comment(` ${String(size.batchSize).padEnd(10)} | ${String(size.totalTime + 'ms').padEnd(10)} | ${String(size.processed).padEnd(9)} | ${size.throughput.padEnd(10)}/s | ${size.avgTimePerInvoice.padEnd(11)}ms | ${size.avgTimePerBatch}ms`);
  });
  t.comment(` Optimal batch size: ${batchSizeOptimization.result.optimalBatchSize} (${batchSizeOptimization.result.maxThroughput.toFixed(2)} ops/sec)`);

  t.comment('\nBatch Operation Types:');
  batchOperationTypes.result.operations.forEach(op => {
    t.comment(` ${op.name}:`);
    t.comment(` - Avg time: ${op.avgTime}ms (${op.minTime}-${op.maxTime}ms)`);
    t.comment(` - Throughput: ${op.throughput} ops/sec`);
    t.comment(` - Per item: ${op.avgPerItem}ms`);
  });

  t.comment('\nBatch Error Handling Strategies:');
  t.comment(' Strategy | Time | Processed | Failed | Success Rate | Throughput');
  t.comment(' --------------------------|--------|-----------|--------|--------------|----------');
  batchErrorHandling.result.strategies.forEach(strategy => {
    t.comment(` ${strategy.name.padEnd(25)} | ${String(strategy.time + 'ms').padEnd(6)} | ${String(strategy.processed).padEnd(9)} | ${String(strategy.failed).padEnd(6)} | ${strategy.successRate.padEnd(12)}% | ${strategy.throughput}/s`);
  });
  t.comment(` Recommended strategy: ${batchErrorHandling.result.recommendation}`);

  t.comment('\nMemory-Efficient Batch Processing:');
  t.comment(' Approach | Time | Peak Memory | Processed | Memory/Item');
  t.comment(' -------------------|---------|-------------|-----------|------------');
  memoryEfficientBatch.result.approaches.forEach(approach => {
    t.comment(` ${approach.approach.padEnd(18)} | ${String(approach.time + 'ms').padEnd(7)} | ${approach.peakMemory.toFixed(2).padEnd(11)}MB | ${String(approach.processed).padEnd(9)} | ${approach.memoryPerItem}KB`);
  });
  t.comment(` Most memory efficient: ${memoryEfficientBatch.result.memoryProfile.mostMemoryEfficient}`);
  t.comment(` Fastest: ${memoryEfficientBatch.result.memoryProfile.fastest}`);
  t.comment(` ${memoryEfficientBatch.result.memoryProfile.recommendation}`);

  t.comment('\nCorpus Batch Processing:');
  t.comment(` Total files: ${corpusBatchProcessing.result.totalFiles}`);
  t.comment(` Batches processed: ${corpusBatchProcessing.result.batchResults.length}`);
  t.comment(' Batch # | Files | Processed | Errors | Time | Throughput');
  t.comment(' --------|-------|-----------|--------|---------|----------');
  corpusBatchProcessing.result.batchResults.forEach(batch => {
    t.comment(` ${String(batch.batchNumber).padEnd(7)} | ${String(batch.filesInBatch).padEnd(5)} | ${String(batch.processed).padEnd(9)} | ${String(batch.errors).padEnd(6)} | ${String(batch.batchTime + 'ms').padEnd(7)} | ${batch.throughput}/s`);
  });
  t.comment(` Overall:`);
  t.comment(` - Total processed: ${corpusBatchProcessing.result.overallStats.totalProcessed}`);
  t.comment(` - Total failures: ${corpusBatchProcessing.result.overallStats.failures}`);
  t.comment(` - Total time: ${corpusBatchProcessing.result.overallStats.totalTime}ms`);
  t.comment(` - Avg batch time: ${corpusBatchProcessing.result.overallStats.avgBatchTime.toFixed(2)}ms`);

  // Performance targets check
  t.comment('\n=== Performance Targets Check ===');

  const optimalThroughput = batchSizeOptimization.result.maxThroughput;
  const targetThroughput = 50; // Target: >50 ops/sec for batch processing

  t.comment(`Batch throughput: ${optimalThroughput.toFixed(2)} ops/sec ${optimalThroughput > targetThroughput ? '✅' : '⚠️'} (target: >${targetThroughput} ops/sec)`);

  // Overall performance summary
  t.comment('\n=== Overall Performance Summary ===');
  performanceTracker.logSummary();

  t.end();
});

tap.start();