// Source path: einvoice/test/suite/einvoice_performance/test.perf-11.batch-processing.ts
/**
* @file test.perf-11.batch-processing.ts
* @description Performance tests for batch processing operations
*/
import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../suite/corpus.loader.js';
import { PerformanceTracker } from '../../suite/performance.tracker.js';
import * as os from 'os';
import { Worker, isMainThread, parentPort, workerData } from 'worker_threads';
// Shared fixtures for every measurement in this file:
// - corpusLoader supplies real invoice XML files from the test corpus.
// - performanceTracker aggregates named timing measurements under the
//   'PERF-11: Batch Processing' label and prints a summary at the end.
const corpusLoader = new CorpusLoader();
const performanceTracker = new PerformanceTracker('PERF-11: Batch Processing');
tap.test('PERF-11: Batch Processing - should handle batch operations efficiently', async (t) => {
  // Safely extract a human-readable message from an unknown catch value
  // without assuming it is an Error instance.
  const errorMessage = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Test 1: Batch size optimization — sweep several batch sizes over a
  // fixed workload and record which size yields the highest throughput.
  const batchSizeOptimization = await performanceTracker.measureAsync(
    'batch-size-optimization',
    async () => {
      const einvoice = new EInvoice();
      const results = {
        batchSizes: [],
        optimalBatchSize: 0,
        maxThroughput: 0
      };
      // Create test invoices (synthetic UBL payloads, 10 line items each)
      const totalInvoices = 500;
      const testInvoices = Array.from({ length: totalInvoices }, (_, i) => ({
        format: 'ubl' as const,
        data: {
          documentType: 'INVOICE',
          invoiceNumber: `BATCH-${i + 1}`,
          issueDate: '2024-03-10',
          seller: { name: `Seller ${i + 1}`, address: 'Address', country: 'US', taxId: `US${i}` },
          buyer: { name: `Buyer ${i + 1}`, address: 'Address', country: 'US', taxId: `US${i + 1000}` },
          items: Array.from({ length: 10 }, (_, j) => ({
            description: `Item ${j + 1}`,
            quantity: 1,
            unitPrice: 100,
            vatRate: 10,
            lineTotal: 100
          })),
          totals: { netAmount: 1000, vatAmount: 100, grossAmount: 1100 }
        }
      }));
      // Test different batch sizes
      const batchSizes = [1, 5, 10, 20, 50, 100, 200];
      for (const batchSize of batchSizes) {
        const startTime = Date.now();
        let processed = 0;
        let errors = 0;
        // Process in batches; items within a batch run concurrently.
        for (let i = 0; i < testInvoices.length; i += batchSize) {
          const batch = testInvoices.slice(i, Math.min(i + batchSize, testInvoices.length));
          // Process batch
          const batchPromises = batch.map(async (invoice) => {
            try {
              await einvoice.validateInvoice(invoice);
              await einvoice.convertFormat(invoice, 'cii');
              processed++;
              return true;
            } catch (error) {
              errors++;
              return false;
            }
          });
          await Promise.all(batchPromises);
        }
        // Guard divisions with Math.max so a 0 ms run cannot yield
        // Infinity/NaN in the reported metrics.
        const totalTime = Date.now() - startTime;
        const throughput = (processed / (Math.max(totalTime, 1) / 1000));
        const result = {
          batchSize,
          totalTime,
          processed,
          errors,
          throughput: throughput.toFixed(2),
          avgTimePerInvoice: (totalTime / Math.max(processed, 1)).toFixed(2),
          avgTimePerBatch: (totalTime / Math.ceil(totalInvoices / batchSize)).toFixed(2)
        };
        results.batchSizes.push(result);
        if (throughput > results.maxThroughput) {
          results.maxThroughput = throughput;
          results.optimalBatchSize = batchSize;
        }
      }
      return results;
    }
  );

  // Test 2: Batch operation types — time each pipeline stage (detect,
  // parse, validate, convert, full pipeline) over a fixed-size batch.
  const batchOperationTypes = await performanceTracker.measureAsync(
    'batch-operation-types',
    async () => {
      const einvoice = new EInvoice();
      const results = {
        operations: []
      };
      // Create test data: raw XML plus a matching structured invoice.
      const batchSize = 50;
      const testBatch = Array.from({ length: batchSize }, (_, i) => ({
        xml: `<?xml version="1.0"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><ID>BATCH-OP-${i}</ID><IssueDate>2024-03-10</IssueDate></Invoice>`,
        invoice: {
          format: 'ubl' as const,
          data: {
            documentType: 'INVOICE',
            invoiceNumber: `BATCH-OP-${i}`,
            issueDate: '2024-03-10',
            seller: { name: 'Batch Seller', address: 'Address', country: 'US', taxId: 'US123' },
            buyer: { name: 'Batch Buyer', address: 'Address', country: 'US', taxId: 'US456' },
            items: [{ description: 'Item', quantity: 1, unitPrice: 100, vatRate: 10, lineTotal: 100 }],
            totals: { netAmount: 100, vatAmount: 10, grossAmount: 110 }
          }
        }
      }));
      // Test different batch operations
      const operations = [
        {
          name: 'Batch format detection',
          fn: async (batch: any[]) => {
            const promises = batch.map(item => einvoice.detectFormat(item.xml));
            return await Promise.all(promises);
          }
        },
        {
          name: 'Batch parsing',
          fn: async (batch: any[]) => {
            const promises = batch.map(item => einvoice.parseInvoice(item.xml, 'ubl'));
            return await Promise.all(promises);
          }
        },
        {
          name: 'Batch validation',
          fn: async (batch: any[]) => {
            const promises = batch.map(item => einvoice.validateInvoice(item.invoice));
            return await Promise.all(promises);
          }
        },
        {
          name: 'Batch conversion',
          fn: async (batch: any[]) => {
            const promises = batch.map(item => einvoice.convertFormat(item.invoice, 'cii'));
            return await Promise.all(promises);
          }
        },
        {
          name: 'Batch pipeline',
          fn: async (batch: any[]) => {
            const promises = batch.map(async (item) => {
              const format = await einvoice.detectFormat(item.xml);
              const parsed = await einvoice.parseInvoice(item.xml, format || 'ubl');
              const validated = await einvoice.validateInvoice(parsed);
              const converted = await einvoice.convertFormat(parsed, 'cii');
              return { format, validated: validated.isValid, converted: !!converted };
            });
            return await Promise.all(promises);
          }
        }
      ];
      for (const operation of operations) {
        // Run each operation several times and report avg/min/max.
        const iterations = 10;
        const times = [];
        for (let i = 0; i < iterations; i++) {
          const startTime = Date.now();
          await operation.fn(testBatch);
          const endTime = Date.now();
          times.push(endTime - startTime);
        }
        const avgTime = times.reduce((a, b) => a + b, 0) / times.length;
        const minTime = Math.min(...times);
        const maxTime = Math.max(...times);
        results.operations.push({
          name: operation.name,
          batchSize,
          avgTime: avgTime.toFixed(2),
          minTime,
          maxTime,
          // Math.max guard prevents Infinity when avgTime rounds to 0 ms.
          throughput: (batchSize / (Math.max(avgTime, 1) / 1000)).toFixed(2),
          avgPerItem: (avgTime / batchSize).toFixed(2)
        });
      }
      return results;
    }
  );

  // Test 3: Batch error handling — compare fail-fast, continue-on-error
  // and parallel error-collection strategies on a batch with ~20% bad items.
  const batchErrorHandling = await performanceTracker.measureAsync(
    'batch-error-handling',
    async () => {
      const einvoice = new EInvoice();
      const results = {
        strategies: [],
        recommendation: null
      };
      // Create batch with some invalid invoices
      const batchSize = 100;
      const errorRate = 0.2; // 20% errors
      const testBatch = Array.from({ length: batchSize }, (_, i) => {
        const hasError = Math.random() < errorRate;
        if (hasError) {
          return {
            id: i,
            invoice: {
              format: 'ubl' as const,
              data: {
                // Invalid invoice - missing required fields
                invoiceNumber: `ERROR-${i}`,
                items: []
              }
            }
          };
        }
        return {
          id: i,
          invoice: {
            format: 'ubl' as const,
            data: {
              documentType: 'INVOICE',
              invoiceNumber: `VALID-${i}`,
              issueDate: '2024-03-10',
              seller: { name: 'Seller', address: 'Address', country: 'US', taxId: 'US123' },
              buyer: { name: 'Buyer', address: 'Address', country: 'US', taxId: 'US456' },
              items: [{ description: 'Item', quantity: 1, unitPrice: 100, vatRate: 10, lineTotal: 100 }],
              totals: { netAmount: 100, vatAmount: 10, grossAmount: 110 }
            }
          }
        };
      });
      // Test different error handling strategies
      const strategies = [
        {
          // Abort the whole batch on the first invalid invoice.
          name: 'Fail fast',
          fn: async (batch: any[]) => {
            const startTime = Date.now();
            const results = [];
            try {
              for (const item of batch) {
                const result = await einvoice.validateInvoice(item.invoice);
                if (!result.isValid) {
                  throw new Error(`Validation failed for invoice ${item.id}`);
                }
                results.push({ id: item.id, success: true });
              }
            } catch (error) {
              return {
                time: Date.now() - startTime,
                processed: results.length,
                failed: batch.length - results.length,
                results
              };
            }
            return {
              time: Date.now() - startTime,
              processed: results.length,
              failed: 0,
              results
            };
          }
        },
        {
          // Sequentially validate everything, recording failures as data.
          name: 'Continue on error',
          fn: async (batch: any[]) => {
            const startTime = Date.now();
            const results = [];
            let failed = 0;
            for (const item of batch) {
              try {
                const result = await einvoice.validateInvoice(item.invoice);
                results.push({ id: item.id, success: result.isValid });
                if (!result.isValid) failed++;
              } catch (error) {
                results.push({ id: item.id, success: false, error: errorMessage(error) });
                failed++;
              }
            }
            return {
              time: Date.now() - startTime,
              processed: results.length,
              failed,
              results
            };
          }
        },
        {
          // Validate the whole batch concurrently and collect errors.
          name: 'Parallel with error collection',
          fn: async (batch: any[]) => {
            const startTime = Date.now();
            const promises = batch.map(async (item) => {
              try {
                const result = await einvoice.validateInvoice(item.invoice);
                return { id: item.id, success: result.isValid };
              } catch (error) {
                return { id: item.id, success: false, error: errorMessage(error) };
              }
            });
            const results = await Promise.allSettled(promises);
            const processed = results.filter(r => r.status === 'fulfilled').map(r => (r as any).value);
            const failed = processed.filter(r => !r.success).length;
            return {
              time: Date.now() - startTime,
              processed: processed.length,
              failed,
              results: processed
            };
          }
        }
      ];
      for (const strategy of strategies) {
        const result = await strategy.fn(testBatch);
        results.strategies.push({
          name: strategy.name,
          time: result.time,
          processed: result.processed,
          failed: result.failed,
          // Fail-fast may process 0 items before aborting; avoid NaN/Infinity.
          successRate: result.processed > 0
            ? ((result.processed - result.failed) / result.processed * 100).toFixed(2)
            : '0.00',
          throughput: (result.processed / (Math.max(result.time, 1) / 1000)).toFixed(2)
        });
      }
      // Determine best strategy. BUGFIX: the previous reduce seeded the
      // accumulator with a *name string* while the callback treated it as a
      // strategy object, producing NaN scores and then a TypeError on the
      // second iteration. Reduce over the strategy objects themselves and
      // take .name once at the end.
      results.recommendation = results.strategies.reduce((best, current) => {
        // Balance between completion and speed
        const bestScore = parseFloat(best.successRate) * parseFloat(best.throughput);
        const currentScore = parseFloat(current.successRate) * parseFloat(current.throughput);
        return currentScore > bestScore ? current : best;
      }, results.strategies[0]).name;
      return results;
    }
  );

  // Test 4: Memory-efficient batch processing — compare all-in-memory,
  // chunked, and generator-driven processing of 1000 large invoices.
  const memoryEfficientBatch = await performanceTracker.measureAsync(
    'memory-efficient-batch',
    async () => {
      const einvoice = new EInvoice();
      const results = {
        approaches: [],
        memoryProfile: null
      };
      // Create large dataset (factory so items can be built on demand).
      const totalItems = 1000;
      const createInvoice = (id: number) => ({
        format: 'ubl' as const,
        data: {
          documentType: 'INVOICE',
          invoiceNumber: `MEM-BATCH-${id}`,
          issueDate: '2024-03-10',
          seller: { name: `Memory Test Seller ${id}`, address: 'Long Address '.repeat(10), country: 'US', taxId: `US${id}` },
          buyer: { name: `Memory Test Buyer ${id}`, address: 'Long Address '.repeat(10), country: 'US', taxId: `US${id + 10000}` },
          items: Array.from({ length: 20 }, (_, j) => ({
            description: `Detailed product description for item ${j + 1} with lots of text `.repeat(5),
            quantity: j + 1,
            unitPrice: 100 + j,
            vatRate: 19,
            lineTotal: (j + 1) * (100 + j)
          })),
          totals: { netAmount: 0, vatAmount: 0, grossAmount: 0 }
        }
      });
      // Approach 1: Load all in memory (baseline; highest peak memory).
      const approach1 = async () => {
        if (global.gc) global.gc();
        const startMemory = process.memoryUsage();
        const startTime = Date.now();
        // Create all invoices
        const allInvoices = Array.from({ length: totalItems }, (_, i) => createInvoice(i));
        // Process all
        const results = await Promise.all(
          allInvoices.map(invoice => einvoice.validateInvoice(invoice))
        );
        const endTime = Date.now();
        const endMemory = process.memoryUsage();
        return {
          approach: 'Load all in memory',
          time: endTime - startTime,
          peakMemory: (endMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024,
          processed: results.length,
          memoryPerItem: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / totalItems).toFixed(2)
        };
      };
      // Approach 2: Streaming with chunks created on demand.
      const approach2 = async () => {
        if (global.gc) global.gc();
        const startMemory = process.memoryUsage();
        const startTime = Date.now();
        const chunkSize = 50;
        let processed = 0;
        let peakMemory = 0;
        for (let i = 0; i < totalItems; i += chunkSize) {
          // Create chunk on demand
          const chunk = Array.from(
            { length: Math.min(chunkSize, totalItems - i) },
            (_, j) => createInvoice(i + j)
          );
          // Process chunk
          await Promise.all(chunk.map(invoice => einvoice.validateInvoice(invoice)));
          processed += chunk.length;
          // Track memory
          const currentMemory = process.memoryUsage();
          const memoryUsed = currentMemory.heapUsed - startMemory.heapUsed;
          if (memoryUsed > peakMemory) {
            peakMemory = memoryUsed;
          }
          // Allow GC between chunks
          if (global.gc && i % 200 === 0) global.gc();
        }
        const endTime = Date.now();
        return {
          approach: 'Streaming chunks',
          time: endTime - startTime,
          peakMemory: peakMemory / 1024 / 1024,
          processed,
          memoryPerItem: (peakMemory / 1024 / processed).toFixed(2)
        };
      };
      // Approach 3: Generator-based processing with small rolling batches.
      const approach3 = async () => {
        if (global.gc) global.gc();
        const startMemory = process.memoryUsage();
        const startTime = Date.now();
        let processed = 0;
        let peakMemory = 0;
        // Invoice generator — yields one invoice at a time, never
        // materializing the whole dataset.
        function* invoiceGenerator() {
          for (let i = 0; i < totalItems; i++) {
            yield createInvoice(i);
          }
        }
        // Process using generator
        const batchSize = 20;
        const batch = [];
        for (const invoice of invoiceGenerator()) {
          batch.push(einvoice.validateInvoice(invoice));
          if (batch.length >= batchSize) {
            await Promise.all(batch);
            processed += batch.length;
            batch.length = 0;
            // Track memory
            const currentMemory = process.memoryUsage();
            const memoryUsed = currentMemory.heapUsed - startMemory.heapUsed;
            if (memoryUsed > peakMemory) {
              peakMemory = memoryUsed;
            }
          }
        }
        // Process remaining
        if (batch.length > 0) {
          await Promise.all(batch);
          processed += batch.length;
        }
        const endTime = Date.now();
        return {
          approach: 'Generator-based',
          time: endTime - startTime,
          peakMemory: peakMemory / 1024 / 1024,
          processed,
          memoryPerItem: (peakMemory / 1024 / processed).toFixed(2)
        };
      };
      // Execute approaches sequentially so they don't pollute each
      // other's memory measurements.
      results.approaches.push(await approach1());
      results.approaches.push(await approach2());
      results.approaches.push(await approach3());
      // Analyze memory efficiency
      const sortedByMemory = [...results.approaches].sort((a, b) => a.peakMemory - b.peakMemory);
      const sortedBySpeed = [...results.approaches].sort((a, b) => a.time - b.time);
      results.memoryProfile = {
        mostMemoryEfficient: sortedByMemory[0].approach,
        fastest: sortedBySpeed[0].approach,
        recommendation: sortedByMemory[0].peakMemory < sortedBySpeed[0].peakMemory * 0.5 ?
          'Use memory-efficient approach for large datasets' :
          'Use fastest approach if memory is not constrained'
      };
      return results;
    }
  );

  // Test 5: Corpus batch processing — run detect/parse/validate over real
  // corpus files in parallel batches (capped at 5 batches for test speed).
  const corpusBatchProcessing = await performanceTracker.measureAsync(
    'corpus-batch-processing',
    async () => {
      const files = await corpusLoader.getFilesByPattern('**/*.xml');
      const einvoice = new EInvoice();
      const results = {
        totalFiles: files.length,
        batchResults: [],
        overallStats: {
          totalProcessed: 0,
          totalTime: 0,
          failures: 0,
          avgBatchTime: 0
        }
      };
      // Process corpus in batches
      const batchSize = 20;
      const maxBatches = 5; // Limit for testing
      const startTime = Date.now();
      for (let batchNum = 0; batchNum < maxBatches && batchNum * batchSize < files.length; batchNum++) {
        const batchStart = batchNum * batchSize;
        const batchFiles = files.slice(batchStart, batchStart + batchSize);
        const batchStartTime = Date.now();
        // BUGFIX: declare batchTime/throughput up front so the later
        // assignments write properties that exist on the object's type.
        const batchResults = {
          batchNumber: batchNum + 1,
          filesInBatch: batchFiles.length,
          processed: 0,
          formats: new Map<string, number>(),
          errors: 0,
          batchTime: 0,
          throughput: '0.00'
        };
        // Process batch in parallel
        const promises = batchFiles.map(async (file) => {
          try {
            const content = await plugins.fs.readFile(file, 'utf-8');
            const format = await einvoice.detectFormat(content);
            if (format && format !== 'unknown') {
              batchResults.formats.set(format, (batchResults.formats.get(format) || 0) + 1);
              const invoice = await einvoice.parseInvoice(content, format);
              await einvoice.validateInvoice(invoice);
              batchResults.processed++;
              return { success: true, format };
            } else {
              batchResults.errors++;
              return { success: false };
            }
          } catch (error) {
            batchResults.errors++;
            return { success: false, error: errorMessage(error) };
          }
        });
        await Promise.all(promises);
        const batchEndTime = Date.now();
        batchResults.batchTime = batchEndTime - batchStartTime;
        batchResults.throughput = (batchResults.processed / (Math.max(batchResults.batchTime, 1) / 1000)).toFixed(2);
        results.batchResults.push({
          ...batchResults,
          formats: Array.from(batchResults.formats.entries())
        });
        results.overallStats.totalProcessed += batchResults.processed;
        results.overallStats.failures += batchResults.errors;
      }
      results.overallStats.totalTime = Date.now() - startTime;
      results.overallStats.avgBatchTime = results.batchResults.length > 0 ?
        results.batchResults.reduce((sum, b) => sum + b.batchTime, 0) / results.batchResults.length : 0;
      return results;
    }
  );

  // Summary — human-readable tables for each measurement above.
  t.comment('\n=== PERF-11: Batch Processing Test Summary ===');
  t.comment('\nBatch Size Optimization:');
  t.comment('  Batch Size | Total Time | Processed | Throughput | Avg/Invoice | Avg/Batch');
  t.comment('  -----------|------------|-----------|------------|-------------|----------');
  batchSizeOptimization.result.batchSizes.forEach(size => {
    t.comment(`  ${String(size.batchSize).padEnd(10)} | ${String(size.totalTime + 'ms').padEnd(10)} | ${String(size.processed).padEnd(9)} | ${size.throughput.padEnd(10)}/s | ${size.avgTimePerInvoice.padEnd(11)}ms | ${size.avgTimePerBatch}ms`);
  });
  t.comment(`  Optimal batch size: ${batchSizeOptimization.result.optimalBatchSize} (${batchSizeOptimization.result.maxThroughput.toFixed(2)} ops/sec)`);
  t.comment('\nBatch Operation Types:');
  batchOperationTypes.result.operations.forEach(op => {
    t.comment(`  ${op.name}:`);
    t.comment(`    - Avg time: ${op.avgTime}ms (${op.minTime}-${op.maxTime}ms)`);
    t.comment(`    - Throughput: ${op.throughput} ops/sec`);
    t.comment(`    - Per item: ${op.avgPerItem}ms`);
  });
  t.comment('\nBatch Error Handling Strategies:');
  t.comment('  Strategy                  | Time   | Processed | Failed | Success Rate | Throughput');
  t.comment('  --------------------------|--------|-----------|--------|--------------|----------');
  batchErrorHandling.result.strategies.forEach(strategy => {
    t.comment(`  ${strategy.name.padEnd(25)} | ${String(strategy.time + 'ms').padEnd(6)} | ${String(strategy.processed).padEnd(9)} | ${String(strategy.failed).padEnd(6)} | ${strategy.successRate.padEnd(12)}% | ${strategy.throughput}/s`);
  });
  t.comment(`  Recommended strategy: ${batchErrorHandling.result.recommendation}`);
  t.comment('\nMemory-Efficient Batch Processing:');
  t.comment('  Approach           | Time    | Peak Memory | Processed | Memory/Item');
  t.comment('  -------------------|---------|-------------|-----------|------------');
  memoryEfficientBatch.result.approaches.forEach(approach => {
    t.comment(`  ${approach.approach.padEnd(18)} | ${String(approach.time + 'ms').padEnd(7)} | ${approach.peakMemory.toFixed(2).padEnd(11)}MB | ${String(approach.processed).padEnd(9)} | ${approach.memoryPerItem}KB`);
  });
  t.comment(`  Most memory efficient: ${memoryEfficientBatch.result.memoryProfile.mostMemoryEfficient}`);
  t.comment(`  Fastest: ${memoryEfficientBatch.result.memoryProfile.fastest}`);
  t.comment(`  ${memoryEfficientBatch.result.memoryProfile.recommendation}`);
  t.comment('\nCorpus Batch Processing:');
  t.comment(`  Total files: ${corpusBatchProcessing.result.totalFiles}`);
  t.comment(`  Batches processed: ${corpusBatchProcessing.result.batchResults.length}`);
  t.comment('  Batch # | Files | Processed | Errors | Time    | Throughput');
  t.comment('  --------|-------|-----------|--------|---------|----------');
  corpusBatchProcessing.result.batchResults.forEach(batch => {
    t.comment(`  ${String(batch.batchNumber).padEnd(7)} | ${String(batch.filesInBatch).padEnd(5)} | ${String(batch.processed).padEnd(9)} | ${String(batch.errors).padEnd(6)} | ${String(batch.batchTime + 'ms').padEnd(7)} | ${batch.throughput}/s`);
  });
  t.comment(`  Overall:`);
  t.comment(`    - Total processed: ${corpusBatchProcessing.result.overallStats.totalProcessed}`);
  t.comment(`    - Total failures: ${corpusBatchProcessing.result.overallStats.failures}`);
  t.comment(`    - Total time: ${corpusBatchProcessing.result.overallStats.totalTime}ms`);
  t.comment(`    - Avg batch time: ${corpusBatchProcessing.result.overallStats.avgBatchTime.toFixed(2)}ms`);
  // Performance targets check
  t.comment('\n=== Performance Targets Check ===');
  const optimalThroughput = batchSizeOptimization.result.maxThroughput;
  const targetThroughput = 50; // Target: >50 ops/sec for batch processing
  t.comment(`Batch throughput: ${optimalThroughput.toFixed(2)} ops/sec ${optimalThroughput > targetThroughput ? '✅' : '⚠️'} (target: >${targetThroughput} ops/sec)`);
  // Overall performance summary
  t.comment('\n=== Overall Performance Summary ===');
  performanceTracker.logSummary();
  t.end();
});
tap.start();