// Source path: einvoice/test/suite/einvoice_performance/test.perf-11.batch-processing.ts
/**
* @file test.perf-11.batch-processing.ts
* @description Performance tests for batch processing operations
*/
import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../suite/corpus.loader.js';
import { PerformanceTracker } from '../../suite/performance.tracker.js';
import * as os from 'os';
import { Worker, isMainThread, parentPort, workerData } from 'worker_threads';
// Shared fixtures for every measurement in this file:
// - corpusLoader supplies real invoice XML files from the test corpus.
// - performanceTracker aggregates named timing measurements under the
//   'PERF-11: Batch Processing' label and prints a summary at the end.
const corpusLoader = new CorpusLoader();
const performanceTracker = new PerformanceTracker('PERF-11: Batch Processing');
tap.test('PERF-11: Batch Processing - should handle batch operations efficiently', async (t) => {
  // Safely extract a human-readable message from an unknown catch value
  // without assuming it is an Error instance.
  const errorMessage = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Test 1: Batch size optimization — sweep several batch sizes over a
  // fixed workload and record which size yields the highest throughput.
  const batchSizeOptimization = await performanceTracker.measureAsync(
    'batch-size-optimization',
    async () => {
      const einvoice = new EInvoice();
      const results = {
        batchSizes: [],
        optimalBatchSize: 0,
        maxThroughput: 0
      };
      // Create test invoices (synthetic UBL payloads, 10 line items each)
      const totalInvoices = 500;
      const testInvoices = Array.from({ length: totalInvoices }, (_, i) => ({
        format: 'ubl' as const,
        data: {
          documentType: 'INVOICE',
          invoiceNumber: `BATCH-${i + 1}`,
          issueDate: '2024-03-10',
          seller: { name: `Seller ${i + 1}`, address: 'Address', country: 'US', taxId: `US${i}` },
          buyer: { name: `Buyer ${i + 1}`, address: 'Address', country: 'US', taxId: `US${i + 1000}` },
          items: Array.from({ length: 10 }, (_, j) => ({
            description: `Item ${j + 1}`,
            quantity: 1,
            unitPrice: 100,
            vatRate: 10,
            lineTotal: 100
          })),
          totals: { netAmount: 1000, vatAmount: 100, grossAmount: 1100 }
        }
      }));
      // Test different batch sizes
      const batchSizes = [1, 5, 10, 20, 50, 100, 200];
      for (const batchSize of batchSizes) {
        const startTime = Date.now();
        let processed = 0;
        let errors = 0;
        // Process in batches; items within a batch run concurrently.
        for (let i = 0; i < testInvoices.length; i += batchSize) {
          const batch = testInvoices.slice(i, Math.min(i + batchSize, testInvoices.length));
          // Process batch
          const batchPromises = batch.map(async (invoice) => {
            try {
              await einvoice.validateInvoice(invoice);
              await einvoice.convertFormat(invoice, 'cii');
              processed++;
              return true;
            } catch (error) {
              errors++;
              return false;
            }
          });
          await Promise.all(batchPromises);
        }
        // Guard divisions with Math.max so a 0 ms run cannot yield
        // Infinity/NaN in the reported metrics.
        const totalTime = Date.now() - startTime;
        const throughput = (processed / (Math.max(totalTime, 1) / 1000));
        const result = {
          batchSize,
          totalTime,
          processed,
          errors,
          throughput: throughput.toFixed(2),
          avgTimePerInvoice: (totalTime / Math.max(processed, 1)).toFixed(2),
          avgTimePerBatch: (totalTime / Math.ceil(totalInvoices / batchSize)).toFixed(2)
        };
        results.batchSizes.push(result);
        if (throughput > results.maxThroughput) {
          results.maxThroughput = throughput;
          results.optimalBatchSize = batchSize;
        }
      }
      return results;
    }
  );

  // Test 2: Batch operation types — time each pipeline stage (detect,
  // parse, validate, convert, full pipeline) over a fixed-size batch.
  const batchOperationTypes = await performanceTracker.measureAsync(
    'batch-operation-types',
    async () => {
      const einvoice = new EInvoice();
      const results = {
        operations: []
      };
      // Create test data: raw XML plus a matching structured invoice.
      const batchSize = 50;
      const testBatch = Array.from({ length: batchSize }, (_, i) => ({
        xml: `<?xml version="1.0"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><ID>BATCH-OP-${i}</ID><IssueDate>2024-03-10</IssueDate></Invoice>`,
        invoice: {
          format: 'ubl' as const,
          data: {
            documentType: 'INVOICE',
            invoiceNumber: `BATCH-OP-${i}`,
            issueDate: '2024-03-10',
            seller: { name: 'Batch Seller', address: 'Address', country: 'US', taxId: 'US123' },
            buyer: { name: 'Batch Buyer', address: 'Address', country: 'US', taxId: 'US456' },
            items: [{ description: 'Item', quantity: 1, unitPrice: 100, vatRate: 10, lineTotal: 100 }],
            totals: { netAmount: 100, vatAmount: 10, grossAmount: 110 }
          }
        }
      }));
      // Test different batch operations
      const operations = [
        {
          name: 'Batch format detection',
          fn: async (batch: any[]) => {
            const promises = batch.map(item => einvoice.detectFormat(item.xml));
            return await Promise.all(promises);
          }
        },
        {
          name: 'Batch parsing',
          fn: async (batch: any[]) => {
            const promises = batch.map(item => einvoice.parseInvoice(item.xml, 'ubl'));
            return await Promise.all(promises);
          }
        },
        {
          name: 'Batch validation',
          fn: async (batch: any[]) => {
            const promises = batch.map(item => einvoice.validateInvoice(item.invoice));
            return await Promise.all(promises);
          }
        },
        {
          name: 'Batch conversion',
          fn: async (batch: any[]) => {
            const promises = batch.map(item => einvoice.convertFormat(item.invoice, 'cii'));
            return await Promise.all(promises);
          }
        },
        {
          name: 'Batch pipeline',
          fn: async (batch: any[]) => {
            const promises = batch.map(async (item) => {
              const format = await einvoice.detectFormat(item.xml);
              const parsed = await einvoice.parseInvoice(item.xml, format || 'ubl');
              const validated = await einvoice.validateInvoice(parsed);
              const converted = await einvoice.convertFormat(parsed, 'cii');
              return { format, validated: validated.isValid, converted: !!converted };
            });
            return await Promise.all(promises);
          }
        }
      ];
      for (const operation of operations) {
        // Run each operation several times and report avg/min/max.
        const iterations = 10;
        const times = [];
        for (let i = 0; i < iterations; i++) {
          const startTime = Date.now();
          await operation.fn(testBatch);
          const endTime = Date.now();
          times.push(endTime - startTime);
        }
        const avgTime = times.reduce((a, b) => a + b, 0) / times.length;
        const minTime = Math.min(...times);
        const maxTime = Math.max(...times);
        results.operations.push({
          name: operation.name,
          batchSize,
          avgTime: avgTime.toFixed(2),
          minTime,
          maxTime,
          // Math.max guard prevents Infinity when avgTime rounds to 0 ms.
          throughput: (batchSize / (Math.max(avgTime, 1) / 1000)).toFixed(2),
          avgPerItem: (avgTime / batchSize).toFixed(2)
        });
      }
      return results;
    }
  );

  // Test 3: Batch error handling — compare fail-fast, continue-on-error
  // and parallel error-collection strategies on a batch with ~20% bad items.
  const batchErrorHandling = await performanceTracker.measureAsync(
    'batch-error-handling',
    async () => {
      const einvoice = new EInvoice();
      const results = {
        strategies: [],
        recommendation: null
      };
      // Create batch with some invalid invoices
      const batchSize = 100;
      const errorRate = 0.2; // 20% errors
      const testBatch = Array.from({ length: batchSize }, (_, i) => {
        const hasError = Math.random() < errorRate;
        if (hasError) {
          return {
            id: i,
            invoice: {
              format: 'ubl' as const,
              data: {
                // Invalid invoice - missing required fields
                invoiceNumber: `ERROR-${i}`,
                items: []
              }
            }
          };
        }
        return {
          id: i,
          invoice: {
            format: 'ubl' as const,
            data: {
              documentType: 'INVOICE',
              invoiceNumber: `VALID-${i}`,
              issueDate: '2024-03-10',
              seller: { name: 'Seller', address: 'Address', country: 'US', taxId: 'US123' },
              buyer: { name: 'Buyer', address: 'Address', country: 'US', taxId: 'US456' },
              items: [{ description: 'Item', quantity: 1, unitPrice: 100, vatRate: 10, lineTotal: 100 }],
              totals: { netAmount: 100, vatAmount: 10, grossAmount: 110 }
            }
          }
        };
      });
      // Test different error handling strategies
      const strategies = [
        {
          // Abort the whole batch on the first invalid invoice.
          name: 'Fail fast',
          fn: async (batch: any[]) => {
            const startTime = Date.now();
            const results = [];
            try {
              for (const item of batch) {
                const result = await einvoice.validateInvoice(item.invoice);
                if (!result.isValid) {
                  throw new Error(`Validation failed for invoice ${item.id}`);
                }
                results.push({ id: item.id, success: true });
              }
            } catch (error) {
              return {
                time: Date.now() - startTime,
                processed: results.length,
                failed: batch.length - results.length,
                results
              };
            }
            return {
              time: Date.now() - startTime,
              processed: results.length,
              failed: 0,
              results
            };
          }
        },
        {
          // Sequentially validate everything, recording failures as data.
          name: 'Continue on error',
          fn: async (batch: any[]) => {
            const startTime = Date.now();
            const results = [];
            let failed = 0;
            for (const item of batch) {
              try {
                const result = await einvoice.validateInvoice(item.invoice);
                results.push({ id: item.id, success: result.isValid });
                if (!result.isValid) failed++;
              } catch (error) {
                results.push({ id: item.id, success: false, error: errorMessage(error) });
                failed++;
              }
            }
            return {
              time: Date.now() - startTime,
              processed: results.length,
              failed,
              results
            };
          }
        },
        {
          // Validate the whole batch concurrently and collect errors.
          name: 'Parallel with error collection',
          fn: async (batch: any[]) => {
            const startTime = Date.now();
            const promises = batch.map(async (item) => {
              try {
                const result = await einvoice.validateInvoice(item.invoice);
                return { id: item.id, success: result.isValid };
              } catch (error) {
                return { id: item.id, success: false, error: errorMessage(error) };
              }
            });
            const results = await Promise.allSettled(promises);
            const processed = results.filter(r => r.status === 'fulfilled').map(r => (r as any).value);
            const failed = processed.filter(r => !r.success).length;
            return {
              time: Date.now() - startTime,
              processed: processed.length,
              failed,
              results: processed
            };
          }
        }
      ];
      for (const strategy of strategies) {
        const result = await strategy.fn(testBatch);
        results.strategies.push({
          name: strategy.name,
          time: result.time,
          processed: result.processed,
          failed: result.failed,
          // Fail-fast may process 0 items before aborting; avoid NaN/Infinity.
          successRate: result.processed > 0
            ? ((result.processed - result.failed) / result.processed * 100).toFixed(2)
            : '0.00',
          throughput: (result.processed / (Math.max(result.time, 1) / 1000)).toFixed(2)
        });
      }
      // Determine best strategy. BUGFIX: the previous reduce seeded the
      // accumulator with a *name string* while the callback treated it as a
      // strategy object, producing NaN scores and then a TypeError on the
      // second iteration. Reduce over the strategy objects themselves and
      // take .name once at the end.
      results.recommendation = results.strategies.reduce((best, current) => {
        // Balance between completion and speed
        const bestScore = parseFloat(best.successRate) * parseFloat(best.throughput);
        const currentScore = parseFloat(current.successRate) * parseFloat(current.throughput);
        return currentScore > bestScore ? current : best;
      }, results.strategies[0]).name;
      return results;
    }
  );

  // Test 4: Memory-efficient batch processing — compare all-in-memory,
  // chunked, and generator-driven processing of 1000 large invoices.
  const memoryEfficientBatch = await performanceTracker.measureAsync(
    'memory-efficient-batch',
    async () => {
      const einvoice = new EInvoice();
      const results = {
        approaches: [],
        memoryProfile: null
      };
      // Create large dataset (factory so items can be built on demand).
      const totalItems = 1000;
      const createInvoice = (id: number) => ({
        format: 'ubl' as const,
        data: {
          documentType: 'INVOICE',
          invoiceNumber: `MEM-BATCH-${id}`,
          issueDate: '2024-03-10',
          seller: { name: `Memory Test Seller ${id}`, address: 'Long Address '.repeat(10), country: 'US', taxId: `US${id}` },
          buyer: { name: `Memory Test Buyer ${id}`, address: 'Long Address '.repeat(10), country: 'US', taxId: `US${id + 10000}` },
          items: Array.from({ length: 20 }, (_, j) => ({
            description: `Detailed product description for item ${j + 1} with lots of text `.repeat(5),
            quantity: j + 1,
            unitPrice: 100 + j,
            vatRate: 19,
            lineTotal: (j + 1) * (100 + j)
          })),
          totals: { netAmount: 0, vatAmount: 0, grossAmount: 0 }
        }
      });
      // Approach 1: Load all in memory (baseline; highest peak memory).
      const approach1 = async () => {
        if (global.gc) global.gc();
        const startMemory = process.memoryUsage();
        const startTime = Date.now();
        // Create all invoices
        const allInvoices = Array.from({ length: totalItems }, (_, i) => createInvoice(i));
        // Process all
        const results = await Promise.all(
          allInvoices.map(invoice => einvoice.validateInvoice(invoice))
        );
        const endTime = Date.now();
        const endMemory = process.memoryUsage();
        return {
          approach: 'Load all in memory',
          time: endTime - startTime,
          peakMemory: (endMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024,
          processed: results.length,
          memoryPerItem: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / totalItems).toFixed(2)
        };
      };
      // Approach 2: Streaming with chunks created on demand.
      const approach2 = async () => {
        if (global.gc) global.gc();
        const startMemory = process.memoryUsage();
        const startTime = Date.now();
        const chunkSize = 50;
        let processed = 0;
        let peakMemory = 0;
        for (let i = 0; i < totalItems; i += chunkSize) {
          // Create chunk on demand
          const chunk = Array.from(
            { length: Math.min(chunkSize, totalItems - i) },
            (_, j) => createInvoice(i + j)
          );
          // Process chunk
          await Promise.all(chunk.map(invoice => einvoice.validateInvoice(invoice)));
          processed += chunk.length;
          // Track memory
          const currentMemory = process.memoryUsage();
          const memoryUsed = currentMemory.heapUsed - startMemory.heapUsed;
          if (memoryUsed > peakMemory) {
            peakMemory = memoryUsed;
          }
          // Allow GC between chunks
          if (global.gc && i % 200 === 0) global.gc();
        }
        const endTime = Date.now();
        return {
          approach: 'Streaming chunks',
          time: endTime - startTime,
          peakMemory: peakMemory / 1024 / 1024,
          processed,
          memoryPerItem: (peakMemory / 1024 / processed).toFixed(2)
        };
      };
      // Approach 3: Generator-based processing with small rolling batches.
      const approach3 = async () => {
        if (global.gc) global.gc();
        const startMemory = process.memoryUsage();
        const startTime = Date.now();
        let processed = 0;
        let peakMemory = 0;
        // Invoice generator — yields one invoice at a time, never
        // materializing the whole dataset.
        function* invoiceGenerator() {
          for (let i = 0; i < totalItems; i++) {
            yield createInvoice(i);
          }
        }
        // Process using generator
        const batchSize = 20;
        const batch = [];
        for (const invoice of invoiceGenerator()) {
          batch.push(einvoice.validateInvoice(invoice));
          if (batch.length >= batchSize) {
            await Promise.all(batch);
            processed += batch.length;
            batch.length = 0;
            // Track memory
            const currentMemory = process.memoryUsage();
            const memoryUsed = currentMemory.heapUsed - startMemory.heapUsed;
            if (memoryUsed > peakMemory) {
              peakMemory = memoryUsed;
            }
          }
        }
        // Process remaining
        if (batch.length > 0) {
          await Promise.all(batch);
          processed += batch.length;
        }
        const endTime = Date.now();
        return {
          approach: 'Generator-based',
          time: endTime - startTime,
          peakMemory: peakMemory / 1024 / 1024,
          processed,
          memoryPerItem: (peakMemory / 1024 / processed).toFixed(2)
        };
      };
      // Execute approaches sequentially so they don't pollute each
      // other's memory measurements.
      results.approaches.push(await approach1());
      results.approaches.push(await approach2());
      results.approaches.push(await approach3());
      // Analyze memory efficiency
      const sortedByMemory = [...results.approaches].sort((a, b) => a.peakMemory - b.peakMemory);
      const sortedBySpeed = [...results.approaches].sort((a, b) => a.time - b.time);
      results.memoryProfile = {
        mostMemoryEfficient: sortedByMemory[0].approach,
        fastest: sortedBySpeed[0].approach,
        recommendation: sortedByMemory[0].peakMemory < sortedBySpeed[0].peakMemory * 0.5 ?
          'Use memory-efficient approach for large datasets' :
          'Use fastest approach if memory is not constrained'
      };
      return results;
    }
  );

  // Test 5: Corpus batch processing — run detect/parse/validate over real
  // corpus files in parallel batches (capped at 5 batches for test speed).
  const corpusBatchProcessing = await performanceTracker.measureAsync(
    'corpus-batch-processing',
    async () => {
      const files = await corpusLoader.getFilesByPattern('**/*.xml');
      const einvoice = new EInvoice();
      const results = {
        totalFiles: files.length,
        batchResults: [],
        overallStats: {
          totalProcessed: 0,
          totalTime: 0,
          failures: 0,
          avgBatchTime: 0
        }
      };
      // Process corpus in batches
      const batchSize = 20;
      const maxBatches = 5; // Limit for testing
      const startTime = Date.now();
      for (let batchNum = 0; batchNum < maxBatches && batchNum * batchSize < files.length; batchNum++) {
        const batchStart = batchNum * batchSize;
        const batchFiles = files.slice(batchStart, batchStart + batchSize);
        const batchStartTime = Date.now();
        // BUGFIX: declare batchTime/throughput up front so the later
        // assignments write properties that exist on the object's type.
        const batchResults = {
          batchNumber: batchNum + 1,
          filesInBatch: batchFiles.length,
          processed: 0,
          formats: new Map<string, number>(),
          errors: 0,
          batchTime: 0,
          throughput: '0.00'
        };
        // Process batch in parallel
        const promises = batchFiles.map(async (file) => {
          try {
            const content = await plugins.fs.readFile(file, 'utf-8');
            const format = await einvoice.detectFormat(content);
            if (format && format !== 'unknown') {
              batchResults.formats.set(format, (batchResults.formats.get(format) || 0) + 1);
              const invoice = await einvoice.parseInvoice(content, format);
              await einvoice.validateInvoice(invoice);
              batchResults.processed++;
              return { success: true, format };
            } else {
              batchResults.errors++;
              return { success: false };
            }
          } catch (error) {
            batchResults.errors++;
            return { success: false, error: errorMessage(error) };
          }
        });
        await Promise.all(promises);
        const batchEndTime = Date.now();
        batchResults.batchTime = batchEndTime - batchStartTime;
        batchResults.throughput = (batchResults.processed / (Math.max(batchResults.batchTime, 1) / 1000)).toFixed(2);
        results.batchResults.push({
          ...batchResults,
          formats: Array.from(batchResults.formats.entries())
        });
        results.overallStats.totalProcessed += batchResults.processed;
        results.overallStats.failures += batchResults.errors;
      }
      results.overallStats.totalTime = Date.now() - startTime;
      results.overallStats.avgBatchTime = results.batchResults.length > 0 ?
        results.batchResults.reduce((sum, b) => sum + b.batchTime, 0) / results.batchResults.length : 0;
      return results;
    }
  );

  // Summary — human-readable tables for each measurement above.
  t.comment('\n=== PERF-11: Batch Processing Test Summary ===');
  t.comment('\nBatch Size Optimization:');
  t.comment('  Batch Size | Total Time | Processed | Throughput | Avg/Invoice | Avg/Batch');
  t.comment('  -----------|------------|-----------|------------|-------------|----------');
  batchSizeOptimization.result.batchSizes.forEach(size => {
    t.comment(`  ${String(size.batchSize).padEnd(10)} | ${String(size.totalTime + 'ms').padEnd(10)} | ${String(size.processed).padEnd(9)} | ${size.throughput.padEnd(10)}/s | ${size.avgTimePerInvoice.padEnd(11)}ms | ${size.avgTimePerBatch}ms`);
  });
  t.comment(`  Optimal batch size: ${batchSizeOptimization.result.optimalBatchSize} (${batchSizeOptimization.result.maxThroughput.toFixed(2)} ops/sec)`);
  t.comment('\nBatch Operation Types:');
  batchOperationTypes.result.operations.forEach(op => {
    t.comment(`  ${op.name}:`);
    t.comment(`    - Avg time: ${op.avgTime}ms (${op.minTime}-${op.maxTime}ms)`);
    t.comment(`    - Throughput: ${op.throughput} ops/sec`);
    t.comment(`    - Per item: ${op.avgPerItem}ms`);
  });
  t.comment('\nBatch Error Handling Strategies:');
  t.comment('  Strategy                  | Time   | Processed | Failed | Success Rate | Throughput');
  t.comment('  --------------------------|--------|-----------|--------|--------------|----------');
  batchErrorHandling.result.strategies.forEach(strategy => {
    t.comment(`  ${strategy.name.padEnd(25)} | ${String(strategy.time + 'ms').padEnd(6)} | ${String(strategy.processed).padEnd(9)} | ${String(strategy.failed).padEnd(6)} | ${strategy.successRate.padEnd(12)}% | ${strategy.throughput}/s`);
  });
  t.comment(`  Recommended strategy: ${batchErrorHandling.result.recommendation}`);
  t.comment('\nMemory-Efficient Batch Processing:');
  t.comment('  Approach           | Time    | Peak Memory | Processed | Memory/Item');
  t.comment('  -------------------|---------|-------------|-----------|------------');
  memoryEfficientBatch.result.approaches.forEach(approach => {
    t.comment(`  ${approach.approach.padEnd(18)} | ${String(approach.time + 'ms').padEnd(7)} | ${approach.peakMemory.toFixed(2).padEnd(11)}MB | ${String(approach.processed).padEnd(9)} | ${approach.memoryPerItem}KB`);
  });
  t.comment(`  Most memory efficient: ${memoryEfficientBatch.result.memoryProfile.mostMemoryEfficient}`);
  t.comment(`  Fastest: ${memoryEfficientBatch.result.memoryProfile.fastest}`);
  t.comment(`  ${memoryEfficientBatch.result.memoryProfile.recommendation}`);
  t.comment('\nCorpus Batch Processing:');
  t.comment(`  Total files: ${corpusBatchProcessing.result.totalFiles}`);
  t.comment(`  Batches processed: ${corpusBatchProcessing.result.batchResults.length}`);
  t.comment('  Batch # | Files | Processed | Errors | Time    | Throughput');
  t.comment('  --------|-------|-----------|--------|---------|----------');
  corpusBatchProcessing.result.batchResults.forEach(batch => {
    t.comment(`  ${String(batch.batchNumber).padEnd(7)} | ${String(batch.filesInBatch).padEnd(5)} | ${String(batch.processed).padEnd(9)} | ${String(batch.errors).padEnd(6)} | ${String(batch.batchTime + 'ms').padEnd(7)} | ${batch.throughput}/s`);
  });
  t.comment(`  Overall:`);
  t.comment(`    - Total processed: ${corpusBatchProcessing.result.overallStats.totalProcessed}`);
  t.comment(`    - Total failures: ${corpusBatchProcessing.result.overallStats.failures}`);
  t.comment(`    - Total time: ${corpusBatchProcessing.result.overallStats.totalTime}ms`);
  t.comment(`    - Avg batch time: ${corpusBatchProcessing.result.overallStats.avgBatchTime.toFixed(2)}ms`);
  // Performance targets check
  t.comment('\n=== Performance Targets Check ===');
  const optimalThroughput = batchSizeOptimization.result.maxThroughput;
  const targetThroughput = 50; // Target: >50 ops/sec for batch processing
  t.comment(`Batch throughput: ${optimalThroughput.toFixed(2)} ops/sec ${optimalThroughput > targetThroughput ? '✅' : '⚠️'} (target: >${targetThroughput} ops/sec)`);
  // Overall performance summary
  t.comment('\n=== Overall Performance Summary ===');
  performanceTracker.logSummary();
  t.end();
});
tap.start();