/**
* @file test.perf-08.large-files.ts
* @description Performance tests for large file processing
*/
import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../suite/corpus.loader.js';
import { PerformanceTracker } from '../../suite/performance.tracker.js';
const corpusLoader = new CorpusLoader();
const performanceTracker = new PerformanceTracker('PERF-08: Large File Processing');
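
// Heap figures reported below are indicative only. Run Node with --expose-gc so
// the explicit global.gc() calls can settle the heap between measurements;
// without that flag, baseline and peak numbers include GC timing noise.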
tap.test('PERF-08: Large File Processing - should handle large files efficiently', async (t) => {
// Test 1: Large PEPPOL file processing
const largePEPPOLProcessing = await performanceTracker.measureAsync(
'large-peppol-processing',
async () => {
const files = await corpusLoader.getFilesByPattern('**/PEPPOL/**/*.xml');
const einvoice = new EInvoice();
const results = {
        files: [] as any[],
memoryProfile: {
baseline: 0,
peak: 0,
          increments: [] as number[]
}
};
// Get baseline memory
if (global.gc) global.gc();
const baselineMemory = process.memoryUsage();
results.memoryProfile.baseline = baselineMemory.heapUsed / 1024 / 1024;
// Process PEPPOL files (known to be large)
for (const file of files) {
try {
const startTime = Date.now();
const startMemory = process.memoryUsage();
// Read file
const content = await plugins.fs.readFile(file, 'utf-8');
const fileSize = Buffer.byteLength(content, 'utf-8');
// Process file
const format = await einvoice.detectFormat(content);
const parseStart = Date.now();
const invoice = await einvoice.parseInvoice(content, format || 'ubl');
const parseEnd = Date.now();
const validationStart = Date.now();
const validationResult = await einvoice.validateInvoice(invoice);
const validationEnd = Date.now();
const endMemory = process.memoryUsage();
const totalTime = Date.now() - startTime;
const memoryUsed = (endMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024;
if (endMemory.heapUsed > results.memoryProfile.peak) {
results.memoryProfile.peak = endMemory.heapUsed / 1024 / 1024;
}
results.files.push({
path: file,
sizeKB: (fileSize / 1024).toFixed(2),
sizeMB: (fileSize / 1024 / 1024).toFixed(2),
format,
processingTime: totalTime,
parseTime: parseEnd - parseStart,
validationTime: validationEnd - validationStart,
memoryUsedMB: memoryUsed.toFixed(2),
throughputMBps: ((fileSize / 1024 / 1024) / (totalTime / 1000)).toFixed(2),
itemCount: invoice.data.items?.length || 0,
valid: validationResult.isValid
});
results.memoryProfile.increments.push(memoryUsed);
        } catch (error) {
          results.files.push({
            path: file,
            error: (error as Error).message
          });
        }
}
return results;
}
);
// Test 2: Synthetic large file generation and processing
const syntheticLargeFiles = await performanceTracker.measureAsync(
'synthetic-large-files',
async () => {
const einvoice = new EInvoice();
const results = {
        tests: [] as any[],
        scalingAnalysis: null as any
};
// Generate invoices of increasing size
const sizes = [
{ items: 100, name: '100 items' },
{ items: 500, name: '500 items' },
{ items: 1000, name: '1K items' },
{ items: 5000, name: '5K items' },
{ items: 10000, name: '10K items' }
];
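      // Step sizes grow roughly 2-5x per tier so the scaling regression further
      // down has enough spread to tell linear from super-linear growth.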
for (const size of sizes) {
// Generate large invoice
const invoice = {
format: 'ubl' as const,
data: {
documentType: 'INVOICE',
invoiceNumber: `LARGE-${size.items}`,
issueDate: '2024-02-25',
dueDate: '2024-03-25',
currency: 'EUR',
seller: {
name: 'Large File Test Seller Corporation International GmbH',
address: 'Hauptstraße 123-125, Building A, Floor 5',
city: 'Berlin',
postalCode: '10115',
country: 'DE',
taxId: 'DE123456789',
registrationNumber: 'HRB123456',
email: 'invoicing@largetest.de',
phone: '+49 30 123456789',
bankAccount: {
iban: 'DE89370400440532013000',
bic: 'COBADEFFXXX',
bankName: 'Commerzbank AG'
}
},
buyer: {
name: 'Large File Test Buyer Enterprises Ltd.',
address: '456 Commerce Boulevard, Suite 789',
city: 'Munich',
postalCode: '80331',
country: 'DE',
taxId: 'DE987654321',
registrationNumber: 'HRB654321',
email: 'ap@largebuyer.de',
phone: '+49 89 987654321'
},
items: Array.from({ length: size.items }, (_, i) => ({
itemId: `ITEM-${String(i + 1).padStart(6, '0')}`,
description: `Product Item Number ${i + 1} - Detailed description with technical specifications, compliance information, country of origin, weight, dimensions, and special handling instructions. This is a very detailed description to simulate real-world invoice data with comprehensive product information.`,
quantity: Math.floor(Math.random() * 100) + 1,
unitPrice: Math.random() * 1000,
vatRate: [0, 7, 19][Math.floor(Math.random() * 3)],
lineTotal: 0,
additionalInfo: {
weight: `${(Math.random() * 50).toFixed(2)}kg`,
dimensions: `${Math.floor(Math.random() * 100)}x${Math.floor(Math.random() * 100)}x${Math.floor(Math.random() * 100)}cm`,
countryOfOrigin: ['DE', 'FR', 'IT', 'CN', 'US'][Math.floor(Math.random() * 5)],
customsCode: `${Math.floor(Math.random() * 9000000000) + 1000000000}`,
serialNumber: `SN-${Date.now()}-${i}`,
batchNumber: `BATCH-${Math.floor(i / 100)}`
}
})),
totals: { netAmount: 0, vatAmount: 0, grossAmount: 0 },
notes: 'This is a large invoice generated for performance testing purposes. ' +
'It contains a significant number of line items to test the system\'s ability ' +
'to handle large documents efficiently.'
}
};
// Calculate totals
invoice.data.items.forEach(item => {
item.lineTotal = item.quantity * item.unitPrice;
invoice.data.totals.netAmount += item.lineTotal;
invoice.data.totals.vatAmount += item.lineTotal * (item.vatRate / 100);
});
invoice.data.totals.grossAmount = invoice.data.totals.netAmount + invoice.data.totals.vatAmount;
// Measure processing
if (global.gc) global.gc();
const startMemory = process.memoryUsage();
const startTime = Date.now();
// Generate XML
const xmlStart = Date.now();
const xml = await einvoice.generateXML(invoice);
const xmlEnd = Date.now();
const xmlSize = Buffer.byteLength(xml, 'utf-8');
// Parse back
const parseStart = Date.now();
const parsed = await einvoice.parseInvoice(xml, 'ubl');
const parseEnd = Date.now();
// Validate
const validateStart = Date.now();
const validation = await einvoice.validateInvoice(parsed);
const validateEnd = Date.now();
// Convert
const convertStart = Date.now();
const converted = await einvoice.convertFormat(parsed, 'cii');
const convertEnd = Date.now();
const endTime = Date.now();
const endMemory = process.memoryUsage();
results.tests.push({
size: size.name,
items: size.items,
xmlSizeMB: (xmlSize / 1024 / 1024).toFixed(2),
totalTime: endTime - startTime,
xmlGeneration: xmlEnd - xmlStart,
parsing: parseEnd - parseStart,
validation: validateEnd - validateStart,
conversion: convertEnd - convertStart,
memoryUsedMB: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024).toFixed(2),
memoryPerItemKB: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / size.items).toFixed(2),
throughputMBps: ((xmlSize / 1024 / 1024) / ((endTime - startTime) / 1000)).toFixed(2),
valid: validation.isValid
});
}
// Analyze scaling
if (results.tests.length >= 3) {
const points = results.tests.map(t => ({
x: t.items,
y: t.totalTime
}));
// Simple linear regression
const n = points.length;
const sumX = points.reduce((sum, p) => sum + p.x, 0);
const sumY = points.reduce((sum, p) => sum + p.y, 0);
const sumXY = points.reduce((sum, p) => sum + p.x * p.y, 0);
const sumX2 = points.reduce((sum, p) => sum + p.x * p.x, 0);
        // Least-squares fit over (items, totalTime) pairs
        const slope = (n * sumXY - sumX * sumY) / (n * sumX2 - sumX * sumX);
        const intercept = (sumY - slope * sumX) / n;
        // The fitted slope is ms per item; a straight-line fit alone cannot
        // classify growth, so compare per-item time at the smallest and
        // largest sizes instead.
        const firstPerItem = points[0].y / points[0].x;
        const lastPerItem = points[points.length - 1].y / points[points.length - 1].x;
        const perItemRatio = lastPerItem / firstPerItem;
        results.scalingAnalysis = {
          type: perItemRatio < 0.8 ? 'Sub-linear' : perItemRatio <= 1.25 ? 'Linear' : 'Super-linear',
          formula: `Time(ms) = ${slope.toFixed(3)} * items + ${intercept.toFixed(2)}`,
          msPerItem: slope.toFixed(3)
        };
}
return results;
}
);
// Test 3: Memory-efficient large file streaming
const streamingLargeFiles = await performanceTracker.measureAsync(
'streaming-large-files',
async () => {
const einvoice = new EInvoice();
const results = {
streamingSupported: false,
        chunkProcessing: [] as any[],
        memoryEfficiency: null as any
};
// Simulate large file processing in chunks
const totalItems = 10000;
const chunkSizes = [100, 500, 1000, 2000];
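      // The sweep below measures the chunk-size trade-off: larger chunks mean
      // fewer validation calls (higher throughput) but a larger working set per
      // call (higher peak heap); smaller chunks bound memory at the cost of
      // per-chunk overhead.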
for (const chunkSize of chunkSizes) {
const chunks = Math.ceil(totalItems / chunkSize);
const startTime = Date.now();
const startMemory = process.memoryUsage();
let peakMemory = startMemory.heapUsed;
// Process in chunks
const chunkResults = [];
for (let chunk = 0; chunk < chunks; chunk++) {
const startItem = chunk * chunkSize;
const endItem = Math.min(startItem + chunkSize, totalItems);
// Create chunk invoice
const chunkInvoice = {
format: 'ubl' as const,
data: {
documentType: 'INVOICE',
invoiceNumber: `CHUNK-${chunk}`,
issueDate: '2024-02-25',
seller: { name: 'Chunk Seller', address: 'Address', country: 'US', taxId: 'US123' },
buyer: { name: 'Chunk Buyer', address: 'Address', country: 'US', taxId: 'US456' },
items: Array.from({ length: endItem - startItem }, (_, i) => ({
description: `Chunk ${chunk} Item ${i + 1}`,
quantity: 1,
unitPrice: 100,
vatRate: 19,
lineTotal: 100
})),
totals: {
netAmount: (endItem - startItem) * 100,
vatAmount: (endItem - startItem) * 19,
grossAmount: (endItem - startItem) * 119
}
}
};
// Process chunk
const chunkStart = Date.now();
await einvoice.validateInvoice(chunkInvoice);
const chunkEnd = Date.now();
chunkResults.push({
chunk,
items: endItem - startItem,
duration: chunkEnd - chunkStart
});
// Track peak memory
const currentMemory = process.memoryUsage();
if (currentMemory.heapUsed > peakMemory) {
peakMemory = currentMemory.heapUsed;
}
// Simulate cleanup between chunks
if (global.gc) global.gc();
}
const totalDuration = Date.now() - startTime;
const memoryIncrease = (peakMemory - startMemory.heapUsed) / 1024 / 1024;
results.chunkProcessing.push({
chunkSize,
chunks,
totalItems,
totalDuration,
avgChunkTime: chunkResults.reduce((sum, r) => sum + r.duration, 0) / chunkResults.length,
throughput: (totalItems / (totalDuration / 1000)).toFixed(2),
peakMemoryMB: (peakMemory / 1024 / 1024).toFixed(2),
memoryIncreaseMB: memoryIncrease.toFixed(2),
memoryPerItemKB: ((memoryIncrease * 1024) / totalItems).toFixed(3)
});
}
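      // A memoryScaling ratio > 1 below means peak heap grew with chunk size;
      // the heuristic tolerates up to 2x growth before recommending smaller
      // chunks.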
// Analyze memory efficiency
if (results.chunkProcessing.length > 0) {
const smallChunk = results.chunkProcessing[0];
const largeChunk = results.chunkProcessing[results.chunkProcessing.length - 1];
results.memoryEfficiency = {
smallChunkMemory: smallChunk.memoryIncreaseMB,
largeChunkMemory: largeChunk.memoryIncreaseMB,
memoryScaling: (parseFloat(largeChunk.memoryIncreaseMB) / parseFloat(smallChunk.memoryIncreaseMB)).toFixed(2),
recommendation: parseFloat(largeChunk.memoryIncreaseMB) < parseFloat(smallChunk.memoryIncreaseMB) * 2 ?
'Use larger chunks for better memory efficiency' :
'Use smaller chunks to reduce memory usage'
};
}
return results;
}
);
// Test 4: Corpus large file analysis
const corpusLargeFiles = await performanceTracker.measureAsync(
'corpus-large-file-analysis',
async () => {
const files = await corpusLoader.getFilesByPattern('**/*.xml');
const einvoice = new EInvoice();
const results = {
totalFiles: 0,
        largeFiles: [] as any[],
sizeDistribution: {
tiny: { count: 0, maxSize: 10 * 1024 }, // < 10KB
small: { count: 0, maxSize: 100 * 1024 }, // < 100KB
medium: { count: 0, maxSize: 1024 * 1024 }, // < 1MB
large: { count: 0, maxSize: 10 * 1024 * 1024 }, // < 10MB
huge: { count: 0, maxSize: Infinity } // >= 10MB
},
        processingStats: {
          avgTimePerKB: '0', // ms per KB of input, computed below
          avgMemoryPerKB: '0' // KB of heap per KB of input, computed below
        }
};
// Analyze all files
const fileSizes = [];
const processingMetrics = [];
for (const file of files) {
try {
const stats = await plugins.fs.stat(file);
const fileSize = stats.size;
results.totalFiles++;
// Categorize by size
if (fileSize < results.sizeDistribution.tiny.maxSize) {
results.sizeDistribution.tiny.count++;
} else if (fileSize < results.sizeDistribution.small.maxSize) {
results.sizeDistribution.small.count++;
} else if (fileSize < results.sizeDistribution.medium.maxSize) {
results.sizeDistribution.medium.count++;
} else if (fileSize < results.sizeDistribution.large.maxSize) {
results.sizeDistribution.large.count++;
} else {
results.sizeDistribution.huge.count++;
}
// Process large files
if (fileSize > 100 * 1024) { // Process files > 100KB
const content = await plugins.fs.readFile(file, 'utf-8');
const startTime = Date.now();
const startMemory = process.memoryUsage();
const format = await einvoice.detectFormat(content);
if (format && format !== 'unknown') {
const invoice = await einvoice.parseInvoice(content, format);
await einvoice.validateInvoice(invoice);
}
const endTime = Date.now();
const endMemory = process.memoryUsage();
const processingTime = endTime - startTime;
const memoryUsed = (endMemory.heapUsed - startMemory.heapUsed) / 1024; // KB
results.largeFiles.push({
path: file,
sizeKB: (fileSize / 1024).toFixed(2),
format,
processingTime,
memoryUsedKB: memoryUsed.toFixed(2),
timePerKB: (processingTime / (fileSize / 1024)).toFixed(3),
throughputKBps: ((fileSize / 1024) / (processingTime / 1000)).toFixed(2)
});
processingMetrics.push({
size: fileSize,
time: processingTime,
memory: memoryUsed
});
}
fileSizes.push(fileSize);
} catch (error) {
// Skip files that can't be processed
}
}
// Calculate statistics
if (processingMetrics.length > 0) {
const totalSize = processingMetrics.reduce((sum, m) => sum + m.size, 0);
const totalTime = processingMetrics.reduce((sum, m) => sum + m.time, 0);
const totalMemory = processingMetrics.reduce((sum, m) => sum + m.memory, 0);
results.processingStats.avgTimePerKB = (totalTime / (totalSize / 1024)).toFixed(3);
results.processingStats.avgMemoryPerKB = (totalMemory / (totalSize / 1024)).toFixed(3);
}
// Sort large files by size
results.largeFiles.sort((a, b) => parseFloat(b.sizeKB) - parseFloat(a.sizeKB));
return {
...results,
largeFiles: results.largeFiles.slice(0, 10), // Top 10 largest
avgFileSizeKB: fileSizes.length > 0 ?
(fileSizes.reduce((a, b) => a + b, 0) / fileSizes.length / 1024).toFixed(2) : 0
};
}
);
// Test 5: Stress test with extreme sizes
const extremeSizeStressTest = await performanceTracker.measureAsync(
'extreme-size-stress-test',
async () => {
const einvoice = new EInvoice();
const results = {
        tests: [] as any[],
limits: {
maxItemsProcessed: 0,
maxSizeProcessedMB: 0,
          failurePoint: null as any
}
};
// Test extreme scenarios
const extremeScenarios = [
{
name: 'Wide invoice (many items)',
generator: (count: number) => ({
format: 'ubl' as const,
data: {
documentType: 'INVOICE',
invoiceNumber: `EXTREME-WIDE-${count}`,
issueDate: '2024-02-25',
seller: { name: 'Seller', address: 'Address', country: 'US', taxId: 'US123' },
buyer: { name: 'Buyer', address: 'Address', country: 'US', taxId: 'US456' },
items: Array.from({ length: count }, (_, i) => ({
description: `Item ${i + 1}`,
quantity: 1,
unitPrice: 10,
vatRate: 10,
lineTotal: 10
})),
totals: { netAmount: count * 10, vatAmount: count, grossAmount: count * 11 }
}
})
},
{
name: 'Deep invoice (long descriptions)',
generator: (size: number) => ({
format: 'ubl' as const,
data: {
documentType: 'INVOICE',
invoiceNumber: `EXTREME-DEEP-${size}`,
issueDate: '2024-02-25',
seller: { name: 'Seller', address: 'Address', country: 'US', taxId: 'US123' },
buyer: { name: 'Buyer', address: 'Address', country: 'US', taxId: 'US456' },
items: [{
description: 'A'.repeat(size * 1024), // Size in KB
quantity: 1,
unitPrice: 100,
vatRate: 10,
lineTotal: 100
}],
totals: { netAmount: 100, vatAmount: 10, grossAmount: 110 }
}
})
}
];
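      // The two generators stress orthogonal dimensions: 'wide' grows the number
      // of line items (element count) while 'deep' grows a single description
      // string (node size), so item-handling and text-handling limits surface
      // separately.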
// Test each scenario
for (const scenario of extremeScenarios) {
const testResults = {
scenario: scenario.name,
          tests: [] as any[]
};
// Test increasing sizes
const sizes = scenario.name.includes('Wide') ?
[1000, 5000, 10000, 20000, 50000] :
[100, 500, 1000, 2000, 5000]; // KB
for (const size of sizes) {
try {
const invoice = scenario.generator(size);
const startTime = Date.now();
const startMemory = process.memoryUsage();
// Try to process
const xml = await einvoice.generateXML(invoice);
const xmlSize = Buffer.byteLength(xml, 'utf-8') / 1024 / 1024; // MB
const parsed = await einvoice.parseInvoice(xml, invoice.format);
await einvoice.validateInvoice(parsed);
const endTime = Date.now();
const endMemory = process.memoryUsage();
testResults.tests.push({
size: scenario.name.includes('Wide') ? `${size} items` : `${size}KB text`,
success: true,
time: endTime - startTime,
memoryMB: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024).toFixed(2),
xmlSizeMB: xmlSize.toFixed(2)
});
// Update limits
if (scenario.name.includes('Wide') && size > results.limits.maxItemsProcessed) {
results.limits.maxItemsProcessed = size;
}
if (xmlSize > results.limits.maxSizeProcessedMB) {
results.limits.maxSizeProcessedMB = xmlSize;
}
          } catch (error) {
            const message = (error as Error).message;
            testResults.tests.push({
              size: scenario.name.includes('Wide') ? `${size} items` : `${size}KB text`,
              success: false,
              error: message
            });
            if (!results.limits.failurePoint) {
              results.limits.failurePoint = {
                scenario: scenario.name,
                size,
                error: message
              };
}
break; // Stop testing larger sizes after failure
}
}
results.tests.push(testResults);
}
return results;
}
);
// Summary
t.comment('\n=== PERF-08: Large File Processing Test Summary ===');
if (largePEPPOLProcessing.result.files.length > 0) {
t.comment('\nLarge PEPPOL File Processing:');
largePEPPOLProcessing.result.files.forEach(file => {
if (!file.error) {
t.comment(` ${file.path.split('/').pop()}:`);
t.comment(` - Size: ${file.sizeMB}MB, Items: ${file.itemCount}`);
t.comment(` - Processing: ${file.processingTime}ms (parse: ${file.parseTime}ms, validate: ${file.validationTime}ms)`);
t.comment(` - Throughput: ${file.throughputMBps}MB/s`);
t.comment(` - Memory used: ${file.memoryUsedMB}MB`);
}
});
t.comment(` Peak memory: ${largePEPPOLProcessing.result.memoryProfile.peak.toFixed(2)}MB`);
}
t.comment('\nSynthetic Large File Scaling:');
t.comment(' Size | XML Size | Total Time | Parse | Validate | Convert | Memory | Throughput');
t.comment(' ----------|----------|------------|--------|----------|---------|--------|----------');
syntheticLargeFiles.result.tests.forEach(test => {
t.comment(` ${test.size.padEnd(9)} | ${test.xmlSizeMB.padEnd(8)}MB | ${String(test.totalTime + 'ms').padEnd(10)} | ${String(test.parsing + 'ms').padEnd(6)} | ${String(test.validation + 'ms').padEnd(8)} | ${String(test.conversion + 'ms').padEnd(7)} | ${test.memoryUsedMB.padEnd(6)}MB | ${test.throughputMBps}MB/s`);
});
if (syntheticLargeFiles.result.scalingAnalysis) {
t.comment(` Scaling: ${syntheticLargeFiles.result.scalingAnalysis.type}`);
t.comment(` Formula: ${syntheticLargeFiles.result.scalingAnalysis.formula}`);
}
t.comment('\nChunked Processing Efficiency:');
t.comment(' Chunk Size | Chunks | Duration | Throughput | Peak Memory | Memory/Item');
t.comment(' -----------|--------|----------|------------|-------------|------------');
streamingLargeFiles.result.chunkProcessing.forEach(chunk => {
t.comment(` ${String(chunk.chunkSize).padEnd(10)} | ${String(chunk.chunks).padEnd(6)} | ${String(chunk.totalDuration + 'ms').padEnd(8)} | ${chunk.throughput.padEnd(10)}/s | ${chunk.peakMemoryMB.padEnd(11)}MB | ${chunk.memoryPerItemKB}KB`);
});
if (streamingLargeFiles.result.memoryEfficiency) {
t.comment(` Recommendation: ${streamingLargeFiles.result.memoryEfficiency.recommendation}`);
}
t.comment('\nCorpus Large File Analysis:');
t.comment(` Total files: ${corpusLargeFiles.result.totalFiles}`);
t.comment(` Size distribution:`);
Object.entries(corpusLargeFiles.result.sizeDistribution).forEach(([size, data]: [string, any]) => {
t.comment(` - ${size}: ${data.count} files`);
});
t.comment(` Largest processed files:`);
corpusLargeFiles.result.largeFiles.slice(0, 5).forEach(file => {
t.comment(` - ${file.path.split('/').pop()}: ${file.sizeKB}KB, ${file.processingTime}ms, ${file.throughputKBps}KB/s`);
});
t.comment(` Average processing: ${corpusLargeFiles.result.processingStats.avgTimePerKB}ms/KB`);
t.comment('\nExtreme Size Stress Test:');
extremeSizeStressTest.result.tests.forEach(scenario => {
t.comment(` ${scenario.scenario}:`);
scenario.tests.forEach(test => {
t.comment(` - ${test.size}: ${test.success ? `${test.time}ms, ${test.xmlSizeMB}MB XML` : `${test.error}`}`);
});
});
t.comment(` Limits:`);
t.comment(` - Max items processed: ${extremeSizeStressTest.result.limits.maxItemsProcessed}`);
t.comment(` - Max size processed: ${extremeSizeStressTest.result.limits.maxSizeProcessedMB.toFixed(2)}MB`);
if (extremeSizeStressTest.result.limits.failurePoint) {
t.comment(` - Failure point: ${extremeSizeStressTest.result.limits.failurePoint.scenario} at ${extremeSizeStressTest.result.limits.failurePoint.size}`);
}
// Performance targets check
t.comment('\n=== Performance Targets Check ===');
const largeFileThroughput = syntheticLargeFiles.result.tests.length > 0 ?
parseFloat(syntheticLargeFiles.result.tests[syntheticLargeFiles.result.tests.length - 1].throughputMBps) : 0;
const targetThroughput = 1; // Target: >1MB/s for large files
t.comment(`Large file throughput: ${largeFileThroughput}MB/s ${largeFileThroughput > targetThroughput ? '✅' : '⚠️'} (target: >${targetThroughput}MB/s)`);
// Overall performance summary
t.comment('\n=== Overall Performance Summary ===');
performanceTracker.logSummary();
t.end();
});
tap.start();