2025-05-29 13:35:36 +00:00
parent 756964aabd
commit 960bbc2208
15 changed files with 2373 additions and 3396 deletions


@@ -8,17 +8,95 @@ import * as plugins from '../../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../suite/corpus.loader.js';
import { PerformanceTracker } from '../../suite/performance.tracker.js';
import { FormatDetector } from '../../../ts/formats/utils/format.detector.js';
const corpusLoader = new CorpusLoader();
const performanceTracker = new PerformanceTracker('PERF-08: Large File Processing');
// Helper function to create UBL invoice XML
function createUBLInvoiceXML(data: any): string {
const items = data.items.map((item: any, idx: number) => `
<cac:InvoiceLine>
<cbc:ID>${idx + 1}</cbc:ID>
<cbc:InvoicedQuantity unitCode="C62">${item.quantity}</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="${data.currency || 'EUR'}">${item.lineTotal}</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Description>${item.description}</cbc:Description>
</cac:Item>
<cac:Price>
<cbc:PriceAmount currencyID="${data.currency || 'EUR'}">${item.unitPrice}</cbc:PriceAmount>
</cac:Price>
</cac:InvoiceLine>`).join('');
return `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:UBLVersionID>2.1</cbc:UBLVersionID>
<cbc:ID>${data.invoiceNumber}</cbc:ID>
<cbc:IssueDate>${data.issueDate}</cbc:IssueDate>
<cbc:DueDate>${data.dueDate || data.issueDate}</cbc:DueDate>
<cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
<cbc:DocumentCurrencyCode>${data.currency || 'EUR'}</cbc:DocumentCurrencyCode>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>${data.seller.name}</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>${data.seller.address}</cbc:StreetName>
<cbc:CityName>${data.seller.city || ''}</cbc:CityName>
<cbc:PostalZone>${data.seller.postalCode || ''}</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>${data.seller.country}</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
<cac:PartyTaxScheme>
<cbc:CompanyID>${data.seller.taxId}</cbc:CompanyID>
<cac:TaxScheme>
<cbc:ID>VAT</cbc:ID>
</cac:TaxScheme>
</cac:PartyTaxScheme>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>${data.buyer.name}</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>${data.buyer.address}</cbc:StreetName>
<cbc:CityName>${data.buyer.city || ''}</cbc:CityName>
<cbc:PostalZone>${data.buyer.postalCode || ''}</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>${data.buyer.country}</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
<cac:PartyTaxScheme>
<cbc:CompanyID>${data.buyer.taxId}</cbc:CompanyID>
<cac:TaxScheme>
<cbc:ID>VAT</cbc:ID>
</cac:TaxScheme>
</cac:PartyTaxScheme>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:TaxTotal>
<cbc:TaxAmount currencyID="${data.currency || 'EUR'}">${data.totals.vatAmount}</cbc:TaxAmount>
</cac:TaxTotal>
<cac:LegalMonetaryTotal>
<cbc:TaxExclusiveAmount currencyID="${data.currency || 'EUR'}">${data.totals.netAmount}</cbc:TaxExclusiveAmount>
<cbc:TaxInclusiveAmount currencyID="${data.currency || 'EUR'}">${data.totals.grossAmount}</cbc:TaxInclusiveAmount>
<cbc:PayableAmount currencyID="${data.currency || 'EUR'}">${data.totals.grossAmount}</cbc:PayableAmount>
</cac:LegalMonetaryTotal>
${items}
</Invoice>`;
}
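For reference only (not part of this commit), a minimal sketch of the input shape the createUBLInvoiceXML helper expects, inferred from the fields its template reads; the interface name is illustrative:

// Sketch of the data consumed by createUBLInvoiceXML; optional fields fall back inside the template.
interface UBLInvoiceData {
  invoiceNumber: string;
  issueDate: string;              // e.g. '2025-05-29'
  dueDate?: string;               // defaults to issueDate
  currency?: string;              // defaults to 'EUR'
  seller: { name: string; address: string; city?: string; postalCode?: string; country: string; taxId: string };
  buyer: { name: string; address: string; city?: string; postalCode?: string; country: string; taxId: string };
  items: Array<{ description: string; quantity: number; unitPrice: number; lineTotal: number }>;
  totals: { netAmount: number; vatAmount: number; grossAmount: number };
}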
tap.test('PERF-08: Large File Processing - should handle large files efficiently', async (t) => {
// Test 1: Large PEPPOL file processing
const largePEPPOLProcessing = await performanceTracker.measureAsync(
'large-peppol-processing',
async () => {
const files = await corpusLoader.getFilesByPattern('**/PEPPOL/**/*.xml');
const einvoice = new EInvoice();
const files = await CorpusLoader.loadPattern('**/PEPPOL/**/*.xml');
const results = {
files: [],
memoryProfile: {
@@ -40,17 +118,17 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
const startMemory = process.memoryUsage();
// Read file
const content = await plugins.fs.readFile(file, 'utf-8');
const content = await plugins.fs.readFile(file.path, 'utf-8');
const fileSize = Buffer.byteLength(content, 'utf-8');
// Process file
const format = await einvoice.detectFormat(content);
const format = FormatDetector.detectFormat(content);
const parseStart = Date.now();
const invoice = await einvoice.parseInvoice(content, format || 'ubl');
const einvoice = await EInvoice.fromXml(content);
const parseEnd = Date.now();
const validationStart = Date.now();
const validationResult = await einvoice.validateInvoice(invoice);
const validationResult = await einvoice.validate();
const validationEnd = Date.now();
const endMemory = process.memoryUsage();
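The hunks below repeat one API migration; it is collected here for readability (only calls that appear in this diff are listed, and their exact semantics are assumed rather than verified):

// Old instance-based calls (removed)            ->  new calls (added)
// await einvoice.detectFormat(content)          ->  FormatDetector.detectFormat(content)
// await einvoice.parseInvoice(content, format)  ->  await EInvoice.fromXml(content)
// await einvoice.validateInvoice(invoice)       ->  await einvoice.validate()
// validationResult.isValid                      ->  validationResult.valid
// await einvoice.generateXML(invoice)           ->  createUBLInvoiceXML(invoice.data)  (test-local helper above)
// await einvoice.convertFormat(parsed, 'cii')   ->  await parsed.toXmlString('cii')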
@@ -71,8 +149,8 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
validationTime: validationEnd - validationStart,
memoryUsedMB: memoryUsed.toFixed(2),
throughputMBps: ((fileSize / 1024 / 1024) / (totalTime / 1000)).toFixed(2),
itemCount: invoice.data.items?.length || 0,
valid: validationResult.isValid
itemCount: einvoice.data.items?.length || 0,
valid: validationResult.valid
});
results.memoryProfile.increments.push(memoryUsed);
@@ -93,7 +171,6 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
const syntheticLargeFiles = await performanceTracker.measureAsync(
'synthetic-large-files',
async () => {
const einvoice = new EInvoice();
const results = {
tests: [],
scalingAnalysis: null
@@ -183,23 +260,23 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
// Generate XML
const xmlStart = Date.now();
const xml = await einvoice.generateXML(invoice);
const xml = createUBLInvoiceXML(invoice.data);
const xmlEnd = Date.now();
const xmlSize = Buffer.byteLength(xml, 'utf-8');
// Parse back
const parseStart = Date.now();
const parsed = await einvoice.parseInvoice(xml, 'ubl');
const parsed = await EInvoice.fromXml(xml);
const parseEnd = Date.now();
// Validate
const validateStart = Date.now();
const validation = await einvoice.validateInvoice(parsed);
const validation = await parsed.validate();
const validateEnd = Date.now();
// Convert
const convertStart = Date.now();
const converted = await einvoice.convertFormat(parsed, 'cii');
await parsed.toXmlString('cii'); // Test conversion performance
const convertEnd = Date.now();
const endTime = Date.now();
@@ -217,7 +294,7 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
memoryUsedMB: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024).toFixed(2),
memoryPerItemKB: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / size.items).toFixed(2),
throughputMBps: ((xmlSize / 1024 / 1024) / ((endTime - startTime) / 1000)).toFixed(2),
valid: validation.isValid
valid: validation.valid
});
}
@@ -253,7 +330,6 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
const streamingLargeFiles = await performanceTracker.measureAsync(
'streaming-large-files',
async () => {
const einvoice = new EInvoice();
const results = {
streamingSupported: false,
chunkProcessing: [],
@@ -303,7 +379,9 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
// Process chunk
const chunkStart = Date.now();
await einvoice.validateInvoice(chunkInvoice);
const chunkXml = createUBLInvoiceXML(chunkInvoice.data);
const chunkEInvoice = await EInvoice.fromXml(chunkXml);
await chunkEInvoice.validate();
const chunkEnd = Date.now();
chunkResults.push({
@@ -361,8 +439,7 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
const corpusLargeFiles = await performanceTracker.measureAsync(
'corpus-large-file-analysis',
async () => {
const files = await corpusLoader.getFilesByPattern('**/*.xml');
const einvoice = new EInvoice();
const files = await CorpusLoader.loadPattern('**/*.xml');
const results = {
totalFiles: 0,
largeFiles: [],
@@ -385,7 +462,7 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
for (const file of files) {
try {
const stats = await plugins.fs.stat(file);
const stats = await plugins.fs.stat(file.path);
const fileSize = stats.size;
results.totalFiles++;
@@ -404,15 +481,15 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
// Process large files
if (fileSize > 100 * 1024) { // Process files > 100KB
const content = await plugins.fs.readFile(file, 'utf-8');
const content = await plugins.fs.readFile(file.path, 'utf-8');
const startTime = Date.now();
const startMemory = process.memoryUsage();
const format = await einvoice.detectFormat(content);
const format = FormatDetector.detectFormat(content);
if (format && format !== 'unknown') {
const invoice = await einvoice.parseInvoice(content, format);
await einvoice.validateInvoice(invoice);
const invoice = await EInvoice.fromXml(content);
await invoice.validate();
}
const endTime = Date.now();
@@ -451,8 +528,8 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
const totalTime = processingMetrics.reduce((sum, m) => sum + m.time, 0);
const totalMemory = processingMetrics.reduce((sum, m) => sum + m.memory, 0);
results.processingStats.avgTimePerKB = (totalTime / (totalSize / 1024)).toFixed(3);
results.processingStats.avgMemoryPerKB = (totalMemory / (totalSize / 1024)).toFixed(3);
results.processingStats.avgTimePerKB = parseFloat((totalTime / (totalSize / 1024)).toFixed(3));
results.processingStats.avgMemoryPerKB = parseFloat((totalMemory / (totalSize / 1024)).toFixed(3));
}
// Sort large files by size
@@ -471,7 +548,6 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
const extremeSizeStressTest = await performanceTracker.measureAsync(
'extreme-size-stress-test',
async () => {
const einvoice = new EInvoice();
const results = {
tests: [],
limits: {
@@ -546,12 +622,14 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
const startTime = Date.now();
const startMemory = process.memoryUsage();
// Try to process
const xml = await einvoice.generateXML(invoice);
// Try to process - create XML from invoice data
// Since we have invoice data, we need to convert it to XML
// For now, we'll create a simple UBL invoice XML
const xml = createUBLInvoiceXML(invoice.data);
const xmlSize = Buffer.byteLength(xml, 'utf-8') / 1024 / 1024; // MB
const parsed = await einvoice.parseInvoice(xml, invoice.format);
await einvoice.validateInvoice(parsed);
const parsed = await EInvoice.fromXml(xml);
await parsed.validate();
const endTime = Date.now();
const endMemory = process.memoryUsage();
@@ -599,82 +677,82 @@ tap.test('PERF-08: Large File Processing - should handle large files efficiently
);
// Summary
t.comment('\n=== PERF-08: Large File Processing Test Summary ===');
console.log('\n=== PERF-08: Large File Processing Test Summary ===');
if (largePEPPOLProcessing.result.files.length > 0) {
t.comment('\nLarge PEPPOL File Processing:');
largePEPPOLProcessing.result.files.forEach(file => {
if (largePEPPOLProcessing.files.length > 0) {
console.log('\nLarge PEPPOL File Processing:');
largePEPPOLProcessing.files.forEach(file => {
if (!file.error) {
t.comment(` ${file.path.split('/').pop()}:`);
t.comment(` - Size: ${file.sizeMB}MB, Items: ${file.itemCount}`);
t.comment(` - Processing: ${file.processingTime}ms (parse: ${file.parseTime}ms, validate: ${file.validationTime}ms)`);
t.comment(` - Throughput: ${file.throughputMBps}MB/s`);
t.comment(` - Memory used: ${file.memoryUsedMB}MB`);
console.log(` ${file.path.split('/').pop()}:`);
console.log(` - Size: ${file.sizeMB}MB, Items: ${file.itemCount}`);
console.log(` - Processing: ${file.processingTime}ms (parse: ${file.parseTime}ms, validate: ${file.validationTime}ms)`);
console.log(` - Throughput: ${file.throughputMBps}MB/s`);
console.log(` - Memory used: ${file.memoryUsedMB}MB`);
}
});
t.comment(` Peak memory: ${largePEPPOLProcessing.result.memoryProfile.peak.toFixed(2)}MB`);
console.log(` Peak memory: ${largePEPPOLProcessing.memoryProfile.peak.toFixed(2)}MB`);
}
t.comment('\nSynthetic Large File Scaling:');
t.comment(' Size | XML Size | Total Time | Parse | Validate | Convert | Memory | Throughput');
t.comment(' ----------|----------|------------|--------|----------|---------|--------|----------');
syntheticLargeFiles.result.tests.forEach(test => {
t.comment(` ${test.size.padEnd(9)} | ${test.xmlSizeMB.padEnd(8)}MB | ${String(test.totalTime + 'ms').padEnd(10)} | ${String(test.parsing + 'ms').padEnd(6)} | ${String(test.validation + 'ms').padEnd(8)} | ${String(test.conversion + 'ms').padEnd(7)} | ${test.memoryUsedMB.padEnd(6)}MB | ${test.throughputMBps}MB/s`);
console.log('\nSynthetic Large File Scaling:');
console.log(' Size | XML Size | Total Time | Parse | Validate | Convert | Memory | Throughput');
console.log(' ----------|----------|------------|--------|----------|---------|--------|----------');
syntheticLargeFiles.tests.forEach((test: any) => {
console.log(` ${test.size.padEnd(9)} | ${test.xmlSizeMB.padEnd(8)}MB | ${String(test.totalTime + 'ms').padEnd(10)} | ${String(test.parsing + 'ms').padEnd(6)} | ${String(test.validation + 'ms').padEnd(8)} | ${String(test.conversion + 'ms').padEnd(7)} | ${test.memoryUsedMB.padEnd(6)}MB | ${test.throughputMBps}MB/s`);
});
if (syntheticLargeFiles.result.scalingAnalysis) {
t.comment(` Scaling: ${syntheticLargeFiles.result.scalingAnalysis.type}`);
t.comment(` Formula: ${syntheticLargeFiles.result.scalingAnalysis.formula}`);
if (syntheticLargeFiles.scalingAnalysis) {
console.log(` Scaling: ${syntheticLargeFiles.scalingAnalysis.type}`);
console.log(` Formula: ${syntheticLargeFiles.scalingAnalysis.formula}`);
}
t.comment('\nChunked Processing Efficiency:');
t.comment(' Chunk Size | Chunks | Duration | Throughput | Peak Memory | Memory/Item');
t.comment(' -----------|--------|----------|------------|-------------|------------');
streamingLargeFiles.result.chunkProcessing.forEach(chunk => {
t.comment(` ${String(chunk.chunkSize).padEnd(10)} | ${String(chunk.chunks).padEnd(6)} | ${String(chunk.totalDuration + 'ms').padEnd(8)} | ${chunk.throughput.padEnd(10)}/s | ${chunk.peakMemoryMB.padEnd(11)}MB | ${chunk.memoryPerItemKB}KB`);
console.log('\nChunked Processing Efficiency:');
console.log(' Chunk Size | Chunks | Duration | Throughput | Peak Memory | Memory/Item');
console.log(' -----------|--------|----------|------------|-------------|------------');
streamingLargeFiles.chunkProcessing.forEach((chunk: any) => {
console.log(` ${String(chunk.chunkSize).padEnd(10)} | ${String(chunk.chunks).padEnd(6)} | ${String(chunk.totalDuration + 'ms').padEnd(8)} | ${chunk.throughput.padEnd(10)}/s | ${chunk.peakMemoryMB.padEnd(11)}MB | ${chunk.memoryPerItemKB}KB`);
});
if (streamingLargeFiles.result.memoryEfficiency) {
t.comment(` Recommendation: ${streamingLargeFiles.result.memoryEfficiency.recommendation}`);
if (streamingLargeFiles.memoryEfficiency) {
console.log(` Recommendation: ${streamingLargeFiles.memoryEfficiency.recommendation}`);
}
t.comment('\nCorpus Large File Analysis:');
t.comment(` Total files: ${corpusLargeFiles.result.totalFiles}`);
t.comment(` Size distribution:`);
Object.entries(corpusLargeFiles.result.sizeDistribution).forEach(([size, data]: [string, any]) => {
t.comment(` - ${size}: ${data.count} files`);
console.log('\nCorpus Large File Analysis:');
console.log(` Total files: ${corpusLargeFiles.totalFiles}`);
console.log(` Size distribution:`);
Object.entries(corpusLargeFiles.sizeDistribution).forEach(([size, data]: [string, any]) => {
console.log(` - ${size}: ${data.count} files`);
});
t.comment(` Largest processed files:`);
corpusLargeFiles.result.largeFiles.slice(0, 5).forEach(file => {
t.comment(` - ${file.path.split('/').pop()}: ${file.sizeKB}KB, ${file.processingTime}ms, ${file.throughputKBps}KB/s`);
console.log(` Largest processed files:`);
corpusLargeFiles.largeFiles.slice(0, 5).forEach(file => {
console.log(` - ${file.path.split('/').pop()}: ${file.sizeKB}KB, ${file.processingTime}ms, ${file.throughputKBps}KB/s`);
});
t.comment(` Average processing: ${corpusLargeFiles.result.processingStats.avgTimePerKB}ms/KB`);
console.log(` Average processing: ${corpusLargeFiles.processingStats.avgTimePerKB}ms/KB`);
t.comment('\nExtreme Size Stress Test:');
extremeSizeStressTest.result.tests.forEach(scenario => {
t.comment(` ${scenario.scenario}:`);
scenario.tests.forEach(test => {
t.comment(` - ${test.size}: ${test.success ? `${test.time}ms, ${test.xmlSizeMB}MB XML` : `${test.error}`}`);
console.log('\nExtreme Size Stress Test:');
extremeSizeStressTest.tests.forEach(scenario => {
console.log(` ${scenario.scenario}:`);
scenario.tests.forEach((test: any) => {
console.log(` - ${test.size}: ${test.success ? `${test.time}ms, ${test.xmlSizeMB}MB XML` : `${test.error}`}`);
});
});
t.comment(` Limits:`);
t.comment(` - Max items processed: ${extremeSizeStressTest.result.limits.maxItemsProcessed}`);
t.comment(` - Max size processed: ${extremeSizeStressTest.result.limits.maxSizeProcessedMB.toFixed(2)}MB`);
if (extremeSizeStressTest.result.limits.failurePoint) {
t.comment(` - Failure point: ${extremeSizeStressTest.result.limits.failurePoint.scenario} at ${extremeSizeStressTest.result.limits.failurePoint.size}`);
console.log(` Limits:`);
console.log(` - Max items processed: ${extremeSizeStressTest.limits.maxItemsProcessed}`);
console.log(` - Max size processed: ${extremeSizeStressTest.limits.maxSizeProcessedMB.toFixed(2)}MB`);
if (extremeSizeStressTest.limits.failurePoint) {
console.log(` - Failure point: ${extremeSizeStressTest.limits.failurePoint.scenario} at ${extremeSizeStressTest.limits.failurePoint.size}`);
}
// Performance targets check
t.comment('\n=== Performance Targets Check ===');
const largeFileThroughput = syntheticLargeFiles.result.tests.length > 0 ?
parseFloat(syntheticLargeFiles.result.tests[syntheticLargeFiles.result.tests.length - 1].throughputMBps) : 0;
console.log('\n=== Performance Targets Check ===');
const largeFileThroughput = syntheticLargeFiles.tests.length > 0 ?
parseFloat(syntheticLargeFiles.tests[syntheticLargeFiles.tests.length - 1].throughputMBps) : 0;
const targetThroughput = 1; // Target: >1MB/s for large files
t.comment(`Large file throughput: ${largeFileThroughput}MB/s ${largeFileThroughput > targetThroughput ? '✅' : '⚠️'} (target: >${targetThroughput}MB/s)`);
console.log(`Large file throughput: ${largeFileThroughput}MB/s ${largeFileThroughput > targetThroughput ? '✅' : '⚠️'} (target: >${targetThroughput}MB/s)`);
// Overall performance summary
t.comment('\n=== Overall Performance Summary ===');
performanceTracker.logSummary();
console.log('\n=== Overall Performance Summary ===');
console.log(performanceTracker.getSummary());
t.end();
t.pass('Large file processing tests completed');
});
tap.start();
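The rewritten summary reads results directly off each awaited value (largePEPPOLProcessing.files rather than largePEPPOLProcessing.result.files) and prints performanceTracker.getSummary(). Below is a sketch of the tracker surface those calls assume; it is an assumption for illustration, not the actual PerformanceTracker implementation:

// Hypothetical sketch: measureAsync resolves to the callback's return value and getSummary() returns a string.
class PerformanceTrackerSketch {
  constructor(private name: string) {}

  async measureAsync<T>(label: string, fn: () => Promise<T>): Promise<T> {
    const start = Date.now();
    try {
      return await fn();                 // the result is handed straight back to the caller
    } finally {
      console.log(`${this.name} :: ${label}: ${Date.now() - start}ms`);
    }
  }

  getSummary(): string {
    return `${this.name}: measurements logged above`;
  }
}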