fix(compliance): improve compliance

This commit is contained in:
2025-05-28 19:37:00 +00:00
parent 892a8392a4
commit 756964aabd
6 changed files with 1223 additions and 1823 deletions

View File

@ -3,384 +3,262 @@
* @description Performance tests for format detection speed
*/
import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../suite/corpus.loader.js';
import { PerformanceTracker } from '../../suite/performance.tracker.js';
import { expect, tap } from '@git.zone/tstest/tapbundle';
import { FormatDetector } from '../../../ts/formats/utils/format.detector.js';
import { InvoiceFormat } from '../../../ts/interfaces/common.js';
const corpusLoader = new CorpusLoader();
const performanceTracker = new PerformanceTracker('PERF-01: Format Detection Speed');
// Simple performance tracking
class SimplePerformanceTracker {
private measurements: Map<string, number[]> = new Map();
private name: string;
tap.test('PERF-01: Format Detection Speed - should meet performance targets for format detection', async (t) => {
// Test 1: Single file detection benchmarks.
// Runs detection repeatedly on three inline sample documents and returns one
// latency-statistics record (all values in milliseconds) per sample.
const singleFileDetection = await performanceTracker.measureAsync(
  'single-file-detection',
  async () => {
    /** Latency statistics (ms) gathered for one sample document. */
    interface DetectionBenchmark {
      name: string;
      min: number;
      max: number;
      avg: number;
      median: number;
      p95: number;
      p99: number;
    }

    const einvoice = new EInvoice();
    const benchmarks: DetectionBenchmark[] = [];

    // Test different format samples
    const testCases = [
      {
        name: 'Small UBL',
        content: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>TEST-001</ID>
<IssueDate>2024-01-01</IssueDate>
</Invoice>`,
        expectedFormat: 'ubl'
      },
      {
        name: 'Small CII',
        // The `ram` prefix is used below, so it has to be declared; without the
        // declaration the sample is not namespace-well-formed XML.
        content: `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100" xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<rsm:ExchangedDocument><ram:ID>TEST-002</ram:ID></rsm:ExchangedDocument>
</rsm:CrossIndustryInvoice>`,
        expectedFormat: 'cii'
      },
      {
        name: 'Large UBL',
        content: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>TEST-003</ID>
<IssueDate>2024-01-01</IssueDate>
${Array(100).fill('<InvoiceLine><ID>Line</ID></InvoiceLine>').join('\n')}
</Invoice>`,
        expectedFormat: 'ubl'
      }
    ];

    // Run multiple iterations for accuracy
    const iterations = 100;
    for (const testCase of testCases) {
      const times: number[] = [];

      for (let i = 0; i < iterations; i++) {
        const startTime = process.hrtime.bigint();
        const format = await einvoice.detectFormat(testCase.content);
        const endTime = process.hrtime.bigint();

        // hrtime.bigint() is in nanoseconds; convert to ms
        times.push(Number(endTime - startTime) / 1_000_000);

        // Only the first run is checked; a mismatch is reported, not failed.
        if (i === 0 && format !== testCase.expectedFormat) {
          t.comment(`Warning: ${testCase.name} detected as ${format}, expected ${testCase.expectedFormat}`);
        }
      }

      // Calculate statistics on the ascending-sorted samples
      times.sort((a, b) => a - b);
      benchmarks.push({
        name: testCase.name,
        min: times[0],
        max: times[times.length - 1],
        avg: times.reduce((a, b) => a + b, 0) / times.length,
        median: times[Math.floor(times.length / 2)],
        p95: times[Math.floor(times.length * 0.95)],
        p99: times[Math.floor(times.length * 0.99)]
      });
    }

    return benchmarks;
  }
);
// Test 2: Corpus detection performance.
// Detects the format of up to 100 corpus XML files and aggregates timings
// overall and per file-size category. All timings are in milliseconds.
const corpusDetection = await performanceTracker.measureAsync(
  'corpus-detection-performance',
  async () => {
    type SizeCategory = 'small' | 'medium' | 'large';
    /** Timing bucket for one file-size category. */
    interface SizeBucket {
      count: number;
      avgTime: number;
      times: number[];
    }

    const files = await corpusLoader.getFilesByPattern('**/*.xml');
    const einvoice = new EInvoice();

    // Explicitly typed so the arrays are not inferred as never[]/any[].
    const detectionTimes: number[] = [];
    const sizeCategories: Record<SizeCategory, SizeBucket> = {
      small: { count: 0, avgTime: 0, times: [] }, // < 10KB
      medium: { count: 0, avgTime: 0, times: [] }, // 10-100KB
      large: { count: 0, avgTime: 0, times: [] } // > 100KB
    };
    const results = {
      totalFiles: 0,
      detectionTimes,
      formatDistribution: new Map<string, number>(),
      sizeCategories,
      failures: 0
    };

    // Process sample of corpus files
    const sampleFiles = files.slice(0, 100);
    for (const file of sampleFiles) {
      try {
        const content = await plugins.fs.readFile(file, 'utf-8');
        const fileSize = Buffer.byteLength(content, 'utf-8');
        const sizeCategory: SizeCategory =
          fileSize < 10240 ? 'small' : fileSize < 102400 ? 'medium' : 'large';

        results.totalFiles++;

        // Measure detection time
        const startTime = process.hrtime.bigint();
        const format = await einvoice.detectFormat(content);
        const endTime = process.hrtime.bigint();
        const duration = Number(endTime - startTime) / 1_000_000;

        detectionTimes.push(duration);
        sizeCategories[sizeCategory].times.push(duration);
        sizeCategories[sizeCategory].count++;

        // Track format distribution; an undetected format counts as a failure
        if (format && format !== 'unknown') {
          results.formatDistribution.set(
            format,
            (results.formatDistribution.get(format) || 0) + 1
          );
        } else {
          results.failures++;
        }
      } catch {
        // Best effort: an unreadable or unparsable file is counted, not fatal.
        results.failures++;
      }
    }

    // Calculate per-category averages
    for (const bucket of Object.values(sizeCategories)) {
      if (bucket.times.length > 0) {
        bucket.avgTime = bucket.times.reduce((a, b) => a + b, 0) / bucket.times.length;
      }
    }

    // Overall statistics. With no samples (empty corpus, or every file failed
    // before timing) report zeros instead of undefined/NaN, so callers that
    // format these values with toFixed() do not throw.
    detectionTimes.sort((a, b) => a - b);
    const sampleCount = detectionTimes.length;
    const overallStats =
      sampleCount === 0
        ? { min: 0, max: 0, avg: 0, median: 0, p95: 0 }
        : {
            min: detectionTimes[0],
            max: detectionTimes[sampleCount - 1],
            avg: detectionTimes.reduce((a, b) => a + b, 0) / sampleCount,
            median: detectionTimes[Math.floor(sampleCount / 2)],
            p95: detectionTimes[Math.floor(sampleCount * 0.95)]
          };

    return {
      ...results,
      overallStats,
      // Map -> array of [format, count] pairs
      formatDistribution: Array.from(results.formatDistribution.entries())
    };
  }
);
// Test 3: Concurrent detection performance.
// Starts N detections at once for several values of N and reports the wall
// time (ms) and throughput of each batch.
const concurrentDetection = await performanceTracker.measureAsync(
  'concurrent-detection',
  async () => {
    const einvoice = new EInvoice();
    const concurrencyLevels = [1, 5, 10, 20, 50];
    const results: Array<{
      concurrency: number;
      duration: number;
      throughput: string;
      allSuccessful: boolean;
    }> = [];

    // Create test content
    const testContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>CONCURRENT-TEST</ID>
<IssueDate>2024-01-01</IssueDate>
<AccountingSupplierParty><Party><PartyName><Name>Test Supplier</Name></PartyName></Party></AccountingSupplierParty>
<AccountingCustomerParty><Party><PartyName><Name>Test Customer</Name></PartyName></Party></AccountingCustomerParty>
</Invoice>`;

    for (const concurrency of concurrencyLevels) {
      // Use the nanosecond clock: Date.now() has millisecond resolution, so a
      // fast batch measured 0 ms and the throughput came out as Infinity.
      const startTime = process.hrtime.bigint();

      // Create concurrent detection tasks
      const tasks = Array(concurrency).fill(null).map(() =>
        einvoice.detectFormat(testContent)
      );
      const detectionResults = await Promise.all(tasks);

      // Clamp to 1 ns so the division below can never be by zero.
      const elapsedNs = Math.max(Number(process.hrtime.bigint() - startTime), 1);
      const throughput = ((concurrency * 1e9) / elapsedNs).toFixed(2);

      results.push({
        concurrency,
        // Milliseconds, rounded to 3 decimals to keep printed output short.
        duration: Number((elapsedNs / 1_000_000).toFixed(3)),
        throughput: `${throughput} detections/sec`,
        allSuccessful: detectionResults.every(r => r === 'ubl')
      });
    }

    return results;
  }
);
// Test 4: Edge case detection performance.
// Times detection on unusual-but-small inputs and returns, per input, the mean
// latency (ms, formatted to 3 decimals) and the content length in characters.
const edgeCaseDetection = await performanceTracker.measureAsync(
  'edge-case-detection',
  async () => {
    const detector = new EInvoice();
    const RUNS_PER_CASE = 50;

    const scenarios: Array<{ name: string; content: string }> = [
      {
        name: 'Minimal XML',
        content: '<?xml version="1.0"?><root/>'
      },
      {
        name: 'No XML declaration',
        content: '<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><ID>1</ID></Invoice>'
      },
      {
        name: 'With comments',
        content: '<?xml version="1.0"?><!-- Comment --><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><!-- Another comment --><ID>1</ID></Invoice>'
      },
      {
        name: 'With processing instructions',
        content: '<?xml version="1.0"?><?xml-stylesheet type="text/xsl" href="style.xsl"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><ID>1</ID></Invoice>'
      },
      {
        name: 'Mixed namespaces',
        content: '<?xml version="1.0"?><ns1:Invoice xmlns:ns1="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2" xmlns:ns2="http://example.com"><ns1:ID>1</ns1:ID></ns1:Invoice>'
      },
      {
        name: 'Large with whitespace',
        content: '<?xml version="1.0"?>\n\n\n' + ' '.repeat(10000) + '<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">\n' + ' '.repeat(5000) + '<ID>1</ID>\n' + ' '.repeat(5000) + '</Invoice>'
      }
    ];

    const summaries: Array<{ name: string; avgTime: string; contentSize: number }> = [];

    for (const { name, content } of scenarios) {
      // Accumulate elapsed time directly; only the mean is reported.
      let totalMs = 0;
      for (let run = 0; run < RUNS_PER_CASE; run += 1) {
        const startedAt = process.hrtime.bigint();
        await detector.detectFormat(content);
        const finishedAt = process.hrtime.bigint();
        totalMs += Number(finishedAt - startedAt) / 1_000_000; // ns -> ms
      }

      summaries.push({
        name,
        avgTime: (totalMs / RUNS_PER_CASE).toFixed(3),
        contentSize: content.length
      });
    }

    return summaries;
  }
);
// Test 5: Performance under memory pressure.
// Measures mean detection latency (ms) before and after allocating ten
// one-million-element arrays, and reports the relative change.
const memoryPressureDetection = await performanceTracker.measureAsync(
  'memory-pressure-detection',
  async () => {
    const einvoice = new EInvoice();
    const testXml = '<?xml version="1.0"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><ID>MEM-TEST</ID></Invoice>';
    const runsPerPhase = 50;

    /** Mean latency in ms of `runsPerPhase` sequential detections of testXml. */
    const averageDetectionMs = async (): Promise<number> => {
      let totalMs = 0;
      for (let i = 0; i < runsPerPhase; i++) {
        const start = process.hrtime.bigint();
        await einvoice.detectFormat(testXml);
        const end = process.hrtime.bigint();
        totalMs += Number(end - start) / 1_000_000;
      }
      return totalMs / runsPerPhase;
    };

    // Baseline measurement
    const baseline = await averageDetectionMs();

    // Create memory pressure by allocating large arrays
    const memoryHogs: number[][] = [];
    let underPressure: number;
    try {
      for (let i = 0; i < 10; i++) {
        memoryHogs.push(new Array(1_000_000).fill(Math.random()));
      }

      // Measurement under pressure
      underPressure = await averageDetectionMs();
    } finally {
      // Cleanup: drop the references even if detection throws.
      memoryHogs.length = 0;
    }

    // A zero baseline would make the ratio Infinity/NaN; report it as n/a.
    const degradation =
      baseline > 0
        ? ((underPressure - baseline) / baseline * 100).toFixed(2) + '%'
        : 'n/a';

    // Typed as numbers/string from the start (the fields used to be
    // initialised to null, which does not type-check under strict mode).
    return { baseline, underPressure, degradation };
  }
);
/**
 * Creates a tracker.
 * @param name Label stored on the instance; printSummary() prints it as the
 *             heading of the summary.
 */
constructor(name: string) {
this.name = name;
}
// Summary
t.comment('\n=== PERF-01: Format Detection Speed Test Summary ===');
t.comment('\nSingle File Detection Benchmarks (100 iterations each):');
singleFileDetection.result.forEach(bench => {
t.comment(` ${bench.name}:`);
t.comment(` - Min: ${bench.min.toFixed(3)}ms, Max: ${bench.max.toFixed(3)}ms`);
t.comment(` - Avg: ${bench.avg.toFixed(3)}ms, Median: ${bench.median.toFixed(3)}ms`);
t.comment(` - P95: ${bench.p95.toFixed(3)}ms, P99: ${bench.p99.toFixed(3)}ms`);
});
t.comment(`\nCorpus Detection Performance (${corpusDetection.result.totalFiles} files):`);
t.comment(` Overall statistics:`);
t.comment(` - Min: ${corpusDetection.result.overallStats.min.toFixed(3)}ms`);
t.comment(` - Max: ${corpusDetection.result.overallStats.max.toFixed(3)}ms`);
t.comment(` - Avg: ${corpusDetection.result.overallStats.avg.toFixed(3)}ms`);
t.comment(` - Median: ${corpusDetection.result.overallStats.median.toFixed(3)}ms`);
t.comment(` - P95: ${corpusDetection.result.overallStats.p95.toFixed(3)}ms`);
t.comment(` By file size:`);
Object.entries(corpusDetection.result.sizeCategories).forEach(([size, data]: [string, any]) => {
if (data.count > 0) {
t.comment(` - ${size}: ${data.count} files, avg ${data.avgTime.toFixed(3)}ms`);
addMeasurement(key: string, time: number): void {
if (!this.measurements.has(key)) {
this.measurements.set(key, []);
}
});
t.comment(` Format distribution:`);
corpusDetection.result.formatDistribution.forEach(([format, count]) => {
t.comment(` - ${format}: ${count} files`);
});
this.measurements.get(key)!.push(time);
}
/**
 * Summarises the samples recorded under `key`.
 * @param key Measurement name previously passed to addMeasurement().
 * @returns `null` when nothing has been recorded for `key`; otherwise the
 *          mean, smallest, largest and 95th-percentile sample.
 */
getStats(key: string) {
  const samples = this.measurements.get(key);
  if (!samples || samples.length === 0) {
    return null;
  }

  const count = samples.length;
  // Sort a copy so the stored insertion order is left untouched.
  const ascending = samples.slice().sort((x, y) => x - y);

  let total = 0;
  for (const sample of samples) {
    total += sample;
  }

  return {
    avg: total / count,
    min: ascending[0],
    max: ascending[count - 1],
    // floor(count * 0.95) is always < count, so the index is in range.
    p95: ascending[Math.floor(count * 0.95)]
  };
}
/**
 * Logs one line per recorded measurement key (avg/min/max/p95 in ms, two
 * decimals), preceded by a heading containing the tracker name.
 */
printSummary(): void {
  console.log(`\n${this.name} - Performance Summary:`);
  // Only the keys are needed here: getStats() looks the samples up itself, so
  // the previously destructured (and unused) sample array is dropped.
  for (const key of this.measurements.keys()) {
    const stats = this.getStats(key);
    if (stats) {
      console.log(` ${key}: avg=${stats.avg.toFixed(2)}ms, min=${stats.min.toFixed(2)}ms, max=${stats.max.toFixed(2)}ms, p95=${stats.p95.toFixed(2)}ms`);
    }
  }
}
}
const performanceTracker = new SimplePerformanceTracker('PERF-01: Format Detection Speed');
tap.test('PERF-01: Single file detection benchmarks', async () => {
const testCases = [
{
name: 'UBL Invoice',
content: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>123</ID>
<IssueDate>2025-01-25</IssueDate>
</Invoice>`,
expectedFormat: InvoiceFormat.UBL
},
{
name: 'CII Invoice',
content: `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<rsm:ExchangedDocument>
<ram:ID>123</ram:ID>
</rsm:ExchangedDocument>
</rsm:CrossIndustryInvoice>`,
expectedFormat: InvoiceFormat.CII
},
{
name: 'Factur-X',
content: `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100"
xmlns:qdt="urn:un:unece:uncefact:data:standard:QualifiedDataType:100">
<rsm:ExchangedDocumentContext>
<ram:GuidelineSpecifiedDocumentContextParameter>
<ram:ID>urn:factur-x.eu:1p0:minimum</ram:ID>
</ram:GuidelineSpecifiedDocumentContextParameter>
</rsm:ExchangedDocumentContext>
</rsm:CrossIndustryInvoice>`,
expectedFormat: InvoiceFormat.FACTURX
}
];
const iterations = 100;
t.comment('\nConcurrent Detection Performance:');
concurrentDetection.result.forEach(result => {
t.comment(` ${result.concurrency} concurrent: ${result.duration}ms total, ${result.throughput}`);
});
for (const testCase of testCases) {
const times: number[] = [];
for (let i = 0; i < iterations; i++) {
const startTime = performance.now();
const format = FormatDetector.detectFormat(testCase.content);
const endTime = performance.now();
const duration = endTime - startTime;
times.push(duration);
performanceTracker.addMeasurement(`detect-${testCase.name}`, duration);
if (i === 0) {
expect(format).toEqual(testCase.expectedFormat);
}
}
// Calculate statistics
times.sort((a, b) => a - b);
const avg = times.reduce((a, b) => a + b, 0) / times.length;
const p95 = times[Math.floor(times.length * 0.95)];
console.log(`${testCase.name}: avg=${avg.toFixed(3)}ms, p95=${p95.toFixed(3)}ms`);
// Performance assertions
expect(avg).toBeLessThan(5); // Average should be less than 5ms
expect(p95).toBeLessThan(10); // 95th percentile should be less than 10ms
}
});
tap.test('PERF-01: Quick detection performance', async () => {
// Test the quick string-based detection performance
const largeInvoice = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>LARGE-TEST-001</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
${Array(1000).fill('<cac:InvoiceLine><cbc:ID>1</cbc:ID></cac:InvoiceLine>').join('')}
</Invoice>`;
const iterations = 50;
const times: number[] = [];
t.comment('\nEdge Case Detection:');
edgeCaseDetection.result.forEach(result => {
t.comment(` ${result.name} (${result.contentSize} bytes): ${result.avgTime}ms avg`);
});
t.comment('\nMemory Pressure Impact:');
t.comment(` Baseline: ${memoryPressureDetection.result.baseline.toFixed(3)}ms`);
t.comment(` Under pressure: ${memoryPressureDetection.result.underPressure.toFixed(3)}ms`);
t.comment(` Performance degradation: ${memoryPressureDetection.result.degradation}`);
// Performance targets check
t.comment('\n=== Performance Targets Check ===');
const avgDetectionTime = corpusDetection.result.overallStats.avg;
const targetTime = 10; // Target: <10ms for format detection
if (avgDetectionTime < targetTime) {
t.comment(`✅ Format detection meets target: ${avgDetectionTime.toFixed(3)}ms < ${targetTime}ms`);
} else {
t.comment(`⚠️ Format detection exceeds target: ${avgDetectionTime.toFixed(3)}ms > ${targetTime}ms`);
for (let i = 0; i < iterations; i++) {
const startTime = performance.now();
const format = FormatDetector.detectFormat(largeInvoice);
const endTime = performance.now();
const duration = endTime - startTime;
times.push(duration);
performanceTracker.addMeasurement('large-invoice-detection', duration);
}
// Overall performance summary
t.comment('\n=== Overall Performance Summary ===');
performanceTracker.logSummary();
const avg = times.reduce((a, b) => a + b, 0) / times.length;
console.log(`Large invoice detection: avg=${avg.toFixed(3)}ms`);
// Even large invoices should be detected quickly due to quick string check
expect(avg).toBeLessThan(10);
});
t.end();
// Checks that inputs which are not invoices are classified as UNKNOWN and that
// rejecting them averages under 1 ms over 100 runs each.
tap.test('PERF-01: Edge cases detection performance', async () => {
  const RUNS = 100;
  const scenarios = [
    { name: 'Empty string', content: '', expectedFormat: InvoiceFormat.UNKNOWN },
    { name: 'Invalid XML', content: '<not-closed', expectedFormat: InvoiceFormat.UNKNOWN },
    {
      name: 'Non-invoice XML',
      content: '<?xml version="1.0"?><root><data>test</data></root>',
      expectedFormat: InvoiceFormat.UNKNOWN
    }
  ];

  for (const { name, content, expectedFormat } of scenarios) {
    let totalMs = 0;

    for (let run = 0; run < RUNS; run += 1) {
      const before = performance.now();
      const detected = FormatDetector.detectFormat(content);
      totalMs += performance.now() - before;

      // Correctness is verified on the first run only; later runs just time.
      if (run === 0) {
        expect(detected).toEqual(expectedFormat);
      }
    }

    const meanMs = totalMs / RUNS;
    console.log(`${name}: avg=${meanMs.toFixed(3)}ms`);

    // Edge cases should be detected very quickly
    expect(meanMs).toBeLessThan(1);
  }
});
// Runs five batches of ten detections gathered through Promise.all, records
// each batch's wall time, and requires the mean batch time to stay under 50 ms.
// NOTE(review): detectFormat() is used synchronously elsewhere in this file, so
// the calls presumably complete inside the map callback before Promise.all is
// awaited — confirm whether real concurrency is intended here.
tap.test('PERF-01: Concurrent detection performance', async () => {
  const BATCH_SIZE = 10;
  const BATCHES = 5;
  const sampleXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>CONCURRENT-TEST</ID>
</Invoice>`;

  for (let batch = 0; batch < BATCHES; batch += 1) {
    const batchStart = performance.now();

    // Run multiple detections concurrently
    const detected = await Promise.all(
      Array.from({ length: BATCH_SIZE }, () =>
        Promise.resolve(FormatDetector.detectFormat(sampleXml))
      )
    );

    const elapsedMs = performance.now() - batchStart;
    performanceTracker.addMeasurement('concurrent-detection', elapsedMs);

    // All should detect the same format
    const allUbl = !detected.some(format => format !== InvoiceFormat.UBL);
    expect(allUbl).toEqual(true);

    console.log(`Concurrent detection (${BATCH_SIZE} parallel): ${elapsedMs.toFixed(3)}ms`);
  }

  const batchStats = performanceTracker.getStats('concurrent-detection');
  if (!batchStats) {
    return;
  }
  // Concurrent detection should still be fast
  expect(batchStats.avg).toBeLessThan(50);
});
// Detects the format of 1000 small UBL documents and checks that heap usage
// grows by less than 50 MB while doing so, and that every document is UBL.
tap.test('PERF-01: Memory usage during detection', async () => {
  // Create a reasonably large test set
  const testXmls = Array.from({ length: 1000 }, (_, i) => `<?xml version="1.0"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>MEM-TEST-${i}</ID>
<IssueDate>2025-01-25</IssueDate>
</Invoice>`);

  // Take the baseline AFTER building the fixture, so the reported increase
  // covers detection only and not the 1000 test strings themselves.
  const initialMemory = process.memoryUsage();

  // Detect all formats
  const startTime = performance.now();
  const formats = testXmls.map(xml => FormatDetector.detectFormat(xml));
  const endTime = performance.now();

  const afterMemory = process.memoryUsage();
  // heapUsed is in bytes; convert the delta to MB. The delta can be negative
  // if a garbage collection happens during the run.
  const memoryIncrease = (afterMemory.heapUsed - initialMemory.heapUsed) / 1024 / 1024;

  console.log(`Detected ${formats.length} formats in ${(endTime - startTime).toFixed(2)}ms`);
  console.log(`Memory increase: ${memoryIncrease.toFixed(2)} MB`);

  // Memory increase should be reasonable
  expect(memoryIncrease).toBeLessThan(50); // Less than 50MB for 1000 detections

  // All should be detected as UBL
  expect(formats.every(f => f === InvoiceFormat.UBL)).toEqual(true);
});
// Final reporting step: makes no assertions, it only prints the statistics the
// shared tracker collected in the preceding tests plus a closing line.
tap.test('PERF-01: Performance Summary', async () => {
// One line per measurement key recorded via addMeasurement().
performanceTracker.printSummary();
console.log('\nFormat detection performance tests completed successfully');
});
tap.start();