update

2025-05-25 19:45:37 +00:00
parent e89675c319
commit 39942638d9
110 changed files with 49183 additions and 3104 deletions
--- a/test/suite/einvoice_performance/test.perf-01.detection-speed.ts
+++ b/test/suite/einvoice_performance/test.perf-01.detection-speed.ts
@@ -0,0 +1,386 @@
+/**
+ * @file test.perf-01.detection-speed.ts
+ * @description Performance tests for format detection speed
+ */
+
+import { tap } from '@git.zone/tstest/tapbundle';
+import * as plugins from '../../plugins.js';
+import { EInvoice } from '../../../ts/index.js';
+import { CorpusLoader } from '../../suite/corpus.loader.js';
+import { PerformanceTracker } from '../../suite/performance.tracker.js';
+
+const corpusLoader = new CorpusLoader(); // queried in Test 2 for the list of corpus XML files
+const performanceTracker = new PerformanceTracker('PERF-01: Format Detection Speed'); // measureAsync() wraps each sub-benchmark; logSummary() is called at the end
+
+tap.test('PERF-01: Format Detection Speed - should meet performance targets for format detection', async (t) => {
+  // Test 1: Single file detection benchmarks (repeated detectFormat() calls on inline samples)
+  const singleFileDetection = await performanceTracker.measureAsync(
+    'single-file-detection',
+    async () => {
+      const einvoice = new EInvoice();
+      const benchmarks = [];
+      // Test different format samples. The CII sample declares the ram: prefix it uses, so every
+      // sample is namespace-well-formed XML regardless of how strict the detector's parser is.
+      const testCases = [
+        {
+          name: 'Small UBL',
+          content: `<?xml version="1.0" encoding="UTF-8"?>
+            <Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
+              <ID>TEST-001</ID>
+              <IssueDate>2024-01-01</IssueDate>
+            </Invoice>`,
+          expectedFormat: 'ubl'
+        },
+        {
+          name: 'Small CII',
+          content: `<?xml version="1.0" encoding="UTF-8"?>
+            <rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100" xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
+              <rsm:ExchangedDocument><ram:ID>TEST-002</ram:ID></rsm:ExchangedDocument>
+            </rsm:CrossIndustryInvoice>`,
+          expectedFormat: 'cii'
+        },
+        {
+          name: 'Large UBL',
+          content: `<?xml version="1.0" encoding="UTF-8"?>
+            <Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
+              <ID>TEST-003</ID>
+              <IssueDate>2024-01-01</IssueDate>
+              ${Array(100).fill('<InvoiceLine><ID>Line</ID></InvoiceLine>').join('\n')}
+            </Invoice>`,
+          expectedFormat: 'ubl'
+        }
+      ];
+      
+      // Run multiple iterations for accuracy
+      const iterations = 100;
+      
+      for (const testCase of testCases) {
+        const times: number[] = [];
+        
+        for (let i = 0; i < iterations; i++) {
+          const startTime = process.hrtime.bigint();
+          const format = await einvoice.detectFormat(testCase.content);
+          const endTime = process.hrtime.bigint();
+          
+          const duration = Number(endTime - startTime) / 1_000_000; // Convert ns to ms
+          times.push(duration);
+          // Sanity-check the result once per sample; a mismatch is reported but does not fail the test
+          if (i === 0 && format !== testCase.expectedFormat) {
+            t.comment(`Warning: ${testCase.name} detected as ${format}, expected ${testCase.expectedFormat}`);
+          }
+        }
+        
+        // Calculate statistics: sort ascending, then read order statistics by index
+        times.sort((a, b) => a - b);
+        const stats = {
+          name: testCase.name,
+          min: times[0],
+          max: times[times.length - 1],
+          avg: times.reduce((a, b) => a + b, 0) / times.length,
+          median: times[Math.floor(times.length / 2)],
+          p95: times[Math.floor(times.length * 0.95)],
+          p99: times[Math.floor(times.length * 0.99)]
+        };
+        
+        benchmarks.push(stats);
+      }
+      
+      return benchmarks;
+    }
+  );
+  
+  // Test 2: Corpus detection performance (at most the first 100 XML files from the corpus loader)
+  const corpusDetection = await performanceTracker.measureAsync(
+    'corpus-detection-performance',
+    async () => {
+      const files = await corpusLoader.getFilesByPattern('**/*.xml');
+      const einvoice = new EInvoice();
+      const results = {
+        totalFiles: 0,
+        detectionTimes: [] as number[],
+        formatDistribution: new Map<string, number>(),
+        sizeCategories: {
+          small: { count: 0, avgTime: 0, times: [] as number[] }, // < 10KB
+          medium: { count: 0, avgTime: 0, times: [] as number[] }, // 10-100KB
+          large: { count: 0, avgTime: 0, times: [] as number[] }, // > 100KB
+        },
+        failures: 0
+      };
+      
+      // Process sample of corpus files
+      const sampleFiles = files.slice(0, 100);
+      
+      for (const file of sampleFiles) {
+        try {
+          const content = await plugins.fs.readFile(file, 'utf-8');
+          const fileSize = Buffer.byteLength(content, 'utf-8');
+          const sizeCategory = fileSize < 10240 ? 'small' : 
+                              fileSize < 102400 ? 'medium' : 'large';
+          
+          results.totalFiles++;
+          
+          // Measure detection time (file reading is deliberately outside the timed region)
+          const startTime = process.hrtime.bigint();
+          const format = await einvoice.detectFormat(content);
+          const endTime = process.hrtime.bigint();
+          const duration = Number(endTime - startTime) / 1_000_000;
+          
+          results.detectionTimes.push(duration);
+          results.sizeCategories[sizeCategory].times.push(duration);
+          results.sizeCategories[sizeCategory].count++;
+          
+          // Track format distribution; an empty or 'unknown' result counts as a failure
+          if (format && format !== 'unknown') {
+            results.formatDistribution.set(format, 
+              (results.formatDistribution.get(format) || 0) + 1
+            );
+          } else {
+            results.failures++;
+          }
+          
+        } catch (error) {
+          results.failures++; // read or detection error: count it and continue with the next file
+        }
+      }
+      
+      // Calculate averages per size category (categories without samples keep avgTime 0)
+      for (const cat of Object.values(results.sizeCategories)) {
+        if (cat.times.length > 0) {
+          cat.avgTime = cat.times.reduce((a, b) => a + b, 0) / cat.times.length;
+        }
+      }
+      
+      // Overall statistics. With no timed files every stat is NaN (not undefined), so toFixed() cannot throw
+      results.detectionTimes.sort((a, b) => a - b);
+      const sampleCount = results.detectionTimes.length;
+      const overallStats = {
+        min: results.detectionTimes[0] ?? Number.NaN,
+        max: results.detectionTimes[sampleCount - 1] ?? Number.NaN,
+        avg: results.detectionTimes.reduce((a, b) => a + b, 0) / sampleCount,
+        median: results.detectionTimes[Math.floor(sampleCount / 2)] ?? Number.NaN,
+        p95: results.detectionTimes[Math.floor(sampleCount * 0.95)] ?? Number.NaN
+      };
+      
+      return {
+        ...results,
+        overallStats,
+        formatDistribution: Array.from(results.formatDistribution.entries())
+      };
+    }
+  );
+  
+  // Test 3: Concurrent detection performance (batches of 1 to 50 detections awaited together)
+  const concurrentDetection = await performanceTracker.measureAsync(
+    'concurrent-detection',
+    async () => {
+      const einvoice = new EInvoice();
+      const concurrencyLevels = [1, 5, 10, 20, 50];
+      const results = [];
+      
+      // Create test content
+      const testContent = `<?xml version="1.0" encoding="UTF-8"?>
+        <Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
+          <ID>CONCURRENT-TEST</ID>
+          <IssueDate>2024-01-01</IssueDate>
+          <AccountingSupplierParty><Party><PartyName><Name>Test Supplier</Name></PartyName></Party></AccountingSupplierParty>
+          <AccountingCustomerParty><Party><PartyName><Name>Test Customer</Name></PartyName></Party></AccountingCustomerParty>
+        </Invoice>`;
+      
+      for (const concurrency of concurrencyLevels) {
+        const startTime = process.hrtime.bigint();
+        
+        // Start every detection of the batch before awaiting any of them
+        const tasks = Array(concurrency).fill(null).map(() => 
+          einvoice.detectFormat(testContent)
+        );
+        
+        const detectionResults = await Promise.all(tasks);
+        const endTime = process.hrtime.bigint();
+        // Nanosecond clock: a millisecond clock can read 0 for a short batch and make throughput Infinity
+        const elapsedMs = Number(endTime - startTime) / 1_000_000;
+        const throughput = (concurrency / (elapsedMs / 1000)).toFixed(2);
+        
+        results.push({
+          concurrency,
+          duration: Number(elapsedMs.toFixed(3)), // milliseconds, rounded for display
+          throughput: `${throughput} detections/sec`,
+          allSuccessful: detectionResults.every(r => r === 'ubl')
+        });
+      }
+      
+      return results;
+    }
+  );
+  
+  // Test 4: Edge case detection performance (mean detectFormat() time for unusual inputs)
+  const edgeCaseDetection = await performanceTracker.measureAsync(
+    'edge-case-detection',
+    async () => {
+      const einvoice = new EInvoice();
+      const edgeCases = [
+        {
+          name: 'Minimal XML',
+          content: '<?xml version="1.0"?><root/>'
+        },
+        {
+          name: 'No XML declaration',
+          content: '<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><ID>1</ID></Invoice>'
+        },
+        {
+          name: 'With comments',
+          content: '<?xml version="1.0"?><!-- Comment --><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><!-- Another comment --><ID>1</ID></Invoice>'
+        },
+        {
+          name: 'With processing instructions',
+          content: '<?xml version="1.0"?><?xml-stylesheet type="text/xsl" href="style.xsl"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><ID>1</ID></Invoice>'
+        },
+        {
+          name: 'Mixed namespaces',
+          content: '<?xml version="1.0"?><ns1:Invoice xmlns:ns1="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2" xmlns:ns2="http://example.com"><ns1:ID>1</ns1:ID></ns1:Invoice>'
+        },
+        {
+          name: 'Large with whitespace',
+          content: '<?xml version="1.0"?>\n\n\n' + ' '.repeat(10000) + '<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">\n' + ' '.repeat(5000) + '<ID>1</ID>\n' + ' '.repeat(5000) + '</Invoice>'
+        }
+      ];
+      
+      // Same repetition count for every case; only the mean time is reported
+      const iterations = 50;
+      const results = [];
+      
+      for (const edgeCase of edgeCases) {
+        const times: number[] = [];
+        for (let i = 0; i < iterations; i++) {
+          const startTime = process.hrtime.bigint();
+          await einvoice.detectFormat(edgeCase.content); // detected format is not checked here, only timed
+          const endTime = process.hrtime.bigint();
+          times.push(Number(endTime - startTime) / 1_000_000); // ns to ms
+        }
+        
+        const avgTime = times.reduce((a, b) => a + b, 0) / times.length;
+        
+        results.push({
+          name: edgeCase.name,
+          avgTime: avgTime.toFixed(3),
+          // UTF-8 byte length, because the summary prints this value with a "bytes" label
+          contentSize: Buffer.byteLength(edgeCase.content, 'utf-8')
+        });
+      }
+      
+      return results;
+    }
+  );
+  
+  // Test 5: Performance under memory pressure
+  const memoryPressureDetection = await performanceTracker.measureAsync(
+    'memory-pressure-detection',
+    async () => {
+      const einvoice = new EInvoice();
+      const testXml = '<?xml version="1.0"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><ID>MEM-TEST</ID></Invoice>';
+      const runs = 50;
+      
+      // Mean time in milliseconds of `runs` sequential detectFormat() calls on testXml.
+      // Shared by both phases so the baseline and pressured numbers are measured identically.
+      const measureAverageMs = async (): Promise<number> => {
+        let totalMs = 0;
+        for (let i = 0; i < runs; i++) {
+          const start = process.hrtime.bigint();
+          await einvoice.detectFormat(testXml);
+          const end = process.hrtime.bigint();
+          totalMs += Number(end - start) / 1_000_000;
+        }
+        return totalMs / runs;
+      };
+      
+      // Baseline measurement
+      const baseline = await measureAverageMs();
+      
+      // Create memory pressure by allocating large arrays (ten arrays of 1,000,000 numbers each);
+      // they stay referenced until the cleanup below, i.e. throughout the second measurement.
+      const memoryHogs: number[][] = [];
+      for (let i = 0; i < 10; i++) {
+        memoryHogs.push(new Array(1_000_000).fill(Math.random()));
+      }
+      
+      // Measurement under pressure
+      const underPressure = await measureAverageMs();
+      
+      // Cleanup
+      memoryHogs.length = 0;
+      
+      // Relative slowdown versus the baseline, formatted as a percentage with two decimals
+      // (a baseline of exactly 0 ms would make this NaN or Infinity)
+      const degradation = ((underPressure - baseline) / baseline * 100).toFixed(2) + '%';
+      
+      // Two numeric means plus the formatted percentage string
+      return {
+        baseline,
+        underPressure,
+        degradation
+      };
+    }
+  );
+
+  // Summary: emit every collected measurement as test comments, section by section
+  t.comment('\n=== PERF-01: Format Detection Speed Test Summary ===');
+  
+  t.comment('\nSingle File Detection Benchmarks (100 iterations each):');
+  for (const entry of singleFileDetection.result) {
+    t.comment(`  ${entry.name}:`);
+    t.comment(`    - Min: ${entry.min.toFixed(3)}ms, Max: ${entry.max.toFixed(3)}ms`);
+    t.comment(`    - Avg: ${entry.avg.toFixed(3)}ms, Median: ${entry.median.toFixed(3)}ms`);
+    t.comment(`    - P95: ${entry.p95.toFixed(3)}ms, P99: ${entry.p99.toFixed(3)}ms`);
+  }
+  
+  const { totalFiles, overallStats: overall, sizeCategories, formatDistribution } = corpusDetection.result;
+  t.comment(`\nCorpus Detection Performance (${totalFiles} files):`);
+  t.comment(`  Overall statistics:`);
+  t.comment(`    - Min: ${overall.min.toFixed(3)}ms`);
+  t.comment(`    - Max: ${overall.max.toFixed(3)}ms`);
+  t.comment(`    - Avg: ${overall.avg.toFixed(3)}ms`);
+  t.comment(`    - Median: ${overall.median.toFixed(3)}ms`);
+  t.comment(`    - P95: ${overall.p95.toFixed(3)}ms`);
+  t.comment(`  By file size:`);
+  for (const [bucket, info] of Object.entries(sizeCategories) as [string, any][]) {
+    if (info.count <= 0) continue; // skip size buckets that received no files
+    t.comment(`    - ${bucket}: ${info.count} files, avg ${info.avgTime.toFixed(3)}ms`);
+  }
+  t.comment(`  Format distribution:`);
+  for (const [detected, occurrences] of formatDistribution) {
+    t.comment(`    - ${detected}: ${occurrences} files`);
+  }
+  
+  t.comment('\nConcurrent Detection Performance:');
+  for (const batch of concurrentDetection.result) {
+    t.comment(`  ${batch.concurrency} concurrent: ${batch.duration}ms total, ${batch.throughput}`);
+  }
+  
+  t.comment('\nEdge Case Detection:');
+  for (const edge of edgeCaseDetection.result) {
+    t.comment(`  ${edge.name} (${edge.contentSize} bytes): ${edge.avgTime}ms avg`);
+  }
+  
+  t.comment('\nMemory Pressure Impact:');
+  const memory = memoryPressureDetection.result;
+  t.comment(`  Baseline: ${memory.baseline.toFixed(3)}ms`);
+  t.comment(`  Under pressure: ${memory.underPressure.toFixed(3)}ms`);
+  t.comment(`  Performance degradation: ${memory.degradation}`);
+  
+  // Performance targets check: informational only, the outcome is printed and nothing is asserted
+  t.comment('\n=== Performance Targets Check ===');
+  const avgDetectionTime = overall.avg;
+  const targetTime = 10; // Target: <10ms for format detection
+  
+  const targetMet = avgDetectionTime < targetTime;
+  t.comment(targetMet
+    ? `✅ Format detection meets target: ${avgDetectionTime.toFixed(3)}ms < ${targetTime}ms`
+    : `⚠️ Format detection exceeds target: ${avgDetectionTime.toFixed(3)}ms > ${targetTime}ms`);
+  
+  // Overall performance summary
+  t.comment('\n=== Overall Performance Summary ===');
+  performanceTracker.logSummary();
+
+  t.end();
+});
+
+tap.start();