/**
 * FD-10: mixed format detection tests.
 *
 * Exercises `FormatDetector.detectFormat` in four ways:
 *   1. against a few corpus files from several format categories,
 *   2. against hand-written documents where a specific profile should win
 *      over the generic syntax it is built on,
 *   3. repeatedly against the same document to check the result is stable,
 *   4. against a larger, nested document.
 *
 * The inline XML fixtures are deliberately minimal skeletons: they carry just
 * the namespaces and identifiers that the test names and descriptions refer to.
 */
import { expect, tap } from '@git.zone/tstest/tapbundle';
import { promises as fs } from 'fs';
import * as path from 'path';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';

/** Tally of detection outcomes for one corpus category. */
interface CategoryResult {
  category: string;
  /** Number of files whose detected format matched one of the expected formats. */
  correct: number;
  /** Number of files sampled (files that failed to load still count here). */
  total: number;
  /** Detected format (lower-cased) -> number of files detected as that format. */
  formats: Record<string, number>;
}

/** Extracts a printable message from a caught value without assuming it is an Error. */
const describeError = (error: unknown): string =>
  error instanceof Error ? error.message : String(error);

tap.test('FD-10: Mixed Format Detection - should correctly identify formats across different categories', async () => {
  // Get samples from multiple format categories
  const formatCategories = [
    { name: 'CII XML-Rechnung', category: 'CII_XMLRECHNUNG' as const, expectedFormats: ['cii', 'xrechnung', 'facturx'] },
    { name: 'UBL XML-Rechnung', category: 'UBL_XMLRECHNUNG' as const, expectedFormats: ['ubl', 'xrechnung'] },
    // ZUGFeRD v1 files are valid here
    { name: 'EN16931 CII', category: 'EN16931_CII' as const, expectedFormats: ['cii', 'facturx', 'zugferd'] },
    // Some examples might be FatturaPA
    { name: 'EN16931 UBL', category: 'EN16931_UBL_EXAMPLES' as const, expectedFormats: ['ubl', 'xrechnung', 'fatturapa'] }
  ];

  console.log('Testing mixed format detection across multiple categories');

  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  const results: CategoryResult[] = [];

  for (const category of formatCategories) {
    try {
      const files = await CorpusLoader.getFiles(category.category);
      const xmlFiles = files.filter(f => f.endsWith('.xml')).slice(0, 3); // Test 3 per category

      if (xmlFiles.length === 0) {
        console.log(`No XML files found in ${category.name}, skipping`);
        continue;
      }

      const categoryResult: CategoryResult = {
        category: category.name,
        correct: 0,
        total: xmlFiles.length,
        formats: {}
      };

      console.log(`\nTesting ${category.name} (${xmlFiles.length} files)`);

      for (const filePath of xmlFiles) {
        const fileName = path.basename(filePath);

        try {
          const xmlContent = await fs.readFile(filePath, 'utf-8');

          const { result: format } = await PerformanceTracker.track(
            'mixed-format-detection',
            async () => FormatDetector.detectFormat(xmlContent),
            { category: category.name, file: fileName }
          );

          const formatStr = format.toString().toLowerCase();
          categoryResult.formats[formatStr] = (categoryResult.formats[formatStr] ?? 0) + 1;

          // Check if detected format matches expected formats for this category
          const isCorrect = category.expectedFormats.some(expected =>
            formatStr.includes(expected.toLowerCase())
          );

          if (isCorrect) {
            categoryResult.correct++;
            console.log(` ✓ ${fileName}: ${format} (expected for ${category.name})`);
          } else {
            console.log(` ○ ${fileName}: ${format} (unexpected for ${category.name})`);
          }
        } catch (error: unknown) {
          // A file that cannot be read or detected stays in `total`, so it lowers accuracy.
          console.log(` ✗ ${fileName}: Error - ${describeError(error)}`);
        }
      }

      // `total` is never 0 here because empty categories were skipped above.
      const accuracy = (categoryResult.correct / categoryResult.total * 100).toFixed(1);
      console.log(` Accuracy: ${categoryResult.correct}/${categoryResult.total} (${accuracy}%)`);
      console.log(` Detected formats:`, categoryResult.formats);

      results.push(categoryResult);
    } catch (error: unknown) {
      console.log(`Error testing ${category.name}: ${describeError(error)}`);
    }
  }

  // Overall summary
  console.log('\nMixed Format Detection Summary:');
  let totalCorrect = 0;
  let totalFiles = 0;

  for (const result of results) {
    totalCorrect += result.correct;
    totalFiles += result.total;
    console.log(` ${result.category}: ${result.correct}/${result.total} (${(result.correct / result.total * 100).toFixed(1)}%)`);
  }

  // Only assert when at least one category yielded files; otherwise there is nothing to measure.
  if (totalFiles > 0) {
    const overallAccuracy = (totalCorrect / totalFiles * 100).toFixed(1);
    console.log(` Overall: ${totalCorrect}/${totalFiles} (${overallAccuracy}%)`);

    // Expect reasonable accuracy across mixed formats
    expect(totalCorrect / totalFiles).toBeGreaterThan(0.7);
  }

  // Performance summary
  const perfSummary = await PerformanceTracker.getSummary('mixed-format-detection');
  if (perfSummary) {
    console.log(`\nMixed Format Detection Performance:`);
    console.log(` Average: ${perfSummary.average.toFixed(2)}ms`);
    console.log(` P95: ${perfSummary.p95.toFixed(2)}ms`);
  }
});

tap.test('FD-10: Format Ambiguity Resolution - should handle ambiguous cases correctly', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  const ambiguousTests = [
    {
      name: 'UBL with XRechnung CustomizationID',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
         xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:xoev-de:kosit:standard:xrechnung_3.0</cbc:CustomizationID>
  <cbc:ID>AMBIG-001</cbc:ID>
</Invoice>`,
      expectedPriority: ['xrechnung', 'ubl'], // XRechnung should take priority over generic UBL
      description: 'Should prioritize XRechnung over UBL when CustomizationID is present'
    },
    {
      name: 'CII with Factur-X profile',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
                          xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
  <rsm:ExchangedDocumentContext>
    <ram:GuidelineSpecifiedDocumentContextParameter>
      <ram:ID>urn:cen.eu:en16931:2017#compliant#urn:factur-x.eu:1p0:basic</ram:ID>
    </ram:GuidelineSpecifiedDocumentContextParameter>
  </rsm:ExchangedDocumentContext>
</rsm:CrossIndustryInvoice>`,
      expectedPriority: ['facturx', 'cii'], // Factur-X should take priority over generic CII
      description: 'Should prioritize Factur-X over CII when profile is present'
    },
    {
      name: 'Generic UBL without customization',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
         xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:ID>GENERIC-001</cbc:ID>
</Invoice>`,
      expectedPriority: ['ubl'],
      description: 'Should detect as generic UBL without specific customization'
    }
  ];

  // NOTE(review): this test only logs its findings and asserts nothing, so it
  // cannot fail on a wrong detection — confirm whether that leniency is intended.
  for (const test of ambiguousTests) {
    const { result: format } = await PerformanceTracker.track(
      'ambiguity-resolution-test',
      async () => FormatDetector.detectFormat(test.xml)
    );

    console.log(`\n${test.name}:`);
    console.log(` Description: ${test.description}`);
    console.log(` Detected: ${format}`);

    const formatStr = format.toString().toLowerCase();
    // First entry of the priority list that appears in the detected format name, if any.
    const primaryMatch = test.expectedPriority.find(expected => formatStr.includes(expected));

    if (primaryMatch !== undefined) {
      console.log(` ✓ Correctly prioritized ${primaryMatch}`);
    } else {
      console.log(` ○ Expected one of: ${test.expectedPriority.join(', ')}`);
    }
  }
});

tap.test('FD-10: Format Detection Consistency - should produce consistent results', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  // Test the same XML multiple times to ensure consistency
  const testXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
         xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:ID>CONSISTENCY-TEST</cbc:ID>
  <cbc:IssueDate>2024-01-01</cbc:IssueDate>
</Invoice>`;

  console.log('Testing format detection consistency (10 iterations)');

  const detectedFormats: string[] = [];
  const times: number[] = [];

  for (let i = 0; i < 10; i++) {
    const { result: format, metric } = await PerformanceTracker.track(
      'consistency-test',
      async () => FormatDetector.detectFormat(testXml)
    );

    detectedFormats.push(format.toString());
    times.push(metric.duration);
  }

  // Check consistency
  const uniqueFormats = [...new Set(detectedFormats)];
  console.log(`Detected formats: ${uniqueFormats.join(', ')}`);
  console.log(`Consistency: ${uniqueFormats.length === 1 ? 'CONSISTENT' : 'INCONSISTENT'}`);

  expect(uniqueFormats.length).toEqual(1); // Should always detect the same format

  // Check performance consistency
  const avgTime = times.reduce((a, b) => a + b, 0) / times.length;
  const maxTime = Math.max(...times);
  const minTime = Math.min(...times);
  // "Variance" here is the max-min spread of the samples, not a statistical variance.
  const variance = maxTime - minTime;

  console.log(`Performance: avg ${avgTime.toFixed(2)}ms, range ${minTime.toFixed(2)}-${maxTime.toFixed(2)}ms`);
  console.log(`Variance: ${variance.toFixed(2)}ms`);

  // Performance should be relatively stable
  // Allow for some variation in timing due to system load
  expect(variance).toBeLessThan(Math.max(avgTime * 3, 0.5)); // Variance shouldn't exceed 3x average or 0.5ms
});

tap.test('FD-10: Complex Document Structure - should handle complex nested structures', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  const complexXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
         xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
         xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:xoev-de:kosit:standard:xrechnung_3.0</cbc:CustomizationID>
  <cbc:ProfileID>urn:fdc:peppol.eu:2017:poacc:billing:01:1.0</cbc:ProfileID>
  <cbc:ID>COMPLEX-001</cbc:ID>
  <cbc:IssueDate>2024-01-01</cbc:IssueDate>
  <cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>
  <cac:AccountingSupplierParty>
    <cac:Party>
      <cac:PartyName>
        <cbc:Name>Complex Seller GmbH</cbc:Name>
      </cac:PartyName>
      <cac:PostalAddress>
        <cbc:StreetName>Musterstraße</cbc:StreetName>
        <cbc:CityName>Berlin</cbc:CityName>
        <cbc:PostalZone>10115</cbc:PostalZone>
        <cac:Country>
          <cbc:IdentificationCode>DE</cbc:IdentificationCode>
        </cac:Country>
      </cac:PostalAddress>
      <cac:PartyTaxScheme>
        <cbc:CompanyID>DE123456789</cbc:CompanyID>
        <cac:TaxScheme>
          <cbc:ID>VAT</cbc:ID>
        </cac:TaxScheme>
      </cac:PartyTaxScheme>
    </cac:Party>
  </cac:AccountingSupplierParty>
  <cac:InvoiceLine>
    <cbc:ID>1</cbc:ID>
    <cbc:InvoicedQuantity unitCode="C62">10</cbc:InvoicedQuantity>
    <cbc:LineExtensionAmount currencyID="EUR">1000.00</cbc:LineExtensionAmount>
    <cac:Item>
      <cbc:Name>Complex Product</cbc:Name>
      <cac:ClassifiedTaxCategory>
        <cbc:ID>S</cbc:ID>
        <cbc:Percent>19</cbc:Percent>
        <cac:TaxScheme>
          <cbc:ID>VAT</cbc:ID>
        </cac:TaxScheme>
      </cac:ClassifiedTaxCategory>
    </cac:Item>
  </cac:InvoiceLine>
</Invoice>`;

  console.log('Testing complex document structure detection');

  const { result: format, metric } = await PerformanceTracker.track(
    'complex-structure-detection',
    async () => FormatDetector.detectFormat(complexXml),
    // Counting '<' gives a rough element/tag count to record alongside the timing.
    { complexity: 'high', elements: complexXml.split('<').length }
  );

  console.log(`Complex document detected as: ${format}`);
  console.log(`Detection time: ${metric.duration.toFixed(2)}ms`);
  // `.length` is UTF-16 code units, which the log message labels as bytes.
  console.log(`Document size: ${complexXml.length} bytes`);

  // Should still detect correctly despite complexity
  const formatStr = format.toString().toLowerCase();
  const isValidFormat = formatStr.includes('xrechnung') || formatStr.includes('ubl');
  expect(isValidFormat).toEqual(true);

  // Should still be fast despite complexity
  expect(metric.duration).toBeLessThan(20); // Should be under 20ms even for complex docs
});

tap.start();