2025-05-25 19:45:37 +00:00
|
|
|
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
|
|
|
import { promises as fs } from 'fs';
|
|
|
|
import * as path from 'path';
|
|
|
|
import { CorpusLoader } from '../../helpers/corpus.loader.js';
|
|
|
|
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
|
|
|
|
|
|
|
|
/**
 * FD-10 (mixed formats): takes up to three `.xml` files from each listed corpus
 * category, runs `FormatDetector.detectFormat` on their content, and counts a
 * detection as correct when the lower-cased detected format contains one of the
 * category's `expectedFormats`.
 *
 * Failure handling visible in this block:
 * - A file whose read or detection throws is logged and still counts toward
 *   the category total (the total is the number of sampled files).
 * - A category whose processing throws outside the per-file handler is logged
 *   and contributes nothing to the summary.
 * - The overall accuracy assertion (> 0.7) only runs when at least one file
 *   was tallied.
 */
tap.test('FD-10: Mixed Format Detection - should correctly identify formats across different categories', async () => {
  // Catch variables are `unknown` under strict compiler settings, so narrow
  // before reading `.message`; non-Error throwables are stringified instead
  // of being reported as "undefined".
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Get samples from multiple format categories
  const formatCategories = [
    { name: 'CII XML-Rechnung', category: 'CII_XMLRECHNUNG' as const, expectedFormats: ['cii', 'xrechnung', 'facturx'] },
    { name: 'UBL XML-Rechnung', category: 'UBL_XMLRECHNUNG' as const, expectedFormats: ['ubl', 'xrechnung'] },
    { name: 'EN16931 CII', category: 'EN16931_CII' as const, expectedFormats: ['cii', 'facturx', 'zugferd'] }, // ZUGFeRD v1 files are valid here
    { name: 'EN16931 UBL', category: 'EN16931_UBL_EXAMPLES' as const, expectedFormats: ['ubl', 'xrechnung', 'fatturapa'] } // Some examples might be FatturaPA
  ];

  console.log('Testing mixed format detection across multiple categories');

  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  const results: { category: string; correct: number; total: number; formats: Record<string, number> }[] = [];

  for (const category of formatCategories) {
    try {
      const files = await CorpusLoader.getFiles(category.category);
      const xmlFiles = files.filter(f => f.endsWith('.xml')).slice(0, 3); // Test 3 per category

      if (xmlFiles.length === 0) {
        console.log(`No XML files found in ${category.name}, skipping`);
        continue;
      }

      const categoryResult = {
        category: category.name,
        correct: 0,
        total: xmlFiles.length,
        formats: {} as Record<string, number>
      };

      console.log(`\nTesting ${category.name} (${xmlFiles.length} files)`);

      for (const filePath of xmlFiles) {
        const fileName = path.basename(filePath);

        try {
          const xmlContent = await fs.readFile(filePath, 'utf-8');

          const { result: format } = await PerformanceTracker.track(
            'mixed-format-detection',
            async () => FormatDetector.detectFormat(xmlContent),
            { category: category.name, file: fileName }
          );

          // Histogram of detected formats for this category, keyed by lower-cased name.
          const formatStr = format.toString().toLowerCase();
          categoryResult.formats[formatStr] = (categoryResult.formats[formatStr] || 0) + 1;

          // Check if detected format matches expected formats for this category
          const isCorrect = category.expectedFormats.some(expected =>
            formatStr.includes(expected.toLowerCase())
          );

          if (isCorrect) {
            categoryResult.correct++;
            console.log(` ✓ ${fileName}: ${format} (expected for ${category.name})`);
          } else {
            console.log(` ○ ${fileName}: ${format} (unexpected for ${category.name})`);
          }
        } catch (error: unknown) {
          // Best-effort: a failing file is reported but does not abort the category.
          console.log(` ✗ ${fileName}: Error - ${describeError(error)}`);
        }
      }

      const accuracy = (categoryResult.correct / categoryResult.total * 100).toFixed(1);
      console.log(` Accuracy: ${categoryResult.correct}/${categoryResult.total} (${accuracy}%)`);
      console.log(` Detected formats:`, categoryResult.formats);

      results.push(categoryResult);
    } catch (error: unknown) {
      // Best-effort: a failing category is reported and left out of the summary.
      console.log(`Error testing ${category.name}: ${describeError(error)}`);
    }
  }

  // Overall summary
  console.log('\nMixed Format Detection Summary:');
  let totalCorrect = 0;
  let totalFiles = 0;

  results.forEach(result => {
    totalCorrect += result.correct;
    totalFiles += result.total;
    console.log(` ${result.category}: ${result.correct}/${result.total} (${(result.correct/result.total*100).toFixed(1)}%)`);
  });

  if (totalFiles > 0) {
    const overallAccuracy = (totalCorrect / totalFiles * 100).toFixed(1);
    console.log(` Overall: ${totalCorrect}/${totalFiles} (${overallAccuracy}%)`);

    // Expect reasonable accuracy across mixed formats
    expect(totalCorrect / totalFiles).toBeGreaterThan(0.7);
  }

  // Performance summary
  const perfSummary = await PerformanceTracker.getSummary('mixed-format-detection');
  if (perfSummary) {
    console.log(`\nMixed Format Detection Performance:`);
    console.log(` Average: ${perfSummary.average.toFixed(2)}ms`);
    console.log(` P95: ${perfSummary.p95.toFixed(2)}ms`);
  }
});
|
|
|
|
|
|
|
|
/**
 * FD-10 (ambiguity): feeds three inline XML documents to
 * `FormatDetector.detectFormat` and logs, for each, whether the lower-cased
 * detected format contains any entry of that scenario's `expectedPriority`
 * list (the first matching entry is the one reported).
 *
 * This block only logs its findings; it contains no `expect` calls.
 */
tap.test('FD-10: Format Ambiguity Resolution - should handle ambiguous cases correctly', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  // UBL invoice carrying an XRechnung CustomizationID.
  const ublWithXRechnung = {
    name: 'UBL with XRechnung CustomizationID',
    xml: `<?xml version="1.0"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:xoev-de:kosit:standard:xrechnung_3.0</cbc:CustomizationID>
<cbc:ID>AMBIG-001</cbc:ID>
</Invoice>`,
    expectedPriority: ['xrechnung', 'ubl'], // XRechnung should take priority over generic UBL
    description: 'Should prioritize XRechnung over UBL when CustomizationID is present'
  };

  // CII invoice carrying a Factur-X guideline ID.
  const ciiWithFacturX = {
    name: 'CII with Factur-X profile',
    xml: `<?xml version="1.0"?>
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<rsm:ExchangedDocumentContext>
<ram:GuidelineSpecifiedDocumentContextParameter>
<ram:ID>urn:cen.eu:en16931:2017#compliant#urn:factur-x.eu:1p0:basic</ram:ID>
</ram:GuidelineSpecifiedDocumentContextParameter>
</rsm:ExchangedDocumentContext>
</rsm:CrossIndustryInvoice>`,
    expectedPriority: ['facturx', 'cii'], // Factur-X should take priority over generic CII
    description: 'Should prioritize Factur-X over CII when profile is present'
  };

  // Bare UBL invoice with no customization marker.
  const genericUbl = {
    name: 'Generic UBL without customization',
    xml: `<?xml version="1.0"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>GENERIC-001</ID>
</Invoice>`,
    expectedPriority: ['ubl'],
    description: 'Should detect as generic UBL without specific customization'
  };

  const scenarios = [ublWithXRechnung, ciiWithFacturX, genericUbl];

  for (const scenario of scenarios) {
    const tracked = await PerformanceTracker.track(
      'ambiguity-resolution-test',
      async () => FormatDetector.detectFormat(scenario.xml)
    );
    const detected = tracked.result;

    console.log(`\n${scenario.name}:`);
    console.log(` Description: ${scenario.description}`);
    console.log(` Detected: ${detected}`);

    // One pass over the priority list: the first entry contained in the
    // detected format name, or undefined when none is.
    const detectedLower = detected.toString().toLowerCase();
    const firstHit = scenario.expectedPriority.find(candidate =>
      detectedLower.includes(candidate)
    );

    if (firstHit === undefined) {
      console.log(` ○ Expected one of: ${scenario.expectedPriority.join(', ')}`);
      continue;
    }

    console.log(` ✓ Correctly prioritized ${firstHit}`);
  }
});
|
|
|
|
|
|
|
|
/**
 * FD-10 (consistency): runs `FormatDetector.detectFormat` ten times on the
 * same small UBL document and asserts that
 * 1. every run yields the same format string, and
 * 2. the spread between slowest and fastest run stays below
 *    max(3 × average duration, 0.5).
 */
tap.test('FD-10: Format Detection Consistency - should produce consistent results', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  // Test the same XML multiple times to ensure consistency
  const testXml = `<?xml version="1.0"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>CONSISTENCY-TEST</cbc:ID>
<cbc:IssueDate>2024-01-01</cbc:IssueDate>
</Invoice>`;

  console.log('Testing format detection consistency (10 iterations)');

  const runCount = 10;
  const seenFormats: string[] = [];
  const durations: number[] = [];

  let run = 0;
  while (run < runCount) {
    const outcome = await PerformanceTracker.track(
      'consistency-test',
      async () => FormatDetector.detectFormat(testXml)
    );

    seenFormats.push(outcome.result.toString());
    durations.push(outcome.metric.duration);
    run += 1;
  }

  // Check consistency
  const distinctFormats = Array.from(new Set(seenFormats));
  const verdict = distinctFormats.length === 1 ? 'CONSISTENT' : 'INCONSISTENT';
  console.log(`Detected formats: ${distinctFormats.join(', ')}`);
  console.log(`Consistency: ${verdict}`);

  expect(distinctFormats.length).toEqual(1); // Should always detect the same format

  // Check performance consistency
  let durationSum = 0;
  for (const duration of durations) {
    durationSum += duration;
  }
  const meanDuration = durationSum / durations.length;
  const slowest = Math.max(...durations);
  const fastest = Math.min(...durations);
  // Reported as "Variance" in the log line, but this is the max-min range.
  const spread = slowest - fastest;

  console.log(`Performance: avg ${meanDuration.toFixed(2)}ms, range ${fastest.toFixed(2)}-${slowest.toFixed(2)}ms`);
  console.log(`Variance: ${spread.toFixed(2)}ms`);

  // Performance should be relatively stable
  // Allow for some variation in timing due to system load
  const allowedSpread = Math.max(meanDuration * 3, 0.5); // 3x average, with a 0.5ms floor
  expect(spread).toBeLessThan(allowedSpread);
});
|
|
|
|
|
|
|
|
/**
 * FD-10 (complex structure): runs `FormatDetector.detectFormat` on a nested
 * UBL invoice (supplier party, postal address, tax scheme, one invoice line)
 * that carries an XRechnung CustomizationID, then asserts that
 * 1. the lower-cased detected format contains 'xrechnung' or 'ubl', and
 * 2. the tracked duration is below 20.
 *
 * The logged document size is measured in UTF-8 bytes, the encoding declared
 * in the XML prolog. `String.length` counts UTF-16 code units and would
 * under-report because the document contains a non-ASCII character ('ß').
 */
tap.test('FD-10: Complex Document Structure - should handle complex nested structures', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  const complexXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:xoev-de:kosit:standard:xrechnung_3.0</cbc:CustomizationID>
<cbc:ProfileID>urn:fdc:peppol.eu:2017:poacc:billing:01:1.0</cbc:ProfileID>
<cbc:ID>COMPLEX-001</cbc:ID>
<cbc:IssueDate>2024-01-01</cbc:IssueDate>
<cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>

<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Complex Seller GmbH</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>Musterstraße</cbc:StreetName>
<cbc:CityName>Berlin</cbc:CityName>
<cbc:PostalZone>10115</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
<cac:PartyTaxScheme>
<cbc:CompanyID>DE123456789</cbc:CompanyID>
<cac:TaxScheme>
<cbc:ID>VAT</cbc:ID>
</cac:TaxScheme>
</cac:PartyTaxScheme>
</cac:Party>
</cac:AccountingSupplierParty>

<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:InvoicedQuantity unitCode="EA">10</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">1000.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Complex Product</cbc:Name>
<cac:ClassifiedTaxCategory>
<cbc:ID>S</cbc:ID>
<cbc:Percent>19</cbc:Percent>
<cac:TaxScheme>
<cbc:ID>VAT</cbc:ID>
</cac:TaxScheme>
</cac:ClassifiedTaxCategory>
</cac:Item>
</cac:InvoiceLine>
</Invoice>`;

  console.log('Testing complex document structure detection');

  const { result: format, metric } = await PerformanceTracker.track(
    'complex-structure-detection',
    async () => FormatDetector.detectFormat(complexXml),
    // `elements` is the number of '<'-separated pieces, not a parsed element count.
    { complexity: 'high', elements: complexXml.split('<').length }
  );

  // TextEncoder always encodes as UTF-8, so this is the true byte size of the
  // document in its declared encoding.
  const documentBytes = new TextEncoder().encode(complexXml).length;

  console.log(`Complex document detected as: ${format}`);
  console.log(`Detection time: ${metric.duration.toFixed(2)}ms`);
  console.log(`Document size: ${documentBytes} bytes`);

  // Should still detect correctly despite complexity
  const formatStr = format.toString().toLowerCase();
  const isValidFormat = formatStr.includes('xrechnung') || formatStr.includes('ubl');
  expect(isValidFormat).toEqual(true);

  // Should still be fast despite complexity
  expect(metric.duration).toBeLessThan(20); // Should be under 20ms even for complex docs
});
|
|
|
|
|
|
|
|
// Start the tap runner; every tap.test(...) registration in this file precedes this call.
tap.start();
|