update
This commit is contained in:
297
test/suite/einvoice_format-detection/test.fd-10.mixed-formats.ts
Normal file
297
test/suite/einvoice_format-detection/test.fd-10.mixed-formats.ts
Normal file
@ -0,0 +1,297 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import { promises as fs } from 'fs';
|
||||
import * as path from 'path';
|
||||
import { CorpusLoader } from '../../helpers/corpus.loader.js';
|
||||
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
|
||||
|
||||
/**
 * FD-10: Mixed format detection across corpus categories.
 *
 * For each category, up to three `.xml` files returned by
 * `CorpusLoader.getFiles` are read and passed to `FormatDetector.detectFormat`.
 * A detection counts as correct when the lower-cased detected format string
 * contains one of the category's `expectedFormats`. Files that fail to read or
 * detect are logged and still count towards the category total, so they lower
 * the accuracy. The test asserts an overall accuracy above 70% whenever at
 * least one file was tested.
 */
tap.test('FD-10: Mixed Format Detection - should correctly identify formats across different categories', async () => {
  // Catch variables are `unknown` under strict mode and may not be Error
  // instances, so narrow before reading `.message`.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Get samples from multiple format categories
  const formatCategories = [
    { name: 'CII XML-Rechnung', category: 'CII_XMLRECHNUNG' as const, expectedFormats: ['cii', 'xrechnung', 'facturx'] },
    { name: 'UBL XML-Rechnung', category: 'UBL_XMLRECHNUNG' as const, expectedFormats: ['ubl', 'xrechnung'] },
    { name: 'EN16931 CII', category: 'EN16931_CII' as const, expectedFormats: ['cii', 'facturx'] },
    { name: 'EN16931 UBL', category: 'EN16931_UBL_EXAMPLES' as const, expectedFormats: ['ubl', 'xrechnung'] }
  ];

  console.log('Testing mixed format detection across multiple categories');

  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  const results: { category: string; correct: number; total: number; formats: Record<string, number> }[] = [];

  for (const category of formatCategories) {
    try {
      const files = await CorpusLoader.getFiles(category.category);
      const xmlFiles = files.filter(f => f.endsWith('.xml')).slice(0, 3); // Test 3 per category

      if (xmlFiles.length === 0) {
        console.log(`No XML files found in ${category.name}, skipping`);
        continue;
      }

      const categoryResult = {
        category: category.name,
        correct: 0,
        total: xmlFiles.length,
        formats: {} as Record<string, number>
      };

      console.log(`\nTesting ${category.name} (${xmlFiles.length} files)`);

      for (const filePath of xmlFiles) {
        const fileName = path.basename(filePath);

        try {
          const xmlContent = await fs.readFile(filePath, 'utf-8');

          const { result: format } = await PerformanceTracker.track(
            'mixed-format-detection',
            async () => FormatDetector.detectFormat(xmlContent),
            { category: category.name, file: fileName }
          );

          // Histogram of detected formats for this category (lower-cased keys).
          const formatStr = format.toString().toLowerCase();
          categoryResult.formats[formatStr] = (categoryResult.formats[formatStr] ?? 0) + 1;

          // Check if detected format matches expected formats for this category
          const isCorrect = category.expectedFormats.some(expected =>
            formatStr.includes(expected.toLowerCase())
          );

          if (isCorrect) {
            categoryResult.correct++;
            console.log(` ✓ ${fileName}: ${format} (expected for ${category.name})`);
          } else {
            console.log(` ○ ${fileName}: ${format} (unexpected for ${category.name})`);
          }
        } catch (error) {
          // A failing file is logged only; it still counts in `total`.
          console.log(` ✗ ${fileName}: Error - ${describeError(error)}`);
        }
      }

      const accuracy = (categoryResult.correct / categoryResult.total * 100).toFixed(1);
      console.log(` Accuracy: ${categoryResult.correct}/${categoryResult.total} (${accuracy}%)`);
      console.log(` Detected formats:`, categoryResult.formats);

      results.push(categoryResult);
    } catch (error) {
      // A category that cannot be loaded is reported and left out of the summary.
      console.log(`Error testing ${category.name}: ${describeError(error)}`);
    }
  }

  // Overall summary
  console.log('\nMixed Format Detection Summary:');
  let totalCorrect = 0;
  let totalFiles = 0;

  for (const result of results) {
    totalCorrect += result.correct;
    totalFiles += result.total;
    console.log(` ${result.category}: ${result.correct}/${result.total} (${(result.correct / result.total * 100).toFixed(1)}%)`);
  }

  if (totalFiles > 0) {
    const overallAccuracy = (totalCorrect / totalFiles * 100).toFixed(1);
    console.log(` Overall: ${totalCorrect}/${totalFiles} (${overallAccuracy}%)`);

    // Expect reasonable accuracy across mixed formats
    expect(totalCorrect / totalFiles).toBeGreaterThan(0.7);
  } else {
    // Make the vacuous pass visible instead of silently asserting nothing.
    console.log(' No corpus files were tested; skipping accuracy assertion');
  }

  // Performance summary
  const perfSummary = await PerformanceTracker.getSummary('mixed-format-detection');
  if (perfSummary) {
    console.log(`\nMixed Format Detection Performance:`);
    console.log(` Average: ${perfSummary.average.toFixed(2)}ms`);
    console.log(` P95: ${perfSummary.p95.toFixed(2)}ms`);
  }
});
|
||||
|
||||
/**
 * FD-10: Format ambiguity resolution.
 *
 * Feeds three inline XML documents to `FormatDetector.detectFormat` and checks
 * that the lower-cased detected format string contains one of the entries in
 * the case's `expectedPriority` list (entries are listed most specific first).
 * Each case is asserted, so a detection outside the expected set fails the
 * test rather than only being logged.
 */
tap.test('FD-10: Format Ambiguity Resolution - should handle ambiguous cases correctly', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  const ambiguousTests = [
    {
      name: 'UBL with XRechnung CustomizationID',
      xml: `<?xml version="1.0"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
         xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:xoev-de:kosit:standard:xrechnung_3.0</cbc:CustomizationID>
  <cbc:ID>AMBIG-001</cbc:ID>
</Invoice>`,
      expectedPriority: ['xrechnung', 'ubl'], // XRechnung should take priority over generic UBL
      description: 'Should prioritize XRechnung over UBL when CustomizationID is present'
    },
    {
      name: 'CII with Factur-X profile',
      xml: `<?xml version="1.0"?>
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
                          xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
  <rsm:ExchangedDocumentContext>
    <ram:GuidelineSpecifiedDocumentContextParameter>
      <ram:ID>urn:cen.eu:en16931:2017#compliant#urn:factur-x.eu:1p0:basic</ram:ID>
    </ram:GuidelineSpecifiedDocumentContextParameter>
  </rsm:ExchangedDocumentContext>
</rsm:CrossIndustryInvoice>`,
      expectedPriority: ['facturx', 'cii'], // Factur-X should take priority over generic CII
      description: 'Should prioritize Factur-X over CII when profile is present'
    },
    {
      name: 'Generic UBL without customization',
      xml: `<?xml version="1.0"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <ID>GENERIC-001</ID>
</Invoice>`,
      expectedPriority: ['ubl'],
      description: 'Should detect as generic UBL without specific customization'
    }
  ];

  for (const test of ambiguousTests) {
    const { result: format } = await PerformanceTracker.track(
      'ambiguity-resolution-test',
      async () => FormatDetector.detectFormat(test.xml)
    );

    console.log(`\n${test.name}:`);
    console.log(` Description: ${test.description}`);
    console.log(` Detected: ${format}`);

    const formatStr = format.toString().toLowerCase();

    // First entry of the priority list that appears in the detected format;
    // undefined when the detection is outside the expected set.
    const primaryMatch = test.expectedPriority.find(expected =>
      formatStr.includes(expected)
    );
    const matchesPriority = primaryMatch !== undefined;

    if (matchesPriority) {
      console.log(` ✓ Correctly prioritized ${primaryMatch}`);
    } else {
      console.log(` ○ Expected one of: ${test.expectedPriority.join(', ')}`);
    }

    // Without this assertion the test could only fail on a thrown exception.
    expect(matchesPriority).toEqual(true);
  }
});
|
||||
|
||||
/**
 * FD-10: Format detection consistency.
 *
 * Runs `FormatDetector.detectFormat` ten times on the same UBL document via
 * `PerformanceTracker.track`, asserts that every run yields the same format
 * string, and checks that the spread (max - min) of the measured durations
 * stays below twice the average duration.
 */
tap.test('FD-10: Format Detection Consistency - should produce consistent results', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  // Test the same XML multiple times to ensure consistency
  const testXml = `<?xml version="1.0"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
         xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:ID>CONSISTENCY-TEST</cbc:ID>
  <cbc:IssueDate>2024-01-01</cbc:IssueDate>
</Invoice>`;

  console.log('Testing format detection consistency (10 iterations)');

  // Untracked warm-up call so one-time start-up cost of the first detection
  // does not land in the measured samples and dominate the spread.
  await FormatDetector.detectFormat(testXml);

  const detectedFormats: string[] = [];
  const times: number[] = [];

  for (let i = 0; i < 10; i++) {
    const { result: format, metric } = await PerformanceTracker.track(
      'consistency-test',
      async () => FormatDetector.detectFormat(testXml)
    );

    detectedFormats.push(format.toString());
    times.push(metric.duration);
  }

  // Check consistency
  const uniqueFormats = [...new Set(detectedFormats)];
  console.log(`Detected formats: ${uniqueFormats.join(', ')}`);
  console.log(`Consistency: ${uniqueFormats.length === 1 ? 'CONSISTENT' : 'INCONSISTENT'}`);

  expect(uniqueFormats.length).toEqual(1); // Should always detect the same format

  // Check performance consistency
  const avgTime = times.reduce((a, b) => a + b, 0) / times.length;
  const maxTime = Math.max(...times);
  const minTime = Math.min(...times);
  // This is the range of the samples (max - min), not a statistical variance;
  // the log label below is kept as-is for output compatibility.
  const spread = maxTime - minTime;

  console.log(`Performance: avg ${avgTime.toFixed(2)}ms, range ${minTime.toFixed(2)}-${maxTime.toFixed(2)}ms`);
  console.log(`Variance: ${spread.toFixed(2)}ms`);

  // Performance should be relatively stable. When every sample measured as
  // 0ms the average is 0 and `0 < 0` would fail even though the timings are
  // perfectly stable, so only compare when there is a non-zero average.
  if (avgTime > 0) {
    expect(spread).toBeLessThan(avgTime * 2); // Spread shouldn't exceed 2x average
  }
});
|
||||
|
||||
/**
 * FD-10: Complex document structure.
 *
 * Passes a larger, nested UBL invoice (XRechnung CustomizationID, supplier
 * party, one invoice line) to `FormatDetector.detectFormat` and asserts that
 * the lower-cased detected format contains `xrechnung` or `ubl`, and that the
 * tracked detection duration is below 20ms.
 */
tap.test('FD-10: Complex Document Structure - should handle complex nested structures', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  const complexXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
         xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
         xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:xoev-de:kosit:standard:xrechnung_3.0</cbc:CustomizationID>
  <cbc:ProfileID>urn:fdc:peppol.eu:2017:poacc:billing:01:1.0</cbc:ProfileID>
  <cbc:ID>COMPLEX-001</cbc:ID>
  <cbc:IssueDate>2024-01-01</cbc:IssueDate>
  <cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>

  <cac:AccountingSupplierParty>
    <cac:Party>
      <cac:PartyName>
        <cbc:Name>Complex Seller GmbH</cbc:Name>
      </cac:PartyName>
      <cac:PostalAddress>
        <cbc:StreetName>Musterstraße</cbc:StreetName>
        <cbc:CityName>Berlin</cbc:CityName>
        <cbc:PostalZone>10115</cbc:PostalZone>
        <cac:Country>
          <cbc:IdentificationCode>DE</cbc:IdentificationCode>
        </cac:Country>
      </cac:PostalAddress>
      <cac:PartyTaxScheme>
        <cbc:CompanyID>DE123456789</cbc:CompanyID>
        <cac:TaxScheme>
          <cbc:ID>VAT</cbc:ID>
        </cac:TaxScheme>
      </cac:PartyTaxScheme>
    </cac:Party>
  </cac:AccountingSupplierParty>

  <cac:InvoiceLine>
    <cbc:ID>1</cbc:ID>
    <cbc:InvoicedQuantity unitCode="EA">10</cbc:InvoicedQuantity>
    <cbc:LineExtensionAmount currencyID="EUR">1000.00</cbc:LineExtensionAmount>
    <cac:Item>
      <cbc:Name>Complex Product</cbc:Name>
      <cac:ClassifiedTaxCategory>
        <cbc:ID>S</cbc:ID>
        <cbc:Percent>19</cbc:Percent>
        <cac:TaxScheme>
          <cbc:ID>VAT</cbc:ID>
        </cac:TaxScheme>
      </cac:ClassifiedTaxCategory>
    </cac:Item>
  </cac:InvoiceLine>
</Invoice>`;

  console.log('Testing complex document structure detection');

  const { result: format, metric } = await PerformanceTracker.track(
    'complex-structure-detection',
    async () => FormatDetector.detectFormat(complexXml),
    // `elements` is a rough tag count: number of '<'-separated segments,
    // which includes closing tags and the XML declaration.
    { complexity: 'high', elements: complexXml.split('<').length }
  );

  console.log(`Complex document detected as: ${format}`);
  console.log(`Detection time: ${metric.duration.toFixed(2)}ms`);
  // `String.length` counts UTF-16 code units; the document contains non-ASCII
  // text ("ß"), so measure the UTF-8 encoded size to report real bytes.
  console.log(`Document size: ${Buffer.byteLength(complexXml, 'utf8')} bytes`);

  // Should still detect correctly despite complexity
  const formatStr = format.toString().toLowerCase();
  const isValidFormat = formatStr.includes('xrechnung') || formatStr.includes('ubl');
  expect(isValidFormat).toEqual(true);

  // Should still be fast despite complexity
  expect(metric.duration).toBeLessThan(20); // Should be under 20ms even for complex docs
});
|
||||
|
||||
// Start the tap runner; presumably this executes the tap.test() cases
// registered above (behavior defined by @git.zone/tstest/tapbundle).
tap.start();
|
Reference in New Issue
Block a user