einvoice/test/suite/einvoice_format-detection/test.fd-07.edge-cases.ts
2025-05-28 08:40:26 +00:00

253 lines
8.6 KiB
TypeScript

import { expect, tap } from '@git.zone/tstest/tapbundle';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
/**
 * FD-07 edge cases: passes degenerate inputs (empty string, plain text, a bare
 * XML root, a BOM-prefixed UBL invoice and an unclosed-tag document) to
 * FormatDetector.detectFormat and compares the lower-cased string form of each
 * result with the expected format name.
 */
tap.test('FD-07: Edge Cases - should handle malformed and edge case inputs', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  // Cases run in declaration order; each one is tracked, logged and asserted.
  const scenarios = [
    { label: 'Empty string', input: '', expected: 'unknown' },
    { label: 'Non-XML text', input: 'This is not XML content', expected: 'unknown' },
    { label: 'Minimal XML', input: '<?xml version="1.0"?><root></root>', expected: 'unknown' },
    {
      // Leading U+FEFF byte-order mark in front of the XML declaration.
      label: 'XML with BOM',
      input: '\ufeff<?xml version="1.0"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"></Invoice>',
      expected: 'ubl'
    },
    {
      // The xmldom parser is lenient and tolerates unclosed tags (with warnings),
      // so detection still reports UBL based on the Invoice element. Such a
      // document would only fail later, during actual parsing/validation.
      label: 'Malformed XML',
      input: '<?xml version="1.0"?><Invoice><unclosed>',
      expected: 'ubl'
    }
  ];

  for (const { label, input, expected } of scenarios) {
    const tracked = await PerformanceTracker.track(
      'edge-case-detection',
      async () => FormatDetector.detectFormat(input)
    );
    console.log(`${label}: ${tracked.result}`);
    expect(tracked.result.toString().toLowerCase()).toEqual(expected);
  }
});
/**
 * FD-07 encoding handling: checks that UBL invoices are detected regardless of
 * the encoding named in the XML declaration (UTF-8, ISO-8859-1, or none).
 * Every input is a plain JavaScript string, so only the declaration text and
 * the characters in the payload vary between cases.
 */
tap.test('FD-07: Encoding Handling - should handle different character encodings', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  // Each entry: [label used in the log line, XML payload, expected format].
  const encodingCases: Array<[string, string, string]> = [
    [
      'UTF-8 with special characters',
      `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>Tëst-Invöice-001</ID>
<Note>Spëcial châractërs: àáâãäåæçèéêë</Note>
</Invoice>`,
      'ubl'
    ],
    [
      'ISO-8859-1 encoding declaration',
      `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>Test-001</ID>
</Invoice>`,
      'ubl'
    ],
    [
      'No encoding declaration',
      `<?xml version="1.0"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>Test-002</ID>
</Invoice>`,
      'ubl'
    ]
  ];

  for (const [label, payload, expected] of encodingCases) {
    const tracked = await PerformanceTracker.track(
      'encoding-detection',
      async () => FormatDetector.detectFormat(payload)
    );
    const detected = tracked.result;
    console.log(`${label}: ${detected}`);
    expect(detected.toString().toLowerCase()).toEqual(expected);
  }
});
/**
 * FD-07 namespace variations: checks that detection keys on the namespace URI
 * and root element rather than on a particular prefix. Covers UBL declared as
 * the default namespace, UBL under two different prefixes, and CII under the
 * `rsm` prefix.
 */
tap.test('FD-07: Namespace Variations - should handle different namespace patterns', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  const namespaceTests = [
    {
      name: 'UBL with default namespace',
      xml: `<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>UBL-001</ID>
</Invoice>`,
      expectedFormat: 'ubl'
    },
    {
      name: 'UBL with prefixed namespace',
      xml: `<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ubl:ID>UBL-002</ubl:ID>
</ubl:Invoice>`,
      expectedFormat: 'ubl'
    },
    {
      // The fixture binds the CII namespace to the `rsm` prefix (xmlns:rsm),
      // so the label describes it as prefixed rather than default.
      name: 'CII with prefixed namespace',
      xml: `<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100">
<rsm:ExchangedDocument/>
</rsm:CrossIndustryInvoice>`,
      expectedFormat: 'cii'
    },
    {
      // Same UBL namespace URI as above, bound to an arbitrary prefix (`inv`).
      name: 'UBL with custom namespace prefix',
      xml: `<inv:Invoice xmlns:inv="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<inv:ID>MIX-001</inv:ID>
</inv:Invoice>`,
      expectedFormat: 'ubl'
    }
  ];

  for (const test of namespaceTests) {
    const { result: format } = await PerformanceTracker.track(
      'namespace-variation-detection',
      async () => FormatDetector.detectFormat(test.xml)
    );
    console.log(`${test.name}: ${format}`);

    // Substring match instead of strict equality.
    // NOTE(review): presumably this lets more specific format names that
    // contain the family name still pass — confirm against the detector's
    // return values.
    const formatStr = format.toString().toLowerCase();
    const isExpectedFormat = formatStr.includes(test.expectedFormat);
    expect(isExpectedFormat).toEqual(true);
  }
});
/**
 * FD-07 large input stress test: generates UBL invoices with 10, 100 and 1000
 * invoice lines, runs format detection three times per size, and asserts that
 * the result is UBL and that the mean tracked duration stays below 100.
 */
tap.test('FD-07: Large Input Stress Test - should handle very large XML inputs', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  // Number of detection runs averaged for each input size.
  const RUNS_PER_SIZE = 3;

  // Builds a UBL invoice document containing `itemCount` numbered invoice lines.
  const generateLargeUBL = (itemCount: number): string => {
    const head = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>LARGE-TEST-${Date.now()}</cbc:ID>
<cbc:IssueDate>2024-01-01</cbc:IssueDate>`;

    // One fragment per line item, numbered from 1; each fragment starts with a newline.
    const lineItems = Array.from({ length: itemCount }, (_unused, index) => {
      const lineNo = index + 1;
      return `
<cac:InvoiceLine>
<cbc:ID>${lineNo}</cbc:ID>
<cbc:InvoicedQuantity unitCode="EA">${lineNo}</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">${lineNo * 100}</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Product ${lineNo}</cbc:Name>
<cbc:Description>Description for product ${lineNo} with some additional text to make it longer</cbc:Description>
</cac:Item>
</cac:InvoiceLine>`;
    });

    return head + lineItems.join('') + '\n</Invoice>';
  };

  const testSizes = [
    { name: 'Small (10 items)', itemCount: 10 },
    { name: 'Medium (100 items)', itemCount: 100 },
    { name: 'Large (1000 items)', itemCount: 1000 }
  ];

  for (const { name, itemCount } of testSizes) {
    const xml = generateLargeUBL(itemCount);
    // Size is derived from the string length (code units) divided by 1024.
    console.log(`Testing ${name} - ${Math.round(xml.length / 1024)}KB`);

    // Repeat the detection and accumulate durations for a steadier measurement.
    let totalDuration = 0;
    let detectedFormat = '';
    for (let run = 0; run < RUNS_PER_SIZE; run += 1) {
      const tracked = await PerformanceTracker.track(
        'large-input-detection',
        async () => FormatDetector.detectFormat(xml)
      );
      totalDuration += tracked.metric.duration;
      detectedFormat = tracked.result.toString();
    }
    const avgTime = totalDuration / RUNS_PER_SIZE;

    console.log(` Format: ${detectedFormat}`);
    console.log(` Average time: ${avgTime.toFixed(2)}ms`);

    // The format must be UBL, and detection should stay under 100ms even for large files.
    expect(detectedFormat.toLowerCase()).toEqual('ubl');
    expect(avgTime).toBeLessThan(100);
  }
});
/**
 * FD-07 invalid formats: inputs that are not a recognised invoice format
 * (an unrelated XML document, HTML, JSON and CSV) must all be reported as
 * 'unknown' by FormatDetector.detectFormat.
 */
tap.test('FD-07: Invalid Format Edge Cases - should handle unknown formats gracefully', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  // Each entry: [label used in the log line, raw content handed to the detector].
  const unrecognisedInputs: Array<[string, string]> = [
    [
      'Valid XML, unknown invoice format',
      `<?xml version="1.0"?>
<SomeRandomDocument>
<ID>123</ID>
<Data>Some data</Data>
</SomeRandomDocument>`
    ],
    [
      'HTML content',
      `<!DOCTYPE html>
<html>
<head><title>Not XML</title></head>
<body><p>This is HTML</p></body>
</html>`
    ],
    [
      'JSON content',
      `{"invoice": {"id": "123", "amount": 100}}`
    ],
    [
      'CSV content',
      `ID,Amount,Currency
123,100,EUR
124,200,USD`
    ]
  ];

  for (const [label, content] of unrecognisedInputs) {
    const tracked = await PerformanceTracker.track(
      'invalid-format-detection',
      async () => FormatDetector.detectFormat(content)
    );
    const detected = tracked.result;
    console.log(`${label}: ${detected}`);
    expect(detected.toString().toLowerCase()).toEqual('unknown');
  }
});
// Start the tap runner once every FD-07 test in this file has been registered.
// NOTE(review): presumably this is what triggers execution of the tap.test
// registrations — confirm against the tapbundle documentation.
tap.start();