249 lines
8.3 KiB
TypeScript
249 lines
8.3 KiB
TypeScript
|
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||
|
import { CorpusLoader } from '../../helpers/corpus.loader.js';
|
||
|
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
|
||
|
|
||
|
// FD-07 edge cases: runs FormatDetector.detectFormat over a handful of
// degenerate inputs and compares the lower-cased string form of each result
// with the expected format name. Every probe is timed through
// PerformanceTracker under the shared 'edge-case-detection' label.
tap.test('FD-07: Edge Cases - should handle malformed and edge case inputs', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  // Probes are executed strictly in the order listed here.
  const probes = [
    { label: 'Empty string', input: '', expected: 'unknown' },
    { label: 'Non-XML text', input: 'This is not XML content', expected: 'unknown' },
    // Well-formed XML whose root carries no invoice namespace.
    { label: 'Minimal XML', input: '<?xml version="1.0"?><root></root>', expected: 'unknown' },
    // A U+FEFF byte-order mark placed in front of a namespaced UBL root.
    {
      label: 'XML with BOM',
      input:
        '\ufeff<?xml version="1.0"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"></Invoice>',
      expected: 'ubl',
    },
    // Un-namespaced <Invoice> root containing an element that is never closed.
    { label: 'Malformed XML', input: '<?xml version="1.0"?><Invoice><unclosed>', expected: 'unknown' },
  ];

  for (const { label, input, expected } of probes) {
    const tracked = await PerformanceTracker.track(
      'edge-case-detection',
      async () => FormatDetector.detectFormat(input)
    );
    const detected = tracked.result;

    console.log(`${label}: ${detected}`);
    expect(detected.toString().toLowerCase()).toEqual(expected);
  }
});
|
||
|
|
||
|
// FD-07 encoding handling: the same small UBL invoice is presented with a
// UTF-8 declaration (plus accented characters), an ISO-8859-1 declaration and
// no encoding declaration at all; each must be reported as 'ubl'.
tap.test('FD-07: Encoding Handling - should handle different character encodings', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  // Opening tag shared by all three documents.
  const ublRootOpen = '<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">';

  const scenarios = [
    {
      name: 'UTF-8 with special characters',
      xml: [
        '<?xml version="1.0" encoding="UTF-8"?>',
        ublRootOpen,
        '<ID>Tëst-Invöice-001</ID>',
        '<Note>Spëcial châractërs: àáâãäåæçèéêë</Note>',
        '</Invoice>',
      ].join('\n'),
      expectedFormat: 'ubl',
    },
    {
      // Only the XML declaration names ISO-8859-1; the input is still an
      // ordinary JavaScript string.
      name: 'ISO-8859-1 encoding declaration',
      xml: [
        '<?xml version="1.0" encoding="ISO-8859-1"?>',
        ublRootOpen,
        '<ID>Test-001</ID>',
        '</Invoice>',
      ].join('\n'),
      expectedFormat: 'ubl',
    },
    {
      name: 'No encoding declaration',
      xml: [
        '<?xml version="1.0"?>',
        ublRootOpen,
        '<ID>Test-002</ID>',
        '</Invoice>',
      ].join('\n'),
      expectedFormat: 'ubl',
    },
  ];

  // Sequential on purpose: one tracked detection at a time, in list order.
  for (const { name, xml, expectedFormat } of scenarios) {
    const tracked = await PerformanceTracker.track(
      'encoding-detection',
      async () => FormatDetector.detectFormat(xml)
    );
    const detected = tracked.result;

    console.log(`${name}: ${detected}`);
    expect(detected.toString().toLowerCase()).toEqual(expectedFormat);
  }
});
|
||
|
|
||
|
// FD-07 namespace variations: the detector is expected to recognise UBL and
// CII documents whether the invoice namespace is bound as the default
// namespace or to an arbitrary prefix. The check is a substring match on the
// lower-cased result, so any result string that contains the expected name
// is accepted.
tap.test('FD-07: Namespace Variations - should handle different namespace patterns', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  const namespaceTests = [
    {
      name: 'UBL with default namespace',
      xml: [
        '<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">',
        '<ID>UBL-001</ID>',
        '</Invoice>',
      ].join('\n'),
      expectedFormat: 'ubl',
    },
    {
      name: 'UBL with prefixed namespace',
      xml: [
        '<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">',
        '<ubl:ID>UBL-002</ubl:ID>',
        '</ubl:Invoice>',
      ].join('\n'),
      expectedFormat: 'ubl',
    },
    {
      // NOTE(review): the label says "default namespace" but the sample binds
      // the CII namespace to the `rsm` prefix. Label kept as-is because it is
      // part of the logged output.
      name: 'CII with default namespace',
      xml: [
        '<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100">',
        '<rsm:ExchangedDocument/>',
        '</rsm:CrossIndustryInvoice>',
      ].join('\n'),
      expectedFormat: 'cii',
    },
    {
      // NOTE(review): despite the label, only one prefix (`inv`) is used here;
      // it is a non-conventional prefix bound to the UBL invoice namespace.
      name: 'Mixed namespace prefixes',
      xml: [
        '<inv:Invoice xmlns:inv="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">',
        '<inv:ID>MIX-001</inv:ID>',
        '</inv:Invoice>',
      ].join('\n'),
      expectedFormat: 'ubl',
    },
  ];

  for (const test of namespaceTests) {
    const { result: format } = await PerformanceTracker.track(
      'namespace-variation-detection',
      async () => FormatDetector.detectFormat(test.xml)
    );

    console.log(`${test.name}: ${format}`);

    // The former extra clause `expectedFormat === 'cii' && includes('cii')`
    // could never be true when this check is false, so the single substring
    // test is equivalent.
    const formatStr = format.toString().toLowerCase();
    expect(formatStr.includes(test.expectedFormat)).toEqual(true);
  }
});
|
||
|
|
||
|
// FD-07 large-input stress test: synthesises UBL invoices with 10, 100 and
// 1000 invoice lines, runs detection three times per size, and asserts both
// the detected format and the mean duration reported by PerformanceTracker.
tap.test('FD-07: Large Input Stress Test - should handle very large XML inputs', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  // Builds a UBL invoice string holding `itemCount` <cac:InvoiceLine> entries.
  // The invoice ID embeds Date.now(), so two calls rarely yield equal output.
  function generateLargeUBL(itemCount: number): string {
    const pieces: string[] = [
      [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"',
        'xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"',
        'xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">',
        `<cbc:ID>LARGE-TEST-${Date.now()}</cbc:ID>`,
        '<cbc:IssueDate>2024-01-01</cbc:IssueDate>',
      ].join('\n'),
    ];

    for (let lineNo = 1; lineNo <= itemCount; lineNo++) {
      pieces.push(
        [
          '', // each invoice line starts on a fresh line
          '<cac:InvoiceLine>',
          `<cbc:ID>${lineNo}</cbc:ID>`,
          `<cbc:InvoicedQuantity unitCode="EA">${lineNo}</cbc:InvoicedQuantity>`,
          `<cbc:LineExtensionAmount currencyID="EUR">${lineNo * 100}</cbc:LineExtensionAmount>`,
          '<cac:Item>',
          `<cbc:Name>Product ${lineNo}</cbc:Name>`,
          `<cbc:Description>Description for product ${lineNo} with some additional text to make it longer</cbc:Description>`,
          '</cac:Item>',
          '</cac:InvoiceLine>',
        ].join('\n')
      );
    }

    pieces.push('\n</Invoice>');
    return pieces.join('');
  }

  const workloads = [
    { name: 'Small (10 items)', itemCount: 10 },
    { name: 'Medium (100 items)', itemCount: 100 },
    { name: 'Large (1000 items)', itemCount: 1000 },
  ];

  // Number of timed detections per workload.
  const runsPerWorkload = 3;

  for (const { name, itemCount } of workloads) {
    const payload = generateLargeUBL(itemCount);

    // Size is derived from the string length (UTF-16 code units), not bytes.
    console.log(`Testing ${name} - ${Math.round(payload.length / 1024)}KB`);

    // Accumulate durations and remember the format seen on the final run.
    let totalDuration = 0;
    let detectedFormat = '';

    for (let run = 0; run < runsPerWorkload; run++) {
      const tracked = await PerformanceTracker.track(
        'large-input-detection',
        async () => FormatDetector.detectFormat(payload)
      );

      totalDuration += tracked.metric.duration;
      detectedFormat = tracked.result.toString();
    }

    const avgTime = totalDuration / runsPerWorkload;
    console.log(` Format: ${detectedFormat}`);
    console.log(` Average time: ${avgTime.toFixed(2)}ms`);

    // Only the last run's format is asserted; the timing budget is 100ms.
    expect(detectedFormat.toLowerCase()).toEqual('ubl');
    expect(avgTime).toBeLessThan(100);
  }
});
|
||
|
|
||
|
// FD-07 invalid formats: content that is either valid XML of an unrelated
// vocabulary or not XML at all (HTML, JSON, CSV) must be reported as
// 'unknown' by FormatDetector.detectFormat.
tap.test('FD-07: Invalid Format Edge Cases - should handle unknown formats gracefully', async () => {
  const { FormatDetector } = await import('../../../ts/formats/utils/format.detector.js');

  // The payload field keeps the name `xml` even for the non-XML samples.
  const samples = [
    {
      name: 'Valid XML, unknown invoice format',
      xml: [
        '<?xml version="1.0"?>',
        '<SomeRandomDocument>',
        '<ID>123</ID>',
        '<Data>Some data</Data>',
        '</SomeRandomDocument>',
      ].join('\n'),
    },
    {
      name: 'HTML content',
      xml: [
        '<!DOCTYPE html>',
        '<html>',
        '<head><title>Not XML</title></head>',
        '<body><p>This is HTML</p></body>',
        '</html>',
      ].join('\n'),
    },
    {
      name: 'JSON content',
      xml: '{"invoice": {"id": "123", "amount": 100}}',
    },
    {
      name: 'CSV content',
      xml: ['ID,Amount,Currency', '123,100,EUR', '124,200,USD'].join('\n'),
    },
  ];

  for (const { name, xml } of samples) {
    const tracked = await PerformanceTracker.track(
      'invalid-format-detection',
      async () => FormatDetector.detectFormat(xml)
    );
    const detected = tracked.result;

    console.log(`${name}: ${detected}`);
    expect(detected.toString().toLowerCase()).toEqual('unknown');
  }
});
|
||
|
|
||
|
// All FD-07 tests are registered above via tap.test; start the tap run.
tap.start();
|