// Listing metadata from the code viewer: 769 lines, 28 KiB, TypeScript
|
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
|||
|
import * as plugins from '../../../ts/plugins.ts';
|
|||
|
import { EInvoice } from '../../../ts/classes.xinvoice.ts';
|
|||
|
import { CorpusLoader } from '../../helpers/corpus.loader.ts';
|
|||
|
import { PerformanceTracker } from '../../helpers/performance.tracker.ts';
|
|||
|
|
|||
|
// Timeout, in milliseconds, for the error handling tests: 5 minutes.
const testTimeout = 5 * 60 * 1000;
|
|||
|
|
|||
|
// ERR-01: Parsing Error Recovery
|
|||
|
// Tests error recovery mechanisms during XML parsing including
|
|||
|
// malformed XML, encoding issues, and partial document recovery
|
|||
|
|
|||
|
/**
 * ERR-01 / malformed XML.
 *
 * Passes a list of deliberately broken documents to `EInvoice.fromXmlString`
 * and logs the outcome of each. When a parse error is thrown for a scenario
 * that expects one, the error message must be non-empty and longer than ten
 * characters; scenarios flagged `recoverable` are then retried after stripping
 * control characters and `<>` sequences. An error thrown for a scenario that
 * does not expect one is rethrown and fails the test.
 */
tap.test('ERR-01: Parsing Error Recovery - Malformed XML Recovery', async (tools) => {
  const startedAt = Date.now();

  // Each scenario: label, broken document, whether a parse error is
  // anticipated, and whether a cleaned-up retry should be attempted.
  const scenarios = [
    {
      name: 'Missing closing tag',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>MALFORMED-001</ID>
<IssueDate>2024-01-15</IssueDate>
<InvoiceTypeCode>380
<DocumentCurrencyCode>EUR</DocumentCurrencyCode>
</Invoice>`,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Mismatched tags',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>MALFORMED-002</ID>
<IssueDate>2024-01-15</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
<DocumentCurrencyCode>EUR</InvoiceCurrencyCode>
</Invoice>`,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Invalid XML characters',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>MALFORMED-003</ID>
<IssueDate>2024-01-15</IssueDate>
<Note>Invalid chars: ${String.fromCharCode(0x00)}${String.fromCharCode(0x01)}</Note>
</Invoice>`,
      expectedError: true,
      recoverable: true
    },
    {
      name: 'Broken CDATA section',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>MALFORMED-004</ID>
<Note><![CDATA[Broken CDATA section]]</Note>
</Invoice>`,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Unclosed attribute quote',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID schemeID="unclosed>MALFORMED-005</ID>
</Invoice>`,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Invalid attribute value',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>MALFORMED-006</ID>
<TaxTotal>
<TaxAmount currencyID="<>">100.00</TaxAmount>
</TaxTotal>
</Invoice>`,
      expectedError: true,
      recoverable: true
    }
  ];

  // Serialises an invoice that parsed despite an expected error and logs
  // which of the key fixture values are still present in the output.
  const reportSurvivingData = async (parsedInvoice: EInvoice): Promise<void> => {
    try {
      const serialized = await parsedInvoice.toXmlString();
      if (!serialized || !(serialized.length > 50)) {
        return;
      }
      tools.log(` ✓ Partial recovery successful - extracted ${serialized.length} chars`);

      const idKept = serialized.includes('MALFORMED');
      const dateKept = serialized.includes('2024-01-15');
      const structureKept = serialized.includes('Invoice');

      tools.log(` ID preserved: ${idKept}`);
      tools.log(` Date preserved: ${dateKept}`);
      tools.log(` Structure preserved: ${structureKept}`);
    } catch (outputError) {
      tools.log(` ⚠ Recovery limited - output generation failed: ${outputError.message}`);
    }
  };

  // Strips control characters and `<>` sequences, then parses a second time.
  const retryAfterCleaning = async (brokenXml: string): Promise<void> => {
    tools.log(` Attempting recovery...`);
    try {
      const withoutControlChars = brokenXml.replace(/[\x00-\x08\x0B-\x0C\x0E-\x1F]/g, ''); // Remove control chars
      const cleanedXml = withoutControlChars.replace(/<>/g, ''); // Remove invalid brackets

      const secondInvoice = new EInvoice();
      const secondAttempt = await secondInvoice.fromXmlString(cleanedXml);

      tools.log(
        secondAttempt
          ? ` ✓ Recovery successful after cleaning`
          : ` ⚠ Recovery failed even after cleaning`
      );
    } catch (recoveryError) {
      tools.log(` ⚠ Recovery attempt failed: ${recoveryError.message}`);
    }
  };

  for (const scenario of scenarios) {
    tools.log(`Testing ${scenario.name}...`);

    try {
      const invoice = new EInvoice();
      const parsed = await invoice.fromXmlString(scenario.xml);

      if (!scenario.expectedError) {
        tools.log(parsed ? ` ✓ Parsing succeeded as expected` : ` ✗ Unexpected parsing failure`);
      } else if (!parsed) {
        tools.log(` ✓ Expected error - no parsing result`);
      } else {
        // Parsing succeeded although an error was expected: see what survived.
        tools.log(` ⚠ Expected error but parsing succeeded - checking recovery`);
        await reportSurvivingData(invoice);
      }
    } catch (error) {
      if (!scenario.expectedError) {
        tools.log(` ✗ Unexpected error: ${error.message}`);
        throw error;
      }

      tools.log(` ✓ Expected parsing error caught: ${error.message}`);

      // Error quality: the message must exist and be reasonably descriptive.
      expect(error.message).toBeTruthy();
      expect(error.message.length).toBeGreaterThan(10);

      // Does the message mention anything that points at the kind of problem?
      const lowered = error.message.toLowerCase();
      const contextHints = ['xml', 'parse', 'tag', 'attribute', 'invalid'];
      const hasContext = contextHints.some((hint) => lowered.includes(hint));
      tools.log(hasContext ? ` ✓ Error message provides context` : ` ⚠ Error message lacks context`);

      if (scenario.recoverable) {
        await retryAfterCleaning(scenario.xml);
      }
    }
  }

  PerformanceTracker.recordMetric('error-handling-malformed-xml', Date.now() - startedAt);
});
|
|||
|
|
|||
|
/**
 * ERR-01 / encoding issues.
 *
 * Runs a list of encoding fixtures through the parser and logs the outcome.
 * Buffer fixtures are written to a temporary file under `.nogit` and parsed
 * with `EInvoice.fromFile`; string fixtures go through
 * `EInvoice.fromXmlString`. An error thrown for a fixture that does not expect
 * one is rethrown and fails the test; every other outcome is only logged.
 */
tap.test('ERR-01: Parsing Error Recovery - Encoding Issues', async (tools) => {
  const startTime = Date.now();

  // Catch variables are `unknown`; narrow before reading a message.
  const messageOf = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  // Test various encoding-related parsing errors
  const encodingTests = [
    {
      name: 'Mismatched encoding declaration',
      xml: Buffer.from([
        0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31,
        0x2E, 0x30, 0x22, 0x20, 0x65, 0x6E, 0x63, 0x6F, 0x64, 0x69, 0x6E, 0x67, 0x3D, 0x22, 0x55, 0x54,
        0x46, 0x2D, 0x38, 0x22, 0x3F, 0x3E, 0x0A, // <?xml version="1.0" encoding="UTF-8"?>
        0x3C, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E, // <Invoice>
        0x3C, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // <Note>
        0xC4, 0xD6, 0xDC, // ISO-8859-1 encoded German umlauts (not UTF-8)
        0x3C, 0x2F, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // </Note>
        0x3C, 0x2F, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E // </Invoice>
      ]),
      expectedError: true,
      description: 'UTF-8 declared but ISO-8859-1 content'
    },
    {
      name: 'BOM with wrong encoding',
      xml: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
        Buffer.from(`<?xml version="1.0" encoding="UTF-16"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>ENCODING-BOM-001</ID>
</Invoice>`)
      ]),
      expectedError: false, // Parser might handle this
      description: 'UTF-8 BOM with UTF-16 declaration'
    },
    {
      name: 'Invalid UTF-8 sequences',
      xml: Buffer.from([
        0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31,
        0x2E, 0x30, 0x22, 0x3F, 0x3E, 0x0A, // <?xml version="1.0"?>
        0x3C, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E, // <Invoice>
        0x3C, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // <Note>
        0xC0, 0x80, // Invalid UTF-8 sequence (overlong encoding of NULL)
        0xED, 0xA0, 0x80, // Invalid UTF-8 sequence (surrogate half)
        0x3C, 0x2F, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // </Note>
        0x3C, 0x2F, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E // </Invoice>
      ]),
      expectedError: true,
      description: 'Invalid UTF-8 byte sequences'
    },
    {
      name: 'Mixed encoding in document',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>MIXED-ENCODING-001</ID>
<Note>UTF-8 text: äöü €</Note>
<AdditionalNote>${String.fromCharCode(0xA9)} ${String.fromCharCode(0xAE)}</AdditionalNote>
</Invoice>`,
      expectedError: false,
      description: 'Mixed but valid encoding'
    }
  ];

  for (const testCase of encodingTests) {
    tools.log(`Testing ${testCase.name}: ${testCase.description}`);

    try {
      const invoice = new EInvoice();
      let parseResult;

      if (Buffer.isBuffer(testCase.xml)) {
        // Raw bytes cannot be passed as a string without decoding them first,
        // so they are written to a temp file and parsed from disk. The pid in
        // the name keeps parallel runs in the same directory from colliding.
        const tempPath = plugins.path.join(
          process.cwd(),
          '.nogit',
          `temp-encoding-${process.pid}-${Date.now()}.xml`
        );
        await plugins.fs.ensureDir(plugins.path.dirname(tempPath));
        await plugins.fs.writeFile(tempPath, testCase.xml);

        try {
          parseResult = await invoice.fromFile(tempPath);
        } finally {
          // Clean up temp file even when parsing throws
          await plugins.fs.remove(tempPath);
        }
      } else {
        parseResult = await invoice.fromXmlString(testCase.xml);
      }

      if (testCase.expectedError) {
        if (parseResult) {
          tools.log(` ⚠ Expected encoding error but parsing succeeded`);

          // Check if data was corrupted
          const xmlOutput = await invoice.toXmlString();
          tools.log(` Output length: ${xmlOutput.length} chars`);

          // U+FFFD is what a decoder substitutes for bytes it could not
          // decode. Plain non-ASCII output is NOT treated as an artifact:
          // correctly decoded umlauts are legitimate content.
          const hasEncodingIssues = xmlOutput.includes('\uFFFD');

          if (hasEncodingIssues) {
            tools.log(` ⚠ Encoding artifacts detected in output`);
          }
        } else {
          tools.log(` ✓ Expected encoding error - no parsing result`);
        }
      } else {
        if (parseResult) {
          tools.log(` ✓ Parsing succeeded as expected`);

          // Verify encoding preservation
          const xmlOutput = await invoice.toXmlString();
          if (testCase.xml.toString().includes('äöü') && xmlOutput.includes('äöü')) {
            tools.log(` ✓ Special characters preserved correctly`);
          }
        } else {
          tools.log(` ✗ Unexpected parsing failure`);
        }
      }
    } catch (error) {
      const errorMessage = messageOf(error);

      if (testCase.expectedError) {
        tools.log(` ✓ Expected encoding error caught: ${errorMessage}`);

        // Check if error mentions encoding
        const errorLower = errorMessage.toLowerCase();
        const encodingHints = ['encoding', 'utf', 'charset', 'decode'];
        if (encodingHints.some((hint) => errorLower.includes(hint))) {
          tools.log(` ✓ Error message indicates encoding issue`);
        }
      } else {
        tools.log(` ✗ Unexpected error: ${errorMessage}`);
        throw error;
      }
    }
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-encoding-issues', duration);
});
|
|||
|
|
|||
|
/**
 * Returns the names of the elements that are opened but never closed in
 * `xml`, ordered outermost first.
 *
 * Tags are tracked on a stack in document order, so the result reflects real
 * nesting even when the same element name occurs several times. Comments and
 * CDATA sections are ignored, self-closing tags are not counted as open, and
 * prefixed names such as `ns:Element` are kept whole. A closing tag whose name
 * is not on the stack is ignored; a closing tag that matches a deeper entry
 * also discards everything nested inside that entry.
 *
 * @param xml Possibly truncated or damaged XML text.
 * @returns Unclosed element names, outermost first (empty when none).
 */
function findUnclosedTags(xml: string): string[] {
  // Markup inside comments / CDATA is text, not structure.
  const structuralXml = xml.replace(/<!--[\s\S]*?-->|<!\[CDATA\[[\s\S]*?\]\]>/g, '');

  // Group 1: "/" for a closing tag. Group 2: qualified name.
  // Group 3: remainder of the tag; quoted attribute values may contain ">".
  const tagPattern = /<(\/?)([A-Za-z_][\w.-]*(?::[A-Za-z_][\w.-]*)?)((?:"[^"]*"|'[^']*'|[^>"'])*)>/g;

  const openElements: string[] = [];
  for (const match of structuralXml.matchAll(tagPattern)) {
    const isClosing = match[1] === '/';
    const tagName = match[2] ?? '';
    const remainder = match[3] ?? '';

    if (isClosing) {
      const openIndex = openElements.lastIndexOf(tagName);
      if (openIndex !== -1) {
        // Drop the matched element and anything still open inside it.
        openElements.length = openIndex;
      }
      continue;
    }

    if (remainder.trimEnd().endsWith('/')) {
      continue; // self-closing element, nothing to close later
    }

    openElements.push(tagName);
  }

  return openElements;
}

/**
 * ERR-01 / partial document recovery.
 *
 * Parses truncated or corrupted documents. If parsing unexpectedly succeeds,
 * the serialised output is checked for the values listed in
 * `recoverableData`. If parsing throws, the missing closing tags are appended
 * (innermost first) and the repaired document is parsed again. All outcomes
 * are logged; this test makes no assertions.
 */
tap.test('ERR-01: Parsing Error Recovery - Partial Document Recovery', async (tools) => {
  const startTime = Date.now();

  // Catch variables are `unknown`; narrow before reading a message.
  const messageOf = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  // Test recovery from partially corrupted documents
  const partialDocumentTests = [
    {
      name: 'Truncated at invoice line',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>PARTIAL-001</ID>
<IssueDate>2024-01-15</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
<DocumentCurrencyCode>EUR</DocumentCurrencyCode>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Partial Recovery Supplier</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
<InvoiceLine>
<ID>1</ID>
<InvoicedQuantity unitCode="C62">5</InvoicedQuantity>
<LineExtensionAmount currencyID="EUR">500.00</LineExtensionAmount>
<Item>
<Name>Product for partial recovery test</Name>`,
      recoverableData: ['PARTIAL-001', '2024-01-15', 'EUR', 'Partial Recovery Supplier']
    },
    {
      name: 'Missing end sections',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>PARTIAL-002</ID>
<IssueDate>2024-01-15</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
<DocumentCurrencyCode>USD</DocumentCurrencyCode>
<Note>This invoice is missing its closing sections</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Incomplete Invoice Supplier</Name>
</PartyName>
<PostalAddress>
<StreetName>Recovery Street 123</StreetName>
<CityName>Test City</CityName>`,
      recoverableData: ['PARTIAL-002', '2024-01-15', 'USD', 'Incomplete Invoice Supplier', 'Recovery Street 123']
    },
    {
      name: 'Corrupted middle section',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>PARTIAL-003</ID>
<IssueDate>2024-01-15</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
<DocumentCurrencyCode>GBP</DocumentCurrencyCode>
<AccountingSupplierParty>
<Party>
<<<CORRUPTED_DATA_SECTION>>>
@#$%^&*()_+{}|:"<>?
BINARY_GARBAGE: ${String.fromCharCode(0x00, 0x01, 0x02, 0x03)}
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>Valid Customer After Corruption</Name>
</PartyName>
</Party>
</AccountingCustomerParty>
<LegalMonetaryTotal>
<PayableAmount currencyID="GBP">1500.00</PayableAmount>
</LegalMonetaryTotal>
</Invoice>`,
      recoverableData: ['PARTIAL-003', '2024-01-15', 'GBP', 'Valid Customer After Corruption', '1500.00']
    }
  ];

  for (const testCase of partialDocumentTests) {
    tools.log(`Testing ${testCase.name}...`);

    try {
      const invoice = new EInvoice();
      const parseResult = await invoice.fromXmlString(testCase.xml);

      if (parseResult) {
        tools.log(` ⚠ Partial document parsed - unexpected success`);

        // Check what data was recovered
        try {
          const xmlOutput = await invoice.toXmlString();
          tools.log(` Checking recovered data...`);

          let recoveredCount = 0;
          for (const expectedData of testCase.recoverableData) {
            if (xmlOutput.includes(expectedData)) {
              recoveredCount++;
              tools.log(` ✓ Recovered: ${expectedData}`);
            } else {
              tools.log(` ✗ Lost: ${expectedData}`);
            }
          }

          const recoveryRate = (recoveredCount / testCase.recoverableData.length) * 100;
          tools.log(` Recovery rate: ${recoveryRate.toFixed(1)}% (${recoveredCount}/${testCase.recoverableData.length})`);
        } catch (outputError) {
          tools.log(` ⚠ Could not generate output from partial document: ${messageOf(outputError)}`);
        }
      } else {
        tools.log(` ✓ Partial document parsing failed as expected`);
      }
    } catch (error) {
      tools.log(` ✓ Parsing error caught: ${messageOf(error)}`);

      // Test if we can implement a recovery strategy
      tools.log(` Attempting recovery strategy...`);

      try {
        // Strategy 1: close every element that is still open at the end.
        const unclosedTags = findUnclosedTags(testCase.xml);

        if (unclosedTags.length === 0) {
          tools.log(` ⚠ No unclosed tags found - nothing to repair`);
        } else {
          tools.log(` Found ${unclosedTags.length} unclosed tags`);

          // Innermost element must be closed first.
          const closingTags = [...unclosedTags]
            .reverse()
            .map((tagName) => `</${tagName}>`)
            .join('');
          const recoveredXml = testCase.xml + closingTags;

          // Try parsing recovered XML
          const recoveryInvoice = new EInvoice();
          const recoveryResult = await recoveryInvoice.fromXmlString(recoveredXml);

          if (recoveryResult) {
            tools.log(` ✓ Recovery successful after closing tags`);

            // Check recovered data
            const recoveredOutput = await recoveryInvoice.toXmlString();
            const postRecoveryCount = testCase.recoverableData.filter((expectedData) =>
              recoveredOutput.includes(expectedData)
            ).length;
            tools.log(` Post-recovery data: ${postRecoveryCount}/${testCase.recoverableData.length} items`);
          } else {
            tools.log(` ⚠ Recovery strategy failed`);
          }
        }
      } catch (recoveryError) {
        tools.log(` Recovery attempt failed: ${messageOf(recoveryError)}`);
      }
    }
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-partial-recovery', duration);
});
|
|||
|
|
|||
|
/**
 * ERR-01 / namespace issues.
 *
 * Parses documents with missing, wrong, duplicated or undefined namespace
 * declarations and logs the result. When a document that should have failed
 * parses anyway, `validate()` is run and any validation error mentioning
 * "namespace" is logged. When a document parses as expected, the serialised
 * output is checked for the UBL or CII namespace URI. An error thrown for a
 * case that does not expect one is rethrown and fails the test.
 */
tap.test('ERR-01: Parsing Error Recovery - Namespace Issues', async (tools) => {
  const startedAt = Date.now();

  // Namespace URIs one of which a correctly serialised invoice should carry.
  const knownNamespaces = [
    'urn:oasis:names:specification:ubl:schema:xsd:Invoice-2',
    'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice'
  ];

  // Namespace-related parsing fixtures
  const cases = [
    {
      name: 'Missing namespace declaration',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice>
<ID>NAMESPACE-001</ID>
<IssueDate>2024-01-15</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
</Invoice>`,
      expectedError: false, // May parse but validation should fail
      issue: 'No namespace declared'
    },
    {
      name: 'Wrong namespace URI',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="http://wrong.namespace.uri/invoice">
<ID>NAMESPACE-002</ID>
<IssueDate>2024-01-15</IssueDate>
</Invoice>`,
      expectedError: false,
      issue: 'Incorrect namespace'
    },
    {
      name: 'Conflicting namespace prefixes',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ns1:Invoice xmlns:ns1="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:ns1="http://different.namespace">
<ns1:ID>NAMESPACE-003</ns1:ID>
</ns1:Invoice>`,
      expectedError: true,
      issue: 'Duplicate prefix definition'
    },
    {
      name: 'Undefined namespace prefix',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>NAMESPACE-004</ID>
<unknown:Element>Content</unknown:Element>
</Invoice>`,
      expectedError: true,
      issue: 'Undefined prefix used'
    }
  ];

  // Runs validation on an invoice that parsed against expectations and logs
  // any validation error whose message mentions "namespace".
  const reportValidationFindings = async (parsedInvoice: EInvoice): Promise<void> => {
    try {
      const validationResult = await parsedInvoice.validate();
      if (validationResult.valid) {
        return;
      }
      tools.log(` ✓ Namespace issues detected during validation`);
      if (validationResult.errors) {
        for (const finding of validationResult.errors) {
          if (!finding.message.toLowerCase().includes('namespace')) {
            continue;
          }
          tools.log(` Namespace error: ${finding.message}`);
        }
      }
    } catch (validationError) {
      tools.log(` Validation failed: ${validationError.message}`);
    }
  };

  for (const current of cases) {
    tools.log(`Testing ${current.name}: ${current.issue}`);

    try {
      const invoice = new EInvoice();
      const parsed = await invoice.fromXmlString(current.xml);

      if (current.expectedError && parsed) {
        tools.log(` ⚠ Expected namespace error but parsing succeeded`);
        // The parser let it through; see whether validation catches it.
        await reportValidationFindings(invoice);
      } else if (current.expectedError) {
        tools.log(` ✓ Expected namespace error - no parsing result`);
      } else if (!parsed) {
        tools.log(` ✗ Unexpected parsing failure`);
      } else {
        tools.log(` ✓ Parsing succeeded as expected`);

        // Does the serialised output carry one of the known namespaces?
        const serialized = await invoice.toXmlString();
        const namespaceKept = knownNamespaces.some((uri) => serialized.includes(uri));
        tools.log(
          namespaceKept
            ? ` ✓ Proper namespace maintained in output`
            : ` ⚠ Output missing proper namespace declaration`
        );
      }
    } catch (error) {
      if (!current.expectedError) {
        tools.log(` ✗ Unexpected error: ${error.message}`);
        throw error;
      }

      tools.log(` ✓ Expected namespace error caught: ${error.message}`);

      // Error quality: does the message point at a namespace problem?
      const lowered = error.message.toLowerCase();
      const mentionsNamespace =
        lowered.includes('namespace') || lowered.includes('prefix') || lowered.includes('xmlns');
      if (mentionsNamespace) {
        tools.log(` ✓ Error message indicates namespace issue`);
      }
    }
  }

  PerformanceTracker.recordMetric('error-handling-namespace-issues', Date.now() - startedAt);
});
|
|||
|
|
|||
|
/**
 * ERR-01 / corpus error recovery.
 *
 * Parses the first five files of each listed corpus category with
 * `EInvoice.fromFile`. A file counts as a parse error when parsing returns a
 * falsy result or throws. Recovery (re-reading the file and re-parsing it
 * after a text transform) is attempted when parsing returned no result, or
 * when it threw an error whose message could be classified. Every counted
 * recovery attempt corresponds to an attempt that actually ran, so the
 * reported recovery rate is meaningful. The test asserts that fewer than 20%
 * of the processed files produced a parse error.
 */
tap.test('ERR-01: Parsing Error Recovery - Corpus Error Recovery', { timeout: testTimeout }, async (tools) => {
  const startTime = Date.now();

  let processedFiles = 0;
  let parseErrors = 0;
  let recoveryAttempts = 0;
  let successfulRecoveries = 0;

  // Catch variables are `unknown`; narrow before reading a message.
  const messageOf = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  // Text transforms tried one at a time (not cumulatively) on the raw file content.
  const recoveryStrategies = [
    {
      name: 'Remove BOM',
      transform: (content: string) => content.replace(/^\uFEFF/, '')
    },
    {
      name: 'Fix encoding',
      transform: (content: string) => content.replace(/[\x00-\x08\x0B-\x0C\x0E-\x1F]/g, '')
    },
    {
      name: 'Normalize whitespace',
      transform: (content: string) => content.replace(/\r\n/g, '\n').replace(/\r/g, '\n')
    }
  ];

  // Re-reads the file and re-parses it once per strategy.
  // Returns true as soon as one strategy yields a parse result. Failures are
  // logged and contained here so they are never counted as a second parse
  // error for the same file.
  const attemptRecovery = async (filePath: string): Promise<boolean> => {
    let fileContent: string;
    try {
      fileContent = await plugins.fs.readFile(filePath, 'utf-8');
    } catch (readError) {
      tools.log(` ⚠ Could not read file for recovery: ${messageOf(readError)}`);
      return false;
    }

    for (const strategy of recoveryStrategies) {
      try {
        const transformedContent = strategy.transform(fileContent);
        const recoveryInvoice = new EInvoice();
        const recoveryResult = await recoveryInvoice.fromXmlString(transformedContent);

        if (recoveryResult) {
          tools.log(` ✓ Recovery successful with strategy: ${strategy.name}`);
          return true;
        }
      } catch (strategyError) {
        // Strategy failed, note why and try the next one
        tools.log(` Strategy "${strategy.name}" failed: ${messageOf(strategyError)}`);
      }
    }

    return false;
  };

  try {
    // Test with potentially problematic files from corpus
    const categories = ['UBL_XML_RECHNUNG', 'CII_XML_RECHNUNG'];

    for (const category of categories) {
      try {
        const files = await CorpusLoader.getFiles(category);
        const filesToProcess = files.slice(0, 5); // Process first 5 files per category

        for (const filePath of filesToProcess) {
          processedFiles++;
          const fileName = plugins.path.basename(filePath);
          let shouldAttemptRecovery = false;

          // First, try normal parsing
          try {
            const invoice = new EInvoice();
            const parseResult = await invoice.fromFile(filePath);

            if (!parseResult) {
              parseErrors++;
              tools.log(`⚠ ${fileName}: Parse returned no result`);
              shouldAttemptRecovery = true;
            }
          } catch (error) {
            parseErrors++;
            const errorMessage = messageOf(error);
            tools.log(`✗ ${fileName}: Parse error - ${errorMessage}`);

            // Log error characteristics
            const errorLower = errorMessage.toLowerCase();
            const errorType = errorLower.includes('encoding') ? 'encoding' :
              errorLower.includes('tag') ? 'structure' :
              errorLower.includes('namespace') ? 'namespace' :
              errorLower.includes('attribute') ? 'attribute' :
              'unknown';

            tools.log(` Error type: ${errorType}`);

            // Only known error types are worth a recovery attempt
            shouldAttemptRecovery = errorType !== 'unknown';
          }

          // Runs outside the parse try/catch so a recovery failure cannot be
          // mistaken for another parse error.
          if (shouldAttemptRecovery) {
            recoveryAttempts++;
            if (await attemptRecovery(filePath)) {
              successfulRecoveries++;
            }
          }
        }
      } catch (categoryError) {
        tools.log(`Failed to process category ${category}: ${messageOf(categoryError)}`);
      }
    }

    if (processedFiles === 0) {
      tools.log(`⚠ No corpus files were processed - the error rate below is not meaningful`);
    }

    // Summary statistics
    const errorRate = processedFiles > 0 ? (parseErrors / processedFiles) * 100 : 0;
    const recoveryRate = recoveryAttempts > 0 ? (successfulRecoveries / recoveryAttempts) * 100 : 0;

    tools.log(`\nParsing Error Recovery Summary:`);
    tools.log(`- Files processed: ${processedFiles}`);
    tools.log(`- Parse errors: ${parseErrors} (${errorRate.toFixed(1)}%)`);
    tools.log(`- Recovery attempts: ${recoveryAttempts}`);
    tools.log(`- Successful recoveries: ${successfulRecoveries} (${recoveryRate.toFixed(1)}%)`);

    // Most corpus files should parse without errors
    expect(errorRate).toBeLessThan(20); // Less than 20% error rate expected
  } catch (error) {
    tools.log(`Corpus error recovery test failed: ${messageOf(error)}`);
    throw error;
  }

  const totalDuration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-corpus-recovery', totalDuration);

  tools.log(`Corpus error recovery completed in ${totalDuration}ms`);
});
|
|||
|
|
|||
|
/**
 * ERR-01 / performance summary.
 *
 * Looks up the summary that `PerformanceTracker` holds for each metric
 * recorded by the ERR-01 tests and logs its average, min, max and p95 values.
 * Metrics without a summary are skipped. This test makes no assertions.
 */
tap.test('ERR-01: Performance Summary', async (tools) => {
  // Metric names recorded by the ERR-01 tests in this file.
  const trackedOperations = [
    'error-handling-malformed-xml',
    'error-handling-encoding-issues',
    'error-handling-partial-recovery',
    'error-handling-namespace-issues',
    'error-handling-corpus-recovery'
  ];

  tools.log(`\n=== Parsing Error Recovery Performance Summary ===`);

  for (const operation of trackedOperations) {
    const summary = await PerformanceTracker.getSummary(operation);
    if (!summary) {
      continue; // nothing recorded for this metric
    }

    tools.log(`${operation}:`);
    tools.log(` avg=${summary.average}ms, min=${summary.min}ms, max=${summary.max}ms, p95=${summary.p95}ms`);
  }

  tools.log(`\nParsing error recovery testing completed.`);
  tools.log(`Note: Some parsing errors are expected when testing error recovery mechanisms.`);
});
|