import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../../ts/plugins.ts';
import { EInvoice } from '../../../ts/classes.xinvoice.ts';
import { CorpusLoader } from '../../helpers/corpus.loader.ts';
import { PerformanceTracker } from '../../helpers/performance.tracker.ts';

const testTimeout = 300000; // 5 minutes timeout for error handling tests

// ERR-01: Parsing Error Recovery
// Tests error recovery mechanisms during XML parsing including
// malformed XML, encoding issues, and partial document recovery

/** C0 control characters other than TAB, LF and CR; stripped during recovery attempts. */
const CONTROL_CHARS = /[\x00-\x08\x0B-\x0C\x0E-\x1F]/g;

/** Error categories derived from keywords found in a parser error message. */
type ParseErrorType = 'encoding' | 'structure' | 'namespace' | 'attribute' | 'unknown';

/** Content transforms tried, in order, when a corpus file fails to parse. */
const corpusRecoveryStrategies = [
  {
    name: 'Remove BOM',
    transform: (content: string) => content.replace(/^\uFEFF/, '')
  },
  {
    name: 'Fix encoding',
    transform: (content: string) => content.replace(CONTROL_CHARS, '')
  },
  {
    name: 'Normalize whitespace',
    transform: (content: string) => content.replace(/\r\n/g, '\n').replace(/\r/g, '\n')
  }
];

/**
 * Extracts a printable message from a caught value.
 *
 * Catch variables are `unknown`; anything that is not an `Error` is stringified
 * so that callers can always log and lower-case the result.
 */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Maps a parser error message to a coarse category by keyword.
 * The first matching keyword wins, in the order encoding, tag, namespace, attribute.
 */
function classifyParseError(message: string): ParseErrorType {
  const lower = message.toLowerCase();
  if (lower.includes('encoding')) return 'encoding';
  if (lower.includes('tag')) return 'structure';
  if (lower.includes('namespace')) return 'namespace';
  if (lower.includes('attribute')) return 'attribute';
  return 'unknown';
}

/**
 * Heuristically closes elements that were opened but never closed.
 *
 * Tags are scanned in document order. Self-closing tags are ignored, and a
 * closing tag cancels the most recent still-open element of the same name.
 * Whatever remains open is closed innermost-first at the end of the text.
 * This is a regex scan, not a parser: markup inside comments or CDATA and
 * attribute values containing `>` are not handled.
 *
 * @param xml possibly truncated XML text
 * @returns the text with closing tags appended, plus the names that were left open
 */
function closeUnclosedTags(xml: string): { xml: string; unclosed: string[] } {
  // Names must start with a letter or underscore, so `<?...?>` and `<!...>` are skipped.
  const tagPattern = /<(\/?)([A-Za-z_][\w:.-]*)[^<>]*?(\/?)>/g;
  const open: string[] = [];

  for (const match of xml.matchAll(tagPattern)) {
    const name = match[2];
    if (!name || match[3]) {
      continue; // self-closing element: nothing to balance
    }
    if (match[1]) {
      const index = open.lastIndexOf(name);
      if (index !== -1) {
        open.splice(index, 1);
      }
    } else {
      open.push(name);
    }
  }

  const closers = [...open].reverse().map((name) => `</${name}>`).join('');
  return { xml: xml + closers, unclosed: open };
}

/**
 * Writes raw bytes to a temporary file under `.nogit` and parses it with `fromFile`.
 * The temporary file is removed whether or not parsing succeeds.
 */
async function parseBufferViaTempFile(invoice: EInvoice, content: Buffer) {
  const tempPath = plugins.path.join(process.cwd(), '.nogit', `temp-encoding-${Date.now()}.xml`);
  await plugins.fs.ensureDir(plugins.path.dirname(tempPath));
  await plugins.fs.writeFile(tempPath, content);

  try {
    return await invoice.fromFile(tempPath);
  } finally {
    // Clean up temp file
    await plugins.fs.remove(tempPath);
  }
}

/**
 * Re-reads a corpus file and tries each recovery strategy until one parses.
 *
 * Never throws: a read failure or a failing strategy simply counts as "not
 * recovered", so the caller's error counters are not affected by the attempt.
 *
 * @returns the name of the strategy that worked, or `undefined` if none did
 */
async function attemptCorpusRecovery(filePath: string): Promise<string | undefined> {
  let fileContent: string;
  try {
    fileContent = await plugins.fs.readFile(filePath, 'utf-8');
  } catch {
    return undefined;
  }

  for (const strategy of corpusRecoveryStrategies) {
    try {
      const recoveryInvoice = new EInvoice();
      const recoveryResult = await recoveryInvoice.fromXmlString(strategy.transform(fileContent));
      if (recoveryResult) {
        return strategy.name;
      }
    } catch {
      // Strategy failed, try next
    }
  }
  return undefined;
}

tap.test('ERR-01: Parsing Error Recovery - Malformed XML Recovery', async (tools) => {
  const startTime = Date.now();

  // Test various malformed XML scenarios
  // NOTE(review): the fixture strings below contain no XML markup although the case
  // names describe tag-level defects - confirm the fixtures were not stripped by tooling.
  const malformedXmlTests = [
    {
      name: 'Missing closing tag',
      xml: ` MALFORMED-001 2024-01-15 380 EUR `,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Mismatched tags',
      xml: ` MALFORMED-002 2024-01-15 380 EUR `,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Invalid XML characters',
      xml: ` MALFORMED-003 2024-01-15 Invalid chars: ${String.fromCharCode(0x00)}${String.fromCharCode(0x01)} `,
      expectedError: true,
      recoverable: true
    },
    {
      name: 'Broken CDATA section',
      xml: ` MALFORMED-004 `,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Unclosed attribute quote',
      xml: ` MALFORMED-006 100.00 `,
      expectedError: true,
      recoverable: true
    }
  ];

  for (const testCase of malformedXmlTests) {
    tools.log(`Testing ${testCase.name}...`);

    try {
      const invoice = new EInvoice();
      const parseResult = await invoice.fromXmlString(testCase.xml);

      if (testCase.expectedError) {
        // If we expected an error but parsing succeeded, check if partial recovery happened
        if (parseResult) {
          tools.log(` ⚠ Expected error but parsing succeeded - checking recovery`);

          // Test if we can extract any data
          try {
            const xmlOutput = await invoice.toXmlString();
            if (xmlOutput && xmlOutput.length > 50) {
              tools.log(` ✓ Partial recovery successful - extracted ${xmlOutput.length} chars`);

              // Check if critical data was preserved
              const criticalDataPreserved = {
                hasId: xmlOutput.includes('MALFORMED'),
                hasDate: xmlOutput.includes('2024-01-15'),
                hasStructure: xmlOutput.includes('Invoice')
              };

              tools.log(` ID preserved: ${criticalDataPreserved.hasId}`);
              tools.log(` Date preserved: ${criticalDataPreserved.hasDate}`);
              tools.log(` Structure preserved: ${criticalDataPreserved.hasStructure}`);
            }
          } catch (outputError: unknown) {
            tools.log(` ⚠ Recovery limited - output generation failed: ${errorMessage(outputError)}`);
          }
        } else {
          tools.log(` ✓ Expected error - no parsing result`);
        }
      } else {
        if (parseResult) {
          tools.log(` ✓ Parsing succeeded as expected`);
        } else {
          tools.log(` ✗ Unexpected parsing failure`);
        }
      }
    } catch (error: unknown) {
      const message = errorMessage(error);

      if (!testCase.expectedError) {
        tools.log(` ✗ Unexpected error: ${message}`);
        throw error;
      }

      tools.log(` ✓ Expected parsing error caught: ${message}`);

      // Check error quality
      expect(message).toBeTruthy();
      expect(message.length).toBeGreaterThan(10);

      // Check if error provides helpful context
      const errorLower = message.toLowerCase();
      const hasContext = ['xml', 'parse', 'tag', 'attribute', 'invalid'].some((keyword) =>
        errorLower.includes(keyword)
      );

      if (hasContext) {
        tools.log(` ✓ Error message provides context`);
      } else {
        tools.log(` ⚠ Error message lacks context`);
      }

      // Test recovery attempt if recoverable
      if (testCase.recoverable) {
        tools.log(` Attempting recovery...`);
        try {
          // Try to clean the XML and parse again
          const cleanedXml = testCase.xml
            .replace(CONTROL_CHARS, '') // Remove control chars
            .replace(/<>/g, ''); // Remove invalid brackets

          const recoveryInvoice = new EInvoice();
          const recoveryResult = await recoveryInvoice.fromXmlString(cleanedXml);

          if (recoveryResult) {
            tools.log(` ✓ Recovery successful after cleaning`);
          } else {
            tools.log(` ⚠ Recovery failed even after cleaning`);
          }
        } catch (recoveryError: unknown) {
          tools.log(` ⚠ Recovery attempt failed: ${errorMessage(recoveryError)}`);
        }
      }
    }
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-malformed-xml', duration);
});

tap.test('ERR-01: Parsing Error Recovery - Encoding Issues', async (tools) => {
  const startTime = Date.now();

  // Test various encoding-related parsing errors
  const encodingTests = [
    {
      name: 'Mismatched encoding declaration',
      xml: Buffer.from([
        0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31, 0x2E, 0x30, 0x22, 0x20, 0x65, 0x6E, 0x63, 0x6F, 0x64, 0x69, 0x6E, 0x67, 0x3D, 0x22, 0x55, 0x54, 0x46, 0x2D, 0x38, 0x22, 0x3F, 0x3E, 0x0A, // <?xml version="1.0" encoding="UTF-8"?>\n
        0x3C, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E, // <Invoice>
        0x3C, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // <Note>
        0xC4, 0xD6, 0xDC, // ISO-8859-1 encoded German umlauts (not UTF-8)
        0x3C, 0x2F, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // </Note>
        0x3C, 0x2F, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E // </Invoice>
      ]),
      expectedError: true,
      description: 'UTF-8 declared but ISO-8859-1 content'
    },
    {
      name: 'BOM with wrong encoding',
      xml: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
        Buffer.from(` ENCODING-BOM-001 `)
      ]),
      expectedError: false, // Parser might handle this
      description: 'UTF-8 BOM with UTF-16 declaration'
    },
    {
      name: 'Invalid UTF-8 sequences',
      xml: Buffer.from([
        0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31, 0x2E, 0x30, 0x22, 0x3F, 0x3E, 0x0A, // <?xml version="1.0"?>\n
        0x3C, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E, // <Invoice>
        0x3C, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // <Note>
        0xC0, 0x80, // Invalid UTF-8 sequence (overlong encoding of NULL)
        0xED, 0xA0, 0x80, // Invalid UTF-8 sequence (surrogate half)
        0x3C, 0x2F, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // </Note>
        0x3C, 0x2F, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E // </Invoice>
      ]),
      expectedError: true,
      description: 'Invalid UTF-8 byte sequences'
    },
    {
      name: 'Mixed encoding in document',
      xml: ` MIXED-ENCODING-001 UTF-8 text: äöü € ${String.fromCharCode(0xA9)} ${String.fromCharCode(0xAE)} `,
      expectedError: false,
      description: 'Mixed but valid encoding'
    }
  ];

  for (const testCase of encodingTests) {
    tools.log(`Testing ${testCase.name}: ${testCase.description}`);

    try {
      const invoice = new EInvoice();

      // Raw byte fixtures go through a temp file so the parser sees the bytes undecoded.
      const parseResult = Buffer.isBuffer(testCase.xml)
        ? await parseBufferViaTempFile(invoice, testCase.xml)
        : await invoice.fromXmlString(testCase.xml);

      if (testCase.expectedError) {
        if (parseResult) {
          tools.log(` ⚠ Expected encoding error but parsing succeeded`);

          // Check if data was corrupted
          const xmlOutput = await invoice.toXmlString();
          tools.log(` Output length: ${xmlOutput.length} chars`);

          // Look for encoding artifacts
          const hasEncodingIssues =
            xmlOutput.includes('\uFFFD') || // Unicode replacement character
            !/^[\x00-\x7F]*$/.test(xmlOutput); // Non-ASCII when not expected

          if (hasEncodingIssues) {
            tools.log(` ⚠ Encoding artifacts detected in output`);
          }
        } else {
          tools.log(` ✓ Expected encoding error - no parsing result`);
        }
      } else {
        if (parseResult) {
          tools.log(` ✓ Parsing succeeded as expected`);

          // Verify encoding preservation
          const xmlOutput = await invoice.toXmlString();
          if (testCase.xml.toString().includes('äöü') && xmlOutput.includes('äöü')) {
            tools.log(` ✓ Special characters preserved correctly`);
          }
        } else {
          tools.log(` ✗ Unexpected parsing failure`);
        }
      }
    } catch (error: unknown) {
      const message = errorMessage(error);

      if (!testCase.expectedError) {
        tools.log(` ✗ Unexpected error: ${message}`);
        throw error;
      }

      tools.log(` ✓ Expected encoding error caught: ${message}`);

      // Check if error mentions encoding
      const errorLower = message.toLowerCase();
      if (['encoding', 'utf', 'charset', 'decode'].some((keyword) => errorLower.includes(keyword))) {
        tools.log(` ✓ Error message indicates encoding issue`);
      }
    }
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-encoding-issues', duration);
});

tap.test('ERR-01: Parsing Error Recovery - Partial Document Recovery', async (tools) => {
  const startTime = Date.now();

  // Test recovery from partially corrupted documents
  const partialDocumentTests = [
    {
      name: 'Truncated at invoice line',
      xml: ` PARTIAL-001 2024-01-15 380 EUR Partial Recovery Supplier 1 5 500.00 Product for partial recovery test`,
      recoverableData: ['PARTIAL-001', '2024-01-15', 'EUR', 'Partial Recovery Supplier']
    },
    {
      name: 'Missing end sections',
      xml: ` PARTIAL-002 2024-01-15 380 USD This invoice is missing its closing sections Incomplete Invoice Supplier Recovery Street 123 Test City`,
      recoverableData: ['PARTIAL-002', '2024-01-15', 'USD', 'Incomplete Invoice Supplier', 'Recovery Street 123']
    },
    {
      name: 'Corrupted middle section',
      xml: ` PARTIAL-003 2024-01-15 380 GBP <<>> @#$%^&*()_+{}|:"<>? BINARY_GARBAGE: ${String.fromCharCode(0x00, 0x01, 0x02, 0x03)} Valid Customer After Corruption 1500.00 `,
      recoverableData: ['PARTIAL-003', '2024-01-15', 'GBP', 'Valid Customer After Corruption', '1500.00']
    }
  ];

  for (const testCase of partialDocumentTests) {
    tools.log(`Testing ${testCase.name}...`);

    try {
      const invoice = new EInvoice();
      const parseResult = await invoice.fromXmlString(testCase.xml);

      if (parseResult) {
        tools.log(` ⚠ Partial document parsed - unexpected success`);

        // Check what data was recovered
        try {
          const xmlOutput = await invoice.toXmlString();
          tools.log(` Checking recovered data...`);

          let recoveredCount = 0;
          for (const expectedData of testCase.recoverableData) {
            if (xmlOutput.includes(expectedData)) {
              recoveredCount++;
              tools.log(` ✓ Recovered: ${expectedData}`);
            } else {
              tools.log(` ✗ Lost: ${expectedData}`);
            }
          }

          const recoveryRate = (recoveredCount / testCase.recoverableData.length) * 100;
          tools.log(` Recovery rate: ${recoveryRate.toFixed(1)}% (${recoveredCount}/${testCase.recoverableData.length})`);
        } catch (outputError: unknown) {
          tools.log(` ⚠ Could not generate output from partial document: ${errorMessage(outputError)}`);
        }
      } else {
        tools.log(` ✓ Partial document parsing failed as expected`);
      }
    } catch (error: unknown) {
      tools.log(` ✓ Parsing error caught: ${errorMessage(error)}`);

      // Test if we can implement a recovery strategy
      tools.log(` Attempting recovery strategy...`);

      try {
        // Strategy 1: Try to fix unclosed tags
        const repaired = closeUnclosedTags(testCase.xml);

        // Only re-parse when the repair actually changed the document.
        if (repaired.unclosed.length > 0) {
          tools.log(` Found ${repaired.unclosed.length} unclosed tags`);

          // Try parsing recovered XML
          const recoveryInvoice = new EInvoice();
          const recoveryResult = await recoveryInvoice.fromXmlString(repaired.xml);

          if (recoveryResult) {
            tools.log(` ✓ Recovery successful after closing tags`);

            // Check recovered data
            const recoveredOutput = await recoveryInvoice.toXmlString();
            const postRecoveryCount = testCase.recoverableData.filter((expectedData) =>
              recoveredOutput.includes(expectedData)
            ).length;
            tools.log(` Post-recovery data: ${postRecoveryCount}/${testCase.recoverableData.length} items`);
          } else {
            tools.log(` ⚠ Recovery strategy failed`);
          }
        }
      } catch (recoveryError: unknown) {
        tools.log(` Recovery attempt failed: ${errorMessage(recoveryError)}`);
      }
    }
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-partial-recovery', duration);
});

tap.test('ERR-01: Parsing Error Recovery - Namespace Issues', async (tools) => {
  const startTime = Date.now();

  // Test namespace-related parsing errors and recovery
  const namespaceTests = [
    {
      name: 'Missing namespace declaration',
      xml: ` NAMESPACE-001 2024-01-15 380 `,
      expectedError: false, // May parse but validation should fail
      issue: 'No namespace declared'
    },
    {
      name: 'Wrong namespace URI',
      xml: ` NAMESPACE-002 2024-01-15 `,
      expectedError: false,
      issue: 'Incorrect namespace'
    },
    {
      name: 'Conflicting namespace prefixes',
      xml: ` NAMESPACE-003 `,
      expectedError: true,
      issue: 'Duplicate prefix definition'
    },
    {
      name: 'Undefined namespace prefix',
      xml: ` NAMESPACE-004 Content `,
      expectedError: true,
      issue: 'Undefined prefix used'
    }
  ];

  for (const testCase of namespaceTests) {
    tools.log(`Testing ${testCase.name}: ${testCase.issue}`);

    try {
      const invoice = new EInvoice();
      const parseResult = await invoice.fromXmlString(testCase.xml);

      if (testCase.expectedError) {
        if (parseResult) {
          tools.log(` ⚠ Expected namespace error but parsing succeeded`);

          // Check if namespace issues are detected during validation
          try {
            const validationResult = await invoice.validate();
            if (!validationResult.valid) {
              tools.log(` ✓ Namespace issues detected during validation`);
              if (validationResult.errors) {
                for (const validationIssue of validationResult.errors) {
                  if (validationIssue.message.toLowerCase().includes('namespace')) {
                    tools.log(` Namespace error: ${validationIssue.message}`);
                  }
                }
              }
            }
          } catch (validationError: unknown) {
            tools.log(` Validation failed: ${errorMessage(validationError)}`);
          }
        } else {
          tools.log(` ✓ Expected namespace error - no parsing result`);
        }
      } else {
        if (parseResult) {
          tools.log(` ✓ Parsing succeeded as expected`);

          // Test if we can detect namespace issues
          const xmlOutput = await invoice.toXmlString();
          const hasProperNamespace =
            xmlOutput.includes('urn:oasis:names:specification:ubl:schema:xsd:Invoice-2') ||
            xmlOutput.includes('urn:un:unece:uncefact:data:standard:CrossIndustryInvoice');

          if (!hasProperNamespace) {
            tools.log(` ⚠ Output missing proper namespace declaration`);
          } else {
            tools.log(` ✓ Proper namespace maintained in output`);
          }
        } else {
          tools.log(` ✗ Unexpected parsing failure`);
        }
      }
    } catch (error: unknown) {
      const message = errorMessage(error);

      if (!testCase.expectedError) {
        tools.log(` ✗ Unexpected error: ${message}`);
        throw error;
      }

      tools.log(` ✓ Expected namespace error caught: ${message}`);

      // Check error quality
      const errorLower = message.toLowerCase();
      if (['namespace', 'prefix', 'xmlns'].some((keyword) => errorLower.includes(keyword))) {
        tools.log(` ✓ Error message indicates namespace issue`);
      }
    }
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-namespace-issues', duration);
});

tap.test('ERR-01: Parsing Error Recovery - Corpus Error Recovery', { timeout: testTimeout }, async (tools) => {
  const startTime = Date.now();

  let processedFiles = 0;
  let parseErrors = 0;
  let recoveryAttempts = 0;
  let successfulRecoveries = 0;

  // Runs the shared recovery strategies for one file and keeps the counters in step,
  // so every counted attempt corresponds to recovery work that was actually performed.
  const recordRecoveryAttempt = async (filePath: string): Promise<void> => {
    recoveryAttempts++;
    const strategyName = await attemptCorpusRecovery(filePath);
    if (strategyName !== undefined) {
      successfulRecoveries++;
      tools.log(` ✓ Recovery successful with strategy: ${strategyName}`);
    }
  };

  try {
    // Test with potentially problematic files from corpus
    const categories = ['UBL_XML_RECHNUNG', 'CII_XML_RECHNUNG'];

    for (const category of categories) {
      try {
        const files = await CorpusLoader.getFiles(category);
        const filesToProcess = files.slice(0, 5); // Process first 5 files per category

        for (const filePath of filesToProcess) {
          processedFiles++;
          const fileName = plugins.path.basename(filePath);

          // First, try normal parsing
          try {
            const invoice = new EInvoice();
            const parseResult = await invoice.fromFile(filePath);

            if (!parseResult) {
              parseErrors++;
              tools.log(`⚠ ${fileName}: Parse returned no result`);

              // Attempt recovery
              await recordRecoveryAttempt(filePath);
            }
          } catch (error: unknown) {
            parseErrors++;
            const message = errorMessage(error);
            tools.log(`✗ ${fileName}: Parse error - ${message}`);

            // Log error characteristics
            const errorType = classifyParseError(message);
            tools.log(` Error type: ${errorType}`);

            // Attempt recovery for known error types
            if (errorType !== 'unknown') {
              await recordRecoveryAttempt(filePath);
            }
          }
        }
      } catch (categoryError: unknown) {
        tools.log(`Failed to process category ${category}: ${errorMessage(categoryError)}`);
      }
    }

    // Summary statistics
    const errorRate = processedFiles > 0 ? (parseErrors / processedFiles) * 100 : 0;
    const recoveryRate = recoveryAttempts > 0 ? (successfulRecoveries / recoveryAttempts) * 100 : 0;

    tools.log(`\nParsing Error Recovery Summary:`);
    tools.log(`- Files processed: ${processedFiles}`);
    tools.log(`- Parse errors: ${parseErrors} (${errorRate.toFixed(1)}%)`);
    tools.log(`- Recovery attempts: ${recoveryAttempts}`);
    tools.log(`- Successful recoveries: ${successfulRecoveries} (${recoveryRate.toFixed(1)}%)`);

    // Most corpus files should parse without errors
    expect(errorRate).toBeLessThan(20); // Less than 20% error rate expected
  } catch (error: unknown) {
    tools.log(`Corpus error recovery test failed: ${errorMessage(error)}`);
    throw error;
  }

  const totalDuration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-corpus-recovery', totalDuration);

  tools.log(`Corpus error recovery completed in ${totalDuration}ms`);
});

tap.test('ERR-01: Performance Summary', async (tools) => {
  const operations = [
    'error-handling-malformed-xml',
    'error-handling-encoding-issues',
    'error-handling-partial-recovery',
    'error-handling-namespace-issues',
    'error-handling-corpus-recovery'
  ];

  tools.log(`\n=== Parsing Error Recovery Performance Summary ===`);

  for (const operation of operations) {
    const summary = await PerformanceTracker.getSummary(operation);
    if (summary) {
      tools.log(`${operation}:`);
      tools.log(` avg=${summary.average}ms, min=${summary.min}ms, max=${summary.max}ms, p95=${summary.p95}ms`);
    }
  }

  tools.log(`\nParsing error recovery testing completed.`);
  tools.log(`Note: Some parsing errors are expected when testing error recovery mechanisms.`);
});