// einvoice/test/suite/einvoice_error-handling/test.err-01.parsing-recovery.ts
// 2025-05-25 19:45:37 +00:00
//
// 769 lines
// 28 KiB
// TypeScript
// Raw Blame History

import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../../ts/plugins.ts';
import { EInvoice } from '../../../ts/classes.xinvoice.ts';
import { CorpusLoader } from '../../helpers/corpus.loader.ts';
import { PerformanceTracker } from '../../helpers/performance.tracker.ts';
/** Time limit for long-running error-handling tests: five minutes, in milliseconds. */
const testTimeout = 5 * 60 * 1000;
// ERR-01: Parsing Error Recovery
// Tests error recovery mechanisms during XML parsing including
// malformed XML, encoding issues, and partial document recovery
/**
 * ERR-01 - malformed XML.
 *
 * Feeds a set of deliberately broken UBL documents to `EInvoice.fromXmlString`
 * and logs how each one is handled. When parsing throws for a fixture that
 * expects an error, the message is asserted to be truthy and longer than ten
 * characters; fixtures flagged `recoverable` are then parsed a second time from
 * a sanitized copy. The elapsed time is recorded under the
 * 'error-handling-malformed-xml' metric.
 */
tap.test('ERR-01: Parsing Error Recovery - Malformed XML Recovery', async (tools) => {
  const startedAt = Date.now();

  // Every fixture starts with the same XML declaration and UBL root element.
  const xmlDeclaration = '<?xml version="1.0" encoding="UTF-8"?>';
  const ublInvoiceOpen = '<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">';
  const prologue = `${xmlDeclaration}\n${ublInvoiceOpen}`;

  // Words whose presence in an error message counts as "helpful context".
  const contextKeywords = ['xml', 'parse', 'tag', 'attribute', 'invalid'];

  // NOTE: the XML payload lines below are flush-left on purpose; any leading
  // whitespace would become part of the document handed to the parser.
  const malformedFixtures = [
    {
      name: 'Missing closing tag',
      xml: `${prologue}
<ID>MALFORMED-001</ID>
<IssueDate>2024-01-15</IssueDate>
<InvoiceTypeCode>380
<DocumentCurrencyCode>EUR</DocumentCurrencyCode>
</Invoice>`,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Mismatched tags',
      xml: `${prologue}
<ID>MALFORMED-002</ID>
<IssueDate>2024-01-15</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
<DocumentCurrencyCode>EUR</InvoiceCurrencyCode>
</Invoice>`,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Invalid XML characters',
      xml: `${prologue}
<ID>MALFORMED-003</ID>
<IssueDate>2024-01-15</IssueDate>
<Note>Invalid chars: ${String.fromCharCode(0x00, 0x01)}</Note>
</Invoice>`,
      expectedError: true,
      recoverable: true
    },
    {
      name: 'Broken CDATA section',
      xml: `${prologue}
<ID>MALFORMED-004</ID>
<Note><![CDATA[Broken CDATA section]]</Note>
</Invoice>`,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Unclosed attribute quote',
      xml: `${prologue}
<ID schemeID="unclosed>MALFORMED-005</ID>
</Invoice>`,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Invalid attribute value',
      xml: `${prologue}
<ID>MALFORMED-006</ID>
<TaxTotal>
<TaxAmount currencyID="<>">100.00</TaxAmount>
</TaxTotal>
</Invoice>`,
      expectedError: true,
      recoverable: true
    }
  ];

  for (const { name, xml, expectedError, recoverable } of malformedFixtures) {
    tools.log(`Testing ${name}...`);
    try {
      const invoice = new EInvoice();
      const parsed = await invoice.fromXmlString(xml);

      // Fixture was not supposed to fail: only report the outcome.
      if (!expectedError) {
        tools.log(parsed ? ` ✓ Parsing succeeded as expected` : ` ✗ Unexpected parsing failure`);
        continue;
      }

      // Fixture was supposed to fail and the parser produced nothing.
      if (!parsed) {
        tools.log(` ✓ Expected error - no parsing result`);
        continue;
      }

      // Parser tolerated the broken input: see how much survives a round trip.
      tools.log(` ⚠ Expected error but parsing succeeded - checking recovery`);
      try {
        const serialized = await invoice.toXmlString();
        if (serialized && serialized.length > 50) {
          tools.log(` ✓ Partial recovery successful - extracted ${serialized.length} chars`);
          const idKept = serialized.includes('MALFORMED');
          const dateKept = serialized.includes('2024-01-15');
          const structureKept = serialized.includes('Invoice');
          tools.log(` ID preserved: ${idKept}`);
          tools.log(` Date preserved: ${dateKept}`);
          tools.log(` Structure preserved: ${structureKept}`);
        }
      } catch (outputError) {
        tools.log(` ⚠ Recovery limited - output generation failed: ${outputError.message}`);
      }
    } catch (error) {
      // An exception for a fixture that should parse is a genuine failure.
      if (!expectedError) {
        tools.log(` ✗ Unexpected error: ${error.message}`);
        throw error;
      }

      tools.log(` ✓ Expected parsing error caught: ${error.message}`);

      // The error must carry a non-trivial message.
      expect(error.message).toBeTruthy();
      expect(error.message.length).toBeGreaterThan(10);

      // Informational only: does the message hint at what went wrong?
      const loweredMessage = error.message.toLowerCase();
      const mentionsContext = contextKeywords.some((keyword) => loweredMessage.includes(keyword));
      tools.log(mentionsContext ? ` ✓ Error message provides context` : ` ⚠ Error message lacks context`);

      if (recoverable) {
        tools.log(` Attempting recovery...`);
        try {
          // Strip control characters first, then the empty "<>" bracket pair.
          const controlChars = /[\x00-\x08\x0B-\x0C\x0E-\x1F]/g;
          const emptyBrackets = /<>/g;
          const sanitizedXml = xml.replace(controlChars, '').replace(emptyBrackets, '');

          const retryInvoice = new EInvoice();
          const retried = await retryInvoice.fromXmlString(sanitizedXml);
          tools.log(retried ? ` ✓ Recovery successful after cleaning` : ` ⚠ Recovery failed even after cleaning`);
        } catch (recoveryError) {
          tools.log(` ⚠ Recovery attempt failed: ${recoveryError.message}`);
        }
      }
    }
  }

  const elapsedMs = Date.now() - startedAt;
  PerformanceTracker.recordMetric('error-handling-malformed-xml', elapsedMs);
});
/**
 * ERR-01 - encoding problems.
 *
 * Runs four fixtures through the parser: two raw byte buffers whose content is
 * not valid UTF-8, one buffer with a UTF-8 BOM but a UTF-16 declaration, and one
 * plain string containing non-ASCII characters. Buffer fixtures are written to a
 * temporary file under `<cwd>/.nogit` and parsed with `fromFile`; string
 * fixtures go through `fromXmlString`. Outcomes are logged; an exception for a
 * fixture that does not expect an error is rethrown and fails the test. The
 * elapsed time is recorded under the 'error-handling-encoding-issues' metric.
 */
tap.test('ERR-01: Parsing Error Recovery - Encoding Issues', async (tools) => {
  const startTime = Date.now();
  // Test various encoding-related parsing errors
  // NOTE: template-literal payload lines are flush-left on purpose; leading
  // whitespace would become part of the document.
  const encodingTests = [
    {
      name: 'Mismatched encoding declaration',
      xml: Buffer.from([
        0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31,
        0x2E, 0x30, 0x22, 0x20, 0x65, 0x6E, 0x63, 0x6F, 0x64, 0x69, 0x6E, 0x67, 0x3D, 0x22, 0x55, 0x54,
        0x46, 0x2D, 0x38, 0x22, 0x3F, 0x3E, 0x0A, // <?xml version="1.0" encoding="UTF-8"?>
        0x3C, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E, // <Invoice>
        0x3C, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // <Note>
        0xC4, 0xD6, 0xDC, // ISO-8859-1 encoded German umlauts (not UTF-8)
        0x3C, 0x2F, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // </Note>
        0x3C, 0x2F, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E // </Invoice>
      ]),
      expectedError: true,
      description: 'UTF-8 declared but ISO-8859-1 content'
    },
    {
      name: 'BOM with wrong encoding',
      xml: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
        Buffer.from(`<?xml version="1.0" encoding="UTF-16"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>ENCODING-BOM-001</ID>
</Invoice>`)
      ]),
      expectedError: false, // Parser might handle this
      description: 'UTF-8 BOM with UTF-16 declaration'
    },
    {
      name: 'Invalid UTF-8 sequences',
      xml: Buffer.from([
        0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31,
        0x2E, 0x30, 0x22, 0x3F, 0x3E, 0x0A, // <?xml version="1.0"?>
        0x3C, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E, // <Invoice>
        0x3C, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // <Note>
        0xC0, 0x80, // Invalid UTF-8 sequence (overlong encoding of NULL)
        0xED, 0xA0, 0x80, // Invalid UTF-8 sequence (surrogate half)
        0x3C, 0x2F, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // </Note>
        0x3C, 0x2F, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E // </Invoice>
      ]),
      expectedError: true,
      description: 'Invalid UTF-8 byte sequences'
    },
    {
      name: 'Mixed encoding in document',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>MIXED-ENCODING-001</ID>
<Note>UTF-8 text: äöü €</Note>
<AdditionalNote>${String.fromCharCode(0xA9)} ${String.fromCharCode(0xAE)}</AdditionalNote>
</Invoice>`,
      expectedError: false,
      description: 'Mixed but valid encoding'
    }
  ];
  for (const testCase of encodingTests) {
    tools.log(`Testing ${testCase.name}: ${testCase.description}`);
    try {
      const invoice = new EInvoice();
      let parseResult;
      if (Buffer.isBuffer(testCase.xml)) {
        // Raw bytes cannot be passed as a string without decoding them first,
        // so they are written to a temp file and parsed from disk.
        const tempPath = plugins.path.join(process.cwd(), '.nogit', `temp-encoding-${Date.now()}.xml`);
        await plugins.fs.ensureDir(plugins.path.dirname(tempPath));
        await plugins.fs.writeFile(tempPath, testCase.xml);
        try {
          parseResult = await invoice.fromFile(tempPath);
        } finally {
          // Clean up temp file whether or not parsing threw
          await plugins.fs.remove(tempPath);
        }
      } else {
        parseResult = await invoice.fromXmlString(testCase.xml);
      }
      if (testCase.expectedError) {
        if (parseResult) {
          tools.log(` ⚠ Expected encoding error but parsing succeeded`);
          // Check if data was corrupted
          const xmlOutput = await invoice.toXmlString();
          tools.log(` Output length: ${xmlOutput.length} chars`);
          // Look for encoding artifacts. U+FFFD is the character decoders
          // substitute for undecodable bytes; it is written as an escape so the
          // check cannot itself be damaged by a re-encoding of this file (the
          // previous literal form had degraded into the text "<27>").
          const hasEncodingIssues = xmlOutput.includes('\uFFFD') || // Unicode replacement character
            !/^[\x00-\x7F]*$/.test(xmlOutput); // Non-ASCII when not expected
          if (hasEncodingIssues) {
            tools.log(` ⚠ Encoding artifacts detected in output`);
          }
        } else {
          tools.log(` ✓ Expected encoding error - no parsing result`);
        }
      } else {
        if (parseResult) {
          tools.log(` ✓ Parsing succeeded as expected`);
          // Verify encoding preservation
          const xmlOutput = await invoice.toXmlString();
          if (testCase.xml.toString().includes('äöü') && xmlOutput.includes('äöü')) {
            tools.log(` ✓ Special characters preserved correctly`);
          }
        } else {
          tools.log(` ✗ Unexpected parsing failure`);
        }
      }
    } catch (error) {
      if (testCase.expectedError) {
        tools.log(` ✓ Expected encoding error caught: ${error.message}`);
        // Check if error mentions encoding (informational only)
        const errorLower = error.message.toLowerCase();
        if (errorLower.includes('encoding') ||
          errorLower.includes('utf') ||
          errorLower.includes('charset') ||
          errorLower.includes('decode')) {
          tools.log(` ✓ Error message indicates encoding issue`);
        }
      } else {
        // A fixture that should parse threw instead: fail the test.
        tools.log(` ✗ Unexpected error: ${error.message}`);
        throw error;
      }
    }
  }
  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-encoding-issues', duration);
});
/**
 * Lists the elements that are still open at the end of `xml`, outermost first.
 *
 * This is a lightweight tag scanner, not an XML parser: it walks start and end
 * tags in document order and keeps a stack of open element names. An end tag
 * closes the nearest open element with the same name together with anything
 * opened inside it; an end tag with no matching open element is ignored.
 * Self-closing tags, processing instructions, comments and CDATA markers are
 * not treated as open elements. Names may carry a namespace prefix.
 * Limitations: text inside CDATA sections is scanned like markup, and a literal
 * `<` or `>` inside an attribute value is not understood.
 *
 * @param xml - Possibly truncated or malformed XML text.
 * @returns Names of unclosed elements, ordered from outermost to innermost.
 */
function collectUnclosedTagNames(xml: string): string[] {
  // group 1: "/" for an end tag; group 2: element name; group 3: rest of the tag
  const tagPattern = /<(\/?)([A-Za-z_][\w.:-]*)([^<>]*)>/g;
  const openElements: string[] = [];
  for (const [, closingSlash, tagName, remainder] of xml.matchAll(tagPattern)) {
    if (closingSlash) {
      const openIndex = openElements.lastIndexOf(tagName);
      if (openIndex !== -1) {
        // Drop the matched element and everything nested inside it.
        openElements.length = openIndex;
      }
    } else if (!remainder.trimEnd().endsWith('/')) {
      openElements.push(tagName);
    }
  }
  return openElements;
}

/**
 * ERR-01 - partial documents.
 *
 * Parses three damaged documents (two truncated, one with a corrupted middle
 * section). If the parser accepts a document, the test logs how many of the
 * fixture's `recoverableData` strings survive a round trip through
 * `toXmlString`. If the parser throws, the test appends closing tags for every
 * element left open and tries once more. All outcomes are logged only; nothing
 * in this test asserts. The elapsed time is recorded under the
 * 'error-handling-partial-recovery' metric.
 */
tap.test('ERR-01: Parsing Error Recovery - Partial Document Recovery', async (tools) => {
  const startTime = Date.now();
  // Test recovery from partially corrupted documents
  // NOTE: template-literal payload lines are flush-left on purpose; leading
  // whitespace would become part of the document.
  const partialDocumentTests = [
    {
      name: 'Truncated at invoice line',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>PARTIAL-001</ID>
<IssueDate>2024-01-15</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
<DocumentCurrencyCode>EUR</DocumentCurrencyCode>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Partial Recovery Supplier</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
<InvoiceLine>
<ID>1</ID>
<InvoicedQuantity unitCode="C62">5</InvoicedQuantity>
<LineExtensionAmount currencyID="EUR">500.00</LineExtensionAmount>
<Item>
<Name>Product for partial recovery test</Name>`,
      recoverableData: ['PARTIAL-001', '2024-01-15', 'EUR', 'Partial Recovery Supplier']
    },
    {
      name: 'Missing end sections',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>PARTIAL-002</ID>
<IssueDate>2024-01-15</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
<DocumentCurrencyCode>USD</DocumentCurrencyCode>
<Note>This invoice is missing its closing sections</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Incomplete Invoice Supplier</Name>
</PartyName>
<PostalAddress>
<StreetName>Recovery Street 123</StreetName>
<CityName>Test City</CityName>`,
      recoverableData: ['PARTIAL-002', '2024-01-15', 'USD', 'Incomplete Invoice Supplier', 'Recovery Street 123']
    },
    {
      name: 'Corrupted middle section',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>PARTIAL-003</ID>
<IssueDate>2024-01-15</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
<DocumentCurrencyCode>GBP</DocumentCurrencyCode>
<AccountingSupplierParty>
<Party>
<<<CORRUPTED_DATA_SECTION>>>
@#$%^&*()_+{}|:"<>?
BINARY_GARBAGE: ${String.fromCharCode(0x00, 0x01, 0x02, 0x03)}
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>Valid Customer After Corruption</Name>
</PartyName>
</Party>
</AccountingCustomerParty>
<LegalMonetaryTotal>
<PayableAmount currencyID="GBP">1500.00</PayableAmount>
</LegalMonetaryTotal>
</Invoice>`,
      recoverableData: ['PARTIAL-003', '2024-01-15', 'GBP', 'Valid Customer After Corruption', '1500.00']
    }
  ];
  for (const testCase of partialDocumentTests) {
    tools.log(`Testing ${testCase.name}...`);
    try {
      const invoice = new EInvoice();
      const parseResult = await invoice.fromXmlString(testCase.xml);
      if (parseResult) {
        tools.log(` ⚠ Partial document parsed - unexpected success`);
        // Check what data was recovered
        try {
          const xmlOutput = await invoice.toXmlString();
          tools.log(` Checking recovered data...`);
          let recoveredCount = 0;
          for (const expectedData of testCase.recoverableData) {
            if (xmlOutput.includes(expectedData)) {
              recoveredCount++;
              tools.log(` ✓ Recovered: ${expectedData}`);
            } else {
              tools.log(` ✗ Lost: ${expectedData}`);
            }
          }
          const recoveryRate = (recoveredCount / testCase.recoverableData.length) * 100;
          tools.log(` Recovery rate: ${recoveryRate.toFixed(1)}% (${recoveredCount}/${testCase.recoverableData.length})`);
        } catch (outputError) {
          tools.log(` ⚠ Could not generate output from partial document: ${outputError.message}`);
        }
      } else {
        tools.log(` ✓ Partial document parsing failed as expected`);
      }
    } catch (error) {
      tools.log(` ✓ Parsing error caught: ${error.message}`);
      // Test if we can implement a recovery strategy
      tools.log(` Attempting recovery strategy...`);
      try {
        // Strategy 1: close every element that is still open at end of input
        const unclosedTags = collectUnclosedTagNames(testCase.xml);
        if (unclosedTags.length > 0) {
          tools.log(` Found ${unclosedTags.length} unclosed tags`);
          // Innermost element must be closed first, hence the reversal.
          const closingTags = [...unclosedTags].reverse().map((tag) => `</${tag}>`).join('');
          const recoveredXml = testCase.xml + closingTags;
          // Try parsing recovered XML
          const recoveryInvoice = new EInvoice();
          const recoveryResult = await recoveryInvoice.fromXmlString(recoveredXml);
          if (recoveryResult) {
            tools.log(` ✓ Recovery successful after closing tags`);
            // Check recovered data
            const recoveredOutput = await recoveryInvoice.toXmlString();
            const postRecoveryCount = testCase.recoverableData
              .filter((expectedData) => recoveredOutput.includes(expectedData))
              .length;
            tools.log(` Post-recovery data: ${postRecoveryCount}/${testCase.recoverableData.length} items`);
          } else {
            tools.log(` ⚠ Recovery strategy failed`);
          }
        } else {
          // Nothing is left open (e.g. the damage is inside the document), so
          // appending closing tags cannot help.
          tools.log(` No unclosed tags found - tag-closing strategy not applicable`);
        }
      } catch (recoveryError) {
        tools.log(` Recovery attempt failed: ${recoveryError.message}`);
      }
    }
  }
  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-partial-recovery', duration);
});
/**
 * ERR-01 - namespace problems.
 *
 * Parses four documents with a missing, wrong, duplicated or undeclared
 * namespace. For fixtures that expect an error but parse anyway, `validate()`
 * is called and any validation error mentioning "namespace" is logged. For
 * fixtures that expect success, the serialized output is checked for a known
 * UBL or CII namespace URI. An exception for a fixture that does not expect an
 * error is rethrown. The elapsed time is recorded under the
 * 'error-handling-namespace-issues' metric.
 */
tap.test('ERR-01: Parsing Error Recovery - Namespace Issues', async (tools) => {
  const startedAt = Date.now();

  // Namespace URIs that count as a "proper" declaration in serialized output.
  const knownInvoiceNamespaces = [
    'urn:oasis:names:specification:ubl:schema:xsd:Invoice-2',
    'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice'
  ];
  // Words that mark an error message as namespace-related.
  const namespaceKeywords = ['namespace', 'prefix', 'xmlns'];

  // NOTE: the XML payload lines below are flush-left on purpose; any leading
  // whitespace would become part of the document handed to the parser.
  const namespaceScenarios = [
    {
      name: 'Missing namespace declaration',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice>
<ID>NAMESPACE-001</ID>
<IssueDate>2024-01-15</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
</Invoice>`,
      expectedError: false, // May parse but validation should fail
      issue: 'No namespace declared'
    },
    {
      name: 'Wrong namespace URI',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="http://wrong.namespace.uri/invoice">
<ID>NAMESPACE-002</ID>
<IssueDate>2024-01-15</IssueDate>
</Invoice>`,
      expectedError: false,
      issue: 'Incorrect namespace'
    },
    {
      name: 'Conflicting namespace prefixes',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ns1:Invoice xmlns:ns1="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:ns1="http://different.namespace">
<ns1:ID>NAMESPACE-003</ns1:ID>
</ns1:Invoice>`,
      expectedError: true,
      issue: 'Duplicate prefix definition'
    },
    {
      name: 'Undefined namespace prefix',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>NAMESPACE-004</ID>
<unknown:Element>Content</unknown:Element>
</Invoice>`,
      expectedError: true,
      issue: 'Undefined prefix used'
    }
  ];

  for (const { name, xml, expectedError, issue } of namespaceScenarios) {
    tools.log(`Testing ${name}: ${issue}`);
    try {
      const invoice = new EInvoice();
      const parsed = await invoice.fromXmlString(xml);

      if (expectedError && parsed) {
        // Parser was lenient; see whether validation catches the problem instead.
        tools.log(` ⚠ Expected namespace error but parsing succeeded`);
        try {
          const validation = await invoice.validate();
          if (!validation.valid) {
            tools.log(` ✓ Namespace issues detected during validation`);
            if (validation.errors) {
              for (const finding of validation.errors) {
                if (finding.message.toLowerCase().includes('namespace')) {
                  tools.log(` Namespace error: ${finding.message}`);
                }
              }
            }
          }
        } catch (validationError) {
          tools.log(` Validation failed: ${validationError.message}`);
        }
      } else if (expectedError) {
        tools.log(` ✓ Expected namespace error - no parsing result`);
      } else if (parsed) {
        tools.log(` ✓ Parsing succeeded as expected`);
        // Does the serialized document carry a recognised invoice namespace?
        const serialized = await invoice.toXmlString();
        const declaresKnownNamespace = knownInvoiceNamespaces.some((uri) => serialized.includes(uri));
        tools.log(
          declaresKnownNamespace
            ? ` ✓ Proper namespace maintained in output`
            : ` ⚠ Output missing proper namespace declaration`
        );
      } else {
        tools.log(` ✗ Unexpected parsing failure`);
      }
    } catch (error) {
      // An exception for a fixture that should parse is a genuine failure.
      if (!expectedError) {
        tools.log(` ✗ Unexpected error: ${error.message}`);
        throw error;
      }

      tools.log(` ✓ Expected namespace error caught: ${error.message}`);
      // Informational only: does the message point at the namespace problem?
      const loweredMessage = error.message.toLowerCase();
      if (namespaceKeywords.some((keyword) => loweredMessage.includes(keyword))) {
        tools.log(` ✓ Error message indicates namespace issue`);
      }
    }
  }

  const elapsedMs = Date.now() - startedAt;
  PerformanceTracker.recordMetric('error-handling-namespace-issues', elapsedMs);
});
/**
 * ERR-01 - corpus files.
 *
 * Parses the first five files of each listed corpus category with
 * `EInvoice.fromFile`. A file counts as a parse error when the call returns a
 * falsy value or throws. Recovery is attempted when no result was returned, or
 * when the thrown error's message could be classified (encoding, structure,
 * namespace, attribute); each attempt re-reads the file and tries the recovery
 * strategies one by one until one parses. The test asserts that fewer than 20%
 * of processed files produced a parse error, then records the elapsed time
 * under the 'error-handling-corpus-recovery' metric.
 */
tap.test('ERR-01: Parsing Error Recovery - Corpus Error Recovery', { timeout: testTimeout }, async (tools) => {
  const startTime = Date.now();
  let processedFiles = 0;
  let parseErrors = 0;
  let recoveryAttempts = 0;
  let successfulRecoveries = 0;

  // Independent text clean-ups; each is applied to the original file content on
  // its own (they are not chained).
  const recoveryStrategies = [
    {
      name: 'Remove BOM',
      transform: (content: string) => content.replace(/^\uFEFF/, '')
    },
    {
      name: 'Fix encoding',
      transform: (content: string) => content.replace(/[\x00-\x08\x0B-\x0C\x0E-\x1F]/g, '')
    },
    {
      name: 'Normalize whitespace',
      transform: (content: string) => content.replace(/\r\n/g, '\n').replace(/\r/g, '\n')
    }
  ];

  // Message keywords mapped to an error category; the first match wins.
  const errorCategories = [
    { keyword: 'encoding', type: 'encoding' },
    { keyword: 'tag', type: 'structure' },
    { keyword: 'namespace', type: 'namespace' },
    { keyword: 'attribute', type: 'attribute' }
  ];

  /**
   * Re-reads `filePath` and tries each recovery strategy in turn.
   * Never throws: a read failure or a failing strategy is logged and treated
   * as "not recovered", so one bad file cannot abort the rest of the run or be
   * counted as an additional parse error.
   * Returns true as soon as one strategy yields a truthy parse result.
   */
  const attemptRecovery = async (filePath: string): Promise<boolean> => {
    let fileContent: string;
    try {
      fileContent = await plugins.fs.readFile(filePath, 'utf-8');
    } catch (readError) {
      tools.log(` ⚠ Recovery skipped - could not read file: ${readError.message}`);
      return false;
    }
    for (const strategy of recoveryStrategies) {
      try {
        const recoveryInvoice = new EInvoice();
        const recoveryResult = await recoveryInvoice.fromXmlString(strategy.transform(fileContent));
        if (recoveryResult) {
          tools.log(` ✓ Recovery successful with strategy: ${strategy.name}`);
          return true;
        }
      } catch (strategyError) {
        // Best effort: note the failure and move on to the next strategy.
        tools.log(` Strategy '${strategy.name}' failed: ${strategyError.message}`);
      }
    }
    return false;
  };

  try {
    // Test with potentially problematic files from corpus
    const categories = ['UBL_XML_RECHNUNG', 'CII_XML_RECHNUNG'];
    for (const category of categories) {
      try {
        const files = await CorpusLoader.getFiles(category);
        const filesToProcess = files.slice(0, 5); // Process first 5 files per category
        for (const filePath of filesToProcess) {
          processedFiles++;
          const fileName = plugins.path.basename(filePath);
          let shouldAttemptRecovery = false;

          // First, try normal parsing
          try {
            const invoice = new EInvoice();
            const parseResult = await invoice.fromFile(filePath);
            if (!parseResult) {
              parseErrors++;
              tools.log(`${fileName}: Parse returned no result`);
              shouldAttemptRecovery = true;
            }
          } catch (error) {
            parseErrors++;
            tools.log(`${fileName}: Parse error - ${error.message}`);
            // Log error characteristics
            const errorLower = error.message.toLowerCase();
            const errorType =
              errorCategories.find(({ keyword }) => errorLower.includes(keyword))?.type ?? 'unknown';
            tools.log(` Error type: ${errorType}`);
            // Only errors we can classify are worth a recovery attempt
            shouldAttemptRecovery = errorType !== 'unknown';
          }

          // Recovery runs outside the parse try/catch so that its own failures
          // are never mistaken for a second parse error on the same file.
          if (shouldAttemptRecovery) {
            recoveryAttempts++;
            if (await attemptRecovery(filePath)) {
              successfulRecoveries++;
            }
          }
        }
      } catch (categoryError) {
        tools.log(`Failed to process category ${category}: ${categoryError.message}`);
      }
    }
    // Summary statistics (guarded against division by zero)
    const errorRate = processedFiles > 0 ? (parseErrors / processedFiles) * 100 : 0;
    const recoveryRate = recoveryAttempts > 0 ? (successfulRecoveries / recoveryAttempts) * 100 : 0;
    tools.log(`\nParsing Error Recovery Summary:`);
    tools.log(`- Files processed: ${processedFiles}`);
    tools.log(`- Parse errors: ${parseErrors} (${errorRate.toFixed(1)}%)`);
    tools.log(`- Recovery attempts: ${recoveryAttempts}`);
    tools.log(`- Successful recoveries: ${successfulRecoveries} (${recoveryRate.toFixed(1)}%)`);
    // Most corpus files should parse without errors
    expect(errorRate).toBeLessThan(20); // Less than 20% error rate expected
  } catch (error) {
    tools.log(`Corpus error recovery test failed: ${error.message}`);
    throw error;
  }
  const totalDuration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-corpus-recovery', totalDuration);
  tools.log(`Corpus error recovery completed in ${totalDuration}ms`);
});
/**
 * ERR-01 - performance summary.
 *
 * Looks up the summary for each metric name used by the ERR-01 tests and logs
 * its average, min, max and p95 values. Metrics for which `getSummary` returns
 * a falsy value are skipped. Nothing is asserted here.
 */
tap.test('ERR-01: Performance Summary', async (tools) => {
  const metricNames = [
    'error-handling-malformed-xml',
    'error-handling-encoding-issues',
    'error-handling-partial-recovery',
    'error-handling-namespace-issues',
    'error-handling-corpus-recovery'
  ];

  tools.log(`\n=== Parsing Error Recovery Performance Summary ===`);

  for (const metricName of metricNames) {
    const stats = await PerformanceTracker.getSummary(metricName);
    if (!stats) {
      continue; // no samples recorded under this name
    }
    tools.log(`${metricName}:`);
    const { average, min, max, p95 } = stats;
    tools.log(` avg=${average}ms, min=${min}ms, max=${max}ms, p95=${p95}ms`);
  }

  tools.log(`\nParsing error recovery testing completed.`);
  tools.log(`Note: Some parsing errors are expected when testing error recovery mechanisms.`);
});