import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../../ts/plugins.ts';
import { EInvoice } from '../../../ts/classes.xinvoice.ts';
import { CorpusLoader } from '../../helpers/corpus.loader.ts';
import { PerformanceTracker } from '../../helpers/performance.tracker.ts';
/** Timeout budget in milliseconds (5 minutes) for error handling tests. */
const testTimeout = 5 * 60 * 1000;
// ERR-01: Parsing Error Recovery
// Tests error recovery mechanisms during XML parsing including
// malformed XML, encoding issues, and partial document recovery
/**
 * ERR-01 (malformed XML): feeds deliberately broken XML snippets to
 * `EInvoice.fromXmlString` and logs how the parser reacts.
 *
 * Per scenario:
 * - A thrown error on an `expectedError` scenario must carry a message longer
 *   than 10 characters; whether it mentions xml/parse/tag/attribute/invalid is
 *   only logged.
 * - `recoverable` scenarios are retried once after stripping control
 *   characters and empty angle-bracket pairs; the retry outcome is only logged.
 * - A thrown error on a scenario that did not expect one is rethrown.
 *
 * The elapsed wall-clock time is recorded under `error-handling-malformed-xml`.
 */
tap.test('ERR-01: Parsing Error Recovery - Malformed XML Recovery', async (tools) => {
  const startedAt = Date.now();

  // Fixture table: one entry per kind of malformed input.
  const scenarios = [
    {
      name: 'Missing closing tag',
      xml: `
MALFORMED-001
2024-01-15
380
EUR
`,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Mismatched tags',
      xml: `
MALFORMED-002
2024-01-15
380
EUR
`,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Invalid XML characters',
      xml: `
MALFORMED-003
2024-01-15
Invalid chars: ${String.fromCharCode(0x00)}${String.fromCharCode(0x01)}
`,
      expectedError: true,
      recoverable: true
    },
    {
      name: 'Broken CDATA section',
      xml: `
MALFORMED-004
`,
      expectedError: true,
      recoverable: false
    },
    {
      name: 'Unclosed attribute quote',
      xml: `
MALFORMED-006
100.00
`,
      expectedError: true,
      recoverable: true
    }
  ];

  // Words whose presence in an error message counts as "helpful context".
  const contextKeywords = ['xml', 'parse', 'tag', 'attribute', 'invalid'];

  // Serializes an invoice that parsed despite broken input and logs which
  // fixture markers survived; serialization failures are logged, not thrown.
  const reportPartialRecovery = async (parsedInvoice: EInvoice) => {
    try {
      const serialized = await parsedInvoice.toXmlString();
      if (serialized && serialized.length > 50) {
        tools.log(` ✓ Partial recovery successful - extracted ${serialized.length} chars`);
        const preserved = {
          hasId: serialized.includes('MALFORMED'),
          hasDate: serialized.includes('2024-01-15'),
          hasStructure: serialized.includes('Invoice')
        };
        tools.log(` ID preserved: ${preserved.hasId}`);
        tools.log(` Date preserved: ${preserved.hasDate}`);
        tools.log(` Structure preserved: ${preserved.hasStructure}`);
      }
    } catch (outputError) {
      tools.log(` ⚠ Recovery limited - output generation failed: ${outputError.message}`);
    }
  };

  // Strips control characters and empty bracket pairs, then parses once more.
  const retryAfterCleaning = async (rawXml: string) => {
    try {
      const withoutControlChars = rawXml.replace(/[\x00-\x08\x0B-\x0C\x0E-\x1F]/g, '');
      const sanitized = withoutControlChars.replace(/<>/g, '');
      const retryInvoice = new EInvoice();
      const retried = await retryInvoice.fromXmlString(sanitized);
      tools.log(retried
        ? ` ✓ Recovery successful after cleaning`
        : ` ⚠ Recovery failed even after cleaning`);
    } catch (recoveryError) {
      tools.log(` ⚠ Recovery attempt failed: ${recoveryError.message}`);
    }
  };

  for (const scenario of scenarios) {
    tools.log(`Testing ${scenario.name}...`);
    try {
      const invoice = new EInvoice();
      const parsed = await invoice.fromXmlString(scenario.xml);

      if (!scenario.expectedError) {
        tools.log(parsed ? ` ✓ Parsing succeeded as expected` : ` ✗ Unexpected parsing failure`);
        continue;
      }
      if (!parsed) {
        tools.log(` ✓ Expected error - no parsing result`);
        continue;
      }

      // The parser accepted input we expected it to reject: see what survived.
      tools.log(` ⚠ Expected error but parsing succeeded - checking recovery`);
      await reportPartialRecovery(invoice);
    } catch (error) {
      if (!scenario.expectedError) {
        tools.log(` ✗ Unexpected error: ${error.message}`);
        throw error;
      }

      tools.log(` ✓ Expected parsing error caught: ${error.message}`);

      // Error quality: the message must exist and be reasonably descriptive.
      expect(error.message).toBeTruthy();
      expect(error.message.length).toBeGreaterThan(10);

      const lowered = error.message.toLowerCase();
      const mentionsContext = contextKeywords.some((keyword) => lowered.includes(keyword));
      tools.log(mentionsContext
        ? ` ✓ Error message provides context`
        : ` ⚠ Error message lacks context`);

      if (scenario.recoverable) {
        tools.log(` Attempting recovery...`);
        await retryAfterCleaning(scenario.xml);
      }
    }
  }

  PerformanceTracker.recordMetric('error-handling-malformed-xml', Date.now() - startedAt);
});
/**
 * ERR-01 (encoding): parses fixtures whose bytes disagree with, or are invalid
 * for, their declared encoding and logs the parser's reaction.
 *
 * Buffer fixtures are written to a temporary file under `<cwd>/.nogit` and
 * loaded through `EInvoice.fromFile`; string fixtures go through
 * `EInvoice.fromXmlString`. The temporary file is removed even when parsing
 * throws. An error thrown for a fixture that did not expect one is rethrown.
 *
 * The elapsed wall-clock time is recorded under `error-handling-encoding-issues`.
 */
tap.test('ERR-01: Parsing Error Recovery - Encoding Issues', async (tools) => {
  const startedAt = Date.now();

  const fixtures = [
    {
      name: 'Mismatched encoding declaration',
      xml: Buffer.from([
        // <?xml version="1.0" encoding="UTF-8"?>\n
        0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31,
        0x2E, 0x30, 0x22, 0x20, 0x65, 0x6E, 0x63, 0x6F, 0x64, 0x69, 0x6E, 0x67, 0x3D, 0x22, 0x55, 0x54,
        0x46, 0x2D, 0x38, 0x22, 0x3F, 0x3E, 0x0A,
        0x3C, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E, // <Invoice>
        0x3C, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // <Note>
        0xC4, 0xD6, 0xDC, // ISO-8859-1 encoded German umlauts (not UTF-8)
        0x3C, 0x2F, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // </Note>
        0x3C, 0x2F, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E // </Invoice>
      ]),
      expectedError: true,
      description: 'UTF-8 declared but ISO-8859-1 content'
    },
    {
      name: 'BOM with wrong encoding',
      xml: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
        Buffer.from(`
ENCODING-BOM-001
`)
      ]),
      expectedError: false, // Parser might handle this
      description: 'UTF-8 BOM with UTF-16 declaration'
    },
    {
      name: 'Invalid UTF-8 sequences',
      xml: Buffer.from([
        // <?xml version="1.0"?>\n
        0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31,
        0x2E, 0x30, 0x22, 0x3F, 0x3E, 0x0A,
        0x3C, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E, // <Invoice>
        0x3C, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // <Note>
        0xC0, 0x80, // Invalid UTF-8 sequence (overlong encoding of NULL)
        0xED, 0xA0, 0x80, // Invalid UTF-8 sequence (surrogate half)
        0x3C, 0x2F, 0x4E, 0x6F, 0x74, 0x65, 0x3E, // </Note>
        0x3C, 0x2F, 0x49, 0x6E, 0x76, 0x6F, 0x69, 0x63, 0x65, 0x3E // </Invoice>
      ]),
      expectedError: true,
      description: 'Invalid UTF-8 byte sequences'
    },
    {
      name: 'Mixed encoding in document',
      xml: `
MIXED-ENCODING-001
UTF-8 text: äöü €
${String.fromCharCode(0xA9)} ${String.fromCharCode(0xAE)}
`,
      expectedError: false,
      description: 'Mixed but valid encoding'
    }
  ];

  // Words that mark an error message as encoding-related.
  const encodingKeywords = ['encoding', 'utf', 'charset', 'decode'];

  // Loads a fixture into `target`: strings are parsed directly, buffers are
  // round-tripped through a temporary file that is always cleaned up.
  const parseFixture = async (target: EInvoice, payload: string | Buffer) => {
    if (!Buffer.isBuffer(payload)) {
      return await target.fromXmlString(payload);
    }
    const scratchFile = plugins.path.join(process.cwd(), '.nogit', `temp-encoding-${Date.now()}.xml`);
    await plugins.fs.ensureDir(plugins.path.dirname(scratchFile));
    await plugins.fs.writeFile(scratchFile, payload);
    try {
      return await target.fromFile(scratchFile);
    } finally {
      await plugins.fs.remove(scratchFile);
    }
  };

  for (const fixture of fixtures) {
    tools.log(`Testing ${fixture.name}: ${fixture.description}`);
    try {
      const invoice = new EInvoice();
      const parsed = await parseFixture(invoice, fixture.xml);

      if (!parsed) {
        tools.log(fixture.expectedError
          ? ` ✓ Expected encoding error - no parsing result`
          : ` ✗ Unexpected parsing failure`);
        continue;
      }

      if (fixture.expectedError) {
        tools.log(` ⚠ Expected encoding error but parsing succeeded`);
        // The parser accepted bad bytes: inspect the output for corruption.
        const roundTripped = await invoice.toXmlString();
        tools.log(` Output length: ${roundTripped.length} chars`);
        const showsArtifacts =
          roundTripped.includes('�') || // Replacement character
          roundTripped.includes('\uFFFD') || // Unicode replacement
          !/^[\x00-\x7F]*$/.test(roundTripped); // Non-ASCII when not expected
        if (showsArtifacts) {
          tools.log(` ⚠ Encoding artifacts detected in output`);
        }
        continue;
      }

      tools.log(` ✓ Parsing succeeded as expected`);
      // Verify that special characters survive the round trip.
      const echoed = await invoice.toXmlString();
      if (fixture.xml.toString().includes('äöü') && echoed.includes('äöü')) {
        tools.log(` ✓ Special characters preserved correctly`);
      }
    } catch (error) {
      if (!fixture.expectedError) {
        tools.log(` ✗ Unexpected error: ${error.message}`);
        throw error;
      }
      tools.log(` ✓ Expected encoding error caught: ${error.message}`);
      const lowered = error.message.toLowerCase();
      if (encodingKeywords.some((keyword) => lowered.includes(keyword))) {
        tools.log(` ✓ Error message indicates encoding issue`);
      }
    }
  }

  PerformanceTracker.recordMetric('error-handling-encoding-issues', Date.now() - startedAt);
});
/**
 * ERR-01 (partial documents): parses truncated or corrupted documents and
 * logs how much of the expected data can be recovered.
 *
 * When parsing throws, one recovery strategy is attempted: determine which
 * elements were opened but never closed, append the matching closing tags
 * (innermost first) and parse again. All outcomes are logged only; this test
 * contains no assertions.
 *
 * The elapsed wall-clock time is recorded under `error-handling-partial-recovery`.
 */
tap.test('ERR-01: Parsing Error Recovery - Partial Document Recovery', async (tools) => {
  const startTime = Date.now();
  // Test recovery from partially corrupted documents
  const partialDocumentTests = [
    {
      name: 'Truncated at invoice line',
      xml: `
PARTIAL-001
2024-01-15
380
EUR
Partial Recovery Supplier
1
5
500.00
-
Product for partial recovery test`,
      recoverableData: ['PARTIAL-001', '2024-01-15', 'EUR', 'Partial Recovery Supplier']
    },
    {
      name: 'Missing end sections',
      xml: `
PARTIAL-002
2024-01-15
380
USD
This invoice is missing its closing sections
Incomplete Invoice Supplier
Recovery Street 123
Test City`,
      recoverableData: ['PARTIAL-002', '2024-01-15', 'USD', 'Incomplete Invoice Supplier', 'Recovery Street 123']
    },
    {
      name: 'Corrupted middle section',
      xml: `
PARTIAL-003
2024-01-15
380
GBP
<<>>
@#$%^&*()_+{}|:"<>?
BINARY_GARBAGE: ${String.fromCharCode(0x00, 0x01, 0x02, 0x03)}
Valid Customer After Corruption
1500.00
`,
      recoverableData: ['PARTIAL-003', '2024-01-15', 'GBP', 'Valid Customer After Corruption', '1500.00']
    }
  ];

  /**
   * Returns the names of elements that are opened but never closed, in the
   * order they were opened.
   *
   * Tags are walked in document order. Names keep their namespace prefix
   * (e.g. `cbc:ID`), self-closing tags are skipped, and processing
   * instructions / comments / DOCTYPE never match because a name must start
   * with a letter or underscore.
   */
  const findUnclosedTags = (xml: string): string[] => {
    const tagPattern = /<(\/?)([A-Za-z_][\w.:-]*)(?:\s[^>]*?)?(\/?)>/g;
    const openStack: string[] = [];
    for (const [, closingSlash, tagName, selfClosingSlash] of xml.matchAll(tagPattern)) {
      if (selfClosingSlash) {
        continue;
      }
      if (!closingSlash) {
        openStack.push(tagName);
        continue;
      }
      // A closing tag cancels the nearest still-open element of the same name.
      const openIndex = openStack.lastIndexOf(tagName);
      if (openIndex !== -1) {
        openStack.splice(openIndex, 1);
      }
    }
    return openStack;
  };

  for (const testCase of partialDocumentTests) {
    tools.log(`Testing ${testCase.name}...`);
    try {
      const invoice = new EInvoice();
      const parseResult = await invoice.fromXmlString(testCase.xml);
      if (parseResult) {
        tools.log(` ⚠ Partial document parsed - unexpected success`);
        // Check what data was recovered
        try {
          const xmlOutput = await invoice.toXmlString();
          tools.log(` Checking recovered data...`);
          let recoveredCount = 0;
          for (const expectedData of testCase.recoverableData) {
            if (xmlOutput.includes(expectedData)) {
              recoveredCount++;
              tools.log(` ✓ Recovered: ${expectedData}`);
            } else {
              tools.log(` ✗ Lost: ${expectedData}`);
            }
          }
          const recoveryRate = (recoveredCount / testCase.recoverableData.length) * 100;
          tools.log(` Recovery rate: ${recoveryRate.toFixed(1)}% (${recoveredCount}/${testCase.recoverableData.length})`);
        } catch (outputError) {
          tools.log(` ⚠ Could not generate output from partial document: ${outputError.message}`);
        }
      } else {
        tools.log(` ✓ Partial document parsing failed as expected`);
      }
    } catch (error) {
      tools.log(` ✓ Parsing error caught: ${error.message}`);
      // Test if we can implement a recovery strategy
      tools.log(` Attempting recovery strategy...`);
      try {
        // Strategy 1: close every element that was left open
        const unclosedTags = findUnclosedTags(testCase.xml);
        if (unclosedTags.length > 0) {
          tools.log(` Found ${unclosedTags.length} unclosed tags`);
          // Innermost element first, so the appended tags nest correctly.
          const closingTags = [...unclosedTags]
            .reverse()
            .map((tag) => `</${tag}>`)
            .join('');
          const recoveredXml = testCase.xml + closingTags;
          // Try parsing recovered XML
          const recoveryInvoice = new EInvoice();
          const recoveryResult = await recoveryInvoice.fromXmlString(recoveredXml);
          if (recoveryResult) {
            tools.log(` ✓ Recovery successful after closing tags`);
            // Check recovered data
            const recoveredOutput = await recoveryInvoice.toXmlString();
            const postRecoveryCount = testCase.recoverableData
              .filter((expectedData) => recoveredOutput.includes(expectedData))
              .length;
            tools.log(` Post-recovery data: ${postRecoveryCount}/${testCase.recoverableData.length} items`);
          } else {
            tools.log(` ⚠ Recovery strategy failed`);
          }
        }
      } catch (recoveryError) {
        tools.log(` Recovery attempt failed: ${recoveryError.message}`);
      }
    }
  }
  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-partial-recovery', duration);
});
/**
 * ERR-01 (namespaces): parses documents with missing, wrong, conflicting or
 * undefined namespace declarations and logs the parser's reaction.
 *
 * - Expected error, but parsing succeeded: the invoice is validated and any
 *   validation errors mentioning "namespace" are logged.
 * - No error expected and parsing succeeded: the serialized output is checked
 *   for the UBL Invoice-2 or CII namespace URI.
 * - A thrown error on a case that did not expect one is rethrown.
 *
 * The elapsed wall-clock time is recorded under `error-handling-namespace-issues`.
 */
tap.test('ERR-01: Parsing Error Recovery - Namespace Issues', async (tools) => {
  const startedAt = Date.now();

  const namespaceCases = [
    {
      name: 'Missing namespace declaration',
      xml: `
NAMESPACE-001
2024-01-15
380
`,
      expectedError: false, // May parse but validation should fail
      issue: 'No namespace declared'
    },
    {
      name: 'Wrong namespace URI',
      xml: `
NAMESPACE-002
2024-01-15
`,
      expectedError: false,
      issue: 'Incorrect namespace'
    },
    {
      name: 'Conflicting namespace prefixes',
      xml: `
NAMESPACE-003
`,
      expectedError: true,
      issue: 'Duplicate prefix definition'
    },
    {
      name: 'Undefined namespace prefix',
      xml: `
NAMESPACE-004
Content
`,
      expectedError: true,
      issue: 'Undefined prefix used'
    }
  ];

  // Namespace URIs that count as a "proper" declaration in serialized output.
  const knownNamespaces = [
    'urn:oasis:names:specification:ubl:schema:xsd:Invoice-2',
    'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice'
  ];
  // Words that mark an error message as namespace-related.
  const namespaceKeywords = ['namespace', 'prefix', 'xmlns'];

  // Validates an invoice that parsed unexpectedly and logs namespace-related
  // validation errors; a throwing validator is logged, not propagated.
  const reportValidationFindings = async (parsedInvoice: EInvoice) => {
    try {
      const outcome = await parsedInvoice.validate();
      if (outcome.valid) {
        return;
      }
      tools.log(` ✓ Namespace issues detected during validation`);
      for (const finding of outcome.errors || []) {
        if (finding.message.toLowerCase().includes('namespace')) {
          tools.log(` Namespace error: ${finding.message}`);
        }
      }
    } catch (validationError) {
      tools.log(` Validation failed: ${validationError.message}`);
    }
  };

  for (const current of namespaceCases) {
    tools.log(`Testing ${current.name}: ${current.issue}`);
    try {
      const invoice = new EInvoice();
      const parsed = await invoice.fromXmlString(current.xml);

      if (!parsed) {
        tools.log(current.expectedError
          ? ` ✓ Expected namespace error - no parsing result`
          : ` ✗ Unexpected parsing failure`);
        continue;
      }

      if (current.expectedError) {
        tools.log(` ⚠ Expected namespace error but parsing succeeded`);
        await reportValidationFindings(invoice);
        continue;
      }

      tools.log(` ✓ Parsing succeeded as expected`);
      // Check whether the serialized output carries a known namespace.
      const serialized = await invoice.toXmlString();
      const keepsNamespace = knownNamespaces.some((uri) => serialized.includes(uri));
      tools.log(keepsNamespace
        ? ` ✓ Proper namespace maintained in output`
        : ` ⚠ Output missing proper namespace declaration`);
    } catch (error) {
      if (!current.expectedError) {
        tools.log(` ✗ Unexpected error: ${error.message}`);
        throw error;
      }
      tools.log(` ✓ Expected namespace error caught: ${error.message}`);
      const lowered = error.message.toLowerCase();
      if (namespaceKeywords.some((keyword) => lowered.includes(keyword))) {
        tools.log(` ✓ Error message indicates namespace issue`);
      }
    }
  }

  PerformanceTracker.recordMetric('error-handling-namespace-issues', Date.now() - startedAt);
});
/**
 * ERR-01 (corpus): parses the first five files of each corpus category and
 * attempts recovery for files that fail to parse.
 *
 * A file counts as a parse error when `fromFile` returns a falsy result or
 * throws. Recovery re-parses the file content after each text transformation
 * in turn (BOM removal, control-character stripping, line-ending
 * normalization) and stops at the first success. Recovery is attempted for
 * every falsy result and for thrown errors whose message can be classified.
 *
 * Recovery runs outside the parse `try` block, so a failure inside the
 * recovery code is never counted as a second parse error for the same file.
 *
 * Asserts that fewer than 20% of processed files produce a parse error and
 * records the elapsed time under `error-handling-corpus-recovery`.
 */
tap.test('ERR-01: Parsing Error Recovery - Corpus Error Recovery', { timeout: testTimeout }, async (tools) => {
  const startTime = Date.now();
  let processedFiles = 0;
  let parseErrors = 0;
  let recoveryAttempts = 0;
  let successfulRecoveries = 0;

  // Text transformations tried, in order, before re-parsing a failed file.
  const recoveryStrategies = [
    {
      name: 'Remove BOM',
      transform: (content: string) => content.replace(/^\uFEFF/, '')
    },
    {
      name: 'Fix encoding',
      transform: (content: string) => content.replace(/[\x00-\x08\x0B-\x0C\x0E-\x1F]/g, '')
    },
    {
      name: 'Normalize whitespace',
      transform: (content: string) => content.replace(/\r\n/g, '\n').replace(/\r/g, '\n')
    }
  ];

  // Re-parses the file with each strategy; resolves to true on the first
  // success. Read and strategy failures are logged instead of being swallowed.
  const attemptRecovery = async (filePath: string): Promise<boolean> => {
    try {
      const fileContent = await plugins.fs.readFile(filePath, 'utf-8');
      for (const strategy of recoveryStrategies) {
        try {
          const recoveryInvoice = new EInvoice();
          const recoveryResult = await recoveryInvoice.fromXmlString(strategy.transform(fileContent));
          if (recoveryResult) {
            tools.log(` ✓ Recovery successful with strategy: ${strategy.name}`);
            return true;
          }
        } catch (strategyError) {
          // Best effort: note the failure and move on to the next strategy.
          tools.log(` ⚠ Strategy "${strategy.name}" failed: ${strategyError.message}`);
        }
      }
    } catch (readError) {
      tools.log(` ⚠ Recovery skipped - could not read file: ${readError.message}`);
    }
    return false;
  };

  try {
    // Test with potentially problematic files from corpus
    const categories = ['UBL_XML_RECHNUNG', 'CII_XML_RECHNUNG'];
    for (const category of categories) {
      try {
        const files = await CorpusLoader.getFiles(category);
        const filesToProcess = files.slice(0, 5); // Process first 5 files per category
        for (const filePath of filesToProcess) {
          processedFiles++;
          const fileName = plugins.path.basename(filePath);
          let shouldAttemptRecovery = false;

          // First, try normal parsing
          try {
            const invoice = new EInvoice();
            const parseResult = await invoice.fromFile(filePath);
            if (!parseResult) {
              parseErrors++;
              tools.log(`⚠ ${fileName}: Parse returned no result`);
              shouldAttemptRecovery = true;
            }
          } catch (error) {
            parseErrors++;
            tools.log(`✗ ${fileName}: Parse error - ${error.message}`);
            // Log error characteristics
            const errorLower = error.message.toLowerCase();
            const errorType = errorLower.includes('encoding') ? 'encoding' :
              errorLower.includes('tag') ? 'structure' :
              errorLower.includes('namespace') ? 'namespace' :
              errorLower.includes('attribute') ? 'attribute' :
              'unknown';
            tools.log(` Error type: ${errorType}`);
            // Only attempt recovery for known error types
            shouldAttemptRecovery = errorType !== 'unknown';
          }

          // Every counted attempt is backed by a real recovery run.
          if (shouldAttemptRecovery) {
            recoveryAttempts++;
            if (await attemptRecovery(filePath)) {
              successfulRecoveries++;
            }
          }
        }
      } catch (categoryError) {
        tools.log(`Failed to process category ${category}: ${categoryError.message}`);
      }
    }
    // Summary statistics
    const errorRate = processedFiles > 0 ? (parseErrors / processedFiles) * 100 : 0;
    const recoveryRate = recoveryAttempts > 0 ? (successfulRecoveries / recoveryAttempts) * 100 : 0;
    tools.log(`\nParsing Error Recovery Summary:`);
    tools.log(`- Files processed: ${processedFiles}`);
    tools.log(`- Parse errors: ${parseErrors} (${errorRate.toFixed(1)}%)`);
    tools.log(`- Recovery attempts: ${recoveryAttempts}`);
    tools.log(`- Successful recoveries: ${successfulRecoveries} (${recoveryRate.toFixed(1)}%)`);
    // Most corpus files should parse without errors
    expect(errorRate).toBeLessThan(20); // Less than 20% error rate expected
  } catch (error) {
    tools.log(`Corpus error recovery test failed: ${error.message}`);
    throw error;
  }
  const totalDuration = Date.now() - startTime;
  PerformanceTracker.recordMetric('error-handling-corpus-recovery', totalDuration);
  tools.log(`Corpus error recovery completed in ${totalDuration}ms`);
});
/**
 * ERR-01 (summary): logs the timing summary that `PerformanceTracker` holds
 * for each metric name recorded by the ERR-01 tests. Metric names for which
 * `getSummary` yields a falsy value are skipped silently.
 */
tap.test('ERR-01: Performance Summary', async (tools) => {
  const metricNames = [
    'error-handling-malformed-xml',
    'error-handling-encoding-issues',
    'error-handling-partial-recovery',
    'error-handling-namespace-issues',
    'error-handling-corpus-recovery'
  ];

  tools.log(`\n=== Parsing Error Recovery Performance Summary ===`);

  for (const metricName of metricNames) {
    const stats = await PerformanceTracker.getSummary(metricName);
    if (!stats) {
      continue;
    }
    tools.log(`${metricName}:`);
    tools.log(` avg=${stats.average}ms, min=${stats.min}ms, max=${stats.max}ms, p95=${stats.p95}ms`);
  }

  tools.log(`\nParsing error recovery testing completed.`);
  tools.log(`Note: Some parsing errors are expected when testing error recovery mechanisms.`);
});