import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';

/**
 * Extracts a printable message from a caught value.
 *
 * Catch variables are `unknown` under strict compiler settings, so the value
 * is narrowed before `.message` is read. Non-Error values fall back to their
 * string form.
 */
function describeError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === 'object' && error !== null && 'message' in error) {
    return String((error as { message: unknown }).message);
  }
  return String(error);
}

/** Increments the counter stored under `label`, creating it on first use. */
function countIssue(issueTypes: Record<string, number>, label: string): void {
  issueTypes[label] = (issueTypes[label] ?? 0) + 1;
}

tap.test('ENC-09: Encoding Errors - should handle encoding errors and mismatches gracefully', async (t) => {
  // ENC-09: Verify proper handling of encoding errors and recovery strategies.
  // This test ensures the system can handle malformed encodings and mismatches.
  //
  // NOTE(review): the fixture literals below contain no XML markup or
  // `<?xml ... encoding=...?>` declarations as seen in this file, although the
  // comments describe declared encodings - confirm the fixtures are intact.
  const performanceTracker = new PerformanceTracker('ENC-09: Encoding Errors');
  const corpusLoader = new CorpusLoader();

  t.test('Encoding mismatch detection', async () => {
    const startTime = performance.now();

    // UTF-8 content declared as ISO-8859-1
    const utf8Content = ` 2.1 ENCODING-MISMATCH-001 UTF-8 content: € £ ¥ 中文 العربية русский Société Générale (société anonyme) `;

    const einvoice = new EInvoice();
    try {
      // Try loading with potential encoding mismatch
      await einvoice.loadFromString(utf8Content);
      const xmlString = einvoice.getXmlString();

      // Should handle the content somehow
      expect(xmlString).toContain('ENCODING-MISMATCH-001');

      // Check if special characters survived
      if (xmlString.includes('€') && xmlString.includes('中文')) {
        console.log('Encoding mismatch handled: UTF-8 content preserved');
      } else {
        console.log('Encoding mismatch resulted in character loss');
      }
    } catch (error: unknown) {
      const message = describeError(error);
      console.log('Encoding mismatch error:', message);
      expect(message).toMatch(/encoding|character|parse/i);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('encoding-mismatch', elapsed);
  });

  t.test('Invalid byte sequences', async () => {
    const startTime = performance.now();

    // Create buffer with invalid UTF-8 sequences
    const invalidUtf8 = Buffer.concat([
      Buffer.from('\n\nINVALID-BYTES\n'),
      Buffer.from([0xFF, 0xFE, 0xFD]), // Invalid UTF-8 bytes
      Buffer.from('\n')
    ]);

    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromBuffer(invalidUtf8);

      // If it succeeds, check how invalid bytes were handled
      const xmlString = einvoice.getXmlString();
      expect(xmlString).toContain('INVALID-BYTES');
      console.log('Invalid bytes were handled/replaced');
    } catch (error: unknown) {
      const message = describeError(error);
      console.log('Invalid byte sequence error:', message);
      expect(message).toMatch(/invalid|malformed|byte|sequence/i);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('invalid-bytes', elapsed);
  });

  t.test('Incomplete multi-byte sequences', async () => {
    const startTime = performance.now();

    // Create UTF-8 with incomplete multi-byte sequences
    const incompleteSequences = [
      Buffer.from('\n\n'),
      Buffer.from('Test '),
      Buffer.from([0xC3]), // Incomplete 2-byte sequence (missing second byte)
      Buffer.from(' text '),
      Buffer.from([0xE2, 0x82]), // Incomplete 3-byte sequence (missing third byte)
      Buffer.from(' end\n')
    ];
    const incompleteUtf8 = Buffer.concat(incompleteSequences);

    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromBuffer(incompleteUtf8);
      const xmlString = einvoice.getXmlString();
      console.log('Incomplete sequences were handled');
      expect(xmlString).toContain('Test');
      expect(xmlString).toContain('text');
      expect(xmlString).toContain('end');
    } catch (error: unknown) {
      const message = describeError(error);
      console.log('Incomplete sequence error:', message);
      expect(message).toMatch(/incomplete|invalid|sequence/i);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('incomplete-sequences', elapsed);
  });

  t.test('Wrong encoding declaration', async () => {
    const startTime = performance.now();

    // UTF-16 content with UTF-8 declaration
    const utf16Content = Buffer.from(
      '\n\nWRONG-DECL\nUTF-16 content\n',
      'utf16le'
    );

    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromBuffer(utf16Content);

      // Might detect and handle the mismatch. The serialized result is not
      // inspected; the call is kept so a serialization failure is still
      // routed to the catch block below.
      einvoice.getXmlString();
      console.log('Wrong encoding declaration handled');
    } catch (error: unknown) {
      const message = describeError(error);
      console.log('Wrong encoding declaration:', message);
      expect(message).toMatch(/encoding|parse|invalid/i);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('wrong-declaration', elapsed);
  });

  t.test('Mixed encoding in single document', async () => {
    const startTime = performance.now();

    // Document with mixed encodings (simulated by incorrect concatenation)
    const mixedEncoding = Buffer.concat([
      Buffer.from('\n\n'),
      Buffer.from('UTF-8 text: München', 'utf8'),
      Buffer.from('\n'),
      Buffer.from('Latin-1 text: ', 'utf8'),
      Buffer.from('Düsseldorf', 'latin1'), // Different encoding
      Buffer.from('\n', 'utf8')
    ]);

    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromBuffer(mixedEncoding);
      const xmlString = einvoice.getXmlString();

      // Check which parts survived
      expect(xmlString).toContain('München'); // Should be correct
      // Düsseldorf might be garbled
      console.log('Mixed encoding document processed');
    } catch (error: unknown) {
      // Best effort: a rejection of the mixed document is only reported.
      console.log('Mixed encoding error:', describeError(error));
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('mixed-encoding', elapsed);
  });

  t.test('Unsupported encoding declarations', async () => {
    const startTime = performance.now();

    const unsupportedEncodings = [
      'EBCDIC',
      'Shift_JIS',
      'Big5',
      'KOI8-R',
      'Windows-1252'
    ];

    for (const encoding of unsupportedEncodings) {
      const xmlContent = ` UNSUPPORTED-${encoding} Test with ${encoding} encoding `;

      const einvoice = new EInvoice();
      try {
        await einvoice.loadFromString(xmlContent);

        // Some parsers might handle it anyway
        const xmlString = einvoice.getXmlString();
        console.log(`${encoding} encoding handled`);
        expect(xmlString).toContain(`UNSUPPORTED-${encoding}`);
      } catch (error: unknown) {
        const message = describeError(error);
        console.log(`${encoding} encoding error:`, message);
        expect(message).toMatch(/unsupported|encoding|unknown/i);
      }
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('unsupported-encodings', elapsed);
  });

  t.test('BOM conflicts', async () => {
    const startTime = performance.now();

    // UTF-8 BOM with UTF-16 declaration
    const conflictBuffer = Buffer.concat([
      Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
      Buffer.from('\n\nBOM-CONFLICT\n')
    ]);

    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromBuffer(conflictBuffer);
      const xmlString = einvoice.getXmlString();
      console.log('BOM conflict resolved');
      expect(xmlString).toContain('BOM-CONFLICT');
    } catch (error: unknown) {
      console.log('BOM conflict error:', describeError(error));
    }

    // UTF-16 LE BOM with UTF-8 declaration
    const conflictBuffer2 = Buffer.concat([
      Buffer.from([0xFF, 0xFE]), // UTF-16 LE BOM
      Buffer.from('\n\nBOM-CONFLICT-2\n', 'utf16le')
    ]);

    try {
      // The same EInvoice instance is reused for the second buffer.
      await einvoice.loadFromBuffer(conflictBuffer2);
      console.log('UTF-16 BOM with UTF-8 declaration handled');
    } catch (error: unknown) {
      console.log('UTF-16 BOM conflict:', describeError(error));
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('bom-conflicts', elapsed);
  });

  t.test('Character normalization issues', async () => {
    const startTime = performance.now();

    // Different Unicode normalization forms
    const nfcContent = ` NORM-NFC Café (NFC: U+00E9) André `;

    // Same content but with NFD (decomposed)
    // NOTE(review): confirm this literal is really stored in decomposed form;
    // editors and tooling frequently re-compose such text.
    const nfdContent = ` NORM-NFD Café (NFD: U+0065 U+0301) André `;

    const einvoice1 = new EInvoice();
    const einvoice2 = new EInvoice();

    await einvoice1.loadFromString(nfcContent);
    await einvoice2.loadFromString(nfdContent);

    const xml1 = einvoice1.getXmlString();
    const xml2 = einvoice2.getXmlString();

    // Both should work but might normalize differently
    expect(xml1).toContain('Café');
    expect(xml2).toContain('Café');
    expect(xml1).toContain('André');
    expect(xml2).toContain('André');

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('normalization', elapsed);
  });

  t.test('Encoding error recovery strategies', async () => {
    const startTime = performance.now();

    // Test various recovery strategies
    const problematicContent = Buffer.concat([
      Buffer.from('\n\n\n'),
      Buffer.from(''),
      Buffer.from(''),
      Buffer.from([0xC0, 0x80]), // Overlong encoding (security issue)
      Buffer.from('99.99'),
      Buffer.from('\n\n')
    ]);

    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromBuffer(problematicContent);
      const xmlString = einvoice.getXmlString();
      console.log('Problematic content recovered');

      // Check what survived
      expect(xmlString).toContain('Test');
      expect(xmlString).toContain('Product');
      expect(xmlString).toContain('99.99');
    } catch (error: unknown) {
      console.log('Recovery failed:', describeError(error));

      // Try fallback strategies
      try {
        // Remove invalid bytes. Decoding as UTF-8 turns every invalid byte
        // into U+FFFD, so the replacement character is stripped together with
        // the C0/C1 control characters; otherwise the invalid input would
        // survive the cleanup as U+FFFD.
        const cleaned = problematicContent
          .toString('utf8')
          .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F\uFFFD]/g, '');

        await einvoice.loadFromString(cleaned);
        console.log('Fallback recovery succeeded');
      } catch (fallbackError: unknown) {
        console.log('Fallback also failed:', describeError(fallbackError));
      }
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('error-recovery', elapsed);
  });

  t.test('Corpus encoding error analysis', async () => {
    const startTime = performance.now();

    let processedCount = 0;
    let encodingIssues = 0;
    const issueTypes: Record<string, number> = {};

    const files = await corpusLoader.getAllFiles();
    const xmlFiles = files.filter(f => f.endsWith('.xml'));

    // Check corpus for encoding issues (at most the first 100 XML files)
    const sampleSize = Math.min(100, xmlFiles.length);
    const sample = xmlFiles.slice(0, sampleSize);

    for (const file of sample) {
      try {
        const content = await corpusLoader.readFile(file);
        const einvoice = new EInvoice();

        // Try to detect encoding issues
        if (Buffer.isBuffer(content)) {
          // Check for BOM
          if (content.length >= 3) {
            if (content[0] === 0xEF && content[1] === 0xBB && content[2] === 0xBF) {
              countIssue(issueTypes, 'UTF-8 BOM');
            } else if (content[0] === 0xFF && content[1] === 0xFE) {
              countIssue(issueTypes, 'UTF-16 LE BOM');
            } else if (content[0] === 0xFE && content[1] === 0xFF) {
              countIssue(issueTypes, 'UTF-16 BE BOM');
            }
          }

          // Try parsing; a parse failure is counted, but the file still
          // counts as processed.
          try {
            await einvoice.loadFromBuffer(content);
          } catch (parseError: unknown) {
            encodingIssues++;
            if (/encoding/i.test(describeError(parseError))) {
              countIssue(issueTypes, 'Encoding error');
            }
          }
        } else {
          await einvoice.loadFromString(content);
        }

        processedCount++;
      } catch (error: unknown) {
        // Read failures and string-load failures land here.
        encodingIssues++;
        countIssue(issueTypes, 'General error');
      }
    }

    console.log(`Encoding error corpus analysis (${processedCount} files):`);
    console.log(`- Files with encoding issues: ${encodingIssues}`);
    console.log('Issue types:', issueTypes);

    expect(processedCount).toBeGreaterThan(0);

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('corpus-errors', elapsed);
  });

  // Print performance summary
  performanceTracker.printSummary();

  // Performance assertions
  const avgTime = performanceTracker.getAverageTime();
  expect(avgTime).toBeLessThan(200); // Error handling may be slower
});

tap.start();