import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';

// Byte order marks that prefix the UTF-16 fixtures built in this file.
const UTF16_BE_BOM = Buffer.from([0xFE, 0xFF]);
const UTF16_LE_BOM = Buffer.from([0xFF, 0xFE]);

/**
 * Decodes a BOM-prefixed UTF-16 buffer into a string with the BOM removed.
 *
 * Buffer only offers a little-endian UTF-16 codec ('utf16le'), so content that
 * starts with the big-endian BOM (FE FF) is byte-swapped first. The swap is
 * done on a copy because `swap16()` mutates the buffer it is called on.
 *
 * @param {Buffer} content - UTF-16 encoded bytes, expected to start with a BOM.
 * @returns {string} The decoded text without a leading U+FEFF.
 * @throws {RangeError} From `swap16()` when big-endian content has an odd byte length.
 */
function decodeUtf16WithBom(content) {
  const isBigEndian = content[0] === 0xFE && content[1] === 0xFF;
  const littleEndian = isBigEndian ? Buffer.from(content).swap16() : content;
  return littleEndian.toString('utf16le').replace(/^\ufeff/, '');
}

/**
 * Prefixes `xml`, encoded as UTF-16 LE, with the little-endian BOM.
 *
 * @param {string} xml - Text to encode.
 * @returns {Buffer} BOM followed by the UTF-16 LE bytes of `xml`.
 */
function encodeUtf16LeWithBom(xml) {
  return Buffer.concat([UTF16_LE_BOM, Buffer.from(xml, 'utf16le')]);
}

tap.test('ENC-02: UTF-16 Encoding - should handle UTF-16 encoded documents correctly', async (t) => {
  // ENC-02: Verify correct handling of UTF-16 encoded XML documents (both BE and LE).
  // This test ensures proper support for UTF-16 encoding variants.
  //
  // NOTE(review): the fixtures below contain only text values, with no element
  // markup or XML declaration. Confirm they are intact before relying on the
  // parser-level assertions.
  const performanceTracker = new PerformanceTracker('ENC-02: UTF-16 Encoding');
  const corpusLoader = new CorpusLoader();

  t.test('UTF-16 BE (Big Endian) encoding', async () => {
    const startTime = performance.now();

    // Create UTF-16 BE content
    const xmlContent = ` 2.1 UTF16BE-TEST 2025-01-25 UTF-16 BE Test: €100 für Bücher EUR Großhändler GmbH 100.00 `;

    // Convert to UTF-16 BE with BOM: encode as LE, then swap each byte pair.
    const utf16BeContent = Buffer.from(xmlContent, 'utf16le').swap16();
    const contentWithBom = Buffer.concat([UTF16_BE_BOM, utf16BeContent]);

    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromBuffer(contentWithBom);

      const parsedData = einvoice.getInvoiceData();
      expect(parsedData).toBeTruthy();

      const xmlString = einvoice.getXmlString();
      expect(xmlString).toContain('UTF16BE-TEST');
      expect(xmlString).toContain('€100 für Bücher');
      expect(xmlString).toContain('Großhändler GmbH');
    } catch (error) {
      console.log('UTF-16 BE not fully supported:', error.message);

      // Alternative approach: decode manually and load the resulting string.
      // The content is big-endian, so it must be byte-swapped before the
      // little-endian decoder can read it (handled by decodeUtf16WithBom).
      const decoded = decodeUtf16WithBom(contentWithBom);
      await einvoice.loadFromString(decoded);
      expect(einvoice.getXmlString()).toContain('UTF16BE-TEST');
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('utf16-be', elapsed);
  });

  t.test('UTF-16 LE (Little Endian) encoding', async () => {
    const startTime = performance.now();

    // Create UTF-16 LE content
    const xmlContent = ` 2.1 UTF16LE-TEST 2025-01-25 UTF-16 LE: Special chars → ← ↑ ↓ ♠ ♣ ♥ ♦ François & Søren Ltd. `;

    // Convert to UTF-16 LE with BOM
    const contentWithBom = encodeUtf16LeWithBom(xmlContent);

    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromBuffer(contentWithBom);

      const xmlString = einvoice.getXmlString();
      expect(xmlString).toContain('UTF16LE-TEST');
      expect(xmlString).toContain('→ ← ↑ ↓');
      expect(xmlString).toContain('♠ ♣ ♥ ♦');
      expect(xmlString).toContain('François & Søren Ltd.');
    } catch (error) {
      console.log('UTF-16 LE not fully supported:', error.message);

      // Fallback: decode manually and load the resulting string.
      const decoded = decodeUtf16WithBom(contentWithBom);
      await einvoice.loadFromString(decoded);
      expect(einvoice.getXmlString()).toContain('UTF16LE-TEST');
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('utf16-le', elapsed);
  });

  t.test('UTF-16 without BOM', async () => {
    const startTime = performance.now();

    // UTF-16 without BOM (should detect from encoding declaration)
    const xmlContent = ` 2.1 UTF16-NO-BOM Ψ Ω α β γ δ ε ζ η θ `;

    // Create UTF-16 without BOM; Buffer's 'utf16le' codec always writes little-endian.
    const utf16Content = Buffer.from(xmlContent, 'utf16le');

    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromBuffer(utf16Content);

      const xmlString = einvoice.getXmlString();
      expect(xmlString).toContain('UTF16-NO-BOM');
      expect(xmlString).toContain('Ψ Ω α β γ δ ε ζ η θ');
    } catch (error) {
      // Best-effort case: a failure here (including a failed expectation) is only logged.
      console.log('UTF-16 without BOM requires explicit handling:', error.message);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('utf16-no-bom', elapsed);
  });

  t.test('UTF-16 surrogate pairs', async () => {
    const startTime = performance.now();

    // Test UTF-16 surrogate pairs (for characters outside BMP)
    const xmlContent = ` 2.1 UTF16-SURROGATE Emojis: 😀😃😄😁 Math: 𝕳𝖊𝖑𝖑𝖔 CJK Ext: 𠀀𠀁 Ancient scripts: \n𐌀𐌁𐌂 𓀀𓀁𓀂 `;

    const contentWithBom = encodeUtf16LeWithBom(xmlContent);

    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromBuffer(contentWithBom);

      const xmlString = einvoice.getXmlString();
      expect(xmlString).toContain('😀😃😄😁');
      expect(xmlString).toContain('𝕳𝖊𝖑𝖑𝖔');
      expect(xmlString).toContain('𠀀𠀁');
      expect(xmlString).toContain('𐌀𐌁𐌂');
      expect(xmlString).toContain('𓀀𓀁𓀂');
    } catch (error) {
      console.log('Surrogate pair handling:', error.message);

      // String approach: only checks that the decoded text can be loaded.
      const decoded = decodeUtf16WithBom(contentWithBom);
      await einvoice.loadFromString(decoded);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('utf16-surrogates', elapsed);
  });

  t.test('UTF-16 to UTF-8 conversion', async () => {
    const startTime = performance.now();

    // Test that UTF-16 input can be converted to UTF-8 output
    const xmlContent = ` 2.1 UTF16-TO-UTF8 Müller, François, 北京, Москва `;

    const contentWithBom = encodeUtf16LeWithBom(xmlContent);

    const einvoice = new EInvoice();
    try {
      // Load UTF-16 content
      await einvoice.loadFromBuffer(contentWithBom);

      // Get the document back as a JavaScript string
      const xmlString = einvoice.getXmlString();

      expect(xmlString).toContain('Müller');
      expect(xmlString).toContain('François');
      expect(xmlString).toContain('北京');
      expect(xmlString).toContain('Москва');

      // Round-trip through UTF-8 bytes; the text must survive unchanged.
      const utf8Buffer = Buffer.from(xmlString, 'utf8');
      expect(utf8Buffer.toString('utf8')).toBe(xmlString);
    } catch (error) {
      // Best-effort case: a failure here (including a failed expectation) is only logged.
      console.log('UTF-16 to UTF-8 conversion not supported:', error.message);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('utf16-to-utf8', elapsed);
  });

  t.test('Mixed content with UTF-16', async () => {
    const startTime = performance.now();

    const xmlContent = ` 2.1 UTF16-MIXED Payment terms: 30 days net • Early payment: 2% discount • Late payment: 1.5% interest → Bank: Sparkasse München ← Account: DE89 3704 0044 0532 0130 00 Bücher (10× @ €15) `;

    const contentWithBom = encodeUtf16LeWithBom(xmlContent);

    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromBuffer(contentWithBom);

      const xmlString = einvoice.getXmlString();
      expect(xmlString).toContain('•');
      expect(xmlString).toContain('→');
      expect(xmlString).toContain('←');
      expect(xmlString).toContain('×');
      expect(xmlString).toContain('€');
      expect(xmlString).toContain('Sparkasse München');
    } catch (error) {
      // Best-effort case: a failure here (including a failed expectation) is only logged.
      console.log('UTF-16 mixed content:', error.message);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('utf16-mixed', elapsed);
  });

  t.test('Corpus UTF-16 detection', async () => {
    const startTime = performance.now();

    let utf16Count = 0;
    let checkedCount = 0;
    let skippedCount = 0;

    const files = await corpusLoader.getAllFiles();
    const xmlFiles = files.filter((f) => f.endsWith('.xml'));

    // Check a sample (at most 30 files) for UTF-16 encoded files
    const sampleSize = Math.min(30, xmlFiles.length);
    const sample = xmlFiles.slice(0, sampleSize);

    for (const file of sample) {
      try {
        const content = await corpusLoader.readFile(file);

        if (Buffer.isBuffer(content)) {
          // Check for UTF-16 BOMs (FE FF = big-endian, FF FE = little-endian)
          const hasBeBom = content[0] === 0xFE && content[1] === 0xFF;
          const hasLeBom = content[0] === 0xFF && content[1] === 0xFE;
          if (hasBeBom || hasLeBom) {
            utf16Count++;
            console.log(`Found UTF-16 file: ${file}`);
          }
        }

        checkedCount++;
      } catch (error) {
        // Skip files that can't be read, but keep count so the scan result
        // does not silently hide read failures.
        skippedCount++;
      }
    }

    console.log(`UTF-16 corpus scan: ${utf16Count}/${checkedCount} files use UTF-16`);
    if (skippedCount > 0) {
      console.log(`UTF-16 corpus scan: skipped ${skippedCount} unreadable file(s)`);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('corpus-utf16', elapsed);
  });

  // NOTE(review): the t.test(...) calls above are not awaited, so the summary
  // and the assertion below may run before any measurement has been recorded.
  // Confirm against the tapbundle subtest semantics.

  // Print performance summary
  performanceTracker.printSummary();

  // Performance assertions
  const avgTime = performanceTracker.getAverageTime();
  expect(avgTime).toBeLessThan(150); // UTF-16 operations may be slightly slower than UTF-8
});

tap.start();