import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';

/**
 * Loads `xml` into a fresh EInvoice and returns the XML string it reports back.
 *
 * @param {string} xml - Document text handed to `loadFromString`.
 * @returns {Promise<string>} Result of `getXmlString()` on the loaded invoice.
 */
const roundTrip = async (xml) => {
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xml);
  return einvoice.getXmlString();
};

/**
 * Asserts that every entry of `needles` occurs in `haystack`.
 *
 * @param {string} haystack - Text to search.
 * @param {string[]} needles - Substrings that must all be present.
 */
const expectAllContained = (haystack, needles) => {
  for (const needle of needles) {
    expect(haystack).toContain(needle);
  }
};

/**
 * Turns any thrown value into a printable message; `throw` is not limited to
 * Error instances, so `.message` alone may be undefined.
 *
 * @param {unknown} error - Value received by a `catch` clause.
 * @returns {string} Human-readable description.
 */
const describeError = (error) => (error instanceof Error ? error.message : String(error));

// ENC-01: Verify correct handling of UTF-8 encoded XML documents.
// This test ensures that the library can properly read, process, and write
// UTF-8 encoded invoices.
//
// NOTE(review): the XML fixtures below contain no element markup in this
// revision (they look tag-stripped) — confirm against the intended fixtures
// before relying on the assertions that look for attributes or declarations.
tap.test('ENC-01: UTF-8 Encoding - should handle UTF-8 encoded documents correctly', async (t) => {
  const performanceTracker = new PerformanceTracker('ENC-01: UTF-8 Encoding');
  const corpusLoader = new CorpusLoader();

  /**
   * Runs `body` and records its wall-clock duration under `label`.
   * The measurement is only recorded when `body` completes without throwing.
   */
  const measure = async (label, body) => {
    const startTime = performance.now();
    await body();
    performanceTracker.addMeasurement(label, performance.now() - startTime);
  };

  // NOTE(review): the t.test(...) calls are not awaited, so whether the
  // sub-tests have finished before the summary at the bottom runs depends on
  // how the tap runner schedules them — confirm.

  t.test('Basic UTF-8 encoding support', () =>
    measure('basic-utf8', async () => {
      // UTF-8 content mixing Latin, CJK, Arabic, Cyrillic and emoji characters.
      const utf8Content = ` 2.1 urn:cen.eu:en16931:2017#compliant#urn:fdc:peppol.eu:2017:poacc:billing:3.0 urn:fdc:peppol.eu:2017:poacc:billing:01:1.0 UTF8-TEST-001 2025-01-25 380 UTF-8 Test: €£¥ñüäöß 中文 العربية русский 日本語 한국어 🌍📧 EUR UTF-8 Supplier GmbH Büßer & Müller GmbH 100.00 119.00 119.00 `;

      const xmlString = await roundTrip(utf8Content);

      // The encoding declaration and every character group must survive.
      expectAllContained(xmlString, [
        'encoding="UTF-8"',
        '€£¥ñüäöß',
        '中文',
        'العربية',
        'русский',
        '日本語',
        '한국어',
        '🌍📧',
        'Büßer & Müller GmbH',
      ]);
    }),
  );

  t.test('UTF-8 BOM handling', () =>
    measure('utf8-bom', async () => {
      // Prefix the document with the UTF-8 byte order mark (EF BB BF).
      const utf8BOM = Buffer.from([0xEF, 0xBB, 0xBF]);
      const xmlContent = ` 2.1 UTF8-BOM-TEST 2025-01-25 UTF-8 with BOM: Spëcïål Chäracters `;
      const contentWithBOM = Buffer.concat([utf8BOM, Buffer.from(xmlContent, 'utf8')]);

      const einvoice = new EInvoice();
      try {
        await einvoice.loadFromBuffer(contentWithBOM);
      } catch (error) {
        // Only a failing load is tolerated as "BOM not supported". The
        // assertions live outside this try so that a failed expectation
        // cannot be mistaken for missing BOM support and silently pass.
        console.log('UTF-8 BOM handling not supported:', describeError(error));
        return;
      }

      const parsedData = einvoice.getInvoiceData();
      expect(parsedData).toBeTruthy();

      const xmlString = einvoice.getXmlString();
      expectAllContained(xmlString, ['UTF8-BOM-TEST', 'Spëcïål Chäracters']);

      // The BOM must not leak into the output as a leading U+FEFF.
      expect(xmlString.charCodeAt(0)).not.toBe(0xFEFF);
    }),
  );

  t.test('UTF-8 without explicit declaration', () =>
    measure('implicit-utf8', async () => {
      // No encoding declaration: the content is expected to be read as UTF-8.
      const implicitUtf8 = ` 2.1 IMPLICIT-UTF8 Köln München København `;

      const xmlString = await roundTrip(implicitUtf8);

      expect(xmlString).toContain('Köln München København');
    }),
  );

  t.test('Multi-byte UTF-8 sequences', () =>
    measure('multibyte-utf8', async () => {
      // Characters whose UTF-8 encodings are 2, 3 and 4 bytes long.
      const multiByteContent = ` 2.1 MULTIBYTE-UTF8 2-byte: £¥€ñüäöß 3-byte: ₹₽₨ 中文漢字 4-byte: 𝕳𝖊𝖑𝖑𝖔 🎉🌍🚀 Mixed: Prix: 42,50€ (včetně DPH) `;

      const xmlString = await roundTrip(multiByteContent);

      expectAllContained(xmlString, [
        '£¥€ñüäöß',
        '₹₽₨',
        '中文漢字',
        '𝕳𝖊𝖑𝖑𝖔',
        '🎉🌍🚀',
        '42,50€',
        'včetně DPH',
      ]);
    }),
  );

  t.test('UTF-8 encoding in attributes', () =>
    measure('utf8-attributes', async () => {
      // NOTE(review): the visible fixture contains neither `name="Überweisung"`
      // nor `symbol="€"`; the attribute markup seems to be missing — confirm.
      const attributeContent = ` 2.1 UTF8-ATTR-TEST 30 Büro für Städtebau Sparkasse Köln/Bonn 19.00 `;

      const xmlString = await roundTrip(attributeContent);

      expectAllContained(xmlString, [
        'name="Überweisung"',
        'Büro für Städtebau',
        'Sparkasse Köln/Bonn',
        'symbol="€"',
      ]);
    }),
  );

  t.test('UTF-8 corpus validation', () =>
    measure('corpus-utf8', async () => {
      let processedCount = 0;
      let utf8Count = 0;

      const files = await corpusLoader.getAllFiles();
      const xmlFiles = files.filter((f) => f.endsWith('.xml'));

      // Only the first 50 XML files (or fewer) are sampled.
      const sampleSize = Math.min(50, xmlFiles.length);
      const sample = xmlFiles.slice(0, sampleSize);

      for (const file of sample) {
        let xmlString;
        try {
          const content = await corpusLoader.readFile(file);
          const einvoice = new EInvoice();
          if (typeof content === 'string') {
            await einvoice.loadFromString(content);
          } else {
            await einvoice.loadFromBuffer(content);
          }
          xmlString = einvoice.getXmlString();
        } catch (error) {
          // Files that cannot be read or loaded are skipped, but the reason
          // is reported instead of being discarded.
          console.log(`Non-UTF-8 or invalid file: ${file} (${describeError(error)})`);
          continue;
        }

        // Assertions sit outside the try so a failure is not swallowed as
        // "invalid file".
        expect(xmlString).toBeTruthy();
        expect(xmlString.length).toBeGreaterThan(0);

        // Count documents whose output explicitly declares UTF-8.
        if (xmlString.includes('encoding="UTF-8"') || xmlString.includes("encoding='UTF-8'")) {
          utf8Count++;
        }
        processedCount++;
      }

      console.log(`UTF-8 corpus test: ${utf8Count}/${processedCount} files explicitly use UTF-8`);
      expect(processedCount).toBeGreaterThan(0);
    }),
  );

  t.test('UTF-8 normalization', () =>
    measure('utf8-normalization', async () => {
      // Intended to cover Unicode normalization forms (NFC, NFD).
      // NOTE(review): verify that the second "Café" in the fixture really is
      // in decomposed form (e + U+0301); the assertion below checks a single
      // literal and cannot tell the two forms apart.
      const unnormalizedContent = ` 2.1 NORMALIZATION-TEST Café (NFC) vs Café (NFD) André's Büro `;

      const xmlString = await roundTrip(unnormalizedContent);

      expectAllContained(xmlString, ['Café', "André's Büro"]);
    }),
  );

  // Print performance summary
  performanceTracker.printSummary();

  // Performance assertion: UTF-8 operations should be fast. The bound is
  // compared against whatever getAverageTime() reports for the measurements
  // recorded above.
  const avgTime = performanceTracker.getAverageTime();
  expect(avgTime).toBeLessThan(100);
});

tap.start();