import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';

/**
 * Escapes the characters that must not appear literally in XML text content.
 *
 * @param {string} text - Plain text as it would appear after XML parsing.
 * @returns {string} The text with `&`, `<` and `>` replaced by their predefined entities.
 */
const escapeXmlText = (text) =>
  text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

/**
 * Replaces decimal (`&#8364;`) and hexadecimal (`&#x20AC;`) character references
 * with the characters they denote, so that a serializer's choice between literal
 * characters and numeric references does not affect substring comparisons.
 *
 * @param {string} xml - Serialized XML.
 * @returns {string} The XML with every in-range numeric character reference decoded.
 */
const decodeNumericReferences = (xml) =>
  xml.replace(/&#(?:x([0-9a-fA-F]+)|([0-9]+));/g, (reference, hexDigits, decimalDigits) => {
    const codePoint =
      hexDigits !== undefined
        ? Number.parseInt(hexDigits, 16)
        : Number.parseInt(decimalDigits, 10);
    // String.fromCodePoint throws above U+10FFFF, so leave such references untouched.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : reference;
  });

/**
 * Tells whether `text` survives in `xml`, either literally, with its markup
 * characters escaped (`&` -> `&amp;`), or written as numeric character references.
 *
 * @param {string} xml - Serialized XML to search.
 * @param {string} text - Plain text expected to be present.
 * @returns {boolean} True when any equivalent representation of `text` is found.
 */
const isPreservedInXml = (xml, text) => {
  const decodedXml = decodeNumericReferences(xml);
  return decodedXml.includes(text) || decodedXml.includes(escapeXmlText(text));
};

// ENC-01: Verify correct handling of UTF-8 encoded XML documents.
// This test ensures that the library can properly read, process, and write UTF-8 encoded invoices.
// It runs seven timed sub-checks through PerformanceTracker.track and finally asserts on
// the average duration of all of them.
//
// NOTE(review): the inline fixtures below contain no XML markup in this copy of the file, and
// three template literals contain a line break — presumably artifacts of how the file was
// extracted. The strings are kept exactly as found; confirm against version control.
tap.test('ENC-01: UTF-8 Encoding - should handle UTF-8 encoded documents correctly', async () => {
  // Test 1: Basic UTF-8 encoding support
  console.log('\nTest 1: Basic UTF-8 encoding support');
  const { result: utf8Result, metric: utf8Metric } = await PerformanceTracker.track(
    'basic-utf8',
    async () => {
      // Test with UTF-8 encoded content containing various characters
      const utf8Content = ` 2.1 urn:cen.eu:en16931:2017#compliant#urn:fdc:peppol.eu:2017:poacc:billing:3.0 urn:fdc:peppol.eu:2017:poacc:billing:01:1.0 UTF8-TEST-001 2025-01-25 380 UTF-8 Test: €£¥ñüäöß 中文 العربية русский 日本語 한국어 🌍📧 EUR UTF-8 Supplier GmbH Büßer & Müller GmbH 100.00 119.00 119.00 `;

      const einvoice = new EInvoice();
      await einvoice.fromXmlString(utf8Content);

      // Verify encoding is preserved
      const xmlString = await einvoice.toXmlString('ubl');

      // Debug: Check what's actually in the XML
      console.log(' XML contains encoding declaration:', xmlString.includes('encoding="UTF-8"'));
      console.log(' Invoice ID from object:', einvoice.invoiceId);
      console.log(' Sample of XML output:', xmlString.substring(0, 500));

      // Check if characters are preserved or encoded
      const charactersToCheck = ['€£¥ñüäöß', '中文', 'العربية', 'русский', '日本語', '한국어', '🌍📧', 'Büßer & Müller GmbH'];
      let allPreserved = true;

      for (const chars of charactersToCheck) {
        if (xmlString.includes(chars)) {
          continue;
        }
        console.log(` Characters "${chars}" not found in XML`);

        // Check if they're XML-encoded. Iterate by code point (not UTF-16 unit) so that
        // characters outside the BMP, such as emoji, yield a single valid reference.
        const encoded = [...chars].map((c) => `&#${c.codePointAt(0)};`).join('');
        if (xmlString.includes(encoded)) {
          console.log(` Found as 
XML entities: ${encoded}`);
        }

        // Escaped or numerically referenced text is equivalent after parsing; only a
        // value that is missing in every representation counts as lost.
        if (!isPreservedInXml(xmlString, chars)) {
          allPreserved = false;
        }
      }

      expect(xmlString).toContain('encoding="UTF-8"');

      // Report the outcome actually observed instead of a hard-coded `true`.
      return { success: true, charactersPreserved: allPreserved };
    }
  );

  console.log(` UTF-8 encoding test completed in ${utf8Metric.duration}ms`);
  expect(utf8Result.success).toBeTrue();
  expect(utf8Result.charactersPreserved).toBeTrue();

  // Test 2: UTF-8 BOM handling
  console.log('\nTest 2: UTF-8 BOM handling');
  const { result: bomResult, metric: bomMetric } = await PerformanceTracker.track(
    'utf8-bom',
    async () => {
      // Test with UTF-8 BOM (Byte Order Mark)
      const utf8BOM = Buffer.from([0xEF, 0xBB, 0xBF]);
      const xmlContent = ` 2.1 UTF8-BOM-TEST 2025-01-25 UTF-8 with BOM: Spëcïål Chäracters `;
      const contentWithBOM = Buffer.concat([utf8BOM, Buffer.from(xmlContent, 'utf8')]);

      const einvoice = new EInvoice();
      let bomHandled = false;
      let errorMessage = '';

      // NOTE(review): the assertions sit inside the try block, so a failed expectation is
      // caught below and reported as "not supported" rather than failing the test, and
      // nothing afterwards asserts on bomResult. Kept as-is because it looks deliberate.
      try {
        await einvoice.fromXmlString(contentWithBOM.toString('utf8'));

        // Verify BOM is handled correctly
        expect(einvoice.invoiceId).toEqual('UTF8-BOM-TEST');

        const xmlString = await einvoice.toXmlString('ubl');
        expect(xmlString).toContain('UTF8-BOM-TEST');
        expect(xmlString).toContain('Spëcïål Chäracters');

        // BOM should not appear in the output
        expect(xmlString.charCodeAt(0)).not.toEqual(0xFEFF);
        bomHandled = true;
      } catch (error) {
        // Some implementations might not support BOM
        errorMessage = error.message;
        console.log(' UTF-8 BOM handling not supported:', errorMessage);
      }

      return { bomHandled, errorMessage };
    }
  );

  console.log(` UTF-8 BOM test completed in ${bomMetric.duration}ms`);
  if (bomResult.bomHandled) {
    console.log(' BOM was handled correctly');
  }

  // Test 3: UTF-8 without explicit declaration
  console.log('\nTest 3: UTF-8 without explicit declaration');
  const { result: implicitResult, metric: implicitMetric } = await PerformanceTracker.track(
    'implicit-utf8',
    async () => {
      // Test UTF-8 content without encoding declaration (should default to UTF-8)
      const implicitUtf8 = ` 2.1 IMPLICIT-UTF8 Köln München København `;

      const einvoice = new EInvoice();
      await einvoice.fromXmlString(implicitUtf8);

      // Verify UTF-8 is used by default
      const xmlString = await einvoice.toXmlString('ubl');
      expect(xmlString).toContain('Köln München København');

      return {
        success: true,
        charactersPreserved: xmlString.includes('Köln München København')
      };
    }
  );

  console.log(` Implicit UTF-8 test completed in ${implicitMetric.duration}ms`);
  expect(implicitResult.success).toBeTrue();
  expect(implicitResult.charactersPreserved).toBeTrue();

  // Test 4: Multi-byte UTF-8 sequences
  console.log('\nTest 4: Multi-byte UTF-8 sequences');
  const { result: multiByteResult, metric: multiByteMetric } = await PerformanceTracker.track(
    'multibyte-utf8',
    async () => {
      // Test various UTF-8 multi-byte sequences
      const multiByteContent = ` 2.1 MULTIBYTE-UTF8 2-byte: £¥€ñüäöß 3-byte: ₹₽₨ 中文漢字 4-byte: 𝕳𝖊𝖑𝖑𝖔 🎉🌍🚀 Mixed: Prix: 42,50€ (včetně DPH) `;

      const einvoice = new EInvoice();
      await einvoice.fromXmlString(multiByteContent);

      const xmlString = await einvoice.toXmlString('ubl');

      // Verify all multi-byte sequences are preserved
      expect(xmlString).toContain('£¥€ñüäöß');
      expect(xmlString).toContain('₹₽₨');
      expect(xmlString).toContain('中文漢字');
      expect(xmlString).toContain('𝕳𝖊𝖑𝖑𝖔');
      expect(xmlString).toContain('🎉🌍🚀');
      expect(xmlString).toContain('42,50€');
      expect(xmlString).toContain('včetně DPH');

      return {
        success: true,
        allSequencesPreserved: true,
        testedSequences: ['2-byte', '3-byte', '4-byte', 'mixed']
      };
    }
  );

  console.log(` Multi-byte UTF-8 test completed in ${multiByteMetric.duration}ms`);
  console.log(` Tested ${multiByteResult.testedSequences.join(', ')} sequences`);
  expect(multiByteResult.success).toBeTrue();
  expect(multiByteResult.allSequencesPreserved).toBeTrue();

  // Test 5: UTF-8 encoding in attributes
  console.log('\nTest 5: UTF-8 encoding in attributes');
  const { result: attributeResult, metric: attributeMetric } = await PerformanceTracker.track(
    'utf8-attributes',
    async () => {
      const attributeContent = ` 2.1 UTF8-ATTR-TEST 30 Büro für 
Städtebau Sparkasse Köln/Bonn 19.00 `;

      const einvoice = new EInvoice();
      await einvoice.fromXmlString(attributeContent);

      const xmlString = await einvoice.toXmlString('ubl');
      expect(xmlString).toContain('name="Überweisung"');
      expect(xmlString).toContain('Büro für Städtebau');
      expect(xmlString).toContain('Sparkasse Köln/Bonn');
      expect(xmlString).toContain('symbol="€"');

      return {
        success: true,
        attributesPreserved: true,
        checkedAttributes: ['name="Überweisung"', 'symbol="€"']
      };
    }
  );

  console.log(` UTF-8 attributes test completed in ${attributeMetric.duration}ms`);
  console.log(` Checked attributes: ${attributeResult.checkedAttributes.join(', ')}`);
  expect(attributeResult.success).toBeTrue();
  expect(attributeResult.attributesPreserved).toBeTrue();

  // Test 6: UTF-8 corpus validation
  console.log('\nTest 6: UTF-8 corpus validation');
  const { result: corpusResult, metric: corpusMetric } = await PerformanceTracker.track(
    'corpus-utf8',
    async () => {
      let processedCount = 0;
      let utf8Count = 0;

      // Load XML files from various categories
      const ciiFiles = await CorpusLoader.loadCategory('CII_XMLRECHNUNG');
      const ublFiles = await CorpusLoader.loadCategory('UBL_XMLRECHNUNG');
      const allFiles = [...ciiFiles, ...ublFiles];

      // Test a sample of XML files for UTF-8 handling
      const sampleSize = Math.min(50, allFiles.length);
      const sample = allFiles.slice(0, sampleSize);

      for (const file of sample) {
        try {
          const buffer = await CorpusLoader.loadFile(file.path);
          const content = buffer.toString('utf8');

          const einvoice = new EInvoice();
          await einvoice.fromXmlString(content);

          const xmlString = await einvoice.toXmlString('ubl');

          // Check if encoding is preserved or defaulted to UTF-8
          if (xmlString.includes('encoding="UTF-8"') || xmlString.includes("encoding='UTF-8'")) {
            utf8Count++;
          }

          // Verify content is properly encoded
          expect(xmlString).toBeTruthy();
          expect(xmlString.length).toBeGreaterThan(0);

          processedCount++;
        } catch (error) {
          // Some files might have different encodings.
          // Log the path: interpolating the entry object itself prints "[object Object]".
          console.log(` 
Non-UTF-8 or invalid file: ${file.path}`);
        }
      }

      return { processedCount, utf8Count, sampleSize };
    }
  );

  console.log(` UTF-8 corpus test completed in ${corpusMetric.duration}ms`);
  console.log(` Processed ${corpusResult.processedCount}/${corpusResult.sampleSize} files`);
  console.log(` ${corpusResult.utf8Count} files explicitly use UTF-8`);
  expect(corpusResult.processedCount).toBeGreaterThan(0);

  // Test 7: UTF-8 normalization
  console.log('\nTest 7: UTF-8 normalization');
  const { result: normalizationResult, metric: normalizationMetric } = await PerformanceTracker.track(
    'utf8-normalization',
    async () => {
      // Test Unicode normalization forms (NFC, NFD)
      // NOTE(review): confirm the second "Café" in the fixture is really stored in
      // decomposed form; the two spellings are indistinguishable when rendered.
      const unnormalizedContent = ` 2.1 NORMALIZATION-TEST Café (NFC) vs Café (NFD) André's Büro `;

      const einvoice = new EInvoice();
      await einvoice.fromXmlString(unnormalizedContent);

      const xmlString = await einvoice.toXmlString('ubl');

      // Both forms should be preserved
      expect(xmlString).toContain('Café');
      expect(xmlString).toContain("André's Büro");

      return {
        success: true,
        normalizationPreserved: true,
        testedForms: ['NFC', 'NFD']
      };
    }
  );

  console.log(` UTF-8 normalization test completed in ${normalizationMetric.duration}ms`);
  console.log(` Tested normalization forms: ${normalizationResult.testedForms.join(', ')}`);
  expect(normalizationResult.success).toBeTrue();
  expect(normalizationResult.normalizationPreserved).toBeTrue();

  // Calculate and display overall performance metrics
  const allMetrics = [
    utf8Metric.duration,
    bomMetric.duration,
    implicitMetric.duration,
    multiByteMetric.duration,
    attributeMetric.duration,
    corpusMetric.duration,
    normalizationMetric.duration
  ];

  const avgTime = allMetrics.reduce((sum, time) => sum + time, 0) / allMetrics.length;
  const maxTime = Math.max(...allMetrics);
  const minTime = Math.min(...allMetrics);

  console.log('\n--- Performance Summary ---');
  console.log(`Average time: ${avgTime.toFixed(2)}ms`);
  console.log(`Min time: ${minTime.toFixed(2)}ms`);
  console.log(`Max time: ${maxTime.toFixed(2)}ms`);

  // Performance assertions
  // NOTE(review): the average includes the corpus run over up to 50 files, so this
  // threshold may be sensitive to machine load — confirm it is stable in CI.
  expect(avgTime).toBeLessThan(100); // UTF-8 operations should be fast

  console.log('\n✓ All UTF-8 encoding tests completed successfully');
});

tap.start();