import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';

/**
 * Escapes the characters that must be escaped inside XML text content.
 *
 * @param text - Raw text as it was assigned to the invoice.
 * @returns The text with `&`, `<` and `>` replaced by their predefined entities.
 */
function escapeXmlText(text: string): string {
  // `&` must be replaced first so the entities produced below are not escaped again.
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}

/**
 * Rewrites every non-ASCII character as a decimal XML numeric character reference.
 *
 * Iterates by Unicode code point (not UTF-16 code unit) so that characters outside
 * the BMP, such as emoji, yield a single reference like `&#127757;` instead of two
 * surrogate references, which are not legal XML character references.
 *
 * @param text - Text to convert; ASCII characters are passed through unchanged.
 * @returns The text with all code points above 127 replaced by `&#N;`.
 */
function toNumericCharacterReferences(text: string): string {
  return Array.from(text, (char) => {
    const codePoint = char.codePointAt(0) ?? 0;
    return codePoint > 127 ? `&#${codePoint};` : char;
  }).join('');
}

/**
 * ENC-01: exercises UTF-8 handling of EInvoice through seven scenarios
 * (basic export/round-trip, BOM input, encoding declaration, multi-byte
 * sequences, special characters in identifiers, corpus sample, normalization)
 * and prints a timing summary collected via PerformanceTracker.
 */
tap.test('ENC-01: UTF-8 Encoding - should handle UTF-8 encoded documents correctly', async () => {
  // ENC-01: Verify correct handling of UTF-8 encoded XML documents
  // This test ensures that the library can properly read, process, and write UTF-8 encoded invoices

  // Test 1: Basic UTF-8 encoding support
  console.log('\nTest 1: Basic UTF-8 encoding support');

  const { result: utf8Result, metric: utf8Metric } = await PerformanceTracker.track(
    'basic-utf8',
    async () => {
      // Create invoice with UTF-8 characters in various fields
      const einvoice = new EInvoice();
      einvoice.id = 'UTF8-TEST-€£¥-001';
      einvoice.issueDate = new Date(2025, 0, 25);
      einvoice.invoiceId = 'UTF8-TEST-€£¥-001';
      einvoice.accountingDocId = 'UTF8-TEST-€£¥-001';
      einvoice.subject = 'UTF-8 Test: €£¥ñüäöß 中文 العربية русский 日本語 한국어 🌍📧';
      einvoice.notes = ['Special chars test: Zürich, Köln, München, København'];

      // Set supplier with UTF-8 characters
      einvoice.from = {
        type: 'company',
        name: 'Büßer & Müller GmbH',
        description: 'German company with umlauts äöüß',
        address: {
          streetName: 'Hauptstraße',
          houseNumber: '42',
          postalCode: '80331',
          city: 'München',
          country: 'DE'
        },
        status: 'active',
        foundedDate: { year: 2020, month: 1, day: 1 },
        registrationDetails: {
          vatId: 'DE123456789',
          registrationId: 'HRB 12345',
          registrationName: 'Handelsregister München'
        }
      };

      // Set customer with UTF-8 characters
      einvoice.to = {
        type: 'company',
        name: 'José García S.L.',
        description: 'Spanish company with ñ',
        address: {
          streetName: 'Calle Alcalá',
          houseNumber: '123',
          postalCode: '28009',
          city: 'Madrid',
          country: 'ES'
        },
        status: 'active',
        foundedDate: { year: 2019, month: 1, day: 1 },
        registrationDetails: {
          vatId: 'ES987654321',
          registrationId: 'B-87654321',
          registrationName: 'Registro Mercantil de Madrid'
        }
      };

      // Add items with UTF-8 characters
      einvoice.items = [
        {
          position: 1,
          name: 'Spëcïål Îtëm with diacritics',
          description: 'Contains: €£¥ symbols',
          articleNumber: 'ART-UTF8-001',
          unitType: 'EA',
          unitQuantity: 1,
          unitNetPrice: 100,
          vatPercentage: 19
        },
        {
          position: 2,
          name: '中文商品 (Chinese Product)',
          description: 'Multi-script: العربية русский 日本語 한국어',
          articleNumber: 'ART-UTF8-002',
          unitType: 'EA',
          unitQuantity: 2,
          unitNetPrice: 50,
          vatPercentage: 19
        },
        {
          position: 3,
          name: 'Emoji test 🌍📧💰',
          description: 'Modern Unicode: 😀🎉🚀',
          articleNumber: 'ART-UTF8-003',
          unitType: 'EA',
          unitQuantity: 1,
          unitNetPrice: 25,
          vatPercentage: 19
        }
      ];

      // Export to XML
      const xmlString = await einvoice.toXmlString('ubl');

      // Debug: Check what's actually in the XML
      console.log(' XML contains encoding declaration:', xmlString.includes('encoding="UTF-8"'));
      console.log(' Invoice ID preserved:', xmlString.includes('UTF8-TEST-€£¥-001'));

      // Check if characters are preserved
      const charactersToCheck = [
        'Büßer & Müller GmbH',
        'José García S.L.',
        'München',
        'Spëcïål Îtëm',
        '中文商品',
        'العربية',
        'русский',
        '日本語',
        '한국어',
        '🌍📧💰'
      ];

      let preservedCount = 0;
      for (const chars of charactersToCheck) {
        if (xmlString.includes(chars)) {
          preservedCount++;
          continue;
        }

        console.log(` Characters "${chars}" not found in XML`);

        // A correct serializer escapes markup characters (e.g. `&` -> `&amp;`),
        // so the literal text may legitimately be absent while the content is intact.
        const escaped = escapeXmlText(chars);
        // Numeric references are built per code point so emoji are matched correctly.
        const encoded = toNumericCharacterReferences(escaped);

        if (escaped !== chars && xmlString.includes(escaped)) {
          console.log(` Found with XML-escaped markup: ${escaped}`);
          preservedCount++;
        } else if (xmlString.includes(encoded)) {
          console.log(` Found as XML entities: ${encoded}`);
          preservedCount++;
        }
      }

      console.log(` Characters preserved: ${preservedCount}/${charactersToCheck.length}`);

      // Verify encoding declaration
      expect(xmlString).toContain('encoding="UTF-8"');

      // Round-trip test
      const newInvoice = new EInvoice();
      await newInvoice.fromXmlString(xmlString);

      // Check if key fields are preserved
      const roundTripSuccess =
        newInvoice.invoiceId === einvoice.invoiceId &&
        newInvoice.from.name === einvoice.from.name &&
        newInvoice.to.name === einvoice.to.name &&
        newInvoice.items.length === einvoice.items.length;

      console.log(` Round-trip test: ${roundTripSuccess ? 'success' : 'failed'}`);

      return { success: true, charactersPreserved: preservedCount > 0, roundTripSuccess };
    }
  );

  console.log(` UTF-8 encoding test completed in ${utf8Metric.duration}ms`);
  expect(utf8Result.success).toBeTrue();
  expect(utf8Result.charactersPreserved).toBeTrue();
  expect(utf8Result.roundTripSuccess).toBeTrue();

  // Test 2: UTF-8 BOM handling
  console.log('\nTest 2: UTF-8 BOM handling');

  const { result: bomResult, metric: bomMetric } = await PerformanceTracker.track(
    'utf8-bom',
    async () => {
      // Create invoice with UTF-8 characters
      const einvoice = new EInvoice();
      einvoice.id = 'UTF8-BOM-TEST';
      einvoice.issueDate = new Date(2025, 0, 25);
      einvoice.invoiceId = 'UTF8-BOM-TEST';
      einvoice.accountingDocId = 'UTF8-BOM-TEST';
      einvoice.subject = 'UTF-8 with BOM: Spëcïål Chäracters';

      einvoice.from = {
        type: 'company',
        name: 'BOM Test Company',
        description: 'Testing UTF-8 BOM handling',
        address: {
          streetName: 'Test Street',
          houseNumber: '1',
          postalCode: '12345',
          city: 'Test City',
          country: 'DE'
        },
        status: 'active',
        foundedDate: { year: 2020, month: 1, day: 1 },
        registrationDetails: {
          vatId: 'DE123456789',
          registrationId: 'HRB 12345',
          registrationName: 'Commercial Register'
        }
      };

      einvoice.to = {
        type: 'person',
        name: 'Test',
        surname: 'Customer',
        salutation: 'Mr' as const,
        sex: 'male' as const,
        title: 'Doctor' as const,
        description: 'Test customer',
        address: {
          streetName: 'Customer Street',
          houseNumber: '2',
          postalCode: '54321',
          city: 'Customer City',
          country: 'DE'
        }
      };

      einvoice.items = [{
        position: 1,
        name: 'Item with spëcïål characters',
        articleNumber: 'BOM-001',
        unitType: 'EA',
        unitQuantity: 1,
        unitNetPrice: 100,
        vatPercentage: 19
      }];

      // Export to XML
      const xmlString = await einvoice.toXmlString('ubl');

      // Test with UTF-8 BOM (Byte Order Mark)
      const utf8BOM = Buffer.from([0xEF, 0xBB, 0xBF]);
      const contentWithBOM = Buffer.concat([utf8BOM, Buffer.from(xmlString, 'utf8')]);

      let bomHandled = false;
      let errorMessage = '';

      // NOTE(review): the expect() calls below sit inside the try, so a failed
      // assertion is caught and reported as "BOM handling not supported". If the
      // assertion message echoes the expected value 'UTF8-BOM-TEST', it would also
      // satisfy the `includes('BOM')` check further down — confirm this is intended.
      try {
        // Try to parse XML with BOM
        const newInvoice = new EInvoice();
        await newInvoice.fromXmlString(contentWithBOM.toString('utf8'));

        // Verify BOM is handled correctly
        expect(newInvoice.invoiceId).toEqual('UTF8-BOM-TEST');

        const exportedXml = await newInvoice.toXmlString('ubl');
        expect(exportedXml).toContain('UTF8-BOM-TEST');
        expect(exportedXml).toContain('spëcïål characters');

        // BOM should not appear in the output
        expect(exportedXml.charCodeAt(0)).not.toEqual(0xFEFF);

        bomHandled = true;
      } catch (error) {
        // Some implementations might not support BOM
        // Narrow the caught value: a non-Error throw has no `.message`, which would
        // otherwise leave errorMessage undefined and crash the `.includes` check below.
        errorMessage = error instanceof Error ? error.message : String(error);
        console.log(' UTF-8 BOM handling not supported:', errorMessage);
      }

      return { bomHandled, errorMessage };
    }
  );

  console.log(` UTF-8 BOM test completed in ${bomMetric.duration}ms`);
  expect(bomResult.bomHandled || bomResult.errorMessage.includes('BOM')).toBeTrue();

  // Test 3: UTF-8 without explicit declaration
  console.log('\nTest 3: UTF-8 without explicit declaration');

  const { result: implicitResult, metric: implicitMetric } = await PerformanceTracker.track(
    'implicit-utf8',
    async () => {
      // Create invoice and export to XML
      const einvoice = new EInvoice();
      einvoice.issueDate = new Date(2025, 0, 1);
      einvoice.invoiceId = 'UTF8-IMPLICIT';
      einvoice.subject = 'No encoding declaration: Köln München København';

      einvoice.from = {
        type: 'company',
        name: 'Implicit UTF-8 Test GmbH',
        description: 'Testing implicit UTF-8',
        address: {
          streetName: 'Königstraße',
          houseNumber: '1',
          postalCode: '50667',
          city: 'Köln',
          country: 'DE'
        },
        status: 'active',
        foundedDate: { year: 2020, month: 1, day: 1 },
        registrationDetails: {
          vatId: 'DE123456789',
          registrationId: 'HRB 12345',
          registrationName: 'Handelsregister Köln'
        }
      };

      einvoice.to = {
        type: 'company',
        name: 'København Company A/S',
        description: 'Danish company',
        address: {
          streetName: 'Østergade',
          houseNumber: '42',
          postalCode: '1100',
          city: 'København',
          country: 'DK'
        },
        status: 'active',
        foundedDate: { year: 2019, month: 1, day: 1 },
        registrationDetails: {
          vatId: 'DK12345678',
          registrationId: 'CVR 12345678',
          registrationName: 'Erhvervsstyrelsen'
        }
      };

      einvoice.items = [{
        position: 1,
        name: 'München-København Express Service',
        description: 'Cities: Köln, München, København',
        articleNumber: 'IMP-001',
        unitType: 'EA',
        unitQuantity: 1,
        unitNetPrice: 100,
        vatPercentage: 19
      }];

      // Export to XML and check encoding
      const xmlString = await einvoice.toXmlString('ubl');
      expect(xmlString).toContain('encoding="UTF-8"');

      // Check if special characters are preserved
      const citiesPreserved =
        xmlString.includes('Köln') &&
        xmlString.includes('München') &&
        xmlString.includes('København');

      console.log(` Cities preserved in XML: ${citiesPreserved}`);

      // Round-trip test
      const newInvoice = new EInvoice();
      await newInvoice.fromXmlString(xmlString);

      const roundTripSuccess =
        newInvoice.from.address.city === 'Köln' &&
        newInvoice.to.address.city === 'København';

      console.log(` Round-trip preservation: ${roundTripSuccess}`);

      // roundTripSuccess is returned for reporting; it is logged but not asserted,
      // so the pass/fail criteria of this scenario are unchanged.
      return { success: true, charactersPreserved: citiesPreserved, roundTripSuccess };
    }
  );

  console.log(` UTF-8 without declaration test completed in ${implicitMetric.duration}ms`);
  expect(implicitResult.success).toBeTrue();
  expect(implicitResult.charactersPreserved).toBeTrue();

  // Test 4: Multi-byte UTF-8 sequences
  console.log('\nTest 4: Multi-byte UTF-8 sequences');

  const { result: multiByteResult, metric: multiByteMetric } = await PerformanceTracker.track(
    'multi-byte',
    async () => {
      // Test different UTF-8 byte sequences
      const multiByteTests = [
        { name: '2-byte', text: 'äöüß ñç', desc: 'Latin extended' },
        { name: '3-byte', text: '中文 日本語 한국어', desc: 'CJK characters' },
        { name: '4-byte', text: '😀🎉🚀 𝐇𝐞𝐥𝐥𝐨', desc: 'Emoji and math symbols' },
        { name: 'mixed', text: 'Hello мир 世界 🌍', desc: 'Mixed scripts' }
      ];

      let allSuccessful = true;

      for (const test of multiByteTests) {
        const einvoice = new EInvoice();
        einvoice.issueDate = new Date(2025, 0, 1);
        einvoice.invoiceId = `MB-${test.name}`;
        einvoice.subject = test.text;

        einvoice.from = {
          type: 'company',
          name: test.text,
          description: test.desc,
          address: {
            streetName: 'Test Street',
            houseNumber: '1',
            postalCode: '12345',
            city: 'Test City',
            country: 'DE'
          },
          status: 'active',
          foundedDate: { year: 2020, month: 1, day: 1 },
          registrationDetails: {
            vatId: 'DE123456789',
            registrationId: 'HRB 12345',
            registrationName: 'Commercial Register'
          }
        };

        einvoice.to = {
          type: 'person',
          name: 'Test',
          surname: 'Customer',
          salutation: 'Mr' as const,
          sex: 'male' as const,
          title: 'Doctor' as const,
          description: 'Test customer',
          address: {
            streetName: 'Customer Street',
            houseNumber: '2',
            postalCode: '54321',
            city: 'Customer City',
            country: 'DE'
          }
        };

        einvoice.items = [{
          position: 1,
          name: test.text,
          description: test.desc,
          articleNumber: 'MB-001',
          unitType: 'EA',
          unitQuantity: 1,
          unitNetPrice: 100,
          vatPercentage: 19
        }];

        const xmlString = await einvoice.toXmlString('ubl');

        // Three different "lengths" of the same text: UTF-8 bytes, UTF-16 code
        // units (String.length) and user-perceived grapheme clusters.
        const byteLength = Buffer.from(test.text, 'utf8').length;
        const charLength = test.text.length;
        const graphemeLength = [...new Intl.Segmenter().segment(test.text)].length;

        console.log(` ${test.name}: chars=${charLength}, bytes=${byteLength}, graphemes=${graphemeLength}`);

        // Check preservation
        const preserved = xmlString.includes(test.text);
        console.log(` Preserved in XML: ${preserved}`);

        if (!preserved) {
          allSuccessful = false;
        }
      }

      return { success: allSuccessful };
    }
  );

  console.log(` Multi-byte UTF-8 test completed in ${multiByteMetric.duration}ms`);
  expect(multiByteResult.success).toBeTrue();

  // Test 5: UTF-8 encoding in attributes
  console.log('\nTest 5: UTF-8 encoding in attributes');

  const { result: attrResult, metric: attrMetric } = await PerformanceTracker.track(
    'utf8-attributes',
    async () => {
      const einvoice = new EInvoice();
      einvoice.id = 'INV-2024-ñ-001';
      einvoice.issueDate = new Date(2025, 0, 1);
      einvoice.invoiceId = 'INV-2024-ñ-001';
      einvoice.accountingDocId = 'INV-2024-ñ-001';
      einvoice.subject = 'UTF-8 in attributes test';
      einvoice.currency = 'EUR'; // Currency symbol: €

      einvoice.from = {
        type: 'company',
        name: 'Attribute Test GmbH',
        description: 'Testing UTF-8 in XML attributes',
        address: {
          streetName: 'Test Street',
          houseNumber: '1ñ', // Special char in house number
          postalCode: '12345',
          city: 'Test City',
          country: 'DE'
        },
        status: 'active',
        foundedDate: { year: 2020, month: 1, day: 1 },
        registrationDetails: {
          vatId: 'DE123456789ñ',
          registrationId: 'HRB 12345',
          registrationName: 'Commercial Register'
        }
      };

      einvoice.to = {
        type: 'person',
        name: 'José',
        surname: 'García',
        salutation: 'Mr' as const,
        sex: 'male' as const,
        title: 'Doctor' as const,
        description: 'Customer with special chars',
        address: {
          streetName: 'Customer Street',
          houseNumber: '2',
          postalCode: '54321',
          city: 'Customer City',
          country: 'ES'
        }
      };

      einvoice.items = [{
        position: 1,
        name: 'Product with € symbol',
        articleNumber: 'ART-€-001',
        unitType: 'EA',
        unitQuantity: 1,
        unitNetPrice: 100,
        vatPercentage: 19
      }];

      const xmlString = await einvoice.toXmlString('ubl');

      // Check if special chars in attributes are preserved
      const invoiceIdPreserved = xmlString.includes('INV-2024-ñ-001');
      console.log(` Invoice ID with ñ preserved: ${invoiceIdPreserved}`);

      // Round-trip test
      const newInvoice = new EInvoice();
      await newInvoice.fromXmlString(xmlString);

      const roundTripSuccess = newInvoice.invoiceId === 'INV-2024-ñ-001';
      console.log(` Round-trip preservation: ${roundTripSuccess}`);

      return { success: invoiceIdPreserved && roundTripSuccess };
    }
  );

  console.log(` UTF-8 attributes test completed in ${attrMetric.duration}ms`);
  expect(attrResult.success).toBeTrue();

  // Test 6: UTF-8 corpus validation
  console.log('\nTest 6: UTF-8 corpus validation');

  const { result: corpusResult, metric: corpusMetric } = await PerformanceTracker.track(
    'corpus-utf8',
    async () => {
      let processedCount = 0;
      let utf8Count = 0;

      // Load XML files from various categories
      const ciiFiles = await CorpusLoader.loadCategory('CII_XMLRECHNUNG');
      const ublFiles = await CorpusLoader.loadCategory('UBL_XMLRECHNUNG');
      const allFiles = [...ciiFiles, ...ublFiles];

      // Test a sample of XML files for UTF-8 handling
      const sampleSize = Math.min(50, allFiles.length);
      const sample = allFiles.slice(0, sampleSize);

      for (const file of sample) {
        try {
          const buffer = await CorpusLoader.loadFile(file.path);
          const content = buffer.toString('utf8');

          const einvoice = new EInvoice();
          await einvoice.fromXmlString(content);

          const xmlString = await einvoice.toXmlString('ubl');

          // Check if encoding is preserved or defaulted to UTF-8
          if (xmlString.includes('encoding="UTF-8"') || xmlString.includes("encoding='UTF-8'")) {
            utf8Count++;
          }

          processedCount++;
        } catch (error) {
          // Some files might not be valid invoices
          const reason = error instanceof Error ? error.message : String(error);
          console.log(` Skipped file ${file.path}: ${reason}`);
        }
      }

      console.log(` Processed ${processedCount} files, ${utf8Count} had UTF-8 encoding`);

      return { processedCount, utf8Count, success: utf8Count > 0 };
    }
  );

  // NOTE(review): corpusResult.success is reported but never asserted, so this
  // scenario cannot fail the test — presumably to tolerate a missing corpus; confirm.
  console.log(` Corpus validation completed in ${corpusMetric.duration}ms`);
  console.log(` UTF-8 files: ${corpusResult.utf8Count}/${corpusResult.processedCount}`);

  // Test 7: UTF-8 normalization
  console.log('\nTest 7: UTF-8 normalization');

  const { result: normResult, metric: normMetric } = await PerformanceTracker.track(
    'utf8-normalization',
    async () => {
      // Test different Unicode normalization forms
      const normTests = [
        { form: 'NFC', text: 'café', desc: 'Composed form' },
        { form: 'NFD', text: 'café'.normalize('NFD'), desc: 'Decomposed form' },
        { form: 'mixed', text: 'Ω≈ç√∫', desc: 'Math symbols' }
      ];

      let allNormalized = true;

      for (const test of normTests) {
        const einvoice = new EInvoice();
        einvoice.issueDate = new Date(2025, 0, 1);
        einvoice.invoiceId = `NORM-${test.form}`;
        einvoice.subject = test.text;

        einvoice.from = {
          type: 'company',
          name: 'Normalization Test',
          description: test.desc,
          address: {
            streetName: 'Test Street',
            houseNumber: '1',
            postalCode: '12345',
            city: 'Test City',
            country: 'DE'
          },
          status: 'active',
          foundedDate: { year: 2020, month: 1, day: 1 },
          registrationDetails: {
            vatId: 'DE123456789',
            registrationId: 'HRB 12345',
            registrationName: 'Commercial Register'
          }
        };

        einvoice.to = {
          type: 'person',
          name: 'Test',
          surname: 'Customer',
          salutation: 'Mr' as const,
          sex: 'male' as const,
          title: 'Doctor' as const,
          description: 'Test customer',
          address: {
            streetName: 'Customer Street',
            houseNumber: '2',
            postalCode: '54321',
            city: 'Customer City',
            country: 'DE'
          }
        };

        einvoice.items = [{
          position: 1,
          name: test.text,
          articleNumber: 'NORM-001',
          unitType: 'EA',
          unitQuantity: 1,
          unitNetPrice: 100,
          vatPercentage: 19
        }];

        const xmlString = await einvoice.toXmlString('ubl');

        // Check if text is preserved (may be normalized)
        const preserved =
          xmlString.includes(test.text) ||
          xmlString.includes(test.text.normalize('NFC'));

        console.log(` ${test.form} (${test.desc}): ${preserved ? 'preserved' : 'modified'}`);

        if (!preserved) {
          allNormalized = false;
        }
      }

      return { success: allNormalized };
    }
  );

  console.log(` Normalization test completed in ${normMetric.duration}ms`);
  expect(normResult.success).toBeTrue();

  // Generate performance summary
  const allMetrics = [
    { name: 'Basic UTF-8', duration: utf8Metric.duration },
    { name: 'BOM handling', duration: bomMetric.duration },
    { name: 'Implicit UTF-8', duration: implicitMetric.duration },
    { name: 'Multi-byte', duration: multiByteMetric.duration },
    { name: 'Attributes', duration: attrMetric.duration },
    { name: 'Corpus validation', duration: corpusMetric.duration },
    { name: 'Normalization', duration: normMetric.duration }
  ];

  const totalDuration = allMetrics.reduce((sum, m) => sum + m.duration, 0);
  const avgDuration = totalDuration / allMetrics.length;

  console.log('\n=== UTF-8 Encoding Test Summary ===');
  console.log(`Total tests: ${allMetrics.length}`);
  console.log(`Total duration: ${totalDuration.toFixed(2)}ms`);
  console.log(`Average duration: ${avgDuration.toFixed(2)}ms`);
  // allMetrics is a non-empty literal, so reduce without an initial value is safe here.
  console.log(`Slowest test: ${allMetrics.reduce((max, m) => m.duration > max.duration ? m : max).name}`);
  console.log(`Fastest test: ${allMetrics.reduce((min, m) => m.duration < min.duration ? m : min).name}`);
});

// Run the test
tap.start();