import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';

/**
 * Matches an ISO-8859-1 encoding declaration regardless of letter case,
 * quote style, or whitespace around the `=` sign.
 */
const ISO_8859_1_DECLARATION = /encoding\s*=\s*["']iso-8859-1["']/i;

/**
 * The ISO-8859-1 symbols U+00A1..U+00BF as one string.
 * Generated from code points because the range includes the invisible
 * soft hyphen (U+00AD), which is easily lost when a literal is edited.
 */
const LATIN1_SYMBOLS = Array.from({ length: 0xbf - 0xa1 + 1 }, (_, offset) =>
  String.fromCharCode(0xa1 + offset)
).join('');

/**
 * Produces a printable message for a value caught by a `catch` clause.
 *
 * @param error - whatever was thrown; not necessarily an `Error`
 * @returns the error's `message`, or the stringified value for non-Error throws
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

tap.test('ENC-03: ISO-8859-1 Encoding - should handle ISO-8859-1 (Latin-1) encoded documents', async (t) => {
  // ENC-03: Verify correct handling of ISO-8859-1 encoded XML documents.
  // This test ensures support for legacy Western European character encoding.
  //
  // NOTE(review): the fixture strings below contain no XML markup or encoding
  // declaration as they appear in this file; presumably they are meant to be
  // complete XML documents - confirm before relying on these tests.
  // NOTE(review): the sub-tests registered through `t.test` are not awaited;
  // if tapbundle runs them asynchronously, the summary and the average-time
  // assertion at the bottom may execute before any measurement exists - confirm.
  const performanceTracker = new PerformanceTracker('ENC-03: ISO-8859-1 Encoding');
  const corpusLoader = new CorpusLoader();

  t.test('Basic ISO-8859-1 encoding', async () => {
    const startTime = performance.now();

    // Content restricted to characters that ISO-8859-1 can represent.
    // The Euro sign (U+20AC) is deliberately written as "EUR": Buffer's
    // 'latin1' encoding truncates it to byte 0xAC, so an assertion on the
    // literal sign could never succeed after the round trip.
    const xmlContent = ` 2.1 ISO88591-TEST 2025-01-25 ISO-8859-1 Test: àáâãäåæçèéêëìíîïñòóôõöøùúûüý EUR Société Générale Rue de la Paix Paris FR Müller & Söhne GmbH Königsallee Düsseldorf Prix unitaire: 25,50 EUR (vingt-cinq euros cinquante) `;

    // Convert to ISO-8859-1 buffer (one byte per character)
    const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
    const einvoice = new EInvoice();

    try {
      await einvoice.loadFromBuffer(iso88591Buffer);
      const xmlString = einvoice.getXmlString();

      expect(xmlString).toContain('ISO88591-TEST');
      expect(xmlString).toContain('àáâãäåæçèéêëìíîïñòóôõöøùúûüý');
      expect(xmlString).toContain('Société Générale');
      expect(xmlString).toContain('Müller & Söhne GmbH');
      expect(xmlString).toContain('Königsallee');
      expect(xmlString).toContain('Düsseldorf');
      expect(xmlString).toContain('25,50 EUR');
    } catch (error: unknown) {
      console.log('ISO-8859-1 handling issue:', describeError(error));

      // Fallback: decode the bytes ourselves and load the resulting string.
      const decoded = iso88591Buffer.toString('latin1');
      await einvoice.loadFromString(decoded);
      expect(einvoice.getXmlString()).toContain('ISO88591-TEST');
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('basic-iso88591', elapsed);
  });

  t.test('ISO-8859-1 special characters', async () => {
    const startTime = performance.now();

    // Exercise the symbol block of ISO-8859-1 (U+00A1..U+00BF).
    // As above, the amount uses "EUR" because the Euro sign is not Latin-1.
    const xmlContent = ` 2.1 ISO88591-SPECIAL Special chars: ${LATIN1_SYMBOLS} REF°12345 Amount: £100 or EUR 120 (±5%) S 19 VAT § 19 100.00 119.00 `;

    const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
    const einvoice = new EInvoice();

    // Best effort: a failure here is reported on the console but does not
    // fail the sub-test.
    try {
      await einvoice.loadFromBuffer(iso88591Buffer);
      const xmlString = einvoice.getXmlString();

      expect(xmlString).toContain(LATIN1_SYMBOLS);
      expect(xmlString).toContain('REF°12345');
      expect(xmlString).toContain('£100 or EUR 120 (±5%)');
      expect(xmlString).toContain('VAT § 19');
    } catch (error: unknown) {
      console.log('ISO-8859-1 special characters:', describeError(error));
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('iso88591-special', elapsed);
  });

  t.test('ISO-8859-1 to UTF-8 conversion', async () => {
    const startTime = performance.now();

    // Test conversion from ISO-8859-1 input to the string returned by getXmlString()
    const xmlContent = ` 2.1 ISO-TO-UTF8 André's Café François Müller françois@café.fr Crème brûlée Dessert français traditionnel `;

    const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
    const einvoice = new EInvoice();

    // Best effort: a failure here is reported on the console but does not
    // fail the sub-test.
    try {
      await einvoice.loadFromBuffer(iso88591Buffer);
      const xmlString = einvoice.getXmlString();

      // Verify content is properly converted
      expect(xmlString).toContain("André's Café");
      expect(xmlString).toContain('François Müller');
      expect(xmlString).toContain('françois@café.fr');
      expect(xmlString).toContain('Crème brûlée');
      expect(xmlString).toContain('Dessert français traditionnel');

      // Verify the output survives a UTF-8 encode/decode round trip unchanged
      const utf8Buffer = Buffer.from(xmlString, 'utf8');
      expect(utf8Buffer.toString('utf8')).toBe(xmlString);
    } catch (error: unknown) {
      console.log('ISO-8859-1 to UTF-8 conversion:', describeError(error));
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('iso-to-utf8', elapsed);
  });

  t.test('ISO-8859-1 limitations', async () => {
    const startTime = performance.now();

    // Test characters outside the ISO-8859-1 range: the Euro sign is kept in
    // this fixture on purpose.
    const xmlContent = ` 2.1 ISO88591-LIMITS Euro: € Pound: £ Yen: ¥ Temperature: 20°C (68°F) Naïve café `;

    const iso88591Buffer = Buffer.from(xmlContent, 'latin1');

    // The Euro sign (€) is NOT in ISO-8859-1 (it is in ISO-8859-15), so it
    // cannot survive the Latin-1 encoding step performed above.
    expect(iso88591Buffer.toString('latin1').includes('€')).toBe(false);

    const einvoice = new EInvoice();

    // Best effort: a failure here is reported on the console but does not
    // fail the sub-test.
    try {
      await einvoice.loadFromBuffer(iso88591Buffer);
      const xmlString = einvoice.getXmlString();

      // These characters exist in ISO-8859-1
      expect(xmlString).toContain('£'); // Pound sign (163)
      expect(xmlString).toContain('¥'); // Yen sign (165)
      expect(xmlString).toContain('°'); // Degree sign (176)
      expect(xmlString).toContain('Naïve café');
    } catch (error: unknown) {
      console.log('ISO-8859-1 limitation test:', describeError(error));
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('iso88591-limits', elapsed);
  });

  t.test('Mixed encoding scenarios', async () => {
    const startTime = performance.now();

    // Spanish/Catalan content, all of it representable in ISO-8859-1.
    // Unlike the sub-tests above, a load or assertion failure here propagates.
    const xmlContent = ` 2.1 MIXED-ENCODING José García S.A. Passeig de Gràcia Barcelona Catalunya ES Pago: 30 días fecha factura `;

    const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
    const einvoice = new EInvoice();
    await einvoice.loadFromBuffer(iso88591Buffer);

    const xmlString = einvoice.getXmlString();
    expect(xmlString).toContain('José García S.A.');
    expect(xmlString).toContain('Passeig de Gràcia');
    expect(xmlString).toContain('Catalunya');
    expect(xmlString).toContain('30 días fecha factura');

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('mixed-encoding', elapsed);
  });

  t.test('Corpus ISO-8859-1 detection', async () => {
    const startTime = performance.now();

    let iso88591Count = 0;
    let checkedCount = 0;
    let skippedCount = 0;

    const files = await corpusLoader.getAllFiles();
    const xmlFiles = files.filter(f => f.endsWith('.xml'));

    // Only a sample of the corpus is scanned to keep the test fast
    const sampleSize = Math.min(40, xmlFiles.length);
    const sample = xmlFiles.slice(0, sampleSize);

    for (const file of sample) {
      try {
        const content = await corpusLoader.readFile(file);
        // The encoding declaration is plain ASCII, so decoding the bytes as
        // UTF-8 is sufficient for locating it.
        const xmlString = Buffer.isBuffer(content) ? content.toString('utf8') : content;

        if (ISO_8859_1_DECLARATION.test(xmlString)) {
          iso88591Count++;
          console.log(`Found ISO-8859-1 file: ${file}`);
        }

        checkedCount++;
      } catch (error: unknown) {
        // Unreadable files do not fail the scan, but they are counted so the
        // reported ratio is not silently skewed.
        skippedCount++;
      }
    }

    console.log(`ISO-8859-1 corpus scan: ${iso88591Count}/${checkedCount} files use ISO-8859-1`);
    if (skippedCount > 0) {
      console.log(`ISO-8859-1 corpus scan: skipped ${skippedCount} unreadable file(s)`);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('corpus-iso88591', elapsed);
  });

  t.test('Character reference handling', async () => {
    const startTime = performance.now();

    // Characters outside ISO-8859-1 are written as numeric character
    // references, the only way a Latin-1 encoded document can carry them.
    // The copyright sign (U+00A9) is inside the range and stays literal.
    const xmlContent = ` 2.1 CHAR-REF-TEST Euro: &#8364; Em dash: &#8212; Ellipsis: &#8230; Smart quotes: &#8220;Hello&#8221; &#8216;World&#8217; Trademark&#8482; Product Copyright © 2025 `;

    const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
    const einvoice = new EInvoice();
    await einvoice.loadFromBuffer(iso88591Buffer);

    const xmlString = einvoice.getXmlString();

    // Character references may be preserved or converted; accept the decoded
    // character as well as its decimal or hexadecimal reference.
    expect(xmlString).toMatch(/Euro:\s*(?:€|&#8364;|&#x20AC;)/i);
    expect(xmlString).toMatch(/©|&#169;|&#xA9;/i);

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('char-references', elapsed);
  });

  // Print performance summary
  performanceTracker.printSummary();

  // Performance assertion: ISO-8859-1 operations should be reasonably fast
  const avgTime = performanceTracker.getAverageTime();
  expect(avgTime).toBeLessThan(120);
});

tap.start();