einvoice/test/suite/einvoice_encoding/test.enc-01.utf8-encoding.ts

280 lines
9.9 KiB
TypeScript
Raw Normal View History

2025-05-25 19:45:37 +00:00
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-01: UTF-8 Encoding - should handle UTF-8 encoded documents correctly', async (t) => {
// ENC-01: round-trip checks for UTF-8 encoded XML invoices.
// Each nested sub-test registered through `t.test` loads an XML fixture into an
// EInvoice instance, serializes it again, and asserts on the resulting string.
// One tracker is shared by every sub-test; each sub-test records its own elapsed time on it.
const performanceTracker = new PerformanceTracker('ENC-01: UTF-8 Encoding');
// Corpus access used by the 'UTF-8 corpus validation' sub-test.
const corpusLoader = new CorpusLoader();
t.test('Basic UTF-8 encoding support', async () => {
  const startTime = performance.now();
  // Fixture notes:
  //  - The <Note> carries every script the assertions below look for (Latin
  //    supplements, CJK, Arabic, Cyrillic, Japanese, Korean, emoji). The fixture
  //    and the assertions must stay in sync, otherwise the test fails on its
  //    own input rather than on a library defect.
  //  - The ampersand in the customer name is written as `&amp;`: a bare `&` in
  //    character data is not well-formed XML and would be rejected by a
  //    conforming parser before any encoding logic is exercised.
  const utf8Content = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:fdc:peppol.eu:2017:poacc:billing:3.0</CustomizationID>
<ProfileID>urn:fdc:peppol.eu:2017:poacc:billing:01:1.0</ProfileID>
<ID>UTF8-TEST-001</ID>
<IssueDate>2025-01-25</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
<Note>UTF-8 Test: €£¥ñüäöß 中文 العربية русский 日本語 한국어 🌍📧</Note>
<DocumentCurrencyCode>EUR</DocumentCurrencyCode>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>UTF-8 Supplier GmbH</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>Büßer &amp; Müller GmbH</Name>
</PartyName>
</Party>
</AccountingCustomerParty>
<LegalMonetaryTotal>
<TaxExclusiveAmount currencyID="EUR">100.00</TaxExclusiveAmount>
<TaxInclusiveAmount currencyID="EUR">119.00</TaxInclusiveAmount>
<PayableAmount currencyID="EUR">119.00</PayableAmount>
</LegalMonetaryTotal>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(utf8Content);

  // Verify the encoding declaration and every script survive the round trip.
  const xmlString = einvoice.getXmlString();
  expect(xmlString).toContain('encoding="UTF-8"');
  expect(xmlString).toContain('€£¥ñüäöß');
  expect(xmlString).toContain('中文');
  expect(xmlString).toContain('العربية');
  expect(xmlString).toContain('русский');
  expect(xmlString).toContain('日本語');
  expect(xmlString).toContain('한국어');
  expect(xmlString).toContain('🌍📧');
  // Serialized XML must keep the ampersand escaped.
  expect(xmlString).toContain('Büßer &amp; Müller GmbH');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('basic-utf8', elapsed);
});
t.test('UTF-8 BOM handling', async () => {
  const startTime = performance.now();
  // EF BB BF is the UTF-8 byte sequence of U+FEFF (the byte order mark).
  const utf8BOM = Buffer.from([0xEF, 0xBB, 0xBF]);
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>UTF8-BOM-TEST</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>UTF-8 with BOM: Spëcïål Chäracters</Note>
</Invoice>`;
  const contentWithBOM = Buffer.concat([utf8BOM, Buffer.from(xmlContent, 'utf8')]);
  const einvoice = new EInvoice();

  // Only the load step is best-effort: an implementation may reject a BOM.
  // The assertions live outside the try block so that a failed expectation
  // fails the sub-test instead of being logged as "not supported".
  let loaded = false;
  try {
    await einvoice.loadFromBuffer(contentWithBOM);
    loaded = true;
  } catch (error) {
    // The caught value is not guaranteed to be an Error; narrow before reading `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    console.log('UTF-8 BOM handling not supported:', reason);
  }

  if (loaded) {
    // Verify BOM is handled correctly
    const parsedData = einvoice.getInvoiceData();
    expect(parsedData).toBeTruthy();
    const xmlString = einvoice.getXmlString();
    expect(xmlString).toContain('UTF8-BOM-TEST');
    expect(xmlString).toContain('Spëcïål Chäracters');
    // BOM should not appear in the output
    expect(xmlString.charCodeAt(0)).not.toBe(0xFEFF);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('utf8-bom', elapsed);
});
t.test('UTF-8 without explicit declaration', async () => {
  const began = performance.now();
  // The XML declaration below deliberately carries no `encoding` pseudo-attribute.
  const xmlWithoutEncodingDecl = `<?xml version="1.0"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>IMPLICIT-UTF8</ID>
<Note>Köln München København</Note>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(xmlWithoutEncodingDecl);

  // The non-ASCII city names must come back unchanged in the serialized document.
  const serialized = invoice.getXmlString();
  expect(serialized).toContain('Köln München København');

  performanceTracker.addMeasurement('implicit-utf8', performance.now() - began);
});
t.test('Multi-byte UTF-8 sequences', async () => {
  const startTime = performance.now();
  // Fixture grouped by UTF-8 encoded length of each character:
  //  - 2-byte: Latin-1 supplement letters and currency signs (U+0080..U+07FF)
  //  - 3-byte: euro and other currency signs plus CJK ideographs (U+0800..U+FFFF)
  //  - 4-byte: mathematical Fraktur letters and emoji (above U+FFFF)
  // Every substring asserted below is present in the fixture; the euro sign
  // sits in the 3-byte group because U+20AC encodes as three bytes.
  const multiByteContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MULTIBYTE-UTF8</ID>
<Note>
2-byte: £¥ñüäöß
3-byte: €₹₽₨ 中文漢字
4-byte: 𝕳𝖊𝖑𝖑𝖔 🎉🌍🚀
Mixed: Prix: 42,50€ (včetně DPH)
</Note>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(multiByteContent);
  const xmlString = einvoice.getXmlString();

  // Verify all multi-byte sequences are preserved
  expect(xmlString).toContain('£¥ñüäöß');
  expect(xmlString).toContain('€₹₽₨');
  expect(xmlString).toContain('中文漢字');
  expect(xmlString).toContain('𝕳𝖊𝖑𝖑𝖔');
  expect(xmlString).toContain('🎉🌍🚀');
  expect(xmlString).toContain('42,50€');
  expect(xmlString).toContain('včetně DPH');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('multibyte-utf8', elapsed);
});
t.test('UTF-8 encoding in attributes', async () => {
  const began = performance.now();
  // Fixture with non-ASCII characters in attribute values as well as element text.
  const xmlWithNonAsciiAttributes = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>UTF8-ATTR-TEST</ID>
<PaymentMeans>
<PaymentMeansCode name="Überweisung">30</PaymentMeansCode>
<PayeeFinancialAccount>
<Name>Büro für Städtebau</Name>
<FinancialInstitutionBranch>
<Name>Sparkasse Köln/Bonn</Name>
</FinancialInstitutionBranch>
</PayeeFinancialAccount>
</PaymentMeans>
<TaxTotal>
<TaxAmount currencyID="EUR" symbol="€">19.00</TaxAmount>
</TaxTotal>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(xmlWithNonAsciiAttributes);
  const roundTripped = invoice.getXmlString();

  // Checked in order; the first missing fragment fails the sub-test.
  const expectedFragments = [
    'name="Überweisung"',
    'Büro für Städtebau',
    'Sparkasse Köln/Bonn',
    'symbol="€"',
  ];
  for (const fragment of expectedFragments) {
    expect(roundTripped).toContain(fragment);
  }

  performanceTracker.addMeasurement('utf8-attributes', performance.now() - began);
});
t.test('UTF-8 corpus validation', async () => {
  const startTime = performance.now();
  let processedCount = 0;
  let utf8Count = 0;
  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));
  // Test a sample of XML files for UTF-8 handling (at most the first 50).
  const sampleSize = Math.min(50, xmlFiles.length);
  const sample = xmlFiles.slice(0, sampleSize);
  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const einvoice = new EInvoice();
      // The loader may hand back either text or raw bytes; pick the matching entry point.
      if (typeof content === 'string') {
        await einvoice.loadFromString(content);
      } else {
        await einvoice.loadFromBuffer(content);
      }
      const xmlString = einvoice.getXmlString();
      // Check if encoding is preserved or defaulted to UTF-8
      if (xmlString.includes('encoding="UTF-8"') || xmlString.includes("encoding='UTF-8'")) {
        utf8Count++;
      }
      // Verify content is properly encoded
      expect(xmlString).toBeTruthy();
      expect(xmlString.length).toBeGreaterThan(0);
      processedCount++;
    } catch (error) {
      // Best-effort: a file that cannot be processed is skipped, not fatal.
      // The cause is logged so that a genuine regression is distinguishable
      // from a file that simply uses another encoding.
      // NOTE(review): a failed expect() above also lands here and is only
      // logged — confirm that is intended.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Non-UTF-8 or invalid file: ${file} (${reason})`);
    }
  }
  console.log(`UTF-8 corpus test: ${utf8Count}/${processedCount} files explicitly use UTF-8`);
  // At least one sampled file must have round-tripped successfully.
  expect(processedCount).toBeGreaterThan(0);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-utf8', elapsed);
});
t.test('UTF-8 normalization', async () => {
  const startTime = performance.now();
  // Test Unicode normalization forms (NFC, NFD).
  // Both spellings are built from explicit escapes: the two forms render
  // identically, and literal glyphs can be silently collapsed into a single
  // form by editors or other tooling, which would void the test.
  const nfcCafe = 'Caf\u00E9'; // precomposed U+00E9
  const nfdCafe = 'Cafe\u0301'; // 'e' followed by combining acute U+0301
  const unnormalizedContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>NORMALIZATION-TEST</ID>
<Note>${nfcCafe} (NFC) vs ${nfdCafe} (NFD)</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>André's Büro</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(unnormalizedContent);
  const xmlString = einvoice.getXmlString();

  // Both forms should be preserved: assert each one together with its label
  // so that neither can satisfy the other's check.
  expect(xmlString).toContain(`${nfcCafe} (NFC)`);
  expect(xmlString).toContain(`${nfdCafe} (NFD)`);
  expect(xmlString).toContain("André's Büro");

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('utf8-normalization', elapsed);
});
// Print performance summary
// NOTE(review): the t.test(...) calls above are not awaited in this callback;
// whether their measurements have been recorded by the time this line runs
// depends on how the test runner schedules sub-tests — confirm.
performanceTracker.printSummary();
// Performance assertions
// NOTE(review): the threshold is presumably in milliseconds, since the recorded
// values are differences of performance.now() readings; the average includes the
// corpus sub-test, which reads up to 50 files — confirm this bound is not flaky.
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(100); // UTF-8 operations should be fast
});
// Hand the registered tests over to the runner.
tap.start();