280 lines
9.9 KiB
TypeScript
280 lines
9.9 KiB
TypeScript
|
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
|||
|
import * as plugins from '../plugins.js';
|
|||
|
import { EInvoice } from '../../../ts/index.js';
|
|||
|
import { CorpusLoader } from '../corpus.loader.js';
|
|||
|
import { PerformanceTracker } from '../performance.tracker.js';
|
|||
|
|
|||
|
/**
 * ENC-01: UTF-8 encoding.
 *
 * Verifies that UTF-8 encoded XML invoices can be loaded into an EInvoice and
 * written back out without losing or corrupting characters. Covers:
 *   - an explicit UTF-8 declaration with mixed scripts and emoji,
 *   - a leading UTF-8 byte order mark,
 *   - a missing encoding declaration,
 *   - 2-, 3- and 4-byte sequences,
 *   - non-ASCII attribute values,
 *   - a sample of corpus files,
 *   - precomposed (NFC) versus decomposed (NFD) text.
 *
 * Each sub-test records its elapsed time in the shared PerformanceTracker; a
 * measurement is only recorded when the sub-test body finishes without throwing.
 *
 * NOTE(review): the summary and the average-time assertion at the bottom run
 * right after the sub-tests are registered through `t.test(...)`. Whether the
 * sub-tests have already finished at that point depends on the tap bundle's
 * semantics, which are not visible in this file — confirm, and await the
 * sub-tests if `t.test` returns a promise.
 */
tap.test('ENC-01: UTF-8 Encoding - should handle UTF-8 encoded documents correctly', async (t) => {
  // ENC-01: Verify correct handling of UTF-8 encoded XML documents
  // This test ensures that the library can properly read, process, and write UTF-8 encoded invoices

  const performanceTracker = new PerformanceTracker('ENC-01: UTF-8 Encoding');
  const corpusLoader = new CorpusLoader();

  t.test('Basic UTF-8 encoding support', async () => {
    const startTime = performance.now();

    // Test with UTF-8 encoded content containing various characters.
    // The ampersand in the customer name is written as &amp; because a raw
    // "&" in character data makes the document ill-formed XML.
    const utf8Content = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:fdc:peppol.eu:2017:poacc:billing:3.0</CustomizationID>
  <ProfileID>urn:fdc:peppol.eu:2017:poacc:billing:01:1.0</ProfileID>
  <ID>UTF8-TEST-001</ID>
  <IssueDate>2025-01-25</IssueDate>
  <InvoiceTypeCode>380</InvoiceTypeCode>
  <Note>UTF-8 Test: €£¥ñüäöß 中文 العربية русский 日本語 한국어 🌍📧</Note>
  <DocumentCurrencyCode>EUR</DocumentCurrencyCode>
  <AccountingSupplierParty>
    <Party>
      <PartyName>
        <Name>UTF-8 Supplier GmbH</Name>
      </PartyName>
    </Party>
  </AccountingSupplierParty>
  <AccountingCustomerParty>
    <Party>
      <PartyName>
        <Name>Büßer &amp; Müller GmbH</Name>
      </PartyName>
    </Party>
  </AccountingCustomerParty>
  <LegalMonetaryTotal>
    <TaxExclusiveAmount currencyID="EUR">100.00</TaxExclusiveAmount>
    <TaxInclusiveAmount currencyID="EUR">119.00</TaxInclusiveAmount>
    <PayableAmount currencyID="EUR">119.00</PayableAmount>
  </LegalMonetaryTotal>
</Invoice>`;

    const einvoice = new EInvoice();
    await einvoice.loadFromString(utf8Content);

    // Verify encoding is preserved
    const xmlString = einvoice.getXmlString();
    expect(xmlString).toContain('encoding="UTF-8"');
    expect(xmlString).toContain('€£¥ñüäöß');
    expect(xmlString).toContain('中文');
    expect(xmlString).toContain('العربية');
    expect(xmlString).toContain('русский');
    expect(xmlString).toContain('日本語');
    expect(xmlString).toContain('한국어');
    expect(xmlString).toContain('🌍📧');
    // Serialized XML carries the ampersand in its escaped form.
    expect(xmlString).toContain('Büßer &amp; Müller GmbH');

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('basic-utf8', elapsed);
  });

  t.test('UTF-8 BOM handling', async () => {
    const startTime = performance.now();

    // Test with UTF-8 BOM (Byte Order Mark)
    const utf8BOM = Buffer.from([0xEF, 0xBB, 0xBF]);
    const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>UTF8-BOM-TEST</ID>
  <IssueDate>2025-01-25</IssueDate>
  <Note>UTF-8 with BOM: Spëcïål Chäracters</Note>
</Invoice>`;

    const contentWithBOM = Buffer.concat([utf8BOM, Buffer.from(xmlContent, 'utf8')]);

    const einvoice = new EInvoice();

    // Only the load itself is best-effort: some implementations might not
    // support a BOM. The assertions run outside the try block so that a
    // failed expectation is reported instead of being logged as "not supported".
    let loaded = false;
    try {
      await einvoice.loadFromBuffer(contentWithBOM);
      loaded = true;
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      console.log('UTF-8 BOM handling not supported:', reason);
    }

    if (loaded) {
      // Verify BOM is handled correctly
      const parsedData = einvoice.getInvoiceData();
      expect(parsedData).toBeTruthy();

      const xmlString = einvoice.getXmlString();
      expect(xmlString).toContain('UTF8-BOM-TEST');
      expect(xmlString).toContain('Spëcïål Chäracters');
      // BOM should not appear in the output (a decoded BOM is U+FEFF)
      expect(xmlString.charCodeAt(0)).not.toBe(0xFEFF);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('utf8-bom', elapsed);
  });

  t.test('UTF-8 without explicit declaration', async () => {
    const startTime = performance.now();

    // Test UTF-8 content without encoding declaration (should default to UTF-8)
    const implicitUtf8 = `<?xml version="1.0"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>IMPLICIT-UTF8</ID>
  <Note>Köln München København</Note>
</Invoice>`;

    const einvoice = new EInvoice();
    await einvoice.loadFromString(implicitUtf8);

    // Verify UTF-8 is used by default
    const xmlString = einvoice.getXmlString();
    expect(xmlString).toContain('Köln München København');

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('implicit-utf8', elapsed);
  });

  t.test('Multi-byte UTF-8 sequences', async () => {
    const startTime = performance.now();

    // Test various UTF-8 multi-byte sequences
    const multiByteContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>MULTIBYTE-UTF8</ID>
  <Note>
    2-byte: £¥€ñüäöß
    3-byte: ₹₽₨ 中文漢字
    4-byte: 𝕳𝖊𝖑𝖑𝖔 🎉🌍🚀
    Mixed: Prix: 42,50€ (včetně DPH)
  </Note>
</Invoice>`;

    const einvoice = new EInvoice();
    await einvoice.loadFromString(multiByteContent);

    const xmlString = einvoice.getXmlString();
    // Verify all multi-byte sequences are preserved
    expect(xmlString).toContain('£¥€ñüäöß');
    expect(xmlString).toContain('₹₽₨');
    expect(xmlString).toContain('中文漢字');
    expect(xmlString).toContain('𝕳𝖊𝖑𝖑𝖔');
    expect(xmlString).toContain('🎉🌍🚀');
    expect(xmlString).toContain('42,50€');
    expect(xmlString).toContain('včetně DPH');

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('multibyte-utf8', elapsed);
  });

  t.test('UTF-8 encoding in attributes', async () => {
    const startTime = performance.now();

    const attributeContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>UTF8-ATTR-TEST</ID>
  <PaymentMeans>
    <PaymentMeansCode name="Überweisung">30</PaymentMeansCode>
    <PayeeFinancialAccount>
      <Name>Büro für Städtebau</Name>
      <FinancialInstitutionBranch>
        <Name>Sparkasse Köln/Bonn</Name>
      </FinancialInstitutionBranch>
    </PayeeFinancialAccount>
  </PaymentMeans>
  <TaxTotal>
    <TaxAmount currencyID="EUR" symbol="€">19.00</TaxAmount>
  </TaxTotal>
</Invoice>`;

    const einvoice = new EInvoice();
    await einvoice.loadFromString(attributeContent);

    const xmlString = einvoice.getXmlString();
    expect(xmlString).toContain('name="Überweisung"');
    expect(xmlString).toContain('Büro für Städtebau');
    expect(xmlString).toContain('Sparkasse Köln/Bonn');
    expect(xmlString).toContain('symbol="€"');

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('utf8-attributes', elapsed);
  });

  t.test('UTF-8 corpus validation', async () => {
    const startTime = performance.now();
    let processedCount = 0;
    let utf8Count = 0;

    const files = await corpusLoader.getAllFiles();
    const xmlFiles = files.filter(f => f.endsWith('.xml'));

    // Test a sample of XML files for UTF-8 handling
    const sampleSize = Math.min(50, xmlFiles.length);
    const sample = xmlFiles.slice(0, sampleSize);

    // Encoding names are case-insensitive and whitespace is allowed around
    // "=", so match the declaration with a pattern instead of exact strings.
    const utf8DeclarationPattern = /encoding\s*=\s*(["'])utf-8\1/i;

    for (const file of sample) {
      let xmlString;
      try {
        const content = await corpusLoader.readFile(file);
        const einvoice = new EInvoice();

        if (typeof content === 'string') {
          await einvoice.loadFromString(content);
        } else {
          await einvoice.loadFromBuffer(content);
        }

        xmlString = einvoice.getXmlString();
      } catch (error) {
        // Some files might have different encodings; skip them, but keep the
        // reason visible instead of discarding it.
        const reason = error instanceof Error ? error.message : String(error);
        console.log(`Non-UTF-8 or invalid file: ${file}`, reason);
        continue;
      }

      // Check if encoding is preserved or defaulted to UTF-8
      if (utf8DeclarationPattern.test(xmlString)) {
        utf8Count++;
      }

      // Verify content is properly encoded. These run outside the try block
      // so that a failed expectation fails the test rather than being logged
      // as an unreadable file.
      expect(xmlString).toBeTruthy();
      expect(xmlString.length).toBeGreaterThan(0);

      processedCount++;
    }

    console.log(`UTF-8 corpus test: ${utf8Count}/${processedCount} files explicitly use UTF-8`);
    expect(processedCount).toBeGreaterThan(0);

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('corpus-utf8', elapsed);
  });

  t.test('UTF-8 normalization', async () => {
    const startTime = performance.now();

    // Test Unicode normalization forms (NFC, NFD).
    // The two spellings are written with explicit escapes so the fixture is
    // guaranteed to contain both the precomposed U+00E9 and the decomposed
    // "e" + U+0301, regardless of how an editor or tool normalizes this file.
    const unnormalizedContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>NORMALIZATION-TEST</ID>
  <Note>Caf\u00E9 (NFC) vs Cafe\u0301 (NFD)</Note>
  <AccountingSupplierParty>
    <Party>
      <PartyName>
        <Name>André's Büro</Name>
      </PartyName>
    </Party>
  </AccountingSupplierParty>
</Invoice>`;

    const einvoice = new EInvoice();
    await einvoice.loadFromString(unnormalizedContent);

    const xmlString = einvoice.getXmlString();
    // Both forms should be preserved
    expect(xmlString).toContain('Caf\u00E9');
    expect(xmlString).toContain('Cafe\u0301');
    expect(xmlString).toContain("André's Büro");

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('utf8-normalization', elapsed);
  });

  // Print performance summary
  performanceTracker.printSummary();

  // Performance assertions
  const avgTime = performanceTracker.getAverageTime();
  expect(avgTime).toBeLessThan(100); // UTF-8 operations should be fast
});
|
|||
|
|
|||
|
// Call the tap runner's start() after the ENC-01 test has been registered
// (presumably this is what triggers execution — confirm against the tap bundle docs).
tap.start();
|