This commit is contained in:
2025-05-25 19:45:37 +00:00
parent e89675c319
commit 39942638d9
110 changed files with 49183 additions and 3104 deletions

View File

@ -0,0 +1,280 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-01: UTF-8 Encoding - should handle UTF-8 encoded documents correctly', async (t) => {
// ENC-01: Verify correct handling of UTF-8 encoded XML documents
// This test ensures that the library can properly read, process, and write UTF-8 encoded invoices
const performanceTracker = new PerformanceTracker('ENC-01: UTF-8 Encoding');
const corpusLoader = new CorpusLoader();
t.test('Basic UTF-8 encoding support', async () => {
  // Loads a UTF-8 invoice mixing Latin, CJK, Arabic, Cyrillic and emoji text from a
  // string and checks that every character group is still present in the XML
  // returned by getXmlString().
  const startTime = performance.now();
  // The customer name contains an ampersand. It is written as &amp; because a raw
  // "&" in character data makes the document ill-formed XML.
  const utf8Content = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:fdc:peppol.eu:2017:poacc:billing:3.0</CustomizationID>
<ProfileID>urn:fdc:peppol.eu:2017:poacc:billing:01:1.0</ProfileID>
<ID>UTF8-TEST-001</ID>
<IssueDate>2025-01-25</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
<Note>UTF-8 Test: €£¥ñüäöß 中文 العربية русский 日本語 한국어 🌍📧</Note>
<DocumentCurrencyCode>EUR</DocumentCurrencyCode>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>UTF-8 Supplier GmbH</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>Büßer &amp; Müller GmbH</Name>
</PartyName>
</Party>
</AccountingCustomerParty>
<LegalMonetaryTotal>
<TaxExclusiveAmount currencyID="EUR">100.00</TaxExclusiveAmount>
<TaxInclusiveAmount currencyID="EUR">119.00</TaxInclusiveAmount>
<PayableAmount currencyID="EUR">119.00</PayableAmount>
</LegalMonetaryTotal>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(utf8Content);
  // Verify encoding declaration and every script survive the round trip
  const xmlString = einvoice.getXmlString();
  expect(xmlString).toContain('encoding="UTF-8"');
  expect(xmlString).toContain('€£¥ñüäöß');
  expect(xmlString).toContain('中文');
  expect(xmlString).toContain('العربية');
  expect(xmlString).toContain('русский');
  expect(xmlString).toContain('日本語');
  expect(xmlString).toContain('한국어');
  expect(xmlString).toContain('🌍📧');
  // Serialized XML must keep the ampersand escaped to stay well-formed
  expect(xmlString).toContain('Büßer &amp; Müller GmbH');
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('basic-utf8', elapsed);
});
t.test('UTF-8 BOM handling', async () => {
  // Feeds loadFromBuffer() a document prefixed with the three-byte UTF-8 byte order
  // mark and checks that the content is parsed and the BOM does not lead the output.
  // NOTE(review): the catch below also intercepts failed expectations (assuming
  // expect() throws on mismatch), so this sub-test only logs — confirm that is intended.
  const t0 = performance.now();
  const bomBytes = Buffer.from([0xEF, 0xBB, 0xBF]);
  const documentText = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>UTF8-BOM-TEST</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>UTF-8 with BOM: Spëcïål Chäracters</Note>
</Invoice>`;
  const payload = Buffer.concat([bomBytes, Buffer.from(documentText, 'utf8')]);
  const invoice = new EInvoice();
  try {
    await invoice.loadFromBuffer(payload);
    // Parsed data must exist once the BOM has been consumed
    expect(invoice.getInvoiceData()).toBeTruthy();
    const serialized = invoice.getXmlString();
    expect(serialized).toContain('UTF8-BOM-TEST');
    expect(serialized).toContain('Spëcïål Chäracters');
    // U+FEFF must not be the first character of the serialized document
    expect(serialized.charCodeAt(0)).not.toBe(0xFEFF);
  } catch (error) {
    // Lack of BOM support is tolerated and only reported
    console.log('UTF-8 BOM handling not supported:', error.message);
  }
  performanceTracker.addMeasurement('utf8-bom', performance.now() - t0);
});
t.test('UTF-8 without explicit declaration', async () => {
  // The XML declaration below omits the encoding attribute; the non-ASCII city
  // names must still come back unchanged from getXmlString().
  const t0 = performance.now();
  const undeclaredDocument = `<?xml version="1.0"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>IMPLICIT-UTF8</ID>
<Note>Köln München København</Note>
</Invoice>`;
  const invoice = new EInvoice();
  await invoice.loadFromString(undeclaredDocument);
  const serialized = invoice.getXmlString();
  expect(serialized).toContain('Köln München København');
  performanceTracker.addMeasurement('implicit-utf8', performance.now() - t0);
});
t.test('Multi-byte UTF-8 sequences', async () => {
  // Checks that characters needing 2, 3 and 4 bytes in UTF-8 are all preserved by a
  // load / getXmlString() round trip.
  const startTime = performance.now();
  // The euro sign (U+20AC) encodes to three bytes in UTF-8, so it is grouped with
  // the other 3-byte characters rather than with the 2-byte Latin-1 supplement ones.
  const multiByteContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MULTIBYTE-UTF8</ID>
<Note>
2-byte: £¥ñüäöß
3-byte: €₹₽₨ 中文漢字
4-byte: 𝕳𝖊𝖑𝖑𝖔 🎉🌍🚀
Mixed: Prix: 42,50€ (včetně DPH)
</Note>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(multiByteContent);
  const xmlString = einvoice.getXmlString();
  // Verify all multi-byte sequences are preserved
  expect(xmlString).toContain('£¥ñüäöß');
  expect(xmlString).toContain('€₹₽₨');
  expect(xmlString).toContain('中文漢字');
  expect(xmlString).toContain('𝕳𝖊𝖑𝖑𝖔');
  expect(xmlString).toContain('🎉🌍🚀');
  expect(xmlString).toContain('42,50€');
  expect(xmlString).toContain('včetně DPH');
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('multibyte-utf8', elapsed);
});
t.test('UTF-8 encoding in attributes', async () => {
  // Non-ASCII text appears both in attribute values and in element content; each
  // occurrence must be found verbatim in the serialized XML.
  const t0 = performance.now();
  const documentText = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>UTF8-ATTR-TEST</ID>
<PaymentMeans>
<PaymentMeansCode name="Überweisung">30</PaymentMeansCode>
<PayeeFinancialAccount>
<Name>Büro für Städtebau</Name>
<FinancialInstitutionBranch>
<Name>Sparkasse Köln/Bonn</Name>
</FinancialInstitutionBranch>
</PayeeFinancialAccount>
</PaymentMeans>
<TaxTotal>
<TaxAmount currencyID="EUR" symbol="€">19.00</TaxAmount>
</TaxTotal>
</Invoice>`;
  const invoice = new EInvoice();
  await invoice.loadFromString(documentText);
  const serialized = invoice.getXmlString();
  // Attribute values first, then element text
  expect(serialized).toContain('name="Überweisung"');
  expect(serialized).toContain('Büro für Städtebau');
  expect(serialized).toContain('Sparkasse Köln/Bonn');
  expect(serialized).toContain('symbol="€"');
  performanceTracker.addMeasurement('utf8-attributes', performance.now() - t0);
});
t.test('UTF-8 corpus validation', async () => {
  // Round-trips up to 50 corpus .xml files through EInvoice and counts how many
  // serialized results carry an explicit UTF-8 declaration. At least one file must
  // be processed successfully.
  const t0 = performance.now();
  let processedCount = 0;
  let utf8Count = 0;
  const allFiles = await corpusLoader.getAllFiles();
  // slice() clamps to the array length, so at most the first 50 XML files are taken
  const sample = allFiles.filter((name) => name.endsWith('.xml')).slice(0, 50);
  const utf8Declarations = ['encoding="UTF-8"', "encoding='UTF-8'"];
  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const invoice = new EInvoice();
      // The loader may hand back either text or raw bytes
      await (typeof content === 'string'
        ? invoice.loadFromString(content)
        : invoice.loadFromBuffer(content));
      const serialized = invoice.getXmlString();
      if (utf8Declarations.some((declaration) => serialized.includes(declaration))) {
        utf8Count++;
      }
      // The serialized document must be non-empty
      expect(serialized).toBeTruthy();
      expect(serialized.length).toBeGreaterThan(0);
      processedCount++;
    } catch (error) {
      // Files that fail to load or serialize are reported and skipped
      console.log(`Non-UTF-8 or invalid file: ${file}`);
    }
  }
  console.log(`UTF-8 corpus test: ${utf8Count}/${processedCount} files explicitly use UTF-8`);
  expect(processedCount).toBeGreaterThan(0);
  performanceTracker.addMeasurement('corpus-utf8', performance.now() - t0);
});
t.test('UTF-8 normalization', async () => {
  // Checks that precomposed (NFC) and decomposed (NFD) spellings of the same word
  // both survive a load / getXmlString() round trip without being normalized.
  const startTime = performance.now();
  // Both forms are spelled with escapes so the distinction cannot be lost when an
  // editor or tool silently normalizes the source file.
  const cafeNfc = 'Caf\u00E9'; // "é" as the single code point U+00E9
  const cafeNfd = 'Cafe\u0301'; // "e" followed by combining acute accent U+0301
  const unnormalizedContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>NORMALIZATION-TEST</ID>
<Note>${cafeNfc} (NFC) vs ${cafeNfd} (NFD)</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>André's Büro</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(unnormalizedContent);
  const xmlString = einvoice.getXmlString();
  // Both forms should be preserved
  expect(xmlString).toContain(`${cafeNfc} (NFC)`);
  expect(xmlString).toContain(`${cafeNfd} (NFD)`);
  expect(xmlString).toContain("André's Büro");
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('utf8-normalization', elapsed);
});
// Print performance summary
performanceTracker.printSummary();
// Performance assertions
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(100); // UTF-8 operations should be fast
});
tap.start();

View File

@ -0,0 +1,307 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-02: UTF-16 Encoding - should handle UTF-16 encoded documents correctly', async (t) => {
// ENC-02: Verify correct handling of UTF-16 encoded XML documents (both BE and LE)
// This test ensures proper support for UTF-16 encoding variants
const performanceTracker = new PerformanceTracker('ENC-02: UTF-16 Encoding');
const corpusLoader = new CorpusLoader();
t.test('UTF-16 BE (Big Endian) encoding', async () => {
  // Loads a big-endian UTF-16 document (with BOM) from a buffer and checks that its
  // content is readable. If buffer loading fails, the bytes are decoded manually
  // and loaded as a string instead.
  const startTime = performance.now();
  const xmlContent = `<?xml version="1.0" encoding="UTF-16BE"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>UTF16BE-TEST</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>UTF-16 BE Test: €100 für Bücher</Note>
<DocumentCurrencyCode>EUR</DocumentCurrencyCode>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Großhändler GmbH</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
<LegalMonetaryTotal>
<PayableAmount currencyID="EUR">100.00</PayableAmount>
</LegalMonetaryTotal>
</Invoice>`;
  // Node has no 'utf16be' encoding: encode little-endian, then swap each byte pair
  const utf16BeBom = Buffer.from([0xFE, 0xFF]); // UTF-16 BE BOM
  const utf16BeContent = Buffer.from(xmlContent, 'utf16le').swap16();
  const contentWithBom = Buffer.concat([utf16BeBom, utf16BeContent]);
  const einvoice = new EInvoice();
  try {
    await einvoice.loadFromBuffer(contentWithBom);
    const parsedData = einvoice.getInvoiceData();
    expect(parsedData).toBeTruthy();
    const xmlString = einvoice.getXmlString();
    expect(xmlString).toContain('UTF16BE-TEST');
    expect(xmlString).toContain('€100 für Bücher');
    expect(xmlString).toContain('Großhändler GmbH');
  } catch (error) {
    console.log('UTF-16 BE not fully supported:', error.message);
    // Fallback: decode manually. The bytes are big-endian, so they must be swapped
    // back to little-endian before 'utf16le' decoding. swap16() mutates in place,
    // hence the copy via Buffer.from().
    const decoded = Buffer.from(contentWithBom)
      .swap16()
      .toString('utf16le')
      .replace(/^\ufeff/, '');
    await einvoice.loadFromString(decoded);
    expect(einvoice.getXmlString()).toContain('UTF16BE-TEST');
  }
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('utf16-be', elapsed);
});
t.test('UTF-16 LE (Little Endian) encoding', async () => {
  // Loads a little-endian UTF-16 document (with BOM) from a buffer and checks that
  // arrows, card suits and accented names are readable. If buffer loading fails,
  // the bytes are decoded manually and loaded as a string instead.
  const startTime = performance.now();
  // The ampersand in the customer name is written as &amp; because a raw "&" in
  // character data makes the document ill-formed XML.
  const xmlContent = `<?xml version="1.0" encoding="UTF-16LE"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>UTF16LE-TEST</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>UTF-16 LE: Special chars → ← ↑ ↓ ♠ ♣ ♥ ♦</Note>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>François &amp; Søren Ltd.</Name>
</PartyName>
</Party>
</AccountingCustomerParty>
</Invoice>`;
  // Convert to UTF-16 LE with BOM
  const utf16LeBom = Buffer.from([0xFF, 0xFE]); // UTF-16 LE BOM
  const utf16LeContent = Buffer.from(xmlContent, 'utf16le');
  const contentWithBom = Buffer.concat([utf16LeBom, utf16LeContent]);
  const einvoice = new EInvoice();
  try {
    await einvoice.loadFromBuffer(contentWithBom);
    const xmlString = einvoice.getXmlString();
    expect(xmlString).toContain('UTF16LE-TEST');
    expect(xmlString).toContain('→ ← ↑ ↓');
    expect(xmlString).toContain('♠ ♣ ♥ ♦');
    // Serialized XML must keep the ampersand escaped to stay well-formed
    expect(xmlString).toContain('François &amp; Søren Ltd.');
  } catch (error) {
    console.log('UTF-16 LE not fully supported:', error.message);
    // Fallback: decode the little-endian bytes directly and drop the BOM character
    const decoded = contentWithBom.toString('utf16le').replace(/^\ufeff/, '');
    await einvoice.loadFromString(decoded);
    expect(einvoice.getXmlString()).toContain('UTF16LE-TEST');
  }
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('utf16-le', elapsed);
});
t.test('UTF-16 without BOM', async () => {
  // Passes UTF-16 bytes with no byte order mark to loadFromBuffer(); only the
  // encoding="UTF-16" declaration hints at the encoding. Failures are logged rather
  // than raised.
  const t0 = performance.now();
  const documentText = `<?xml version="1.0" encoding="UTF-16"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>UTF16-NO-BOM</ID>
<Note>Ψ Ω α β γ δ ε ζ η θ</Note>
</Invoice>`;
  // The bytes are always little-endian here ('utf16le'), independent of the host
  const rawUtf16 = Buffer.from(documentText, 'utf16le');
  const invoice = new EInvoice();
  try {
    await invoice.loadFromBuffer(rawUtf16);
    const serialized = invoice.getXmlString();
    expect(serialized).toContain('UTF16-NO-BOM');
    expect(serialized).toContain('Ψ Ω α β γ δ ε ζ η θ');
  } catch (error) {
    console.log('UTF-16 without BOM requires explicit handling:', error.message);
  }
  performanceTracker.addMeasurement('utf16-no-bom', performance.now() - t0);
});
t.test('UTF-16 surrogate pairs', async () => {
  // Exercises characters outside the Basic Multilingual Plane, which UTF-16 stores
  // as surrogate pairs: emoji, mathematical letters, CJK extension and ancient
  // scripts.
  const t0 = performance.now();
  const documentText = `<?xml version="1.0" encoding="UTF-16"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>UTF16-SURROGATE</ID>
<Note>Emojis: 😀😃😄😁 Math: 𝕳𝖊𝖑𝖑𝖔 CJK Ext: 𠀀𠀁</Note>
<InvoiceLine>
<Note>Ancient scripts: 𐌀𐌁𐌂 𓀀𓀁𓀂</Note>
</InvoiceLine>
</Invoice>`;
  // Little-endian BOM followed by the little-endian encoded document
  const payload = Buffer.concat([
    Buffer.from([0xFF, 0xFE]),
    Buffer.from(documentText, 'utf16le'),
  ]);
  const invoice = new EInvoice();
  try {
    await invoice.loadFromBuffer(payload);
    const serialized = invoice.getXmlString();
    expect(serialized).toContain('😀😃😄😁');
    expect(serialized).toContain('𝕳𝖊𝖑𝖑𝖔');
    expect(serialized).toContain('𠀀𠀁');
    expect(serialized).toContain('𐌀𐌁𐌂');
    expect(serialized).toContain('𓀀𓀁𓀂');
  } catch (error) {
    console.log('Surrogate pair handling:', error.message);
    // Fallback: decode manually, strip the BOM character and load as a string
    const decodedText = payload.toString('utf16le').replace(/^\ufeff/, '');
    await invoice.loadFromString(decodedText);
  }
  performanceTracker.addMeasurement('utf16-surrogates', performance.now() - t0);
});
t.test('UTF-16 to UTF-8 conversion', async () => {
  // Loads UTF-16 LE bytes and checks that the string from getXmlString() contains
  // the expected names and survives a UTF-8 encode/decode round trip unchanged.
  const t0 = performance.now();
  const documentText = `<?xml version="1.0" encoding="UTF-16"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>UTF16-TO-UTF8</ID>
<Note>Müller, François, 北京, Москва</Note>
</Invoice>`;
  const payload = Buffer.concat([
    Buffer.from([0xFF, 0xFE]),
    Buffer.from(documentText, 'utf16le'),
  ]);
  const invoice = new EInvoice();
  try {
    await invoice.loadFromBuffer(payload);
    const serialized = invoice.getXmlString();
    for (const expected of ['Müller', 'François', '北京', 'Москва']) {
      expect(serialized).toContain(expected);
    }
    // Encoding to UTF-8 bytes and decoding again must reproduce the same string
    const roundTripped = Buffer.from(serialized, 'utf8').toString('utf8');
    expect(roundTripped).toBe(serialized);
  } catch (error) {
    console.log('UTF-16 to UTF-8 conversion not supported:', error.message);
  }
  performanceTracker.addMeasurement('utf16-to-utf8', performance.now() - t0);
});
t.test('Mixed content with UTF-16', async () => {
  // A UTF-16 LE document whose notes mix ASCII text with bullets, arrows, the
  // multiplication sign and the euro sign; each symbol must appear in the output.
  const t0 = performance.now();
  const documentText = `<?xml version="1.0" encoding="UTF-16"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>UTF16-MIXED</ID>
<PaymentTerms>
<Note>Payment terms: 30 days net
• Early payment: 2% discount
• Late payment: 1.5% interest
→ Bank: Sparkasse München
← Account: DE89 3704 0044 0532 0130 00</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Description>Bücher (10× @ €15)</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const payload = Buffer.concat([
    Buffer.from([0xFF, 0xFE]),
    Buffer.from(documentText, 'utf16le'),
  ]);
  const invoice = new EInvoice();
  try {
    await invoice.loadFromBuffer(payload);
    const serialized = invoice.getXmlString();
    for (const symbol of ['•', '→', '←', '×', '€', 'Sparkasse München']) {
      expect(serialized).toContain(symbol);
    }
  } catch (error) {
    console.log('UTF-16 mixed content:', error.message);
  }
  performanceTracker.addMeasurement('utf16-mixed', performance.now() - t0);
});
t.test('Corpus UTF-16 detection', async () => {
  // Scans up to 30 corpus .xml files and reports how many start with a UTF-16 byte
  // order mark. Purely informational: nothing is asserted.
  const t0 = performance.now();
  let utf16Count = 0;
  let checkedCount = 0;
  const allFiles = await corpusLoader.getAllFiles();
  // slice() clamps to the array length, so at most the first 30 XML files are taken
  const sample = allFiles.filter((name) => name.endsWith('.xml')).slice(0, 30);
  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      if (Buffer.isBuffer(content)) {
        const hasBigEndianBom = content[0] === 0xFE && content[1] === 0xFF;
        const hasLittleEndianBom = content[0] === 0xFF && content[1] === 0xFE;
        if (hasBigEndianBom || hasLittleEndianBom) {
          utf16Count++;
          console.log(`Found UTF-16 file: ${file}`);
        }
      }
      // Non-buffer content is counted as checked without being inspected
      checkedCount++;
    } catch (error) {
      // Unreadable files are skipped on purpose
    }
  }
  console.log(`UTF-16 corpus scan: ${utf16Count}/${checkedCount} files use UTF-16`);
  performanceTracker.addMeasurement('corpus-utf16', performance.now() - t0);
});
// Print performance summary
performanceTracker.printSummary();
// Performance assertions
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(150); // UTF-16 operations may be slightly slower than UTF-8
});
tap.start();

View File

@ -0,0 +1,351 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-03: ISO-8859-1 Encoding - should handle ISO-8859-1 (Latin-1) encoded documents', async (t) => {
// ENC-03: Verify correct handling of ISO-8859-1 encoded XML documents
// This test ensures support for legacy Western European character encoding
const performanceTracker = new PerformanceTracker('ENC-03: ISO-8859-1 Encoding');
const corpusLoader = new CorpusLoader();
t.test('Basic ISO-8859-1 encoding', async () => {
  // Encodes an invoice as ISO-8859-1 bytes, loads it from the buffer and checks
  // that the accented Latin-1 text is readable. If buffer loading fails, the bytes
  // are decoded manually and loaded as a string instead.
  const startTime = performance.now();
  // Two things are written as references so the fixture is valid for its declared
  // encoding:
  //  - "&" as &amp;, because a raw ampersand makes the document ill-formed XML;
  //  - the euro sign as &#8364;, because U+20AC has no ISO-8859-1 code point and
  //    Buffer.from(..., 'latin1') would silently turn it into a different byte.
  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ISO88591-TEST</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>ISO-8859-1 Test: àáâãäåæçèéêëìíîïñòóôõöøùúûüý</Note>
<DocumentCurrencyCode>EUR</DocumentCurrencyCode>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Société Générale</Name>
</PartyName>
<PostalAddress>
<StreetName>Rue de la Paix</StreetName>
<CityName>Paris</CityName>
<Country>
<IdentificationCode>FR</IdentificationCode>
</Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>Müller &amp; Söhne GmbH</Name>
</PartyName>
<PostalAddress>
<StreetName>Königsallee</StreetName>
<CityName>Düsseldorf</CityName>
</PostalAddress>
</Party>
</AccountingCustomerParty>
<InvoiceLine>
<Note>Prix unitaire: 25,50 &#8364; (vingt-cinq euros cinquante)</Note>
</InvoiceLine>
</Invoice>`;
  // Convert to ISO-8859-1 buffer
  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
  const einvoice = new EInvoice();
  try {
    await einvoice.loadFromBuffer(iso88591Buffer);
    const xmlString = einvoice.getXmlString();
    expect(xmlString).toContain('ISO88591-TEST');
    expect(xmlString).toContain('àáâãäåæçèéêëìíîïñòóôõöøùúûüý');
    expect(xmlString).toContain('Société Générale');
    expect(xmlString).toContain('Müller &amp; Söhne GmbH');
    expect(xmlString).toContain('Königsallee');
    expect(xmlString).toContain('Düsseldorf');
    // The euro reference may be resolved to the character or kept as a reference
    expect(xmlString).toMatch(/25,50 (?:€|&#8364;|&#x20AC;)/);
  } catch (error) {
    console.log('ISO-8859-1 handling issue:', error.message);
    // Try string conversion fallback
    const decoded = iso88591Buffer.toString('latin1');
    await einvoice.loadFromString(decoded);
    expect(einvoice.getXmlString()).toContain('ISO88591-TEST');
  }
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('basic-iso88591', elapsed);
});
t.test('ISO-8859-1 special characters', async () => {
  // Checks that the ISO-8859-1 symbol range 0xA1-0xBF (inverted exclamation mark up
  // to inverted question mark) survives loading from a Latin-1 encoded buffer.
  // Failures are logged rather than raised.
  const startTime = performance.now();
  // Generated rather than typed: the range includes the invisible soft hyphen
  // (0xAD), which is easily lost when the source file is edited or copied.
  const specialChars = Array.from(
    { length: 0xBF - 0xA1 + 1 },
    (_, offset) => String.fromCharCode(0xA1 + offset),
  ).join('');
  // The euro sign has no ISO-8859-1 code point, so it is written as the numeric
  // character reference &#8364; instead of a literal the encoder cannot represent.
  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ISO88591-SPECIAL</ID>
<Note>Special chars: ${specialChars}</Note>
<PaymentMeans>
<PaymentID>REF°12345</PaymentID>
<InstructionNote>Amount: £100 or &#8364;120 (±5%)</InstructionNote>
</PaymentMeans>
<TaxTotal>
<TaxSubtotal>
<TaxCategory>
<ID>S</ID>
<Percent>19</Percent>
<TaxScheme>
<Name>VAT § 19</Name>
</TaxScheme>
</TaxCategory>
</TaxSubtotal>
</TaxTotal>
<LegalMonetaryTotal>
<TaxExclusiveAmount currencyID="EUR">100.00</TaxExclusiveAmount>
<PayableAmount currencyID="EUR">119.00</PayableAmount>
</LegalMonetaryTotal>
</Invoice>`;
  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
  const einvoice = new EInvoice();
  try {
    await einvoice.loadFromBuffer(iso88591Buffer);
    const xmlString = einvoice.getXmlString();
    expect(xmlString).toContain(specialChars);
    expect(xmlString).toContain('REF°12345');
    // The euro reference may be resolved to the character or kept as a reference
    expect(xmlString).toMatch(/£100 or (?:€|&#8364;|&#x20AC;)120 \(±5%\)/);
    expect(xmlString).toContain('VAT § 19');
  } catch (error) {
    console.log('ISO-8859-1 special characters:', error.message);
  }
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('iso88591-special', elapsed);
});
t.test('ISO-8859-1 to UTF-8 conversion', async () => {
  // Loads Latin-1 encoded bytes and checks that the string from getXmlString()
  // holds the accented text and survives a UTF-8 encode/decode round trip.
  // Failures are logged rather than raised.
  const t0 = performance.now();
  const documentText = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ISO-TO-UTF8</ID>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>André's Café</Name>
</PartyName>
<Contact>
<Name>François Müller</Name>
<ElectronicMail>françois@café.fr</ElectronicMail>
</Contact>
</Party>
</AccountingSupplierParty>
<InvoiceLine>
<Item>
<Name>Crème brûlée</Name>
<Description>Dessert français traditionnel</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const latin1Bytes = Buffer.from(documentText, 'latin1');
  const invoice = new EInvoice();
  try {
    await invoice.loadFromBuffer(latin1Bytes);
    const serialized = invoice.getXmlString();
    // Every accented value must have been decoded correctly
    const expectedFragments = [
      "André's Café",
      'François Müller',
      'françois@café.fr',
      'Crème brûlée',
      'Dessert français traditionnel',
    ];
    for (const fragment of expectedFragments) {
      expect(serialized).toContain(fragment);
    }
    // Encoding to UTF-8 bytes and decoding again must reproduce the same string
    const roundTripped = Buffer.from(serialized, 'utf8').toString('utf8');
    expect(roundTripped).toBe(serialized);
  } catch (error) {
    console.log('ISO-8859-1 to UTF-8 conversion:', error.message);
  }
  performanceTracker.addMeasurement('iso-to-utf8', performance.now() - t0);
});
t.test('ISO-8859-1 limitations', async () => {
  // Mixes characters that ISO-8859-1 can represent (pound, yen, degree, accented
  // letters) with the euro sign, which it cannot. Only the representable ones are
  // asserted; failures are logged rather than raised.
  const t0 = performance.now();
  const documentText = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ISO88591-LIMITS</ID>
<Note>Euro: € Pound: £ Yen: ¥</Note>
<InvoiceLine>
<Note>Temperature: 20°C (68°F)</Note>
<Item>
<Name>Naïve café</Name>
</Item>
</InvoiceLine>
</Invoice>`;
  const latin1Bytes = Buffer.from(documentText, 'latin1');
  const invoice = new EInvoice();
  try {
    await invoice.loadFromBuffer(latin1Bytes);
    const serialized = invoice.getXmlString();
    // Characters that do exist in ISO-8859-1
    expect(serialized).toContain('£'); // code point 163
    expect(serialized).toContain('¥'); // code point 165
    expect(serialized).toContain('°'); // code point 176
    expect(serialized).toContain('Naïve café');
    // The euro sign belongs to ISO-8859-15, not ISO-8859-1, so it is deliberately
    // left unasserted: it may be replaced or otherwise mangled.
  } catch (error) {
    console.log('ISO-8859-1 limitation test:', error.message);
  }
  performanceTracker.addMeasurement('iso88591-limits', performance.now() - t0);
});
t.test('Mixed encoding scenarios', async () => {
  // Loads a Latin-1 encoded invoice with Spanish and Catalan names. Unlike the
  // neighbouring sub-tests there is no try/catch: a load or assertion failure
  // propagates out of the callback.
  const t0 = performance.now();
  const documentText = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-ENCODING</ID>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>José García S.A.</Name>
</PartyName>
<PostalAddress>
<StreetName>Passeig de Gràcia</StreetName>
<CityName>Barcelona</CityName>
<CountrySubentity>Catalunya</CountrySubentity>
<Country>
<IdentificationCode>ES</IdentificationCode>
</Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<PaymentTerms>
<Note>Pago: 30 días fecha factura</Note>
</PaymentTerms>
</Invoice>`;
  const invoice = new EInvoice();
  await invoice.loadFromBuffer(Buffer.from(documentText, 'latin1'));
  const serialized = invoice.getXmlString();
  expect(serialized).toContain('José García S.A.');
  expect(serialized).toContain('Passeig de Gràcia');
  expect(serialized).toContain('Catalunya');
  expect(serialized).toContain('30 días fecha factura');
  performanceTracker.addMeasurement('mixed-encoding', performance.now() - t0);
});
t.test('Corpus ISO-8859-1 detection', async () => {
  // Scans up to 40 corpus .xml files and reports how many declare ISO-8859-1 as
  // their encoding. Purely informational: nothing is asserted.
  const startTime = performance.now();
  let iso88591Count = 0;
  let checkedCount = 0;
  // Encoding names are case-insensitive and the value may use either quote style,
  // so one regex covers every spelling instead of a handful of fixed substrings.
  const iso88591Declaration = /encoding=["']iso-8859-1["']/i;
  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));
  // Check sample for ISO-8859-1 encoded files
  const sampleSize = Math.min(40, xmlFiles.length);
  const sample = xmlFiles.slice(0, sampleSize);
  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      // The declaration itself is plain ASCII, so decoding the bytes as UTF-8 is
      // sufficient for locating it
      const xmlString = Buffer.isBuffer(content) ? content.toString('utf8') : content;
      if (iso88591Declaration.test(xmlString)) {
        iso88591Count++;
        console.log(`Found ISO-8859-1 file: ${file}`);
      }
      checkedCount++;
    } catch (error) {
      // Skip problematic files
    }
  }
  console.log(`ISO-8859-1 corpus scan: ${iso88591Count}/${checkedCount} files use ISO-8859-1`);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-iso88591', elapsed);
});
t.test('Character reference handling', async () => {
  // Characters that ISO-8859-1 cannot encode are supplied as numeric character
  // references; after loading they must appear either resolved to the character or
  // still as a reference.
  const startTime = performance.now();
  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>CHAR-REF-TEST</ID>
<Note>Euro: &#8364; Em dash: &#8212; Ellipsis: &#8230;</Note>
<InvoiceLine>
<Note>Smart quotes: &#8220;Hello&#8221; &#8216;World&#8217;</Note>
<Item>
<Name>Trademark&#8482; Product</Name>
<Description>Copyright &#169; 2025</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
  const einvoice = new EInvoice();
  await einvoice.loadFromBuffer(iso88591Buffer);
  const xmlString = einvoice.getXmlString();
  // The alternatives are grouped so the label is required in every case. Without
  // the group, "|" splits the whole pattern and a bare reference anywhere in the
  // document would satisfy the check.
  expect(xmlString).toMatch(/Euro: (?:€|&#8364;|&#x20AC;)/);
  expect(xmlString).toMatch(/Copyright (?:©|&#169;|&#xA9;)/);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('char-references', elapsed);
});
// Print performance summary
performanceTracker.printSummary();
// Performance assertions
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(120); // ISO-8859-1 operations should be reasonably fast
});
tap.start();

View File

@ -0,0 +1,371 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-04: Character Escaping - should handle XML character escaping correctly', async (t) => {
// ENC-04: Verify proper escaping and unescaping of special XML characters
// This test ensures XML entities and special characters are handled correctly
const performanceTracker = new PerformanceTracker('ENC-04: Character Escaping');
const corpusLoader = new CorpusLoader();
t.test('Basic XML entity escaping', async () => {
  // Loads a document that uses the predefined entities &amp;, &lt; and &gt; next to
  // literal quotes and apostrophes, then checks that the serialized XML still
  // carries the escaped forms.
  const t0 = performance.now();
  const documentText = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ESCAPE-TEST-001</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>Test &amp; verify: &lt;invoice&gt; with "quotes" &amp; 'apostrophes'</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Smith &amp; Jones Ltd.</Name>
</PartyName>
<Contact>
<ElectronicMail>info@smith&amp;jones.com</ElectronicMail>
</Contact>
</Party>
</AccountingSupplierParty>
<PaymentTerms>
<Note>Terms: 2/10 net 30 (2% if paid &lt;= 10 days)</Note>
</PaymentTerms>
<InvoiceLine>
<Note>Price comparison: USD &lt; EUR &gt; GBP</Note>
<Item>
<Description>Product "A" &amp; Product 'B'</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const invoice = new EInvoice();
  await invoice.loadFromString(documentText);
  const parsed = invoice.getInvoiceData();
  const serialized = invoice.getXmlString();
  // Serialized output must keep the markup-significant characters escaped
  expect(serialized).toContain('Smith &amp; Jones Ltd.');
  expect(serialized).toContain('info@smith&amp;jones.com');
  expect(serialized).toContain('2% if paid &lt;= 10 days');
  expect(serialized).toContain('USD &lt; EUR &gt; GBP');
  expect(serialized).toContain('Product "A" &amp; Product \'B\'');
  // Parsed data, when it exposes notes, must hold the unescaped text
  if (parsed?.notes) {
    expect(parsed.notes[0]).toContain('Test & verify: <invoice> with "quotes" & \'apostrophes\'');
  }
  performanceTracker.addMeasurement('basic-escaping', performance.now() - t0);
});
t.test('Numeric character references', async () => {
  // Supplies symbols as decimal and hexadecimal character references and checks
  // that each one shows up in the serialized XML, either resolved to the actual
  // character or still written as a reference.
  const startTime = performance.now();
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>NUMERIC-REF-TEST</ID>
<Note>Decimal refs: &#8364; &#163; &#165; &#8482;</Note>
<PaymentMeans>
<InstructionNote>Hex refs: &#x20AC; &#x00A3; &#x00A5; &#x2122;</InstructionNote>
</PaymentMeans>
<InvoiceLine>
<Note>Mixed: &#169; 2025 &#x2014; All rights reserved&#x2122;</Note>
<Item>
<Name>Special chars: &#8211; &#8212; &#8230; &#8220;quoted&#8221;</Name>
<Description>Math: &#8804; &#8805; &#8800; &#177; &#247; &#215;</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // The implementation might convert references to characters or preserve them
  expect(xmlString).toMatch(/€|&#8364;|&#x20AC;/); // Euro
  expect(xmlString).toMatch(/£|&#163;|&#x00A3;/); // Pound
  expect(xmlString).toMatch(/¥|&#165;|&#x00A5;/); // Yen
  expect(xmlString).toMatch(/™|&#8482;|&#x2122;/); // Trademark
  expect(xmlString).toMatch(/©|&#169;/); // Copyright
  expect(xmlString).toMatch(/—|&#8212;|&#x2014;/); // Em dash
  // The curly quotes are written as escapes: a plain ASCII double quote would
  // match the XML declaration's own attribute quotes and make the check vacuous.
  expect(xmlString).toMatch(/\u201C|&#8220;|&#x201C;/); // Left double quotation mark
  expect(xmlString).toMatch(/\u201D|&#8221;|&#x201D;/); // Right double quotation mark
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('numeric-refs', elapsed);
});
// Feeds a document whose attribute values contain escaped markup characters
// and checks that the serialized attributes are still escaped.
t.test('Attribute value escaping', async () => {
  const t0 = performance.now();

  // Fixture: entities inside double- and single-quoted attribute values
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-ESCAPE-TEST</ID>
<PaymentMeans>
<PaymentMeansCode name="Bank &amp; Wire Transfer">30</PaymentMeansCode>
<PaymentID type="Order &lt;123&gt;">REF-2025-001</PaymentID>
<InstructionNote condition='If amount &gt; 1000 &amp; currency = "EUR"'>Special handling required</InstructionNote>
</PaymentMeans>
<TaxTotal>
<TaxAmount currencyID="EUR" note="Amount includes 19% VAT &amp; fees">119.00</TaxAmount>
</TaxTotal>
<InvoiceLine>
<DocumentReference>
<ID schemeID="Item's &quot;special&quot; code">ITEM-001</ID>
<DocumentDescription>Product with 'quotes' &amp; "double quotes"</DocumentDescription>
</DocumentReference>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Attribute values may be re-quoted with either quote style, but must stay escaped
  const attributePatterns = [
    /name="Bank &amp; Wire Transfer"|name='Bank &amp; Wire Transfer'/,
    /type="Order &lt;123&gt;"|type='Order &lt;123&gt;'/,
  ];
  for (const pattern of attributePatterns) {
    expect(serialized).toMatch(pattern);
  }

  for (const entity of ['&amp;', '&lt;', '&gt;']) {
    expect(serialized).toContain(entity);
  }

  // Quotes inside attributes: either escaped or the value uses the other quote style
  expect(serialized).toMatch(/&quot;|'/);

  performanceTracker.addMeasurement('attribute-escaping', performance.now() - t0);
});
// Loads a document with CDATA sections holding raw markup characters and
// accepts either outcome: CDATA kept verbatim, or converted to escaped text.
t.test('CDATA sections with special characters', async () => {
  const t0 = performance.now();

  // Fixture: CDATA sections whose content needs no escaping
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>CDATA-ESCAPE-TEST</ID>
<Note><![CDATA[Special characters: < > & " ' without escaping]]></Note>
<PaymentTerms>
<Note><![CDATA[HTML content: <p>Payment terms: <b>30 days</b> net</p>]]></Note>
</PaymentTerms>
<AdditionalDocumentReference>
<ID>SCRIPT-001</ID>
<DocumentDescription><![CDATA[
JavaScript example:
if (amount > 100 && currency == "EUR") {
discount = amount * 0.05;
}
]]></DocumentDescription>
</AdditionalDocumentReference>
<InvoiceLine>
<Note><![CDATA[Price formula: if quantity >= 10 then price < 50.00]]></Note>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  if (!serialized.includes('CDATA')) {
    // CDATA was flattened to text, so the markup characters must now be escaped
    for (const entity of ['&lt;', '&gt;', '&amp;']) {
      expect(serialized).toContain(entity);
    }
  } else {
    // CDATA was kept: delimiters present and raw characters left untouched inside
    expect(serialized).toContain('<![CDATA[');
    expect(serialized).toContain(']]>');
    expect(serialized).toMatch(/<!\[CDATA\[.*[<>&].*\]\]>/);
  }

  performanceTracker.addMeasurement('cdata-escaping', performance.now() - t0);
});
// Feeds character references that XML 1.0 forbids (C0 controls, surrogates)
// next to the three permitted control characters. Two outcomes are accepted:
// the loader rejects the document with a character-related error, or it loads
// and the permitted control characters are still present in the output.
t.test('Invalid character handling', async () => {
  const startTime = performance.now();

  // Test handling of characters that are invalid in XML
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>INVALID-CHAR-TEST</ID>
<Note>Control chars: &#x0; &#x1; &#x8; &#xB; &#xC; &#xE; &#x1F;</Note>
<PaymentTerms>
<Note>Valid controls: &#x9; &#xA; &#xD; (tab, LF, CR)</Note>
</PaymentTerms>
<InvoiceLine>
<Note>High Unicode: &#x10000; &#x10FFFF;</Note>
<Item>
<Description>Surrogate pairs: &#xD800; &#xDFFF; (invalid)</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();

  // Only the load/serialize step is guarded. The output assertions run outside
  // the try block so that an assertion failure is reported as such instead of
  // being caught and re-checked as if it were a parser error.
  let xmlString = '';
  let loaded = false;
  try {
    await einvoice.loadFromString(xmlContent);
    xmlString = einvoice.getXmlString();
    loaded = true;
  } catch (error) {
    // Some parsers reject invalid character references
    const message = error instanceof Error ? error.message : String(error);
    console.log('Invalid character handling:', message);
    expect(message).toMatch(/invalid.*character|character.*reference/i);
  }

  if (loaded) {
    // Valid control characters should be preserved, literally or as a reference.
    // The tab is matched with \t explicitly; a literal blank in the pattern
    // would match any space in the document.
    expect(xmlString).toMatch(/&#x9;|&#9;|\t/); // Tab
    expect(xmlString).toMatch(/&#xA;|&#10;|\n/); // Line feed
    expect(xmlString).toMatch(/&#xD;|&#13;|\r/); // Carriage return
    // Invalid characters might be filtered or cause errors
    // Implementation specific behavior
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('invalid-chars', elapsed);
});
// Combines escaped text, a CDATA section and a numeric reference in one
// document and checks the output still contains the three markup entities.
t.test('Mixed content escaping', async () => {
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-ESCAPE-TEST</ID>
<Note>Regular text with &amp; ampersand</Note>
<PaymentTerms>
<Note><![CDATA[CDATA with <b>tags</b> & ampersands]]></Note>
<SettlementPeriod>
<Description>Payment due in &lt; 30 days</Description>
<DurationMeasure unitCode="DAY">30</DurationMeasure>
</SettlementPeriod>
</PaymentTerms>
<AllowanceCharge>
<ChargeIndicator>false</ChargeIndicator>
<AllowanceChargeReason>Discount for orders &gt; &#8364;1000</AllowanceChargeReason>
<Amount currencyID="EUR">50.00</Amount>
</AllowanceCharge>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Mixed content should maintain proper escaping
  for (const entity of ['&amp;', '&lt;', '&gt;']) {
    expect(serialized).toContain(entity);
  }

  performanceTracker.addMeasurement('mixed-escaping', performance.now() - t0);
});
// Runs up to 50 corpus .xml files through EInvoice, counts how many serialized
// outputs contain entity or character references, and requires that at least
// one file was processed. Per-file errors are logged and the file is skipped.
t.test('Corpus escaping validation', async () => {
  const t0 = performance.now();
  const escapeMarkers = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#'];
  let handled = 0;
  let withEscapes = 0;

  const allFiles = await corpusLoader.getAllFiles();
  // Sample: the first 50 XML files (or fewer when the corpus is smaller)
  const sample = allFiles.filter(f => f.endsWith('.xml')).slice(0, 50);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const invoice = new EInvoice();
      // readFile may hand back text or a binary buffer; pick the matching loader
      if (typeof content === 'string') {
        await invoice.loadFromString(content);
      } else {
        await invoice.loadFromBuffer(content);
      }
      const serialized = invoice.getXmlString();

      // Count files whose output carries at least one escape sequence
      if (escapeMarkers.some(marker => serialized.includes(marker))) {
        withEscapes++;
      }

      // Output must be non-empty and carry an XML declaration
      expect(serialized).toBeTruthy();
      expect(serialized.includes('<?xml')).toBe(true);
      handled++;
    } catch (error) {
      console.log(`Escaping issue in ${file}:`, error.message);
    }
  }

  console.log(`Corpus escaping test: ${withEscapes}/${handled} files contain escaped characters`);
  expect(handled).toBeGreaterThan(0);

  performanceTracker.addMeasurement('corpus-escaping', performance.now() - t0);
});
// Loads a document with nested internal entity definitions (a small
// "billion laughs" pattern). Two outcomes are accepted: the loader rejects the
// document with an entity/expansion/security error, or it loads and the
// serialized output stays below one million characters.
t.test('Security: XML entity expansion', async () => {
  const startTime = performance.now();

  // Test protection against XML entity expansion attacks
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Invoice [
<!ENTITY lol "lol">
<!ENTITY lol2 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
<!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
]>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ENTITY-EXPANSION-TEST</ID>
<Note>&lol3;</Note>
</Invoice>`;

  const einvoice = new EInvoice();

  // Only the load/serialize step is guarded. The size assertion runs outside
  // the try block so that its failure is not caught and re-reported as an
  // error-message mismatch.
  let xmlString = '';
  let loaded = false;
  try {
    await einvoice.loadFromString(xmlContent);
    xmlString = einvoice.getXmlString();
    loaded = true;
  } catch (error) {
    // Good - entity expansion might be blocked
    const message = error instanceof Error ? error.message : String(error);
    console.log('Entity expansion protection:', message);
    expect(message).toMatch(/entity|expansion|security/i);
  }

  if (loaded) {
    // If entity expansion is allowed, check it's limited
    expect(xmlString.length).toBeLessThan(1000000); // Should not explode in size
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('entity-expansion', elapsed);
});
// Print performance summary for the measurements the sub-tests report via addMeasurement()
// NOTE(review): the t.test(...) calls above are not awaited; confirm the sub-tests have
// recorded their measurements before this summary and the assertion below run.
performanceTracker.printSummary();
// Performance assertions
// NOTE(review): the recorded values are performance.now() deltas, so the budget of 100 is
// presumably milliseconds — confirm against PerformanceTracker.getAverageTime().
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(100); // Escaping operations should be fast
});
// Start the tap runner (presumably runs the tap.test registered above — confirm against tapbundle docs)
tap.start();

View File

@ -0,0 +1,535 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-05: Special Characters - should handle special and international characters correctly', async (t) => {
// ENC-05: Verify handling of special characters across different languages and scripts
// This test ensures proper support for international invoicing
// Shared timing collector: every sub-test below reports its elapsed time to it via addMeasurement().
const performanceTracker = new PerformanceTracker('ENC-05: Special Characters');
// Used by the 'Corpus special character validation' sub-test to list and read sample files.
const corpusLoader = new CorpusLoader();
// Round-trips names, addresses and item texts containing Nordic, German,
// French, Croatian and Polish letters and checks each one is present in the
// serialized output.
t.test('European special characters', async () => {
  const startTime = performance.now();

  // The ampersand in the customer name is written as &amp;: a bare '&' in
  // element content is not well-formed XML.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>EU-SPECIAL-CHARS</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>European chars test</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Åsa Öberg AB (Sweden)</Name>
</PartyName>
<PostalAddress>
<StreetName>Østergade 42</StreetName>
<CityName>København</CityName>
<Country><IdentificationCode>DK</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>Müller &amp; Schäfer GmbH</Name>
</PartyName>
<PostalAddress>
<StreetName>Hauptstraße 15</StreetName>
<CityName>Düsseldorf</CityName>
<Country><IdentificationCode>DE</IdentificationCode></Country>
</PostalAddress>
<Contact>
<Name>François Lefèvre</Name>
<ElectronicMail>f.lefevre@müller-schäfer.de</ElectronicMail>
</Contact>
</Party>
</AccountingCustomerParty>
<InvoiceLine>
<Item>
<Name>Château Margaux (Bordeaux)</Name>
<Description>Vin rouge, millésime 2015, cépage: Cabernet Sauvignon</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>Prošek (Croatian dessert wine)</Name>
<Description>Vino desertno, područje: Dalmacija</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>Żubrówka (Polish vodka)</Name>
<Description>Wódka żytnia z trawą żubrową</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();

  // Nordic characters
  expect(xmlString).toContain('Åsa Öberg');
  expect(xmlString).toContain('Østergade');
  expect(xmlString).toContain('København');
  // German characters (the ampersand stays escaped in serialized XML)
  expect(xmlString).toContain('Müller &amp; Schäfer');
  expect(xmlString).toContain('Hauptstraße');
  expect(xmlString).toContain('Düsseldorf');
  expect(xmlString).toContain('müller-schäfer.de');
  // French characters
  expect(xmlString).toContain('François Lefèvre');
  expect(xmlString).toContain('Château Margaux');
  expect(xmlString).toContain('millésime');
  expect(xmlString).toContain('cépage');
  // Croatian characters
  expect(xmlString).toContain('Prošek');
  expect(xmlString).toContain('područje');
  // Polish characters
  expect(xmlString).toContain('Żubrówka');
  expect(xmlString).toContain('żytnia');
  expect(xmlString).toContain('żubrową');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('european-chars', elapsed);
});
// Round-trips a document containing currency symbols, both standalone and
// prefixed to amounts, and checks they are present in the serialized output.
t.test('Currency and monetary symbols', async () => {
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>CURRENCY-SYMBOLS</ID>
<Note>Currency symbols: € £ $ ¥ ₹ ₽ ₪ ₩ ₡ ₦ ₨ ₱ ₴ ₵ ₸ ₹ ₺ ₼</Note>
<TaxTotal>
<TaxAmount currencyID="EUR">€1,234.56</TaxAmount>
</TaxTotal>
<TaxTotal>
<TaxAmount currencyID="GBP">£987.65</TaxAmount>
</TaxTotal>
<TaxTotal>
<TaxAmount currencyID="USD">$2,345.67</TaxAmount>
</TaxTotal>
<TaxTotal>
<TaxAmount currencyID="JPY">¥123,456</TaxAmount>
</TaxTotal>
<TaxTotal>
<TaxAmount currencyID="INR">₹98,765</TaxAmount>
</TaxTotal>
<AllowanceCharge>
<ChargeIndicator>false</ChargeIndicator>
<AllowanceChargeReason>Discount (5% off orders > €500)</AllowanceChargeReason>
<Amount currencyID="EUR">25.50</Amount>
</AllowanceCharge>
<PaymentTerms>
<Note>Accepted: € EUR, £ GBP, $ USD, ¥ JPY, ₹ INR</Note>
</PaymentTerms>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Major currency symbols: Euro, Pound, Dollar, Yen, Rupee, Ruble, Shekel, Won
  const symbols = ['€', '£', '$', '¥', '₹', '₽', '₪', '₩'];
  // Amounts written with a leading symbol and thousands separators
  const formattedAmounts = ['€1,234.56', '£987.65', '$2,345.67', '¥123,456', '₹98,765'];

  for (const expected of [...symbols, ...formattedAmounts]) {
    expect(serialized).toContain(expected);
  }

  performanceTracker.addMeasurement('currency-symbols', performance.now() - t0);
});
// Round-trips a document containing mathematical and technical symbols and
// checks a selection of them is present in the serialized output.
t.test('Mathematical and technical symbols', async () => {
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MATH-SYMBOLS</ID>
<Note>Math symbols: ± × ÷ ≤ ≥ ≠ ≈ ∞ √ ∑ ∏ ∫ ∂ ∇ ∈ ∉ ⊂ ⊃ ∩</Note>
<InvoiceLine>
<LineExtensionAmount currencyID="EUR">100.00</LineExtensionAmount>
<PricingReference>
<AlternativeConditionPrice>
<PriceAmount currencyID="EUR">95.00</PriceAmount>
<PriceTypeCode>Discount ≥ 10 units</PriceTypeCode>
</AlternativeConditionPrice>
</PricingReference>
<Item>
<Description>Precision tool ± 0.001mm</Description>
<AdditionalItemProperty>
<Name>Temperature range</Name>
<Value>-40°C ≤ T ≤ +85°C</Value>
</AdditionalItemProperty>
<AdditionalItemProperty>
<Name>Dimensions</Name>
<Value>10cm × 5cm × 2cm</Value>
</AdditionalItemProperty>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Description>√2 ≈ 1.414, π ≈ 3.14159, e ≈ 2.71828</Description>
<AdditionalItemProperty>
<Name>Formula</Name>
<Value>Area = πr² (where r = radius)</Value>
</AdditionalItemProperty>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // In order: plus-minus, multiplication, division, less-or-equal,
  // greater-or-equal, not equal, approximately, infinity, square root, pi, degree
  const operators = ['±', '×', '÷', '≤', '≥', '≠', '≈', '∞', '√', 'π', '°'];
  for (const symbol of operators) {
    expect(serialized).toContain(symbol);
  }

  performanceTracker.addMeasurement('math-symbols', performance.now() - t0);
});
// Round-trips party and item texts written in Japanese, Chinese, Korean,
// Hindi and Thai and checks each text is present in the serialized output.
t.test('Asian scripts and characters', async () => {
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ASIAN-SCRIPTS</ID>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>株式会社山田商事 (Yamada Trading Co., Ltd.)</Name>
</PartyName>
<PostalAddress>
<StreetName>東京都千代田区丸の内1-1-1</StreetName>
<CityName>東京</CityName>
<Country><IdentificationCode>JP</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>北京科技有限公司 (Beijing Tech Co., Ltd.)</Name>
</PartyName>
<PostalAddress>
<StreetName>北京市朝阳区建国路88号</StreetName>
<CityName>北京</CityName>
<Country><IdentificationCode>CN</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingCustomerParty>
<InvoiceLine>
<Item>
<Name>전자제품 (Electronics)</Name>
<Description>최신 스마트폰 모델</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>कंप्यूटर उपकरण</Name>
<Description>नवीनतम लैपटॉप मॉडल</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>ซอฟต์แวร์คอมพิวเตอร์</Name>
<Description>โปรแกรมสำนักงาน</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  const expectedTexts = [
    // Japanese (Kanji, Hiragana, Katakana)
    '株式会社山田商事',
    '東京都千代田区丸の内',
    // Chinese (Simplified)
    '北京科技有限公司',
    '北京市朝阳区建国路',
    // Korean (Hangul)
    '전자제품',
    '최신 스마트폰 모델',
    // Hindi (Devanagari)
    'कंप्यूटर उपकरण',
    'नवीनतम लैपटॉप मॉडल',
    // Thai
    'ซอฟต์แวร์คอมพิวเตอร์',
    'โปรแกรมสำนักงาน',
  ];
  for (const text of expectedTexts) {
    expect(serialized).toContain(text);
  }

  performanceTracker.addMeasurement('asian-scripts', performance.now() - t0);
});
// Round-trips party, payment and item texts written in Arabic and Hebrew and
// checks each text is present in the serialized output.
t.test('Arabic and RTL scripts', async () => {
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>RTL-SCRIPTS</ID>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>شركة التقنية المحدودة</Name>
</PartyName>
<PostalAddress>
<StreetName>شارع الملك فهد</StreetName>
<CityName>الرياض</CityName>
<Country><IdentificationCode>SA</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>חברת הטכנולוגיה בע"מ</Name>
</PartyName>
<PostalAddress>
<StreetName>רחוב דיזנגוף 123</StreetName>
<CityName>תל אביב</CityName>
<Country><IdentificationCode>IL</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingCustomerParty>
<PaymentTerms>
<Note>الدفع: 30 يومًا صافي</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Name>منتج إلكتروني</Name>
<Description>جهاز كمبيوتر محمول</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>מוצר אלקטרוני</Name>
<Description>מחשב נייד מתקדם</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  const expectedTexts = [
    // Arabic
    'شركة التقنية المحدودة',
    'شارع الملك فهد',
    'الرياض',
    'الدفع: 30 يومًا صافي',
    'منتج إلكتروني',
    // Hebrew
    'חברת הטכנולוגיה בע"מ',
    'רחוב דיזנגוף',
    'תל אביב',
    'מוצר אלקטרוני',
  ];
  for (const text of expectedTexts) {
    expect(serialized).toContain(text);
  }

  performanceTracker.addMeasurement('rtl-scripts', performance.now() - t0);
});
// Round-trips notes and item texts containing emoji and checks a selection of
// them is present in the serialized output.
t.test('Emoji and emoticons', async () => {
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>EMOJI-TEST</ID>
<Note>Thank you for your order! 😊 🎉 🚀</Note>
<PaymentTerms>
<Note>Payment methods: 💳 💰 🏦</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Name>Premium Package 🌟</Name>
<Description>Includes: 📱 💻 🖱️ ⌨️ 🎧</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>Express Shipping 🚚💨</Name>
<Description>Delivery: 📦 → 🏠 (1-2 days)</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>Customer Support 24/7 ☎️</Name>
<Description>Contact: 📧 📞 💬</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Common emojis, in the order they are checked
  const expectedEmoji = [
    '😊', // Smiling face
    '🎉', // Party
    '🚀', // Rocket
    '💳', // Credit card
    '💰', // Money bag
    '🏦', // Bank
    '🌟', // Star
    '📱', // Phone
    '💻', // Laptop
    '🚚', // Truck
    '📦', // Package
    '🏠', // House
    '☎️', // Phone
    '📧', // Email
    '💬', // Chat
  ];
  for (const emoji of expectedEmoji) {
    expect(serialized).toContain(emoji);
  }

  performanceTracker.addMeasurement('emoji', performance.now() - t0);
});
// Runs up to 60 corpus .xml files through EInvoice, counts how many serialized
// outputs contain non-ASCII characters, and labels each such file with the
// first matching character range. Per-file errors are logged and the file is
// skipped; at least one file must be processed.
t.test('Corpus special character validation', async () => {
  const t0 = performance.now();
  let handled = 0;
  let nonAsciiFiles = 0;
  const specialCharFiles: string[] = [];

  // Checked in this order; only the first matching range labels a file
  const rangeLabels = [
    { pattern: /[À-ÿ]/, label: 'Latin Extended' },
    { pattern: /[Ā-ſ]/, label: 'Latin Extended-A' },
    { pattern: /[\u0400-\u04FF]/, label: 'Cyrillic' },
    { pattern: /[\u4E00-\u9FFF]/, label: 'CJK' },
    { pattern: /[\u0600-\u06FF]/, label: 'Arabic' },
  ];

  const allFiles = await corpusLoader.getAllFiles();
  // Sample: the first 60 XML files (or fewer when the corpus is smaller)
  const sample = allFiles.filter(f => f.endsWith('.xml')).slice(0, 60);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const invoice = new EInvoice();
      // readFile may hand back text or a binary buffer; pick the matching loader
      if (typeof content === 'string') {
        await invoice.loadFromString(content);
      } else {
        await invoice.loadFromBuffer(content);
      }
      const serialized = invoice.getXmlString();

      // Check for non-ASCII characters
      if (/[^\x00-\x7F]/.test(serialized)) {
        nonAsciiFiles++;
        const match = rangeLabels.find(({ pattern }) => pattern.test(serialized));
        if (match) {
          specialCharFiles.push(`${file} (${match.label})`);
        }
      }
      handled++;
    } catch (error) {
      console.log(`Special char issue in ${file}:`, error.message);
    }
  }

  console.log(`Special character corpus test: ${nonAsciiFiles}/${handled} files contain special characters`);
  if (specialCharFiles.length > 0) {
    console.log('Sample files with special characters:', specialCharFiles.slice(0, 5));
  }
  expect(handled).toBeGreaterThan(0);

  performanceTracker.addMeasurement('corpus-special', performance.now() - t0);
});
// Embeds invisible code points between the words of several texts and checks
// that the visible words are still present, in order, in the serialized
// output. The implementation may keep or strip the invisible characters.
t.test('Zero-width and invisible characters', async () => {
  const startTime = performance.now();

  // The invisible code points are written as \u escapes (the template literal
  // turns them into the real characters) so that editors, formatters or
  // copy/paste cannot silently drop them from the fixture:
  //   U+200B zero width space       U+200C zero width non-joiner
  //   U+200D zero width joiner      U+00AD soft hyphen
  //   U+200E left-to-right mark     U+200F right-to-left mark
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>INVISIBLE-CHARS</ID>
<Note>Zero\u200B-width\u200Bspace (U+200B)</Note>
<PaymentTerms>
<Note>Non\u200Cbreaking\u200Dzero\u200Cwidth\u200Djoiner</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Name>Soft\u00ADhyphen\u00ADtest</Name>
<Description>Left\u200Eto\u200Fright\u200Emark and right\u200Fto\u200Eleft\u200Fmark</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();

  // These characters might be preserved or stripped.
  // Check that the text is still readable; '.*' tolerates both outcomes.
  expect(xmlString).toMatch(/Zero.*width.*space/);
  expect(xmlString).toMatch(/Non.*breaking.*zero.*width.*joiner/);
  expect(xmlString).toMatch(/Soft.*hyphen.*test/);
  expect(xmlString).toMatch(/Left.*to.*right.*mark and right.*to.*left.*mark/);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('invisible-chars', elapsed);
});
// Print performance summary for the measurements the sub-tests report via addMeasurement()
// NOTE(review): the t.test(...) calls above are not awaited; confirm the sub-tests have
// recorded their measurements before this summary and the assertion below run.
performanceTracker.printSummary();
// Performance assertions
// NOTE(review): the recorded values are performance.now() deltas, so the budget of 150 is
// presumably milliseconds — confirm against PerformanceTracker.getAverageTime().
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(150); // Special character operations should be reasonably fast
});
// Start the tap runner (presumably runs the tap.test registered above — confirm against tapbundle docs)
tap.start();

View File

@ -0,0 +1,432 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-06: Namespace Declarations - should handle XML namespace declarations correctly', async (t) => {
// ENC-06: Verify proper encoding and handling of XML namespace declarations
// This test ensures namespace prefixes, URIs, and default namespaces work correctly
// Shared timing collector: the sub-tests below report their elapsed time to it via addMeasurement().
const performanceTracker = new PerformanceTracker('ENC-06: Namespace Declarations');
// NOTE(review): presumably used by a corpus-based sub-test further down — confirm it is not unused.
const corpusLoader = new CorpusLoader();
// Round-trips a document that declares only a default namespace and checks the
// declaration is kept and no prefixed namespace declaration appears.
t.test('Default namespace declaration', async () => {
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<CustomizationID>urn:cen.eu:en16931:2017</CustomizationID>
<ID>DEFAULT-NS-TEST</ID>
<IssueDate>2025-01-25</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
<DocumentCurrencyCode>EUR</DocumentCurrencyCode>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Test Supplier</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>Test Customer</Name>
</PartyName>
</Party>
</AccountingCustomerParty>
<LegalMonetaryTotal>
<PayableAmount currencyID="EUR">100.00</PayableAmount>
</LegalMonetaryTotal>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Default namespace declaration and unprefixed element names must be kept
  const requiredFragments = [
    'xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"',
    '<Invoice',
    '<UBLVersionID>',
  ];
  for (const fragment of requiredFragments) {
    expect(serialized).toContain(fragment);
  }
  // No prefixed namespaces
  expect(serialized).not.toContain('xmlns:');

  performanceTracker.addMeasurement('default-namespace', performance.now() - t0);
});
// Round-trips a document whose root element declares five prefixed namespaces
// and checks every declaration and the prefixed element names are kept.
t.test('Multiple namespace declarations', async () => {
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice
xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2"
xmlns:ext="urn:oasis:names:specification:ubl:schema:xsd:CommonExtensionComponents-2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2 UBL-Invoice-2.1.xsd">
<cbc:UBLVersionID>2.1</cbc:UBLVersionID>
<cbc:CustomizationID>urn:cen.eu:en16931:2017#conformant#urn:fdc:peppol.eu:2017:poacc:billing:international:peppol:3.0</cbc:CustomizationID>
<cbc:ProfileID>urn:fdc:peppol.eu:2017:poacc:billing:01:1.0</cbc:ProfileID>
<cbc:ID>MULTI-NS-TEST</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
<cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Namespace Test Supplier</cbc:Name>
</cac:PartyName>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:LegalMonetaryTotal>
<cbc:PayableAmount currencyID="EUR">100.00</cbc:PayableAmount>
</cac:LegalMonetaryTotal>
</ubl:Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // All namespace declarations must be preserved
  const declarations = [
    'xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"',
    'xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"',
    'xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2"',
    'xmlns:ext="urn:oasis:names:specification:ubl:schema:xsd:CommonExtensionComponents-2"',
    'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
  ];
  // Prefixed element names must be preserved
  const prefixedElements = [
    '<ubl:Invoice',
    '<cbc:UBLVersionID>',
    '<cac:AccountingSupplierParty>',
    '</ubl:Invoice>',
  ];
  for (const fragment of [...declarations, ...prefixedElements]) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('multiple-namespaces', performance.now() - t0);
});
// Round-trips a document that declares namespaces on nested elements rather
// than on the root, and checks those declarations and elements are kept.
t.test('Nested namespace declarations', async () => {
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>NESTED-NS-TEST</ID>
<UBLExtensions>
<UBLExtension>
<ExtensionContent>
<sig:UBLDocumentSignatures xmlns:sig="urn:oasis:names:specification:ubl:schema:xsd:CommonSignatureComponents-2">
<sac:SignatureInformation xmlns:sac="urn:oasis:names:specification:ubl:schema:xsd:SignatureAggregateComponents-2">
<cbc:ID xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">SIG-001</cbc:ID>
<sbc:SignatureMethod xmlns:sbc="urn:oasis:names:specification:ubl:schema:xsd:SignatureBasicComponents-2">RSA-SHA256</sbc:SignatureMethod>
</sac:SignatureInformation>
</sig:UBLDocumentSignatures>
</ExtensionContent>
</UBLExtension>
</UBLExtensions>
<AdditionalDocumentReference>
<ID>DOC-001</ID>
<Attachment>
<EmbeddedDocumentBinaryObject mimeCode="application/pdf" filename="invoice.pdf">
<xades:QualifyingProperties xmlns:xades="http://uri.etsi.org/01903/v1.3.2#">
<xades:SignedProperties>
<xades:SignedSignatureProperties>
<xades:SigningTime>2025-01-25T10:00:00Z</xades:SigningTime>
</xades:SignedSignatureProperties>
</xades:SignedProperties>
</xades:QualifyingProperties>
</EmbeddedDocumentBinaryObject>
</Attachment>
</AdditionalDocumentReference>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Namespace declarations made on nested elements
  const nestedDeclarations = [
    'xmlns:sig="urn:oasis:names:specification:ubl:schema:xsd:CommonSignatureComponents-2"',
    'xmlns:sac="urn:oasis:names:specification:ubl:schema:xsd:SignatureAggregateComponents-2"',
    'xmlns:xades="http://uri.etsi.org/01903/v1.3.2#"',
  ];
  // Elements that use those nested prefixes
  const nestedElements = [
    '<sig:UBLDocumentSignatures',
    '<sac:SignatureInformation',
    '<xades:QualifyingProperties',
  ];
  for (const fragment of [...nestedDeclarations, ...nestedElements]) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('nested-namespaces', performance.now() - t0);
});
t.test('Namespace prefixes with special characters', async () => {
  // Loads a document whose namespace prefixes contain a hyphen, an underscore
  // and a dot, and checks that declarations, prefixed tags and escaped text
  // all appear in the string returned by getXmlString().
  const startTime = performance.now();
  // The ampersand in the supplier name is written as &amp; so that the
  // fixture itself is well-formed XML; a bare '&' in character data is not.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<inv:Invoice
xmlns:inv="urn:example:invoice:2.0"
xmlns:addr-info="urn:example:address:1.0"
xmlns:pay_terms="urn:example:payment:1.0"
xmlns:item.details="urn:example:items:1.0">
<inv:Header>
<inv:ID>NS-SPECIAL-CHARS</inv:ID>
<inv:Date>2025-01-25</inv:Date>
</inv:Header>
<addr-info:SupplierAddress>
<addr-info:Name>Test GmbH &amp; Co. KG</addr-info:Name>
<addr-info:Street>Hauptstraße 42</addr-info:Street>
<addr-info:City>München</addr-info:City>
</addr-info:SupplierAddress>
<pay_terms:PaymentConditions>
<pay_terms:Terms>Net 30 days</pay_terms:Terms>
<pay_terms:Discount>2% if &lt; 10 days</pay_terms:Discount>
</pay_terms:PaymentConditions>
<item.details:LineItems>
<item.details:Item>
<item.details:Description>Product "A" with special chars: €, £, ¥</item.details:Description>
<item.details:Price currency="EUR">99.99</item.details:Price>
</item.details:Item>
</item.details:LineItems>
</inv:Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // Verify namespace prefixes with hyphens, underscores, dots
  expect(xmlString).toContain('xmlns:addr-info=');
  expect(xmlString).toContain('xmlns:pay_terms=');
  expect(xmlString).toContain('xmlns:item.details=');
  // Verify elements use correct prefixes
  expect(xmlString).toContain('<addr-info:SupplierAddress');
  expect(xmlString).toContain('<pay_terms:PaymentConditions');
  expect(xmlString).toContain('<item.details:LineItems');
  // Verify special characters in content are still escaped
  expect(xmlString).toContain('GmbH &amp; Co. KG');
  expect(xmlString).toContain('2% if &lt; 10 days');
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('special-prefix-chars', elapsed);
});
t.test('Namespace URI encoding', async () => {
  // Namespace URIs here carry a query string with an escaped ampersand,
  // a non-ASCII path segment and a fragment identifier.
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice
xmlns="urn:example:invoice:2.0"
xmlns:ext="http://example.com/extensions?version=2.0&amp;type=invoice"
xmlns:intl="http://example.com/i18n/español/facturas"
xmlns:spec="http://example.com/spec#fragment">
<ID>URI-ENCODING-TEST</ID>
<ext:Extension>
<ext:Type>Custom Extension</ext:Type>
<ext:Value>Test with encoded URI</ext:Value>
</ext:Extension>
<intl:Descripcion>Factura en español</intl:Descripcion>
<spec:SpecialField>Value with fragment reference</spec:SpecialField>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // First the declarations (URIs must come back exactly as written),
  // then the elements that use those prefixes.
  const expectedFragments = [
    'xmlns:ext="http://example.com/extensions?version=2.0&amp;type=invoice"',
    'xmlns:intl="http://example.com/i18n/español/facturas"',
    'xmlns:spec="http://example.com/spec#fragment"',
    '<ext:Extension>',
    '<intl:Descripcion>',
    '<spec:SpecialField>'
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('uri-encoding', performance.now() - begunAt);
});
t.test('Namespace inheritance and scoping', async () => {
  // Prefixes are declared at three levels: on the root element, on
  // <root:Body> and on <root:Subtotal>; descendants reuse inherited ones.
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<root:Invoice xmlns:root="urn:example:root:1.0" xmlns:shared="urn:example:shared:1.0">
<root:Header>
<shared:ID>NS-SCOPE-TEST</shared:ID>
<shared:Date>2025-01-25</shared:Date>
</root:Header>
<root:Body xmlns:local="urn:example:local:1.0">
<local:Item>
<shared:Name>Item using inherited namespace</shared:Name>
<local:Price>100.00</local:Price>
</local:Item>
<root:Subtotal xmlns:calc="urn:example:calc:1.0">
<calc:Amount>100.00</calc:Amount>
<calc:Tax rate="19%">19.00</calc:Tax>
</root:Subtotal>
</root:Body>
<root:Footer>
<!-- local namespace not available here -->
<shared:Total>119.00</shared:Total>
</root:Footer>
</root:Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Every declaration, whatever level it was made on, must be in the output
  const scopedDeclarations = [
    'xmlns:root="urn:example:root:1.0"',
    'xmlns:shared="urn:example:shared:1.0"',
    'xmlns:local="urn:example:local:1.0"',
    'xmlns:calc="urn:example:calc:1.0"'
  ];
  for (const declaration of scopedDeclarations) {
    expect(serialized).toContain(declaration);
  }

  // One element per prefix to confirm tags are still prefixed
  const prefixedTags = ['<root:Invoice', '<shared:ID>', '<local:Item>', '<calc:Amount>'];
  for (const tag of prefixedTags) {
    expect(serialized).toContain(tag);
  }

  performanceTracker.addMeasurement('namespace-scoping', performance.now() - begunAt);
});
t.test('Corpus namespace analysis', async () => {
  // Scans up to 100 corpus .xml files as plain text and tallies how
  // namespaces are declared. The files are not parsed; only regular
  // expressions are applied to their content.
  const startTime = performance.now();
  let processedCount = 0;
  const namespaceStats = {
    defaultNamespace: 0,
    prefixedNamespaces: 0,
    multipleNamespaces: 0,
    commonPrefixes: new Map<string, number>()
  };
  // A prefix may contain '-' and '.' as well as word characters
  // (e.g. xmlns:addr-info, xmlns:item.details), so \w alone is too narrow.
  const defaultDeclaration = /xmlns\s*=\s*["'][^"']+["']/;
  const prefixedDeclarations = /xmlns:([\w.-]+)\s*=\s*["'][^"']+["']/g;
  const declaredPrefix = /xmlns:([\w.-]+)/;

  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));
  // Analyze namespace usage in corpus
  const sampleSize = Math.min(100, xmlFiles.length);
  const sample = xmlFiles.slice(0, sampleSize);
  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      let xmlString: string;
      if (Buffer.isBuffer(content)) {
        xmlString = content.toString('utf8');
      } else {
        xmlString = content;
      }
      // Check for default namespace
      if (defaultDeclaration.test(xmlString)) {
        namespaceStats.defaultNamespace++;
      }
      // Check for prefixed namespaces
      const prefixMatches = xmlString.match(prefixedDeclarations);
      if (prefixMatches && prefixMatches.length > 0) {
        namespaceStats.prefixedNamespaces++;
        // "Multiple" means more than two prefixed declarations in one file
        if (prefixMatches.length > 2) {
          namespaceStats.multipleNamespaces++;
        }
        // Count common prefixes
        for (const declaration of prefixMatches) {
          const prefixMatch = declaration.match(declaredPrefix);
          if (prefixMatch) {
            const prefix = prefixMatch[1];
            namespaceStats.commonPrefixes.set(
              prefix,
              (namespaceStats.commonPrefixes.get(prefix) || 0) + 1
            );
          }
        }
      }
      processedCount++;
    } catch (error) {
      // A failing file is reported and skipped; it does not abort the scan.
      // The thrown value is not guaranteed to be an Error instance.
      const message = error instanceof Error ? error.message : String(error);
      console.log(`Namespace parsing issue in ${file}:`, message);
    }
  }
  console.log(`Namespace corpus analysis (${processedCount} files):`);
  console.log(`- Default namespace: ${namespaceStats.defaultNamespace}`);
  console.log(`- Prefixed namespaces: ${namespaceStats.prefixedNamespaces}`);
  console.log(`- Multiple namespaces: ${namespaceStats.multipleNamespaces}`);
  const topPrefixes = Array.from(namespaceStats.commonPrefixes.entries())
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10);
  console.log('Top namespace prefixes:', topPrefixes);
  expect(processedCount).toBeGreaterThan(0);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-namespaces', elapsed);
});
t.test('Namespace preservation during conversion', async () => {
  // A fully prefixed UBL CreditNote (ubl/cac/cbc/xsi) is loaded and the
  // string from getXmlString() is checked for the same prefixes.
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<ubl:CreditNote
xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:CreditNote-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:oasis:names:specification:ubl:schema:xsd:CreditNote-2 UBL-CreditNote-2.1.xsd">
<cbc:UBLVersionID>2.1</cbc:UBLVersionID>
<cbc:ID>NS-PRESERVE-TEST</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:CreditNoteTypeCode>381</cbc:CreditNoteTypeCode>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Müller GmbH</cbc:Name>
</cac:PartyName>
</cac:Party>
</cac:AccountingSupplierParty>
</ubl:CreditNote>`;

  const creditNote = new EInvoice();
  await creditNote.loadFromString(sourceXml);
  const serialized = creditNote.getXmlString();

  // Declarations and the schemaLocation attribute first, then a sample of
  // opening/closing tags that must keep their original prefixes.
  const expectedFragments = [
    'xmlns:ubl=',
    'xmlns:cac=',
    'xmlns:cbc=',
    'xmlns:xsi=',
    'xsi:schemaLocation=',
    '<ubl:CreditNote',
    '<cbc:UBLVersionID>',
    '<cac:AccountingSupplierParty>',
    '</ubl:CreditNote>'
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('namespace-preservation', performance.now() - begunAt);
});
// Print performance summary
// NOTE(review): the t.test() calls visible above are not awaited here; confirm
// the sub-tests have finished (and recorded their measurements) before this runs.
performanceTracker.printSummary();
// Performance assertions
// Measurements are performance.now() deltas, i.e. milliseconds; presumably the
// average returned here is in milliseconds too - TODO confirm.
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(120); // Namespace operations should be reasonably fast
});
// Presumably hands control to the tap runner so the test registered above executes - confirm against tapbundle docs.
tap.start();

View File

@ -0,0 +1,460 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-07: Attribute Encoding - should handle XML attribute encoding correctly', async (t) => {
// ENC-07: Verify proper encoding of XML attributes including special chars and quotes
// This test ensures attributes are properly encoded across different scenarios
const performanceTracker = new PerformanceTracker('ENC-07: Attribute Encoding');
const corpusLoader = new CorpusLoader();
t.test('Basic attribute encoding', async () => {
  // Plain ASCII attributes (scheme, list, currency and unit codes) must be
  // present with their values in the string returned by getXmlString().
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID schemeID="INVOICE" schemeAgencyID="6">ATTR-BASIC-001</ID>
<IssueDate>2025-01-25</IssueDate>
<DocumentCurrencyCode listID="ISO4217" listAgencyID="6" listVersionID="2001">EUR</DocumentCurrencyCode>
<TaxTotal>
<TaxAmount currencyID="EUR">19.00</TaxAmount>
<TaxSubtotal>
<TaxCategory>
<ID schemeID="UNCL5305" schemeAgencyID="6">S</ID>
<Percent>19</Percent>
<TaxScheme>
<ID schemeID="UN/ECE 5153" schemeAgencyID="6">VAT</ID>
</TaxScheme>
</TaxCategory>
</TaxSubtotal>
</TaxTotal>
<InvoiceLine>
<ID>1</ID>
<InvoicedQuantity unitCode="C62" unitCodeListID="UNECERec20">10</InvoicedQuantity>
<LineExtensionAmount currencyID="EUR">100.00</LineExtensionAmount>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Each pattern tolerates either quote style and spaces around '='
  const attributePatterns = [
    /schemeID\s*=\s*["']INVOICE["']/,
    /schemeAgencyID\s*=\s*["']6["']/,
    /listID\s*=\s*["']ISO4217["']/,
    /listVersionID\s*=\s*["']2001["']/,
    /currencyID\s*=\s*["']EUR["']/,
    /unitCode\s*=\s*["']C62["']/,
    /unitCodeListID\s*=\s*["']UNECERec20["']/
  ];
  for (const pattern of attributePatterns) {
    expect(serialized).toMatch(pattern);
  }

  performanceTracker.addMeasurement('basic-attributes', performance.now() - begunAt);
});
t.test('Attributes with special characters', async () => {
  // Attribute values containing entity references (&amp; &lt; &gt; &quot;),
  // umlauts, the euro sign and a percent sign.
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-SPECIAL-001</ID>
<Note languageID="de-DE" encoding="UTF-8">Rechnung für Bücher &amp; Zeitschriften</Note>
<PaymentMeans>
<PaymentMeansCode name="Überweisung (Bank &amp; SEPA)">30</PaymentMeansCode>
<PaymentID reference="Order &lt;2025-001&gt;">PAY-123</PaymentID>
<PayeeFinancialAccount>
<Name type="IBAN &amp; BIC">DE89 3704 0044 0532 0130 00</Name>
<FinancialInstitutionBranch>
<Name branch="München &quot;Zentrum&quot;">Sparkasse</Name>
</FinancialInstitutionBranch>
</PayeeFinancialAccount>
</PaymentMeans>
<AllowanceCharge>
<ChargeIndicator>false</ChargeIndicator>
<AllowanceChargeReason code="95" description="Discount for &gt; 100€ orders">Volume discount</AllowanceChargeReason>
<Amount currencyID="EUR" percentage="5%" calculation="100 * 0.05">5.00</Amount>
</AllowanceCharge>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Values are expected in escaped form; for the branch attribute the quote
  // may appear either as &quot; or as a literal double quote.
  const escapedAttributePatterns = [
    /name\s*=\s*["']Überweisung \(Bank &amp; SEPA\)["']/,
    /reference\s*=\s*["']Order &lt;2025-001&gt;["']/,
    /type\s*=\s*["']IBAN &amp; BIC["']/,
    /branch\s*=\s*["']München (&quot;|")Zentrum(&quot;|")["']/,
    /description\s*=\s*["']Discount for &gt; 100€ orders["']/,
    /percentage\s*=\s*["']5%["']/
  ];
  for (const pattern of escapedAttributePatterns) {
    expect(serialized).toMatch(pattern);
  }

  performanceTracker.addMeasurement('special-char-attributes', performance.now() - begunAt);
});
t.test('Quote handling in attributes', async () => {
  // Attribute values that mix single and double quotes: either delimiter
  // style is used, with the other quote character inside the value.
  const startTime = performance.now();
  // In the <Value format='...'> attribute the apostrophe is written as
  // &apos;. A backslash escape would not work: inside a template literal
  // \' collapses to a bare ', which would terminate the single-quoted
  // attribute early and make the fixture malformed XML.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-QUOTES-001</ID>
<Note title='Single quotes with "double quotes" inside'>Test note</Note>
<AdditionalDocumentReference>
<ID description="Product &quot;Premium&quot; edition">DOC-001</ID>
<DocumentDescription title="User's guide">Manual for "advanced" users</DocumentDescription>
<Attachment>
<ExternalReference>
<URI scheme="http" description='Link to "official" site'>http://example.com/doc?id=123&amp;type="pdf"</URI>
</ExternalReference>
</Attachment>
</AdditionalDocumentReference>
<InvoiceLine>
<Item>
<Name type='"Special" product'>Item with quotes</Name>
<Description note="Contains both 'single' and &quot;double&quot; quotes">Complex quoting test</Description>
<AdditionalItemProperty>
<Name>Quote test</Name>
<Value type="text" format='He said: "It&apos;s working!"'>Quoted value</Value>
</AdditionalItemProperty>
</Item>
</InvoiceLine>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // Verify quote handling - implementation may use different strategies
  // Either escape quotes or switch quote style
  expect(xmlString).toBeTruthy();
  // Should contain the attribute values somehow; the '.*' gaps accept a
  // literal quote as well as an entity reference in its place
  expect(xmlString).toMatch(/Single quotes with .*double quotes.* inside/);
  expect(xmlString).toMatch(/Product .*Premium.* edition/);
  expect(xmlString).toMatch(/User.*s guide/);
  expect(xmlString).toMatch(/Special.*product/);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('quote-attributes', elapsed);
});
t.test('International characters in attributes', async () => {
  // Attribute values in several scripts (Latin with diacritics, CJK,
  // Devanagari, Katakana, Greek) plus currency symbols.
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-INTL-001</ID>
<Note languageID="multi" region="Europa/歐洲/यूरोप">International attributes</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name tradingName="Société Générale" localName="ソシエテ・ジェネラル">SG Group</Name>
</PartyName>
<PostalAddress>
<StreetName type="Avenue/大道/एवेन्यू">Champs-Élysées</StreetName>
<CityName region="Île-de-France">Paris</CityName>
<Country>
<IdentificationCode listName="ISO 3166-1 α2">FR</IdentificationCode>
<Name language="fr-FR">République française</Name>
</Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<PaymentTerms>
<Note terms="30 días/天/दिन" currency="€/¥/₹">Multi-currency payment</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Name category="Bücher/书籍/पुस्तकें">International Books</Name>
<Description author="François Müller (佛朗索瓦·穆勒)">Multilingual content</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Every non-ASCII value must come back as literal characters
  const internationalValues = [
    'Europa/歐洲/यूरोप',
    'Société Générale',
    'ソシエテ・ジェネラル',
    'Avenue/大道/एवेन्यू',
    'Île-de-France',
    'α2', // Greek alpha
    'République française',
    '30 días/天/दिन',
    '€/¥/₹',
    'Bücher/书籍/पुस्तकें',
    '佛朗索瓦·穆勒'
  ];
  for (const value of internationalValues) {
    expect(serialized).toContain(value);
  }

  performanceTracker.addMeasurement('intl-attributes', performance.now() - begunAt);
});
t.test('Empty and whitespace attributes', async () => {
  // Attributes that are empty, whitespace-only, padded with spaces, or
  // whose value spans several lines.
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-WHITESPACE-001</ID>
<Note title="" language="">Empty attributes</Note>
<DocumentReference>
<ID schemeID=" " schemeAgencyID=" ">REF-001</ID>
<DocumentDescription prefix=" " suffix=" "> Trimmed content </DocumentDescription>
</DocumentReference>
<PaymentMeans>
<PaymentID reference="
multiline
reference
">PAY-001</PaymentID>
<InstructionNote format=" preserved spaces ">Note with spaces</InstructionNote>
</PaymentMeans>
<InvoiceLine>
<LineExtensionAmount currencyID="EUR" decimals="" symbol="€">100.00</LineExtensionAmount>
<Item>
<Description short=" " long=" ">Item description</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Empty attributes must still be emitted (value empty or whitespace-only)
  const emptyAttributePatterns = [
    /title\s*=\s*["'](\s*)["']/,
    /language\s*=\s*["'](\s*)["']/
  ];
  for (const pattern of emptyAttributePatterns) {
    expect(serialized).toMatch(pattern);
  }

  // Whitespace inside values may be normalised, so only the attribute
  // names are asserted for these two
  const attributeNames = ['schemeID=', 'reference='];
  for (const attributeName of attributeNames) {
    expect(serialized).toContain(attributeName);
  }

  performanceTracker.addMeasurement('whitespace-attributes', performance.now() - begunAt);
});
t.test('Numeric and boolean attribute values', async () => {
  // Attribute values that look like integers, decimals, zero-padded
  // numbers and booleans must come back with the same text.
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-NUMERIC-001</ID>
<AllowanceCharge>
<ChargeIndicator>true</ChargeIndicator>
<SequenceNumeric>1</SequenceNumeric>
<Amount currencyID="EUR" decimals="2" precision="0.01">19.99</Amount>
<BaseAmount currencyID="EUR" percentage="19.5" factor="0.195">100.00</BaseAmount>
</AllowanceCharge>
<TaxTotal>
<TaxAmount currencyID="EUR" rate="19" rateType="percent">19.00</TaxAmount>
<TaxSubtotal>
<TaxableAmount currencyID="EUR" rounded="false">100.00</TaxableAmount>
<TaxCategory>
<ID>S</ID>
<Percent format="decimal">19.0</Percent>
<TaxExemptionReason code="0" active="true">Not exempt</TaxExemptionReason>
</TaxCategory>
</TaxSubtotal>
</TaxTotal>
<InvoiceLine>
<ID sequence="001" index="0">1</ID>
<InvoicedQuantity unitCode="C62" value="10.0" isInteger="true">10</InvoicedQuantity>
<Price>
<PriceAmount currencyID="EUR" negative="false">10.00</PriceAmount>
<BaseQuantity unitCode="C62" default="1">1</BaseQuantity>
</Price>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Each pattern pins the exact textual value, e.g. "001" with its leading
  // zeros and "0.01" with its decimal point
  const literalValuePatterns = [
    /decimals\s*=\s*["']2["']/,
    /precision\s*=\s*["']0\.01["']/,
    /percentage\s*=\s*["']19\.5["']/,
    /factor\s*=\s*["']0\.195["']/,
    /rate\s*=\s*["']19["']/,
    /rounded\s*=\s*["']false["']/,
    /active\s*=\s*["']true["']/,
    /sequence\s*=\s*["']001["']/,
    /index\s*=\s*["']0["']/,
    /isInteger\s*=\s*["']true["']/,
    /negative\s*=\s*["']false["']/
  ];
  for (const pattern of literalValuePatterns) {
    expect(serialized).toMatch(pattern);
  }

  performanceTracker.addMeasurement('numeric-boolean-attributes', performance.now() - begunAt);
});
t.test('Namespace-prefixed attributes', async () => {
  // Attributes qualified with the xsi, xlink and ds prefixes, including two
  // that differ only in case (ds:algorithm vs ds:Algorithm).
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice
xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:ds="http://www.w3.org/2000/09/xmldsig#"
xsi:schemaLocation="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2 Invoice.xsd">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-NS-PREFIX-001</ID>
<ProfileID xsi:type="string">urn:fdc:peppol.eu:2017:poacc:billing:01:1.0</ProfileID>
<AdditionalDocumentReference>
<ID>DOC-001</ID>
<Attachment>
<ExternalReference>
<URI xlink:type="simple" xlink:href="http://example.com/doc.pdf" xlink:title="Invoice Documentation">http://example.com/doc.pdf</URI>
</ExternalReference>
<EmbeddedDocumentBinaryObject
mimeCode="application/pdf"
encodingCode="base64"
filename="invoice.pdf"
ds:algorithm="SHA256">
JVBERi0xLjQKJeLjz9MKNCAwIG9iago=
</EmbeddedDocumentBinaryObject>
</Attachment>
</AdditionalDocumentReference>
<Signature>
<ID>SIG-001</ID>
<SignatureMethod ds:Algorithm="http://www.w3.org/2001/04/xmldsig-more#rsa-sha256">RSA-SHA256</SignatureMethod>
</Signature>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Only the qualified attribute names are asserted, not their values
  const qualifiedAttributeNames = [
    'xsi:schemaLocation=',
    'xsi:type=',
    'xlink:type=',
    'xlink:href=',
    'xlink:title=',
    'ds:algorithm=',
    'ds:Algorithm='
  ];
  for (const qualifiedName of qualifiedAttributeNames) {
    expect(serialized).toContain(qualifiedName);
  }

  performanceTracker.addMeasurement('ns-prefixed-attributes', performance.now() - begunAt);
});
t.test('Corpus attribute analysis', async () => {
  // Scans up to 80 corpus .xml files as plain text and tallies attribute
  // usage. The files are not parsed; attributes are found with a regular
  // expression, so anything shaped like name="value" is counted.
  const startTime = performance.now();
  let processedCount = 0;
  const attributeStats = {
    totalAttributes: 0,
    escapedAttributes: 0,
    unicodeAttributes: 0,
    numericAttributes: 0,
    emptyAttributes: 0,
    commonAttributes: new Map<string, number>()
  };
  const escapeSequences = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;'];
  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));
  // Analyze attribute usage in corpus
  const sampleSize = Math.min(80, xmlFiles.length);
  const sample = xmlFiles.slice(0, sampleSize);
  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      let xmlString: string;
      if (Buffer.isBuffer(content)) {
        xmlString = content.toString('utf8');
      } else {
        xmlString = content;
      }
      // Group 1: attribute name (optionally prefixed; '-' and '.' allowed).
      // Group 2 / 3: the value, matched up to the SAME quote character that
      // opened it, so title='a "b" c' is one attribute rather than being cut
      // off at the first inner double quote.
      // Created per file so the stateful /g lastIndex always starts at 0.
      const attributePattern = /\s([\w.-]+(?::[\w.-]+)?)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
      let attrMatch: RegExpExecArray | null;
      while ((attrMatch = attributePattern.exec(xmlString)) !== null) {
        const attrName = attrMatch[1];
        const attrValue = attrMatch[2] !== undefined ? attrMatch[2] : attrMatch[3];
        attributeStats.totalAttributes++;
        // Check for escaped content
        if (escapeSequences.some(sequence => attrValue.includes(sequence))) {
          attributeStats.escapedAttributes++;
        }
        // Check for Unicode (any character outside 7-bit ASCII)
        if (/[^\x00-\x7F]/.test(attrValue)) {
          attributeStats.unicodeAttributes++;
        }
        // Check for numeric values (integer or simple decimal)
        if (/^\d+(?:\.\d+)?$/.test(attrValue)) {
          attributeStats.numericAttributes++;
        }
        // Check for empty values (whitespace-only counts as empty)
        if (/^\s*$/.test(attrValue)) {
          attributeStats.emptyAttributes++;
        }
        attributeStats.commonAttributes.set(
          attrName,
          (attributeStats.commonAttributes.get(attrName) || 0) + 1
        );
      }
      processedCount++;
    } catch (error) {
      // A failing file is reported and skipped; it does not abort the scan.
      // The thrown value is not guaranteed to be an Error instance.
      const message = error instanceof Error ? error.message : String(error);
      console.log(`Attribute parsing issue in ${file}:`, message);
    }
  }
  console.log(`Attribute corpus analysis (${processedCount} files):`);
  console.log(`- Total attributes: ${attributeStats.totalAttributes}`);
  console.log(`- Escaped attributes: ${attributeStats.escapedAttributes}`);
  console.log(`- Unicode attributes: ${attributeStats.unicodeAttributes}`);
  console.log(`- Numeric attributes: ${attributeStats.numericAttributes}`);
  console.log(`- Empty attributes: ${attributeStats.emptyAttributes}`);
  const topAttributes = Array.from(attributeStats.commonAttributes.entries())
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10);
  console.log('Top 10 attribute names:', topAttributes);
  expect(processedCount).toBeGreaterThan(0);
  expect(attributeStats.totalAttributes).toBeGreaterThan(0);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-attributes', elapsed);
});
// Print performance summary
// NOTE(review): the t.test() calls above are not awaited in this callback; confirm
// the sub-tests have finished (and recorded their measurements) before this runs.
performanceTracker.printSummary();
// Performance assertions
// Measurements are performance.now() deltas, i.e. milliseconds; presumably the
// average returned here is in milliseconds too - TODO confirm.
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(120); // Attribute operations should be reasonably fast
});
// Presumably hands control to the tap runner so the test registered above executes - confirm against tapbundle docs.
tap.start();

View File

@ -0,0 +1,462 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-08: Mixed Content Encoding - should handle mixed content (text and elements) correctly', async (t) => {
// ENC-08: Verify proper encoding of mixed content scenarios
// This test ensures text nodes, elements, CDATA, and comments are properly encoded together
const performanceTracker = new PerformanceTracker('ENC-08: Mixed Content');
const corpusLoader = new CorpusLoader();
t.test('Basic mixed content', async () => {
  // <Note> elements here interleave free text with inline child elements,
  // including children nested inside other inline children.
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-BASIC-001</ID>
<Note>
This invoice includes <emphasis>important</emphasis> payment terms:
<term>Net 30 days</term> with <percentage>2%</percentage> early payment discount.
Please pay by <date>2025-02-25</date>.
</Note>
<PaymentTerms>
<Note>
Payment due in <days>30</days> days.
<condition>If paid within <days>10</days> days: <discount>2%</discount> discount</condition>
<condition>If paid after <days>30</days> days: <penalty>1.5%</penalty> interest</condition>
</Note>
</PaymentTerms>
<InvoiceLine>
<Note>
Item includes <quantity>10</quantity> units of <product>Widget A</product>
at <price currency="EUR">€9.99</price> each.
Total: <total currency="EUR">€99.90</total>
</Note>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Text runs and inline elements from the first <Note>
  const topLevelPieces = [
    'This invoice includes',
    '<emphasis>important</emphasis>',
    'payment terms:',
    '<term>Net 30 days</term>',
    'with',
    '<percentage>2%</percentage>',
    'Please pay by',
    '<date>2025-02-25</date>'
  ];
  for (const piece of topLevelPieces) {
    expect(serialized).toContain(piece);
  }

  // Text and elements nested one level deeper, inside <condition>
  const nestedPieces = [
    'If paid within',
    '<days>10</days>',
    'days:',
    '<discount>2%</discount>'
  ];
  for (const piece of nestedPieces) {
    expect(serialized).toContain(piece);
  }

  performanceTracker.addMeasurement('basic-mixed', performance.now() - begunAt);
});
t.test('Mixed content with special characters', async () => {
  // Mixed content whose text runs contain entity references, umlauts and
  // symbols such as the euro, section, comparison, arrow and multiplication signs.
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-SPECIAL-001</ID>
<Note>
Price: <amount>100.00</amount> € (VAT <percentage>19%</percentage> = <vat>19.00</vat> €)
Total: <total>119.00</total> € for <company>Müller &amp; Söhne GmbH</company>
</Note>
<DocumentReference>
<DocumentDescription>
See contract <ref>§12.3</ref> for terms &amp; conditions.
<important>Payment &lt; 30 days</important> required.
Contact: <email>info@müller-söhne.de</email>
</DocumentDescription>
</DocumentReference>
<PaymentTerms>
<Note>
<condition type="discount">≥ 100 items → 5% discount</condition>
<condition type="penalty">&gt; 30 days → 1.5% interest</condition>
<formula>Total = Price × Quantity × (1 + VAT%)</formula>
</Note>
</PaymentTerms>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Markup-significant characters are expected in escaped form; all other
  // symbols are expected as literal characters
  const expectedText = [
    'Price:',
    '€',
    'Müller &amp; Söhne GmbH',
    '§12.3',
    'terms &amp; conditions',
    '&lt; 30 days',
    'info@müller-söhne.de',
    '≥ 100 items → 5% discount',
    '&gt; 30 days → 1.5% interest',
    '×'
  ];
  for (const text of expectedText) {
    expect(serialized).toContain(text);
  }

  performanceTracker.addMeasurement('special-mixed', performance.now() - begunAt);
});
t.test('Mixed content with CDATA sections', async () => {
  // Mixed content in which CDATA sections sit between ordinary text runs
  // and inline elements.
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-CDATA-001</ID>
<Note>
Regular text before CDATA.
<![CDATA[This section contains <unescaped> tags & special chars: < > & " ']]>
Text after CDATA with <element>nested element</element>.
</Note>
<AdditionalDocumentReference>
<DocumentDescription>
HTML content example:
<![CDATA[
<html>
<body>
<h1>Invoice Details</h1>
<p>Amount: €100.00</p>
<p>VAT: 19%</p>
</body>
</html>
]]>
End of description.
</DocumentDescription>
</AdditionalDocumentReference>
<PaymentTerms>
<Note>
Formula: <formula>price * quantity</formula>
<![CDATA[JavaScript: if (amount > 100) { discount = 5%; }]]>
Applied to all items.
</Note>
</PaymentTerms>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // The text and element surrounding the first CDATA section must survive
  const surroundingPieces = [
    'Regular text before CDATA',
    'Text after CDATA',
    '<element>nested element</element>'
  ];
  for (const piece of surroundingPieces) {
    expect(serialized).toContain(piece);
  }

  // Two output shapes are accepted for the CDATA payload itself
  if (!serialized.includes('CDATA')) {
    // No CDATA in the output: the payload must show up as text instead
    expect(serialized).toMatch(/&lt;unescaped&gt;|<unescaped>/);
  } else {
    // CDATA kept: both the opening and closing delimiters must be there
    expect(serialized).toContain('<![CDATA[');
    expect(serialized).toContain(']]>');
  }

  performanceTracker.addMeasurement('cdata-mixed', performance.now() - begunAt);
});
t.test('Mixed content with comments', async () => {
  // XML comments are scattered between text runs and inline elements. Only
  // the text and elements are asserted; the comments themselves are not.
  const begunAt = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-COMMENTS-001</ID>
<Note>
<!-- Start of payment terms -->
Payment is due in <days>30</days> days.
<!-- Discount information follows -->
<discount>Early payment: 2% if paid within 10 days</discount>
<!-- End of payment terms -->
</Note>
<DocumentReference>
<DocumentDescription>
See attachment <!-- PDF document --> for details.
<attachment>invoice.pdf</attachment> <!-- 2 pages -->
Contact <!-- via email -->: <email>info@example.com</email>
</DocumentDescription>
</DocumentReference>
<InvoiceLine>
<!-- Line item 1 -->
<Note>
Product: <name>Widget</name> <!-- Best seller -->
Quantity: <qty>10</qty> <!-- In stock -->
Price: <price>9.99</price> <!-- EUR -->
</Note>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Text on either side of a comment is checked as separate pieces, since
  // the comment between them may or may not be kept
  const survivingPieces = [
    'Payment is due in',
    '<days>30</days>',
    'days.',
    '<discount>Early payment: 2% if paid within 10 days</discount>',
    'See attachment',
    'for details.',
    '<attachment>invoice.pdf</attachment>',
    'Contact',
    '<email>info@example.com</email>'
  ];
  for (const piece of survivingPieces) {
    expect(serialized).toContain(piece);
  }

  performanceTracker.addMeasurement('comments-mixed', performance.now() - begunAt);
});
// Sub-test: whitespace inside mixed content.
// Loads a fixture whose <Note>/<DocumentDescription> elements mix text,
// line breaks and child elements, re-serializes it and checks that the
// text fragments and the nested <element> are still present.
// NOTE(review): the fixture text talks about "multiple spaces" and "tabs",
// but as visible here the literal only contains single spaces — confirm the
// literal still carries the whitespace runs it is meant to exercise (editors
// and formatters may have collapsed them).
t.test('Whitespace preservation in mixed content', async () => {
const startTime = performance.now();
const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-WHITESPACE-001</ID>
<Note>Text with multiple spaces and
newlines should be preserved.
<element>Indented element</element>
More text with tabs between words.
</Note>
<PaymentTerms>
<Note xml:space="preserve"> Leading spaces
<term>Net 30</term> Trailing spaces
Middle spaces preserved.
End with spaces </Note>
</PaymentTerms>
<DocumentReference>
<DocumentDescription>Line 1
<break/>
Line 2
<break/>
Line 3</DocumentDescription>
</DocumentReference>
</Invoice>`;
const einvoice = new EInvoice();
await einvoice.loadFromString(xmlContent);
const xmlString = einvoice.getXmlString();
// Whitespace handling varies by implementation, so only whitespace-free
// fragments are asserted here.
expect(xmlString).toContain('Text with');
expect(xmlString).toContain('spaces');
expect(xmlString).toContain('<element>Indented element</element>');
expect(xmlString).toContain('More text with');
expect(xmlString).toContain('words');
// xml:space="preserve" should maintain whitespace; this check only runs when
// the attribute itself survived serialization.
if (xmlString.includes('xml:space="preserve"')) {
expect(xmlString).toMatch(/Leading spaces|^\s+Leading/m);
}
// Record the wall-clock duration of this sub-test under 'whitespace-mixed'.
const elapsed = performance.now() - startTime;
performanceTracker.addMeasurement('whitespace-mixed', elapsed);
});
// Sub-test: mixed content nested several element levels deep.
// Only fragments from the top-level <Note> are asserted; the nested
// <PaymentTerms> structure is loaded but not checked.
t.test('Deeply nested mixed content', async () => {
  const t0 = performance.now();

  const source = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-NESTED-001</ID>
<Note>
Level 1: Invoice for <customer>
<name>ABC Corp</name> (Customer ID: <id>C-12345</id>)
<address>
Located at <street>123 Main St</street>,
<city>New York</city>, <state>NY</state> <zip>10001</zip>
</address>
</customer> dated <date>2025-01-25</date>.
</Note>
<PaymentTerms>
<Note>
<terms>
Standard terms: <standard>
Net <days>30</days> days from <reference>
invoice date (<date>2025-01-25</date>)
</reference>
</standard>
<special>
Special conditions: <condition num="1">
For orders &gt; <amount currency="EUR">€1000</amount>:
<discount>5%</discount> discount
</condition>
</special>
</terms>
</Note>
</PaymentTerms>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(source);
  const serialized = invoice.getXmlString();

  // Fragments of the nested <customer> structure, checked in document order.
  const requiredFragments = [
    'Level 1: Invoice for',
    '<customer>',
    '<name>ABC Corp</name>',
    '(Customer ID:',
    '<id>C-12345</id>',
    'Located at',
    '<street>123 Main St</street>',
    '<city>New York</city>',
    '<state>NY</state>',
    '<zip>10001</zip>',
    'dated',
    '<date>2025-01-25</date>',
  ];
  for (const fragment of requiredFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('nested-mixed', performance.now() - t0);
});
// Sub-test: mixed content containing German, Chinese, Japanese, French,
// Spanish and Arabic text. Loads the fixture, re-serializes it and checks
// that the non-ASCII fragments are present in the output.
t.test('International mixed content', async () => {
  const startTime = performance.now();

  // The Japanese label is interpolated into the fixture AND used in the
  // assertion below, so the two cannot drift apart (previously the fixture
  // and the expectation appeared to use different colon characters, which
  // made the assertion unsatisfiable regardless of library behavior).
  const jaPaymentLabel = 'お支払い:';

  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-INTL-001</ID>
<Note>
Invoice for <company lang="de">Müller GmbH</company> from <city>München</city>.
Total: <amount currency="EUR">€1.234,56</amount> (inkl. <tax>19% MwSt</tax>).
支付条款:<terms lang="zh">30天内付款</terms>。
${jaPaymentLabel}<terms lang="ja">30日以内</terms>。
</Note>
<PaymentTerms>
<Note>
<multilang>
<en>Payment due in <days>30</days> days</en>
<de>Zahlung fällig in <days>30</days> Tagen</de>
<fr>Paiement dû dans <days>30</days> jours</fr>
<es>Pago debido en <days>30</days> días</es>
</multilang>
</Note>
</PaymentTerms>
<InvoiceLine>
<Note>
Product: <name lang="multi">
<en>Book</en> / <de>Buch</de> / <fr>Livre</fr> /
<zh>书</zh> / <ja>本</ja> / <ar>كتاب</ar>
</name>
Price: <price>€25.00</price> per <unit>Stück</unit>
</Note>
</InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();

  // Verify international mixed content survived the load/serialize cycle.
  const expectedFragments = [
    'Müller GmbH',
    'München',
    '€1.234,56',
    '19% MwSt',
    '支付条款:',
    '30天内付款',
    jaPaymentLabel,
    '30日以内',
    'Zahlung fällig in',
    'Tagen',
    'Paiement dû dans',
    '书',
    '本',
    'كتاب',
    'Stück',
  ];
  for (const fragment of expectedFragments) {
    expect(xmlString).toContain(fragment);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('intl-mixed', elapsed);
});
// Sub-test: scans a sample of corpus XML files for mixed-content and CDATA
// usage. The findings are only logged; the single assertion is that at least
// one file could be read.
t.test('Corpus mixed content analysis', async () => {
  const startTime = performance.now();

  // Heuristic for mixed content: text, then a child element with text, then
  // more text, all before the next tag. Both outer text runs must contain a
  // non-whitespace character (\S); otherwise the indentation between tags of
  // any pretty-printed document would count as "mixed content".
  const mixedPattern = />([^<]*\S[^<]*)<[^>]+>[^<]+<\/[^>]+>([^<]*\S[^<]*)</;
  const maxPatternExamples = 5;

  let processedCount = 0;
  let mixedContentCount = 0;
  // Explicit flag instead of searching the example strings for "CDATA", which
  // could be tripped by a file path or snippet that merely contains that word.
  let cdataExampleRecorded = false;
  const mixedContentExamples: string[] = [];

  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));
  // Sample corpus for mixed content patterns (at most 60 files).
  const sample = xmlFiles.slice(0, Math.min(60, xmlFiles.length));

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const xmlString: string = Buffer.isBuffer(content) ? content.toString('utf8') : content;

      // Run the regex once and reuse the match for both counting and the example.
      const match = xmlString.match(mixedPattern);
      if (match) {
        mixedContentCount++;
        if (mixedContentExamples.length < maxPatternExamples) {
          mixedContentExamples.push(`${file}: "${match[0].substring(0, 100)}..."`);
        }
      }

      // Record at most one CDATA example for the whole sample.
      if (!cdataExampleRecorded && xmlString.includes('<![CDATA[')) {
        mixedContentExamples.push(`${file}: Contains CDATA sections`);
        cdataExampleRecorded = true;
      }

      processedCount++;
    } catch (error) {
      // Anything can be thrown; only Error instances are guaranteed a message.
      const message = error instanceof Error ? error.message : String(error);
      console.log(`Mixed content parsing issue in ${file}:`, message);
    }
  }

  console.log(`Mixed content corpus analysis (${processedCount} files):`);
  console.log(`- Files with mixed content patterns: ${mixedContentCount}`);
  if (mixedContentExamples.length > 0) {
    console.log('Mixed content examples:');
    mixedContentExamples.forEach(ex => console.log(` ${ex}`));
  }
  expect(processedCount).toBeGreaterThan(0);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-mixed', elapsed);
});
// Print performance summary
performanceTracker.printSummary();
// Performance assertions
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(150); // Mixed content operations may be slightly slower
});
// Called once, after the suite above has been registered with tap.test(...).
// NOTE(review): presumably this triggers execution of the registered tests — confirm against the tapbundle documentation.
tap.start();

View File

@ -0,0 +1,397 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-09: Encoding Errors - should handle encoding errors and mismatches gracefully', async (t) => {
// ENC-09: Verify proper handling of encoding errors and recovery strategies
// This test ensures the system can handle malformed encodings and mismatches
const performanceTracker = new PerformanceTracker('ENC-09: Encoding Errors');
const corpusLoader = new CorpusLoader();
// Sub-test: document declares ISO-8859-1 but is passed as a JS string that
// contains characters outside that charset. Either outcome is accepted:
// - the load succeeds and the document ID is present in the output, or
// - the load throws an error whose message mentions encoding/character/parse.
t.test('Encoding mismatch detection', async () => {
  const startTime = performance.now();

  // UTF-8 content declared as ISO-8859-1
  const utf8Content = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ENCODING-MISMATCH-001</ID>
<Note>UTF-8 content: € £ ¥ 中文 العربية русский</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Société Générale (société anonyme)</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
</Invoice>`;

  const einvoice = new EInvoice();
  let xmlString: string | null = null;
  try {
    // Only library calls live inside the try block. Assertions on the result
    // are made afterwards so that a failed expectation is never routed into
    // the catch block, where its message (it may well quote
    // "ENCODING-MISMATCH-001") could satisfy the /encoding/ check and be
    // silently swallowed.
    await einvoice.loadFromString(utf8Content);
    xmlString = einvoice.getXmlString();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.log('Encoding mismatch error:', message);
    expect(message).toMatch(/encoding|character|parse/i);
  }

  if (xmlString !== null) {
    // Should handle the content somehow
    expect(xmlString).toContain('ENCODING-MISMATCH-001');
    // Informational only: report whether special characters survived.
    if (xmlString.includes('€') && xmlString.includes('中文')) {
      console.log('Encoding mismatch handled: UTF-8 content preserved');
    } else {
      console.log('Encoding mismatch resulted in character loss');
    }
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('encoding-mismatch', elapsed);
});
// Sub-test: buffer containing bytes (0xFF 0xFE 0xFD) that are not valid UTF-8
// inside an otherwise well-formed document. Either the load succeeds and the
// document ID is present in the output, or it throws an error whose message
// mentions invalid/malformed/byte/sequence.
t.test('Invalid byte sequences', async () => {
  const startTime = performance.now();

  // Create buffer with invalid UTF-8 sequences
  const invalidUtf8 = Buffer.concat([
    Buffer.from('<?xml version="1.0" encoding="UTF-8"?>\n<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">\n<ID>INVALID-BYTES</ID>\n<Note>'),
    Buffer.from([0xFF, 0xFE, 0xFD]), // Invalid UTF-8 bytes
    Buffer.from('</Note>\n</Invoice>')
  ]);

  const einvoice = new EInvoice();
  let xmlString: string | null = null;
  try {
    // Keep assertions out of the try block: a failed expectation thrown here
    // would be caught below, and its message (likely quoting "INVALID-BYTES")
    // could match /invalid|byte/ and mask the failure.
    await einvoice.loadFromBuffer(invalidUtf8);
    xmlString = einvoice.getXmlString();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.log('Invalid byte sequence error:', message);
    expect(message).toMatch(/invalid|malformed|byte|sequence/i);
  }

  if (xmlString !== null) {
    // Load succeeded: check how invalid bytes were handled
    expect(xmlString).toContain('INVALID-BYTES');
    console.log('Invalid bytes were handled/replaced');
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('invalid-bytes', elapsed);
});
// Sub-test: buffer with truncated UTF-8 multi-byte sequences (a lone 0xC3 and
// a two-byte prefix 0xE2 0x82). Either the load succeeds and the surrounding
// ASCII text is present in the output, or it throws an error whose message
// mentions incomplete/invalid/sequence.
t.test('Incomplete multi-byte sequences', async () => {
  const startTime = performance.now();

  // Create UTF-8 with incomplete multi-byte sequences
  const incompleteSequences = [
    Buffer.from('<?xml version="1.0" encoding="UTF-8"?>\n<Invoice>\n<Note>'),
    Buffer.from('Test '),
    Buffer.from([0xC3]), // Incomplete 2-byte sequence (missing second byte)
    Buffer.from(' text '),
    Buffer.from([0xE2, 0x82]), // Incomplete 3-byte sequence (missing third byte)
    Buffer.from(' end</Note>\n</Invoice>')
  ];
  const incompleteUtf8 = Buffer.concat(incompleteSequences);

  const einvoice = new EInvoice();
  let xmlString: string | null = null;
  try {
    // Only library calls belong here; assertions on the output run after the
    // try/catch so their failures are reported as such instead of being
    // re-interpreted by the catch block as a load error.
    await einvoice.loadFromBuffer(incompleteUtf8);
    xmlString = einvoice.getXmlString();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.log('Incomplete sequence error:', message);
    expect(message).toMatch(/incomplete|invalid|sequence/i);
  }

  if (xmlString !== null) {
    console.log('Incomplete sequences were handled');
    expect(xmlString).toContain('Test');
    expect(xmlString).toContain('text');
    expect(xmlString).toContain('end');
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('incomplete-sequences', elapsed);
});
// Sub-test: bytes are UTF-16LE but the XML declaration claims UTF-8.
// Success is tolerated (and reported); on failure the error message must
// mention encoding/parse/invalid.
t.test('Wrong encoding declaration', async () => {
  const startTime = performance.now();

  // UTF-16 content with UTF-8 declaration
  const utf16Content = Buffer.from(
    '<?xml version="1.0" encoding="UTF-8"?>\n<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">\n<ID>WRONG-DECL</ID>\n<Note>UTF-16 content</Note>\n</Invoice>',
    'utf16le'
  );

  const einvoice = new EInvoice();
  try {
    await einvoice.loadFromBuffer(utf16Content);
    // Might detect and handle the mismatch. The serialized output is now
    // inspected (it used to be fetched and discarded) so the log shows
    // whether the document was actually decoded or merely not rejected.
    const xmlString = einvoice.getXmlString();
    const idRecovered = typeof xmlString === 'string' && xmlString.includes('WRONG-DECL');
    console.log('Wrong encoding declaration handled');
    console.log(`- document ID recovered from UTF-16 bytes: ${idRecovered}`);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.log('Wrong encoding declaration:', message);
    expect(message).toMatch(/encoding|parse|invalid/i);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('wrong-declaration', elapsed);
});
// Sub-test: one buffer whose first part is UTF-8 ("München") and whose second
// part contains Latin-1 encoded bytes ("Düsseldorf"). A load failure is
// tolerated and logged; if the load succeeds, the UTF-8 part must be intact.
t.test('Mixed encoding in single document', async () => {
  const startTime = performance.now();

  // Document with mixed encodings (simulated by incorrect concatenation)
  const mixedEncoding = Buffer.concat([
    Buffer.from('<?xml version="1.0" encoding="UTF-8"?>\n<Invoice>\n<Part1>'),
    Buffer.from('UTF-8 text: München', 'utf8'),
    Buffer.from('</Part1>\n<Part2>'),
    Buffer.from('Latin-1 text: ', 'utf8'),
    Buffer.from('Düsseldorf', 'latin1'), // Different encoding
    Buffer.from('</Part2>\n</Invoice>', 'utf8')
  ]);

  const einvoice = new EInvoice();
  let xmlString: string | null = null;
  try {
    await einvoice.loadFromBuffer(mixedEncoding);
    xmlString = einvoice.getXmlString();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.log('Mixed encoding error:', message);
  }

  // The assertion lives outside the try block: the catch above only logs, so
  // an expectation failing inside the try could never fail the test.
  if (xmlString !== null) {
    // Check which parts survived
    expect(xmlString).toContain('München'); // Should be correct
    // Düsseldorf might be garbled
    console.log('Mixed encoding document processed');
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('mixed-encoding', elapsed);
});
// Sub-test: documents whose XML declaration names an encoding the test treats
// as unsupported. For each encoding, either the load succeeds and the
// document ID is present in the output, or it throws an error whose message
// mentions unsupported/encoding/unknown.
t.test('Unsupported encoding declarations', async () => {
  const startTime = performance.now();

  const unsupportedEncodings = [
    'EBCDIC',
    'Shift_JIS',
    'Big5',
    'KOI8-R',
    'Windows-1252'
  ];

  for (const encoding of unsupportedEncodings) {
    const xmlContent = `<?xml version="1.0" encoding="${encoding}"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>UNSUPPORTED-${encoding}</ID>
<Note>Test with ${encoding} encoding</Note>
</Invoice>`;

    const einvoice = new EInvoice();
    let xmlString: string | null = null;
    try {
      // Assertions are deliberately kept out of this try block: a failed
      // expectation would be caught below, and its message (likely quoting
      // "UNSUPPORTED-...") could match /unsupported/ and hide the failure.
      await einvoice.loadFromString(xmlContent);
      // Some parsers might handle it anyway
      xmlString = einvoice.getXmlString();
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.log(`${encoding} encoding error:`, message);
      expect(message).toMatch(/unsupported|encoding|unknown/i);
    }

    if (xmlString !== null) {
      console.log(`${encoding} encoding handled`);
      expect(xmlString).toContain(`UNSUPPORTED-${encoding}`);
    }
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('unsupported-encodings', elapsed);
});
// Sub-test: byte-order mark that contradicts the XML declaration.
// Case 1: UTF-8 BOM + encoding="UTF-16" declaration (bytes are UTF-8).
// Case 2: UTF-16LE BOM + encoding="UTF-8" declaration (bytes are UTF-16LE).
// Load failures are tolerated and logged in both cases.
t.test('BOM conflicts', async () => {
  const startTime = performance.now();

  // UTF-8 BOM with UTF-16 declaration
  const conflictBuffer = Buffer.concat([
    Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
    Buffer.from('<?xml version="1.0" encoding="UTF-16"?>\n<Invoice>\n<ID>BOM-CONFLICT</ID>\n</Invoice>')
  ]);

  const einvoice = new EInvoice();
  let xmlString: string | null = null;
  try {
    await einvoice.loadFromBuffer(conflictBuffer);
    xmlString = einvoice.getXmlString();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.log('BOM conflict error:', message);
  }
  // Asserted outside the try block: the catch above only logs, so an
  // expectation failing inside the try could never fail the test.
  if (xmlString !== null) {
    console.log('BOM conflict resolved');
    expect(xmlString).toContain('BOM-CONFLICT');
  }

  // UTF-16 LE BOM with UTF-8 declaration
  const conflictBuffer2 = Buffer.concat([
    Buffer.from([0xFF, 0xFE]), // UTF-16 LE BOM
    Buffer.from('<?xml version="1.0" encoding="UTF-8"?>\n<Invoice>\n<ID>BOM-CONFLICT-2</ID>\n</Invoice>', 'utf16le')
  ]);

  // Fresh instance so the outcome of case 1 (a loaded document or a
  // half-finished load) cannot influence case 2.
  const einvoice2 = new EInvoice();
  try {
    await einvoice2.loadFromBuffer(conflictBuffer2);
    console.log('UTF-16 BOM with UTF-8 declaration handled');
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.log('UTF-16 BOM conflict:', message);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('bom-conflicts', elapsed);
});
// Sub-test: the same text in Unicode normalization forms NFC (precomposed)
// and NFD (base letter + combining accent). Both documents must load, and
// the accented words must be present in the output in either form.
t.test('Character normalization issues', async () => {
  const startTime = performance.now();

  // The accented words are spelled with escapes and normalized explicitly.
  // NFC and NFD look identical in an editor, so literal characters in the
  // source file cannot be relied on to stay in the intended form.
  const cafe = 'Caf\u00E9';
  const andre = 'Andr\u00E9';

  // Builds one fixture with the accented words in the requested form.
  const buildInvoice = (id: string, form: 'NFC' | 'NFD', label: string): string =>
    `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>${id}</ID>
<Note>${cafe.normalize(form)} (${label})</Note>
<Name>${andre.normalize(form)}</Name>
</Invoice>`;

  // Different Unicode normalization forms
  const nfcContent = buildInvoice('NORM-NFC', 'NFC', 'NFC: U+00E9');
  // Same content but with NFD (decomposed)
  const nfdContent = buildInvoice('NORM-NFD', 'NFD', 'NFD: U+0065 U+0301');

  const einvoice1 = new EInvoice();
  const einvoice2 = new EInvoice();
  await einvoice1.loadFromString(nfcContent);
  await einvoice2.loadFromString(nfdContent);
  const xml1 = einvoice1.getXmlString();
  const xml2 = einvoice2.getXmlString();

  // Both should work but might normalize differently, so compare in one
  // canonical form (NFC) instead of depending on which form the library emits.
  for (const xml of [xml1, xml2]) {
    const canonical = xml.normalize('NFC');
    expect(canonical).toContain(cafe.normalize('NFC'));
    expect(canonical).toContain(andre.normalize('NFC'));
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('normalization', elapsed);
});
// Sub-test: document with invalid bytes inside an attribute value and an
// overlong encoding (0xC0 0x80) inside element text.
// - If the direct load succeeds, the surrounding ASCII content must be
//   present in the output.
// - If it fails, a fallback is attempted: decode as UTF-8, strip control and
//   replacement characters, and load the cleaned string. The fallback result
//   is only logged.
t.test('Encoding error recovery strategies', async () => {
  const startTime = performance.now();

  // Test various recovery strategies
  const problematicContent = Buffer.concat([
    Buffer.from('<?xml version="1.0" encoding="UTF-8"?>\n<Invoice>\n<Items>\n'),
    Buffer.from('<Item name="Test'),
    Buffer.from([0xFF, 0xFE]), // Invalid bytes
    Buffer.from('Product">'),
    Buffer.from('<Price>'),
    Buffer.from([0xC0, 0x80]), // Overlong encoding (security issue)
    Buffer.from('99.99</Price>'),
    Buffer.from('</Item>\n</Items>\n</Invoice>')
  ]);

  const einvoice = new EInvoice();
  let xmlString: string | null = null;
  try {
    await einvoice.loadFromBuffer(problematicContent);
    xmlString = einvoice.getXmlString();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.log('Recovery failed:', message);

    // Try fallback strategies
    try {
      // Remove invalid bytes. Decoding a Buffer as UTF-8 turns undecodable
      // bytes into U+FFFD, so the replacement character has to be stripped
      // as well as the control characters.
      const cleaned = problematicContent
        .toString('utf8')
        .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F\uFFFD]/g, '');
      // Fresh instance: the one above has just failed a load and may hold
      // partial state.
      await new EInvoice().loadFromString(cleaned);
      console.log('Fallback recovery succeeded');
    } catch (fallbackError) {
      const fallbackMessage = fallbackError instanceof Error ? fallbackError.message : String(fallbackError);
      console.log('Fallback also failed:', fallbackMessage);
    }
  }

  // Assertions run outside the try block so that a failed expectation is
  // reported as a test failure rather than being treated as a load error and
  // diverted into the fallback path.
  if (xmlString !== null) {
    console.log('Problematic content recovered');
    // Check what survived
    expect(xmlString).toContain('Test');
    expect(xmlString).toContain('Product');
    expect(xmlString).toContain('99.99');
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('error-recovery', elapsed);
});
// Sub-test: loads up to 100 corpus XML files and tallies byte-order marks and
// load failures. The tallies are only logged; the single assertion is that at
// least one file was processed.
t.test('Corpus encoding error analysis', async () => {
  const t0 = performance.now();
  const issueTypes: Record<string, number> = {};
  let processedCount = 0;
  let encodingIssues = 0;

  // Increments the counter stored under the given label.
  const bump = (label: string): void => {
    issueTypes[label] = (issueTypes[label] || 0) + 1;
  };

  // Returns the tally label for a leading BOM, or null when there is none.
  // Buffers shorter than three bytes are not inspected at all.
  const bomLabel = (bytes: Buffer): string | null => {
    if (bytes.length < 3) {
      return null;
    }
    if (bytes[0] === 0xEF && bytes[1] === 0xBB && bytes[2] === 0xBF) {
      return 'UTF-8 BOM';
    }
    if (bytes[0] === 0xFF && bytes[1] === 0xFE) {
      return 'UTF-16 LE BOM';
    }
    if (bytes[0] === 0xFE && bytes[1] === 0xFF) {
      return 'UTF-16 BE BOM';
    }
    return null;
  };

  const allFiles = await corpusLoader.getAllFiles();
  // Check corpus for encoding issues (first 100 XML files at most).
  const sample = allFiles.filter(f => f.endsWith('.xml')).slice(0, 100);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const invoice = new EInvoice();

      if (!Buffer.isBuffer(content)) {
        await invoice.loadFromString(content);
      } else {
        const label = bomLabel(content);
        if (label !== null) {
          bump(label);
        }
        // A buffer that fails to load is counted as an issue but still
        // counts as processed.
        try {
          await invoice.loadFromBuffer(content);
        } catch (parseError) {
          encodingIssues++;
          if (parseError.message.match(/encoding/i)) {
            bump('Encoding error');
          }
        }
      }
      processedCount++;
    } catch (error) {
      // Read failures and string-load failures end up here.
      encodingIssues++;
      bump('General error');
    }
  }

  console.log(`Encoding error corpus analysis (${processedCount} files):`);
  console.log(`- Files with encoding issues: ${encodingIssues}`);
  console.log('Issue types:', issueTypes);
  expect(processedCount).toBeGreaterThan(0);

  performanceTracker.addMeasurement('corpus-errors', performance.now() - t0);
});
// Print performance summary
performanceTracker.printSummary();
// Performance assertions
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(200); // Error handling may be slower
});
// Called once, after the ENC-09 suite above has been registered with tap.test(...).
// NOTE(review): presumably this triggers execution of the registered tests — confirm against the tapbundle documentation.
tap.start();

View File

@ -0,0 +1,393 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-10: Cross-Format Encoding - should maintain encoding consistency across formats', async (t) => {
// ENC-10: Verify encoding consistency when converting between different invoice formats
// This test ensures character encoding is preserved during format conversions
const performanceTracker = new PerformanceTracker('ENC-10: Cross-Format Encoding');
const corpusLoader = new CorpusLoader();
// Sub-test: UBL invoice with Western European special characters.
// Loads the document, logs the detected format, re-serializes it and checks
// that the special characters are present in the output.
// NOTE(review): despite the name, no UBL->CII conversion is performed here —
// the check is a load/serialize round trip.
t.test('UBL to CII encoding preservation', async () => {
  const startTime = performance.now();

  // UBL invoice with special characters. The ampersand in the supplier name
  // is written as &amp;: a bare "&" in character data is not well-formed XML
  // and would make a conforming parser reject the whole fixture.
  const ublContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:UBLVersionID>2.1</cbc:UBLVersionID>
<cbc:ID>CROSS-FORMAT-UBL-001</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:Note>Special chars: € £ ¥ © ® ™ § ¶ • ° ± × ÷</cbc:Note>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Müller &amp; Associés S.à r.l.</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>Rue de la Légion d'Honneur</cbc:StreetName>
<cbc:CityName>Saarbrücken</cbc:CityName>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:Note>Spëcïål cháracters: ñ ç ø å æ þ ð</cbc:Note>
<cac:Item>
<cbc:Name>Bücher über Köln</cbc:Name>
<cbc:Description>Prix: 25,50 € (TVA incluse)</cbc:Description>
</cac:Item>
</cac:InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(ublContent);

  // Attempt format detection and conversion
  const format = einvoice.getFormat();
  console.log(`Detected format: ${format}`);

  // Get the content back
  const xmlString = einvoice.getXmlString();

  // Verify all special characters are preserved. The supplier name is
  // expected in its escaped form because the output is XML markup.
  const expectedFragments = [
    '€ £ ¥ © ® ™ § ¶ • ° ± × ÷',
    'Müller &amp; Associés S.à r.l.',
    "Rue de la Légion d'Honneur",
    'Saarbrücken',
    'Spëcïål cháracters: ñ ç ø å æ þ ð',
    'Bücher über Köln',
    '25,50 €',
  ];
  for (const fragment of expectedFragments) {
    expect(xmlString).toContain(fragment);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('ubl-to-cii', elapsed);
});
// Sub-test: CII invoice carrying Latin-extended, Cyrillic and Chinese text.
// The document is loaded and re-serialized, and each international string
// must be present in the output.
t.test('CII to UBL encoding preservation', async () => {
  const t0 = performance.now();

  const ciiSource = `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice
xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<rsm:ExchangedDocumentContext>
<ram:GuidelineSpecifiedDocumentContextParameter>
<ram:ID>urn:cen.eu:en16931:2017</ram:ID>
</ram:GuidelineSpecifiedDocumentContextParameter>
</rsm:ExchangedDocumentContext>
<rsm:ExchangedDocument>
<ram:ID>CROSS-FORMAT-CII-001</ram:ID>
<ram:IssueDateTime>2025-01-25</ram:IssueDateTime>
<ram:IncludedNote>
<ram:Content>Multi-language: Français, Español, Português, Română, Čeština</ram:Content>
</ram:IncludedNote>
</rsm:ExchangedDocument>
<rsm:SupplyChainTradeTransaction>
<ram:ApplicableHeaderTradeAgreement>
<ram:SellerTradeParty>
<ram:Name>АО "Компания" (Россия)</ram:Name>
<ram:PostalTradeAddress>
<ram:LineOne>ул. Тверская, д. 1</ram:LineOne>
<ram:CityName>Москва</ram:CityName>
<ram:CountryID>RU</ram:CountryID>
</ram:PostalTradeAddress>
</ram:SellerTradeParty>
</ram:ApplicableHeaderTradeAgreement>
<ram:IncludedSupplyChainTradeLineItem>
<ram:SpecifiedTradeProduct>
<ram:Name>北京烤鸭 (Beijing Duck)</ram:Name>
<ram:Description>Traditional Chinese dish: 传统中国菜</ram:Description>
</ram:SpecifiedTradeProduct>
</ram:IncludedSupplyChainTradeLineItem>
</rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(ciiSource);
  const serialized = invoice.getXmlString();

  // International strings that must survive, in document order.
  const internationalFragments = [
    'Français, Español, Português, Română, Čeština',
    'АО "Компания" (Россия)',
    'ул. Тверская, д. 1',
    'Москва',
    '北京烤鸭 (Beijing Duck)',
    'Traditional Chinese dish: 传统中国菜',
  ];
  for (const fragment of internationalFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('cii-to-ubl', performance.now() - t0);
});
// Sub-test: CII XML (as used by ZUGFeRD/Factur-X) with German special
// characters. The XML is loaded and re-serialized, and the German strings
// must be present in the output.
// NOTE(review): no PDF is created or read here despite the name — only the
// XML payload is exercised.
t.test('ZUGFeRD/Factur-X encoding in PDF', async () => {
  const startTime = performance.now();

  // XML content for ZUGFeRD with special German characters. The ampersand in
  // the seller name is written as &amp;: a bare "&" in character data is not
  // well-formed XML and would make a conforming parser reject the fixture.
  const zugferdXml = `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100">
<rsm:ExchangedDocument>
<ram:ID xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">ZUGFERD-ENCODING-001</ram:ID>
<ram:Name xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">Rechnung für Büroartikel</ram:Name>
<ram:IncludedNote xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<ram:Content>Sonderzeichen: ÄÖÜäöüß €§°²³µ</ram:Content>
</ram:IncludedNote>
</rsm:ExchangedDocument>
<rsm:SupplyChainTradeTransaction>
<ram:ApplicableHeaderTradeAgreement xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<ram:SellerTradeParty>
<ram:Name>Großhändler für Bürobedarf GmbH &amp; Co. KG</ram:Name>
<ram:PostalTradeAddress>
<ram:LineOne>Königsallee 42</ram:LineOne>
<ram:CityName>Düsseldorf</ram:CityName>
</ram:PostalTradeAddress>
</ram:SellerTradeParty>
</ram:ApplicableHeaderTradeAgreement>
</rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(zugferdXml);
  const xmlString = einvoice.getXmlString();

  // Verify German special characters. The seller name is expected in its
  // escaped form because the output is XML markup.
  const expectedFragments = [
    'Rechnung für Büroartikel',
    'ÄÖÜäöüß €§°²³µ',
    'Großhändler für Bürobedarf GmbH &amp; Co. KG',
    'Königsallee',
    'Düsseldorf',
  ];
  for (const fragment of expectedFragments) {
    expect(xmlString).toContain(fragment);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('zugferd-encoding', elapsed);
});
// Sub-test: XRechnung-flavoured UBL invoice with German umlauts, the section
// sign and the multiplication sign. The document is loaded and re-serialized,
// and each listed string must be present in the output.
t.test('XRechnung encoding requirements', async () => {
  const t0 = performance.now();

  const xrechnungSource = `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:UBLVersionID>2.1</cbc:UBLVersionID>
<cbc:CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:xeinkauf.de:kosit:xrechnung_3.0</cbc:CustomizationID>
<cbc:ID>XRECHNUNG-ENCODING-001</cbc:ID>
<cbc:Note>Leitweg-ID: 991-12345-67</cbc:Note>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyLegalEntity>
<cbc:RegistrationName>Behörde für Straßenbau und Verkehr</cbc:RegistrationName>
</cac:PartyLegalEntity>
<cac:Contact>
<cbc:Name>Herr Müller-Lüdenscheid</cbc:Name>
<cbc:Telephone>+49 (0)30 12345-678</cbc:Telephone>
<cbc:ElectronicMail>müller-lüdenscheid@behoerde.de</cbc:ElectronicMail>
</cac:Contact>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:InvoiceLine>
<cbc:Note>Straßenbauarbeiten gemäß § 3 Abs. 2 VOB/B</cbc:Note>
<cac:Item>
<cbc:Name>Asphaltierungsarbeiten (Fahrbahn)</cbc:Name>
<cbc:Description>Maße: 100m × 8m × 0,08m</cbc:Description>
</cac:Item>
</cac:InvoiceLine>
</ubl:Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(xrechnungSource);
  const serialized = invoice.getXmlString();

  // XRechnung-specific strings that must be present after the round trip.
  const requiredFragments = [
    'urn:xeinkauf.de:kosit:xrechnung_3.0',
    'Leitweg-ID: 991-12345-67',
    'Behörde für Straßenbau und Verkehr',
    'Herr Müller-Lüdenscheid',
    'müller-lüdenscheid@behoerde.de',
    'gemäß § 3 Abs. 2 VOB/B',
    '100m × 8m × 0,08m',
  ];
  for (const fragment of requiredFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('xrechnung-encoding', performance.now() - t0);
});
// Sub-test: two consecutive load -> serialize passes. The output of the first
// pass is fed into a second EInvoice instance, and the character sets listed
// below must still be present after the second pass.
t.test('Mixed format conversion chain', async () => {
  const t0 = performance.now();

  // Start with complex content
  const seedXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>CHAIN-TEST-001</ID>
<Note>Characters to preserve:
Latin: àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ
Greek: ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ αβγδεζηθικλμνξοπρστυφχψω
Cyrillic: АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
Math: ∑∏∫∂∇∈∉⊂⊃∪∩≤≥≠≈∞±×÷
Currency: €£¥₹₽₪₩
Emoji: 📧💰🌍
</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>测试公司 (Test Company) ทดสอบ บริษัท</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
</Invoice>`;

  // Pass 1: original fixture -> serialized XML.
  const firstInvoice = new EInvoice();
  await firstInvoice.loadFromString(seedXml);
  const firstPass = firstInvoice.getXmlString();

  // Pass 2: output of pass 1 -> new instance -> serialized XML.
  const secondInvoice = new EInvoice();
  await secondInvoice.loadFromString(firstPass);
  const secondPass = secondInvoice.getXmlString();

  // Verify nothing was lost in the chain
  const survivors = [
    'àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
    'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ',
    'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ',
    '∑∏∫∂∇∈∉⊂⊃∪∩≤≥≠≈∞±×÷',
    '€£¥₹₽₪₩',
    '📧💰🌍',
    '测试公司',
    'ทดสอบ บริษัท',
  ];
  for (const fragment of survivors) {
    expect(secondPass).toContain(fragment);
  }

  performanceTracker.addMeasurement('conversion-chain', performance.now() - t0);
});
// Sub-test: loads up to 80 corpus XML files and tallies, per detected format,
// which encoding the serialized output declares. It also counts files whose
// output contains non-ASCII characters. Results are only logged; the single
// assertion is that at least one file loaded.
t.test('Encoding consistency across formats in corpus', async () => {
  const t0 = performance.now();
  const encodingsByFormat: Record<string, Record<string, number>> = {};
  let loadedFiles = 0;
  let nonAsciiFiles = 0;

  const allFiles = await corpusLoader.getAllFiles();
  // Sample corpus for cross-format encoding (first 80 XML files at most).
  const sample = allFiles.filter(f => f.endsWith('.xml')).slice(0, 80);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const invoice = new EInvoice();
      await (typeof content === 'string'
        ? invoice.loadFromString(content)
        : invoice.loadFromBuffer(content));

      const format = invoice.getFormat() || 'unknown';
      const serialized = invoice.getXmlString();

      // First encoding="..." occurrence in the output; 'none' when absent.
      const declaration = serialized.match(/encoding\s*=\s*["']([^"']+)["']/i);
      const encoding = declaration ? declaration[1] : 'none';

      // Track encoding by format, creating the per-format tally on first use.
      const tally = encodingsByFormat[format] || (encodingsByFormat[format] = {});
      tally[encoding] = (tally[encoding] || 0) + 1;

      // Any character outside the 7-bit ASCII range counts.
      if (/[^\x00-\x7F]/.test(serialized)) {
        nonAsciiFiles++;
      }
      loadedFiles++;
    } catch (error) {
      console.log(`Cross-format encoding issue in ${file}:`, error.message);
    }
  }

  console.log(`Cross-format encoding analysis (${loadedFiles} files):`);
  console.log(`- Files with non-ASCII characters: ${nonAsciiFiles}`);
  console.log('Encoding by format:');
  for (const [format, encodings] of Object.entries(encodingsByFormat)) {
    console.log(` ${format}:`, encodings);
  }
  expect(loadedFiles).toBeGreaterThan(0);

  performanceTracker.addMeasurement('corpus-cross-format', performance.now() - t0);
});
t.test('Round-trip encoding preservation', async () => {
  // Sub-test: embed each sample text in a small UBL invoice, load it, serialize
  // it again, and require that the text appears unchanged in the output.
  const begunAt = performance.now();

  // [label, text] pairs; the label is also used to derive the invoice ID.
  const samples: Array<[string, string]> = [
    ['European languages', 'Zürich, München, København, Kraków, București'],
    ['Asian languages', '東京 (Tokyo), 北京 (Beijing), 서울 (Seoul), กรุงเทพฯ (Bangkok)'],
    ['RTL languages', 'العربية (Arabic), עברית (Hebrew), فارسی (Persian)'],
    ['Special symbols', '™®©℗℠№℮¶§†‡•◊♠♣♥♦'],
    ['Mathematical', '∀x∈: x²≥0, ∑ᵢ₌₁ⁿ i = n(n+1)/2'],
  ];

  for (const [label, text] of samples) {
    // Upper-case the label and replace each whitespace run with a dash, e.g. "RTL-LANGUAGES".
    const idSuffix = label.toUpperCase().replace(/\s+/g, '-');

    // The continuation lines are deliberately unindented so the document text
    // stays exactly as the original test built it.
    const document = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>ROUND-TRIP-${idSuffix}</ID>
<Note>${text}</Note>
</Invoice>`;

    const invoice = new EInvoice();
    await invoice.loadFromString(document);

    // Serialize straight back out and check that the sample text is present verbatim.
    const serialized = invoice.getXmlString();
    expect(serialized).toContain(text);

    console.log(`Round-trip ${label}: OK`);
  }

  performanceTracker.addMeasurement('round-trip', performance.now() - begunAt);
});
// Print performance summary
performanceTracker.printSummary();
// Performance assertions
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(150); // Cross-format operations should be reasonably fast
});
// Start tap only after the tap.test(...) suite above has been declared.
tap.start();