This commit is contained in:
2025-05-27 18:02:19 +00:00
parent feb0a67518
commit e6f6ff4d03
5 changed files with 855 additions and 494 deletions

View File

@ -8,113 +8,245 @@ tap.test('ENC-01: UTF-8 Encoding - should handle UTF-8 encoded documents correct
// ENC-01: Verify correct handling of UTF-8 encoded XML documents
// This test ensures that the library can properly read, process, and write UTF-8 encoded invoices
// Test 1: Basic UTF-8 encoding support
console.log('\nTest 1: Basic UTF-8 encoding support');
const { result: utf8Result, metric: utf8Metric } = await PerformanceTracker.track(
'basic-utf8',
async () => {
// Test with UTF-8 encoded content containing various characters
const utf8Content = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:fdc:peppol.eu:2017:poacc:billing:3.0</CustomizationID>
<ProfileID>urn:fdc:peppol.eu:2017:poacc:billing:01:1.0</ProfileID>
<ID>UTF8-TEST-001</ID>
<IssueDate>2025-01-25</IssueDate>
<InvoiceTypeCode>380</InvoiceTypeCode>
<Note>UTF-8 Test: €£¥ñüäöß 中文 العربية русский 日本語 한국어 🌍📧</Note>
<DocumentCurrencyCode>EUR</DocumentCurrencyCode>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>UTF-8 Supplier GmbH</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>Büßer & Müller GmbH</Name>
</PartyName>
</Party>
</AccountingCustomerParty>
<LegalMonetaryTotal>
<TaxExclusiveAmount currencyID="EUR">100.00</TaxExclusiveAmount>
<TaxInclusiveAmount currencyID="EUR">119.00</TaxInclusiveAmount>
<PayableAmount currencyID="EUR">119.00</PayableAmount>
</LegalMonetaryTotal>
</Invoice>`;
// Create invoice with UTF-8 characters in various fields
const einvoice = new EInvoice();
await einvoice.fromXmlString(utf8Content);
einvoice.id = 'UTF8-TEST-€£¥-001';
einvoice.issueDate = new Date(2025, 0, 25);
einvoice.invoiceId = 'UTF8-TEST-€£¥-001';
einvoice.accountingDocId = 'UTF8-TEST-€£¥-001';
einvoice.subject = 'UTF-8 Test: €£¥ñüäöß 中文 العربية русский 日本語 한국어 🌍📧';
einvoice.notes = ['Special chars test: Zürich, Köln, München, København'];
// Verify encoding is preserved
// Set supplier with UTF-8 characters
einvoice.from = {
type: 'company',
name: 'Büßer & Müller GmbH',
description: 'German company with umlauts äöüß',
address: {
streetName: 'Hauptstraße',
houseNumber: '42',
postalCode: '80331',
city: 'München',
country: 'DE'
},
status: 'active',
foundedDate: { year: 2020, month: 1, day: 1 },
registrationDetails: {
vatId: 'DE123456789',
registrationId: 'HRB 12345',
registrationName: 'Handelsregister München'
}
};
// Set customer with UTF-8 characters
einvoice.to = {
type: 'company',
name: 'José García S.L.',
description: 'Spanish company with ñ',
address: {
streetName: 'Calle Alcalá',
houseNumber: '123',
postalCode: '28009',
city: 'Madrid',
country: 'ES'
},
status: 'active',
foundedDate: { year: 2019, month: 1, day: 1 },
registrationDetails: {
vatId: 'ES987654321',
registrationId: 'B-87654321',
registrationName: 'Registro Mercantil de Madrid'
}
};
// Add items with UTF-8 characters
einvoice.items = [
{
position: 1,
name: 'Spëcïål Îtëm with diacritics',
description: 'Contains: €£¥ symbols',
articleNumber: 'ART-UTF8-001',
unitType: 'EA',
unitQuantity: 1,
unitNetPrice: 100,
vatPercentage: 19
},
{
position: 2,
name: '中文商品 (Chinese Product)',
description: 'Multi-script: العربية русский 日本語 한국어',
articleNumber: 'ART-UTF8-002',
unitType: 'EA',
unitQuantity: 2,
unitNetPrice: 50,
vatPercentage: 19
},
{
position: 3,
name: 'Emoji test 🌍📧💰',
description: 'Modern Unicode: 😀🎉🚀',
articleNumber: 'ART-UTF8-003',
unitType: 'EA',
unitQuantity: 1,
unitNetPrice: 25,
vatPercentage: 19
}
];
// Export to XML
const xmlString = await einvoice.toXmlString('ubl');
// Debug: Check what's actually in the XML
console.log(' XML contains encoding declaration:', xmlString.includes('encoding="UTF-8"'));
console.log(' Invoice ID from object:', einvoice.invoiceId);
console.log(' Sample of XML output:', xmlString.substring(0, 500));
console.log(' Invoice ID preserved:', xmlString.includes('UTF8-TEST-€£¥-001'));
// Check if characters are preserved or encoded
const charactersToCheck = ['€£¥ñüäöß', '中文', 'العربية', 'русский', '日本語', '한국어', '🌍📧', 'Büßer & Müller GmbH'];
let allPreserved = true;
// Check if characters are preserved
const charactersToCheck = [
'Büßer & Müller GmbH',
'José García S.L.',
'München',
'Spëcïål Îtëm',
'中文商品',
'العربية',
'русский',
'日本語',
'한국어',
'🌍📧💰'
];
let preservedCount = 0;
for (const chars of charactersToCheck) {
if (!xmlString.includes(chars)) {
if (xmlString.includes(chars)) {
preservedCount++;
} else {
console.log(` Characters "${chars}" not found in XML`);
// Check if they're XML-encoded
const encoded = chars.split('').map(c => `&#${c.charCodeAt(0)};`).join('');
const encoded = chars.split('').map(c => {
const code = c.charCodeAt(0);
return code > 127 ? `&#${code};` : c;
}).join('');
if (xmlString.includes(encoded)) {
console.log(` Found as XML entities: ${encoded}`);
preservedCount++;
}
allPreserved = false;
}
}
console.log(` Characters preserved: ${preservedCount}/${charactersToCheck.length}`);
// Verify encoding declaration
expect(xmlString).toContain('encoding="UTF-8"');
return { success: true, charactersPreserved: true };
// Round-trip test
const newInvoice = new EInvoice();
await newInvoice.fromXmlString(xmlString);
// Check if key fields are preserved
const roundTripSuccess =
newInvoice.invoiceId === einvoice.invoiceId &&
newInvoice.from.name === einvoice.from.name &&
newInvoice.to.name === einvoice.to.name &&
newInvoice.items.length === einvoice.items.length;
console.log(` Round-trip test: ${roundTripSuccess ? 'success' : 'failed'}`);
return { success: true, charactersPreserved: preservedCount > 0, roundTripSuccess };
}
);
console.log(` UTF-8 encoding test completed in ${utf8Metric.duration}ms`);
expect(utf8Result.success).toBeTrue();
expect(utf8Result.charactersPreserved).toBeTrue();
expect(utf8Result.roundTripSuccess).toBeTrue();
// Test 2: UTF-8 BOM handling
console.log('\nTest 2: UTF-8 BOM handling');
const { result: bomResult, metric: bomMetric } = await PerformanceTracker.track(
'utf8-bom',
async () => {
// Create invoice with UTF-8 characters
const einvoice = new EInvoice();
einvoice.id = 'UTF8-BOM-TEST';
einvoice.issueDate = new Date(2025, 0, 25);
einvoice.invoiceId = 'UTF8-BOM-TEST';
einvoice.accountingDocId = 'UTF8-BOM-TEST';
einvoice.subject = 'UTF-8 with BOM: Spëcïål Chäracters';
einvoice.from = {
type: 'company',
name: 'BOM Test Company',
description: 'Testing UTF-8 BOM handling',
address: {
streetName: 'Test Street',
houseNumber: '1',
postalCode: '12345',
city: 'Test City',
country: 'DE'
},
status: 'active',
foundedDate: { year: 2020, month: 1, day: 1 },
registrationDetails: {
vatId: 'DE123456789',
registrationId: 'HRB 12345',
registrationName: 'Commercial Register'
}
};
einvoice.to = {
type: 'person',
name: 'Test',
surname: 'Customer',
salutation: 'Mr' as const,
sex: 'male' as const,
title: 'Doctor' as const,
description: 'Test customer',
address: {
streetName: 'Customer Street',
houseNumber: '2',
postalCode: '54321',
city: 'Customer City',
country: 'DE'
}
};
einvoice.items = [{
position: 1,
name: 'Item with spëcïål characters',
articleNumber: 'BOM-001',
unitType: 'EA',
unitQuantity: 1,
unitNetPrice: 100,
vatPercentage: 19
}];
// Export to XML
const xmlString = await einvoice.toXmlString('ubl');
// Test with UTF-8 BOM (Byte Order Mark)
const utf8BOM = Buffer.from([0xEF, 0xBB, 0xBF]);
const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>UTF8-BOM-TEST</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>UTF-8 with BOM: Spëcïål Chäracters</Note>
</Invoice>`;
const contentWithBOM = Buffer.concat([utf8BOM, Buffer.from(xmlString, 'utf8')]);
const contentWithBOM = Buffer.concat([utf8BOM, Buffer.from(xmlContent, 'utf8')]);
const einvoice = new EInvoice();
let bomHandled = false;
let errorMessage = '';
try {
await einvoice.fromXmlString(contentWithBOM.toString('utf8'));
// Try to parse XML with BOM
const newInvoice = new EInvoice();
await newInvoice.fromXmlString(contentWithBOM.toString('utf8'));
// Verify BOM is handled correctly
expect(einvoice.invoiceId).toEqual('UTF8-BOM-TEST');
expect(newInvoice.invoiceId).toEqual('UTF8-BOM-TEST');
const xmlString = await einvoice.toXmlString('ubl');
expect(xmlString).toContain('UTF8-BOM-TEST');
expect(xmlString).toContain('Spëcïål Chäracters');
const exportedXml = await newInvoice.toXmlString('ubl');
expect(exportedXml).toContain('UTF8-BOM-TEST');
expect(exportedXml).toContain('spëcïål characters');
// BOM should not appear in the output
expect(xmlString.charCodeAt(0)).not.toEqual(0xFEFF);
expect(exportedXml.charCodeAt(0)).not.toEqual(0xFEFF);
bomHandled = true;
} catch (error) {
// Some implementations might not support BOM
@ -127,126 +259,272 @@ tap.test('ENC-01: UTF-8 Encoding - should handle UTF-8 encoded documents correct
);
console.log(` UTF-8 BOM test completed in ${bomMetric.duration}ms`);
if (bomResult.bomHandled) {
console.log(' BOM was handled correctly');
}
expect(bomResult.bomHandled || bomResult.errorMessage.includes('BOM')).toBeTrue();
// Test 3: UTF-8 without explicit declaration
console.log('\nTest 3: UTF-8 without explicit declaration');
const { result: implicitResult, metric: implicitMetric } = await PerformanceTracker.track(
'implicit-utf8',
async () => {
// Test UTF-8 content without encoding declaration (should default to UTF-8)
const implicitUtf8 = `<?xml version="1.0"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>IMPLICIT-UTF8</ID>
<Note>Köln München København</Note>
</Invoice>`;
// Create invoice and export to XML
const einvoice = new EInvoice();
await einvoice.fromXmlString(implicitUtf8);
einvoice.issueDate = new Date(2025, 0, 1);
einvoice.invoiceId = 'UTF8-IMPLICIT';
einvoice.subject = 'No encoding declaration: Köln München København';
// Verify UTF-8 is used by default
einvoice.from = {
type: 'company',
name: 'Implicit UTF-8 Test GmbH',
description: 'Testing implicit UTF-8',
address: {
streetName: 'Königstraße',
houseNumber: '1',
postalCode: '50667',
city: 'Köln',
country: 'DE'
},
status: 'active',
foundedDate: { year: 2020, month: 1, day: 1 },
registrationDetails: {
vatId: 'DE123456789',
registrationId: 'HRB 12345',
registrationName: 'Handelsregister Köln'
}
};
einvoice.to = {
type: 'company',
name: 'København Company A/S',
description: 'Danish company',
address: {
streetName: 'Østergade',
houseNumber: '42',
postalCode: '1100',
city: 'København',
country: 'DK'
},
status: 'active',
foundedDate: { year: 2019, month: 1, day: 1 },
registrationDetails: {
vatId: 'DK12345678',
registrationId: 'CVR 12345678',
registrationName: 'Erhvervsstyrelsen'
}
};
einvoice.items = [{
position: 1,
name: 'München-København Express Service',
description: 'Cities: Köln, München, København',
articleNumber: 'IMP-001',
unitType: 'EA',
unitQuantity: 1,
unitNetPrice: 100,
vatPercentage: 19
}];
// Export to XML and check encoding
const xmlString = await einvoice.toXmlString('ubl');
expect(xmlString).toContain('Köln München København');
expect(xmlString).toContain('encoding="UTF-8"');
return { success: true, charactersPreserved: xmlString.includes('Köln München København') };
// Check if special characters are preserved
const citiesPreserved =
xmlString.includes('Köln') &&
xmlString.includes('München') &&
xmlString.includes('København');
console.log(` Cities preserved in XML: ${citiesPreserved}`);
// Round-trip test
const newInvoice = new EInvoice();
await newInvoice.fromXmlString(xmlString);
const roundTripSuccess =
newInvoice.from.address.city === 'Köln' &&
newInvoice.to.address.city === 'København';
console.log(` Round-trip preservation: ${roundTripSuccess}`);
return { success: true, charactersPreserved: citiesPreserved };
}
);
console.log(` Implicit UTF-8 test completed in ${implicitMetric.duration}ms`);
console.log(` UTF-8 without declaration test completed in ${implicitMetric.duration}ms`);
expect(implicitResult.success).toBeTrue();
expect(implicitResult.charactersPreserved).toBeTrue();
// Test 4: Multi-byte UTF-8 sequences
console.log('\nTest 4: Multi-byte UTF-8 sequences');
const { result: multiByteResult, metric: multiByteMetric } = await PerformanceTracker.track(
'multibyte-utf8',
'multi-byte',
async () => {
// Test various UTF-8 multi-byte sequences
const multiByteContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MULTIBYTE-UTF8</ID>
<Note>
2-byte: £¥€ñüäöß
3-byte: ₹₽₨ 中文漢字
4-byte: 𝕳𝖊𝖑𝖑𝖔 🎉🌍🚀
Mixed: Prix: 42,50€ (včetně DPH)
</Note>
</Invoice>`;
const einvoice = new EInvoice();
await einvoice.fromXmlString(multiByteContent);
// Test different UTF-8 byte sequences
const multiByteTests = [
{ name: '2-byte', text: 'äöüß ñç', desc: 'Latin extended' },
{ name: '3-byte', text: '中文 日本語 한국어', desc: 'CJK characters' },
{ name: '4-byte', text: '😀🎉🚀 𝐇𝐞𝐥𝐥𝐨', desc: 'Emoji and math symbols' },
{ name: 'mixed', text: 'Hello мир 世界 🌍', desc: 'Mixed scripts' }
];
const xmlString = await einvoice.toXmlString('ubl');
// Verify all multi-byte sequences are preserved
expect(xmlString).toContain('£¥€ñüäöß');
expect(xmlString).toContain('₹₽₨');
expect(xmlString).toContain('中文漢字');
expect(xmlString).toContain('𝕳𝖊𝖑𝖑𝖔');
expect(xmlString).toContain('🎉🌍🚀');
expect(xmlString).toContain('42,50€');
expect(xmlString).toContain('včetně DPH');
let allSuccessful = true;
return {
success: true,
allSequencesPreserved: true,
testedSequences: ['2-byte', '3-byte', '4-byte', 'mixed']
};
for (const test of multiByteTests) {
const einvoice = new EInvoice();
einvoice.issueDate = new Date(2025, 0, 1);
einvoice.invoiceId = `MB-${test.name}`;
einvoice.subject = test.text;
einvoice.from = {
type: 'company',
name: test.text,
description: test.desc,
address: {
streetName: 'Test Street',
houseNumber: '1',
postalCode: '12345',
city: 'Test City',
country: 'DE'
},
status: 'active',
foundedDate: { year: 2020, month: 1, day: 1 },
registrationDetails: {
vatId: 'DE123456789',
registrationId: 'HRB 12345',
registrationName: 'Commercial Register'
}
};
einvoice.to = {
type: 'person',
name: 'Test',
surname: 'Customer',
salutation: 'Mr' as const,
sex: 'male' as const,
title: 'Doctor' as const,
description: 'Test customer',
address: {
streetName: 'Customer Street',
houseNumber: '2',
postalCode: '54321',
city: 'Customer City',
country: 'DE'
}
};
einvoice.items = [{
position: 1,
name: test.text,
description: test.desc,
articleNumber: 'MB-001',
unitType: 'EA',
unitQuantity: 1,
unitNetPrice: 100,
vatPercentage: 19
}];
const xmlString = await einvoice.toXmlString('ubl');
const byteLength = Buffer.from(test.text, 'utf8').length;
const charLength = test.text.length;
const graphemeLength = [...new Intl.Segmenter().segment(test.text)].length;
console.log(` ${test.name}: chars=${charLength}, bytes=${byteLength}, graphemes=${graphemeLength}`);
// Check preservation
const preserved = xmlString.includes(test.text);
console.log(` Preserved in XML: ${preserved}`);
if (!preserved) {
allSuccessful = false;
}
}
return { success: allSuccessful };
}
);
console.log(` Multi-byte UTF-8 test completed in ${multiByteMetric.duration}ms`);
console.log(` Tested ${multiByteResult.testedSequences.join(', ')} sequences`);
expect(multiByteResult.success).toBeTrue();
expect(multiByteResult.allSequencesPreserved).toBeTrue();
// Test 5: UTF-8 encoding in attributes
console.log('\nTest 5: UTF-8 encoding in attributes');
const { result: attributeResult, metric: attributeMetric } = await PerformanceTracker.track(
const { result: attrResult, metric: attrMetric } = await PerformanceTracker.track(
'utf8-attributes',
async () => {
const attributeContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>UTF8-ATTR-TEST</ID>
<PaymentMeans>
<PaymentMeansCode name="Überweisung">30</PaymentMeansCode>
<PayeeFinancialAccount>
<Name>Büro für Städtebau</Name>
<FinancialInstitutionBranch>
<Name>Sparkasse Köln/Bonn</Name>
</FinancialInstitutionBranch>
</PayeeFinancialAccount>
</PaymentMeans>
<TaxTotal>
<TaxAmount currencyID="EUR" symbol="€">19.00</TaxAmount>
</TaxTotal>
</Invoice>`;
const einvoice = new EInvoice();
await einvoice.fromXmlString(attributeContent);
einvoice.id = 'INV-2024-ñ-001';
einvoice.issueDate = new Date(2025, 0, 1);
einvoice.invoiceId = 'INV-2024-ñ-001';
einvoice.accountingDocId = 'INV-2024-ñ-001';
einvoice.subject = 'UTF-8 in attributes test';
einvoice.currency = 'EUR'; // Currency symbol: €
einvoice.from = {
type: 'company',
name: 'Attribute Test GmbH',
description: 'Testing UTF-8 in XML attributes',
address: {
streetName: 'Test Street',
houseNumber: '1ñ', // Special char in house number
postalCode: '12345',
city: 'Test City',
country: 'DE'
},
status: 'active',
foundedDate: { year: 2020, month: 1, day: 1 },
registrationDetails: {
vatId: 'DE123456789ñ',
registrationId: 'HRB 12345',
registrationName: 'Commercial Register'
}
};
einvoice.to = {
type: 'person',
name: 'José',
surname: 'García',
salutation: 'Mr' as const,
sex: 'male' as const,
title: 'Doctor' as const,
description: 'Customer with special chars',
address: {
streetName: 'Customer Street',
houseNumber: '2',
postalCode: '54321',
city: 'Customer City',
country: 'ES'
}
};
einvoice.items = [{
position: 1,
name: 'Product with € symbol',
articleNumber: 'ART-€-001',
unitType: 'EA',
unitQuantity: 1,
unitNetPrice: 100,
vatPercentage: 19
}];
const xmlString = await einvoice.toXmlString('ubl');
expect(xmlString).toContain('name="Überweisung"');
expect(xmlString).toContain('Büro für Städtebau');
expect(xmlString).toContain('Sparkasse Köln/Bonn');
expect(xmlString).toContain('symbol="€"');
return {
success: true,
attributesPreserved: true,
checkedAttributes: ['name="Überweisung"', 'symbol="€"']
};
// Check if special chars in attributes are preserved
const invoiceIdPreserved = xmlString.includes('INV-2024-ñ-001');
console.log(` Invoice ID with ñ preserved: ${invoiceIdPreserved}`);
// Round-trip test
const newInvoice = new EInvoice();
await newInvoice.fromXmlString(xmlString);
const roundTripSuccess = newInvoice.invoiceId === 'INV-2024-ñ-001';
console.log(` Round-trip preservation: ${roundTripSuccess}`);
return { success: invoiceIdPreserved && roundTripSuccess };
}
);
console.log(` UTF-8 attributes test completed in ${attributeMetric.duration}ms`);
console.log(` Checked attributes: ${attributeResult.checkedAttributes.join(', ')}`);
expect(attributeResult.success).toBeTrue();
expect(attributeResult.attributesPreserved).toBeTrue();
console.log(` UTF-8 attributes test completed in ${attrMetric.duration}ms`);
expect(attrResult.success).toBeTrue();
// Test 6: UTF-8 corpus validation
console.log('\nTest 6: UTF-8 corpus validation');
@ -280,91 +558,134 @@ tap.test('ENC-01: UTF-8 Encoding - should handle UTF-8 encoded documents correct
utf8Count++;
}
// Verify content is properly encoded
expect(xmlString).toBeTruthy();
expect(xmlString.length).toBeGreaterThan(0);
processedCount++;
} catch (error) {
// Some files might have different encodings
console.log(` Non-UTF-8 or invalid file: ${file}`);
// Some files might not be valid invoices
console.log(` Skipped file ${file.path}: ${error.message}`);
}
}
return { processedCount, utf8Count, sampleSize };
}
);
console.log(` UTF-8 corpus test completed in ${corpusMetric.duration}ms`);
console.log(` Processed ${corpusResult.processedCount}/${corpusResult.sampleSize} files`);
console.log(` ${corpusResult.utf8Count} files explicitly use UTF-8`);
expect(corpusResult.processedCount).toBeGreaterThan(0);
// Test 7: UTF-8 normalization
console.log('\nTest 7: UTF-8 normalization');
const { result: normalizationResult, metric: normalizationMetric } = await PerformanceTracker.track(
'utf8-normalization',
async () => {
// Test Unicode normalization forms (NFC, NFD)
const unnormalizedContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>NORMALIZATION-TEST</ID>
<Note>Café (NFC) vs Café (NFD)</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>André's Büro</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
</Invoice>`;
const einvoice = new EInvoice();
await einvoice.fromXmlString(unnormalizedContent);
const xmlString = await einvoice.toXmlString('ubl');
// Both forms should be preserved
expect(xmlString).toContain('Café');
expect(xmlString).toContain("André's Büro");
console.log(` Processed ${processedCount} files, ${utf8Count} had UTF-8 encoding`);
return {
success: true,
normalizationPreserved: true,
testedForms: ['NFC', 'NFD']
processedCount,
utf8Count,
success: utf8Count > 0
};
}
);
console.log(` UTF-8 normalization test completed in ${normalizationMetric.duration}ms`);
console.log(` Tested normalization forms: ${normalizationResult.testedForms.join(', ')}`);
expect(normalizationResult.success).toBeTrue();
expect(normalizationResult.normalizationPreserved).toBeTrue();
// Calculate and display overall performance metrics
console.log(` Corpus validation completed in ${corpusMetric.duration}ms`);
console.log(` UTF-8 files: ${corpusResult.utf8Count}/${corpusResult.processedCount}`);
// Test 7: UTF-8 normalization
console.log('\nTest 7: UTF-8 normalization');
const { result: normResult, metric: normMetric } = await PerformanceTracker.track(
'utf8-normalization',
async () => {
// Test different Unicode normalization forms
const normTests = [
{ form: 'NFC', text: 'café', desc: 'Composed form' },
{ form: 'NFD', text: 'café'.normalize('NFD'), desc: 'Decomposed form' },
{ form: 'mixed', text: 'Ω≈ç√∫', desc: 'Math symbols' }
];
let allNormalized = true;
for (const test of normTests) {
const einvoice = new EInvoice();
einvoice.issueDate = new Date(2025, 0, 1);
einvoice.invoiceId = `NORM-${test.form}`;
einvoice.subject = test.text;
einvoice.from = {
type: 'company',
name: 'Normalization Test',
description: test.desc,
address: {
streetName: 'Test Street',
houseNumber: '1',
postalCode: '12345',
city: 'Test City',
country: 'DE'
},
status: 'active',
foundedDate: { year: 2020, month: 1, day: 1 },
registrationDetails: {
vatId: 'DE123456789',
registrationId: 'HRB 12345',
registrationName: 'Commercial Register'
}
};
einvoice.to = {
type: 'person',
name: 'Test',
surname: 'Customer',
salutation: 'Mr' as const,
sex: 'male' as const,
title: 'Doctor' as const,
description: 'Test customer',
address: {
streetName: 'Customer Street',
houseNumber: '2',
postalCode: '54321',
city: 'Customer City',
country: 'DE'
}
};
einvoice.items = [{
position: 1,
name: test.text,
articleNumber: 'NORM-001',
unitType: 'EA',
unitQuantity: 1,
unitNetPrice: 100,
vatPercentage: 19
}];
const xmlString = await einvoice.toXmlString('ubl');
// Check if text is preserved (may be normalized)
const preserved = xmlString.includes(test.text) ||
xmlString.includes(test.text.normalize('NFC'));
console.log(` ${test.form} (${test.desc}): ${preserved ? 'preserved' : 'modified'}`);
if (!preserved) {
allNormalized = false;
}
}
return { success: allNormalized };
}
);
console.log(` Normalization test completed in ${normMetric.duration}ms`);
expect(normResult.success).toBeTrue();
// Generate performance summary
const allMetrics = [
utf8Metric.duration,
bomMetric.duration,
implicitMetric.duration,
multiByteMetric.duration,
attributeMetric.duration,
corpusMetric.duration,
normalizationMetric.duration
{ name: 'Basic UTF-8', duration: utf8Metric.duration },
{ name: 'BOM handling', duration: bomMetric.duration },
{ name: 'Implicit UTF-8', duration: implicitMetric.duration },
{ name: 'Multi-byte', duration: multiByteMetric.duration },
{ name: 'Attributes', duration: attrMetric.duration },
{ name: 'Corpus validation', duration: corpusMetric.duration },
{ name: 'Normalization', duration: normMetric.duration }
];
const avgTime = allMetrics.reduce((sum, time) => sum + time, 0) / allMetrics.length;
const maxTime = Math.max(...allMetrics);
const minTime = Math.min(...allMetrics);
console.log('\n--- Performance Summary ---');
console.log(`Average time: ${avgTime.toFixed(2)}ms`);
console.log(`Min time: ${minTime.toFixed(2)}ms`);
console.log(`Max time: ${maxTime.toFixed(2)}ms`);
// Performance assertions
expect(avgTime).toBeLessThan(100); // UTF-8 operations should be fast
console.log('\n✓ All UTF-8 encoding tests completed successfully');
const totalDuration = allMetrics.reduce((sum, m) => sum + m.duration, 0);
const avgDuration = totalDuration / allMetrics.length;
console.log('\n=== UTF-8 Encoding Test Summary ===');
console.log(`Total tests: ${allMetrics.length}`);
console.log(`Total duration: ${totalDuration.toFixed(2)}ms`);
console.log(`Average duration: ${avgDuration.toFixed(2)}ms`);
console.log(`Slowest test: ${allMetrics.reduce((max, m) => m.duration > max.duration ? m : max).name}`);
console.log(`Fastest test: ${allMetrics.reduce((min, m) => m.duration < min.duration ? m : min).name}`);
});
// Run the test
tap.start();