einvoice/test/suite/einvoice_encoding/test.enc-01.utf8-encoding.ts
2025-05-27 18:02:19 +00:00

691 lines
23 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
/**
 * Extracts a printable message from a caught value.
 *
 * Catch variables are `unknown` under strict compiler settings, so `.message`
 * may only be read after narrowing to `Error`.
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Returns the alternative spellings under which `text` may appear in serialized XML.
 *
 * @param text - the plain (unescaped) text that was put into the invoice
 * @returns two candidates: `text` with `&` and `<` replaced by their predefined entities,
 *          and that same string with every non-ASCII character additionally written as a
 *          decimal numeric character reference
 */
function xmlEncodedForms(text: string): string[] {
  const escaped = text.replace(/&/g, '&amp;').replace(/</g, '&lt;');
  // Iterate by code point, not by UTF-16 code unit: an emoji must become a single
  // reference (e.g. &#127757;) rather than two surrogate references.
  const numeric = Array.from(escaped, (char) => {
    const codePoint = char.codePointAt(0) ?? 0;
    return codePoint > 127 ? `&#${codePoint};` : char;
  }).join('');
  return [escaped, numeric];
}

/**
 * Builds the generic German supplier used by scenarios that only vary name and description.
 */
function buildTestSupplier(name: string, description: string): EInvoice['from'] {
  return {
    type: 'company',
    name,
    description,
    address: {
      streetName: 'Test Street',
      houseNumber: '1',
      postalCode: '12345',
      city: 'Test City',
      country: 'DE'
    },
    status: 'active',
    foundedDate: { year: 2020, month: 1, day: 1 },
    registrationDetails: {
      vatId: 'DE123456789',
      registrationId: 'HRB 12345',
      registrationName: 'Commercial Register'
    }
  };
}

/**
 * Builds the generic ASCII-only person customer shared by several scenarios.
 */
function buildTestCustomer(): EInvoice['to'] {
  return {
    type: 'person',
    name: 'Test',
    surname: 'Customer',
    salutation: 'Mr' as const,
    sex: 'male' as const,
    title: 'Doctor' as const,
    description: 'Test customer',
    address: {
      streetName: 'Customer Street',
      houseNumber: '2',
      postalCode: '54321',
      city: 'Customer City',
      country: 'DE'
    }
  };
}

/**
 * Builds a single line item at position 1 (1 x 100 net, 19% VAT).
 *
 * @param name - item name
 * @param articleNumber - article number
 * @param description - optional description; the property is omitted entirely when not given
 */
function buildSingleItem(
  name: string,
  articleNumber: string,
  description?: string
): EInvoice['items'][number] {
  return {
    position: 1,
    name,
    ...(description === undefined ? {} : { description }),
    articleNumber,
    unitType: 'EA',
    unitQuantity: 1,
    unitNetPrice: 100,
    vatPercentage: 19
  };
}

tap.test('ENC-01: UTF-8 Encoding - should handle UTF-8 encoded documents correctly', async () => {
  // ENC-01: Verify correct handling of UTF-8 encoded XML documents.
  // Each scenario builds an invoice, exports it as UBL XML and inspects the result;
  // every scenario is timed through PerformanceTracker.track for the summary at the end.

  // Test 1: Basic UTF-8 encoding support
  console.log('\nTest 1: Basic UTF-8 encoding support');
  const { result: utf8Result, metric: utf8Metric } = await PerformanceTracker.track(
    'basic-utf8',
    async () => {
      // Create invoice with UTF-8 characters in various fields
      const einvoice = new EInvoice();
      einvoice.id = 'UTF8-TEST-€£¥-001';
      einvoice.issueDate = new Date(2025, 0, 25);
      einvoice.invoiceId = 'UTF8-TEST-€£¥-001';
      einvoice.accountingDocId = 'UTF8-TEST-€£¥-001';
      einvoice.subject = 'UTF-8 Test: €£¥ñüäöß 中文 العربية русский 日本語 한국어 🌍📧';
      einvoice.notes = ['Special chars test: Zürich, Köln, München, København'];

      // Supplier with German umlauts
      einvoice.from = {
        type: 'company',
        name: 'Büßer & Müller GmbH',
        description: 'German company with umlauts äöüß',
        address: {
          streetName: 'Hauptstraße',
          houseNumber: '42',
          postalCode: '80331',
          city: 'München',
          country: 'DE'
        },
        status: 'active',
        foundedDate: { year: 2020, month: 1, day: 1 },
        registrationDetails: {
          vatId: 'DE123456789',
          registrationId: 'HRB 12345',
          registrationName: 'Handelsregister München'
        }
      };

      // Customer with Spanish accents
      einvoice.to = {
        type: 'company',
        name: 'José García S.L.',
        description: 'Spanish company with ñ',
        address: {
          streetName: 'Calle Alcalá',
          houseNumber: '123',
          postalCode: '28009',
          city: 'Madrid',
          country: 'ES'
        },
        status: 'active',
        foundedDate: { year: 2019, month: 1, day: 1 },
        registrationDetails: {
          vatId: 'ES987654321',
          registrationId: 'B-87654321',
          registrationName: 'Registro Mercantil de Madrid'
        }
      };

      // Items covering diacritics, several scripts and emoji
      einvoice.items = [
        {
          position: 1,
          name: 'Spëcïål Îtëm with diacritics',
          description: 'Contains: €£¥ symbols',
          articleNumber: 'ART-UTF8-001',
          unitType: 'EA',
          unitQuantity: 1,
          unitNetPrice: 100,
          vatPercentage: 19
        },
        {
          position: 2,
          name: '中文商品 (Chinese Product)',
          description: 'Multi-script: العربية русский 日本語 한국어',
          articleNumber: 'ART-UTF8-002',
          unitType: 'EA',
          unitQuantity: 2,
          unitNetPrice: 50,
          vatPercentage: 19
        },
        {
          position: 3,
          name: 'Emoji test 🌍📧💰',
          description: 'Modern Unicode: 😀🎉🚀',
          articleNumber: 'ART-UTF8-003',
          unitType: 'EA',
          unitQuantity: 1,
          unitNetPrice: 25,
          vatPercentage: 19
        }
      ];

      // Export to XML
      const xmlString = await einvoice.toXmlString('ubl');

      // Debug: show what actually ended up in the XML
      console.log(' XML contains encoding declaration:', xmlString.includes('encoding="UTF-8"'));
      console.log(' Invoice ID preserved:', xmlString.includes('UTF8-TEST-€£¥-001'));

      // Count how many of the expected snippets survived serialization
      const charactersToCheck = [
        'Büßer & Müller GmbH',
        'José García S.L.',
        'München',
        'Spëcïål Îtëm',
        '中文商品',
        'العربية',
        'русский',
        '日本語',
        '한국어',
        '🌍📧💰'
      ];
      let preservedCount = 0;
      for (const expected of charactersToCheck) {
        if (xmlString.includes(expected)) {
          preservedCount++;
          continue;
        }
        console.log(` Characters "${expected}" not found in XML`);
        // The raw text is absent; accept it when it is present in an XML-escaped spelling
        // (e.g. "&" serialized as "&amp;", or characters written as numeric references).
        const encodedMatch = xmlEncodedForms(expected).find((form) => xmlString.includes(form));
        if (encodedMatch !== undefined) {
          console.log(` Found as XML entities: ${encodedMatch}`);
          preservedCount++;
        }
      }
      console.log(` Characters preserved: ${preservedCount}/${charactersToCheck.length}`);

      // Verify encoding declaration
      expect(xmlString).toContain('encoding="UTF-8"');

      // Round-trip: parse the exported XML again and compare key fields
      const newInvoice = new EInvoice();
      await newInvoice.fromXmlString(xmlString);
      const roundTripSuccess =
        newInvoice.invoiceId === einvoice.invoiceId &&
        newInvoice.from.name === einvoice.from.name &&
        newInvoice.to.name === einvoice.to.name &&
        newInvoice.items.length === einvoice.items.length;
      console.log(` Round-trip test: ${roundTripSuccess ? 'success' : 'failed'}`);

      return { success: true, charactersPreserved: preservedCount > 0, roundTripSuccess };
    }
  );
  console.log(` UTF-8 encoding test completed in ${utf8Metric.duration}ms`);
  expect(utf8Result.success).toBeTrue();
  expect(utf8Result.charactersPreserved).toBeTrue();
  expect(utf8Result.roundTripSuccess).toBeTrue();

  // Test 2: UTF-8 BOM handling
  console.log('\nTest 2: UTF-8 BOM handling');
  const { result: bomResult, metric: bomMetric } = await PerformanceTracker.track(
    'utf8-bom',
    async () => {
      // Create invoice with UTF-8 characters
      const einvoice = new EInvoice();
      einvoice.id = 'UTF8-BOM-TEST';
      einvoice.issueDate = new Date(2025, 0, 25);
      einvoice.invoiceId = 'UTF8-BOM-TEST';
      einvoice.accountingDocId = 'UTF8-BOM-TEST';
      einvoice.subject = 'UTF-8 with BOM: Spëcïål Chäracters';
      einvoice.from = buildTestSupplier('BOM Test Company', 'Testing UTF-8 BOM handling');
      einvoice.to = buildTestCustomer();
      einvoice.items = [buildSingleItem('Item with spëcïål characters', 'BOM-001')];

      // Export to XML
      const xmlString = await einvoice.toXmlString('ubl');

      // Prefix the document with the UTF-8 BOM (Byte Order Mark)
      const utf8BOM = Buffer.from([0xEF, 0xBB, 0xBF]);
      const contentWithBOM = Buffer.concat([utf8BOM, Buffer.from(xmlString, 'utf8')]);

      let bomHandled = false;
      let errorMessage = '';
      let reparsed: EInvoice | undefined;
      let exportedXml: string | undefined;
      try {
        // Only parsing and re-export are allowed to fail here: some implementations
        // might not support a BOM. The assertions live outside this try so that an
        // assertion failure can never be mistaken for "BOM not supported".
        const candidate = new EInvoice();
        await candidate.fromXmlString(contentWithBOM.toString('utf8'));
        exportedXml = await candidate.toXmlString('ubl');
        reparsed = candidate;
      } catch (error: unknown) {
        errorMessage = describeError(error);
        console.log(' UTF-8 BOM handling not supported:', errorMessage);
      }

      if (reparsed !== undefined && exportedXml !== undefined) {
        // Verify BOM is handled correctly
        expect(reparsed.invoiceId).toEqual('UTF8-BOM-TEST');
        expect(exportedXml).toContain('UTF8-BOM-TEST');
        expect(exportedXml).toContain('spëcïål characters');
        // BOM should not appear in the output
        expect(exportedXml.charCodeAt(0)).not.toEqual(0xFEFF);
        bomHandled = true;
      }

      return { bomHandled, errorMessage };
    }
  );
  console.log(` UTF-8 BOM test completed in ${bomMetric.duration}ms`);
  // Passes when the BOM was handled, or when parsing was rejected with a BOM-related error.
  expect(bomResult.bomHandled || bomResult.errorMessage.includes('BOM')).toBeTrue();

  // Test 3: UTF-8 without explicit declaration
  // NOTE(review): the exported XML is asserted to carry encoding="UTF-8" and is re-parsed
  // unchanged, so a document lacking the declaration is not actually exercised here.
  console.log('\nTest 3: UTF-8 without explicit declaration');
  const { result: implicitResult, metric: implicitMetric } = await PerformanceTracker.track(
    'implicit-utf8',
    async () => {
      // Create invoice and export to XML
      const einvoice = new EInvoice();
      einvoice.issueDate = new Date(2025, 0, 1);
      einvoice.invoiceId = 'UTF8-IMPLICIT';
      einvoice.subject = 'No encoding declaration: Köln München København';
      einvoice.from = {
        type: 'company',
        name: 'Implicit UTF-8 Test GmbH',
        description: 'Testing implicit UTF-8',
        address: {
          streetName: 'Königstraße',
          houseNumber: '1',
          postalCode: '50667',
          city: 'Köln',
          country: 'DE'
        },
        status: 'active',
        foundedDate: { year: 2020, month: 1, day: 1 },
        registrationDetails: {
          vatId: 'DE123456789',
          registrationId: 'HRB 12345',
          registrationName: 'Handelsregister Köln'
        }
      };
      einvoice.to = {
        type: 'company',
        name: 'København Company A/S',
        description: 'Danish company',
        address: {
          streetName: 'Østergade',
          houseNumber: '42',
          postalCode: '1100',
          city: 'København',
          country: 'DK'
        },
        status: 'active',
        foundedDate: { year: 2019, month: 1, day: 1 },
        registrationDetails: {
          vatId: 'DK12345678',
          registrationId: 'CVR 12345678',
          registrationName: 'Erhvervsstyrelsen'
        }
      };
      einvoice.items = [
        buildSingleItem(
          'München-København Express Service',
          'IMP-001',
          'Cities: Köln, München, København'
        )
      ];

      // Export to XML and check encoding
      const xmlString = await einvoice.toXmlString('ubl');
      expect(xmlString).toContain('encoding="UTF-8"');

      // Check if special characters are preserved
      const citiesPreserved = ['Köln', 'München', 'København'].every((city) =>
        xmlString.includes(city)
      );
      console.log(` Cities preserved in XML: ${citiesPreserved}`);

      // Round-trip test (result is logged only, it is not part of the returned verdict)
      const newInvoice = new EInvoice();
      await newInvoice.fromXmlString(xmlString);
      const roundTripSuccess =
        newInvoice.from.address.city === 'Köln' &&
        newInvoice.to.address.city === 'København';
      console.log(` Round-trip preservation: ${roundTripSuccess}`);

      return { success: true, charactersPreserved: citiesPreserved };
    }
  );
  console.log(` UTF-8 without declaration test completed in ${implicitMetric.duration}ms`);
  expect(implicitResult.success).toBeTrue();
  expect(implicitResult.charactersPreserved).toBeTrue();

  // Test 4: Multi-byte UTF-8 sequences
  console.log('\nTest 4: Multi-byte UTF-8 sequences');
  const { result: multiByteResult, metric: multiByteMetric } = await PerformanceTracker.track(
    'multi-byte',
    async () => {
      // Test different UTF-8 byte sequences
      const multiByteTests = [
        { name: '2-byte', text: 'äöüß ñç', desc: 'Latin extended' },
        { name: '3-byte', text: '中文 日本語 한국어', desc: 'CJK characters' },
        { name: '4-byte', text: '😀🎉🚀 𝐇𝐞𝐥𝐥𝐨', desc: 'Emoji and math symbols' },
        { name: 'mixed', text: 'Hello мир 世界 🌍', desc: 'Mixed scripts' }
      ];
      // One segmenter serves every case; it does not depend on the loop variable.
      const segmenter = new Intl.Segmenter();

      let allSuccessful = true;
      for (const test of multiByteTests) {
        const einvoice = new EInvoice();
        einvoice.issueDate = new Date(2025, 0, 1);
        einvoice.invoiceId = `MB-${test.name}`;
        einvoice.subject = test.text;
        einvoice.from = buildTestSupplier(test.text, test.desc);
        einvoice.to = buildTestCustomer();
        einvoice.items = [buildSingleItem(test.text, 'MB-001', test.desc)];

        const xmlString = await einvoice.toXmlString('ubl');

        // Report the three different "lengths" of the sample text
        const byteLength = Buffer.from(test.text, 'utf8').length;
        const charLength = test.text.length;
        const graphemeLength = [...segmenter.segment(test.text)].length;
        console.log(` ${test.name}: chars=${charLength}, bytes=${byteLength}, graphemes=${graphemeLength}`);

        // Check preservation
        const preserved = xmlString.includes(test.text);
        console.log(` Preserved in XML: ${preserved}`);
        if (!preserved) {
          allSuccessful = false;
        }
      }
      return { success: allSuccessful };
    }
  );
  console.log(` Multi-byte UTF-8 test completed in ${multiByteMetric.duration}ms`);
  expect(multiByteResult.success).toBeTrue();

  // Test 5: UTF-8 encoding in attributes
  console.log('\nTest 5: UTF-8 encoding in attributes');
  const { result: attrResult, metric: attrMetric } = await PerformanceTracker.track(
    'utf8-attributes',
    async () => {
      const einvoice = new EInvoice();
      einvoice.id = 'INV-2024-ñ-001';
      einvoice.issueDate = new Date(2025, 0, 1);
      einvoice.invoiceId = 'INV-2024-ñ-001';
      einvoice.accountingDocId = 'INV-2024-ñ-001';
      einvoice.subject = 'UTF-8 in attributes test';
      einvoice.currency = 'EUR'; // Currency symbol: €
      einvoice.from = {
        type: 'company',
        name: 'Attribute Test GmbH',
        description: 'Testing UTF-8 in XML attributes',
        address: {
          streetName: 'Test Street',
          houseNumber: '1ñ', // Special char in house number
          postalCode: '12345',
          city: 'Test City',
          country: 'DE'
        },
        status: 'active',
        foundedDate: { year: 2020, month: 1, day: 1 },
        registrationDetails: {
          vatId: 'DE123456789ñ',
          registrationId: 'HRB 12345',
          registrationName: 'Commercial Register'
        }
      };
      einvoice.to = {
        type: 'person',
        name: 'José',
        surname: 'García',
        salutation: 'Mr' as const,
        sex: 'male' as const,
        title: 'Doctor' as const,
        description: 'Customer with special chars',
        address: {
          streetName: 'Customer Street',
          houseNumber: '2',
          postalCode: '54321',
          city: 'Customer City',
          country: 'ES'
        }
      };
      einvoice.items = [buildSingleItem('Product with € symbol', 'ART-€-001')];

      const xmlString = await einvoice.toXmlString('ubl');

      // Check that the invoice ID containing ñ appears verbatim in the XML
      const invoiceIdPreserved = xmlString.includes('INV-2024-ñ-001');
      console.log(` Invoice ID with ñ preserved: ${invoiceIdPreserved}`);

      // Round-trip test
      const newInvoice = new EInvoice();
      await newInvoice.fromXmlString(xmlString);
      const roundTripSuccess = newInvoice.invoiceId === 'INV-2024-ñ-001';
      console.log(` Round-trip preservation: ${roundTripSuccess}`);

      return { success: invoiceIdPreserved && roundTripSuccess };
    }
  );
  console.log(` UTF-8 attributes test completed in ${attrMetric.duration}ms`);
  expect(attrResult.success).toBeTrue();

  // Test 6: UTF-8 corpus validation
  // NOTE(review): this scenario only reports numbers; `success` is returned but never asserted.
  console.log('\nTest 6: UTF-8 corpus validation');
  const { result: corpusResult, metric: corpusMetric } = await PerformanceTracker.track(
    'corpus-utf8',
    async () => {
      let processedCount = 0;
      let utf8Count = 0;

      // Load XML files from various categories
      const ciiFiles = await CorpusLoader.loadCategory('CII_XMLRECHNUNG');
      const ublFiles = await CorpusLoader.loadCategory('UBL_XMLRECHNUNG');
      const allFiles = [...ciiFiles, ...ublFiles];

      // Test a sample of at most 50 XML files for UTF-8 handling
      const sampleSize = Math.min(50, allFiles.length);
      const sample = allFiles.slice(0, sampleSize);

      for (const file of sample) {
        try {
          const buffer = await CorpusLoader.loadFile(file.path);
          const content = buffer.toString('utf8');
          const einvoice = new EInvoice();
          await einvoice.fromXmlString(content);
          const xmlString = await einvoice.toXmlString('ubl');
          // Check if encoding is preserved or defaulted to UTF-8
          if (xmlString.includes('encoding="UTF-8"') || xmlString.includes("encoding='UTF-8'")) {
            utf8Count++;
          }
          processedCount++;
        } catch (error: unknown) {
          // Some files might not be valid invoices
          console.log(` Skipped file ${file.path}: ${describeError(error)}`);
        }
      }

      console.log(` Processed ${processedCount} files, ${utf8Count} had UTF-8 encoding`);
      return {
        processedCount,
        utf8Count,
        success: utf8Count > 0
      };
    }
  );
  console.log(` Corpus validation completed in ${corpusMetric.duration}ms`);
  console.log(` UTF-8 files: ${corpusResult.utf8Count}/${corpusResult.processedCount}`);

  // Test 7: UTF-8 normalization
  console.log('\nTest 7: UTF-8 normalization');
  const { result: normResult, metric: normMetric } = await PerformanceTracker.track(
    'utf8-normalization',
    async () => {
      // Test different Unicode normalization forms
      const normTests = [
        { form: 'NFC', text: 'café', desc: 'Composed form' },
        { form: 'NFD', text: 'café'.normalize('NFD'), desc: 'Decomposed form' },
        { form: 'mixed', text: 'Ω≈ç√∫', desc: 'Math symbols' }
      ];

      let allNormalized = true;
      for (const test of normTests) {
        const einvoice = new EInvoice();
        einvoice.issueDate = new Date(2025, 0, 1);
        einvoice.invoiceId = `NORM-${test.form}`;
        einvoice.subject = test.text;
        einvoice.from = buildTestSupplier('Normalization Test', test.desc);
        einvoice.to = buildTestCustomer();
        einvoice.items = [buildSingleItem(test.text, 'NORM-001')];

        const xmlString = await einvoice.toXmlString('ubl');

        // Accept the text either exactly as given or in its NFC-normalized form
        const preserved = xmlString.includes(test.text) ||
          xmlString.includes(test.text.normalize('NFC'));
        console.log(` ${test.form} (${test.desc}): ${preserved ? 'preserved' : 'modified'}`);
        if (!preserved) {
          allNormalized = false;
        }
      }
      return { success: allNormalized };
    }
  );
  console.log(` Normalization test completed in ${normMetric.duration}ms`);
  expect(normResult.success).toBeTrue();

  // Generate performance summary
  const allMetrics = [
    { name: 'Basic UTF-8', duration: utf8Metric.duration },
    { name: 'BOM handling', duration: bomMetric.duration },
    { name: 'Implicit UTF-8', duration: implicitMetric.duration },
    { name: 'Multi-byte', duration: multiByteMetric.duration },
    { name: 'Attributes', duration: attrMetric.duration },
    { name: 'Corpus validation', duration: corpusMetric.duration },
    { name: 'Normalization', duration: normMetric.duration }
  ];
  const totalDuration = allMetrics.reduce((sum, m) => sum + m.duration, 0);
  const avgDuration = totalDuration / allMetrics.length;
  console.log('\n=== UTF-8 Encoding Test Summary ===');
  console.log(`Total tests: ${allMetrics.length}`);
  console.log(`Total duration: ${totalDuration.toFixed(2)}ms`);
  console.log(`Average duration: ${avgDuration.toFixed(2)}ms`);
  console.log(`Slowest test: ${allMetrics.reduce((max, m) => m.duration > max.duration ? m : max).name}`);
  console.log(`Fastest test: ${allMetrics.reduce((min, m) => m.duration < min.duration ? m : min).name}`);
});

// Run the test
tap.start();