einvoice/test/suite/einvoice_edge-cases/test.edge-04.unusual-charsets.ts

564 lines
18 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { tap } from '@git.zone/tstest/tapbundle';
import { EInvoice } from '../../../ts/index.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('EDGE-04: Unusual Character Sets - should handle unusual and exotic character encodings', async () => {
// Test 1: Unicode edge cases with real invoice data.
// Each case uses an unusual string as the invoice ID, exports the invoice to UBL
// and parses the XML back. Outcomes are only logged; nothing is asserted here.
await PerformanceTracker.track('unicode-edge-cases', async () => {
  const testCases = [
    {
      name: 'zero-width-characters',
      text: 'Invoice\u200B\u200C\u200D\uFEFFNumber',
      description: 'Zero-width spaces and joiners'
    },
    {
      name: 'right-to-left',
      text: 'مرحبا INV-001 שלום',
      description: 'RTL Arabic and Hebrew mixed with LTR'
    },
    {
      name: 'surrogate-pairs',
      text: '𝐇𝐞𝐥𝐥𝐨 😀 🎉 Invoice',
      description: 'Mathematical bold text and emojis'
    },
    {
      name: 'combining-characters',
      text: 'Ińvȯíçë̃ Nüm̈bër̊',
      description: 'Combining diacritical marks'
    },
    {
      name: 'control-characters',
      text: 'Invoice Test', // Remove actual control chars as they break XML
      description: 'Control characters (removed for XML safety)'
    },
    {
      name: 'bidi-override',
      text: '\u202Eتسا Invoice 123\u202C',
      description: 'Bidirectional override characters'
    }
  ];

  for (const testCase of testCases) {
    const einvoice = new EInvoice();
    // JavaScript months are zero-based: this is 1 January 2024.
    einvoice.issueDate = new Date(2024, 0, 1);
    // The text under test becomes the invoice ID.
    einvoice.invoiceId = testCase.text;
    einvoice.subject = testCase.description;

    // Set required fields
    einvoice.from = {
      type: 'company',
      name: 'Test Unicode Company',
      description: testCase.description,
      address: {
        streetName: 'Test Street',
        houseNumber: '1',
        postalCode: '12345',
        city: 'Test City',
        country: 'DE'
      },
      status: 'active',
      foundedDate: { year: 2020, month: 1, day: 1 },
      registrationDetails: {
        vatId: 'DE123456789',
        registrationId: 'HRB 12345',
        registrationName: 'Commercial Register'
      }
    };
    einvoice.to = {
      type: 'person',
      name: 'Test',
      surname: 'Customer',
      salutation: 'Mr' as const,
      sex: 'male' as const,
      title: 'Doctor' as const,
      description: 'Test customer',
      address: {
        streetName: 'Customer Street',
        houseNumber: '2',
        postalCode: '54321',
        city: 'Customer City',
        country: 'DE'
      }
    };

    // Add test item
    einvoice.items = [{
      position: 1,
      name: `Item with ${testCase.name}`,
      articleNumber: 'ART-001',
      unitType: 'EA',
      unitQuantity: 1,
      unitNetPrice: 100,
      vatPercentage: 19
    }];

    try {
      // Export to UBL format
      const ublString = await einvoice.toXmlString('ubl');

      // Check if the raw text appears verbatim in the serialized XML
      const preserved = ublString.includes(testCase.text);
      console.log(`Unicode test ${testCase.name}: ${preserved ? 'preserved' : 'encoded'}`);

      // Try to import it back and compare the invoice ID with the original text
      const newInvoice = new EInvoice();
      await newInvoice.fromXmlString(ublString);
      const roundTripPreserved = newInvoice.invoiceId === testCase.text;
      console.log(`Unicode test ${testCase.name} round-trip: ${roundTripPreserved ? 'success' : 'modified'}`);
    } catch (error) {
      // Catch variables are `unknown` under strict mode; narrow before reading `.message`.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Unicode test ${testCase.name} failed: ${reason}`);
    }
  }
});
// Test 2: Various character encodings in invoice content.
// The sample text is placed in the subject, seller name, seller city, registration
// name and item name, then exported to both UBL and CII and parsed back.
// Outcomes are only logged; nothing is asserted here.
await PerformanceTracker.track('various-character-encodings', async () => {
  const encodingTests = [
    {
      encoding: 'UTF-8',
      text: 'Übung macht den Meister - äöüß'
    },
    {
      encoding: 'Latin',
      text: 'Ñoño español - ¡Hola!'
    },
    {
      encoding: 'Cyrillic',
      text: 'Счёт-фактура № 2024'
    },
    {
      encoding: 'Greek',
      text: 'Τιμολόγιο: ΜΜΚΔ'
    },
    {
      encoding: 'Chinese',
      text: '發票編號:貳零貳肆'
    }
  ];

  for (const test of encodingTests) {
    const einvoice = new EInvoice();
    // JavaScript months are zero-based: this is 1 January 2024.
    einvoice.issueDate = new Date(2024, 0, 1);
    einvoice.invoiceId = `ENC-${test.encoding}`;
    einvoice.subject = test.text;

    einvoice.from = {
      type: 'company',
      name: test.text,
      description: `Company using ${test.encoding}`,
      address: {
        streetName: 'Test Street',
        houseNumber: '1',
        postalCode: '12345',
        city: test.text,
        country: 'DE'
      },
      status: 'active',
      foundedDate: { year: 2020, month: 1, day: 1 },
      registrationDetails: {
        vatId: 'DE123456789',
        registrationId: 'HRB 12345',
        registrationName: test.text
      }
    };
    einvoice.to = {
      type: 'company',
      name: 'Customer Inc',
      description: 'Test customer',
      address: {
        streetName: 'Customer Street',
        houseNumber: '2',
        postalCode: '54321',
        city: 'Customer City',
        country: 'DE'
      },
      status: 'active',
      foundedDate: { year: 2019, month: 1, day: 1 },
      registrationDetails: {
        vatId: 'DE987654321',
        registrationId: 'HRB 54321',
        registrationName: 'Commercial Register'
      }
    };
    einvoice.items = [{
      position: 1,
      name: test.text,
      articleNumber: 'ART-001',
      unitType: 'EA',
      unitQuantity: 1,
      unitNetPrice: 100,
      vatPercentage: 19
    }];

    // Note: the try wraps the whole format loop, so an exception while handling
    // one format skips any remaining formats for this sample.
    try {
      // Test both UBL and CII formats
      for (const format of ['ubl', 'cii'] as const) {
        const xmlString = await einvoice.toXmlString(format);

        // Check if the raw text appears verbatim in the serialized XML
        const preserved = xmlString.includes(test.text);
        console.log(`Encoding test ${test.encoding} in ${format}: ${preserved ? 'preserved' : 'modified'}`);

        // Import back and compare the subject with the original text
        const newInvoice = new EInvoice();
        await newInvoice.fromXmlString(xmlString);
        const subjectPreserved = newInvoice.subject === test.text;
        console.log(`Encoding test ${test.encoding} round-trip in ${format}: ${subjectPreserved ? 'success' : 'failed'}`);
      }
    } catch (error) {
      // Catch variables are `unknown` under strict mode; narrow before reading `.message`.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Encoding test ${test.encoding} failed: ${reason}`);
    }
  }
});
// Test 3: Emoji and pictographic characters.
// The emoji text is placed in the subject, seller description and item name, then
// exported to UBL. Outcomes are only logged; nothing is asserted here.
await PerformanceTracker.track('emoji-and-pictographs', async () => {
  const emojiTests = [
    {
      name: 'basic-emoji',
      content: 'Invoice 📧 sent ✅'
    },
    {
      name: 'flag-emoji',
      content: 'Country: 🇺🇸 🇬🇧 🇩🇪 🇫🇷'
    },
    {
      name: 'skin-tone-emoji',
      content: 'Approved by 👍🏻👍🏼👍🏽👍🏾👍🏿'
    },
    {
      name: 'zwj-sequences',
      content: 'Family: 👨‍👩‍👧‍👦'
    },
    {
      name: 'mixed-emoji-text',
      content: '💰 Total: €1,234.56 💶'
    }
  ];

  for (const test of emojiTests) {
    const einvoice = new EInvoice();
    // JavaScript months are zero-based: this is 1 January 2024.
    einvoice.issueDate = new Date(2024, 0, 1);
    einvoice.invoiceId = 'EMOJI-001';
    einvoice.subject = test.content;

    einvoice.from = {
      type: 'company',
      name: 'Emoji Company',
      description: test.content,
      address: {
        streetName: 'Emoji Street',
        houseNumber: '1',
        postalCode: '12345',
        city: 'Emoji City',
        country: 'DE'
      },
      status: 'active',
      foundedDate: { year: 2020, month: 1, day: 1 },
      registrationDetails: {
        vatId: 'DE123456789',
        registrationId: 'HRB 12345',
        registrationName: 'Commercial Register'
      }
    };
    einvoice.to = {
      type: 'person',
      name: 'Emoji',
      surname: 'Customer',
      salutation: 'Mr' as const,
      sex: 'male' as const,
      title: 'Doctor' as const,
      description: 'Customer who likes emojis',
      address: {
        streetName: 'Customer Street',
        houseNumber: '2',
        postalCode: '54321',
        city: 'Customer City',
        country: 'DE'
      }
    };
    einvoice.items = [{
      position: 1,
      name: test.content,
      articleNumber: 'EMOJI-001',
      unitType: 'EA',
      unitQuantity: 1,
      unitNetPrice: 100,
      vatPercentage: 19
    }];

    try {
      const ublString = await einvoice.toXmlString('ubl');

      // Check if emoji content appears verbatim in the XML or was encoded
      const preserved = ublString.includes(test.content);
      console.log(`Emoji test ${test.name}: ${preserved ? 'preserved' : 'encoded'}`);

      // Count grapheme clusters (visual characters). 'grapheme' is the default
      // granularity; it is spelled out so the intent is visible. The segmenter is
      // created inside the try so a runtime without Intl.Segmenter is reported
      // as a per-case failure.
      const segmenter = new Intl.Segmenter(undefined, { granularity: 'grapheme' });
      const graphemeCount = [...segmenter.segment(test.content)].length;
      console.log(`Emoji test ${test.name} has ${graphemeCount} visual characters`);
    } catch (error) {
      // Catch variables are `unknown` under strict mode; narrow before reading `.message`.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Emoji test ${test.name} failed: ${reason}`);
    }
  }
});
// Test 4: Legacy and exotic scripts.
// Each script sample is placed in the subject, seller description and item name,
// exported to CII and parsed back. Outcomes are only logged; nothing is asserted here.
await PerformanceTracker.track('exotic-scripts', async () => {
  const scripts = [
    { name: 'chinese-traditional', text: '發票編號:貳零貳肆' },
    { name: 'japanese-mixed', text: '請求書番号:2024年' },
    { name: 'korean', text: '송장 번호: 2024' },
    { name: 'thai', text: 'ใบแจ้งหนี้: ๒๐๒๔' },
    { name: 'devanagari', text: 'चालान संख्या: २०२४' },
    { name: 'bengali', text: 'চালান নং: ২০২৪' },
    { name: 'tamil', text: 'விலைப்பட்டியல்: ௨௦௨௪' }
  ];

  for (const script of scripts) {
    const einvoice = new EInvoice();
    // JavaScript months are zero-based: this is 1 January 2024.
    einvoice.issueDate = new Date(2024, 0, 1);
    einvoice.invoiceId = `SCRIPT-${script.name}`;
    einvoice.subject = script.text;

    einvoice.from = {
      type: 'company',
      name: 'International Company',
      description: script.text,
      address: {
        streetName: 'International Street',
        houseNumber: '1',
        postalCode: '12345',
        city: 'International City',
        country: 'DE'
      },
      status: 'active',
      foundedDate: { year: 2020, month: 1, day: 1 },
      registrationDetails: {
        vatId: 'DE123456789',
        registrationId: 'HRB 12345',
        registrationName: 'Commercial Register'
      }
    };
    einvoice.to = {
      type: 'company',
      name: 'Local Company',
      description: 'Customer',
      address: {
        streetName: 'Local Street',
        houseNumber: '2',
        postalCode: '54321',
        city: 'Local City',
        country: 'DE'
      },
      status: 'active',
      foundedDate: { year: 2019, month: 1, day: 1 },
      registrationDetails: {
        vatId: 'DE987654321',
        registrationId: 'HRB 54321',
        registrationName: 'Commercial Register'
      }
    };
    einvoice.items = [{
      position: 1,
      name: script.text,
      articleNumber: 'INT-001',
      unitType: 'EA',
      unitQuantity: 1,
      unitNetPrice: 100,
      vatPercentage: 19
    }];

    try {
      const ciiString = await einvoice.toXmlString('cii');

      // Check if the raw text appears verbatim in the serialized XML
      const preserved = ciiString.includes(script.text);
      console.log(`Script ${script.name}: ${preserved ? 'preserved' : 'encoded'}`);

      // Test round-trip: parse the CII back and compare the subject
      const newInvoice = new EInvoice();
      await newInvoice.fromXmlString(ciiString);
      const subjectPreserved = newInvoice.subject === script.text;
      console.log(`Script ${script.name} round-trip: ${subjectPreserved ? 'success' : 'modified'}`);
    } catch (error) {
      // Catch variables are `unknown` under strict mode; narrow before reading `.message`.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Script ${script.name} failed: ${reason}`);
    }
  }
});
// Test 5: XML special characters in unusual positions.
// Each of the five XML-significant characters is embedded in the subject, seller
// name and item name; the UBL output is then searched for an escaped form.
// Outcomes are only logged; nothing is asserted here.
await PerformanceTracker.track('xml-special-characters', async () => {
  // `entity` is the XML predefined entity for the character. It cannot be derived
  // from `desc` ('less than' is not 'lt'), so it is listed explicitly.
  const specialChars = [
    { char: '<', desc: 'less than', entity: '&lt;' },
    { char: '>', desc: 'greater than', entity: '&gt;' },
    { char: '&', desc: 'ampersand', entity: '&amp;' },
    { char: '"', desc: 'quote', entity: '&quot;' },
    { char: "'", desc: 'apostrophe', entity: '&apos;' }
  ];

  for (const special of specialChars) {
    const einvoice = new EInvoice();
    // JavaScript months are zero-based: this is 1 January 2024.
    einvoice.issueDate = new Date(2024, 0, 1);
    einvoice.invoiceId = `XML-${special.desc}`;
    einvoice.subject = `Price ${special.char} 100`;

    einvoice.from = {
      type: 'company',
      name: `Company ${special.char} Test`,
      description: 'Special char test',
      address: {
        streetName: 'Test Street',
        houseNumber: '1',
        postalCode: '12345',
        city: 'Test City',
        country: 'DE'
      },
      status: 'active',
      foundedDate: { year: 2020, month: 1, day: 1 },
      registrationDetails: {
        vatId: 'DE123456789',
        registrationId: 'HRB 12345',
        registrationName: 'Commercial Register'
      }
    };
    einvoice.to = {
      type: 'person',
      name: 'Test',
      surname: 'Customer',
      salutation: 'Mr' as const,
      sex: 'male' as const,
      title: 'Doctor' as const,
      description: 'Test customer',
      address: {
        streetName: 'Customer Street',
        houseNumber: '2',
        postalCode: '54321',
        city: 'Customer City',
        country: 'DE'
      }
    };
    einvoice.items = [{
      position: 1,
      name: `Item ${special.char} Test`,
      articleNumber: 'SPEC-001',
      unitType: 'EA',
      unitQuantity: 1,
      unitNetPrice: 100,
      vatPercentage: 19
    }];

    try {
      const xmlString = await einvoice.toXmlString('ubl');

      // Check if special chars are properly escaped. Accept the predefined entity
      // or a numeric character reference in decimal or hexadecimal (either case).
      const codePoint = special.char.charCodeAt(0);
      const hex = codePoint.toString(16);
      const escapedForms = [
        special.entity,
        `&#${codePoint};`,
        `&#x${hex};`,
        `&#x${hex.toUpperCase()};`
      ];
      const escaped = escapedForms.some((form) => xmlString.includes(form));
      console.log(`XML special ${special.desc}: ${escaped ? 'properly escaped' : 'check encoding'}`);
    } catch (error) {
      // Catch variables are `unknown` under strict mode; narrow before reading `.message`.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`XML special ${special.desc} failed: ${reason}`);
    }
  }
});
// Test 6: Character set conversion in format transformation.
// Each sample goes UBL export -> import -> CII export -> import, checking whether
// the text survives both hops. Outcomes are only logged; nothing is asserted here.
await PerformanceTracker.track('format-transform-charsets', async () => {
  const testContents = [
    { name: 'multilingual', text: 'Hello مرحبا 你好 Здравствуйте' },
    { name: 'symbols', text: '€ £ ¥ $ ₹ ₽ ¢ ₩' },
    { name: 'accented', text: 'àáäâ èéëê ìíïî òóöô ùúüû ñç' },
    { name: 'mixed-emoji', text: 'Invoice 📄 Total: 💰 Status: ✅' }
  ];

  for (const content of testContents) {
    const einvoice = new EInvoice();
    // JavaScript months are zero-based: this is 1 January 2024.
    einvoice.issueDate = new Date(2024, 0, 1);
    einvoice.invoiceId = 'CHARSET-001';
    einvoice.subject = content.text;

    einvoice.from = {
      type: 'company',
      name: 'Charset Test Company',
      description: content.text,
      address: {
        streetName: 'Test Street',
        houseNumber: '1',
        postalCode: '12345',
        city: 'Test City',
        country: 'DE'
      },
      status: 'active',
      foundedDate: { year: 2020, month: 1, day: 1 },
      registrationDetails: {
        vatId: 'DE123456789',
        registrationId: 'HRB 12345',
        registrationName: 'Commercial Register'
      }
    };
    einvoice.to = {
      type: 'company',
      name: 'Customer Company',
      description: 'Customer',
      address: {
        streetName: 'Customer Street',
        houseNumber: '2',
        postalCode: '54321',
        city: 'Customer City',
        country: 'DE'
      },
      status: 'active',
      foundedDate: { year: 2019, month: 1, day: 1 },
      registrationDetails: {
        vatId: 'DE987654321',
        registrationId: 'HRB 54321',
        registrationName: 'Commercial Register'
      }
    };
    einvoice.items = [{
      position: 1,
      name: content.text,
      articleNumber: 'CHARSET-001',
      unitType: 'EA',
      unitQuantity: 1,
      unitNetPrice: 100,
      vatPercentage: 19
    }];

    try {
      // Convert from UBL to CII by exporting, re-importing and exporting again
      const ublString = await einvoice.toXmlString('ubl');
      const newInvoice = new EInvoice();
      await newInvoice.fromXmlString(ublString);
      const ciiString = await newInvoice.toXmlString('cii');

      // Check if content appears verbatim in the CII produced after the transformation
      const preserved = ciiString.includes(content.text);
      console.log(`Format transform ${content.name}: ${preserved ? 'preserved' : 'modified'}`);

      // Double check with round trip: parse the CII and compare the subject
      const finalInvoice = new EInvoice();
      await finalInvoice.fromXmlString(ciiString);
      const roundTripPreserved = finalInvoice.subject === content.text;
      console.log(`Format transform ${content.name} round-trip: ${roundTripPreserved ? 'success' : 'failed'}`);
    } catch (error) {
      // Catch variables are `unknown` under strict mode; narrow before reading `.message`.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Format transform ${content.name} failed: ${reason}`);
    }
  }
});
});
// Run the test: start the tap runner after the EDGE-04 test has been registered.
tap.start();