Files
einvoice/test/suite/einvoice_conversion/test.conv-07.character-encoding.ts

404 lines
17 KiB
TypeScript
Raw Normal View History

2025-05-25 19:45:37 +00:00
import { expect, tap } from '@git.zone/tstest/tapbundle';
2025-05-26 13:33:21 +00:00
import * as plugins from '../../plugins';
import { EInvoice } from '../../../ts/index';
2025-05-25 19:45:37 +00:00
2025-05-26 13:33:21 +00:00
/**
 * CONV-07: verifies that international text and special characters survive a
 * load -> serialize round trip of a UBL invoice.
 *
 * Every sequence listed in `encodingChecks` is present in the fixture below, so a
 * "missing" entry in the log points at the converter and not at the test data.
 */
tap.test('CONV-07: Character Encoding - UTF-8 encoding preservation in conversion', async () => {
  // UBL invoice with various UTF-8 characters.
  // NOTE: nothing but whitespace may sit between the XML declaration and the
  // root element, and '&' in text content must be written as '&amp;'.
  // `\u2013` is an en dash, spelled as an escape so it stays visible in review.
  const ublInvoice = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>UTF8-CONV-001</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
<cbc:Note>Special characters: € £ ¥ © ® ™ § ° ± × ÷ \u2013 •</cbc:Note>
<cbc:Note>Diacritics: àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ</cbc:Note>
<cbc:Note>Special Latin: çñøæþð</cbc:Note>
<cbc:Note>Greek: ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ αβγδεζηθικλμνξοπρστυφχψω</cbc:Note>
<cbc:Note>Cyrillic: АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ</cbc:Note>
<cbc:Note>CJK: 中文 日本語 한국어</cbc:Note>
<cbc:Note>Arabic: العربية مرحبا</cbc:Note>
<cbc:Note>Hebrew: עברית שלום</cbc:Note>
<cbc:Note>Emoji: 😀 🎉 💰 📧 🌍</cbc:Note>
<cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Société Générale Müller &amp; Associés</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>Rue de la Légion d'Honneur</cbc:StreetName>
<cbc:CityName>Zürich</cbc:CityName>
<cbc:PostalZone>8001</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>CH</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
<cac:Contact>
<cbc:Name>François Lefèvre</cbc:Name>
<cbc:ElectronicMail>françois@société-générale.ch</cbc:ElectronicMail>
</cac:Contact>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName>
<cbc:Name> (Beijing Tech Co.)</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>88</cbc:StreetName>
<cbc:CityName></cbc:CityName>
<cac:Country>
<cbc:IdentificationCode>CN</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:Note>Spëcïål cháracters in line: ñ ç ø å æ þ ð</cbc:Note>
<cbc:InvoicedQuantity unitCode="C62">10</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">1000.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Bücher über Köln München</cbc:Name>
<cbc:Description>Prix: 25,50 (TVA incluse) Größe: 21×29,7 cm²</cbc:Description>
</cac:Item>
<cac:Price>
<cbc:PriceAmount currencyID="EUR">100.00</cbc:PriceAmount>
</cac:Price>
</cac:InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadXml(ublInvoice);

  // Convert to another format (simulated by getting XML back)
  const convertedXml = await einvoice.toXmlString('ubl');

  // Character sequences that must appear verbatim in the serialized output.
  const encodingChecks = [
    // Currency symbols
    { char: '€', name: 'Euro' },
    { char: '£', name: 'Pound' },
    { char: '¥', name: 'Yen' },
    // Special symbols
    { char: '©', name: 'Copyright' },
    { char: '®', name: 'Registered' },
    { char: '™', name: 'Trademark' },
    { char: '×', name: 'Multiplication' },
    { char: '÷', name: 'Division' },
    // Diacritics
    { char: 'àáâãäå', name: 'Latin a variations' },
    { char: 'çñøæþð', name: 'Special Latin' },
    // Greek
    { char: 'ΑΒΓΔ', name: 'Greek uppercase' },
    { char: 'αβγδ', name: 'Greek lowercase' },
    // Cyrillic
    { char: 'АБВГ', name: 'Cyrillic' },
    // CJK
    { char: '中文', name: 'Chinese' },
    { char: '日本語', name: 'Japanese' },
    { char: '한국어', name: 'Korean' },
    // RTL
    { char: 'العربية', name: 'Arabic' },
    { char: 'עברית', name: 'Hebrew' },
    // Emoji
    { char: '😀', name: 'Emoji' },
    // Names with diacritics
    { char: 'François Lefèvre', name: 'French name' },
    { char: 'Zürich', name: 'Swiss city' },
    { char: 'Müller', name: 'German name' },
    // Special punctuation
    // An empty string here would make includes() trivially true, so the en dash
    // is written as an explicit escape.
    { char: '\u2013', name: 'En dash' },
    { char: '•', name: 'Bullet' },
    { char: '²', name: 'Superscript' },
  ];

  let preservedCount = 0;
  const missingChars: string[] = [];
  for (const check of encodingChecks) {
    if (convertedXml.includes(check.char)) {
      preservedCount++;
    } else {
      missingChars.push(`${check.name} (${check.char})`);
    }
  }

  console.log(`UTF-8 preservation: ${preservedCount}/${encodingChecks.length} character sets preserved`);
  if (missingChars.length > 0) {
    console.log('Missing characters:', missingChars);
  }

  expect(preservedCount).toBeGreaterThan(encodingChecks.length * 0.8); // Allow 20% loss
});
2025-05-25 19:45:37 +00:00
2025-05-26 13:33:21 +00:00
/**
 * CONV-07: verifies that predefined XML entities and numeric/hex character
 * references in a CII invoice are still represented (either escaped or as the
 * decoded character) after a load -> serialize round trip.
 */
tap.test('CONV-07: Character Encoding - Entity encoding in conversion', async () => {
  // CII invoice with XML entities.
  // Nothing but whitespace may sit between the XML declaration and the root element.
  const ciiInvoice = `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<rsm:ExchangedDocument>
<ram:ID>ENTITY-CONV-001</ram:ID>
<ram:IncludedNote>
<ram:Content>XML entities: &lt;invoice&gt; &amp; "quotes" with 'apostrophes'</ram:Content>
</ram:IncludedNote>
<ram:IncludedNote>
<ram:Content>Numeric entities: &#8364; &#163; &#165; &#8482;</ram:Content>
</ram:IncludedNote>
<ram:IncludedNote>
<ram:Content>Hex entities: &#x20AC; &#x00A3; &#x00A5;</ram:Content>
</ram:IncludedNote>
</rsm:ExchangedDocument>
<rsm:SupplyChainTradeTransaction>
<ram:IncludedSupplyChainTradeLineItem>
<ram:SpecifiedTradeProduct>
<ram:Name>Product &amp; Service &lt;Premium&gt;</ram:Name>
<ram:Description>Price comparison: USD &lt; EUR &gt; GBP</ram:Description>
</ram:SpecifiedTradeProduct>
</ram:IncludedSupplyChainTradeLineItem>
<ram:ApplicableHeaderTradeAgreement>
<ram:SellerTradeParty>
<ram:Name>Smith &amp; Jones "Trading" Ltd.</ram:Name>
<ram:Description>Registered in England &amp; Wales</ram:Description>
</ram:SellerTradeParty>
</ram:ApplicableHeaderTradeAgreement>
</rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadXml(ciiInvoice);
  const convertedXml = await einvoice.toXmlString('cii');

  // Check entity preservation: each check accepts either the escaped form or
  // the literal character, since both are valid serializations.
  const entityChecks = {
    'Ampersand entity': convertedXml.includes('&amp;') || convertedXml.includes(' & '),
    'Less than entity': convertedXml.includes('&lt;') || convertedXml.includes(' < '),
    'Greater than entity': convertedXml.includes('&gt;') || convertedXml.includes(' > '),
    'Quote preservation': convertedXml.includes('"quotes"') || convertedXml.includes('&quot;quotes&quot;'),
    'Apostrophe preservation': convertedXml.includes("'apostrophes'") || convertedXml.includes('&apos;apostrophes&apos;'),
    'Numeric entities': convertedXml.includes('€') || convertedXml.includes('&#8364;'),
    'Hex entities': convertedXml.includes('£') || convertedXml.includes('&#x00A3;'),
  };

  const results = Object.entries(entityChecks);
  for (const [check, passed] of results) {
    // Prefix each line so passing and failing checks can be told apart in the log.
    console.log(`${passed ? '✓' : '✗'} ${check}`);
  }

  const passedCount = results.filter(([, passed]) => passed).length;
  console.log(`Entity encoding: ${passedCount}/${results.length} checks passed`);

  // Without an assertion this test could never fail. Require a majority of the
  // checks to pass, matching the lenient 50% bar the file already uses for
  // mixed-encoding input.
  expect(passedCount).toBeGreaterThan(results.length * 0.5);
});
2025-05-25 19:45:37 +00:00
2025-05-26 13:33:21 +00:00
/**
 * CONV-07: verifies a UBL invoice that mixes CDATA sections, entity references
 * and raw Unicode text in the same document.
 *
 * The fixture contains every character the checks look for, so a failed check
 * reflects the converter's output rather than missing input.
 */
tap.test('CONV-07: Character Encoding - Mixed encoding scenarios', async () => {
  // Invoice with mixed encoding challenges.
  // Nothing but whitespace may sit between the XML declaration and the root element.
  const mixedInvoice = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>MIXED-ENC-001</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
<cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>
<cbc:Note><![CDATA[CDATA content: <tag> & special chars £ ¥]]></cbc:Note>
<cbc:Note>Mixed: Normal text with &#8364;100 and &lt;escaped&gt; content</cbc:Note>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Müller &amp; Associés S.à r.l.</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>Hauptstraße 42 (Gebäude "A")</cbc:StreetName>
<cbc:AdditionalStreetName><![CDATA[Floor 3 & 4]]></cbc:AdditionalStreetName>
<cbc:CityName>Köln</cbc:CityName>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:PaymentTerms>
<cbc:Note>Payment terms: 2/10 net 30 (2% if paid &lt;= 10 days)</cbc:Note>
<cbc:Note><![CDATA[Bank: Société Générale
IBAN: FR14 2004 1010 0505 0001 3M02 606
BIC: SOGEFRPP]]></cbc:Note>
</cac:PaymentTerms>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:Note>Temperature range: -40°C ≤ T ≤ +85°C</cbc:Note>
<cbc:InvoicedQuantity unitCode="C62">10</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">1000.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Product™ with ® symbol © 2025</cbc:Name>
<cbc:Description>Size: 10cm × 20cm × 5cm Weight: ≈1kg</cbc:Description>
<cac:AdditionalItemProperty>
<cbc:Name>Special chars</cbc:Name>
<cbc:Value>α β γ δ ε </cbc:Value>
</cac:AdditionalItemProperty>
</cac:Item>
</cac:InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadXml(mixedInvoice);
  const convertedXml = await einvoice.toXmlString('ubl');

  // Check mixed encoding preservation
  const mixedChecks = {
    'CDATA content': convertedXml.includes('CDATA content') || convertedXml.includes('<tag>'),
    'Mixed entities and Unicode': convertedXml.includes('€100') || convertedXml.includes('&#8364;100'),
    'German umlauts': convertedXml.includes('Müller') && convertedXml.includes('Köln'),
    'French accents': convertedXml.includes('Associés') && convertedXml.includes('Société'),
    'Mathematical symbols': convertedXml.includes('≤') && convertedXml.includes('≈'),
    'Trademark symbols': convertedXml.includes('™') && convertedXml.includes('®'),
    // Either Greek letter from the fixture counts; the word "beta" never occurs in the input.
    'Greek letters': convertedXml.includes('α') || convertedXml.includes('β'),
    'Temperature notation': convertedXml.includes('°C'),
    'Multiplication sign': convertedXml.includes('×'),
    'CDATA in address': convertedXml.includes('Floor 3') || convertedXml.includes('&amp; 4'),
  };

  const outcomes = Object.values(mixedChecks);
  const passedChecks = outcomes.filter(Boolean).length;
  console.log(`Mixed encoding: ${passedChecks}/${outcomes.length} checks passed`);

  expect(passedChecks).toBeGreaterThan(outcomes.length * 0.5); // Allow 50% loss - realistic for mixed encoding
});
2025-05-25 19:45:37 +00:00
2025-05-26 13:33:21 +00:00
/**
 * CONV-07: feeds several minimal documents with different namespace styles
 * through load -> toXmlString('ubl') and logs whether a marker string survives.
 *
 * This test is informational: load/convert failures are caught and logged
 * per format instead of failing the run.
 */
tap.test('CONV-07: Character Encoding - Encoding in different invoice formats', async () => {
  // Test encoding across different format characteristics.
  // `expected` is the marker text from `content` that must appear in the output.
  // Nothing but whitespace may sit between the XML declaration and the root element.
  const formats = [
    {
      name: 'UBL with namespaces',
      expected: '€£¥',
      content: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<cbc:ID xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">NS--001</cbc:ID>
<cbc:Note xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">Namespace test: €£¥</cbc:Note>
</ubl:Invoice>`,
    },
    {
      name: 'CII with complex structure',
      expected: 'Ü',
      content: `<?xml version="1.0" encoding="UTF-8"?>
<CrossIndustryInvoice xmlns="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100">
<ExchangedDocument>
<ID>CII-Ü-001</ID>
<Name>Übersicht über Änderungen</Name>
</ExchangedDocument>
</CrossIndustryInvoice>`,
    },
    {
      name: 'Factur-X with French',
      expected: 'détaillée',
      content: `<?xml version="1.0" encoding="UTF-8"?>
<CrossIndustryInvoice>
<ExchangedDocument>
<ID>FX-FR-001</ID>
<IncludedNote>
<Content>Facture détaillée avec références spéciales</Content>
</IncludedNote>
</ExchangedDocument>
</CrossIndustryInvoice>`,
    },
  ];

  for (const format of formats) {
    try {
      const einvoice = new EInvoice();
      await einvoice.loadXml(format.content);
      const converted = await einvoice.toXmlString('ubl');

      // Check key characters are preserved
      const preserved = converted.includes(format.expected);
      console.log(`${format.name}: ${preserved ? '✓' : '✗'} Encoding preserved`);
    } catch (error: unknown) {
      // The catch variable is `unknown` under strict settings; narrow before reading `.message`.
      const message = error instanceof Error ? error.message : String(error);
      console.log(`${format.name}: Error - ${message}`);
    }
  }
});
2025-05-25 19:45:37 +00:00
2025-05-26 13:33:21 +00:00
/**
 * CONV-07: verifies that right-to-left text (Arabic and Hebrew), including text
 * mixed with left-to-right English, survives a load -> serialize round trip of
 * a UBL invoice.
 */
tap.test('CONV-07: Character Encoding - Bidirectional text preservation', async () => {
  // Test RTL (Right-to-Left) text preservation.
  // Nothing but whitespace may sit between the XML declaration and the root element.
  const rtlInvoice = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>RTL-TEST-001</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
<cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>شركة التقنية المحدودة</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>شارع الملك فهد 123</cbc:StreetName>
<cbc:CityName>الرياض</cbc:CityName>
<cac:Country>
<cbc:IdentificationCode>SA</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>חברת הטכנולוגיה בע"מ</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>רחוב דיזנגוף 456</cbc:StreetName>
<cbc:CityName>תל אביב</cbc:CityName>
<cac:Country>
<cbc:IdentificationCode>IL</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:Note>Mixed text: العربية (Arabic) and עברית (Hebrew) with English</cbc:Note>
<cbc:InvoicedQuantity unitCode="C62">10</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">1000.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>منتج تقني متقدم / מוצר טכנולוגי מתקדם</cbc:Name>
</cac:Item>
</cac:InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadXml(rtlInvoice);
  const convertedXml = await einvoice.toXmlString('ubl');

  // Check RTL text preservation
  const rtlChecks = {
    'Arabic company': convertedXml.includes('شركة التقنية المحدودة'),
    'Arabic street': convertedXml.includes('شارع الملك فهد'),
    'Arabic city': convertedXml.includes('الرياض'),
    'Hebrew company': convertedXml.includes('חברת הטכנולוגיה'),
    'Hebrew street': convertedXml.includes('רחוב דיזנגוף'),
    'Hebrew city': convertedXml.includes('תל אביב'),
    'Mixed RTL/LTR': convertedXml.includes('Arabic') && convertedXml.includes('Hebrew'),
    'Arabic product': convertedXml.includes('منتج تقني متقدم'),
    'Hebrew product': convertedXml.includes('מוצר טכנולוגי מתקדם'),
  };

  const outcomes = Object.entries(rtlChecks);
  const rtlPreserved = outcomes.filter(([, passed]) => passed).length;
  console.log(`RTL text preservation: ${rtlPreserved}/${outcomes.length}`);

  const lost = outcomes.filter(([, passed]) => !passed).map(([label]) => label);
  if (lost.length > 0) {
    console.log('RTL checks not preserved:', lost);
  }

  // Without an assertion this test could never fail. Require a majority of the
  // checks to pass, matching the lenient 50% bar the file already uses for
  // mixed-encoding input.
  expect(rtlPreserved).toBeGreaterThan(outcomes.length * 0.5);
});
// Start the tap runner; presumably this executes the tests registered above — confirm against the tapbundle docs.
tap.start();