// Source path: einvoice/test/suite/einvoice_encoding/test.enc-05.special-characters.ts
// (Header captured from the repository web view; snapshot dated 2025-05-25 19:45:37 +00:00.)
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-05: Special Characters - should handle special and international characters correctly', async (t) => {
// ENC-05: Verify handling of special characters across different languages and scripts
// This test ensures proper support for international invoicing
// Shared by every sub-test below: each one reports its elapsed time through
// performanceTracker.addMeasurement(...), and the summary at the end of this
// callback reads the tracker back.
const performanceTracker = new PerformanceTracker('ENC-05: Special Characters');
// Used only by the corpus sub-test to list and read sample invoice files.
const corpusLoader = new CorpusLoader();
// Sub-test: loads a UBL invoice whose text uses Nordic, German, French, Croatian
// and Polish letters, then checks that every such string is still present in the
// XML returned by getXmlString().
//
// The ampersand in the customer name is written as the entity `&amp;`: a bare `&`
// in element content is not well-formed XML, so the fixture would be rejected by
// a conforming parser before any character handling could be tested.
t.test('European special characters', async () => {
  const startTime = performance.now();
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>EU-SPECIAL-CHARS</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>European chars test</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Åsa Öberg AB (Sweden)</Name>
</PartyName>
<PostalAddress>
<StreetName>Østergade 42</StreetName>
<CityName>København</CityName>
<Country><IdentificationCode>DK</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>Müller &amp; Schäfer GmbH</Name>
</PartyName>
<PostalAddress>
<StreetName>Hauptstraße 15</StreetName>
<CityName>Düsseldorf</CityName>
<Country><IdentificationCode>DE</IdentificationCode></Country>
</PostalAddress>
<Contact>
<Name>François Lefèvre</Name>
<ElectronicMail>f.lefevre@müller-schäfer.de</ElectronicMail>
</Contact>
</Party>
</AccountingCustomerParty>
<InvoiceLine>
<Item>
<Name>Château Margaux (Bordeaux)</Name>
<Description>Vin rouge, millésime 2015, cépage: Cabernet Sauvignon</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>Prošek (Croatian dessert wine)</Name>
<Description>Vino desertno, područje: Dalmacija</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>Żubrówka (Polish vodka)</Name>
<Description>Wódka żytnia z trawą żubrową</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // Nordic characters
  expect(xmlString).toContain('Åsa Öberg');
  expect(xmlString).toContain('Østergade');
  expect(xmlString).toContain('København');
  // German characters (the ampersand stays entity-escaped in serialized XML)
  expect(xmlString).toContain('Müller &amp; Schäfer');
  expect(xmlString).toContain('Hauptstraße');
  expect(xmlString).toContain('Düsseldorf');
  expect(xmlString).toContain('müller-schäfer.de');
  // French characters
  expect(xmlString).toContain('François Lefèvre');
  expect(xmlString).toContain('Château Margaux');
  expect(xmlString).toContain('millésime');
  expect(xmlString).toContain('cépage');
  // Croatian characters
  expect(xmlString).toContain('Prošek');
  expect(xmlString).toContain('područje');
  // Polish characters
  expect(xmlString).toContain('Żubrówka');
  expect(xmlString).toContain('żytnia');
  expect(xmlString).toContain('żubrową');
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('european-chars', elapsed);
});
// Sub-test: loads a UBL invoice containing currency symbols and checks that each
// symbol, and each symbol-prefixed amount, is present in the XML returned by
// getXmlString().
//
// The fixture carries every symbol the assertions look for (€ £ $ ¥ ₹ ₽ ₪ ₩);
// an assertion for a character that never appears in the input cannot pass.
t.test('Currency and monetary symbols', async () => {
  const startTime = performance.now();
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>CURRENCY-SYMBOLS</ID>
<Note>Currency symbols: € £ $ ¥ ₹ ₽ ₪ ₩</Note>
<TaxTotal>
<TaxAmount currencyID="EUR">€1,234.56</TaxAmount>
</TaxTotal>
<TaxTotal>
<TaxAmount currencyID="GBP">£987.65</TaxAmount>
</TaxTotal>
<TaxTotal>
<TaxAmount currencyID="USD">$2,345.67</TaxAmount>
</TaxTotal>
<TaxTotal>
<TaxAmount currencyID="JPY">¥123,456</TaxAmount>
</TaxTotal>
<TaxTotal>
<TaxAmount currencyID="INR">₹98,765</TaxAmount>
</TaxTotal>
<AllowanceCharge>
<ChargeIndicator>false</ChargeIndicator>
<AllowanceChargeReason>Discount (5% off orders > 500)</AllowanceChargeReason>
<Amount currencyID="EUR">25.50</Amount>
</AllowanceCharge>
<PaymentTerms>
<Note>Accepted: € EUR, £ GBP, $ USD, ¥ JPY, ₹ INR</Note>
</PaymentTerms>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // Major currency symbols
  expect(xmlString).toContain('€'); // Euro
  expect(xmlString).toContain('£'); // Pound
  expect(xmlString).toContain('$'); // Dollar
  expect(xmlString).toContain('¥'); // Yen
  expect(xmlString).toContain('₹'); // Rupee
  expect(xmlString).toContain('₽'); // Ruble
  expect(xmlString).toContain('₪'); // Shekel
  expect(xmlString).toContain('₩'); // Won
  // Verify monetary formatting
  expect(xmlString).toContain('€1,234.56');
  expect(xmlString).toContain('£987.65');
  expect(xmlString).toContain('$2,345.67');
  expect(xmlString).toContain('¥123,456');
  expect(xmlString).toContain('₹98,765');
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('currency-symbols', elapsed);
});
// Sub-test: loads a UBL invoice whose text uses mathematical and technical
// symbols and checks that each symbol is present in the XML returned by
// getXmlString().
//
// The fixture contains every symbol the assertions look for, including
// ≤ ≥ ≠ ≈ ∞ and √, so each assertion has matching input to find.
t.test('Mathematical and technical symbols', async () => {
  const startTime = performance.now();
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MATH-SYMBOLS</ID>
<Note>Math symbols: ± × ÷ ≤ ≥ ≠ ≈ ∞ √ π °</Note>
<InvoiceLine>
<LineExtensionAmount currencyID="EUR">100.00</LineExtensionAmount>
<PricingReference>
<AlternativeConditionPrice>
<PriceAmount currencyID="EUR">95.00</PriceAmount>
<PriceTypeCode>Discount ≥ 10 units</PriceTypeCode>
</AlternativeConditionPrice>
</PricingReference>
<Item>
<Description>Precision tool ± 0.001mm</Description>
<AdditionalItemProperty>
<Name>Temperature range</Name>
<Value>-40°C ≤ T ≤ +85°C</Value>
</AdditionalItemProperty>
<AdditionalItemProperty>
<Name>Dimensions</Name>
<Value>10cm × 5cm × 2cm</Value>
</AdditionalItemProperty>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Description>√2 ≈ 1.414, π ≈ 3.14159, e ≈ 2.71828</Description>
<AdditionalItemProperty>
<Name>Formula</Name>
<Value>Area = πr² (where r = radius)</Value>
</AdditionalItemProperty>
</Item>
</InvoiceLine>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // Mathematical operators
  expect(xmlString).toContain('±'); // Plus-minus
  expect(xmlString).toContain('×'); // Multiplication
  expect(xmlString).toContain('÷'); // Division
  expect(xmlString).toContain('≤'); // Less than or equal
  expect(xmlString).toContain('≥'); // Greater than or equal
  expect(xmlString).toContain('≠'); // Not equal
  expect(xmlString).toContain('≈'); // Approximately
  expect(xmlString).toContain('∞'); // Infinity
  expect(xmlString).toContain('√'); // Square root
  expect(xmlString).toContain('π'); // Pi
  expect(xmlString).toContain('°'); // Degree
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('math-symbols', elapsed);
});
// Sub-test: loads a UBL invoice with Japanese, Chinese, Korean, Hindi and Thai
// text and checks that each string is present in the XML returned by
// getXmlString().
//
// Every string asserted below appears verbatim in the fixture; without the
// script text in the input the assertions have nothing to match.
t.test('Asian scripts and characters', async () => {
  const startTime = performance.now();
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ASIAN-SCRIPTS</ID>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>株式会社山田商事 (Yamada Trading Co., Ltd.)</Name>
</PartyName>
<PostalAddress>
<StreetName>東京都千代田区丸の内1-1-1</StreetName>
<CityName>東京</CityName>
<Country><IdentificationCode>JP</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>北京科技有限公司 (Beijing Tech Co., Ltd.)</Name>
</PartyName>
<PostalAddress>
<StreetName>北京市朝阳区建国路88号</StreetName>
<CityName>北京</CityName>
<Country><IdentificationCode>CN</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingCustomerParty>
<InvoiceLine>
<Item>
<Name>전자제품 (Electronics)</Name>
<Description>최신 스마트폰 모델</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>कंप्यूटर उपकरण</Name>
<Description>नवीनतम लैपटॉप मॉडल</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>ซอฟต์แวร์คอมพิวเตอร์</Name>
<Description>โปรแกรมสำนักงาน</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // Japanese (Kanji, Hiragana, Katakana)
  expect(xmlString).toContain('株式会社山田商事');
  expect(xmlString).toContain('東京都千代田区丸の内');
  // Chinese (Simplified)
  expect(xmlString).toContain('北京科技有限公司');
  expect(xmlString).toContain('北京市朝阳区建国路');
  // Korean (Hangul)
  expect(xmlString).toContain('전자제품');
  expect(xmlString).toContain('최신 스마트폰 모델');
  // Hindi (Devanagari)
  expect(xmlString).toContain('कंप्यूटर उपकरण');
  expect(xmlString).toContain('नवीनतम लैपटॉप मॉडल');
  // Thai
  expect(xmlString).toContain('ซอฟต์แวร์คอมพิวเตอร์');
  expect(xmlString).toContain('โปรแกรมสำนักงาน');
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('asian-scripts', elapsed);
});
// Sub-test: loads a UBL invoice with Arabic and Hebrew (right-to-left) text and
// checks that each phrase is present in the XML returned by getXmlString().
// The elapsed wall-clock time is recorded under the 'rtl-scripts' key.
t.test('Arabic and RTL scripts', async () => {
  const t0 = performance.now();
  const rtlInvoiceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>RTL-SCRIPTS</ID>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>شركة التقنية المحدودة</Name>
</PartyName>
<PostalAddress>
<StreetName>شارع الملك فهد</StreetName>
<CityName>الرياض</CityName>
<Country><IdentificationCode>SA</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>חברת הטכנולוגיה בע"מ</Name>
</PartyName>
<PostalAddress>
<StreetName>רחוב דיזנגוף 123</StreetName>
<CityName>תל אביב</CityName>
<Country><IdentificationCode>IL</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingCustomerParty>
<PaymentTerms>
<Note>الدفع: 30 يومًا صافي</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Name>منتج إلكتروني</Name>
<Description>جهاز كمبيوتر محمول</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>מוצר אלקטרוני</Name>
<Description>מחשב נייד מתקדם</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(rtlInvoiceXml);
  const serialized = invoice.getXmlString();

  // Phrases are checked in the listed order: Arabic first, then Hebrew.
  const arabicPhrases = [
    'شركة التقنية المحدودة',
    'شارع الملك فهد',
    'الرياض',
    'الدفع: 30 يومًا صافي',
    'منتج إلكتروني',
  ];
  const hebrewPhrases = [
    'חברת הטכנולוגיה בע"מ',
    'רחוב דיזנגוף',
    'תל אביב',
    'מוצר אלקטרוני',
  ];
  for (const phrase of [...arabicPhrases, ...hebrewPhrases]) {
    expect(serialized).toContain(phrase);
  }

  performanceTracker.addMeasurement('rtl-scripts', performance.now() - t0);
});
// Sub-test: loads a UBL invoice whose notes and item texts contain emoji and
// checks that each emoji is present in the XML returned by getXmlString().
//
// The telephone sign is written as the escape pair \u260E\uFE0F (base character
// plus emoji variation selector) so it is visible in source and the matching
// assertion below has input to find.
t.test('Emoji and emoticons', async () => {
  const startTime = performance.now();
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>EMOJI-TEST</ID>
<Note>Thank you for your order! 😊 🎉 🚀</Note>
<PaymentTerms>
<Note>Payment methods: 💳 💰 🏦</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Name>Premium Package 🌟</Name>
<Description>Includes: 📱 💻 🖱 🎧</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>Express Shipping 🚚💨</Name>
<Description>Delivery: 📦 🏠 (1-2 days)</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>Customer Support 24/7 \u260E\uFE0F</Name>
<Description>Contact: 📧 📞 💬</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // Common emojis
  expect(xmlString).toContain('😊'); // Smiling face
  expect(xmlString).toContain('🎉'); // Party
  expect(xmlString).toContain('🚀'); // Rocket
  expect(xmlString).toContain('💳'); // Credit card
  expect(xmlString).toContain('💰'); // Money bag
  expect(xmlString).toContain('🏦'); // Bank
  expect(xmlString).toContain('🌟'); // Star
  expect(xmlString).toContain('📱'); // Phone
  expect(xmlString).toContain('💻'); // Laptop
  expect(xmlString).toContain('🚚'); // Truck
  expect(xmlString).toContain('📦'); // Package
  expect(xmlString).toContain('🏠'); // House
  expect(xmlString).toContain('☎️'); // Phone
  expect(xmlString).toContain('📧'); // Email
  expect(xmlString).toContain('💬'); // Chat
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('emoji', elapsed);
});
// Sub-test: round-trips up to 60 corpus XML files through EInvoice and counts how
// many serialized results contain non-ASCII characters. For each such file the
// first matching script range (in the order listed below) is recorded for the
// log output. A file that fails to load is logged and skipped; the sub-test only
// requires that at least one file was processed.
t.test('Corpus special character validation', async () => {
  const startTime = performance.now();
  let processedCount = 0;
  let specialCharCount = 0;
  const specialCharFiles: string[] = [];

  // Built once instead of per file. Order matters: only the first range that
  // matches is reported for a given file.
  const nonAscii = /[^\x00-\x7F]/;
  const scriptRanges: ReadonlyArray<readonly [RegExp, string]> = [
    [/[À-ÿ]/, 'Latin Extended'],
    [/[Ā-ſ]/, 'Latin Extended-A'],
    [/[\u0400-\u04FF]/, 'Cyrillic'],
    [/[\u4E00-\u9FFF]/, 'CJK'],
    [/[\u0600-\u06FF]/, 'Arabic'],
  ];

  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));
  // Check sample for special characters
  const sampleSize = Math.min(60, xmlFiles.length);
  const sample = xmlFiles.slice(0, sampleSize);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const einvoice = new EInvoice();
      if (typeof content === 'string') {
        await einvoice.loadFromString(content);
      } else {
        await einvoice.loadFromBuffer(content);
      }
      const xmlString = einvoice.getXmlString();
      // Check for non-ASCII characters
      if (nonAscii.test(xmlString)) {
        specialCharCount++;
        const hit = scriptRanges.find(([range]) => range.test(xmlString));
        if (hit !== undefined) {
          specialCharFiles.push(`${file} (${hit[1]})`);
        }
      }
      processedCount++;
    } catch (error: unknown) {
      // The thrown value is not guaranteed to be an Error; narrow before reading
      // `.message` so the log never prints `undefined` for other thrown values.
      const message = error instanceof Error ? error.message : String(error);
      console.log(`Special char issue in ${file}:`, message);
    }
  }

  console.log(`Special character corpus test: ${specialCharCount}/${processedCount} files contain special characters`);
  if (specialCharFiles.length > 0) {
    console.log('Sample files with special characters:', specialCharFiles.slice(0, 5));
  }
  expect(processedCount).toBeGreaterThan(0);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-special', elapsed);
});
// Sub-test: loads a UBL invoice whose text contains invisible characters and
// checks that the surrounding visible words are still present, in order, in the
// XML returned by getXmlString().
//
// The invisible characters are written as \u escapes rather than as literal
// characters so they are visible in source and cannot be lost silently:
//   U+200B zero-width space        U+00A0 no-break space
//   U+200C zero-width non-joiner   U+200D zero-width joiner
//   U+00AD soft hyphen             U+200E / U+200F left-to-right / right-to-left mark
t.test('Zero-width and invisible characters', async () => {
  const startTime = performance.now();
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>INVISIBLE-CHARS</ID>
<Note>Zero-width\u200Bspace (U+200B)</Note>
<PaymentTerms>
<Note>Non\u00A0breaking\u200Czero\u200Dwidth\u200Djoiner</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Name>Soft\u00ADhyphen\u00ADtest</Name>
<Description>Left\u200Eto\u200Eright\u200Emark and right\u200Fto\u200Fleft\u200Fmark</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // These characters might be preserved or stripped
  // Check that the text is still readable: `.*` matches whether or not the
  // invisible character between two words survived the round trip.
  expect(xmlString).toMatch(/Zero.*width.*space/);
  expect(xmlString).toMatch(/Non.*breaking.*zero.*width.*joiner/);
  expect(xmlString).toMatch(/Soft.*hyphen.*test/);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('invisible-chars', elapsed);
});
// Emit the timings gathered through performanceTracker.addMeasurement(...).
// NOTE(review): the t.test(...) calls earlier in this callback are not awaited
// here; confirm the runner has executed them before this summary is computed.
performanceTracker.printSummary();
// Upper bound, in the tracker's time unit (measurements come from
// performance.now() differences), for the average across recorded measurements.
const maxAverageTime = 150;
expect(performanceTracker.getAverageTime()).toBeLessThan(maxAverageTime);
});
tap.start();