einvoice/test/suite/einvoice_encoding/test.enc-05.special-characters.ts
2025-05-25 19:45:37 +00:00

535 lines
18 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-05: Special Characters - should handle special and international characters correctly', async (t) => {
// ENC-05: Verify handling of special characters across different languages and scripts
// This test ensures proper support for international invoicing
const performanceTracker = new PerformanceTracker('ENC-05: Special Characters');
const corpusLoader = new CorpusLoader();
// Sub-test: loads a UBL invoice whose text nodes contain Nordic, German, French,
// Croatian and Polish letters, then checks that the string returned by
// getXmlString() still contains each of them.
t.test('European special characters', async () => {
  const startTime = performance.now();
  // The ampersand in the customer name is written as `&amp;`: a bare `&` in
  // character data is not well-formed XML.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>EU-SPECIAL-CHARS</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>European chars test</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Åsa Öberg AB (Sweden)</Name>
</PartyName>
<PostalAddress>
<StreetName>Østergade 42</StreetName>
<CityName>København</CityName>
<Country><IdentificationCode>DK</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>Müller &amp; Schäfer GmbH</Name>
</PartyName>
<PostalAddress>
<StreetName>Hauptstraße 15</StreetName>
<CityName>Düsseldorf</CityName>
<Country><IdentificationCode>DE</IdentificationCode></Country>
</PostalAddress>
<Contact>
<Name>François Lefèvre</Name>
<ElectronicMail>f.lefevre@müller-schäfer.de</ElectronicMail>
</Contact>
</Party>
</AccountingCustomerParty>
<InvoiceLine>
<Item>
<Name>Château Margaux (Bordeaux)</Name>
<Description>Vin rouge, millésime 2015, cépage: Cabernet Sauvignon</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>Prošek (Croatian dessert wine)</Name>
<Description>Vino desertno, područje: Dalmacija</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>Żubrówka (Polish vodka)</Name>
<Description>Wódka żytnia z trawą żubrową</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // Nordic characters
  expect(xmlString).toContain('Åsa Öberg');
  expect(xmlString).toContain('Østergade');
  expect(xmlString).toContain('København');
  // German characters (the ampersand stays escaped in serialized XML)
  expect(xmlString).toContain('Müller &amp; Schäfer');
  expect(xmlString).toContain('Hauptstraße');
  expect(xmlString).toContain('Düsseldorf');
  expect(xmlString).toContain('müller-schäfer.de');
  // French characters
  expect(xmlString).toContain('François Lefèvre');
  expect(xmlString).toContain('Château Margaux');
  expect(xmlString).toContain('millésime');
  expect(xmlString).toContain('cépage');
  // Croatian characters
  expect(xmlString).toContain('Prošek');
  expect(xmlString).toContain('područje');
  // Polish characters
  expect(xmlString).toContain('Żubrówka');
  expect(xmlString).toContain('żytnia');
  expect(xmlString).toContain('żubrową');
  // Report the elapsed time of this sub-test to the shared tracker.
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('european-chars', elapsed);
});
// Sub-test: feeds an invoice containing currency signs and formatted amounts
// through loadFromString() and asserts that each sign and each formatted
// amount is present in the getXmlString() output.
t.test('Currency and monetary symbols', async () => {
  const begin = performance.now();
  const currencyXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>CURRENCY-SYMBOLS</ID>
<Note>Currency symbols: € £ $ ¥ ₹ ₽ ₪ ₩ ₡ ₦ ₨ ₱ ₴ ₵ ₸ ₹ ₺ ₼</Note>
<TaxTotal>
<TaxAmount currencyID="EUR">€1,234.56</TaxAmount>
</TaxTotal>
<TaxTotal>
<TaxAmount currencyID="GBP">£987.65</TaxAmount>
</TaxTotal>
<TaxTotal>
<TaxAmount currencyID="USD">$2,345.67</TaxAmount>
</TaxTotal>
<TaxTotal>
<TaxAmount currencyID="JPY">¥123,456</TaxAmount>
</TaxTotal>
<TaxTotal>
<TaxAmount currencyID="INR">₹98,765</TaxAmount>
</TaxTotal>
<AllowanceCharge>
<ChargeIndicator>false</ChargeIndicator>
<AllowanceChargeReason>Discount (5% off orders > €500)</AllowanceChargeReason>
<Amount currencyID="EUR">25.50</Amount>
</AllowanceCharge>
<PaymentTerms>
<Note>Accepted: € EUR, £ GBP, $ USD, ¥ JPY, ₹ INR</Note>
</PaymentTerms>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(currencyXml);
  const serialized = invoice.getXmlString();

  // Individual currency signs, checked in a fixed order.
  const currencySigns = [
    '€', // Euro
    '£', // Pound
    '$', // Dollar
    '¥', // Yen
    '₹', // Rupee
    '₽', // Ruble
    '₪', // Shekel
    '₩', // Won
  ];
  for (const sign of currencySigns) {
    expect(serialized).toContain(sign);
  }

  // Sign plus digits must appear together, exactly as written in the fixture.
  const formattedAmounts = ['€1,234.56', '£987.65', '$2,345.67', '¥123,456', '₹98,765'];
  for (const amount of formattedAmounts) {
    expect(serialized).toContain(amount);
  }

  performanceTracker.addMeasurement('currency-symbols', performance.now() - begin);
});
// Sub-test: loads an invoice whose notes, price codes and item properties
// contain mathematical operators and technical signs, and asserts that a
// selection of them is present in the getXmlString() output.
t.test('Mathematical and technical symbols', async () => {
  const begin = performance.now();
  const mathXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MATH-SYMBOLS</ID>
<Note>Math symbols: ± × ÷ ≤ ≥ ≠ ≈ ∞ √ ∑ ∏ ∫ ∂ ∇ ∈ ∉ ⊂ ⊃ ∩</Note>
<InvoiceLine>
<LineExtensionAmount currencyID="EUR">100.00</LineExtensionAmount>
<PricingReference>
<AlternativeConditionPrice>
<PriceAmount currencyID="EUR">95.00</PriceAmount>
<PriceTypeCode>Discount ≥ 10 units</PriceTypeCode>
</AlternativeConditionPrice>
</PricingReference>
<Item>
<Description>Precision tool ± 0.001mm</Description>
<AdditionalItemProperty>
<Name>Temperature range</Name>
<Value>-40°C ≤ T ≤ +85°C</Value>
</AdditionalItemProperty>
<AdditionalItemProperty>
<Name>Dimensions</Name>
<Value>10cm × 5cm × 2cm</Value>
</AdditionalItemProperty>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Description>√2 ≈ 1.414, π ≈ 3.14159, e ≈ 2.71828</Description>
<AdditionalItemProperty>
<Name>Formula</Name>
<Value>Area = πr² (where r = radius)</Value>
</AdditionalItemProperty>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(mathXml);
  const serialized = invoice.getXmlString();

  // Operators and signs expected in the output, checked in a fixed order.
  const expectedSymbols = [
    '±', // Plus-minus
    '×', // Multiplication
    '÷', // Division
    '≤', // Less than or equal
    '≥', // Greater than or equal
    '≠', // Not equal
    '≈', // Approximately
    '∞', // Infinity
    '√', // Square root
    'π', // Pi
    '°', // Degree
  ];
  for (const symbol of expectedSymbols) {
    expect(serialized).toContain(symbol);
  }

  performanceTracker.addMeasurement('math-symbols', performance.now() - begin);
});
// Sub-test: loads an invoice with Japanese, Chinese, Korean, Hindi and Thai
// text and asserts that each phrase is present in the getXmlString() output.
t.test('Asian scripts and characters', async () => {
  const begin = performance.now();
  const asianXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ASIAN-SCRIPTS</ID>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>株式会社山田商事 (Yamada Trading Co., Ltd.)</Name>
</PartyName>
<PostalAddress>
<StreetName>東京都千代田区丸の内1-1-1</StreetName>
<CityName>東京</CityName>
<Country><IdentificationCode>JP</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>北京科技有限公司 (Beijing Tech Co., Ltd.)</Name>
</PartyName>
<PostalAddress>
<StreetName>北京市朝阳区建国路88号</StreetName>
<CityName>北京</CityName>
<Country><IdentificationCode>CN</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingCustomerParty>
<InvoiceLine>
<Item>
<Name>전자제품 (Electronics)</Name>
<Description>최신 스마트폰 모델</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>कंप्यूटर उपकरण</Name>
<Description>नवीनतम लैपटॉप मॉडल</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>ซอฟต์แวร์คอมพิวเตอร์</Name>
<Description>โปรแกรมสำนักงาน</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(asianXml);
  const serialized = invoice.getXmlString();

  // Expected phrases grouped by script, checked in a fixed order.
  const expectedPhrases = [
    // Japanese (Kanji, Hiragana, Katakana)
    '株式会社山田商事',
    '東京都千代田区丸の内',
    // Chinese (Simplified)
    '北京科技有限公司',
    '北京市朝阳区建国路',
    // Korean (Hangul)
    '전자제품',
    '최신 스마트폰 모델',
    // Hindi (Devanagari)
    'कंप्यूटर उपकरण',
    'नवीनतम लैपटॉप मॉडल',
    // Thai
    'ซอฟต์แวร์คอมพิวเตอร์',
    'โปรแกรมสำนักงาน',
  ];
  for (const phrase of expectedPhrases) {
    expect(serialized).toContain(phrase);
  }

  performanceTracker.addMeasurement('asian-scripts', performance.now() - begin);
});
// Sub-test: loads an invoice with Arabic and Hebrew text and asserts that
// each phrase is present in the getXmlString() output.
t.test('Arabic and RTL scripts', async () => {
  const begin = performance.now();
  const rtlXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>RTL-SCRIPTS</ID>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>شركة التقنية المحدودة</Name>
</PartyName>
<PostalAddress>
<StreetName>شارع الملك فهد</StreetName>
<CityName>الرياض</CityName>
<Country><IdentificationCode>SA</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>חברת הטכנולוגיה בע"מ</Name>
</PartyName>
<PostalAddress>
<StreetName>רחוב דיזנגוף 123</StreetName>
<CityName>תל אביב</CityName>
<Country><IdentificationCode>IL</IdentificationCode></Country>
</PostalAddress>
</Party>
</AccountingCustomerParty>
<PaymentTerms>
<Note>الدفع: 30 يومًا صافي</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Name>منتج إلكتروني</Name>
<Description>جهاز كمبيوتر محمول</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>מוצר אלקטרוני</Name>
<Description>מחשב נייד מתקדם</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(rtlXml);
  const serialized = invoice.getXmlString();

  // Expected phrases grouped by script, checked in a fixed order.
  const expectedPhrases = [
    // Arabic
    'شركة التقنية المحدودة',
    'شارع الملك فهد',
    'الرياض',
    'الدفع: 30 يومًا صافي',
    'منتج إلكتروني',
    // Hebrew
    'חברת הטכנולוגיה בע"מ',
    'רחוב דיזנגוף',
    'תל אביב',
    'מוצר אלקטרוני',
  ];
  for (const phrase of expectedPhrases) {
    expect(serialized).toContain(phrase);
  }

  performanceTracker.addMeasurement('rtl-scripts', performance.now() - begin);
});
// Sub-test: loads an invoice whose notes and item texts contain emoji and
// asserts that a selection of them is present in the getXmlString() output.
t.test('Emoji and emoticons', async () => {
  const begin = performance.now();
  const emojiXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>EMOJI-TEST</ID>
<Note>Thank you for your order! 😊 🎉 🚀</Note>
<PaymentTerms>
<Note>Payment methods: 💳 💰 🏦</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Name>Premium Package 🌟</Name>
<Description>Includes: 📱 💻 🖱️ ⌨️ 🎧</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>Express Shipping 🚚💨</Name>
<Description>Delivery: 📦 → 🏠 (1-2 days)</Description>
</Item>
</InvoiceLine>
<InvoiceLine>
<Item>
<Name>Customer Support 24/7 ☎️</Name>
<Description>Contact: 📧 📞 💬</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(emojiXml);
  const serialized = invoice.getXmlString();

  // Emoji expected in the output, checked in a fixed order.
  const expectedEmoji = [
    '😊', // Smiling face
    '🎉', // Party
    '🚀', // Rocket
    '💳', // Credit card
    '💰', // Money bag
    '🏦', // Bank
    '🌟', // Star
    '📱', // Phone
    '💻', // Laptop
    '🚚', // Truck
    '📦', // Package
    '🏠', // House
    '☎️', // Phone
    '📧', // Email
    '💬', // Chat
  ];
  for (const emoji of expectedEmoji) {
    expect(serialized).toContain(emoji);
  }

  performanceTracker.addMeasurement('emoji', performance.now() - begin);
});
// Sub-test: runs up to 60 corpus `.xml` files through EInvoice and counts how
// many serialized results contain non-ASCII characters. Files that fail to
// load are logged and skipped; the sub-test only requires that at least one
// file was processed.
t.test('Corpus special character validation', async () => {
  const startTime = performance.now();

  // Any character outside the 7-bit ASCII range.
  const nonAsciiPattern = /[^\x00-\x7F]/;
  // Ordered list: the FIRST matching range labels the file, so a file with
  // several scripts is reported once, under the earliest entry.
  const scriptRanges: ReadonlyArray<{ label: string; pattern: RegExp }> = [
    { label: 'Latin Extended', pattern: /[À-ÿ]/ },
    { label: 'Latin Extended-A', pattern: /[Ā-ſ]/ },
    { label: 'Cyrillic', pattern: /[\u0400-\u04FF]/ },
    { label: 'CJK', pattern: /[\u4E00-\u9FFF]/ },
    { label: 'Arabic', pattern: /[\u0600-\u06FF]/ },
  ];

  let processedCount = 0;
  let specialCharCount = 0;
  const specialCharFiles: string[] = [];
  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));
  // Check sample for special characters
  const sampleSize = Math.min(60, xmlFiles.length);
  const sample = xmlFiles.slice(0, sampleSize);
  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const einvoice = new EInvoice();
      // readFile may hand back text or binary data; pick the matching loader.
      if (typeof content === 'string') {
        await einvoice.loadFromString(content);
      } else {
        await einvoice.loadFromBuffer(content);
      }
      const xmlString = einvoice.getXmlString();
      if (nonAsciiPattern.test(xmlString)) {
        specialCharCount++;
        const matchedRange = scriptRanges.find(({ pattern }) => pattern.test(xmlString));
        if (matchedRange) {
          specialCharFiles.push(`${file} (${matchedRange.label})`);
        }
      }
      processedCount++;
    } catch (error: unknown) {
      // A thrown value is not guaranteed to be an Error; narrow before
      // reading `.message` so the log never prints `undefined`.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Special char issue in ${file}:`, reason);
    }
  }
  console.log(`Special character corpus test: ${specialCharCount}/${processedCount} files contain special characters`);
  if (specialCharFiles.length > 0) {
    console.log('Sample files with special characters:', specialCharFiles.slice(0, 5));
  }
  expect(processedCount).toBeGreaterThan(0);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-special', elapsed);
});
// Sub-test: loads an invoice whose text nodes contain zero-width and other
// invisible code points and checks that the visible words around them are
// still present, in order, in the getXmlString() output.
t.test('Zero-width and invisible characters', async () => {
  const startTime = performance.now();
  // Every invisible code point is written as a \u escape instead of a literal
  // character, so the fixture cannot be altered silently by editors,
  // formatters or copy/paste, and a reader can see what is being tested:
  //   U+200B zero width space        U+2060 word joiner
  //   U+200C zero width non-joiner   U+200D zero width joiner
  //   U+00AD soft hyphen
  //   U+200E left-to-right mark      U+200F right-to-left mark
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>INVISIBLE-CHARS</ID>
<Note>Zero-width\u200Bspace (U+200B)</Note>
<PaymentTerms>
<Note>Non\u2060breaking\u200Czero\u200Cwidth\u200Djoiner</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Name>Soft\u00ADhyphen\u00ADtest</Name>
<Description>Left\u200Eto\u200Eright\u200Emark and right\u200Fto\u200Fleft\u200Fmark</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // These characters might be preserved or stripped, so the patterns use `.*`
  // between the visible words and accept either outcome.
  expect(xmlString).toMatch(/Zero.*width.*space/);
  expect(xmlString).toMatch(/Non.*breaking.*zero.*width.*joiner/);
  expect(xmlString).toMatch(/Soft.*hyphen.*test/);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('invisible-chars', elapsed);
});
// Print performance summary
performanceTracker.printSummary();
// Performance assertions
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(150); // Special character operations should be reasonably fast
});
tap.start();