einvoice/test/suite/einvoice_encoding/test.enc-10.cross-format-encoding.ts
2025-05-25 19:45:37 +00:00

393 lines
16 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-10: Cross-Format Encoding - should maintain encoding consistency across formats', async (t) => {
// ENC-10: Cross-format encoding consistency.
// Each sub-test below loads an XML invoice (UBL, CII, ZUGFeRD-style CII, XRechnung, or corpus
// files) into a fresh EInvoice and reads it back with getXmlString(), asserting that the
// non-ASCII text of the input is still present in the returned string.
// NOTE(review): no explicit format-to-format conversion call is made anywhere in this test —
// confirm whether load + getXmlString() is what "cross-format" is meant to cover.
// Collects the elapsed time each sub-test reports via addMeasurement().
const performanceTracker = new PerformanceTracker('ENC-10: Cross-Format Encoding');
// Supplies the file list and file contents used by the corpus sub-test.
const corpusLoader = new CorpusLoader();
t.test('UBL to CII encoding preservation', async () => {
  const startTime = performance.now();

  // UBL invoice whose text nodes carry currency/typographic symbols and accented Latin letters.
  // The ampersand in the supplier name is written as `&amp;`: XML 1.0 forbids a literal `&` in
  // character data, so an unescaped one would make this fixture ill-formed before any encoding
  // behaviour could be exercised.
  const ublContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:UBLVersionID>2.1</cbc:UBLVersionID>
<cbc:ID>CROSS-FORMAT-UBL-001</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:Note>Special chars: € £ ¥ © ® ™ § ¶ • ° ± × ÷</cbc:Note>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Müller &amp; Associés S.à r.l.</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>Rue de la Légion d'Honneur</cbc:StreetName>
<cbc:CityName>Saarbrücken</cbc:CityName>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:Note>Spëcïål cháracters: ñ ç ø å æ þ ð</cbc:Note>
<cac:Item>
<cbc:Name>Bücher über Köln</cbc:Name>
<cbc:Description>Prix: 25,50 € (TVA incluse)</cbc:Description>
</cac:Item>
</cac:InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(ublContent);

  // Log the format reported for the loaded document (informational only; it is not asserted).
  // NOTE(review): despite the sub-test title, no conversion to CII is requested here.
  const format = einvoice.getFormat();
  console.log(`Detected format: ${format}`);

  // Read the XML back from the instance.
  const xmlString = einvoice.getXmlString();

  // Every special character from the fixture must still be present in the returned XML.
  expect(xmlString).toContain('€ £ ¥ © ® ™ § ¶ • ° ± × ÷');
  // The ampersand is expected in its escaped form, the only form a well-formed text node allows.
  expect(xmlString).toContain('Müller &amp; Associés S.à r.l.');
  expect(xmlString).toContain('Rue de la Légion d\'Honneur');
  expect(xmlString).toContain('Saarbrücken');
  expect(xmlString).toContain('Spëcïål cháracters: ñ ç ø å æ þ ð');
  expect(xmlString).toContain('Bücher über Köln');
  expect(xmlString).toContain('25,50 €');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('ubl-to-cii', elapsed);
});
t.test('CII to UBL encoding preservation', async () => {
  const t0 = performance.now();

  // CII document whose text nodes mix accented Latin, Cyrillic and CJK characters.
  const ciiXml = `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice
xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<rsm:ExchangedDocumentContext>
<ram:GuidelineSpecifiedDocumentContextParameter>
<ram:ID>urn:cen.eu:en16931:2017</ram:ID>
</ram:GuidelineSpecifiedDocumentContextParameter>
</rsm:ExchangedDocumentContext>
<rsm:ExchangedDocument>
<ram:ID>CROSS-FORMAT-CII-001</ram:ID>
<ram:IssueDateTime>2025-01-25</ram:IssueDateTime>
<ram:IncludedNote>
<ram:Content>Multi-language: Français, Español, Português, Română, Čeština</ram:Content>
</ram:IncludedNote>
</rsm:ExchangedDocument>
<rsm:SupplyChainTradeTransaction>
<ram:ApplicableHeaderTradeAgreement>
<ram:SellerTradeParty>
<ram:Name>АО "Компания" (Россия)</ram:Name>
<ram:PostalTradeAddress>
<ram:LineOne>ул. Тверская, д. 1</ram:LineOne>
<ram:CityName>Москва</ram:CityName>
<ram:CountryID>RU</ram:CountryID>
</ram:PostalTradeAddress>
</ram:SellerTradeParty>
</ram:ApplicableHeaderTradeAgreement>
<ram:IncludedSupplyChainTradeLineItem>
<ram:SpecifiedTradeProduct>
<ram:Name>北京烤鸭 (Beijing Duck)</ram:Name>
<ram:Description>Traditional Chinese dish: 传统中国菜</ram:Description>
</ram:SpecifiedTradeProduct>
</ram:IncludedSupplyChainTradeLineItem>
</rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(ciiXml);
  const serialized = invoice.getXmlString();

  // Each fragment must be found verbatim in the XML read back from the instance;
  // they are checked in this order and the first miss fails the sub-test.
  const expectedFragments = [
    'Français, Español, Português, Română, Čeština',
    'АО "Компания" (Россия)',
    'ул. Тверская, д. 1',
    'Москва',
    '北京烤鸭 (Beijing Duck)',
    'Traditional Chinese dish: 传统中国菜',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('cii-to-ubl', performance.now() - t0);
});
t.test('ZUGFeRD/Factur-X encoding in PDF', async () => {
  const startTime = performance.now();

  // CII-style XML with German umlauts, sharp s and assorted symbols.
  // NOTE(review): the title mentions PDF, but only the XML string is loaded here; no PDF
  // container is built or read.
  // The ampersand in the seller name is written as `&amp;` because XML 1.0 does not allow a
  // literal `&` in character data; left raw, the fixture itself would be ill-formed.
  const zugferdXml = `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100">
<rsm:ExchangedDocument>
<ram:ID xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">ZUGFERD-ENCODING-001</ram:ID>
<ram:Name xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">Rechnung für Büroartikel</ram:Name>
<ram:IncludedNote xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<ram:Content>Sonderzeichen: ÄÖÜäöüß €§°²³µ</ram:Content>
</ram:IncludedNote>
</rsm:ExchangedDocument>
<rsm:SupplyChainTradeTransaction>
<ram:ApplicableHeaderTradeAgreement xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<ram:SellerTradeParty>
<ram:Name>Großhändler für Bürobedarf GmbH &amp; Co. KG</ram:Name>
<ram:PostalTradeAddress>
<ram:LineOne>Königsallee 42</ram:LineOne>
<ram:CityName>Düsseldorf</ram:CityName>
</ram:PostalTradeAddress>
</ram:SellerTradeParty>
</ram:ApplicableHeaderTradeAgreement>
</rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(zugferdXml);
  const xmlString = einvoice.getXmlString();

  // German special characters must be present in the XML read back from the instance.
  expect(xmlString).toContain('Rechnung für Büroartikel');
  expect(xmlString).toContain('ÄÖÜäöüß €§°²³µ');
  // The ampersand is expected in its escaped form, the only form a well-formed text node allows.
  expect(xmlString).toContain('Großhändler für Bürobedarf GmbH &amp; Co. KG');
  expect(xmlString).toContain('Königsallee');
  expect(xmlString).toContain('Düsseldorf');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('zugferd-encoding', elapsed);
});
t.test('XRechnung encoding requirements', async () => {
  const t0 = performance.now();

  // UBL invoice carrying an XRechnung CustomizationID and German text (umlauts, ß, §, ×).
  const xrechnungXml = `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:UBLVersionID>2.1</cbc:UBLVersionID>
<cbc:CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:xeinkauf.de:kosit:xrechnung_3.0</cbc:CustomizationID>
<cbc:ID>XRECHNUNG-ENCODING-001</cbc:ID>
<cbc:Note>Leitweg-ID: 991-12345-67</cbc:Note>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyLegalEntity>
<cbc:RegistrationName>Behörde für Straßenbau und Verkehr</cbc:RegistrationName>
</cac:PartyLegalEntity>
<cac:Contact>
<cbc:Name>Herr Müller-Lüdenscheid</cbc:Name>
<cbc:Telephone>+49 (0)30 12345-678</cbc:Telephone>
<cbc:ElectronicMail>müller-lüdenscheid@behoerde.de</cbc:ElectronicMail>
</cac:Contact>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:InvoiceLine>
<cbc:Note>Straßenbauarbeiten gemäß § 3 Abs. 2 VOB/B</cbc:Note>
<cac:Item>
<cbc:Name>Asphaltierungsarbeiten (Fahrbahn)</cbc:Name>
<cbc:Description>Maße: 100m × 8m × 0,08m</cbc:Description>
</cac:Item>
</cac:InvoiceLine>
</ubl:Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(xrechnungXml);
  const serialized = invoice.getXmlString();

  // Fragments that must appear verbatim in the XML read back from the instance;
  // checked in order, the first miss fails the sub-test.
  const expectedFragments = [
    'urn:xeinkauf.de:kosit:xrechnung_3.0',
    'Leitweg-ID: 991-12345-67',
    'Behörde für Straßenbau und Verkehr',
    'Herr Müller-Lüdenscheid',
    'müller-lüdenscheid@behoerde.de',
    'gemäß § 3 Abs. 2 VOB/B',
    '100m × 8m × 0,08m',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('xrechnung-encoding', performance.now() - t0);
});
t.test('Mixed format conversion chain', async () => {
  const t0 = performance.now();

  // Seed document: one note spanning several scripts and symbol sets, plus a CJK/Thai party name.
  const seedXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>CHAIN-TEST-001</ID>
<Note>Characters to preserve:
Latin: àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ
Greek: ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ αβγδεζηθικλμνξοπρστυφχψω
Cyrillic: АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
Math: ∑∏∫∂∇∈∉⊂⊃∪∩≤≥≠≈∞±×÷
Currency: €£¥₹₽₪₩
Emoji: 📧💰🌍
</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>测试公司 (Test Company) ทดสอบ บริษัท</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
</Invoice>`;

  // Hop 1: load the seed and read it back.
  const firstInvoice = new EInvoice();
  await firstInvoice.loadFromString(seedXml);
  const firstPass = firstInvoice.getXmlString();

  // Hop 2: feed the first output into a brand-new instance and read it back again.
  const secondInvoice = new EInvoice();
  await secondInvoice.loadFromString(firstPass);
  const secondPass = secondInvoice.getXmlString();

  // After both hops every character class must still be present, checked in this order.
  const mustSurvive = [
    'àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
    'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ',
    'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ',
    '∑∏∫∂∇∈∉⊂⊃∪∩≤≥≠≈∞±×÷',
    '€£¥₹₽₪₩',
    '📧💰🌍',
    '测试公司',
    'ทดสอบ บริษัท',
  ];
  for (const fragment of mustSurvive) {
    expect(secondPass).toContain(fragment);
  }

  performanceTracker.addMeasurement('conversion-chain', performance.now() - t0);
});
t.test('Encoding consistency across formats in corpus', async () => {
  const startTime = performance.now();

  // Files that loaded and serialized without throwing.
  let processedCount = 0;
  // Processed files whose XML output contains at least one character outside 7-bit ASCII.
  let nonAsciiCount = 0;
  // format -> (encoding name found in the output, or 'none') -> number of files
  const formatEncoding: Record<string, Record<string, number>> = {};

  // Patterns are loop-invariant, so build them once. Neither uses the `g` flag, so reusing
  // them across iterations carries no lastIndex state.
  const encodingPattern = /encoding\s*=\s*["']([^"']+)["']/i;
  const nonAsciiPattern = /[^\x00-\x7F]/;

  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));

  // Only the first 80 XML files (or fewer, if the corpus is smaller) are examined.
  const sampleSize = Math.min(80, xmlFiles.length);
  const sample = xmlFiles.slice(0, sampleSize);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const einvoice = new EInvoice();
      // readFile may hand back either a string or a non-string value; pick the matching loader.
      if (typeof content === 'string') {
        await einvoice.loadFromString(content);
      } else {
        await einvoice.loadFromBuffer(content);
      }

      const format = einvoice.getFormat() || 'unknown';
      const xmlString = einvoice.getXmlString();

      // First `encoding="..."` occurrence in the output; 'none' when there is no match.
      const encodingMatch = xmlString.match(encodingPattern);
      const encoding = encodingMatch ? encodingMatch[1] : 'none';

      // Tally encoding per format, creating the inner record on first sight of a format.
      let encodingCounts = formatEncoding[format];
      if (!encodingCounts) {
        encodingCounts = {};
        formatEncoding[format] = encodingCounts;
      }
      encodingCounts[encoding] = (encodingCounts[encoding] || 0) + 1;

      if (nonAsciiPattern.test(xmlString)) {
        nonAsciiCount++;
      }
      processedCount++;
    } catch (error) {
      // Best-effort scan: a file that fails to load is logged and skipped, not fatal.
      // The caught value is narrowed before `.message` is read, since anything can be thrown.
      const message = error instanceof Error ? error.message : String(error);
      console.log(`Cross-format encoding issue in ${file}:`, message);
    }
  }

  console.log(`Cross-format encoding analysis (${processedCount} files):`);
  console.log(`- Files with non-ASCII characters: ${nonAsciiCount}`);
  console.log('Encoding by format:');
  Object.entries(formatEncoding).forEach(([format, encodings]) => {
    console.log(` ${format}:`, encodings);
  });

  // At least one corpus file must have made it through load + serialize.
  expect(processedCount).toBeGreaterThan(0);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-cross-format', elapsed);
});
t.test('Round-trip encoding preservation', async () => {
  const t0 = performance.now();

  // One entry per character family; `content` becomes the invoice note and must come back unchanged.
  const scenarios = [
    { name: 'European languages', content: 'Zürich, München, København, Kraków, București' },
    { name: 'Asian languages', content: '東京 (Tokyo), 北京 (Beijing), 서울 (Seoul), กรุงเทพฯ (Bangkok)' },
    { name: 'RTL languages', content: 'العربية (Arabic), עברית (Hebrew), فارسی (Persian)' },
    { name: 'Special symbols', content: '™®©℗℠№℮¶§†‡•◊♠♣♥♦' },
    { name: 'Mathematical', content: '∀x∈: x²≥0, ∑ᵢ₌₁ⁿ i = n(n+1)/2' },
  ];

  for (const { name, content } of scenarios) {
    // Invoice ID suffix: scenario name upper-cased, each whitespace run replaced by a dash.
    const idSuffix = name.toUpperCase().replace(/\s+/g, '-');
    const inputXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>ROUND-TRIP-${idSuffix}</ID>
<Note>${content}</Note>
</Invoice>`;

    const invoice = new EInvoice();
    await invoice.loadFromString(inputXml);

    // Load followed by read-back; the note text must appear verbatim in the result.
    const roundTripped = invoice.getXmlString();
    expect(roundTripped).toContain(content);
    console.log(`Round-trip ${name}: OK`);
  }

  performanceTracker.addMeasurement('round-trip', performance.now() - t0);
});
// Report the collected timings, then enforce the speed budget.
// NOTE(review): the t.test(...) calls in this callback are not awaited — confirm they have
// recorded their measurements before this point is reached.
performanceTracker.printSummary();
// Cross-format operations should be reasonably fast: the tracker's average must stay below 150
// (the recorded values are performance.now() deltas, i.e. milliseconds).
expect(performanceTracker.getAverageTime()).toBeLessThan(150);
});
// Start the tap runner now that the ENC-10 test has been registered with tap.test().
tap.start();