// Source: einvoice/test/suite/einvoice_encoding/test.enc-10.cross-format-encoding.ts
// (repository web-view header: 393 lines, 16 KiB, TypeScript; last modified 2025-05-25 19:45:37 +00:00)
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-10: Cross-Format Encoding - should maintain encoding consistency across formats', async (t) => {
// ENC-10: Verify encoding consistency when converting between different invoice formats
// This test ensures character encoding is preserved during format conversions
const performanceTracker = new PerformanceTracker('ENC-10: Cross-Format Encoding');
const corpusLoader = new CorpusLoader();
t.test('UBL to CII encoding preservation', async () => {
  // Loads a UBL invoice whose text nodes carry Latin-1 symbols, accented letters and
  // the euro sign, then checks that each fragment is still present in the XML string
  // returned by the EInvoice instance.
  // NOTE(review): despite the test title, only loadFromString() + getXmlString() are
  // exercised here; no explicit UBL -> CII conversion call is made.
  const startTime = performance.now();
  // UBL invoice with special characters. The fixture must contain every character the
  // assertions below look for, and '&' must be written as '&amp;' to keep the XML well-formed.
  const ublContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:UBLVersionID>2.1</cbc:UBLVersionID>
<cbc:ID>CROSS-FORMAT-UBL-001</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:Note>Special chars: € £ ¥ © ® ™ § ¶ • ° ± × ÷</cbc:Note>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Müller &amp; Associés S.à r.l.</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>Rue de la Légion d'Honneur</cbc:StreetName>
<cbc:CityName>Saarbrücken</cbc:CityName>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:Note>Spëcïål cháracters: ñ ç ø å æ þ ð</cbc:Note>
<cac:Item>
<cbc:Name>Bücher über Köln</cbc:Name>
<cbc:Description>Prix: 25,50 € (TVA incluse)</cbc:Description>
</cac:Item>
</cac:InvoiceLine>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(ublContent);
  // Format detection is logged for diagnostics only; it is not asserted on.
  const format = einvoice.getFormat();
  console.log(`Detected format: ${format}`);
  // Get the content back
  const xmlString = einvoice.getXmlString();
  // Verify all special characters are preserved
  expect(xmlString).toContain('€ £ ¥ © ® ™ § ¶ • ° ± × ÷');
  // The ampersand is expected in its escaped form inside serialized XML.
  expect(xmlString).toContain('Müller &amp; Associés S.à r.l.');
  expect(xmlString).toContain('Rue de la Légion d\'Honneur');
  expect(xmlString).toContain('Saarbrücken');
  expect(xmlString).toContain('Spëcïål cháracters: ñ ç ø å æ þ ð');
  expect(xmlString).toContain('Bücher über Köln');
  expect(xmlString).toContain('25,50 €');
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('ubl-to-cii', elapsed);
});
t.test('CII to UBL encoding preservation', async () => {
  // Loads a CII invoice containing Latin-extended, Cyrillic and CJK text and checks
  // that each fragment is still present in the XML string returned by the instance.
  // NOTE(review): only loadFromString() + getXmlString() are exercised; no explicit
  // CII -> UBL conversion call is made.
  const startTime = performance.now();
  // CII invoice with international characters. The product name carries the Chinese
  // text that the assertion below looks for.
  const ciiContent = `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice
xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<rsm:ExchangedDocumentContext>
<ram:GuidelineSpecifiedDocumentContextParameter>
<ram:ID>urn:cen.eu:en16931:2017</ram:ID>
</ram:GuidelineSpecifiedDocumentContextParameter>
</rsm:ExchangedDocumentContext>
<rsm:ExchangedDocument>
<ram:ID>CROSS-FORMAT-CII-001</ram:ID>
<ram:IssueDateTime>2025-01-25</ram:IssueDateTime>
<ram:IncludedNote>
<ram:Content>Multi-language: Français, Español, Português, Română, Čeština</ram:Content>
</ram:IncludedNote>
</rsm:ExchangedDocument>
<rsm:SupplyChainTradeTransaction>
<ram:ApplicableHeaderTradeAgreement>
<ram:SellerTradeParty>
<ram:Name>АО "Компания" (Россия)</ram:Name>
<ram:PostalTradeAddress>
<ram:LineOne>ул. Тверская, д. 1</ram:LineOne>
<ram:CityName>Москва</ram:CityName>
<ram:CountryID>RU</ram:CountryID>
</ram:PostalTradeAddress>
</ram:SellerTradeParty>
</ram:ApplicableHeaderTradeAgreement>
<ram:IncludedSupplyChainTradeLineItem>
<ram:SpecifiedTradeProduct>
<ram:Name>北京烤鸭 (Beijing Duck)</ram:Name>
<ram:Description>Traditional Chinese dish: 传统中国菜</ram:Description>
</ram:SpecifiedTradeProduct>
</ram:IncludedSupplyChainTradeLineItem>
</rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(ciiContent);
  const xmlString = einvoice.getXmlString();
  // Verify international characters
  expect(xmlString).toContain('Français, Español, Português, Română, Čeština');
  expect(xmlString).toContain('АО "Компания" (Россия)');
  expect(xmlString).toContain('ул. Тверская, д. 1');
  expect(xmlString).toContain('Москва');
  expect(xmlString).toContain('北京烤鸭 (Beijing Duck)');
  expect(xmlString).toContain('Traditional Chinese dish: 传统中国菜');
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('cii-to-ubl', elapsed);
});
t.test('ZUGFeRD/Factur-X encoding in PDF', async () => {
  // Loads a CII-style XML payload with German umlauts, sharp s and assorted symbols,
  // then checks the fragments are still present in the XML string returned by the instance.
  // NOTE(review): despite the title, no PDF is created or read here; only the XML is
  // loaded and read back.
  const startTime = performance.now();
  // XML content for ZUGFeRD with special German characters. The note includes the euro
  // sign asserted below, and '&' is written as '&amp;' to keep the XML well-formed.
  const zugferdXml = `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100">
<rsm:ExchangedDocument>
<ram:ID xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">ZUGFERD-ENCODING-001</ram:ID>
<ram:Name xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">Rechnung für Büroartikel</ram:Name>
<ram:IncludedNote xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<ram:Content>Sonderzeichen: ÄÖÜäöüß €§°²³µ</ram:Content>
</ram:IncludedNote>
</rsm:ExchangedDocument>
<rsm:SupplyChainTradeTransaction>
<ram:ApplicableHeaderTradeAgreement xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<ram:SellerTradeParty>
<ram:Name>Großhändler für Bürobedarf GmbH &amp; Co. KG</ram:Name>
<ram:PostalTradeAddress>
<ram:LineOne>Königsallee 42</ram:LineOne>
<ram:CityName>Düsseldorf</ram:CityName>
</ram:PostalTradeAddress>
</ram:SellerTradeParty>
</ram:ApplicableHeaderTradeAgreement>
</rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(zugferdXml);
  const xmlString = einvoice.getXmlString();
  // Verify German special characters
  expect(xmlString).toContain('Rechnung für Büroartikel');
  expect(xmlString).toContain('ÄÖÜäöüß €§°²³µ');
  // The ampersand is expected in its escaped form inside serialized XML.
  expect(xmlString).toContain('Großhändler für Bürobedarf GmbH &amp; Co. KG');
  expect(xmlString).toContain('Königsallee');
  expect(xmlString).toContain('Düsseldorf');
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('zugferd-encoding', elapsed);
});
t.test('XRechnung encoding requirements', async () => {
  // Feeds an XRechnung-flavoured UBL document with German text through an EInvoice
  // instance and checks that each listed fragment appears in the XML read back.
  const begin = performance.now();
  // XRechnung with strict German public sector requirements
  const xrechnungContent = `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:UBLVersionID>2.1</cbc:UBLVersionID>
<cbc:CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:xeinkauf.de:kosit:xrechnung_3.0</cbc:CustomizationID>
<cbc:ID>XRECHNUNG-ENCODING-001</cbc:ID>
<cbc:Note>Leitweg-ID: 991-12345-67</cbc:Note>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyLegalEntity>
<cbc:RegistrationName>Behörde für Straßenbau und Verkehr</cbc:RegistrationName>
</cac:PartyLegalEntity>
<cac:Contact>
<cbc:Name>Herr Müller-Lüdenscheid</cbc:Name>
<cbc:Telephone>+49 (0)30 12345-678</cbc:Telephone>
<cbc:ElectronicMail>müller-lüdenscheid@behoerde.de</cbc:ElectronicMail>
</cac:Contact>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:InvoiceLine>
<cbc:Note>Straßenbauarbeiten gemäß § 3 Abs. 2 VOB/B</cbc:Note>
<cac:Item>
<cbc:Name>Asphaltierungsarbeiten (Fahrbahn)</cbc:Name>
<cbc:Description>Maße: 100m × 8m × 0,08m</cbc:Description>
</cac:Item>
</cac:InvoiceLine>
</ubl:Invoice>`;
  const invoice = new EInvoice();
  await invoice.loadFromString(xrechnungContent);
  const serialized = invoice.getXmlString();
  // Fragments that must be present in the output, checked in this order.
  const expectedFragments = [
    'urn:xeinkauf.de:kosit:xrechnung_3.0',
    'Leitweg-ID: 991-12345-67',
    'Behörde für Straßenbau und Verkehr',
    'Herr Müller-Lüdenscheid',
    'müller-lüdenscheid@behoerde.de',
    'gemäß § 3 Abs. 2 VOB/B',
    '100m × 8m × 0,08m',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }
  performanceTracker.addMeasurement('xrechnung-encoding', performance.now() - begin);
});
t.test('Mixed format conversion chain', async () => {
  // Loads a document into one EInvoice instance, reads the XML back, loads that output
  // into a second instance and reads it back again. The final string must still contain
  // every character group from the original note and party name.
  const startTime = performance.now();
  // Start with complex content. Each group below has a matching assertion further down,
  // so the fixture must contain the full character sets being asserted.
  const originalContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>CHAIN-TEST-001</ID>
<Note>Characters to preserve:
Latin: àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ
Greek: ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ αβγδεζηθικλμνξοπρστυφχψω
Cyrillic: АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
Math: ∑∏∫∂∇∈∉⊂⊃∪∩≤≥≠≈∞±×÷
Currency: €£¥₹₽₪₩
Emoji: 📧💰🌍
</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>测试公司 (Test Company) ทดสอบ บริษัท</Name>
</PartyName>
</Party>
</AccountingSupplierParty>
</Invoice>`;
  const einvoice1 = new EInvoice();
  await einvoice1.loadFromString(originalContent);
  // First pass
  const xml1 = einvoice1.getXmlString();
  // Load the first pass output into a new instance
  const einvoice2 = new EInvoice();
  await einvoice2.loadFromString(xml1);
  // Second pass
  const xml2 = einvoice2.getXmlString();
  // Verify nothing was lost in the chain
  expect(xml2).toContain('àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ');
  expect(xml2).toContain('ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ');
  expect(xml2).toContain('АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ');
  expect(xml2).toContain('∑∏∫∂∇∈∉⊂⊃∪∩≤≥≠≈∞±×÷');
  expect(xml2).toContain('€£¥₹₽₪₩');
  expect(xml2).toContain('📧💰🌍');
  expect(xml2).toContain('测试公司');
  expect(xml2).toContain('ทดสอบ บริษัท');
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('conversion-chain', elapsed);
});
t.test('Encoding consistency across formats in corpus', async () => {
  // Samples up to 80 corpus files ending in '.xml', loads each into an EInvoice, and
  // tallies the declared encoding per detected format plus how many outputs contain
  // non-ASCII characters. Per-file failures are logged and skipped; the only hard
  // assertion is that at least one file was processed.
  const startTime = performance.now();
  let processedCount = 0;
  // Number of files whose XML output contains at least one non-ASCII character.
  let nonAsciiCount = 0;
  const formatEncoding: Record<string, Record<string, number>> = {};
  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));
  // Sample corpus for cross-format encoding
  const sampleSize = Math.min(80, xmlFiles.length);
  const sample = xmlFiles.slice(0, sampleSize);
  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const einvoice = new EInvoice();
      if (typeof content === 'string') {
        await einvoice.loadFromString(content);
      } else {
        await einvoice.loadFromBuffer(content);
      }
      const format = einvoice.getFormat() || 'unknown';
      const xmlString = einvoice.getXmlString();
      // Extract encoding declaration (first encoding="..." occurrence in the output)
      const encodingMatch = xmlString.match(/encoding\s*=\s*["']([^"']+)["']/i);
      const encoding = encodingMatch ? encodingMatch[1] : 'none';
      // Track encoding by format
      const byEncoding = formatEncoding[format] ?? (formatEncoding[format] = {});
      byEncoding[encoding] = (byEncoding[encoding] ?? 0) + 1;
      // Check for characters outside the 7-bit ASCII range
      if (/[^\x00-\x7F]/.test(xmlString)) {
        nonAsciiCount++;
      }
      processedCount++;
    } catch (error: unknown) {
      // A thrown value is not guaranteed to be an Error, so narrow before reading .message.
      const message = error instanceof Error ? error.message : String(error);
      console.log(`Cross-format encoding issue in ${file}:`, message);
    }
  }
  console.log(`Cross-format encoding analysis (${processedCount} files):`);
  console.log(`- Files with non-ASCII characters: ${nonAsciiCount}`);
  console.log('Encoding by format:');
  for (const [format, encodings] of Object.entries(formatEncoding)) {
    console.log(` ${format}:`, encodings);
  }
  expect(processedCount).toBeGreaterThan(0);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-cross-format', elapsed);
});
t.test('Round-trip encoding preservation', async () => {
  // For each sample string, wraps it in a minimal UBL invoice note, loads the document
  // into a fresh EInvoice, and checks that the XML read back still contains the string.
  const begin = performance.now();
  // Test content with various challenging characters
  const samples = [
    { name: 'European languages', content: 'Zürich, München, København, Kraków, București' },
    { name: 'Asian languages', content: '東京 (Tokyo), 北京 (Beijing), 서울 (Seoul), กรุงเทพฯ (Bangkok)' },
    { name: 'RTL languages', content: 'العربية (Arabic), עברית (Hebrew), فارسی (Persian)' },
    { name: 'Special symbols', content: '™®©℗℠№℮¶§†‡•◊♠♣♥♦' },
    { name: 'Mathematical', content: '∀x∈: x²≥0, ∑ᵢ₌₁ⁿ i = n(n+1)/2' },
  ];
  for (const { name, content } of samples) {
    // Invoice ID suffix: upper-cased case name with whitespace runs replaced by '-'.
    const idSuffix = name.toUpperCase().replace(/\s+/g, '-');
    const document = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>ROUND-TRIP-${idSuffix}</ID>
<Note>${content}</Note>
</Invoice>`;
    const invoice = new EInvoice();
    await invoice.loadFromString(document);
    // Round trip: read the XML back and verify the sample text is preserved
    const roundTripped = invoice.getXmlString();
    expect(roundTripped).toContain(content);
    console.log(`Round-trip ${name}: OK`);
  }
  performanceTracker.addMeasurement('round-trip', performance.now() - begin);
});
// Print performance summary
performanceTracker.printSummary();
// Performance assertions
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(150); // Cross-format operations should be reasonably fast
});
tap.start();