393 lines
16 KiB
TypeScript
393 lines
16 KiB
TypeScript
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||
import * as plugins from '../plugins.js';
|
||
import { EInvoice } from '../../../ts/index.js';
|
||
import { CorpusLoader } from '../corpus.loader.js';
|
||
import { PerformanceTracker } from '../performance.tracker.js';
|
||
|
||
tap.test('ENC-10: Cross-Format Encoding - should maintain encoding consistency across formats', async (t) => {
|
||
// ENC-10 verifies that character encoding stays consistent when invoices in
// different formats are loaded and their XML is read back.

// Accumulates one timing measurement per scenario below.
const performanceTracker = new PerformanceTracker('ENC-10: Cross-Format Encoding');
// Used by the corpus scenario to enumerate and read sample files.
const corpusLoader = new CorpusLoader();
|
||
|
||
// Scenario: load a UBL invoice whose text fields carry accented Latin letters,
// currency signs and typographic symbols, then check that the XML handed back
// by getXmlString() still contains each of them.
// NOTE(review): despite the scenario name, no explicit UBL -> CII conversion is
// invoked here; only loadFromString() + getXmlString() run — confirm intent.
t.test('UBL to CII encoding preservation', async () => {
  const startTime = performance.now();

  // UBL invoice with special characters. The ampersand in the supplier name is
  // written as &amp; because a bare "&" in character data is not well-formed XML.
  const ublContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
         xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
         xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:UBLVersionID>2.1</cbc:UBLVersionID>
  <cbc:ID>CROSS-FORMAT-UBL-001</cbc:ID>
  <cbc:IssueDate>2025-01-25</cbc:IssueDate>
  <cbc:Note>Special chars: € £ ¥ © ® ™ § ¶ • ° ± × ÷</cbc:Note>
  <cac:AccountingSupplierParty>
    <cac:Party>
      <cac:PartyName>
        <cbc:Name>Müller &amp; Associés S.à r.l.</cbc:Name>
      </cac:PartyName>
      <cac:PostalAddress>
        <cbc:StreetName>Rue de la Légion d'Honneur</cbc:StreetName>
        <cbc:CityName>Saarbrücken</cbc:CityName>
        <cac:Country>
          <cbc:IdentificationCode>DE</cbc:IdentificationCode>
        </cac:Country>
      </cac:PostalAddress>
    </cac:Party>
  </cac:AccountingSupplierParty>
  <cac:InvoiceLine>
    <cbc:ID>1</cbc:ID>
    <cbc:Note>Spëcïål cháracters: ñ ç ø å æ þ ð</cbc:Note>
    <cac:Item>
      <cbc:Name>Bücher über Köln</cbc:Name>
      <cbc:Description>Prix: 25,50 € (TVA incluse)</cbc:Description>
    </cac:Item>
  </cac:InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(ublContent);

  // Report which format the loader recognised
  const format = einvoice.getFormat();
  console.log(`Detected format: ${format}`);

  // Get the content back
  const xmlString = einvoice.getXmlString();

  // Verify all special characters are preserved. The ampersand is expected in
  // its escaped form, which is how it must appear in serialized XML text.
  expect(xmlString).toContain('€ £ ¥ © ® ™ § ¶ • ° ± × ÷');
  expect(xmlString).toContain('Müller &amp; Associés S.à r.l.');
  expect(xmlString).toContain('Rue de la Légion d\'Honneur');
  expect(xmlString).toContain('Saarbrücken');
  expect(xmlString).toContain('Spëcïål cháracters: ñ ç ø å æ þ ð');
  expect(xmlString).toContain('Bücher über Köln');
  expect(xmlString).toContain('25,50 €');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('ubl-to-cii', elapsed);
});
|
||
|
||
// Scenario: a CII invoice containing Latin-extended, Cyrillic and CJK text is
// loaded and read back; every listed fragment must appear in the returned XML.
t.test('CII to UBL encoding preservation', async () => {
  const startTime = performance.now();

  // CII invoice with international characters
  const ciiContent = `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice
  xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
  xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
  <rsm:ExchangedDocumentContext>
    <ram:GuidelineSpecifiedDocumentContextParameter>
      <ram:ID>urn:cen.eu:en16931:2017</ram:ID>
    </ram:GuidelineSpecifiedDocumentContextParameter>
  </rsm:ExchangedDocumentContext>
  <rsm:ExchangedDocument>
    <ram:ID>CROSS-FORMAT-CII-001</ram:ID>
    <ram:IssueDateTime>2025-01-25</ram:IssueDateTime>
    <ram:IncludedNote>
      <ram:Content>Multi-language: Français, Español, Português, Română, Čeština</ram:Content>
    </ram:IncludedNote>
  </rsm:ExchangedDocument>
  <rsm:SupplyChainTradeTransaction>
    <ram:ApplicableHeaderTradeAgreement>
      <ram:SellerTradeParty>
        <ram:Name>АО "Компания" (Россия)</ram:Name>
        <ram:PostalTradeAddress>
          <ram:LineOne>ул. Тверская, д. 1</ram:LineOne>
          <ram:CityName>Москва</ram:CityName>
          <ram:CountryID>RU</ram:CountryID>
        </ram:PostalTradeAddress>
      </ram:SellerTradeParty>
    </ram:ApplicableHeaderTradeAgreement>
    <ram:IncludedSupplyChainTradeLineItem>
      <ram:SpecifiedTradeProduct>
        <ram:Name>北京烤鸭 (Beijing Duck)</ram:Name>
        <ram:Description>Traditional Chinese dish: 传统中国菜</ram:Description>
      </ram:SpecifiedTradeProduct>
    </ram:IncludedSupplyChainTradeLineItem>
  </rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(ciiContent);
  const xmlString = invoice.getXmlString();

  // International text that must come back unchanged, checked in this order.
  const expectedFragments = [
    'Français, Español, Português, Română, Čeština',
    'АО "Компания" (Россия)',
    'ул. Тверская, д. 1',
    'Москва',
    '北京烤鸭 (Beijing Duck)',
    'Traditional Chinese dish: 传统中国菜',
  ];
  for (const fragment of expectedFragments) {
    expect(xmlString).toContain(fragment);
  }

  performanceTracker.addMeasurement('cii-to-ubl', performance.now() - startTime);
});
|
||
|
||
// Scenario: CII-style XML with German umlauts, sharp s and assorted symbols is
// loaded and read back; the special characters must still be present.
// NOTE(review): the scenario name mentions PDF, but only an XML string is
// loaded here — no PDF embedding or extraction is exercised.
t.test('ZUGFeRD/Factur-X encoding in PDF', async () => {
  const startTime = performance.now();

  // XML content for ZUGFeRD with special German characters. The ampersand in
  // the seller name is escaped as &amp; because a bare "&" is not well-formed XML.
  const zugferdXml = `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100">
  <rsm:ExchangedDocument>
    <ram:ID xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">ZUGFERD-ENCODING-001</ram:ID>
    <ram:Name xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">Rechnung für Büroartikel</ram:Name>
    <ram:IncludedNote xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
      <ram:Content>Sonderzeichen: ÄÖÜäöüß €§°²³µ</ram:Content>
    </ram:IncludedNote>
  </rsm:ExchangedDocument>
  <rsm:SupplyChainTradeTransaction>
    <ram:ApplicableHeaderTradeAgreement xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
      <ram:SellerTradeParty>
        <ram:Name>Großhändler für Bürobedarf GmbH &amp; Co. KG</ram:Name>
        <ram:PostalTradeAddress>
          <ram:LineOne>Königsallee 42</ram:LineOne>
          <ram:CityName>Düsseldorf</ram:CityName>
        </ram:PostalTradeAddress>
      </ram:SellerTradeParty>
    </ram:ApplicableHeaderTradeAgreement>
  </rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(zugferdXml);

  const xmlString = einvoice.getXmlString();

  // Verify German special characters; the ampersand is expected in its
  // escaped form, as it must appear in serialized XML text.
  expect(xmlString).toContain('Rechnung für Büroartikel');
  expect(xmlString).toContain('ÄÖÜäöüß €§°²³µ');
  expect(xmlString).toContain('Großhändler für Bürobedarf GmbH &amp; Co. KG');
  expect(xmlString).toContain('Königsallee');
  expect(xmlString).toContain('Düsseldorf');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('zugferd-encoding', elapsed);
});
|
||
|
||
// Scenario: a UBL invoice carrying the XRechnung 3.0 customization ID and
// German text (umlauts, sharp s, section sign, multiplication sign) is loaded
// and read back; every listed fragment must appear in the returned XML.
t.test('XRechnung encoding requirements', async () => {
  const startTime = performance.now();

  // XRechnung with strict German public sector requirements
  const xrechnungContent = `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
             xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
             xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:UBLVersionID>2.1</cbc:UBLVersionID>
  <cbc:CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:xeinkauf.de:kosit:xrechnung_3.0</cbc:CustomizationID>
  <cbc:ID>XRECHNUNG-ENCODING-001</cbc:ID>
  <cbc:Note>Leitweg-ID: 991-12345-67</cbc:Note>
  <cac:AccountingSupplierParty>
    <cac:Party>
      <cac:PartyLegalEntity>
        <cbc:RegistrationName>Behörde für Straßenbau und Verkehr</cbc:RegistrationName>
      </cac:PartyLegalEntity>
      <cac:Contact>
        <cbc:Name>Herr Müller-Lüdenscheid</cbc:Name>
        <cbc:Telephone>+49 (0)30 12345-678</cbc:Telephone>
        <cbc:ElectronicMail>müller-lüdenscheid@behoerde.de</cbc:ElectronicMail>
      </cac:Contact>
    </cac:Party>
  </cac:AccountingSupplierParty>
  <cac:InvoiceLine>
    <cbc:Note>Straßenbauarbeiten gemäß § 3 Abs. 2 VOB/B</cbc:Note>
    <cac:Item>
      <cbc:Name>Asphaltierungsarbeiten (Fahrbahn)</cbc:Name>
      <cbc:Description>Maße: 100m × 8m × 0,08m</cbc:Description>
    </cac:Item>
  </cac:InvoiceLine>
</ubl:Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(xrechnungContent);
  const xmlString = invoice.getXmlString();

  // XRechnung-specific fragments that must be present, checked in this order.
  const expectedFragments = [
    'urn:xeinkauf.de:kosit:xrechnung_3.0',
    'Leitweg-ID: 991-12345-67',
    'Behörde für Straßenbau und Verkehr',
    'Herr Müller-Lüdenscheid',
    'müller-lüdenscheid@behoerde.de',
    'gemäß § 3 Abs. 2 VOB/B',
    '100m × 8m × 0,08m',
  ];
  for (const fragment of expectedFragments) {
    expect(xmlString).toContain(fragment);
  }

  performanceTracker.addMeasurement('xrechnung-encoding', performance.now() - startTime);
});
|
||
|
||
// Scenario: push one document through two load -> getXmlString() passes using
// two separate EInvoice instances, then check that the second output still
// contains every character group from the original note and party name.
t.test('Mixed format conversion chain', async () => {
  const startTime = performance.now();

  // Start with complex content
  const originalContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>CHAIN-TEST-001</ID>
  <Note>Characters to preserve:
    Latin: àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ
    Greek: ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ αβγδεζηθικλμνξοπρστυφχψω
    Cyrillic: АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
    Math: ∑∏∫∂∇∈∉⊂⊃∪∩≤≥≠≈∞±×÷
    Currency: €£¥₹₽₪₩
    Emoji: 📧💰🌍
  </Note>
  <AccountingSupplierParty>
    <Party>
      <PartyName>
        <Name>测试公司 (Test Company) ทดสอบ บริษัท</Name>
      </PartyName>
    </Party>
  </AccountingSupplierParty>
</Invoice>`;

  // First pass: original text in, XML out.
  const firstPass = new EInvoice();
  await firstPass.loadFromString(originalContent);
  const xml1 = firstPass.getXmlString();

  // Second pass: feed the first output into a fresh instance.
  const secondPass = new EInvoice();
  await secondPass.loadFromString(xml1);
  const xml2 = secondPass.getXmlString();

  // Nothing may be lost along the chain; fragments are checked in this order.
  const mustSurvive = [
    'àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
    'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ',
    'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ',
    '∑∏∫∂∇∈∉⊂⊃∪∩≤≥≠≈∞±×÷',
    '€£¥₹₽₪₩',
    '📧💰🌍',
    '测试公司',
    'ทดสอบ บริษัท',
  ];
  for (const fragment of mustSurvive) {
    expect(xml2).toContain(fragment);
  }

  performanceTracker.addMeasurement('conversion-chain', performance.now() - startTime);
});
|
||
|
||
// Scenario: sample up to 80 corpus .xml files, load each one, and tally which
// encoding the returned XML declares per detected format. Files that fail to
// load are logged and skipped (best effort); the scenario only requires that at
// least one file was processed.
t.test('Encoding consistency across formats in corpus', async () => {
  const startTime = performance.now();
  let processedCount = 0;
  // Number of processed files whose XML contains at least one non-ASCII character.
  let nonAsciiCount = 0;
  const formatEncoding: Record<string, Record<string, number>> = {};

  // Only the XML declaration at the very start of the document is inspected, so
  // an unrelated `encoding="..."` attribute deeper in the body cannot be
  // mistaken for the document encoding. (`\s` also matches a leading BOM.)
  const encodingDeclPattern = /^\s*<\?xml\b[^>]*?\bencoding\s*=\s*["']([^"']+)["']/i;
  const nonAsciiPattern = /[^\x00-\x7F]/;

  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));

  // Sample corpus for cross-format encoding
  const sampleSize = Math.min(80, xmlFiles.length);
  const sample = xmlFiles.slice(0, sampleSize);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const einvoice = new EInvoice();

      if (typeof content === 'string') {
        await einvoice.loadFromString(content);
      } else {
        await einvoice.loadFromBuffer(content);
      }

      const format = einvoice.getFormat() || 'unknown';
      const xmlString = einvoice.getXmlString();

      // Extract encoding declaration
      const encodingMatch = xmlString.match(encodingDeclPattern);
      const encoding = encodingMatch ? encodingMatch[1] : 'none';

      // Track encoding by format
      if (!formatEncoding[format]) {
        formatEncoding[format] = {};
      }
      formatEncoding[format][encoding] = (formatEncoding[format][encoding] || 0) + 1;

      // Check for special characters
      if (nonAsciiPattern.test(xmlString)) {
        nonAsciiCount++;
      }

      processedCount++;
    } catch (error) {
      // A thrown value is not guaranteed to be an Error instance.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Cross-format encoding issue in ${file}:`, reason);
    }
  }

  console.log(`Cross-format encoding analysis (${processedCount} files):`);
  console.log(`- Files with non-ASCII characters: ${nonAsciiCount}`);
  console.log('Encoding by format:');
  for (const [format, encodings] of Object.entries(formatEncoding)) {
    console.log(` ${format}:`, encodings);
  }

  expect(processedCount).toBeGreaterThan(0);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-cross-format', elapsed);
});
|
||
|
||
// Scenario: for each sample string, embed it in the <Note> of a minimal
// invoice, load it, read the XML back, and require the string to be present.
t.test('Round-trip encoding preservation', async () => {
  const startTime = performance.now();

  // Test content with various challenging characters
  const samples = [
    { name: 'European languages', content: 'Zürich, München, København, Kraków, București' },
    { name: 'Asian languages', content: '東京 (Tokyo), 北京 (Beijing), 서울 (Seoul), กรุงเทพฯ (Bangkok)' },
    { name: 'RTL languages', content: 'العربية (Arabic), עברית (Hebrew), فارسی (Persian)' },
    { name: 'Special symbols', content: '™®©℗℠№℮¶§†‡•◊♠♣♥♦' },
    { name: 'Mathematical', content: '∀x∈ℝ: x²≥0, ∑ᵢ₌₁ⁿ i = n(n+1)/2' },
  ];

  for (const { name, content } of samples) {
    // Invoice ID suffix: the sample name upper-cased, whitespace runs -> '-'.
    const idSuffix = name.toUpperCase().replace(/\s+/g, '-');
    const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <ID>ROUND-TRIP-${idSuffix}</ID>
  <Note>${content}</Note>
</Invoice>`;

    // Round trip through a fresh instance
    const invoice = new EInvoice();
    await invoice.loadFromString(xmlContent);
    const output = invoice.getXmlString();

    // The sample text must come back verbatim
    expect(output).toContain(content);
    console.log(`Round-trip ${name}: OK`);
  }

  performanceTracker.addMeasurement('round-trip', performance.now() - startTime);
});
|
||
|
||
// Report the collected timings.
// NOTE(review): the t.test(...) calls above are not awaited in this callback;
// if t.test() runs its body asynchronously, this summary and the assertion
// below may execute before any measurement has been recorded — confirm.
performanceTracker.printSummary();

// Cross-format operations should be reasonably fast: the tracker's average
// must stay below 150.
expect(performanceTracker.getAverageTime()).toBeLessThan(150);
|
||
});
|
||
|
||
// Start the tap runner so the tests registered above are executed.
tap.start();