einvoice/test/suite/einvoice_encoding/test.enc-03.iso88591-encoding.ts

351 lines
12 KiB
TypeScript
Raw Normal View History

2025-05-25 19:45:37 +00:00
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-03: ISO-8859-1 Encoding - should handle ISO-8859-1 (Latin-1) encoded documents', async (t) => {
// ENC-03: Verify correct handling of ISO-8859-1 encoded XML documents
// This test ensures support for legacy Western European character encoding
const performanceTracker = new PerformanceTracker('ENC-03: ISO-8859-1 Encoding');
const corpusLoader = new CorpusLoader();
t.test('Basic ISO-8859-1 encoding', async () => {
  const startTime = performance.now();

  // Accented letters that all exist in ISO-8859-1. Kept in one constant so the
  // fixture and the assertion below cannot drift apart.
  const accentedLetters = 'àáâãäåæçèéêëìíîïñòóôõöøùúûüý';

  // Fixture limited to characters representable in ISO-8859-1. The ampersand in the
  // customer name is written as &amp; because a bare "&" is not well-formed XML.
  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ISO88591-TEST</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>ISO-8859-1 Test: ${accentedLetters}</Note>
<DocumentCurrencyCode>EUR</DocumentCurrencyCode>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Société Générale</Name>
</PartyName>
<PostalAddress>
<StreetName>Rue de la Paix</StreetName>
<CityName>Paris</CityName>
<Country>
<IdentificationCode>FR</IdentificationCode>
</Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<AccountingCustomerParty>
<Party>
<PartyName>
<Name>Müller &amp; Söhne GmbH</Name>
</PartyName>
<PostalAddress>
<StreetName>Königsallee</StreetName>
<CityName>Düsseldorf</CityName>
</PostalAddress>
</Party>
</AccountingCustomerParty>
<InvoiceLine>
<Note>Prix unitaire: 25,50 (vingt-cinq euros cinquante)</Note>
</InvoiceLine>
</Invoice>`;

  // Convert to ISO-8859-1 buffer (one byte per character)
  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
  const einvoice = new EInvoice();

  // Only the load step is guarded: a failure here falls back to loading the decoded
  // string. Assertions run afterwards so a failed expectation is never swallowed.
  try {
    await einvoice.loadFromBuffer(iso88591Buffer);
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    console.log('ISO-8859-1 handling issue:', reason);
    // Try string conversion fallback
    await einvoice.loadFromString(iso88591Buffer.toString('latin1'));
  }

  const xmlString = einvoice.getXmlString();
  const expectedFragments = [
    'ISO88591-TEST',
    accentedLetters,
    'Société Générale',
    'Müller &amp; Söhne GmbH',
    'Königsallee',
    'Düsseldorf',
    // The euro sign is not part of ISO-8859-1, so the amount is checked without it.
    'Prix unitaire: 25,50',
  ];
  for (const fragment of expectedFragments) {
    expect(xmlString).toContain(fragment);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('basic-iso88591', elapsed);
});
t.test('ISO-8859-1 special characters', async () => {
  const startTime = performance.now();

  // Symbol row of ISO-8859-1: code points 0xA1 (¡) through 0xBF (¿), including the
  // soft hyphen (0xAD) and the pilcrow (0xB6). Generated from code points so that
  // invisible characters cannot get lost in the source file and so the fixture and
  // the assertion share a single definition.
  const firstSymbol = 0xa1;
  const lastSymbol = 0xbf;
  const specialChars = String.fromCharCode(
    ...Array.from({ length: lastSymbol - firstSymbol + 1 }, (_, offset) => firstSymbol + offset),
  );

  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ISO88591-SPECIAL</ID>
<Note>Special chars: ${specialChars}</Note>
<PaymentMeans>
<PaymentID>REF°12345</PaymentID>
<InstructionNote>Amount: £100 or 120 (±5%)</InstructionNote>
</PaymentMeans>
<TaxTotal>
<TaxSubtotal>
<TaxCategory>
<ID>S</ID>
<Percent>19</Percent>
<TaxScheme>
<Name>VAT § 19</Name>
</TaxScheme>
</TaxCategory>
</TaxSubtotal>
</TaxTotal>
<LegalMonetaryTotal>
<TaxExclusiveAmount currencyID="EUR">100.00</TaxExclusiveAmount>
<PayableAmount currencyID="EUR">119.00</PayableAmount>
</LegalMonetaryTotal>
</Invoice>`;

  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
  const einvoice = new EInvoice();

  // No try/catch: a load error or a failed expectation must fail the test, matching
  // the un-guarded sub-tests elsewhere in this suite.
  await einvoice.loadFromBuffer(iso88591Buffer);
  const xmlString = einvoice.getXmlString();

  expect(xmlString).toContain(specialChars);
  expect(xmlString).toContain('REF°12345');
  // Matches the fixture text; the euro sign cannot be encoded in ISO-8859-1.
  expect(xmlString).toContain('£100 or 120 (±5%)');
  expect(xmlString).toContain('VAT § 19');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('iso88591-special', elapsed);
});
t.test('ISO-8859-1 to UTF-8 conversion', async () => {
  const startTime = performance.now();

  // Document declared and encoded as ISO-8859-1; the loaded invoice is then read
  // back as a JavaScript string and checked for intact accented characters.
  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ISO-TO-UTF8</ID>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>André's Café</Name>
</PartyName>
<Contact>
<Name>François Müller</Name>
<ElectronicMail>françois@café.fr</ElectronicMail>
</Contact>
</Party>
</AccountingSupplierParty>
<InvoiceLine>
<Item>
<Name>Crème brûlée</Name>
<Description>Dessert français traditionnel</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
  const einvoice = new EInvoice();

  // No try/catch: previously every failed expectation was logged and discarded, so
  // the test could never fail. Errors now propagate to the test runner.
  await einvoice.loadFromBuffer(iso88591Buffer);

  // Get as UTF-8 string
  const xmlString = einvoice.getXmlString();

  // Verify content is properly converted
  const expectedFragments = [
    "André's Café",
    'François Müller',
    'françois@café.fr',
    'Crème brûlée',
    'Dessert français traditionnel',
  ];
  for (const fragment of expectedFragments) {
    expect(xmlString).toContain(fragment);
  }

  // Verify the string survives a UTF-8 encode/decode round trip unchanged
  const utf8Buffer = Buffer.from(xmlString, 'utf8');
  expect(utf8Buffer.toString('utf8')).toBe(xmlString);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('iso-to-utf8', elapsed);
});
t.test('ISO-8859-1 limitations', async () => {
  const startTime = performance.now();

  // The euro sign (U+20AC) is NOT part of ISO-8859-1 (it was added in ISO-8859-15).
  // It is placed in the fixture on purpose so the test really contains a character
  // outside the encoding's range.
  const euro = '\u20AC';

  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ISO88591-LIMITS</ID>
<Note>Euro: ${euro} Pound: £ Yen: ¥</Note>
<InvoiceLine>
<Note>Temperature: 20°C (68°F)</Note>
<Item>
<Name>Naïve café</Name>
</Item>
</InvoiceLine>
</Invoice>`;

  // Node's 'latin1' encoder keeps only the low byte of each code unit, so U+20AC is
  // written as 0xAC. The conversion is lossy by design; the byte is still valid Latin-1.
  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
  expect(iso88591Buffer.length).toBe(xmlContent.length);
  expect(iso88591Buffer.includes(0xac)).toBe(true);

  const einvoice = new EInvoice();

  // No try/catch: failed expectations must fail the test instead of being logged.
  await einvoice.loadFromBuffer(iso88591Buffer);
  const xmlString = einvoice.getXmlString();

  // These characters exist in ISO-8859-1
  expect(xmlString).toContain('£'); // Pound sign (163)
  expect(xmlString).toContain('¥'); // Yen sign (165)
  expect(xmlString).toContain('°'); // Degree sign (176)
  expect(xmlString).toContain('Naïve café');

  // The euro sign cannot survive the trip through an ISO-8859-1 byte stream.
  expect(xmlString.includes(euro)).toBe(false);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('iso88591-limits', elapsed);
});
t.test('Mixed encoding scenarios', async () => {
  const begin = performance.now();

  // Spanish/Catalan invoice declared as ISO-8859-1; every accented letter used
  // here is representable in that encoding.
  const invoice = new EInvoice();
  const latin1Bytes = Buffer.from(
    `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-ENCODING</ID>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>José García S.A.</Name>
</PartyName>
<PostalAddress>
<StreetName>Passeig de Gràcia</StreetName>
<CityName>Barcelona</CityName>
<CountrySubentity>Catalunya</CountrySubentity>
<Country>
<IdentificationCode>ES</IdentificationCode>
</Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<PaymentTerms>
<Note>Pago: 30 días fecha factura</Note>
</PaymentTerms>
</Invoice>`,
    'latin1',
  );

  await invoice.loadFromBuffer(latin1Bytes);
  const serialized = invoice.getXmlString();

  // Each fragment must come back intact after loading the Latin-1 bytes.
  const mustContain = [
    'José García S.A.',
    'Passeig de Gràcia',
    'Catalunya',
    '30 días fecha factura',
  ];
  for (const fragment of mustContain) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('mixed-encoding', performance.now() - begin);
});
t.test('Corpus ISO-8859-1 detection', async () => {
  const startTime = performance.now();

  // Matches an encoding pseudo-attribute naming ISO-8859-1 with either quote style,
  // optional whitespace around "=", and any letter case (encoding names are
  // case-insensitive in XML).
  const iso88591Declaration = /encoding\s*=\s*(["'])iso-8859-1\1/i;

  let iso88591Count = 0;
  let checkedCount = 0;
  let skippedCount = 0;

  const files = await corpusLoader.getAllFiles();
  // Check a sample of at most 40 XML files; slice() already clamps to the array length.
  const sample = files.filter((f) => f.endsWith('.xml')).slice(0, 40);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      // The declaration is pure ASCII, so decoding as UTF-8 is sufficient to find it
      // even when the rest of the file is in another single-byte encoding.
      const xmlString = Buffer.isBuffer(content) ? content.toString('utf8') : content;

      if (iso88591Declaration.test(xmlString)) {
        iso88591Count++;
        console.log(`Found ISO-8859-1 file: ${file}`);
      }
      checkedCount++;
    } catch (error: unknown) {
      // Unreadable files are still skipped (best effort), but counted so the scan
      // result is not silently skewed.
      skippedCount++;
    }
  }

  console.log(`ISO-8859-1 corpus scan: ${iso88591Count}/${checkedCount} files use ISO-8859-1`);
  if (skippedCount > 0) {
    console.log(`ISO-8859-1 corpus scan: skipped ${skippedCount} unreadable file(s)`);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-iso88591', elapsed);
});
t.test('Character reference handling', async () => {
  const startTime = performance.now();

  // Numeric character references are the way to carry characters outside
  // ISO-8859-1 (euro sign, dashes, smart quotes, ...) in a document that declares
  // that encoding. The source text itself stays pure ASCII.
  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>CHAR-REF-TEST</ID>
<Note>Euro: &#8364; Em dash: &#8212; Ellipsis: &#8230;</Note>
<InvoiceLine>
<Note>Smart quotes: &#8220;Hello&#8221; &#8216;World&#8217;</Note>
<Item>
<Name>Trademark&#8482; Product</Name>
<Description>Copyright &#169; 2025</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');
  const einvoice = new EInvoice();
  await einvoice.loadFromBuffer(iso88591Buffer);
  const xmlString = einvoice.getXmlString();

  // Character references should be preserved or converted. The alternatives are
  // grouped so the label must be followed by the character or one of its references;
  // without the group, "|" splits the whole pattern and a bare reference anywhere in
  // the document would satisfy the check.
  expect(xmlString).toMatch(/Euro:\s*(?:€|&#8364;|&#x20AC;)/i);
  expect(xmlString).toMatch(/Copyright\s*(?:©|&#169;|&#xA9;)/i);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('char-references', elapsed);
});
// Print performance summary
// NOTE(review): none of the t.test(...) calls in this callback are awaited; if the
// runner executes them asynchronously, the tracker may hold no measurements yet at
// this point — confirm against the tapbundle behaviour.
performanceTracker.printSummary();
// Performance assertions
// Every measurement recorded in this file is a performance.now() delta, so the bound
// of 120 is presumably in milliseconds — TODO confirm what getAverageTime() averages.
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(120); // ISO-8859-1 operations should be reasonably fast
});
// Start the tap runner. NOTE(review): presumably this triggers execution of the test
// registered with tap.test(...) in this file — confirm against the tapbundle docs.
tap.start();