351 lines
12 KiB
TypeScript
351 lines
12 KiB
TypeScript
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
|
import * as plugins from '../plugins.js';
|
|
import { EInvoice } from '../../../ts/index.js';
|
|
import { CorpusLoader } from '../corpus.loader.js';
|
|
import { PerformanceTracker } from '../performance.tracker.js';
|
|
|
|
tap.test('ENC-03: ISO-8859-1 Encoding - should handle ISO-8859-1 (Latin-1) encoded documents', async (t) => {
|
|
// ENC-03: verifies that XML documents encoded as ISO-8859-1 (Latin-1), the
// legacy Western European single-byte encoding, are handled correctly.

// Receives one elapsed-time measurement per sub-test and prints the summary
// at the end of this test.
const performanceTracker = new PerformanceTracker('ENC-03: ISO-8859-1 Encoding');
// Used by the corpus-scan sub-test to list and read the sample files.
const corpusLoader = new CorpusLoader();
|
|
|
|
// Sub-test: loads an invoice with common Latin-1 letters from a latin1-encoded
// buffer and checks that the text is still present in the XML read back.
t.test('Basic ISO-8859-1 encoding', async () => {
  const startTime = performance.now();

  // Create ISO-8859-1 content with Latin-1 specific characters.
  // `&` is written as `&amp;` so the document stays well-formed. The Euro sign
  // is written as the character reference `&#8364;` because U+20AC is outside
  // the U+0000–U+00FF range that Buffer's 'latin1' encoding can represent; a
  // literal Euro sign would be truncated to a different byte.
  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>ISO88591-TEST</ID>
  <IssueDate>2025-01-25</IssueDate>
  <Note>ISO-8859-1 Test: àáâãäåæçèéêëìíîïñòóôõöøùúûüý</Note>
  <DocumentCurrencyCode>EUR</DocumentCurrencyCode>
  <AccountingSupplierParty>
    <Party>
      <PartyName>
        <Name>Société Générale</Name>
      </PartyName>
      <PostalAddress>
        <StreetName>Rue de la Paix</StreetName>
        <CityName>Paris</CityName>
        <Country>
          <IdentificationCode>FR</IdentificationCode>
        </Country>
      </PostalAddress>
    </Party>
  </AccountingSupplierParty>
  <AccountingCustomerParty>
    <Party>
      <PartyName>
        <Name>Müller &amp; Söhne GmbH</Name>
      </PartyName>
      <PostalAddress>
        <StreetName>Königsallee</StreetName>
        <CityName>Düsseldorf</CityName>
      </PostalAddress>
    </Party>
  </AccountingCustomerParty>
  <InvoiceLine>
    <Note>Prix unitaire: 25,50 &#8364; (vingt-cinq euros cinquante)</Note>
  </InvoiceLine>
</Invoice>`;

  // Convert to ISO-8859-1 buffer (one byte per character).
  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');

  const einvoice = new EInvoice();
  try {
    await einvoice.loadFromBuffer(iso88591Buffer);

    const xmlString = einvoice.getXmlString();
    expect(xmlString).toContain('ISO88591-TEST');
    expect(xmlString).toContain('àáâãäåæçèéêëìíîïñòóôõöøùúûüý');
    expect(xmlString).toContain('Société Générale');
    expect(xmlString).toContain('Müller &amp; Söhne GmbH');
    expect(xmlString).toContain('Königsallee');
    expect(xmlString).toContain('Düsseldorf');
    // The Euro sign may come back decoded or still as a character reference.
    expect(xmlString).toMatch(/25,50 (?:€|&#8364;)/);
  } catch (error) {
    // NOTE(review): this catch also receives assertion failures from the try
    // block, not only load errors — confirm that falling back is intended.
    console.log('ISO-8859-1 handling issue:', error.message);
    // Try string conversion fallback: decode the bytes ourselves and load the
    // resulting string instead of the raw buffer.
    const decoded = iso88591Buffer.toString('latin1');
    await einvoice.loadFromString(decoded);
    expect(einvoice.getXmlString()).toContain('ISO88591-TEST');
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('basic-iso88591', elapsed);
});
|
|
|
|
// Sub-test: loads an invoice that uses Latin-1 punctuation and symbol
// characters and checks that they are present in the XML read back.
t.test('ISO-8859-1 special characters', async () => {
  const startTime = performance.now();

  // Symbols taken from the U+00A1–U+00BF part of ISO-8859-1. The Euro sign is
  // not part of ISO-8859-1, so it is written as the character reference
  // `&#8364;`; a literal Euro sign would be truncated by the 'latin1' encoding.
  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>ISO88591-SPECIAL</ID>
  <Note>Special chars: ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿</Note>
  <PaymentMeans>
    <PaymentID>REF°12345</PaymentID>
    <InstructionNote>Amount: £100 or &#8364;120 (±5%)</InstructionNote>
  </PaymentMeans>
  <TaxTotal>
    <TaxSubtotal>
      <TaxCategory>
        <ID>S</ID>
        <Percent>19</Percent>
        <TaxScheme>
          <Name>VAT § 19</Name>
        </TaxScheme>
      </TaxCategory>
    </TaxSubtotal>
  </TaxTotal>
  <LegalMonetaryTotal>
    <TaxExclusiveAmount currencyID="EUR">100.00</TaxExclusiveAmount>
    <PayableAmount currencyID="EUR">119.00</PayableAmount>
  </LegalMonetaryTotal>
</Invoice>`;

  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');

  const einvoice = new EInvoice();
  try {
    await einvoice.loadFromBuffer(iso88591Buffer);

    const xmlString = einvoice.getXmlString();
    expect(xmlString).toContain('¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿');
    expect(xmlString).toContain('REF°12345');
    // The Euro sign may come back decoded or still as a character reference.
    expect(xmlString).toMatch(/£100 or (?:€|&#8364;)120 \(±5%\)/);
    expect(xmlString).toContain('VAT § 19');
  } catch (error) {
    // NOTE(review): this catch also receives assertion failures from the try
    // block and only logs them, so this sub-test cannot fail — confirm that
    // best-effort behaviour is intended.
    console.log('ISO-8859-1 special characters:', error.message);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('iso88591-special', elapsed);
});
|
|
|
|
// Sub-test: loads a latin1-encoded invoice and checks that the XML string read
// back contains the accented text as ordinary JavaScript (Unicode) characters.
t.test('ISO-8859-1 to UTF-8 conversion', async () => {
  const startTime = performance.now();

  // Every character below fits in ISO-8859-1, so the latin1 buffer built from
  // this string represents the text without loss.
  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>ISO-TO-UTF8</ID>
  <AccountingSupplierParty>
    <Party>
      <PartyName>
        <Name>André's Café</Name>
      </PartyName>
      <Contact>
        <Name>François Müller</Name>
        <ElectronicMail>françois@café.fr</ElectronicMail>
      </Contact>
    </Party>
  </AccountingSupplierParty>
  <InvoiceLine>
    <Item>
      <Name>Crème brûlée</Name>
      <Description>Dessert français traditionnel</Description>
    </Item>
  </InvoiceLine>
</Invoice>`;

  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');

  const einvoice = new EInvoice();
  try {
    await einvoice.loadFromBuffer(iso88591Buffer);

    // Get the document back as a string.
    const xmlString = einvoice.getXmlString();

    // Verify content is properly converted.
    expect(xmlString).toContain("André's Café");
    expect(xmlString).toContain('François Müller');
    expect(xmlString).toContain('françois@café.fr');
    expect(xmlString).toContain('Crème brûlée');
    expect(xmlString).toContain('Dessert français traditionnel');

    // Verify output is valid UTF-8: encoding the string to UTF-8 bytes and
    // decoding those bytes again must reproduce the same string.
    const utf8Buffer = Buffer.from(xmlString, 'utf8');
    expect(utf8Buffer.toString('utf8')).toBe(xmlString);
  } catch (error) {
    // NOTE(review): this catch also receives assertion failures from the try
    // block and only logs them, so this sub-test cannot fail — confirm that
    // best-effort behaviour is intended.
    console.log('ISO-8859-1 to UTF-8 conversion:', error.message);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('iso-to-utf8', elapsed);
});
|
|
|
|
// Sub-test: exercises the edge of the ISO-8859-1 repertoire — characters that
// exist in it (pound, yen, degree, accented letters) next to one that does
// not (the Euro sign).
t.test('ISO-8859-1 limitations', async () => {
  const startTime = performance.now();

  // The literal Euro sign is deliberate here: it is outside ISO-8859-1, so
  // Buffer.from(..., 'latin1') below cannot represent it and truncates it to a
  // single byte in the Latin-1 range.
  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>ISO88591-LIMITS</ID>
  <Note>Euro: € Pound: £ Yen: ¥</Note>
  <InvoiceLine>
    <Note>Temperature: 20°C (68°F)</Note>
    <Item>
      <Name>Naïve café</Name>
    </Item>
  </InvoiceLine>
</Invoice>`;

  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');

  const einvoice = new EInvoice();
  try {
    await einvoice.loadFromBuffer(iso88591Buffer);

    const xmlString = einvoice.getXmlString();
    // These characters exist in ISO-8859-1.
    expect(xmlString).toContain('£'); // Pound sign (163)
    expect(xmlString).toContain('¥'); // Yen sign (165)
    expect(xmlString).toContain('°'); // Degree sign (176)
    expect(xmlString).toContain('Naïve café');

    // Note: Euro sign (€) is NOT in ISO-8859-1 (it's in ISO-8859-15), so no
    // assertion is made about it; it might be replaced or cause issues.
  } catch (error) {
    // NOTE(review): this catch also receives assertion failures from the try
    // block and only logs them, so this sub-test cannot fail — confirm that
    // best-effort behaviour is intended.
    console.log('ISO-8859-1 limitation test:', error.message);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('iso88591-limits', elapsed);
});
|
|
|
|
// Sub-test: loads a document declared as ISO-8859-1 that contains Spanish and
// Catalan text and asserts that every accented string is present afterwards.
// Unlike the best-effort sub-tests, errors here are not caught, so a load or
// assertion failure propagates out of the callback.
t.test('Mixed encoding scenarios', async () => {
  const startTime = performance.now();

  // All characters in this fixture fit in ISO-8859-1.
  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>MIXED-ENCODING</ID>
  <AccountingSupplierParty>
    <Party>
      <PartyName>
        <Name>José García S.A.</Name>
      </PartyName>
      <PostalAddress>
        <StreetName>Passeig de Gràcia</StreetName>
        <CityName>Barcelona</CityName>
        <CountrySubentity>Catalunya</CountrySubentity>
        <Country>
          <IdentificationCode>ES</IdentificationCode>
        </Country>
      </PostalAddress>
    </Party>
  </AccountingSupplierParty>
  <PaymentTerms>
    <Note>Pago: 30 días fecha factura</Note>
  </PaymentTerms>
</Invoice>`;

  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');

  const einvoice = new EInvoice();
  await einvoice.loadFromBuffer(iso88591Buffer);

  const xmlString = einvoice.getXmlString();
  expect(xmlString).toContain('José García S.A.');
  expect(xmlString).toContain('Passeig de Gràcia');
  expect(xmlString).toContain('Catalunya');
  expect(xmlString).toContain('30 días fecha factura');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('mixed-encoding', elapsed);
});
|
|
|
|
// Sub-test: scans a sample of the corpus XML files and reports how many of
// them declare ISO-8859-1 as their encoding. Informational only — it logs
// counts and makes no assertions.
t.test('Corpus ISO-8859-1 detection', async () => {
  const startTime = performance.now();

  // Matches encoding="ISO-8859-1" with either quote style. Encoding names are
  // compared case-insensitively and whitespace around `=` is allowed in an XML
  // declaration, so the pattern tolerates both.
  const iso88591Declaration = /encoding\s*=\s*(["'])iso-8859-1\1/i;
  // Upper bound on how many corpus files are inspected.
  const maxSampleSize = 40;

  let iso88591Count = 0;
  let checkedCount = 0;
  let skippedCount = 0;

  const files = await corpusLoader.getAllFiles();
  const sample = files.filter((f) => f.endsWith('.xml')).slice(0, maxSampleSize);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      // Decoding as UTF-8 is sufficient for this check: the declaration being
      // searched for consists of ASCII characters only.
      const xmlString = Buffer.isBuffer(content) ? content.toString('utf8') : content;

      // Check for ISO-8859-1 encoding declaration
      if (iso88591Declaration.test(xmlString)) {
        iso88591Count++;
        console.log(`Found ISO-8859-1 file: ${file}`);
      }

      checkedCount++;
    } catch (error) {
      // Skip problematic files, but keep count so the scan result is honest
      // about how much of the sample was actually inspected.
      skippedCount++;
    }
  }

  console.log(`ISO-8859-1 corpus scan: ${iso88591Count}/${checkedCount} files use ISO-8859-1`);
  if (skippedCount > 0) {
    console.log(`ISO-8859-1 corpus scan: skipped ${skippedCount} unreadable files`);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-iso88591', elapsed);
});
|
|
|
|
// Sub-test: characters that have no ISO-8859-1 code point are carried in the
// document as numeric character references; after loading, each one must be
// present either decoded or still as its reference. Errors are not caught
// here, so a load or assertion failure propagates out of the callback.
t.test('Character reference handling', async () => {
  const startTime = performance.now();

  // Numeric character references are plain ASCII, so they survive the latin1
  // encoding below. Writing these characters literally would not work:
  // everything above U+00FF is truncated by Buffer's 'latin1' encoding.
  //   &#8364; Euro sign        &#8212; em dash          &#8230; ellipsis
  //   &#8220; &#8221; double quotes   &#8216; &#8217; single quotes
  //   &#8482; trademark sign   &#169; copyright sign
  const xmlContent = `<?xml version="1.0" encoding="ISO-8859-1"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>CHAR-REF-TEST</ID>
  <Note>Euro: &#8364; Em dash: &#8212; Ellipsis: &#8230;</Note>
  <InvoiceLine>
    <Note>Smart quotes: &#8220;Hello&#8221; &#8216;World&#8217;</Note>
    <Item>
      <Name>Trademark&#8482; Product</Name>
      <Description>Copyright &#169; 2025</Description>
    </Item>
  </InvoiceLine>
</Invoice>`;

  const iso88591Buffer = Buffer.from(xmlContent, 'latin1');

  const einvoice = new EInvoice();
  await einvoice.loadFromBuffer(iso88591Buffer);

  const xmlString = einvoice.getXmlString();
  // Character references should be preserved or converted.
  expect(xmlString).toMatch(/Euro: (?:€|&#8364;)/);
  expect(xmlString).toMatch(/Copyright (?:©|&#169;)/);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('char-references', elapsed);
});
|
|
|
|
// Print performance summary
performanceTracker.printSummary();

// Performance assertion: ISO-8859-1 operations should be reasonably fast.
// Each measurement handed to the tracker is a performance.now() difference,
// i.e. milliseconds; the threshold is presumably in the same unit.
// NOTE(review): the sub-tests are registered with t.test() without `await`;
// confirm they have completed (and recorded their measurements) before this
// average is read.
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(120);
|
|
});
|
|
|
|
// Start the tap run for the test registered above.
tap.start();