// einvoice/test/suite/einvoice_encoding/test.enc-04.character-escaping.ts
//
// 371 lines
// 14 KiB
// TypeScript

import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-04: Character Escaping - should handle XML character escaping correctly', async (t) => {
// ENC-04: Verify proper escaping and unescaping of special XML characters.
// This test ensures XML entities and special characters are handled correctly.
// Each scenario is registered as its own sub-test through `t.test`.
// Tracker shared by all sub-tests; each one records its elapsed time under its own label.
const performanceTracker = new PerformanceTracker('ENC-04: Character Escaping');
// Loader used by the corpus sub-test to list and read sample files.
const corpusLoader = new CorpusLoader();
t.test('Basic XML entity escaping', async () => {
  const t0 = performance.now();

  // Fixture using the predefined XML entities (&amp;, &lt;, &gt;) next to
  // literal quotes and apostrophes inside element content.
  const fixtureXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ESCAPE-TEST-001</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>Test &amp; verify: &lt;invoice&gt; with "quotes" &amp; 'apostrophes'</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name>Smith &amp; Jones Ltd.</Name>
</PartyName>
<Contact>
<ElectronicMail>info@smith&amp;jones.com</ElectronicMail>
</Contact>
</Party>
</AccountingSupplierParty>
<PaymentTerms>
<Note>Terms: 2/10 net 30 (2% if paid &lt;= 10 days)</Note>
</PaymentTerms>
<InvoiceLine>
<Note>Price comparison: USD &lt; EUR &gt; GBP</Note>
<Item>
<Description>Product "A" &amp; Product 'B'</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(fixtureXml);
  const parsed = invoice.getInvoiceData();
  const serialized = invoice.getXmlString();

  // The serialized document must keep markup-significant characters escaped.
  const escapedFragments = [
    'Smith &amp; Jones Ltd.',
    'info@smith&amp;jones.com',
    '2% if paid &lt;= 10 days',
    'USD &lt; EUR &gt; GBP',
    'Product "A" &amp; Product \'B\'',
  ];
  for (const fragment of escapedFragments) {
    expect(serialized).toContain(fragment);
  }

  // When the parsed data exposes notes, the first one must hold the unescaped text.
  // If no notes are exposed this check is skipped rather than failed.
  const notes = parsed?.notes;
  if (notes) {
    expect(notes[0]).toContain('Test & verify: <invoice> with "quotes" & \'apostrophes\'');
  }

  performanceTracker.addMeasurement('basic-escaping', performance.now() - t0);
});
t.test('Numeric character references', async () => {
  const startTime = performance.now();

  // Fixture mixing decimal (&#NNNN;) and hexadecimal (&#xHHHH;) character references.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>NUMERIC-REF-TEST</ID>
<Note>Decimal refs: &#8364; &#163; &#165; &#8482;</Note>
<PaymentMeans>
<InstructionNote>Hex refs: &#x20AC; &#x00A3; &#x00A5; &#x2122;</InstructionNote>
</PaymentMeans>
<InvoiceLine>
<Note>Mixed: &#169; 2025 &#x2014; All rights reserved&#x2122;</Note>
<Item>
<Name>Special chars: &#8211; &#8212; &#8230; &#8220;quoted&#8221;</Name>
<Description>Math: &#8804; &#8805; &#8800; &#177; &#247; &#215;</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();

  // The serializer may either resolve a reference to the literal character or keep
  // it as a decimal/hex reference; each pattern accepts all of those forms.
  expect(xmlString).toMatch(/€|&#8364;|&#x20AC;/); // Euro
  expect(xmlString).toMatch(/£|&#163;|&#x00A3;/); // Pound
  expect(xmlString).toMatch(/¥|&#165;|&#x00A5;/); // Yen
  expect(xmlString).toMatch(/™|&#8482;|&#x2122;/); // Trademark
  expect(xmlString).toMatch(/©|&#169;/); // Copyright
  expect(xmlString).toMatch(/—|&#8212;|&#x2014;/); // Em dash
  // The typographic quotes are matched via \u escapes: a plain ASCII `"` would
  // already match the XML declaration and make these checks pass unconditionally.
  expect(xmlString).toMatch(/\u201C|&#8220;|&#x201C;/); // Left double quotation mark
  expect(xmlString).toMatch(/\u201D|&#8221;|&#x201D;/); // Right double quotation mark

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('numeric-refs', elapsed);
});
t.test('Attribute value escaping', async () => {
  const t0 = performance.now();

  // Fixture whose attribute values contain &amp;, &lt;, &gt;, &quot; and both
  // quote styles, in double- and single-quoted attributes.
  const fixtureXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-ESCAPE-TEST</ID>
<PaymentMeans>
<PaymentMeansCode name="Bank &amp; Wire Transfer">30</PaymentMeansCode>
<PaymentID type="Order &lt;123&gt;">REF-2025-001</PaymentID>
<InstructionNote condition='If amount &gt; 1000 &amp; currency = "EUR"'>Special handling required</InstructionNote>
</PaymentMeans>
<TaxTotal>
<TaxAmount currencyID="EUR" note="Amount includes 19% VAT &amp; fees">119.00</TaxAmount>
</TaxTotal>
<InvoiceLine>
<DocumentReference>
<ID schemeID="Item's &quot;special&quot; code">ITEM-001</ID>
<DocumentDescription>Product with 'quotes' &amp; "double quotes"</DocumentDescription>
</DocumentReference>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(fixtureXml);
  const serialized = invoice.getXmlString();

  // Escaped attribute values must survive, whichever quote style the serializer emits.
  const attributePatterns = [
    /name="Bank &amp; Wire Transfer"|name='Bank &amp; Wire Transfer'/,
    /type="Order &lt;123&gt;"|type='Order &lt;123&gt;'/,
  ];
  for (const pattern of attributePatterns) {
    expect(serialized).toMatch(pattern);
  }

  // The three markup-significant entities must each appear somewhere in the output.
  for (const entity of ['&amp;', '&lt;', '&gt;']) {
    expect(serialized).toContain(entity);
  }

  // Quotes inside attributes: either escaped as &quot; or handled by switching quote style.
  expect(serialized).toMatch(/&quot;|'/);

  performanceTracker.addMeasurement('attribute-escaping', performance.now() - t0);
});
t.test('CDATA sections with special characters', async () => {
  const t0 = performance.now();

  // Fixture with CDATA sections holding raw <, >, & and quote characters.
  const fixtureXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>CDATA-ESCAPE-TEST</ID>
<Note><![CDATA[Special characters: < > & " ' without escaping]]></Note>
<PaymentTerms>
<Note><![CDATA[HTML content: <p>Payment terms: <b>30 days</b> net</p>]]></Note>
</PaymentTerms>
<AdditionalDocumentReference>
<ID>SCRIPT-001</ID>
<DocumentDescription><![CDATA[
JavaScript example:
if (amount > 100 && currency == "EUR") {
discount = amount * 0.05;
}
]]></DocumentDescription>
</AdditionalDocumentReference>
<InvoiceLine>
<Note><![CDATA[Price formula: if quantity >= 10 then price < 50.00]]></Note>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(fixtureXml);
  const serialized = invoice.getXmlString();

  if (!serialized.includes('CDATA')) {
    // CDATA was flattened to ordinary text, so its special characters must now be escaped.
    for (const entity of ['&lt;', '&gt;', '&amp;']) {
      expect(serialized).toContain(entity);
    }
  } else {
    // CDATA was kept: both delimiters must be present ...
    expect(serialized).toContain('<![CDATA[');
    expect(serialized).toContain(']]>');
    // ... and at least one single-line section must still hold a raw <, > or &.
    expect(serialized).toMatch(/<!\[CDATA\[.*[<>&].*\]\]>/);
  }

  performanceTracker.addMeasurement('cdata-escaping', performance.now() - t0);
});
t.test('Invalid character handling', async () => {
  const startTime = performance.now();

  // Fixture with character references that XML 1.0 does not allow (C0 controls such
  // as &#x0;, surrogate code points) next to the permitted ones (tab, LF, CR).
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>INVALID-CHAR-TEST</ID>
<Note>Control chars: &#x0; &#x1; &#x8; &#xB; &#xC; &#xE; &#x1F;</Note>
<PaymentTerms>
<Note>Valid controls: &#x9; &#xA; &#xD; (tab, LF, CR)</Note>
</PaymentTerms>
<InvoiceLine>
<Note>High Unicode: &#x10000; &#x10FFFF;</Note>
<Item>
<Description>Surrogate pairs: &#xD800; &#xDFFF; (invalid)</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();

  // Only loading/serializing sits inside the try block. The assertions on the output
  // run afterwards, so a failed assertion is reported as itself instead of being
  // caught and re-checked against the parser-error pattern.
  let xmlString: string | null = null;
  try {
    await einvoice.loadFromString(xmlContent);
    xmlString = einvoice.getXmlString();
  } catch (error: unknown) {
    // Some parsers reject invalid character references outright; that is acceptable
    // as long as the error says so.
    const message = error instanceof Error ? error.message : String(error);
    console.log('Invalid character handling:', message);
    expect(message).toMatch(/invalid.*character|character.*reference/i);
  }

  if (xmlString !== null) {
    // Valid control characters should be preserved, literally or as references.
    expect(xmlString).toMatch(/&#x9;|\t/); // Tab (explicit \t; a plain space is not a tab)
    expect(xmlString).toMatch(/&#xA;|\n/); // Line feed
    expect(xmlString).toMatch(/&#xD;|\r/); // Carriage return
    // Invalid characters might be filtered or replaced: implementation-specific,
    // so nothing is asserted about them here.
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('invalid-chars', elapsed);
});
t.test('Mixed content escaping', async () => {
  const t0 = performance.now();

  // Fixture combining entity-escaped text, a CDATA section and a numeric reference.
  const fixtureXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-ESCAPE-TEST</ID>
<Note>Regular text with &amp; ampersand</Note>
<PaymentTerms>
<Note><![CDATA[CDATA with <b>tags</b> & ampersands]]></Note>
<SettlementPeriod>
<Description>Payment due in &lt; 30 days</Description>
<DurationMeasure unitCode="DAY">30</DurationMeasure>
</SettlementPeriod>
</PaymentTerms>
<AllowanceCharge>
<ChargeIndicator>false</ChargeIndicator>
<AllowanceChargeReason>Discount for orders &gt; &#8364;1000</AllowanceChargeReason>
<Amount currencyID="EUR">50.00</Amount>
</AllowanceCharge>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(fixtureXml);
  const serialized = invoice.getXmlString();

  // Each of the three markup-significant entities must still be present after the round trip.
  for (const entity of ['&amp;', '&lt;', '&gt;']) {
    expect(serialized).toContain(entity);
  }

  performanceTracker.addMeasurement('mixed-escaping', performance.now() - t0);
});
t.test('Corpus escaping validation', async () => {
  const startTime = performance.now();
  let processedCount = 0;
  let escapedCount = 0;

  // Substrings that indicate the serialized output contains escaped characters.
  const escapeMarkers = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#'];

  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));

  // Check a bounded sample (first 50 XML files at most).
  const sampleSize = Math.min(50, xmlFiles.length);
  const sample = xmlFiles.slice(0, sampleSize);

  for (const file of sample) {
    // Loading is best-effort: a file that cannot be read, parsed or serialized is
    // logged and skipped. The assertions sit outside this try block so that a failed
    // expectation fails the test instead of being swallowed by the catch.
    let loaded: string;
    try {
      const content = await corpusLoader.readFile(file);
      const einvoice = new EInvoice();
      if (typeof content === 'string') {
        await einvoice.loadFromString(content);
      } else {
        await einvoice.loadFromBuffer(content);
      }
      loaded = einvoice.getXmlString();
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.log(`Escaping issue in ${file}:`, message);
      continue;
    }
    const xmlString = loaded;

    // Count files whose output contains at least one escape sequence.
    if (escapeMarkers.some(marker => xmlString.includes(marker))) {
      escapedCount++;
    }

    // Minimal sanity check on the serialized output: non-empty, with an XML declaration.
    expect(xmlString).toBeTruthy();
    expect(xmlString.includes('<?xml')).toBeTrue();
    processedCount++;
  }

  console.log(`Corpus escaping test: ${escapedCount}/${processedCount} files contain escaped characters`);
  // At least one sampled file must have loaded and serialized successfully.
  expect(processedCount).toBeGreaterThan(0);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-escaping', elapsed);
});
t.test('Security: XML entity expansion', async () => {
  const startTime = performance.now();

  // Small nested-entity document in the style of an entity-expansion attack.
  // Fully expanded, &lol3; is only 8 * 10 * 3 = 240 characters, so loading it is
  // safe even when expansion is enabled.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Invoice [
<!ENTITY lol "lol">
<!ENTITY lol2 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
<!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
]>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ENTITY-EXPANSION-TEST</ID>
<Note>&lol3;</Note>
</Invoice>`;

  const einvoice = new EInvoice();

  // Only loading/serializing is guarded. The size assertion runs afterwards so that
  // a failure is reported as a size problem rather than being caught and matched
  // against the entity/security error pattern.
  let xmlString: string | null = null;
  try {
    await einvoice.loadFromString(xmlContent);
    xmlString = einvoice.getXmlString();
  } catch (error: unknown) {
    // Good - entity expansion might be blocked; the error should say why.
    const message = error instanceof Error ? error.message : String(error);
    console.log('Entity expansion protection:', message);
    expect(message).toMatch(/entity|expansion|security/i);
  }

  if (xmlString !== null) {
    // If entity expansion is allowed, the output must not explode in size.
    expect(xmlString.length).toBeLessThan(1000000);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('entity-expansion', elapsed);
});
// Print performance summary
// NOTE(review): the `t.test(...)` calls above are not awaited in this function; if the
// runner executes sub-tests asynchronously, the summary and average may be computed
// before any measurement has been recorded — confirm against the runner's semantics.
performanceTracker.printSummary();
// Performance assertions
// NOTE(review): the threshold is presumably in milliseconds (sub-tests record
// performance.now() deltas) and presumably averages all labels, including the
// corpus run — confirm against PerformanceTracker.
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(100); // Escaping operations should be fast
});
// Start the tap runner now that the ENC-04 test has been registered.
tap.start();