// File: einvoice/test/suite/einvoice_encoding/test.enc-08.mixed-content.ts
// (Repository file-viewer metadata: 462 lines, 16 KiB, TypeScript; revision dated 2025-05-25 19:45:37 +00:00)

import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
/**
 * ENC-08 top-level test. Its callback registers one `t.test` sub-test per
 * mixed-content scenario (basic, special characters, CDATA, comments,
 * whitespace, deep nesting, international text, corpus scan) and finishes
 * with a timing summary plus an average-time assertion.
 */
tap.test('ENC-08: Mixed Content Encoding - should handle mixed content (text and elements) correctly', async (t) => {
// ENC-08: Verify proper encoding of mixed content scenarios
// This test ensures text nodes, elements, CDATA, and comments are properly encoded together
// Shared by every sub-test: each one records a single elapsed-time measurement on it.
const performanceTracker = new PerformanceTracker('ENC-08: Mixed Content');
// Used only by the corpus analysis sub-test to list and read corpus files.
const corpusLoader = new CorpusLoader();
// Sub-test: loads an invoice whose <Note> elements interleave plain text with
// inline child elements, then checks that the XML string handed back by
// getXmlString() still contains every text run and inline element.
t.test('Basic mixed content', async () => {
  const t0 = performance.now();

  // Fixture lines stay flush-left: their whitespace is part of the XML payload.
  const mixedInvoiceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-BASIC-001</ID>
<Note>
This invoice includes <emphasis>important</emphasis> payment terms:
<term>Net 30 days</term> with <percentage>2%</percentage> early payment discount.
Please pay by <date>2025-02-25</date>.
</Note>
<PaymentTerms>
<Note>
Payment due in <days>30</days> days.
<condition>If paid within <days>10</days> days: <discount>2%</discount> discount</condition>
<condition>If paid after <days>30</days> days: <penalty>1.5%</penalty> interest</condition>
</Note>
</PaymentTerms>
<InvoiceLine>
<Note>
Item includes <quantity>10</quantity> units of <product>Widget A</product>
at <price currency="EUR">9.99</price> each.
Total: <total currency="EUR">99.90</total>
</Note>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(mixedInvoiceXml);
  const serialized = invoice.getXmlString();

  const expectedFragments = [
    // Text runs and inline elements of the top-level Note
    'This invoice includes',
    '<emphasis>important</emphasis>',
    'payment terms:',
    '<term>Net 30 days</term>',
    'with',
    '<percentage>2%</percentage>',
    'Please pay by',
    '<date>2025-02-25</date>',
    // Nested mixed content inside PaymentTerms/Note/condition
    'If paid within',
    '<days>10</days>',
    'days:',
    '<discount>2%</discount>',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('basic-mixed', performance.now() - t0);
});
// Sub-test: mixed content that combines inline elements with escaped entities
// (&amp; &lt; &gt;) and non-ASCII symbols (€, §, ü, ö, ≥, →, ×).
//
// Fix: the fixture previously lacked the '€', '≥' and '→' characters that the
// assertions below look for, so the test could never pass. The fixture now
// actually contains every fragment that is asserted.
t.test('Mixed content with special characters', async () => {
  const startTime = performance.now();

  // Fixture lines stay flush-left: their whitespace is part of the XML payload.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-SPECIAL-001</ID>
<Note>
Price: <amount>100.00</amount> € (VAT <percentage>19%</percentage> = <vat>19.00</vat> €)
Total: <total>119.00</total> € for <company>Müller &amp; Söhne GmbH</company>
</Note>
<DocumentReference>
<DocumentDescription>
See contract <ref>§12.3</ref> for terms &amp; conditions.
<important>Payment &lt; 30 days</important> required.
Contact: <email>info@müller-söhne.de</email>
</DocumentDescription>
</DocumentReference>
<PaymentTerms>
<Note>
<condition type="discount">≥ 100 items → 5% discount</condition>
<condition type="penalty">&gt; 30 days → 1.5% interest</condition>
<formula>Total = Price × Quantity × (1 + VAT%)</formula>
</Note>
</PaymentTerms>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();

  // Verify special characters in mixed content.
  // Entities are asserted in their escaped form (&amp; / &lt; / &gt;).
  expect(xmlString).toContain('Price:');
  expect(xmlString).toContain('€');
  expect(xmlString).toContain('Müller &amp; Söhne GmbH');
  expect(xmlString).toContain('§12.3');
  expect(xmlString).toContain('terms &amp; conditions');
  expect(xmlString).toContain('&lt; 30 days');
  expect(xmlString).toContain('info@müller-söhne.de');
  expect(xmlString).toContain('≥ 100 items → 5% discount');
  expect(xmlString).toContain('&gt; 30 days → 1.5% interest');
  expect(xmlString).toContain('×');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('special-mixed', elapsed);
});
// Sub-test: text nodes, inline elements and CDATA sections side by side.
// The surrounding text and the inline element must appear in the output; the
// CDATA payload may come back either as a CDATA section or as escaped text.
t.test('Mixed content with CDATA sections', async () => {
  const t0 = performance.now();

  // Fixture lines stay flush-left: their whitespace is part of the XML payload.
  const cdataInvoiceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-CDATA-001</ID>
<Note>
Regular text before CDATA.
<![CDATA[This section contains <unescaped> tags & special chars: < > & " ']]>
Text after CDATA with <element>nested element</element>.
</Note>
<AdditionalDocumentReference>
<DocumentDescription>
HTML content example:
<![CDATA[
<html>
<body>
<h1>Invoice Details</h1>
<p>Amount: 100.00</p>
<p>VAT: 19%</p>
</body>
</html>
]]>
End of description.
</DocumentDescription>
</AdditionalDocumentReference>
<PaymentTerms>
<Note>
Formula: <formula>price * quantity</formula>
<![CDATA[JavaScript: if (amount > 100) { discount = 5%; }]]>
Applied to all items.
</Note>
</PaymentTerms>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(cdataInvoiceXml);
  const serialized = invoice.getXmlString();

  // Content around the CDATA sections must be present
  ['Regular text before CDATA', 'Text after CDATA', '<element>nested element</element>'].forEach(
    (fragment) => {
      expect(serialized).toContain(fragment);
    },
  );

  // The CDATA payload itself: accepted either escaped/inlined or kept as a CDATA section
  if (!serialized.includes('CDATA')) {
    // No CDATA marker left, so the content must show up as (possibly escaped) text
    expect(serialized).toMatch(/&lt;unescaped&gt;|<unescaped>/);
  } else {
    // CDATA kept: both delimiters must be present
    expect(serialized).toContain('<![CDATA[');
    expect(serialized).toContain(']]>');
  }

  performanceTracker.addMeasurement('cdata-mixed', performance.now() - t0);
});
// Sub-test: XML comments interleaved with text and inline elements.
// Only text and elements are asserted; whether the comments themselves are
// kept in the output is deliberately not checked.
t.test('Mixed content with comments', async () => {
  const t0 = performance.now();

  // Fixture lines stay flush-left: their whitespace is part of the XML payload.
  const commentedInvoiceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-COMMENTS-001</ID>
<Note>
<!-- Start of payment terms -->
Payment is due in <days>30</days> days.
<!-- Discount information follows -->
<discount>Early payment: 2% if paid within 10 days</discount>
<!-- End of payment terms -->
</Note>
<DocumentReference>
<DocumentDescription>
See attachment <!-- PDF document --> for details.
<attachment>invoice.pdf</attachment> <!-- 2 pages -->
Contact <!-- via email -->: <email>info@example.com</email>
</DocumentDescription>
</DocumentReference>
<InvoiceLine>
<!-- Line item 1 -->
<Note>
Product: <name>Widget</name> <!-- Best seller -->
Quantity: <qty>10</qty> <!-- In stock -->
Price: <price>9.99</price> <!-- EUR -->
</Note>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(commentedInvoiceXml);
  const serialized = invoice.getXmlString();

  // Text and element fragments that must be present regardless of comment handling
  const expectedFragments = [
    'Payment is due in',
    '<days>30</days>',
    'days.',
    '<discount>Early payment: 2% if paid within 10 days</discount>',
    'See attachment',
    'for details.',
    '<attachment>invoice.pdf</attachment>',
    'Contact',
    '<email>info@example.com</email>',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('comments-mixed', performance.now() - t0);
});
// Sub-test: mixed content spanning several lines, including a Note marked
// xml:space="preserve" and a description split by empty <break/> elements.
// Assertions are intentionally loose: only word fragments and the inline
// element are checked, plus the "Leading spaces" text when the xml:space
// attribute is present in the output.
t.test('Whitespace preservation in mixed content', async () => {
  const t0 = performance.now();

  // Fixture lines stay flush-left: their whitespace is part of the XML payload.
  const whitespaceInvoiceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-WHITESPACE-001</ID>
<Note>Text with multiple spaces and
newlines should be preserved.
<element>Indented element</element>
More text with tabs between words.
</Note>
<PaymentTerms>
<Note xml:space="preserve"> Leading spaces
<term>Net 30</term> Trailing spaces
Middle spaces preserved.
End with spaces </Note>
</PaymentTerms>
<DocumentReference>
<DocumentDescription>Line 1
<break/>
Line 2
<break/>
Line 3</DocumentDescription>
</DocumentReference>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(whitespaceInvoiceXml);
  const serialized = invoice.getXmlString();

  // Whitespace handling varies by implementation, so only stable fragments are required
  const stableFragments = [
    'Text with',
    'spaces',
    '<element>Indented element</element>',
    'More text with',
    'words',
  ];
  stableFragments.forEach((fragment) => {
    expect(serialized).toContain(fragment);
  });

  // Only when the xml:space attribute is present in the output is the preserved text checked
  const keepsPreserveAttribute = serialized.includes('xml:space="preserve"');
  if (keepsPreserveAttribute) {
    expect(serialized).toMatch(/Leading spaces|^\s+Leading/m);
  }

  performanceTracker.addMeasurement('whitespace-mixed', performance.now() - t0);
});
// Sub-test: mixed content nested several element levels deep
// (Note > customer > address > street/city/..., and PaymentTerms > Note > terms > ...).
// Assertions cover the first Note's text runs and inline elements.
t.test('Deeply nested mixed content', async () => {
  const t0 = performance.now();

  // Fixture lines stay flush-left: their whitespace is part of the XML payload.
  const nestedInvoiceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-NESTED-001</ID>
<Note>
Level 1: Invoice for <customer>
<name>ABC Corp</name> (Customer ID: <id>C-12345</id>)
<address>
Located at <street>123 Main St</street>,
<city>New York</city>, <state>NY</state> <zip>10001</zip>
</address>
</customer> dated <date>2025-01-25</date>.
</Note>
<PaymentTerms>
<Note>
<terms>
Standard terms: <standard>
Net <days>30</days> days from <reference>
invoice date (<date>2025-01-25</date>)
</reference>
</standard>
<special>
Special conditions: <condition num="1">
For orders &gt; <amount currency="EUR">1000</amount>:
<discount>5%</discount> discount
</condition>
</special>
</terms>
</Note>
</PaymentTerms>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(nestedInvoiceXml);
  const serialized = invoice.getXmlString();

  // Every level of the nested customer structure must still be present
  const expectedFragments = [
    'Level 1: Invoice for',
    '<customer>',
    '<name>ABC Corp</name>',
    '(Customer ID:',
    '<id>C-12345</id>',
    'Located at',
    '<street>123 Main St</street>',
    '<city>New York</city>',
    '<state>NY</state>',
    '<zip>10001</zip>',
    'dated',
    '<date>2025-01-25</date>',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('nested-mixed', performance.now() - t0);
});
// Sub-test: mixed content containing German, French, Spanish, Chinese,
// Japanese and Arabic text next to inline elements.
//
// Fix: several assertions looked for text that the fixture did not contain
// ('€1.234,56', the Chinese/Japanese payment-term phrases, 'Paiement dû dans',
// '书', '本'), so the test could not pass. The fixture now carries exactly the
// text that is asserted; the assertions themselves are unchanged.
t.test('International mixed content', async () => {
  const startTime = performance.now();

  // Fixture lines stay flush-left: their whitespace is part of the XML payload.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-INTL-001</ID>
<Note>
Invoice for <company lang="de">Müller GmbH</company> from <city>München</city>.
Total: <amount currency="EUR">€1.234,56</amount> (inkl. <tax>19% MwSt</tax>).
支付条款: <terms lang="zh">30天内付款</terms>
お支払い: <terms lang="ja">30日以内</terms>
</Note>
<PaymentTerms>
<Note>
<multilang>
<en>Payment due in <days>30</days> days</en>
<de>Zahlung fällig in <days>30</days> Tagen</de>
<fr>Paiement dû dans <days>30</days> jours</fr>
<es>Pago debido en <days>30</days> días</es>
</multilang>
</Note>
</PaymentTerms>
<InvoiceLine>
<Note>
Product: <name lang="multi">
<en>Book</en> / <de>Buch</de> / <fr>Livre</fr> /
<zh>书</zh> / <ja>本</ja> / <ar>كتاب</ar>
</name>
Price: <price>25.00</price> per <unit>Stück</unit>
</Note>
</InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();

  // Verify international mixed content
  // Latin script with diacritics and the euro sign
  expect(xmlString).toContain('Müller GmbH');
  expect(xmlString).toContain('München');
  expect(xmlString).toContain('€1.234,56');
  expect(xmlString).toContain('19% MwSt');
  // Chinese and Japanese text around and inside inline elements
  expect(xmlString).toContain('支付条款:');
  expect(xmlString).toContain('30天内付款');
  expect(xmlString).toContain('お支払い:');
  expect(xmlString).toContain('30日以内');
  // Per-language payment sentences
  expect(xmlString).toContain('Zahlung fällig in');
  expect(xmlString).toContain('Tagen');
  expect(xmlString).toContain('Paiement dû dans');
  // Product name translations, including right-to-left Arabic
  expect(xmlString).toContain('书');
  expect(xmlString).toContain('本');
  expect(xmlString).toContain('كتاب');
  expect(xmlString).toContain('Stück');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('intl-mixed', elapsed);
});
// Sub-test: scans a sample of corpus XML files for mixed-content patterns and
// CDATA sections, logs what it finds, and requires that at least one file was
// processed. Read failures for individual files are logged and skipped.
//
// Changes from the previous version:
//  - the catch variable is narrowed before reading `.message`, so non-Error
//    throws are reported instead of printing `undefined`;
//  - the detection regex is built once and executed once per file (it used to
//    run twice: `test` followed by `match`);
//  - text runs must contain a non-whitespace character, so plain indentation
//    between sibling elements is no longer counted as mixed content.
t.test('Corpus mixed content analysis', async () => {
  const startTime = performance.now();

  const sampleLimit = 60; // maximum number of corpus files inspected
  const maxPatternExamples = 5; // cap on logged mixed-content snippets
  const snippetLength = 100; // characters of each match shown in the log

  // text, then <element>content</element>, then text again. Both outer text
  // runs need at least one non-whitespace character.
  const mixedPattern = />\s*[^<\s][^<]*<[^>]+>[^<]+<\/[^>]+>\s*[^<\s][^<]*</;

  let processedCount = 0;
  let mixedContentCount = 0;
  const mixedContentExamples: string[] = [];

  const files = await corpusLoader.getAllFiles();
  // Sample corpus for mixed content patterns
  const sample = files.filter((f) => f.endsWith('.xml')).slice(0, sampleLimit);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      // readFile may hand back a Buffer or a string; normalise to a string
      const xmlString: string = Buffer.isBuffer(content) ? content.toString('utf8') : content;

      // Look for mixed content patterns
      const match = xmlString.match(mixedPattern);
      if (match) {
        mixedContentCount++;
        if (mixedContentExamples.length < maxPatternExamples) {
          mixedContentExamples.push(`${file}: "${match[0].substring(0, snippetLength)}..."`);
        }
      }

      // Also check for CDATA sections; at most one CDATA example is recorded overall
      if (
        xmlString.includes('<![CDATA[') &&
        !mixedContentExamples.some((ex) => ex.includes('CDATA'))
      ) {
        mixedContentExamples.push(`${file}: Contains CDATA sections`);
      }

      processedCount++;
    } catch (error: unknown) {
      // Best effort: one unreadable file must not abort the whole scan
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Mixed content parsing issue in ${file}:`, reason);
    }
  }

  console.log(`Mixed content corpus analysis (${processedCount} files):`);
  console.log(`- Files with mixed content patterns: ${mixedContentCount}`);
  if (mixedContentExamples.length > 0) {
    console.log('Mixed content examples:');
    mixedContentExamples.forEach((ex) => console.log(` ${ex}`));
  }

  expect(processedCount).toBeGreaterThan(0);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-mixed', elapsed);
});
// Print performance summary
// NOTE(review): the t.test(...) calls above are not awaited in this callback.
// Confirm that the harness has already run them (and recorded their
// measurements) by the time the summary and the assertion below execute.
performanceTracker.printSummary();
// Performance assertions
// Upper bound on the tracker's average measurement; presumably milliseconds,
// since every measurement is a performance.now() delta.
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(150); // Mixed content operations may be slightly slower
});
// Start the tap runner for the test registered above.
tap.start();