462 lines
16 KiB
TypeScript
462 lines
16 KiB
TypeScript
|
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
|||
|
import * as plugins from '../plugins.js';
|
|||
|
import { EInvoice } from '../../../ts/index.js';
|
|||
|
import { CorpusLoader } from '../corpus.loader.js';
|
|||
|
import { PerformanceTracker } from '../performance.tracker.js';
|
|||
|
|
|||
|
tap.test('ENC-08: Mixed Content Encoding - should handle mixed content (text and elements) correctly', async (t) => {
|
|||
|
// ENC-08: Verify proper encoding of mixed content scenarios
|
|||
|
// This test ensures text nodes, elements, CDATA, and comments are properly encoded together
|
|||
|
|
|||
|
// Collects the per-sub-test timings that are summarised at the end of ENC-08.
const performanceTracker = new PerformanceTracker('ENC-08: Mixed Content');
// Source of corpus files for the corpus analysis sub-test.
const corpusLoader = new CorpusLoader();
|
|||
|
|
|||
|
t.test('Basic mixed content', async () => {
  // Loads an invoice whose notes interleave free text with inline child
  // elements, reads it back via getXmlString(), and checks that each text
  // run and each inline element is present in the result.
  const startedAt = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-BASIC-001</ID>
<Note>
This invoice includes <emphasis>important</emphasis> payment terms:
<term>Net 30 days</term> with <percentage>2%</percentage> early payment discount.
Please pay by <date>2025-02-25</date>.
</Note>
<PaymentTerms>
<Note>
Payment due in <days>30</days> days.
<condition>If paid within <days>10</days> days: <discount>2%</discount> discount</condition>
<condition>If paid after <days>30</days> days: <penalty>1.5%</penalty> interest</condition>
</Note>
</PaymentTerms>
<InvoiceLine>
<Note>
Item includes <quantity>10</quantity> units of <product>Widget A</product>
at <price currency="EUR">€9.99</price> each.
Total: <total currency="EUR">€99.90</total>
</Note>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Text runs and inline elements of the top-level note, in document order.
  const topLevelFragments = [
    'This invoice includes',
    '<emphasis>important</emphasis>',
    'payment terms:',
    '<term>Net 30 days</term>',
    'with',
    '<percentage>2%</percentage>',
    'Please pay by',
    '<date>2025-02-25</date>',
  ];
  for (const fragment of topLevelFragments) {
    expect(serialized).toContain(fragment);
  }

  // Mixed content nested one level deeper, inside <condition>.
  const nestedFragments = [
    'If paid within',
    '<days>10</days>',
    'days:',
    '<discount>2%</discount>',
  ];
  for (const fragment of nestedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('basic-mixed', performance.now() - startedAt);
});
|
|||
|
|
|||
|
t.test('Mixed content with special characters', async () => {
  // Loads an invoice whose mixed content contains currency signs, umlauts,
  // mathematical symbols and XML-reserved characters, then checks that they
  // are all present in the string returned by getXmlString().
  const startTime = performance.now();

  // '&' and '<' are reserved in XML character data and must be written as
  // &amp; / &lt; for the document to be well-formed. A bare '>' is legal in
  // text, so the penalty condition is left unescaped.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-SPECIAL-001</ID>
<Note>
Price: <amount>100.00</amount> € (VAT <percentage>19%</percentage> = <vat>19.00</vat> €)
Total: <total>119.00</total> € for <company>Müller &amp; Söhne GmbH</company>
</Note>
<DocumentReference>
<DocumentDescription>
See contract <ref>§12.3</ref> for terms &amp; conditions.
<important>Payment &lt; 30 days</important> required.
Contact: <email>info@müller-söhne.de</email>
</DocumentDescription>
</DocumentReference>
<PaymentTerms>
<Note>
<condition type="discount">≥ 100 items → 5% discount</condition>
<condition type="penalty">> 30 days → 1.5% interest</condition>
<formula>Total = Price × Quantity × (1 + VAT%)</formula>
</Note>
</PaymentTerms>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);

  const xmlString = einvoice.getXmlString();

  // Verify special characters in mixed content
  expect(xmlString).toContain('Price:');
  expect(xmlString).toContain('€');
  // Reserved characters are expected in their escaped form in the XML text.
  expect(xmlString).toContain('Müller &amp; Söhne GmbH');
  expect(xmlString).toContain('§12.3');
  expect(xmlString).toContain('terms &amp; conditions');
  expect(xmlString).toContain('&lt; 30 days');
  expect(xmlString).toContain('info@müller-söhne.de');
  expect(xmlString).toContain('≥ 100 items → 5% discount');
  expect(xmlString).toContain('> 30 days → 1.5% interest');
  expect(xmlString).toContain('×');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('special-mixed', elapsed);
});
|
|||
|
|
|||
|
t.test('Mixed content with CDATA sections', async () => {
  // Loads an invoice whose notes mix plain text, inline elements and CDATA
  // sections, then checks the surrounding text survives and that the CDATA
  // payload is either kept as CDATA or shows up as escaped/raw text.
  const startTime = performance.now();

  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-CDATA-001</ID>
<Note>
Regular text before CDATA.
<![CDATA[This section contains <unescaped> tags & special chars: < > & " ']]>
Text after CDATA with <element>nested element</element>.
</Note>
<AdditionalDocumentReference>
<DocumentDescription>
HTML content example:
<![CDATA[
<html>
<body>
<h1>Invoice Details</h1>
<p>Amount: €100.00</p>
<p>VAT: 19%</p>
</body>
</html>
]]>
End of description.
</DocumentDescription>
</AdditionalDocumentReference>
<PaymentTerms>
<Note>
Formula: <formula>price * quantity</formula>
<![CDATA[JavaScript: if (amount > 100) { discount = 5%; }]]>
Applied to all items.
</Note>
</PaymentTerms>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);

  const xmlString = einvoice.getXmlString();

  // Verify mixed content with CDATA is handled
  expect(xmlString).toContain('Regular text before CDATA');
  expect(xmlString).toContain('Text after CDATA');
  expect(xmlString).toContain('<element>nested element</element>');

  // CDATA content should be preserved somehow
  if (xmlString.includes('CDATA')) {
    // Kept as a CDATA section: both delimiters must be present.
    expect(xmlString).toContain('<![CDATA[');
    expect(xmlString).toContain(']]>');
  } else {
    // Or converted to text: accept the entity-escaped form (&lt;unescaped&gt;)
    // as well as the raw form.
    expect(xmlString).toMatch(/&lt;unescaped&gt;|<unescaped>/);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('cdata-mixed', elapsed);
});
|
|||
|
|
|||
|
t.test('Mixed content with comments', async () => {
  // Loads an invoice whose mixed content is interleaved with XML comments and
  // checks that the text and elements around those comments are still present
  // in getXmlString(). The comments themselves are not asserted on.
  const startedAt = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-COMMENTS-001</ID>
<Note>
<!-- Start of payment terms -->
Payment is due in <days>30</days> days.
<!-- Discount information follows -->
<discount>Early payment: 2% if paid within 10 days</discount>
<!-- End of payment terms -->
</Note>
<DocumentReference>
<DocumentDescription>
See attachment <!-- PDF document --> for details.
<attachment>invoice.pdf</attachment> <!-- 2 pages -->
Contact <!-- via email -->: <email>info@example.com</email>
</DocumentDescription>
</DocumentReference>
<InvoiceLine>
<!-- Line item 1 -->
<Note>
Product: <name>Widget</name> <!-- Best seller -->
Quantity: <qty>10</qty> <!-- In stock -->
Price: <price>9.99</price> <!-- EUR -->
</Note>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Verify text content is preserved (comments may or may not be preserved)
  const expectedFragments = [
    'Payment is due in',
    '<days>30</days>',
    'days.',
    '<discount>Early payment: 2% if paid within 10 days</discount>',
    'See attachment',
    'for details.',
    '<attachment>invoice.pdf</attachment>',
    'Contact',
    '<email>info@example.com</email>',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('comments-mixed', performance.now() - startedAt);
});
|
|||
|
|
|||
|
t.test('Whitespace preservation in mixed content', async () => {
  // Loads an invoice whose mixed content contains runs of spaces, tabs,
  // newlines and an xml:space="preserve" element, then checks the text is
  // still present in getXmlString(). Assertions are deliberately loose
  // because whitespace handling varies by implementation.
  const startTime = performance.now();

  // The fixture must really contain the whitespace its text describes.
  // Tabs and trailing spaces are written as \t and \x20 escapes so that
  // editors and formatters cannot silently strip or convert them.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-WHITESPACE-001</ID>
<Note>Text with multiple   spaces   and
newlines should be preserved.
    <element>Indented element</element>
More text with\ttabs\tbetween\twords.
</Note>
<PaymentTerms>
<Note xml:space="preserve">   Leading spaces
<term>Net 30</term> Trailing spaces\x20\x20\x20
Middle   spaces   preserved.
End with spaces   </Note>
</PaymentTerms>
<DocumentReference>
<DocumentDescription>Line 1
<break/>
Line 2
<break/>
Line 3</DocumentDescription>
</DocumentReference>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);

  const xmlString = einvoice.getXmlString();

  // Whitespace handling varies by implementation
  expect(xmlString).toContain('Text with');
  expect(xmlString).toContain('spaces');
  expect(xmlString).toContain('<element>Indented element</element>');
  expect(xmlString).toContain('More text with');
  expect(xmlString).toContain('words');

  // xml:space="preserve" should maintain whitespace; only checked when the
  // attribute itself is present in the output.
  if (xmlString.includes('xml:space="preserve"')) {
    expect(xmlString).toMatch(/Leading spaces|^\s+Leading/m);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('whitespace-mixed', elapsed);
});
|
|||
|
|
|||
|
t.test('Deeply nested mixed content', async () => {
  // Loads an invoice whose note nests mixed content several element levels
  // deep and checks that text and elements from each level of the first
  // <Note> are present in getXmlString().
  const startedAt = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-NESTED-001</ID>
<Note>
Level 1: Invoice for <customer>
<name>ABC Corp</name> (Customer ID: <id>C-12345</id>)
<address>
Located at <street>123 Main St</street>,
<city>New York</city>, <state>NY</state> <zip>10001</zip>
</address>
</customer> dated <date>2025-01-25</date>.
</Note>
<PaymentTerms>
<Note>
<terms>
Standard terms: <standard>
Net <days>30</days> days from <reference>
invoice date (<date>2025-01-25</date>)
</reference>
</standard>
<special>
Special conditions: <condition num="1">
For orders > <amount currency="EUR">€1000</amount>:
<discount>5%</discount> discount
</condition>
</special>
</terms>
</Note>
</PaymentTerms>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Verify deeply nested structure is preserved
  const expectedFragments = [
    'Level 1: Invoice for',
    '<customer>',
    '<name>ABC Corp</name>',
    '(Customer ID:',
    '<id>C-12345</id>',
    'Located at',
    '<street>123 Main St</street>',
    '<city>New York</city>',
    '<state>NY</state>',
    '<zip>10001</zip>',
    'dated',
    '<date>2025-01-25</date>',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('nested-mixed', performance.now() - startedAt);
});
|
|||
|
|
|||
|
t.test('International mixed content', async () => {
  // Loads an invoice whose mixed content combines German, French, Spanish,
  // Chinese, Japanese and Arabic text with inline elements, and checks that
  // a sample of those strings is present in getXmlString().
  const startedAt = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-INTL-001</ID>
<Note>
Invoice for <company lang="de">Müller GmbH</company> from <city>München</city>.
Total: <amount currency="EUR">€1.234,56</amount> (inkl. <tax>19% MwSt</tax>).
支付条款:<terms lang="zh">30天内付款</terms>。
お支払い: <terms lang="ja">30日以内</terms>。
</Note>
<PaymentTerms>
<Note>
<multilang>
<en>Payment due in <days>30</days> days</en>
<de>Zahlung fällig in <days>30</days> Tagen</de>
<fr>Paiement dû dans <days>30</days> jours</fr>
<es>Pago debido en <days>30</days> días</es>
</multilang>
</Note>
</PaymentTerms>
<InvoiceLine>
<Note>
Product: <name lang="multi">
<en>Book</en> / <de>Buch</de> / <fr>Livre</fr> /
<zh>书</zh> / <ja>本</ja> / <ar>كتاب</ar>
</name>
Price: <price>€25.00</price> per <unit>Stück</unit>
</Note>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Verify international mixed content
  const expectedFragments = [
    'Müller GmbH',
    'München',
    '€1.234,56',
    '19% MwSt',
    '支付条款:',
    '30天内付款',
    'お支払い:',
    '30日以内',
    'Zahlung fällig in',
    'Tagen',
    'Paiement dû dans',
    '书',
    '本',
    'كتاب',
    'Stück',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('intl-mixed', performance.now() - startedAt);
});
|
|||
|
|
|||
|
t.test('Corpus mixed content analysis', async () => {
  // Scans up to 60 corpus .xml files as raw text, counts the ones that look
  // like they contain mixed content, logs a few examples, and requires that
  // at least one file could be read.
  const startTime = performance.now();

  // Heuristic for mixed content: a text run, an inline element with text, and
  // another text run inside the same parent. Both outer text runs must hold a
  // non-whitespace character; otherwise the indentation between sibling
  // elements of any pretty-printed document would count as mixed content.
  // Built once here rather than on every loop iteration.
  const mixedPattern = />([^<]*\S[^<]*)<[^>]+>[^<]+<\/[^>]+>([^<]*\S[^<]*)</;
  // Upper bound on pattern-based examples; the single CDATA note logged
  // separately below is not subject to it.
  const maxPatternExamples = 5;

  let processedCount = 0;
  let mixedContentCount = 0;
  const mixedContentExamples: string[] = [];

  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));

  // Sample corpus for mixed content patterns
  const sampleSize = Math.min(60, xmlFiles.length);
  const sample = xmlFiles.slice(0, sampleSize);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      // readFile may hand back a Buffer or a string; normalise to a string.
      const xmlString: string = Buffer.isBuffer(content) ? content.toString('utf8') : content;

      // Run the heuristic once and reuse the match for the example text.
      const match = xmlString.match(mixedPattern);
      if (match) {
        mixedContentCount++;
        if (mixedContentExamples.length < maxPatternExamples) {
          mixedContentExamples.push(`${file}: "${match[0].substring(0, 100)}..."`);
        }
      }

      // Also check for CDATA sections; record at most one such note.
      if (
        xmlString.includes('<![CDATA[') &&
        !mixedContentExamples.some(ex => ex.includes('CDATA'))
      ) {
        mixedContentExamples.push(`${file}: Contains CDATA sections`);
      }

      processedCount++;
    } catch (error: unknown) {
      // Best-effort scan: log and move on to the next file. The caught value
      // is not guaranteed to be an Error, so narrow before reading .message.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Mixed content parsing issue in ${file}:`, reason);
    }
  }

  console.log(`Mixed content corpus analysis (${processedCount} files):`);
  console.log(`- Files with mixed content patterns: ${mixedContentCount}`);
  if (mixedContentExamples.length > 0) {
    console.log('Mixed content examples:');
    mixedContentExamples.forEach(ex => console.log(`  ${ex}`));
  }

  expect(processedCount).toBeGreaterThan(0);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-mixed', elapsed);
});
|
|||
|
|
|||
|
// Print performance summary
performanceTracker.printSummary();

// Performance assertions
// NOTE(review): none of the t.test(...) calls above are awaited in this
// callback; confirm the sub-tests have recorded their measurements before
// the summary and average are read here.
expect(performanceTracker.getAverageTime()).toBeLessThan(150); // Mixed content operations may be slightly slower
|
|||
|
});
|
|||
|
|
|||
|
// Start the tap runner (presumably executes the tests registered above — confirm against tapbundle docs).
tap.start();
|