// File listing metadata: 462 lines, 16 KiB, TypeScript.
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||
import * as plugins from '../plugins.js';
|
||
import { EInvoice } from '../../../ts/index.js';
|
||
import { CorpusLoader } from '../corpus.loader.js';
|
||
import { PerformanceTracker } from '../performance.tracker.js';
|
||
|
||
tap.test('ENC-08: Mixed Content Encoding - should handle mixed content (text and elements) correctly', async (t) => {
|
||
// ENC-08: Verify proper encoding of mixed content scenarios
|
||
// This test ensures text nodes, elements, CDATA, and comments are properly encoded together
|
||
|
||
// Shared timing collector: every sub-test below records one measurement on it.
const performanceTracker = new PerformanceTracker('ENC-08: Mixed Content');
// Source of the sample files scanned by the corpus analysis sub-test.
const corpusLoader = new CorpusLoader();
|
||
|
||
t.test('Basic mixed content', async () => {
  // Clock starts before the fixture is built so the sample covers the whole sub-test.
  const begin = performance.now();

  // UBL invoice whose Note elements interleave free text with inline child elements.
  const invoiceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-BASIC-001</ID>
<Note>
This invoice includes <emphasis>important</emphasis> payment terms:
<term>Net 30 days</term> with <percentage>2%</percentage> early payment discount.
Please pay by <date>2025-02-25</date>.
</Note>
<PaymentTerms>
<Note>
Payment due in <days>30</days> days.
<condition>If paid within <days>10</days> days: <discount>2%</discount> discount</condition>
<condition>If paid after <days>30</days> days: <penalty>1.5%</penalty> interest</condition>
</Note>
</PaymentTerms>
<InvoiceLine>
<Note>
Item includes <quantity>10</quantity> units of <product>Widget A</product>
at <price currency="EUR">€9.99</price> each.
Total: <total currency="EUR">€99.90</total>
</Note>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(invoiceXml);
  const serialized = invoice.getXmlString();

  // Fragments that must appear in the serialized XML, checked in this order.
  const expectedFragments = [
    // Text and inline elements of the top-level Note
    'This invoice includes',
    '<emphasis>important</emphasis>',
    'payment terms:',
    '<term>Net 30 days</term>',
    'with',
    '<percentage>2%</percentage>',
    'Please pay by',
    '<date>2025-02-25</date>',
    // Elements nested inside the PaymentTerms conditions
    'If paid within',
    '<days>10</days>',
    'days:',
    '<discount>2%</discount>',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('basic-mixed', performance.now() - begin);
});
|
||
|
||
t.test('Mixed content with special characters', async () => {
  const startTime = performance.now();

  // Mixed content containing non-ASCII symbols plus the XML-reserved characters & < >.
  // Inside character data `&` and `<` MUST be written as entities, otherwise the
  // document is not well-formed and cannot be loaded at all.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-SPECIAL-001</ID>
<Note>
Price: <amount>100.00</amount> € (VAT <percentage>19%</percentage> = <vat>19.00</vat> €)
Total: <total>119.00</total> € for <company>Müller &amp; Söhne GmbH</company>
</Note>
<DocumentReference>
<DocumentDescription>
See contract <ref>§12.3</ref> for terms &amp; conditions.
<important>Payment &lt; 30 days</important> required.
Contact: <email>info@müller-söhne.de</email>
</DocumentDescription>
</DocumentReference>
<PaymentTerms>
<Note>
<condition type="discount">≥ 100 items → 5% discount</condition>
<condition type="penalty">&gt; 30 days → 1.5% interest</condition>
<formula>Total = Price × Quantity × (1 + VAT%)</formula>
</Note>
</PaymentTerms>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);

  const xmlString = einvoice.getXmlString();

  // Verify special characters in mixed content.
  // The assertions look at serialized XML, so reserved characters appear in their
  // escaped form (`&amp;`, `&lt;`) rather than as raw `&` / `<`.
  expect(xmlString).toContain('Price:');
  expect(xmlString).toContain('€');
  expect(xmlString).toContain('Müller &amp; Söhne GmbH');
  expect(xmlString).toContain('§12.3');
  expect(xmlString).toContain('terms &amp; conditions');
  expect(xmlString).toContain('&lt; 30 days');
  expect(xmlString).toContain('info@müller-söhne.de');
  expect(xmlString).toContain('≥ 100 items → 5% discount');
  // `>` is legal unescaped in character data, so a serializer may emit either form.
  expect(xmlString).toMatch(/(?:>|&gt;) 30 days → 1\.5% interest/);
  expect(xmlString).toContain('×');

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('special-mixed', elapsed);
});
|
||
|
||
t.test('Mixed content with CDATA sections', async () => {
  const startTime = performance.now();

  // Notes and descriptions that combine plain text, inline elements and CDATA sections.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-CDATA-001</ID>
<Note>
Regular text before CDATA.
<![CDATA[This section contains <unescaped> tags & special chars: < > & " ']]>
Text after CDATA with <element>nested element</element>.
</Note>
<AdditionalDocumentReference>
<DocumentDescription>
HTML content example:
<![CDATA[
<html>
<body>
<h1>Invoice Details</h1>
<p>Amount: €100.00</p>
<p>VAT: 19%</p>
</body>
</html>
]]>
End of description.
</DocumentDescription>
</AdditionalDocumentReference>
<PaymentTerms>
<Note>
Formula: <formula>price * quantity</formula>
<![CDATA[JavaScript: if (amount > 100) { discount = 5%; }]]>
Applied to all items.
</Note>
</PaymentTerms>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);

  const xmlString = einvoice.getXmlString();

  // Verify mixed content with CDATA is handled
  expect(xmlString).toContain('Regular text before CDATA');
  expect(xmlString).toContain('Text after CDATA');
  expect(xmlString).toContain('<element>nested element</element>');

  // CDATA content should be preserved somehow.
  // Branch on the CDATA opening delimiter, not on the bare word "CDATA": the fixture's
  // plain text itself contains that word, which would make the fallback unreachable.
  if (xmlString.includes('<![CDATA[')) {
    // CDATA kept verbatim: the section must also be closed.
    expect(xmlString).toContain(']]>');
  } else {
    // Or converted to escaped text: `<` must be escaped, `>` may be either form.
    expect(xmlString).toMatch(/&lt;unescaped(?:&gt;|>)/);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('cdata-mixed', elapsed);
});
|
||
|
||
t.test('Mixed content with comments', async () => {
  const begin = performance.now();

  // Fixture with XML comments placed before, between and after text and inline elements.
  const invoiceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-COMMENTS-001</ID>
<Note>
<!-- Start of payment terms -->
Payment is due in <days>30</days> days.
<!-- Discount information follows -->
<discount>Early payment: 2% if paid within 10 days</discount>
<!-- End of payment terms -->
</Note>
<DocumentReference>
<DocumentDescription>
See attachment <!-- PDF document --> for details.
<attachment>invoice.pdf</attachment> <!-- 2 pages -->
Contact <!-- via email -->: <email>info@example.com</email>
</DocumentDescription>
</DocumentReference>
<InvoiceLine>
<!-- Line item 1 -->
<Note>
Product: <name>Widget</name> <!-- Best seller -->
Quantity: <qty>10</qty> <!-- In stock -->
Price: <price>9.99</price> <!-- EUR -->
</Note>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(invoiceXml);
  const serialized = invoice.getXmlString();

  // Only text and elements are asserted; the comments themselves may or may not survive.
  const expectedFragments = [
    'Payment is due in',
    '<days>30</days>',
    'days.',
    '<discount>Early payment: 2% if paid within 10 days</discount>',
    'See attachment',
    'for details.',
    '<attachment>invoice.pdf</attachment>',
    'Contact',
    '<email>info@example.com</email>',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('comments-mixed', performance.now() - begin);
});
|
||
|
||
t.test('Whitespace preservation in mixed content', async () => {
  const startTime = performance.now();

  // The fixture text promises runs of spaces, tabs, indentation and leading spaces,
  // so those are written out explicitly here. Tabs use the \t escape (template literals
  // process escapes) so they cannot be silently converted to spaces by an editor.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <UBLVersionID>2.1</UBLVersionID>
  <ID>MIXED-WHITESPACE-001</ID>
  <Note>Text with   multiple   spaces and
    newlines should be preserved.
      <element>Indented element</element>
    More text with\ttabs\tbetween\twords.
  </Note>
  <PaymentTerms>
    <Note xml:space="preserve">   Leading spaces
      <term>Net 30</term>   Trailing spaces
      Middle    spaces    preserved.
      End with spaces   </Note>
  </PaymentTerms>
  <DocumentReference>
    <DocumentDescription>Line 1
      <break/>
      Line 2
      <break/>
      Line 3</DocumentDescription>
  </DocumentReference>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);

  const xmlString = einvoice.getXmlString();

  // Whitespace handling varies by implementation, so only whitespace-free fragments
  // are asserted: they hold whether runs are preserved or collapsed.
  expect(xmlString).toContain('Text with');
  expect(xmlString).toContain('spaces');
  expect(xmlString).toContain('<element>Indented element</element>');
  expect(xmlString).toContain('More text with');
  expect(xmlString).toContain('words');

  // xml:space="preserve" should maintain whitespace; only checked when the attribute
  // survives serialization.
  if (xmlString.includes('xml:space="preserve"')) {
    expect(xmlString).toMatch(/Leading spaces|^\s+Leading/m);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('whitespace-mixed', elapsed);
});
|
||
|
||
t.test('Deeply nested mixed content', async () => {
  const begin = performance.now();

  // Fixture where mixed content is nested several element levels deep.
  const invoiceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-NESTED-001</ID>
<Note>
Level 1: Invoice for <customer>
<name>ABC Corp</name> (Customer ID: <id>C-12345</id>)
<address>
Located at <street>123 Main St</street>,
<city>New York</city>, <state>NY</state> <zip>10001</zip>
</address>
</customer> dated <date>2025-01-25</date>.
</Note>
<PaymentTerms>
<Note>
<terms>
Standard terms: <standard>
Net <days>30</days> days from <reference>
invoice date (<date>2025-01-25</date>)
</reference>
</standard>
<special>
Special conditions: <condition num="1">
For orders > <amount currency="EUR">€1000</amount>:
<discount>5%</discount> discount
</condition>
</special>
</terms>
</Note>
</PaymentTerms>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(invoiceXml);
  const serialized = invoice.getXmlString();

  // Text and elements from each nesting level of the first Note, outermost first.
  const expectedFragments = [
    'Level 1: Invoice for',
    '<customer>',
    '<name>ABC Corp</name>',
    '(Customer ID:',
    '<id>C-12345</id>',
    'Located at',
    '<street>123 Main St</street>',
    '<city>New York</city>',
    '<state>NY</state>',
    '<zip>10001</zip>',
    'dated',
    '<date>2025-01-25</date>',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('nested-mixed', performance.now() - begin);
});
|
||
|
||
t.test('International mixed content', async () => {
  const begin = performance.now();

  // Fixture mixing German, Chinese, Japanese, French, Spanish and Arabic text with
  // inline elements.
  const invoiceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>MIXED-INTL-001</ID>
<Note>
Invoice for <company lang="de">Müller GmbH</company> from <city>München</city>.
Total: <amount currency="EUR">€1.234,56</amount> (inkl. <tax>19% MwSt</tax>).
支付条款:<terms lang="zh">30天内付款</terms>。
お支払い: <terms lang="ja">30日以内</terms>。
</Note>
<PaymentTerms>
<Note>
<multilang>
<en>Payment due in <days>30</days> days</en>
<de>Zahlung fällig in <days>30</days> Tagen</de>
<fr>Paiement dû dans <days>30</days> jours</fr>
<es>Pago debido en <days>30</days> días</es>
</multilang>
</Note>
</PaymentTerms>
<InvoiceLine>
<Note>
Product: <name lang="multi">
<en>Book</en> / <de>Buch</de> / <fr>Livre</fr> /
<zh>书</zh> / <ja>本</ja> / <ar>كتاب</ar>
</name>
Price: <price>€25.00</price> per <unit>Stück</unit>
</Note>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(invoiceXml);
  const serialized = invoice.getXmlString();

  // Non-ASCII fragments that must appear unchanged in the serialized XML.
  const expectedFragments = [
    'Müller GmbH',
    'München',
    '€1.234,56',
    '19% MwSt',
    '支付条款:',
    '30天内付款',
    'お支払い:',
    '30日以内',
    'Zahlung fällig in',
    'Tagen',
    'Paiement dû dans',
    '书',
    '本',
    'كتاب',
    'Stück',
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  performanceTracker.addMeasurement('intl-mixed', performance.now() - begin);
});
|
||
|
||
t.test('Corpus mixed content analysis', async () => {
  const startTime = performance.now();
  let processedCount = 0;
  let mixedContentCount = 0;
  const mixedContentExamples: string[] = [];

  // Pattern: text, then an element with text content, then more text, all before the
  // next tag. Both surrounding text runs must contain a non-whitespace character so
  // that plain indentation between elements is not counted as mixed content.
  // `\s*[^<\s][^<]*` is written so each run is matched without backtracking, which
  // keeps the scan linear even on very long text nodes.
  // Compiled once here rather than on every loop iteration.
  const mixedPattern = />\s*[^<\s][^<]*<[^>]+>[^<]+<\/[^>]+>\s*[^<\s][^<]*</;

  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));

  // Sample corpus for mixed content patterns: at most the first 60 XML files.
  const sampleSize = Math.min(60, xmlFiles.length);
  const sample = xmlFiles.slice(0, sampleSize);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      // readFile may hand back a Buffer or a string; normalise to a string.
      const xmlString: string = Buffer.isBuffer(content) ? content.toString('utf8') : content;

      // Run the pattern once and reuse the match for both counting and the example.
      const match = mixedPattern.exec(xmlString);
      if (match) {
        mixedContentCount++;

        // Keep at most five pattern examples, each truncated to 100 characters.
        if (mixedContentExamples.length < 5) {
          mixedContentExamples.push(`${file}: "${match[0].substring(0, 100)}..."`);
        }
      }

      // Also check for CDATA sections; record only the first file that has one.
      if (xmlString.includes('<![CDATA[')) {
        if (!mixedContentExamples.some(ex => ex.includes('CDATA'))) {
          mixedContentExamples.push(`${file}: Contains CDATA sections`);
        }
      }

      processedCount++;
    } catch (error: unknown) {
      // Best effort: a file that cannot be read is logged and skipped, not fatal.
      // The caught value is `unknown`, so narrow it before reading `.message`.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Mixed content parsing issue in ${file}:`, reason);
    }
  }

  console.log(`Mixed content corpus analysis (${processedCount} files):`);
  console.log(`- Files with mixed content patterns: ${mixedContentCount}`);
  if (mixedContentExamples.length > 0) {
    console.log('Mixed content examples:');
    mixedContentExamples.forEach(ex => console.log(`  ${ex}`));
  }

  // The analysis is informational; the only requirement is that some file was read.
  expect(processedCount).toBeGreaterThan(0);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-mixed', elapsed);
});
|
||
|
||
// Report the timings gathered by the sub-tests.
// NOTE(review): this executes when the outer test body reaches this point; whether the
// sub-tests declared above have already run (and recorded samples) depends on how
// t.test schedules them — confirm.
performanceTracker.printSummary();

// Guard against gross slowdowns: the mean sample must stay below 150
// (mixed content operations may be slightly slower than simpler cases).
const averageTime = performanceTracker.getAverageTime();
expect(averageTime).toBeLessThan(150);
|
||
});
|
||
|
||
// Start the tap run for the tests registered in this file.
tap.start();