fix(compliance): improve compliance
This commit is contained in:
@ -1,516 +1,306 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as einvoice from '../../../ts/index.js';
|
||||
import * as plugins from '../../plugins.js';
|
||||
import { CorpusLoader } from '../../helpers/corpus.loader.js';
|
||||
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
|
||||
|
||||
tap.test('PARSE-10: CDATA Section Handling - Process CDATA sections correctly', async (t) => {
|
||||
const performanceTracker = new PerformanceTracker('PARSE-10');
|
||||
tap.test('PARSE-10: CDATA Section Handling in e-invoices', async () => {
|
||||
console.log('Testing CDATA section handling in e-invoices...\n');
|
||||
|
||||
await t.test('Basic CDATA sections', async () => {
|
||||
performanceTracker.startOperation('basic-cdata');
|
||||
|
||||
const cdataTests = [
|
||||
{
|
||||
name: 'Simple CDATA content',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<invoice>
|
||||
<notes><![CDATA[This is plain text content]]></notes>
|
||||
</invoice>`,
|
||||
expectedContent: 'This is plain text content',
|
||||
description: 'Basic CDATA section'
|
||||
},
|
||||
{
|
||||
name: 'CDATA with special characters',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<invoice>
|
||||
<description><![CDATA[Price < 100 & quantity > 5]]></description>
|
||||
</invoice>`,
|
||||
expectedContent: 'Price < 100 & quantity > 5',
|
||||
description: 'Special characters preserved'
|
||||
},
|
||||
{
|
||||
name: 'CDATA with XML-like content',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<invoice>
|
||||
<htmlContent><![CDATA[<p>This is <b>HTML</b> content</p>]]></htmlContent>
|
||||
</invoice>`,
|
||||
expectedContent: '<p>This is <b>HTML</b> content</p>',
|
||||
description: 'XML markup as text'
|
||||
},
|
||||
{
|
||||
name: 'Empty CDATA section',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<invoice>
|
||||
<empty><![CDATA[]]></empty>
|
||||
</invoice>`,
|
||||
expectedContent: '',
|
||||
description: 'Empty CDATA is valid'
|
||||
},
|
||||
{
|
||||
name: 'CDATA with line breaks',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<invoice>
|
||||
<address><![CDATA[Line 1
|
||||
Line 2
|
||||
Line 3]]></address>
|
||||
</invoice>`,
|
||||
expectedContent: 'Line 1\nLine 2\nLine 3',
|
||||
description: 'Preserves formatting'
|
||||
}
|
||||
];
|
||||
|
||||
for (const test of cdataTests) {
|
||||
const startTime = performance.now();
|
||||
|
||||
console.log(`${test.name}:`);
|
||||
console.log(` Description: ${test.description}`);
|
||||
console.log(` Expected content: "${test.expectedContent}"`);
|
||||
|
||||
try {
|
||||
const invoice = new einvoice.EInvoice();
|
||||
if (invoice.fromXmlString) {
|
||||
await invoice.fromXmlString(test.xml);
|
||||
console.log(' ✓ CDATA parsed successfully');
|
||||
} else {
|
||||
console.log(' ⚠️ Cannot test without fromXmlString');
|
||||
}
|
||||
} catch (error) {
|
||||
console.log(` ✗ Error: ${error.message}`);
|
||||
}
|
||||
|
||||
performanceTracker.recordMetric('cdata-parsing', performance.now() - startTime);
|
||||
// Test basic CDATA sections in invoice fields
|
||||
const cdataTests = [
|
||||
{
|
||||
name: 'Simple CDATA content in notes',
|
||||
xml: `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
|
||||
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
|
||||
<cbc:ID>CDATA-001</cbc:ID>
|
||||
<cbc:Note><![CDATA[This is plain text content with special chars: < > & " ']]></cbc:Note>
|
||||
</ubl:Invoice>`,
|
||||
expectedNote: "This is plain text content with special chars: < > & \" '",
|
||||
description: 'Basic CDATA section preserves special characters'
|
||||
},
|
||||
{
|
||||
name: 'CDATA with XML-like content',
|
||||
xml: `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
|
||||
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
|
||||
<cbc:ID>CDATA-002</cbc:ID>
|
||||
<cbc:Note><![CDATA[<html><body>Invoice contains <b>HTML</b> markup</body></html>]]></cbc:Note>
|
||||
</ubl:Invoice>`,
|
||||
expectedNote: '<html><body>Invoice contains <b>HTML</b> markup</body></html>',
|
||||
description: 'XML/HTML markup preserved as text in CDATA'
|
||||
},
|
||||
{
|
||||
name: 'CDATA with line breaks and formatting',
|
||||
xml: `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
|
||||
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2"
|
||||
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2">
|
||||
<cbc:ID>CDATA-003</cbc:ID>
|
||||
<cac:AccountingSupplierParty>
|
||||
<cac:Party>
|
||||
<cac:PartyName>
|
||||
<cbc:Name><![CDATA[Company & Co.
|
||||
Special Division
|
||||
"International Sales"]]></cbc:Name>
|
||||
</cac:PartyName>
|
||||
</cac:Party>
|
||||
</cac:AccountingSupplierParty>
|
||||
</ubl:Invoice>`,
|
||||
expectedSupplierName: 'Company & Co.\nSpecial Division\n"International Sales"',
|
||||
description: 'CDATA preserves line breaks and special chars in company names'
|
||||
},
|
||||
{
|
||||
name: 'Empty CDATA section',
|
||||
xml: `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
|
||||
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
|
||||
<cbc:ID>CDATA-004</cbc:ID>
|
||||
<cbc:Note><![CDATA[]]></cbc:Note>
|
||||
</ubl:Invoice>`,
|
||||
expectedNote: '',
|
||||
description: 'Empty CDATA section is valid'
|
||||
},
|
||||
{
|
||||
name: 'CDATA with code snippets',
|
||||
xml: `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
|
||||
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
|
||||
<cbc:ID>CDATA-005</cbc:ID>
|
||||
<cbc:Note><![CDATA[if (price < 100 && quantity > 5) { discount = 0.1; }]]></cbc:Note>
|
||||
</ubl:Invoice>`,
|
||||
expectedNote: 'if (price < 100 && quantity > 5) { discount = 0.1; }',
|
||||
description: 'Code snippets with operators preserved'
|
||||
}
|
||||
|
||||
performanceTracker.endOperation('basic-cdata');
|
||||
});
|
||||
];
|
||||
|
||||
await t.test('CDATA edge cases', async () => {
|
||||
performanceTracker.startOperation('cdata-edge-cases');
|
||||
for (const test of cdataTests) {
|
||||
console.log(`\n${test.name}:`);
|
||||
console.log(` Description: ${test.description}`);
|
||||
|
||||
const edgeCases = [
|
||||
{
|
||||
name: 'Nested CDATA-like content',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<invoice>
|
||||
<code><![CDATA[if (text.includes("<![CDATA[")) { /* handle nested */ }]]></code>
|
||||
</invoice>`,
|
||||
note: 'CDATA end sequence in content needs escaping',
|
||||
challenge: 'Cannot nest CDATA sections'
|
||||
},
|
||||
{
|
||||
name: 'CDATA end sequence in content',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<invoice>
|
||||
<script><![CDATA[
|
||||
// This would end CDATA: ]]>
|
||||
// Must be split: ]]]]><![CDATA[>
|
||||
]]></script>
|
||||
</invoice>`,
|
||||
note: 'End sequence must be escaped',
|
||||
challenge: 'Split ]]> into ]] and >'
|
||||
},
|
||||
{
|
||||
name: 'Multiple CDATA sections',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<invoice>
|
||||
<content>
|
||||
<![CDATA[Part 1]]>
|
||||
Normal text
|
||||
<![CDATA[Part 2]]>
|
||||
</content>
|
||||
</invoice>`,
|
||||
note: 'Multiple CDATA in same element',
|
||||
challenge: 'Proper content concatenation'
|
||||
},
|
||||
{
|
||||
name: 'CDATA in attributes (invalid)',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<invoice>
|
||||
<item description="<![CDATA[Not allowed]]>">Content</item>
|
||||
</invoice>`,
|
||||
note: 'CDATA not allowed in attributes',
|
||||
challenge: 'Should cause parse error'
|
||||
},
|
||||
{
|
||||
name: 'Whitespace around CDATA',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<invoice>
|
||||
<padded> <![CDATA[Content]]> </padded>
|
||||
</invoice>`,
|
||||
note: 'Whitespace outside CDATA preserved',
|
||||
challenge: 'Handle mixed content correctly'
|
||||
}
|
||||
];
|
||||
|
||||
for (const test of edgeCases) {
|
||||
const startTime = performance.now();
|
||||
try {
|
||||
const invoice = new einvoice.EInvoice();
|
||||
await invoice.fromXmlString(test.xml);
|
||||
|
||||
console.log(`\n${test.name}:`);
|
||||
console.log(` Note: ${test.note}`);
|
||||
console.log(` Challenge: ${test.challenge}`);
|
||||
console.log(' ✓ CDATA parsed successfully');
|
||||
console.log(` Invoice ID: ${invoice.id}`);
|
||||
|
||||
try {
|
||||
const invoice = new einvoice.EInvoice();
|
||||
if (invoice.fromXmlString) {
|
||||
await invoice.fromXmlString(test.xml);
|
||||
console.log(' Result: Parsed successfully');
|
||||
}
|
||||
} catch (error) {
|
||||
console.log(` Result: ${error.message}`);
|
||||
if (test.expectedNote !== undefined) {
|
||||
const actualNote = invoice.notes?.[0] || '';
|
||||
console.log(` Expected note: "${test.expectedNote}"`);
|
||||
console.log(` Actual note: "${actualNote}"`);
|
||||
expect(actualNote).toEqual(test.expectedNote);
|
||||
}
|
||||
|
||||
performanceTracker.recordMetric('edge-case', performance.now() - startTime);
|
||||
if (test.expectedSupplierName !== undefined) {
|
||||
const actualName = invoice.from?.name || '';
|
||||
console.log(` Expected supplier: "${test.expectedSupplierName}"`);
|
||||
console.log(` Actual supplier: "${actualName}"`);
|
||||
expect(actualName).toEqual(test.expectedSupplierName);
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.log(` ✗ Error: ${error.message}`);
|
||||
}
|
||||
|
||||
performanceTracker.endOperation('cdata-edge-cases');
|
||||
});
|
||||
|
||||
await t.test('CDATA vs escaped content comparison', async () => {
|
||||
performanceTracker.startOperation('cdata-vs-escaped');
|
||||
|
||||
const comparisonTests = [
|
||||
{
|
||||
name: 'Special characters',
|
||||
cdata: '<note><![CDATA[Price < 100 & quantity > 5]]></note>',
|
||||
escaped: '<note>Price < 100 & quantity > 5</note>',
|
||||
content: 'Price < 100 & quantity > 5'
|
||||
},
|
||||
{
|
||||
name: 'HTML snippet',
|
||||
cdata: '<html><![CDATA[<div class="invoice">Content</div>]]></html>',
|
||||
escaped: '<html><div class="invoice">Content</div></html>',
|
||||
content: '<div class="invoice">Content</div>'
|
||||
},
|
||||
{
|
||||
name: 'Code snippet',
|
||||
cdata: '<code><![CDATA[if (a && b) { return "result"; }]]></code>',
|
||||
escaped: '<code>if (a && b) { return "result"; }</code>',
|
||||
content: 'if (a && b) { return "result"; }'
|
||||
},
|
||||
{
|
||||
name: 'Quote marks',
|
||||
cdata: '<quote><![CDATA[He said "Hello" and she said \'Hi\']]></quote>',
|
||||
escaped: '<quote>He said "Hello" and she said 'Hi'</quote>',
|
||||
content: 'He said "Hello" and she said \'Hi\''
|
||||
}
|
||||
];
|
||||
|
||||
console.log('CDATA vs Escaped Content:');
|
||||
|
||||
for (const test of comparisonTests) {
|
||||
console.log(`\n${test.name}:`);
|
||||
console.log(` Expected content: "${test.content}"`);
|
||||
console.log(` CDATA approach: More readable, preserves content as-is`);
|
||||
console.log(` Escaped approach: Standard XML, but less readable`);
|
||||
|
||||
// Compare sizes
|
||||
const cdataSize = Buffer.byteLength(test.cdata, 'utf8');
|
||||
const escapedSize = Buffer.byteLength(test.escaped, 'utf8');
|
||||
|
||||
console.log(` Size comparison: CDATA=${cdataSize}B, Escaped=${escapedSize}B`);
|
||||
if (cdataSize < escapedSize) {
|
||||
console.log(` CDATA is ${escapedSize - cdataSize} bytes smaller`);
|
||||
} else {
|
||||
console.log(` Escaped is ${cdataSize - escapedSize} bytes smaller`);
|
||||
}
|
||||
}
|
||||
|
||||
performanceTracker.endOperation('cdata-vs-escaped');
|
||||
});
|
||||
|
||||
await t.test('CDATA in e-invoice contexts', async () => {
|
||||
performanceTracker.startOperation('einvoice-cdata');
|
||||
|
||||
const einvoiceUseCases = [
|
||||
{
|
||||
name: 'Terms and conditions',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<Invoice>
|
||||
<PaymentTerms>
|
||||
<Note><![CDATA[
|
||||
Payment Terms & Conditions:
|
||||
1. Payment due within 30 days
|
||||
2. Late payment fee: 2% per month
|
||||
3. Disputes must be raised within 7 days
|
||||
|
||||
For more info visit: https://example.com/terms
|
||||
]]></Note>
|
||||
</PaymentTerms>
|
||||
</Invoice>`,
|
||||
useCase: 'Legal text with special characters'
|
||||
},
|
||||
{
|
||||
name: 'Product description with HTML',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<Invoice>
|
||||
<InvoiceLine>
|
||||
<Item>
|
||||
<Description><![CDATA[
|
||||
<h3>Premium Widget</h3>
|
||||
<ul>
|
||||
<li>Dimension: 10cm x 5cm x 3cm</li>
|
||||
<li>Weight: < 500g</li>
|
||||
<li>Price: €99.99</li>
|
||||
</ul>
|
||||
]]></Description>
|
||||
</Item>
|
||||
</InvoiceLine>
|
||||
</Invoice>`,
|
||||
useCase: 'Rich text product descriptions'
|
||||
},
|
||||
{
|
||||
name: 'Base64 encoded attachment',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<Invoice>
|
||||
<AdditionalDocumentReference>
|
||||
<Attachment>
|
||||
<EmbeddedDocumentBinaryObject mimeCode="application/pdf">
|
||||
<![CDATA[JVBERi0xLjQKJeLjz9MKCjEgMCBvYmoKPDwKL1R5cGUgL0NhdGFsb2cKL1BhZ2VzIDIgMCBSCj4+CmVuZG9iag==]]>
|
||||
</EmbeddedDocumentBinaryObject>
|
||||
</Attachment>
|
||||
</AdditionalDocumentReference>
|
||||
</Invoice>`,
|
||||
useCase: 'Binary data encoding'
|
||||
},
|
||||
{
|
||||
name: 'Custom XML extensions',
|
||||
xml: `<?xml version="1.0"?>
|
||||
<Invoice>
|
||||
<UBLExtensions>
|
||||
<UBLExtension>
|
||||
<ExtensionContent><![CDATA[
|
||||
<CustomData xmlns="http://example.com/custom">
|
||||
<Field1>Value with < and > chars</Field1>
|
||||
<Field2>Complex & data</Field2>
|
||||
</CustomData>
|
||||
]]></ExtensionContent>
|
||||
</UBLExtension>
|
||||
</UBLExtensions>
|
||||
</Invoice>`,
|
||||
useCase: 'Embedded XML without namespace conflicts'
|
||||
}
|
||||
];
|
||||
|
||||
for (const useCase of einvoiceUseCases) {
|
||||
console.log(`\n${useCase.name}:`);
|
||||
console.log(` Use case: ${useCase.useCase}`);
|
||||
|
||||
const startTime = performance.now();
|
||||
|
||||
try {
|
||||
const invoice = new einvoice.EInvoice();
|
||||
if (invoice.fromXmlString) {
|
||||
await invoice.fromXmlString(useCase.xml);
|
||||
console.log(' ✓ Valid e-invoice usage of CDATA');
|
||||
}
|
||||
} catch (error) {
|
||||
console.log(` ⚠️ Parse result: ${error.message}`);
|
||||
}
|
||||
|
||||
performanceTracker.recordMetric('einvoice-usecase', performance.now() - startTime);
|
||||
}
|
||||
|
||||
performanceTracker.endOperation('einvoice-cdata');
|
||||
});
|
||||
|
||||
await t.test('CDATA performance impact', async () => {
|
||||
performanceTracker.startOperation('cdata-performance');
|
||||
|
||||
// Generate test documents with varying CDATA usage
|
||||
const generateInvoiceWithCDATA = (cdataCount: number, cdataSize: number): string => {
|
||||
let xml = '<?xml version="1.0"?>\n<invoice>\n';
|
||||
|
||||
for (let i = 0; i < cdataCount; i++) {
|
||||
const content = 'X'.repeat(cdataSize);
|
||||
xml += ` <field${i}><![CDATA[${content}]]></field${i}>\n`;
|
||||
}
|
||||
|
||||
xml += '</invoice>';
|
||||
return xml;
|
||||
};
|
||||
|
||||
const generateInvoiceEscaped = (fieldCount: number, contentSize: number): string => {
|
||||
let xml = '<?xml version="1.0"?>\n<invoice>\n';
|
||||
|
||||
for (let i = 0; i < fieldCount; i++) {
|
||||
// Content with characters that need escaping
|
||||
const content = 'X&<>X'.repeat(contentSize / 5);
|
||||
const escaped = content.replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>');
|
||||
xml += ` <field${i}>${escaped}</field${i}>\n`;
|
||||
}
|
||||
|
||||
xml += '</invoice>';
|
||||
return xml;
|
||||
};
|
||||
|
||||
console.log('Performance comparison:');
|
||||
|
||||
const testConfigs = [
|
||||
{ fields: 10, contentSize: 100 },
|
||||
{ fields: 50, contentSize: 500 },
|
||||
{ fields: 100, contentSize: 1000 }
|
||||
];
|
||||
|
||||
for (const config of testConfigs) {
|
||||
console.log(`\n${config.fields} fields, ${config.contentSize} chars each:`);
|
||||
|
||||
// Test CDATA version
|
||||
const cdataXml = generateInvoiceWithCDATA(config.fields, config.contentSize);
|
||||
const cdataSize = Buffer.byteLength(cdataXml, 'utf8');
|
||||
|
||||
const cdataStart = performance.now();
|
||||
try {
|
||||
const invoice = new einvoice.EInvoice();
|
||||
if (invoice.fromXmlString) {
|
||||
await invoice.fromXmlString(cdataXml);
|
||||
}
|
||||
} catch (e) {}
|
||||
const cdataTime = performance.now() - cdataStart;
|
||||
|
||||
// Test escaped version
|
||||
const escapedXml = generateInvoiceEscaped(config.fields, config.contentSize);
|
||||
const escapedSize = Buffer.byteLength(escapedXml, 'utf8');
|
||||
|
||||
const escapedStart = performance.now();
|
||||
try {
|
||||
const invoice = new einvoice.EInvoice();
|
||||
if (invoice.fromXmlString) {
|
||||
await invoice.fromXmlString(escapedXml);
|
||||
}
|
||||
} catch (e) {}
|
||||
const escapedTime = performance.now() - escapedStart;
|
||||
|
||||
console.log(` CDATA: ${cdataTime.toFixed(2)}ms (${(cdataSize/1024).toFixed(1)}KB)`);
|
||||
console.log(` Escaped: ${escapedTime.toFixed(2)}ms (${(escapedSize/1024).toFixed(1)}KB)`);
|
||||
console.log(` Difference: ${((escapedTime - cdataTime) / cdataTime * 100).toFixed(1)}%`);
|
||||
|
||||
performanceTracker.recordMetric(`perf-${config.fields}fields`, cdataTime);
|
||||
}
|
||||
|
||||
performanceTracker.endOperation('cdata-performance');
|
||||
});
|
||||
|
||||
await t.test('Corpus CDATA usage analysis', async () => {
|
||||
performanceTracker.startOperation('corpus-cdata');
|
||||
|
||||
const corpusLoader = new CorpusLoader();
|
||||
const xmlFiles = await corpusLoader.getFiles(/\.(xml|ubl|cii)$/);
|
||||
|
||||
console.log(`\nAnalyzing CDATA usage in ${xmlFiles.length} corpus files...`);
|
||||
|
||||
const cdataStats = {
|
||||
total: 0,
|
||||
filesWithCDATA: 0,
|
||||
totalCDATASections: 0,
|
||||
cdataByElement: new Map<string, number>(),
|
||||
largestCDATA: 0,
|
||||
commonPatterns: new Map<string, number>()
|
||||
};
|
||||
|
||||
const sampleSize = Math.min(100, xmlFiles.length);
|
||||
const sampledFiles = xmlFiles.slice(0, sampleSize);
|
||||
|
||||
for (const file of sampledFiles) {
|
||||
cdataStats.total++;
|
||||
|
||||
try {
|
||||
const content = await plugins.fs.readFile(file.path, 'utf8');
|
||||
|
||||
// Find all CDATA sections
|
||||
const cdataMatches = content.matchAll(/<!\[CDATA\[([\s\S]*?)\]\]>/g);
|
||||
const cdataSections = Array.from(cdataMatches);
|
||||
|
||||
if (cdataSections.length > 0) {
|
||||
cdataStats.filesWithCDATA++;
|
||||
cdataStats.totalCDATASections += cdataSections.length;
|
||||
|
||||
// Analyze each CDATA section
|
||||
for (const match of cdataSections) {
|
||||
const cdataContent = match[1];
|
||||
const cdataLength = cdataContent.length;
|
||||
|
||||
if (cdataLength > cdataStats.largestCDATA) {
|
||||
cdataStats.largestCDATA = cdataLength;
|
||||
}
|
||||
|
||||
// Try to find the parent element
|
||||
const beforeCDATA = content.substring(Math.max(0, match.index! - 100), match.index);
|
||||
const elementMatch = beforeCDATA.match(/<(\w+)[^>]*>\s*$/);
|
||||
if (elementMatch) {
|
||||
const element = elementMatch[1];
|
||||
cdataStats.cdataByElement.set(
|
||||
element,
|
||||
(cdataStats.cdataByElement.get(element) || 0) + 1
|
||||
);
|
||||
}
|
||||
|
||||
// Detect common patterns
|
||||
if (cdataContent.includes('<') && cdataContent.includes('>')) {
|
||||
cdataStats.commonPatterns.set(
|
||||
'XML/HTML content',
|
||||
(cdataStats.commonPatterns.get('XML/HTML content') || 0) + 1
|
||||
);
|
||||
}
|
||||
if (cdataContent.includes('&')) {
|
||||
cdataStats.commonPatterns.set(
|
||||
'Special characters',
|
||||
(cdataStats.commonPatterns.get('Special characters') || 0) + 1
|
||||
);
|
||||
}
|
||||
if (/^[A-Za-z0-9+/=\s]+$/.test(cdataContent.trim())) {
|
||||
cdataStats.commonPatterns.set(
|
||||
'Base64 data',
|
||||
(cdataStats.commonPatterns.get('Base64 data') || 0) + 1
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
// Skip files that can't be read
|
||||
}
|
||||
}
|
||||
|
||||
console.log('\nCDATA Usage Statistics:');
|
||||
console.log(`Files analyzed: ${cdataStats.total}`);
|
||||
console.log(`Files with CDATA: ${cdataStats.filesWithCDATA} (${(cdataStats.filesWithCDATA/cdataStats.total*100).toFixed(1)}%)`);
|
||||
console.log(`Total CDATA sections: ${cdataStats.totalCDATASections}`);
|
||||
console.log(`Largest CDATA section: ${cdataStats.largestCDATA} characters`);
|
||||
|
||||
if (cdataStats.cdataByElement.size > 0) {
|
||||
console.log('\nCDATA usage by element:');
|
||||
const sortedElements = Array.from(cdataStats.cdataByElement.entries())
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.slice(0, 5);
|
||||
|
||||
for (const [element, count] of sortedElements) {
|
||||
console.log(` <${element}>: ${count} occurrences`);
|
||||
}
|
||||
}
|
||||
|
||||
if (cdataStats.commonPatterns.size > 0) {
|
||||
console.log('\nCommon CDATA content patterns:');
|
||||
for (const [pattern, count] of cdataStats.commonPatterns.entries()) {
|
||||
console.log(` ${pattern}: ${count} occurrences`);
|
||||
}
|
||||
}
|
||||
|
||||
performanceTracker.endOperation('corpus-cdata');
|
||||
});
|
||||
|
||||
// Performance summary
|
||||
console.log('\n' + performanceTracker.getSummary());
|
||||
|
||||
// CDATA best practices
|
||||
console.log('\nCDATA Section Handling Best Practices:');
|
||||
console.log('1. Use CDATA for content with many special characters');
|
||||
console.log('2. Prefer CDATA for embedded HTML/XML snippets');
|
||||
console.log('3. Be aware that CDATA cannot be nested');
|
||||
console.log('4. Handle ]]> sequence in content by splitting sections');
|
||||
console.log('5. Remember CDATA is not allowed in attributes');
|
||||
console.log('6. Consider performance impact for large documents');
|
||||
console.log('7. Use for base64 data and complex text content');
|
||||
console.log('8. Preserve CDATA sections in round-trip operations');
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * PARSE-10 edge cases: checks how escaped CDATA markers, several CDATA
 * sections in one element, and non-ASCII CDATA content inside <cbc:Note>
 * show up in `invoice.notes[0]` after `fromXmlString`.
 *
 * A failure raised while parsing is printed as informational output and the
 * case is skipped. A mismatch between the parsed note and `expectedNote` is
 * NOT caught, so it fails the test.
 */
tap.test('PARSE-10: CDATA edge cases and security', async () => {
  console.log('\nTesting CDATA edge cases and security aspects...\n');

  const edgeCases = [
    {
      name: 'CDATA-like content (not actual CDATA)',
      // The markers are entity-escaped on purpose: the fixture must NOT contain
      // a real CDATA section, only text that looks like one once decoded.
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>EDGE-001</cbc:ID>
<cbc:Note>Text with &lt;![CDATA[ fake CDATA ]]&gt; markers</cbc:Note>
</ubl:Invoice>`,
      expectedNote: 'Text with <![CDATA[ fake CDATA ]]> markers',
      description: 'Escaped CDATA markers are just text'
    },
    {
      name: 'Multiple CDATA sections',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>EDGE-002</cbc:ID>
<cbc:Note><![CDATA[Part 1]]> and <![CDATA[Part 2]]></cbc:Note>
</ubl:Invoice>`,
      expectedNote: 'Part 1 and Part 2',
      description: 'Multiple CDATA sections in one element'
    },
    {
      name: 'CDATA with Unicode characters',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>EDGE-003</cbc:ID>
<cbc:Note><![CDATA[Unicode: € £ ¥ © ® ™ 中文 العربية]]></cbc:Note>
</ubl:Invoice>`,
      expectedNote: 'Unicode: € £ ¥ © ® ™ 中文 العربية',
      description: 'Unicode characters in CDATA'
    }
  ];

  for (const test of edgeCases) {
    console.log(`${test.name}:`);
    console.log(` Description: ${test.description}`);

    const invoice = new einvoice.EInvoice();

    // Only the parse step is guarded. Keeping the assertion outside this
    // try/catch is what allows a wrong note value to fail the test.
    try {
      await invoice.fromXmlString(test.xml);
    } catch (error) {
      console.log(` ℹ Result: ${error.message}`);
      continue;
    }

    const actualNote = invoice.notes?.[0] ?? '';
    console.log(` Expected: "${test.expectedNote}"`);
    console.log(` Actual: "${actualNote}"`);

    // Compare against undefined (not truthiness) so an empty-string
    // expectation would still be asserted.
    if (test.expectedNote !== undefined) {
      expect(actualNote).toEqual(test.expectedNote);
      console.log(' ✓ CDATA edge case handled correctly');
    }
  }
});
|
||||
|
||||
/**
 * PARSE-10 smoke run over invoice fixtures that carry CDATA in a note, a line
 * item description, and supplier name/address fields.
 *
 * Each fixture is parsed with `fromXmlString`; the invoice id and, when
 * present, the note count, line-item count and supplier name are printed.
 * Errors are printed and do not fail the test; nothing is asserted here.
 */
tap.test('PARSE-10: CDATA in real invoice scenarios', async () => {
  console.log('\nTesting CDATA usage in real invoice scenarios...\n');

  // Fixture 1: multi-line legal text in <cbc:Note>
  const legalDisclaimer = {
    name: 'Legal disclaimer with special formatting',
    xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>REAL-001</cbc:ID>
<cbc:Note><![CDATA[
TERMS & CONDITIONS:
1. Payment due within 30 days
2. Late payment charge: 1.5% per month
3. All prices exclude VAT (currently 19%)

For questions contact: billing@company.com
]]></cbc:Note>
</ubl:Invoice>`,
    description: 'Legal terms with special characters and formatting'
  };

  // Fixture 2: technical specification text in a line item description
  const technicalSpecs = {
    name: 'Product description with technical specs',
    xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>REAL-002</cbc:ID>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cac:Item>
<cbc:Name>Technical Component</cbc:Name>
<cbc:Description><![CDATA[
Component specs:
- Voltage: 12V DC
- Current: < 2A
- Temperature: -20°C to +85°C
- Compliance: CE & RoHS
- Dimensions: 50mm x 30mm x 15mm
]]></cbc:Description>
</cac:Item>
</cac:InvoiceLine>
</ubl:Invoice>`,
    description: 'Technical specifications with symbols'
  };

  // Fixture 3: supplier name and address line wrapped in CDATA
  const formattedAddress = {
    name: 'Address with special formatting',
    xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>REAL-003</cbc:ID>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name><![CDATA[Smith & Jones Ltd.]]></cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:AdditionalStreetName><![CDATA[Building "A" - 3rd Floor]]></cbc:AdditionalStreetName>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
</ubl:Invoice>`,
    description: 'Company name and address with special characters'
  };

  const realScenarios = [legalDisclaimer, technicalSpecs, formattedAddress];

  for (const { name, description, xml } of realScenarios) {
    console.log(`${name}:`);
    console.log(` Use case: ${description}`);

    try {
      const parsed = new einvoice.EInvoice();
      await parsed.fromXmlString(xml);

      console.log(` ✓ Invoice parsed: ID ${parsed.id}`);

      // Report only the parts of the invoice that were actually populated
      if (parsed.notes?.length > 0) {
        console.log(` Notes found: ${parsed.notes.length}`);
      }
      if (parsed.items?.length > 0) {
        console.log(` Line items: ${parsed.items.length}`);
      }
      if (parsed.from?.name) {
        console.log(` Supplier: ${parsed.from.name}`);
      }
    } catch (error) {
      console.log(` ✗ Error: ${error.message}`);
    }
  }
});
|
||||
|
||||
/**
 * PARSE-10 timing run: parses invoices whose <cbc:Note> holds a CDATA section
 * of exactly `chars` characters (100 / 1000 / 10000) and prints the parse time
 * and the length of the first parsed note.
 *
 * Errors are printed and do not fail the test; nothing is asserted here.
 */
tap.test('PARSE-10: CDATA performance with large content', async () => {
  console.log('\nTesting CDATA performance with large content...\n');

  // Generate invoices with varying CDATA content sizes
  const sizes = [
    { name: 'Small', chars: 100 },
    { name: 'Medium', chars: 1000 },
    { name: 'Large', chars: 10000 }
  ];

  // Repeated filler containing characters that would need escaping outside CDATA
  const fragment = 'Text with <>&" chars ';

  for (const size of sizes) {
    // Repeat then trim so the payload is exactly `size.chars` long. The previous
    // Array(chars / 10).fill(fragment) produced about 2.1x the labelled size and
    // threw for sizes that are not a multiple of 10.
    const content = fragment
      .repeat(Math.ceil(size.chars / fragment.length))
      .slice(0, size.chars);

    const xml = `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>PERF-${size.name}</cbc:ID>
<cbc:Note><![CDATA[${content}]]></cbc:Note>
</ubl:Invoice>`;

    // performance.now() gives sub-millisecond resolution; Date.now() only
    // resolves whole milliseconds.
    const startTime = performance.now();

    try {
      const invoice = new einvoice.EInvoice();
      await invoice.fromXmlString(xml);

      const parseTime = performance.now() - startTime;

      console.log(`${size.name} CDATA (${size.chars} chars):`);
      console.log(` Parse time: ${parseTime.toFixed(2)}ms`);
      console.log(` Note length: ${invoice.notes?.[0]?.length || 0} chars`);
      console.log(` ✓ Successfully parsed`);
    } catch (error) {
      console.log(` ✗ Error: ${error.message}`);
    }
  }
});
|
||||
|
||||
// Run the tests
|
||||
tap.start();
|
Reference in New Issue
Block a user