518 lines
16 KiB
TypeScript
518 lines
16 KiB
TypeScript
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
|
import * as einvoice from '../../../ts/index.js';
|
|
import * as plugins from '../../plugins.js';
|
|
import { CorpusLoader } from '../../helpers/corpus.loader.js';
|
|
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
|
|
|
|
tap.test('PARSE-11: Processing Instructions - Handle XML processing instructions', async (t) => {
|
|
const performanceTracker = new PerformanceTracker('PARSE-11');
|
|
|
|
// Smoke test: feeds documents carrying prolog and in-body processing instructions
// to the parser. Outcomes are only logged, so a parse failure does not fail the subtest.
await t.test('Basic processing instructions', async () => {
  performanceTracker.startOperation('basic-pi');

  // `target` and `data` are informational and only present on the single-PI fixtures.
  const fixtures = [
    {
      name: 'XML declaration',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<invoice>
<id>TEST-001</id>
</invoice>`,
      target: 'xml',
      data: 'version="1.0" encoding="UTF-8"',
      description: 'Standard XML declaration'
    },
    {
      name: 'Stylesheet processing instruction',
      xml: `<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="invoice.xsl"?>
<invoice>
<id>TEST-002</id>
</invoice>`,
      target: 'xml-stylesheet',
      data: 'type="text/xsl" href="invoice.xsl"',
      description: 'XSLT stylesheet reference'
    },
    {
      name: 'Multiple processing instructions',
      xml: `<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="invoice.xsl"?>
<?xml-model href="invoice.rnc" type="application/relax-ng-compact-syntax"?>
<?custom-pi data="value"?>
<invoice>
<id>TEST-003</id>
</invoice>`,
      description: 'Multiple PIs before root element'
    },
    {
      name: 'PI within document',
      xml: `<?xml version="1.0"?>
<invoice>
<header>
<?page-break?>
<id>TEST-004</id>
</header>
<?custom-instruction param="value"?>
<body>
<amount>100.00</amount>
</body>
</invoice>`,
      description: 'PIs inside document structure'
    },
    {
      name: 'PI with no data',
      xml: `<?xml version="1.0"?>
<invoice>
<?break?>
<id>TEST-005</id>
<?end?>
</invoice>`,
      description: 'Processing instructions without parameters'
    }
  ];

  for (const { name, xml, target, data, description } of fixtures) {
    // The timer deliberately spans the logging as well as the parse attempt.
    const startedAt = performance.now();

    const headerLines = [`${name}:`];
    if (target) {
      headerLines.push(` Target: ${target}`);
    }
    if (data) {
      headerLines.push(` Data: ${data}`);
    }
    headerLines.push(` Description: ${description}`);
    for (const line of headerLines) {
      console.log(line);
    }

    try {
      const invoice = new einvoice.EInvoice();
      if (invoice.fromXmlString) {
        await invoice.fromXmlString(xml);
        console.log(' ✓ Parsed with processing instructions');
      } else {
        console.log(' ⚠️ Cannot test without fromXmlString');
      }
    } catch (error) {
      // Failures are reported but tolerated.
      console.log(` ✗ Error: ${error.message}`);
    }

    performanceTracker.recordMetric('pi-parsing', performance.now() - startedAt);
  }

  performanceTracker.endOperation('basic-pi');
});
|
|
|
|
// Documents (by logging) which processing-instruction forms are well-formed per
// XML 1.0 §2.6. Nothing is parsed here; the tables below are purely illustrative.
await t.test('Processing instruction syntax rules', async () => {
  performanceTracker.startOperation('pi-syntax');

  // Entries carry either `valid`/`invalid` example lists or a `tests` list of
  // individually annotated examples.
  const syntaxTests = [
    {
      name: 'Valid PI names',
      valid: [
        '<?valid-name data?>',
        '<?name123 data?>',
        '<?my-processor data?>',
        '<?_underscore data?>'
      ],
      invalid: [
        '<?123name data?>', // Cannot start with number
        '<?my name data?>', // No spaces in target
        '<?xml data?>', // 'xml' is reserved
        '<? data?>' // Must have target name
      ]
    },
    {
      name: 'Reserved target names',
      tests: [
        { pi: '<?xml version="1.0"?>', valid: true, note: 'XML declaration allowed' },
        { pi: '<?XML data?>', valid: false, note: 'Case variations of xml reserved' },
        { pi: '<?XmL data?>', valid: false, note: 'Any case of xml reserved' }
      ]
    },
    {
      name: 'PI data requirements',
      tests: [
        { pi: '<?target?>', valid: true, note: 'Empty data is valid' },
        { pi: '<?target ?>', valid: true, note: 'Whitespace only is valid' },
        // The data itself must embed "?>" for this to be an invalid example:
        // a PI ending in "??>" is well-formed (its data merely ends with "?").
        { pi: '<?target cannot contain ?> in data?>', valid: false, note: 'Cannot contain ?>' },
        { pi: '<?target data with ? and > separately?>', valid: true, note: 'Can contain ? and > separately' }
      ]
    }
  ];

  for (const test of syntaxTests) {
    console.log(`\n${test.name}:`);

    if (test.valid && test.invalid) {
      console.log(' Valid examples:');
      for (const valid of test.valid) {
        console.log(` ✓ ${valid}`);
      }
      console.log(' Invalid examples:');
      for (const invalid of test.invalid) {
        console.log(` ✗ ${invalid}`);
      }
    }

    if (test.tests) {
      for (const syntaxTest of test.tests) {
        console.log(` ${syntaxTest.pi}`);
        console.log(` ${syntaxTest.valid ? '✓' : '✗'} ${syntaxTest.note}`);
      }
    }
  }

  performanceTracker.endOperation('pi-syntax');
});
|
|
|
|
// Lists the processing instructions found in sample e-invoice documents.
// Extraction is regex-based and purely informational; nothing is parsed or asserted.
await t.test('Common processing instructions in e-invoices', async () => {
  performanceTracker.startOperation('einvoice-pi');

  // Group 1 = PI target, group 2 = raw data (including leading whitespace).
  // The data part is matched lazily up to the first "?>" rather than with
  // `[^?]*`, because PI data may contain a lone "?" (e.g. a URL query string);
  // only the "?>" sequence terminates a PI.
  const piPattern = /<\?([^?\s]+)([\s\S]*?)\?>/g;

  const einvoicePIs = [
    {
      name: 'XSLT transformation',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="https://example.com/invoice-transform.xsl"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>UBL-001</ID>
</Invoice>`,
      purpose: 'Browser-based invoice rendering',
      common: true
    },
    {
      name: 'Schema validation hint',
      xml: `<?xml version="1.0"?>
<?xml-model href="http://docs.oasis-open.org/ubl/os-UBL-2.1/xsd/maindoc/UBL-Invoice-2.1.xsd"
schematypens="http://www.w3.org/2001/XMLSchema"?>
<Invoice>
<ID>TEST-001</ID>
</Invoice>`,
      purpose: 'Schema location for validation',
      common: false
    },
    {
      name: 'PDF generation instructions',
      xml: `<?xml version="1.0"?>
<?pdf-generator version="2.0" profile="ZUGFeRD"?>
<?pdf-attachment filename="invoice.xml" relationship="Data"?>
<Invoice>
<ID>PDF-001</ID>
</Invoice>`,
      purpose: 'PDF/A-3 generation hints',
      common: false
    },
    {
      name: 'Digital signature instructions',
      xml: `<?xml version="1.0"?>
<?signature-method algorithm="RSA-SHA256"?>
<?signature-transform algorithm="http://www.w3.org/2001/10/xml-exc-c14n#"?>
<Invoice>
<ID>SIGNED-001</ID>
</Invoice>`,
      purpose: 'Signing process configuration',
      common: false
    },
    {
      name: 'Format-specific processing',
      xml: `<?xml version="1.0"?>
<?facturx-version 1.0?>
<?zugferd-profile EXTENDED?>
<rsm:CrossIndustryInvoice>
<rsm:ExchangedDocument>
<ram:ID>CII-001</ram:ID>
</rsm:ExchangedDocument>
</rsm:CrossIndustryInvoice>`,
      purpose: 'Format-specific metadata',
      common: false
    }
  ];

  for (const pi of einvoicePIs) {
    console.log(`\n${pi.name}:`);
    console.log(` Purpose: ${pi.purpose}`);
    console.log(` Common in e-invoices: ${pi.common ? 'Yes' : 'No'}`);

    const startTime = performance.now();

    try {
      // Drop the XML declaration up front so the reported count matches the
      // entries listed below it.
      const pis = Array.from(pi.xml.matchAll(piPattern)).filter(([, target]) => target !== 'xml');

      console.log(` Found ${pis.length} processing instructions:`);
      for (const [, target, data] of pis) {
        console.log(` <?${target}${data}?>`);
      }
    } catch (error) {
      // Tolerate non-Error throwables instead of printing "undefined".
      const message = error instanceof Error ? error.message : String(error);
      console.log(` Error analyzing PIs: ${message}`);
    }

    performanceTracker.recordMetric('einvoice-pi', performance.now() - startTime);
  }

  performanceTracker.endOperation('einvoice-pi');
});
|
|
|
|
await t.test('Processing instruction handling strategies', async () => {
|
|
performanceTracker.startOperation('pi-handling');
|
|
|
|
/**
 * Minimal dispatcher that scans an XML string for processing instructions
 * and hands each one's data to the callback registered for its target.
 */
class PIHandler {
  private readonly handlers = new Map<string, (data: string) => void>();

  /**
   * Registers the callback for a PI target, replacing any earlier one.
   * @param target PI target name, e.g. `xml-stylesheet`
   * @param handler invoked with the trimmed PI data
   */
  register(target: string, handler: (data: string) => void): void {
    this.handlers.set(target, handler);
  }

  /**
   * Dispatches every processing instruction found in `xml`, in document order.
   * The XML declaration is skipped; targets without a handler are only logged.
   * @param xml raw XML text (scanned with a regex, not parsed)
   */
  process(xml: string): void {
    // The data part is matched lazily up to the first "?>" rather than with
    // `[^?]*`: PI data may contain a lone "?" (e.g. href="a.xsl?v=2"), and
    // excluding it made such instructions silently disappear.
    const piRegex = /<\?([^?\s]+)([\s\S]*?)\?>/g;

    for (const [, target, data] of xml.matchAll(piRegex)) {
      if (target === 'xml') {
        continue; // Skip XML declaration
      }

      const handler = this.handlers.get(target);
      if (handler) {
        console.log(` Processing <?${target}...?>`);
        handler(data.trim());
      } else {
        console.log(` Ignoring unhandled PI: <?${target}...?>`);
      }
    }
  }
}
|
|
|
|
// Sample document mixing handled targets, a data-less PI and an unknown target.
const sampleDocument = `<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="invoice.xsl"?>
<?pdf-generator version="2.0" profile="ZUGFeRD"?>
<invoice>
<?page-break?>
<content>Test</content>
<?custom-pi unknown="true"?>
</invoice>`;

const handler = new PIHandler();

// Stylesheet PIs: report the referenced href when one is present.
handler.register('xml-stylesheet', (piData) => {
  const found = /href="([^"]+)"/.exec(piData);
  if (found !== null) {
    console.log(` Stylesheet URL: ${found[1]}`);
  }
});

// PDF generator PIs: report the declared version when one is present.
handler.register('pdf-generator', (piData) => {
  const found = /version="([^"]+)"/.exec(piData);
  if (found !== null) {
    console.log(` PDF generator version: ${found[1]}`);
  }
});

// Page breaks carry no data worth inspecting; just acknowledge them.
handler.register('page-break', () => {
  console.log(' Page break instruction found');
});

console.log('Processing instructions found:');
handler.process(sampleDocument);

performanceTracker.endOperation('pi-handling');
|
|
});
|
|
|
|
// Prints a catalogue of risky processing-instruction patterns together with the
// suggested mitigation for each. Informational only: nothing is parsed or asserted.
await t.test('PI security considerations', async () => {
  performanceTracker.startOperation('pi-security');

  const threatCatalogue = [
    {
      name: 'External resource reference',
      pi: '<?xml-stylesheet href="http://malicious.com/steal-data.xsl"?>',
      risk: 'SSRF, data exfiltration',
      mitigation: 'Validate URLs, use allowlist'
    },
    {
      name: 'Code execution hint',
      pi: '<?execute-script language="javascript" code="alert(1)"?>',
      risk: 'Arbitrary code execution',
      mitigation: 'Never execute PI content as code'
    },
    {
      name: 'File system access',
      pi: '<?include-file path="/etc/passwd"?>',
      risk: 'Local file disclosure',
      mitigation: 'Ignore file system PIs'
    },
    {
      name: 'Parser-specific instructions',
      pi: '<?parser-config disable-security-checks="true"?>',
      risk: 'Security bypass',
      mitigation: 'Ignore parser configuration PIs'
    }
  ];

  // Kept as complete output lines so the numbering stays next to the text.
  const bestPracticeLines = [
    ' 1. Whitelist allowed PI targets',
    ' 2. Validate all external references',
    ' 3. Never execute PI content as code',
    ' 4. Log suspicious PIs for monitoring',
    ' 5. Consider removing PIs in production'
  ];

  console.log('Security considerations for processing instructions:');

  for (const { name, pi, risk, mitigation } of threatCatalogue) {
    console.log(`\n${name}:`);
    console.log(` PI: ${pi}`);
    console.log(` Risk: ${risk}`);
    console.log(` Mitigation: ${mitigation}`);
  }

  console.log('\nBest practices:');
  for (const line of bestPracticeLines) {
    console.log(line);
  }

  performanceTracker.endOperation('pi-security');
});
|
|
|
|
// Scans up to 100 corpus files for processing instructions (excluding the XML
// declaration) and prints aggregate statistics. Informational only: no assertions.
await t.test('Corpus PI analysis', async () => {
  performanceTracker.startOperation('corpus-pi');

  const corpusLoader = new CorpusLoader();
  const xmlFiles = await corpusLoader.getFiles(/\.(xml|ubl|cii)$/);

  console.log(`\nAnalyzing processing instructions in ${xmlFiles.length} corpus files...`);

  // Group 1 = PI target, group 2 = raw data. The data part is matched lazily up
  // to the first "?>" rather than with `[^?]*`, so PIs whose data contains a
  // lone "?" (e.g. a URL query string) are counted too.
  const piPattern = /<\?([^?\s]+)([\s\S]*?)\?>/g;

  const piStats = {
    total: 0,
    unreadable: 0,
    filesWithPIs: 0,
    piByTarget: new Map<string, number>(),
    totalPIs: 0,
    stylesheetRefs: 0,
    otherExternalRefs: 0
  };

  // slice() clamps to the array length, so no explicit Math.min is needed.
  const sampledFiles = xmlFiles.slice(0, 100);

  for (const file of sampledFiles) {
    piStats.total++;

    try {
      const content = await plugins.fs.readFile(file.path, 'utf8');

      // Find all PIs except XML declaration
      const pis = Array.from(content.matchAll(piPattern)).filter((m) => m[1] !== 'xml');
      if (pis.length === 0) {
        continue;
      }

      piStats.filesWithPIs++;
      piStats.totalPIs += pis.length;

      for (const [, target, data] of pis) {
        piStats.piByTarget.set(target, (piStats.piByTarget.get(target) ?? 0) + 1);

        // Check for external references
        if (target === 'xml-stylesheet') {
          piStats.stylesheetRefs++;
        } else if (data.includes('href=') || data.includes('src=')) {
          piStats.otherExternalRefs++;
        }
      }
    } catch {
      // Best effort: keep going, but remember the failure so it is reported
      // below instead of vanishing silently.
      piStats.unreadable++;
    }
  }

  // Guard against an empty corpus, which would otherwise print "NaN%".
  const shareWithPIs = piStats.total > 0
    ? (piStats.filesWithPIs / piStats.total * 100).toFixed(1)
    : '0.0';

  console.log('\nProcessing Instruction Statistics:');
  console.log(`Files analyzed: ${piStats.total}`);
  if (piStats.unreadable > 0) {
    console.log(`Files skipped (could not be read or scanned): ${piStats.unreadable}`);
  }
  console.log(`Files with PIs: ${piStats.filesWithPIs} (${shareWithPIs}%)`);
  console.log(`Total PIs found: ${piStats.totalPIs}`);
  console.log(`Stylesheet references: ${piStats.stylesheetRefs}`);
  console.log(`Other external references: ${piStats.otherExternalRefs}`);

  if (piStats.piByTarget.size > 0) {
    console.log('\nPI targets found:');
    // Ten most frequent targets, most frequent first.
    const sortedTargets = Array.from(piStats.piByTarget.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, 10);

    for (const [target, count] of sortedTargets) {
      console.log(` <?${target}...?>: ${count} occurrences`);
    }
  }

  performanceTracker.endOperation('corpus-pi');
});
|
|
|
|
await t.test('PI performance impact', async () => {
|
|
performanceTracker.startOperation('pi-performance');
|
|
|
|
// Generate documents with varying PI counts
|
|
// Builds a synthetic document: `piCount` PIs in the prolog, then an <invoice>
// root holding one data-less PI plus one field element for each index below
// piCount / 2. The result has no trailing newline.
const generateXmlWithPIs = (piCount: number): string => {
  const parts: string[] = ['<?xml version="1.0"?>\n'];

  // Prolog PIs, placed before the root element
  let prologIndex = 0;
  while (prologIndex < piCount) {
    parts.push(`<?pi-${prologIndex} data="value${prologIndex}" param="test"?>\n`);
    prologIndex += 1;
  }

  parts.push('<invoice>\n');

  // In-body PIs, each followed by a sibling field element
  let bodyIndex = 0;
  while (bodyIndex < piCount / 2) {
    parts.push(
      ` <?internal-pi-${bodyIndex}?>\n`,
      ` <field${bodyIndex}>Value ${bodyIndex}</field${bodyIndex}>\n`
    );
    bodyIndex += 1;
  }

  parts.push('</invoice>');
  return parts.join('');
};
|
|
|
|
console.log('Performance impact of processing instructions:');

// Parse generated documents of increasing PI density and log the wall-clock
// time of each attempt. Parse errors are reported but do not fail the subtest.
for (const piCount of [0, 10, 50, 100]) {
  const generatedXml = generateXmlWithPIs(piCount);
  const byteSize = Buffer.byteLength(generatedXml, 'utf8');

  const startedAt = performance.now();

  try {
    const invoice = new einvoice.EInvoice();
    if (invoice.fromXmlString) {
      await invoice.fromXmlString(generatedXml);
    }

    const elapsedMs = performance.now() - startedAt;

    console.log(` ${piCount} PIs (${(byteSize/1024).toFixed(1)}KB): ${elapsedMs.toFixed(2)}ms`);

    // Skip the per-PI figure for the baseline document to avoid dividing by zero.
    if (piCount > 0) {
      console.log(` Time per PI: ${(elapsedMs/piCount).toFixed(3)}ms`);
    }

    // The metric is only recorded when the parse attempt did not throw.
    performanceTracker.recordMetric(`pi-count-${piCount}`, elapsedMs);
  } catch (error) {
    console.log(` Error with ${piCount} PIs: ${error.message}`);
  }
}

performanceTracker.endOperation('pi-performance');
|
|
});
|
|
|
|
// Print the tracker's summary for all operations recorded above
const performanceSummary = performanceTracker.getSummary();
console.log('\n' + performanceSummary);

// Closing checklist of PI handling recommendations, one output line each
const bestPractices = [
  '1. Preserve PIs during document processing',
  '2. Validate external references for security',
  '3. Support common PIs (xml-stylesheet)',
  '4. Allow custom PI handlers for extensibility',
  '5. Ignore unknown PIs gracefully',
  '6. Never execute PI content as code',
  '7. Consider PI impact on performance',
  '8. Document which PIs are supported'
];

console.log('\nProcessing Instruction Best Practices:');
for (const practice of bestPractices) {
  console.log(practice);
}
|
|
});
|
|
|
|
// Start the tap runner so the tests registered above are executed.
tap.start();