import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as einvoice from '../../../ts/index.js';
import * as plugins from '../../plugins.js';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';

/** A processing instruction located in raw XML text. */
interface FoundPI {
  /** PI target, e.g. `xml-stylesheet`. */
  target: string;
  /** Everything between the target and the closing `?>`, untrimmed. */
  data: string;
}

/** One group of rows in the PI syntax-rules table. */
interface SyntaxRuleGroup {
  name: string;
  valid?: string[];
  invalid?: string[];
  tests?: Array<{ pi: string; valid: boolean; note: string }>;
}

/**
 * Scans raw XML text for processing instructions of the form `<?target data?>`.
 *
 * The data part is matched lazily up to the first `?>`, so data that contains
 * a lone `?` or `>` is still recognised. This is a lightweight text scan, not
 * an XML parser: it does not skip comments or CDATA sections.
 *
 * @param xml Raw XML text to scan.
 * @returns All matches in document order, including the XML declaration.
 */
function findProcessingInstructions(xml: string): FoundPI[] {
  // A fresh regex per call keeps the global flag's lastIndex state local.
  const pattern = /<\?([^?\s]+)([\s\S]*?)\?>/g;
  return Array.from(xml.matchAll(pattern), (match) => ({
    target: match[1] ?? '',
    data: match[2] ?? '',
  }));
}

/**
 * Produces a printable message from a caught value, which may not be an Error.
 *
 * @param error The value received by a `catch` clause.
 * @returns `error.message` for Error instances, otherwise the stringified value.
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * PARSE-11: exercises how the library copes with XML processing instructions.
 * The sub-tests are informational: they log findings and timings and do not
 * fail when the parser rejects a sample document.
 */
tap.test('PARSE-11: Processing Instructions - Handle XML processing instructions', async (t) => {
  const performanceTracker = new PerformanceTracker('PARSE-11');

  await t.test('Basic processing instructions', async () => {
    performanceTracker.startOperation('basic-pi');

    const piTests: Array<{
      name: string;
      xml: string;
      description: string;
      target?: string;
      data?: string;
    }> = [
      {
        name: 'XML declaration',
        xml: `<?xml version="1.0" encoding="UTF-8"?>
<invoice>
  <id>TEST-001</id>
</invoice>`,
        target: 'xml',
        data: 'version="1.0" encoding="UTF-8"',
        description: 'Standard XML declaration',
      },
      {
        name: 'Stylesheet processing instruction',
        xml: `<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="invoice.xsl"?>
<invoice>
  <id>TEST-002</id>
</invoice>`,
        target: 'xml-stylesheet',
        data: 'type="text/xsl" href="invoice.xsl"',
        description: 'XSLT stylesheet reference',
      },
      {
        name: 'Multiple processing instructions',
        xml: `<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="invoice.xsl"?>
<?custom-processor param="value"?>
<invoice>
  <id>TEST-003</id>
</invoice>`,
        description: 'Multiple PIs before root element',
      },
      {
        name: 'PI within document',
        xml: `<?xml version="1.0"?>
<invoice>
  <?page-break?>
  <id>TEST-004</id>
  <?custom-instruction param="value"?>
  <amount>100.00</amount>
</invoice>`,
        description: 'PIs inside document structure',
      },
      {
        name: 'PI with no data',
        xml: `<?xml version="1.0"?>
<?page-break?>
<invoice>
  <id>TEST-005</id>
</invoice>`,
        description: 'Processing instructions without parameters',
      },
    ];

    for (const test of piTests) {
      const startTime = performance.now();

      console.log(`${test.name}:`);
      if (test.target) {
        console.log(` Target: ${test.target}`);
      }
      if (test.data) {
        console.log(` Data: ${test.data}`);
      }
      console.log(` Description: ${test.description}`);

      try {
        const invoice = new einvoice.EInvoice();
        if (invoice.fromXmlString) {
          await invoice.fromXmlString(test.xml);
          console.log(' ✓ Parsed with processing instructions');
        } else {
          console.log(' ⚠️ Cannot test without fromXmlString');
        }
      } catch (error: unknown) {
        // A rejected sample is reported, not treated as a test failure.
        console.log(` ✗ Error: ${describeError(error)}`);
      }

      performanceTracker.recordMetric('pi-parsing', performance.now() - startTime);
    }

    performanceTracker.endOperation('basic-pi');
  });

  await t.test('Processing instruction syntax rules', async () => {
    performanceTracker.startOperation('pi-syntax');

    const syntaxTests: SyntaxRuleGroup[] = [
      {
        name: 'Valid PI names',
        valid: [
          '<?valid-name data?>',
          '<?name123 data?>',
          '<?my-processor data?>',
          '<?_underscore data?>',
        ],
        invalid: [
          '<?123name data?>', // Cannot start with number
          '<?my name data?>', // No spaces in target
          '<?xml data?>', // 'xml' is reserved
          '<? data?>', // Must have target name
        ],
      },
      {
        name: 'Reserved target names',
        tests: [
          { pi: '<?xml version="1.0"?>', valid: true, note: 'XML declaration allowed' },
          { pi: '<?XML data?>', valid: false, note: 'Case variations of xml reserved' },
          { pi: '<?XmL data?>', valid: false, note: 'Any case of xml reserved' },
        ],
      },
      {
        name: 'PI data requirements',
        tests: [
          { pi: '<?target?>', valid: true, note: 'Empty data is valid' },
          { pi: '<?target ?>', valid: true, note: 'Whitespace only is valid' },
          { pi: '<?target data with ?> inside?>', valid: false, note: 'Cannot contain ?>' },
          {
            pi: '<?target data with ? and > separately?>',
            valid: true,
            note: 'Can contain ? and > separately',
          },
        ],
      },
    ];

    for (const test of syntaxTests) {
      console.log(`\n${test.name}:`);

      if (test.valid && test.invalid) {
        console.log(' Valid examples:');
        for (const valid of test.valid) {
          console.log(` ✓ ${valid}`);
        }
        console.log(' Invalid examples:');
        for (const invalid of test.invalid) {
          console.log(` ✗ ${invalid}`);
        }
      }

      if (test.tests) {
        for (const syntaxTest of test.tests) {
          console.log(` ${syntaxTest.pi}`);
          console.log(` ${syntaxTest.valid ? '✓' : '✗'} ${syntaxTest.note}`);
        }
      }
    }

    performanceTracker.endOperation('pi-syntax');
  });

  await t.test('Common processing instructions in e-invoices', async () => {
    performanceTracker.startOperation('einvoice-pi');

    const einvoicePIs = [
      {
        name: 'XSLT transformation',
        xml: `<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="https://example.com/invoice-transform.xsl"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
  <ID>UBL-001</ID>
</Invoice>`,
        purpose: 'Browser-based invoice rendering',
        common: true,
      },
      {
        name: 'Schema validation hint',
        xml: `<?xml version="1.0"?>
<?xml-model href="invoice.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<invoice>
  <id>TEST-001</id>
</invoice>`,
        purpose: 'Schema location for validation',
        common: false,
      },
      {
        name: 'PDF generation instructions',
        xml: `<?xml version="1.0"?>
<?pdf-generator version="2.0" profile="PDF/A-3"?>
<?pdf-attachment filename="invoice.xml" relationship="Data"?>
<invoice>
  <id>PDF-001</id>
</invoice>`,
        purpose: 'PDF/A-3 generation hints',
        common: false,
      },
      {
        name: 'Digital signature instructions',
        xml: `<?xml version="1.0"?>
<?signature-method algorithm="RSA-SHA256"?>
<?signature-transform algorithm="http://www.w3.org/2001/10/xml-exc-c14n#"?>
<invoice>
  <id>SIGNED-001</id>
</invoice>`,
        purpose: 'Signing process configuration',
        common: false,
      },
      {
        name: 'Format-specific processing',
        xml: `<?xml version="1.0"?>
<?cii-processor profile="EN16931"?>
<CrossIndustryInvoice>
  <ID>CII-001</ID>
</CrossIndustryInvoice>`,
        purpose: 'Format-specific metadata',
        common: false,
      },
    ];

    for (const pi of einvoicePIs) {
      console.log(`\n${pi.name}:`);
      console.log(` Purpose: ${pi.purpose}`);
      console.log(` Common in e-invoices: ${pi.common ? 'Yes' : 'No'}`);

      const startTime = performance.now();

      try {
        // Extract PIs from XML
        const pis = findProcessingInstructions(pi.xml);
        console.log(` Found ${pis.length} processing instructions:`);

        for (const { target, data } of pis) {
          // The XML declaration is counted above but not listed.
          if (target !== 'xml') {
            console.log(` <?${target}${data}?>`);
          }
        }
      } catch (error: unknown) {
        console.log(` Error analyzing PIs: ${describeError(error)}`);
      }

      performanceTracker.recordMetric('einvoice-pi', performance.now() - startTime);
    }

    performanceTracker.endOperation('einvoice-pi');
  });

  await t.test('Processing instruction handling strategies', async () => {
    performanceTracker.startOperation('pi-handling');

    /** Dispatches processing instructions to callbacks registered per target. */
    class PIHandler {
      private readonly handlers = new Map<string, (data: string) => void>();

      /** Registers (or replaces) the callback invoked for PIs with the given target. */
      register(target: string, handler: (data: string) => void): void {
        this.handlers.set(target, handler);
      }

      /** Invokes the registered callback for every PI in `xml`; unknown targets are only logged. */
      process(xml: string): void {
        for (const { target, data } of findProcessingInstructions(xml)) {
          if (target === 'xml') continue; // Skip XML declaration

          const handler = this.handlers.get(target);
          if (handler) {
            console.log(` Processing <?${target}${data}?>`);
            handler(data.trim());
          } else {
            console.log(` Ignoring unhandled PI: <?${target}${data}?>`);
          }
        }
      }
    }

    const handler = new PIHandler();

    // Register handlers for common PIs
    handler.register('xml-stylesheet', (data) => {
      const hrefMatch = data.match(/href="([^"]+)"/);
      if (hrefMatch) {
        console.log(` Stylesheet URL: ${hrefMatch[1]}`);
      }
    });

    handler.register('pdf-generator', (data) => {
      const versionMatch = data.match(/version="([^"]+)"/);
      if (versionMatch) {
        console.log(` PDF generator version: ${versionMatch[1]}`);
      }
    });

    handler.register('page-break', () => {
      console.log(' Page break instruction found');
    });

    // Test document: three handled targets plus one with no registered handler.
    const testXml = `<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="invoice.xsl"?>
<?pdf-generator version="2.0" profile="PDF/A-3"?>
<?unknown-pi some-data?>
<invoice>
  <?page-break?>
  <id>Test</id>
</invoice>`;

    console.log('Processing instructions found:');
    handler.process(testXml);

    performanceTracker.endOperation('pi-handling');
  });

  await t.test('PI security considerations', async () => {
    performanceTracker.startOperation('pi-security');

    const securityTests = [
      {
        name: 'External resource reference',
        pi: '<?xml-stylesheet href="http://malicious.example/steal-data.xsl"?>',
        risk: 'SSRF, data exfiltration',
        mitigation: 'Validate URLs, use allowlist',
      },
      {
        name: 'Code execution hint',
        pi: '<?execute-script language="javascript" code="alert(1)"?>',
        risk: 'Arbitrary code execution',
        mitigation: 'Never execute PI content as code',
      },
      {
        name: 'File system access',
        pi: '<?include-file path="/etc/passwd"?>',
        risk: 'Local file disclosure',
        mitigation: 'Ignore file system PIs',
      },
      {
        name: 'Parser-specific instructions',
        pi: '<?parser-config disable-security-checks="true"?>',
        risk: 'Security bypass',
        mitigation: 'Ignore parser configuration PIs',
      },
    ];

    console.log('Security considerations for processing instructions:');

    for (const test of securityTests) {
      console.log(`\n${test.name}:`);
      console.log(` PI: ${test.pi}`);
      console.log(` Risk: ${test.risk}`);
      console.log(` Mitigation: ${test.mitigation}`);
    }

    console.log('\nBest practices:');
    console.log(' 1. Whitelist allowed PI targets');
    console.log(' 2. Validate all external references');
    console.log(' 3. Never execute PI content as code');
    console.log(' 4. Log suspicious PIs for monitoring');
    console.log(' 5. Consider removing PIs in production');

    performanceTracker.endOperation('pi-security');
  });

  await t.test('Corpus PI analysis', async () => {
    performanceTracker.startOperation('corpus-pi');

    const corpusLoader = new CorpusLoader();
    const xmlFiles = await corpusLoader.getFiles(/\.(xml|ubl|cii)$/);

    console.log(`\nAnalyzing processing instructions in ${xmlFiles.length} corpus files...`);

    const piStats = {
      total: 0,
      unreadable: 0,
      filesWithPIs: 0,
      piByTarget: new Map<string, number>(),
      totalPIs: 0,
      stylesheetRefs: 0,
      otherExternalRefs: 0,
    };

    // Only the first files are inspected to keep the run short.
    const sampleSize = Math.min(100, xmlFiles.length);
    const sampledFiles = xmlFiles.slice(0, sampleSize);

    for (const file of sampledFiles) {
      piStats.total++;

      try {
        const content = await plugins.fs.readFile(file.path, 'utf8');

        // Find all PIs except XML declaration
        const pis = findProcessingInstructions(content).filter((pi) => pi.target !== 'xml');
        if (pis.length === 0) {
          continue;
        }

        piStats.filesWithPIs++;
        piStats.totalPIs += pis.length;

        for (const { target, data } of pis) {
          piStats.piByTarget.set(target, (piStats.piByTarget.get(target) ?? 0) + 1);

          // Check for external references
          if (target === 'xml-stylesheet') {
            piStats.stylesheetRefs++;
          } else if (data.includes('href=') || data.includes('src=')) {
            piStats.otherExternalRefs++;
          }
        }
      } catch {
        // Best effort: a file that cannot be read is skipped, but counted so
        // the statistics below can report it.
        piStats.unreadable++;
      }
    }

    // Avoid printing "NaN%" when the corpus yields no files.
    const shareWithPIs =
      piStats.total > 0 ? ((piStats.filesWithPIs / piStats.total) * 100).toFixed(1) : '0.0';

    console.log('\nProcessing Instruction Statistics:');
    console.log(`Files analyzed: ${piStats.total}`);
    if (piStats.unreadable > 0) {
      console.log(`Files skipped (unreadable): ${piStats.unreadable}`);
    }
    console.log(`Files with PIs: ${piStats.filesWithPIs} (${shareWithPIs}%)`);
    console.log(`Total PIs found: ${piStats.totalPIs}`);
    console.log(`Stylesheet references: ${piStats.stylesheetRefs}`);
    console.log(`Other external references: ${piStats.otherExternalRefs}`);

    if (piStats.piByTarget.size > 0) {
      console.log('\nPI targets found:');
      // Ten most frequent targets, most frequent first.
      const sortedTargets = Array.from(piStats.piByTarget.entries())
        .sort((a, b) => b[1] - a[1])
        .slice(0, 10);

      for (const [target, count] of sortedTargets) {
        console.log(` <?${target}?>: ${count} occurrences`);
      }
    }

    performanceTracker.endOperation('corpus-pi');
  });

  await t.test('PI performance impact', async () => {
    performanceTracker.startOperation('pi-performance');

    /**
     * Generates a document with `piCount` PIs in the prolog and a further
     * `piCount / 2` (rounded up) PIs interleaved with elements in the body.
     */
    const generateXmlWithPIs = (piCount: number): string => {
      let xml = '<?xml version="1.0"?>\n';

      // Add various PIs
      for (let i = 0; i < piCount; i++) {
        xml += `<?pi-${i} data="value${i}"?>\n`;
      }

      xml += '<invoice>\n';

      // Add some PIs within document
      for (let i = 0; i < piCount / 2; i++) {
        xml += `  <?internal-pi-${i}?>\n`;
        xml += `  <field${i}>Value ${i}</field${i}>\n`;
      }

      xml += '</invoice>';
      return xml;
    };

    console.log('Performance impact of processing instructions:');

    const testCounts = [0, 10, 50, 100];

    for (const count of testCounts) {
      const xml = generateXmlWithPIs(count);
      const xmlSize = Buffer.byteLength(xml, 'utf8');

      const startTime = performance.now();

      try {
        const invoice = new einvoice.EInvoice();
        if (invoice.fromXmlString) {
          await invoice.fromXmlString(xml);
        }

        const parseTime = performance.now() - startTime;

        console.log(` ${count} PIs (${(xmlSize / 1024).toFixed(1)}KB): ${parseTime.toFixed(2)}ms`);

        // Skipped for count 0 to avoid dividing by zero.
        if (count > 0) {
          console.log(` Time per PI: ${(parseTime / count).toFixed(3)}ms`);
        }

        performanceTracker.recordMetric(`pi-count-${count}`, parseTime);
      } catch (error: unknown) {
        console.log(` Error with ${count} PIs: ${describeError(error)}`);
      }
    }

    performanceTracker.endOperation('pi-performance');
  });

  // Performance summary
  console.log('\n' + performanceTracker.getSummary());

  // PI best practices
  console.log('\nProcessing Instruction Best Practices:');
  console.log('1. Preserve PIs during document processing');
  console.log('2. Validate external references for security');
  console.log('3. Support common PIs (xml-stylesheet)');
  console.log('4. Allow custom PI handlers for extensibility');
  console.log('5. Ignore unknown PIs gracefully');
  console.log('6. Never execute PI content as code');
  console.log('7. Consider PI impact on performance');
  console.log('8. Document which PIs are supported');
});

tap.start();