// einvoice/test/suite/einvoice_parsing/test.parse-11.processing-instructions.ts
//
// 400 lines
// 12 KiB
// TypeScript

import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as einvoice from '../../../ts/index.js';
// PARSE-11 (basic): hands sample documents that carry processing instructions
// (PIs) to EInvoice.fromXmlString and logs whether each one was accepted.
// The check is best-effort: a parse failure is logged, never rethrown, so this
// case reports behavior rather than enforcing it.
tap.test('PARSE-11: Basic processing instructions', async () => {
  // Sample documents. `target` and `data` are optional and only used for logging.
  // NOTE: the XML lines inside the template literals are deliberately flush-left;
  // any leading whitespace would become part of the document text.
  const piTests = [
    {
      name: 'XML declaration',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<invoice>
<id>TEST-001</id>
</invoice>`,
      target: 'xml',
      data: 'version="1.0" encoding="UTF-8"',
      description: 'Standard XML declaration'
    },
    {
      name: 'Stylesheet processing instruction',
      xml: `<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="invoice.xsl"?>
<invoice>
<id>TEST-002</id>
</invoice>`,
      target: 'xml-stylesheet',
      data: 'type="text/xsl" href="invoice.xsl"',
      description: 'XSLT stylesheet reference'
    },
    {
      name: 'Multiple processing instructions',
      xml: `<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="invoice.xsl"?>
<?xml-model href="invoice.rnc" type="application/relax-ng-compact-syntax"?>
<?custom-pi data="value"?>
<invoice>
<id>TEST-003</id>
</invoice>`,
      description: 'Multiple PIs before root element'
    },
    {
      name: 'PI within document',
      xml: `<?xml version="1.0"?>
<invoice>
<header>
<?page-break?>
<id>TEST-004</id>
</header>
<?custom-instruction param="value"?>
<body>
<amount>100.00</amount>
</body>
</invoice>`,
      description: 'PIs inside document structure'
    },
    {
      name: 'PI with no data',
      xml: `<?xml version="1.0"?>
<invoice>
<?break?>
<id>TEST-005</id>
<?end?>
</invoice>`,
      description: 'Processing instructions without parameters'
    }
  ];

  for (const testCase of piTests) {
    console.log(`${testCase.name}:`);
    if (testCase.target) {
      console.log(` Target: ${testCase.target}`);
    }
    if (testCase.data) {
      console.log(` Data: ${testCase.data}`);
    }
    console.log(` Description: ${testCase.description}`);

    try {
      const invoice = new einvoice.EInvoice();
      // Guard against builds of the library that do not expose fromXmlString.
      if (invoice.fromXmlString) {
        await invoice.fromXmlString(testCase.xml);
        console.log(' ✓ Parsed with processing instructions');
      } else {
        console.log(' ⚠️ Cannot test without fromXmlString');
      }
    } catch (error: unknown) {
      // Anything can be thrown in JavaScript; only Error instances are
      // guaranteed to have `.message`, so fall back to String() otherwise.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ✗ Error: ${message}`);
    }
  }
});
// PARSE-11 (syntax rules): prints a catalogue of PI syntax examples.
// Nothing is parsed here; each group is either a pair of valid/invalid example
// lists or a list of annotated cases, and the loop below only logs them.
tap.test('PARSE-11: Processing instruction syntax rules', async () => {
  const syntaxTests = [
    {
      name: 'Valid PI names',
      valid: [
        '<?valid-name data?>',
        '<?name123 data?>',
        '<?my-processor data?>',
        '<?_underscore data?>'
      ],
      invalid: [
        '<?123name data?>', // Cannot start with number
        '<?my name data?>', // No spaces in target
        '<?xml data?>', // 'xml' is reserved
        '<? data?>' // Must have target name
      ]
    },
    {
      name: 'Reserved target names',
      tests: [
        { pi: '<?xml version="1.0"?>', valid: true, note: 'XML declaration allowed' },
        { pi: '<?XML data?>', valid: false, note: 'Case variations of xml reserved' },
        { pi: '<?XmL data?>', valid: false, note: 'Any case of xml reserved' }
      ]
    },
    {
      name: 'PI data requirements',
      tests: [
        { pi: '<?target?>', valid: true, note: 'Empty data is valid' },
        { pi: '<?target ?>', valid: true, note: 'Whitespace only is valid' },
        { pi: '<?target cannot contain ??>', valid: false, note: 'Cannot contain ?>' },
        { pi: '<?target data with ? and > separately?>', valid: true, note: 'Can contain ? and > separately' }
      ]
    }
  ];

  syntaxTests.forEach((group) => {
    console.log(`\n${group.name}:`);

    // Groups given as two plain example lists.
    if (group.valid && group.invalid) {
      console.log(' Valid examples:');
      group.valid.forEach((example) => {
        console.log(`${example}`);
      });
      console.log(' Invalid examples:');
      group.invalid.forEach((example) => {
        console.log(`${example}`);
      });
    }

    // Groups given as annotated cases: print the PI, then a mark and its note.
    if (group.tests) {
      group.tests.forEach((entry) => {
        let mark: string;
        if (entry.valid) {
          mark = '✓';
        } else {
          mark = '✗';
        }
        console.log(` ${entry.pi}`);
        console.log(` ${mark} ${entry.note}`);
      });
    }
  });
});
// PARSE-11 (common PIs): for a set of e-invoice style documents, extracts the
// processing instructions with a regular expression and logs them. The library
// parser is not involved; this only demonstrates which PIs each sample carries.
tap.test('PARSE-11: Common processing instructions in e-invoices', async () => {
  // NOTE: the XML lines inside the template literals are deliberately flush-left;
  // any leading whitespace would become part of the document text.
  const einvoicePIs = [
    {
      name: 'XSLT transformation',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="https://example.com/invoice-transform.xsl"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>UBL-001</ID>
</Invoice>`,
      purpose: 'Browser-based invoice rendering',
      common: true
    },
    {
      name: 'Schema validation hint',
      xml: `<?xml version="1.0"?>
<?xml-model href="http://docs.oasis-open.org/ubl/os-UBL-2.1/xsd/maindoc/UBL-Invoice-2.1.xsd"
schematypens="http://www.w3.org/2001/XMLSchema"?>
<Invoice>
<ID>TEST-001</ID>
</Invoice>`,
      purpose: 'Schema location for validation',
      common: false
    },
    {
      name: 'PDF generation instructions',
      xml: `<?xml version="1.0"?>
<?pdf-generator version="2.0" profile="ZUGFeRD"?>
<?pdf-attachment filename="invoice.xml" relationship="Data"?>
<Invoice>
<ID>PDF-001</ID>
</Invoice>`,
      purpose: 'PDF/A-3 generation hints',
      common: false
    },
    {
      name: 'Digital signature instructions',
      xml: `<?xml version="1.0"?>
<?signature-method algorithm="RSA-SHA256"?>
<?signature-transform algorithm="http://www.w3.org/2001/10/xml-exc-c14n#"?>
<Invoice>
<ID>SIGNED-001</ID>
</Invoice>`,
      purpose: 'Signing process configuration',
      common: false
    },
    {
      name: 'Format-specific processing',
      xml: `<?xml version="1.0"?>
<?facturx-version 1.0?>
<?zugferd-profile EXTENDED?>
<rsm:CrossIndustryInvoice>
<rsm:ExchangedDocument>
<ram:ID>CII-001</ram:ID>
</rsm:ExchangedDocument>
</rsm:CrossIndustryInvoice>`,
      purpose: 'Format-specific metadata',
      common: false
    }
  ];

  // Matches `<?target data?>`. The data part is matched lazily up to the first
  // `?>`, so a lone `?` inside the data (for example a URL query string in an
  // href) no longer makes the whole PI go undetected. Hoisted out of the loop;
  // matchAll works on a copy of the regex, so sharing the /g instance is safe.
  const piPattern = /<\?([^?\s]+)([\s\S]*?)\?>/g;

  for (const pi of einvoicePIs) {
    console.log(`\n${pi.name}:`);
    console.log(` Purpose: ${pi.purpose}`);
    console.log(` Common in e-invoices: ${pi.common ? 'Yes' : 'No'}`);
    try {
      // Extract PIs from XML. The XML declaration (`<?xml ...?>`) is dropped
      // before counting so the reported total agrees with the list printed below.
      const pis = Array.from(pi.xml.matchAll(piPattern)).filter(
        ([, target]) => target !== 'xml'
      );
      console.log(` Found ${pis.length} processing instructions:`);
      for (const [, target, data] of pis) {
        console.log(` <?${target}${data}?>`);
      }
    } catch (error: unknown) {
      // Only Error instances are guaranteed to have `.message`.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` Error analyzing PIs: ${message}`);
    }
  }
});
// PARSE-11 (handling strategies): demonstrates a small dispatch table that
// routes each processing instruction found in a document to a handler
// registered for its target, and logs the PIs that have no handler.
tap.test('PARSE-11: Processing instruction handling strategies', async () => {
  // Registry of per-target callbacks plus a regex-based scanner that feeds them.
  class PIHandler {
    private readonly handlers = new Map<string, (data: string) => void>();

    // Associates `handler` with the PI target name `target`; a later
    // registration for the same target replaces the earlier one.
    register(target: string, handler: (data: string) => void): void {
      this.handlers.set(target, handler);
    }

    // Scans `xml` for `<?target data?>` sequences and invokes the matching
    // handler with the trimmed data. The XML declaration is skipped.
    process(xml: string): void {
      // The data part is matched lazily up to the first `?>`, so a lone `?`
      // inside the data (e.g. a URL query string) does not hide the PI.
      const piRegex = /<\?([^?\s]+)([\s\S]*?)\?>/g;
      for (const [, target = '', data = ''] of xml.matchAll(piRegex)) {
        if (target === 'xml') continue; // Skip XML declaration
        const handler = this.handlers.get(target);
        if (handler) {
          console.log(` Processing <?${target}...?>`);
          handler(data.trim());
        } else {
          console.log(` Ignoring unhandled PI: <?${target}...?>`);
        }
      }
    }
  }

  const handler = new PIHandler();

  // Register handlers for common PIs
  handler.register('xml-stylesheet', (data) => {
    const hrefMatch = data.match(/href="([^"]+)"/);
    if (hrefMatch) {
      console.log(` Stylesheet URL: ${hrefMatch[1]}`);
    }
  });
  handler.register('pdf-generator', (data) => {
    const versionMatch = data.match(/version="([^"]+)"/);
    if (versionMatch) {
      console.log(` PDF generator version: ${versionMatch[1]}`);
    }
  });
  // This instruction carries no data, so the handler takes no argument.
  handler.register('page-break', () => {
    console.log(' Page break instruction found');
  });

  // Test document (XML lines are flush-left on purpose: leading whitespace
  // would become part of the document text).
  const testXml = `<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="invoice.xsl"?>
<?pdf-generator version="2.0" profile="ZUGFeRD"?>
<invoice>
<?page-break?>
<content>Test</content>
<?custom-pi unknown="true"?>
</invoice>`;

  console.log('Processing instructions found:');
  handler.process(testXml);
});
// PARSE-11 (security): prints a checklist of risky processing-instruction
// patterns with their risk and mitigation, followed by general best practices.
// Purely informational: nothing is parsed or asserted.
tap.test('PARSE-11: PI security considerations', async () => {
  const securityTests = [
    {
      name: 'External resource reference',
      pi: '<?xml-stylesheet href="http://malicious.com/steal-data.xsl"?>',
      risk: 'SSRF, data exfiltration',
      mitigation: 'Validate URLs, use allowlist'
    },
    {
      name: 'Code execution hint',
      pi: '<?execute-script language="javascript" code="alert(1)"?>',
      risk: 'Arbitrary code execution',
      mitigation: 'Never execute PI content as code'
    },
    {
      name: 'File system access',
      pi: '<?include-file path="/etc/passwd"?>',
      risk: 'Local file disclosure',
      mitigation: 'Ignore file system PIs'
    },
    {
      name: 'Parser-specific instructions',
      pi: '<?parser-config disable-security-checks="true"?>',
      risk: 'Security bypass',
      mitigation: 'Ignore parser configuration PIs'
    }
  ];

  console.log('Security considerations for processing instructions:');
  securityTests.forEach(({ name, pi, risk, mitigation }) => {
    console.log(`\n${name}:`);
    console.log(` PI: ${pi}`);
    console.log(` Risk: ${risk}`);
    console.log(` Mitigation: ${mitigation}`);
  });

  // One console.log call per entry, in order, after the heading.
  const bestPractices = [
    ' 1. Whitelist allowed PI targets',
    ' 2. Validate all external references',
    ' 3. Never execute PI content as code',
    ' 4. Log suspicious PIs for monitoring',
    ' 5. Consider removing PIs in production'
  ];
  console.log('\nBest practices:');
  bestPractices.forEach((line) => {
    console.log(line);
  });
});
// PARSE-11 (performance): builds synthetic documents with a growing number of
// processing instructions, times EInvoice.fromXmlString on each and logs the
// result. Failures are logged, not rethrown, so this case never fails the run.
tap.test('PARSE-11: PI performance impact', async () => {
  // Number of PIs the generator nests inside <invoice> for a given piCount
  // (the generator's inner loop runs while i < piCount / 2).
  const internalPICount = (piCount: number): number => Math.ceil(piCount / 2);

  // Generate documents with varying PI counts: `piCount` PIs ahead of the root
  // element plus internalPICount(piCount) PIs interleaved with child elements.
  const generateXmlWithPIs = (piCount: number): string => {
    let xml = '<?xml version="1.0"?>\n';
    // Add various PIs
    for (let i = 0; i < piCount; i++) {
      xml += `<?pi-${i} data="value${i}" param="test"?>\n`;
    }
    xml += '<invoice>\n';
    // Add some PIs within document
    for (let i = 0; i < internalPICount(piCount); i++) {
      xml += ` <?internal-pi-${i}?>\n`;
      xml += ` <field${i}>Value ${i}</field${i}>\n`;
    }
    xml += '</invoice>';
    return xml;
  };

  console.log('Performance impact of processing instructions:');
  const testCounts = [0, 10, 50, 100];
  for (const count of testCounts) {
    const xml = generateXmlWithPIs(count);
    const xmlSize = Buffer.byteLength(xml, 'utf8');
    // Report against the number of PIs actually present in the document
    // (leading + nested); dividing by `count` alone would overstate the
    // per-PI cost because the nested PIs would be left out of the divisor.
    const totalPIs = count + internalPICount(count);
    const startTime = performance.now();
    try {
      const invoice = new einvoice.EInvoice();
      // Guard against builds of the library that do not expose fromXmlString.
      if (invoice.fromXmlString) {
        await invoice.fromXmlString(xml);
      }
      const parseTime = performance.now() - startTime;
      console.log(` ${totalPIs} PIs (${(xmlSize / 1024).toFixed(1)}KB): ${parseTime.toFixed(2)}ms`);
      if (totalPIs > 0) {
        console.log(` Time per PI: ${(parseTime / totalPIs).toFixed(3)}ms`);
      }
    } catch (error: unknown) {
      // Only Error instances are guaranteed to have `.message`.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` Error with ${totalPIs} PIs: ${message}`);
    }
  }

  // PI best practices
  console.log('\nProcessing Instruction Best Practices:');
  console.log('1. Preserve PIs during document processing');
  console.log('2. Validate external references for security');
  console.log('3. Support common PIs (xml-stylesheet)');
  console.log('4. Allow custom PI handlers for extensibility');
  console.log('5. Ignore unknown PIs gracefully');
  console.log('6. Never execute PI content as code');
  console.log('7. Consider PI impact on performance');
  console.log('8. Document which PIs are supported');
});
// Hand over to the tap runner; presumably this executes the tests registered
// above (confirm against the tapbundle documentation).
tap.start();