import { tap } from '@git.zone/tstest/tapbundle';
import * as einvoice from '../../../ts/index.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';

/**
 * Known byte-order-mark signatures, in the order they must be tested.
 *
 * Order matters: the UTF-32 LE signature (FF FE 00 00) begins with the UTF-16 LE
 * signature (FF FE), so the 4-byte UTF-32 signatures have to be checked before
 * the 2-byte UTF-16 ones. Otherwise a UTF-32 LE BOM is mistaken for UTF-16 LE
 * and only half of it is stripped.
 */
const BOM_SIGNATURES: ReadonlyArray<readonly number[]> = [
  [0xEF, 0xBB, 0xBF], // UTF-8
  [0xFF, 0xFE, 0x00, 0x00], // UTF-32 LE
  [0x00, 0x00, 0xFE, 0xFF], // UTF-32 BE
  [0xFF, 0xFE], // UTF-16 LE
  [0xFE, 0xFF], // UTF-16 BE
];

/**
 * Returns `true` when `buffer` begins with exactly the bytes in `signature`.
 */
const startsWithBytes = (buffer: Buffer, signature: readonly number[]): boolean =>
  buffer.length >= signature.length &&
  signature.every((byte, index) => buffer[index] === byte);

/**
 * Helper function to remove BOM from buffer.
 *
 * @param buffer - Raw bytes that may start with a UTF-8, UTF-16 or UTF-32 BOM.
 * @returns A view of `buffer` without the leading BOM (via `subarray`, so no
 *   bytes are copied), or `buffer` itself when no known BOM is present.
 */
const removeBOM = (buffer: Buffer): Buffer => {
  const signature = BOM_SIGNATURES.find((candidate) => startsWithBytes(buffer, candidate));
  return signature ? buffer.subarray(signature.length) : buffer;
};

/**
 * Extracts a printable message from a caught value.
 *
 * Catch variables are `unknown`; anything that is not an `Error` instance is
 * stringified instead of being assumed to carry a `message` property.
 */
const errorMessage = (error: unknown): string =>
  error instanceof Error ? error.message : String(error);

// Builds a payload for each BOM flavour, prepends the BOM and checks that
// removeBOM strips exactly the BOM bytes.
tap.test('PARSE-04: Standard BOM detection and removal', async () => {
  const bomTypes = [
    {
      name: 'UTF-8 BOM',
      bom: Buffer.from([0xEF, 0xBB, 0xBF]),
      encoding: 'UTF-8',
      description: 'Most common BOM in XML files'
    },
    {
      name: 'UTF-16 LE BOM',
      bom: Buffer.from([0xFF, 0xFE]),
      encoding: 'UTF-16LE',
      description: 'Little-endian UTF-16'
    },
    {
      name: 'UTF-16 BE BOM',
      bom: Buffer.from([0xFE, 0xFF]),
      encoding: 'UTF-16BE',
      description: 'Big-endian UTF-16'
    },
    {
      name: 'UTF-32 LE BOM',
      bom: Buffer.from([0xFF, 0xFE, 0x00, 0x00]),
      encoding: 'UTF-32LE',
      description: 'Little-endian UTF-32'
    },
    {
      name: 'UTF-32 BE BOM',
      bom: Buffer.from([0x00, 0x00, 0xFE, 0xFF]),
      encoding: 'UTF-32BE',
      description: 'Big-endian UTF-32'
    }
  ];

  for (const bomType of bomTypes) {
    const { result, metric } = await PerformanceTracker.track(
      'bom-processing',
      async () => {
        // Create XML with BOM
        let xmlContent: Buffer;
        let encodingSupported = true;

        try {
          if (bomType.encoding.startsWith('UTF-16')) {
            // Node.js doesn't support UTF-16 BE directly
            if (bomType.encoding === 'UTF-16BE') {
              // Create UTF-8 content instead for testing
              xmlContent = Buffer.from('TEST-BOM');
              encodingSupported = false;
            } else {
              // 'UTF-16LE' -> 'utf16le', the spelling Buffer accepts
              const nodeEncoding = bomType.encoding.replace('-', '').toLowerCase();
              xmlContent = Buffer.from(
                'TEST-BOM',
                nodeEncoding as BufferEncoding
              );
            }
          } else if (bomType.encoding.startsWith('UTF-32')) {
            // UTF-32 not directly supported by Node.js, simulate
            xmlContent = Buffer.from('TEST-BOM');
            encodingSupported = false;
          } else {
            xmlContent = Buffer.from('TEST-BOM');
          }
        } catch (e) {
          // Fallback to UTF-8 if encoding not supported
          xmlContent = Buffer.from('TEST-BOM');
          encodingSupported = false;
        }

        const fullContent = Buffer.concat([bomType.bom, xmlContent]);

        // Test BOM removal: success means exactly the BOM bytes were dropped
        const withoutBom = removeBOM(fullContent);
        const bomRemoved = withoutBom.length === fullContent.length - bomType.bom.length;

        return {
          bomBytes: Array.from(bomType.bom)
            .map(b => '0x' + b.toString(16).toUpperCase().padStart(2, '0'))
            .join(' '),
          totalSize: fullContent.length,
          bomRemoved,
          encodingSupported
        };
      }
    );

    console.log(`${bomType.name}:`);
    console.log(` BOM: ${result.bomBytes}`);
    console.log(` Encoding: ${bomType.encoding}`);
    console.log(` Description: ${bomType.description}`);
    console.log(` Total size: ${result.totalSize} bytes`);
    console.log(` ${result.bomRemoved ? '✓' : '✗'} BOM ${result.bomRemoved ? 'removed successfully' : 'removal failed'}`);
    console.log(` Processing time: ${metric.duration.toFixed(2)}ms`);
  }
});

// Feeds content with a BOM at the start, in the middle, absent, or duplicated
// to EInvoice.fromXmlString and logs whether parsing succeeded.
tap.test('PARSE-04: BOM in different positions', async () => {
  const positionTests = [
    {
      name: 'BOM at start (correct)',
      content: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from('TEST-001')
      ]),
      valid: true
    },
    {
      name: 'BOM after XML declaration',
      content: Buffer.concat([
        Buffer.from(''),
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from('TEST-002')
      ]),
      valid: false
    },
    {
      name: 'No BOM',
      content: Buffer.from('TEST-003'),
      valid: true
    },
    {
      name: 'Multiple BOMs',
      content: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from('TEST-004')
      ]),
      valid: false
    }
  ];

  for (const test of positionTests) {
    const { result } = await PerformanceTracker.track(
      'bom-position',
      async () => {
        const invoice = new einvoice.EInvoice();

        try {
          await invoice.fromXmlString(test.content.toString('utf8'));
          return { parsed: true, error: null };
        } catch (error) {
          return { parsed: false, error: errorMessage(error) };
        }
      }
    );

    console.log(`${test.name}: ${result.parsed ? '✓' : '✗'}`);
    console.log(` Expected ${test.valid ? 'valid' : 'invalid'}, got ${result.parsed ? 'parsed' : 'error'}`);
    if (!result.parsed) {
      console.log(` Error: ${result.error}`);
    }
  }
});

tap.test('PARSE-04: Real invoice files with BOM', async () => {
  // Test with actual invoice formats that might have BOM
  const realWorldTests = [
    {
      name: 'UBL with UTF-8 BOM',
      xml: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
        Buffer.from(` BOM-UBL-001 2024-01-01 Test Supplier Berlin 10115 DE Test Customer Munich 80331 DE 1 1 100.00 Test Product 100.00 `)
      ])
    },
    {
      name: 'ZUGFeRD with UTF-8 BOM',
      xml: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
        Buffer.from(` BOM-ZUGFERD-001 `)
      ])
    }
  ];

  for (const test of realWorldTests) {
    const { result } = await PerformanceTracker.track(
      'real-world-bom',
      async () => {
        const invoice = new einvoice.EInvoice();

        try {
          await invoice.fromXmlString(test.xml.toString('utf8'));
          return {
            success: true,
            id: invoice.id,
            format: invoice.getFormat()
          };
        } catch (error) {
          return { success: false, error: errorMessage(error) };
        }
      }
    );

    console.log(`${test.name}: ${result.success ? '✓' : '✗'}`);
    if (result.success) {
      console.log(` Invoice ID: ${result.id}`);
      console.log(` Format: ${einvoice.InvoiceFormat[result.format]}`);
    } else {
      console.log(` Error: ${result.error}`);
    }
  }
});

// Pairs a BOM with content whose declared encoding disagrees with it; the test
// logs a rejection as the expected outcome.
tap.test('PARSE-04: BOM encoding conflicts', async () => {
  const conflictTests = [
    {
      name: 'UTF-16 BOM with UTF-8 declaration',
      bom: Buffer.from([0xFF, 0xFE]), // UTF-16 LE BOM
      xml: 'CONFLICT-001',
      issue: 'BOM indicates UTF-16 but declaration says UTF-8'
    },
    {
      name: 'UTF-8 BOM with ISO-8859-1 declaration',
      bom: Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
      xml: 'CONFLICT-002',
      issue: 'BOM indicates UTF-8 but declaration says ISO-8859-1'
    }
  ];

  for (const test of conflictTests) {
    const content = Buffer.concat([test.bom, Buffer.from(test.xml)]);

    const { result } = await PerformanceTracker.track(
      'bom-conflict',
      async () => {
        const invoice = new einvoice.EInvoice();

        try {
          await invoice.fromXmlString(content.toString('utf8'));
          return { parsed: true };
        } catch (error) {
          const message = errorMessage(error);
          const lowered = message.toLowerCase();
          return {
            parsed: false,
            error: message,
            isEncodingError: lowered.includes('encoding') || lowered.includes('bom')
          };
        }
      }
    );

    console.log(`${test.name}: ${!result.parsed ? '✓ (correctly rejected)' : '✗ (should have failed)'}`);
    console.log(` Issue: ${test.issue}`);
    if (!result.parsed) {
      console.log(` ${result.isEncodingError ? 'Encoding error detected' : 'Other error'}`);
    }
  }
});

// Parses BOM-prefixed invoices with 1, 10 and 100 generated line items and logs
// the duration reported by PerformanceTracker.
tap.test('PARSE-04: Performance with BOM', async () => {
  const sizes = [1, 10, 100];

  for (const size of sizes) {
    // Generate invoice with many line items
    const lines: string[] = [];
    for (let i = 1; i <= size; i++) {
      lines.push(` ${i} 1 ${i * 10}.00 Product ${i} `);
    }

    const xmlWithBom = Buffer.concat([
      Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
      Buffer.from(` PERF-BOM-${size} 2024-01-01 Performance Test Supplier Berlin 10115 DE Performance Test Customer Munich 80331 DE ${lines.join('')} `)
    ]);

    const { result, metric } = await PerformanceTracker.track(
      `bom-performance-${size}`,
      async () => {
        const invoice = new einvoice.EInvoice();

        try {
          await invoice.fromXmlString(xmlWithBom.toString('utf8'));
          return {
            success: true,
            itemCount: invoice.items?.length ?? 0
          };
        } catch (error) {
          return { success: false, error: errorMessage(error) };
        }
      }
    );

    const xmlSize = xmlWithBom.length / 1024; // KB
    console.log(`Parse ${size} items with BOM (${xmlSize.toFixed(1)}KB): ${result.success ? '✓' : '✗'}`);

    if (result.success) {
      console.log(` Items parsed: ${result.itemCount}`);
      console.log(` Parse time: ${metric.duration.toFixed(2)}ms`);
      console.log(` Speed: ${(xmlSize / metric.duration * 1000).toFixed(2)}KB/s`);
    }
  }
});

// Prints the best-practice checklist plus the aggregated 'bom-processing'
// statistics, when PerformanceTracker has any.
tap.test('PARSE-04: BOM handling summary', async () => {
  console.log('\nBOM Handling Best Practices:');
  console.log('1. Always check for BOM at the beginning of XML files');
  console.log('2. Remove BOM before parsing if present');
  console.log('3. Handle conflicts between BOM and encoding declaration');
  console.log('4. Support UTF-8, UTF-16, and UTF-32 BOMs');
  console.log('5. Validate that BOM matches the actual encoding');

  const stats = PerformanceTracker.getStats('bom-processing');
  if (stats) {
    console.log(`\nBOM Processing Performance:`);
    console.log(` Average: ${stats.avg.toFixed(2)}ms`);
    console.log(` Max: ${stats.max.toFixed(2)}ms`);
  }
});

// Run the tests
tap.start();