532 lines
18 KiB
TypeScript
532 lines
18 KiB
TypeScript
|
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||
|
import * as einvoice from '../../../ts/index.js';
|
||
|
import * as plugins from '../../plugins.js';
|
||
|
import { CorpusLoader } from '../../helpers/corpus.loader.js';
|
||
|
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
|
||
|
|
||
|
tap.test('PARSE-04: BOM Handling - Process Byte Order Marks correctly across encodings', async (t) => {
|
||
|
const performanceTracker = new PerformanceTracker('PARSE-04');
|
||
|
|
||
|
// Subtest: prepends each standard BOM (UTF-8, UTF-16 LE/BE, UTF-32 LE/BE) to a
// small XML payload, runs removeBOM() on it and logs whether exactly the BOM
// bytes were stripped. Results are logged, not asserted.
await t.test('Standard BOM detection and removal', async () => {
  performanceTracker.startOperation('standard-bom');

  const bomTypes = [
    {
      name: 'UTF-8 BOM',
      bom: Buffer.from([0xEF, 0xBB, 0xBF]),
      encoding: 'UTF-8',
      description: 'Most common BOM in XML files'
    },
    {
      name: 'UTF-16 LE BOM',
      bom: Buffer.from([0xFF, 0xFE]),
      encoding: 'UTF-16LE',
      description: 'Little-endian UTF-16'
    },
    {
      name: 'UTF-16 BE BOM',
      bom: Buffer.from([0xFE, 0xFF]),
      encoding: 'UTF-16BE',
      description: 'Big-endian UTF-16'
    },
    {
      name: 'UTF-32 LE BOM',
      bom: Buffer.from([0xFF, 0xFE, 0x00, 0x00]),
      encoding: 'UTF-32LE',
      description: 'Little-endian UTF-32'
    },
    {
      name: 'UTF-32 BE BOM',
      bom: Buffer.from([0x00, 0x00, 0xFE, 0xFF]),
      encoding: 'UTF-32BE',
      description: 'Big-endian UTF-32'
    }
  ];

  for (const bomType of bomTypes) {
    const startTime = performance.now();

    // Create XML with BOM
    let xmlContent: Buffer;
    if (bomType.encoding.startsWith('UTF-16')) {
      // Node's Buffer only knows little-endian UTF-16 ('utf16le'); asking for
      // 'utf-16be' throws "Unknown encoding". Encode as LE and swap each byte
      // pair in place to obtain the big-endian form.
      xmlContent = Buffer.from(
        '<?xml version="1.0" encoding="UTF-16"?><invoice><id>TEST-BOM</id></invoice>',
        'utf16le'
      );
      if (bomType.encoding === 'UTF-16BE') {
        xmlContent.swap16();
      }
    } else if (bomType.encoding.startsWith('UTF-32')) {
      // UTF-32 not directly supported by Node.js, simulate
      // (the payload bytes are UTF-8; only the BOM prefix is UTF-32)
      xmlContent = Buffer.from('<?xml version="1.0" encoding="UTF-32"?><invoice><id>TEST-BOM</id></invoice>');
    } else {
      xmlContent = Buffer.from('<?xml version="1.0" encoding="UTF-8"?><invoice><id>TEST-BOM</id></invoice>');
    }

    const fullContent = Buffer.concat([bomType.bom, xmlContent]);

    console.log(`${bomType.name}:`);
    console.log(` BOM: ${Array.from(bomType.bom).map(b => '0x' + b.toString(16).toUpperCase().padStart(2, '0')).join(' ')}`);
    console.log(` Encoding: ${bomType.encoding}`);
    console.log(` Description: ${bomType.description}`);
    console.log(` Total size: ${fullContent.length} bytes`);

    // Test BOM removal: success means exactly bom.length bytes disappeared
    const withoutBom = removeBOM(fullContent);
    if (withoutBom.length === fullContent.length - bomType.bom.length) {
      console.log(' ✓ BOM removed successfully');
    } else {
      console.log(' ✗ BOM removal failed');
    }

    performanceTracker.recordMetric('bom-processing', performance.now() - startTime);
  }

  performanceTracker.endOperation('standard-bom');
});
|
||
|
|
||
|
// Subtest: feeds documents with a UTF-8 BOM at various positions (start,
// after the declaration, mid-document, doubled, inside element data) to the
// parser and logs where BOM byte sequences occur and whether parsing succeeds.
// The `valid` flag is the expected verdict; it is only logged, not asserted.
await t.test('BOM in different positions', async () => {
  performanceTracker.startOperation('bom-positions');

  const positionTests = [
    {
      name: 'BOM at start (correct)',
      content: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from('<?xml version="1.0"?><invoice><id>TEST-001</id></invoice>')
      ]),
      valid: true
    },
    {
      name: 'BOM after XML declaration',
      content: Buffer.concat([
        Buffer.from('<?xml version="1.0"?>'),
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from('<invoice><id>TEST-002</id></invoice>')
      ]),
      valid: false
    },
    {
      name: 'BOM in middle of document',
      content: Buffer.concat([
        Buffer.from('<?xml version="1.0"?><invoice>'),
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from('<id>TEST-003</id></invoice>')
      ]),
      valid: false
    },
    {
      name: 'Multiple BOMs',
      content: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from('<?xml version="1.0"?><invoice><id>TEST-004</id></invoice>')
      ]),
      valid: false
    },
    {
      name: 'BOM-like bytes in content',
      content: Buffer.concat([
        Buffer.from('<?xml version="1.0"?><invoice><data>'),
        Buffer.from([0xEF, 0xBB, 0xBF]), // These are actual data, not BOM
        Buffer.from('</data></invoice>')
      ]),
      valid: true // Valid XML, but BOM-like bytes are data
    }
  ];

  for (const test of positionTests) {
    const startTime = performance.now();

    console.log(`${test.name}:`);

    // Find all BOM occurrences
    const bomOccurrences = findBOMOccurrences(test.content);
    console.log(` BOM occurrences: ${bomOccurrences.length} at positions: ${bomOccurrences.join(', ')}`);

    if (test.valid) {
      console.log(' ✓ Valid BOM usage');
    } else {
      console.log(' ✗ Invalid BOM usage');
    }

    // Try parsing
    try {
      const invoice = new einvoice.EInvoice();
      // Only exercise the parser when the API exposes fromBuffer
      if (invoice.fromBuffer) {
        await invoice.fromBuffer(test.content);
        console.log(' Parse result: Success');
      }
    } catch (error: unknown) {
      // The caught value is not guaranteed to be an Error; narrow before
      // reading .message so non-Error throws still produce a useful line.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(` Parse result: Failed - ${reason}`);
    }

    performanceTracker.recordMetric('bom-position', performance.now() - startTime);
  }

  performanceTracker.endOperation('bom-positions');
});
|
||
|
|
||
|
// Subtest: simulates a round trip over three inputs and logs whether the UTF-8
// BOM is kept, stripped or added, depending on the preserveBOM / addBOM flags.
await t.test('BOM preservation in round-trip operations', async () => {
  performanceTracker.startOperation('bom-roundtrip');

  const utf8Bom = Buffer.from([0xEF, 0xBB, 0xBF]);

  // True when the buffer begins with the three UTF-8 BOM bytes.
  const startsWithUtf8Bom = (candidate: Buffer): boolean =>
    candidate.subarray(0, utf8Bom.length).equals(utf8Bom);

  const roundTripTests = [
    {
      name: 'Preserve UTF-8 BOM',
      input: Buffer.concat([
        utf8Bom,
        Buffer.from('<?xml version="1.0" encoding="UTF-8"?><invoice><id>RT-001</id></invoice>')
      ]),
      preserveBOM: true
    },
    {
      name: 'Remove UTF-8 BOM',
      input: Buffer.concat([
        utf8Bom,
        Buffer.from('<?xml version="1.0" encoding="UTF-8"?><invoice><id>RT-002</id></invoice>')
      ]),
      preserveBOM: false
    },
    {
      name: 'Add BOM to BOM-less file',
      input: Buffer.from('<?xml version="1.0" encoding="UTF-8"?><invoice><id>RT-003</id></invoice>'),
      preserveBOM: true,
      addBOM: true
    }
  ];

  for (const scenario of roundTripTests) {
    const startedAt = performance.now();

    console.log(`${scenario.name}:`);

    const inputHasBOM = startsWithUtf8Bom(scenario.input);
    console.log(` Input has BOM: ${inputHasBOM}`);
    console.log(` Preserve BOM: ${scenario.preserveBOM}`);

    // Simulate round-trip: stripping takes priority over adding
    let processed = scenario.input;
    let actionLine = ' Action: No change';
    if (inputHasBOM && !scenario.preserveBOM) {
      processed = processed.subarray(utf8Bom.length);
      actionLine = ' Action: Removed BOM';
    } else if (scenario.addBOM && !inputHasBOM) {
      processed = Buffer.concat([utf8Bom, processed]);
      actionLine = ' Action: Added BOM';
    }
    console.log(actionLine);

    const outputHasBOM = startsWithUtf8Bom(processed);
    console.log(` Output has BOM: ${outputHasBOM}`);

    performanceTracker.recordMetric('bom-roundtrip', performance.now() - startedAt);
  }

  performanceTracker.endOperation('bom-roundtrip');
});
|
||
|
|
||
|
// Subtest: pairs a BOM with an XML encoding declaration and logs whether the
// two are expected to disagree. The `conflict` flag is the expected verdict;
// nothing is parsed or asserted here.
await t.test('BOM conflicts with encoding declarations', async () => {
  performanceTracker.startOperation('bom-conflicts');

  const conflictTests = [
    {
      name: 'UTF-8 BOM with UTF-8 declaration',
      bom: Buffer.from([0xEF, 0xBB, 0xBF]),
      declaration: 'UTF-8',
      conflict: false
    },
    {
      name: 'UTF-8 BOM with UTF-16 declaration',
      bom: Buffer.from([0xEF, 0xBB, 0xBF]),
      declaration: 'UTF-16',
      conflict: true
    },
    {
      name: 'UTF-16 LE BOM with UTF-8 declaration',
      bom: Buffer.from([0xFF, 0xFE]),
      declaration: 'UTF-8',
      conflict: true
    },
    {
      name: 'UTF-16 BE BOM with UTF-16 declaration',
      bom: Buffer.from([0xFE, 0xFF]),
      declaration: 'UTF-16',
      conflict: false
    },
    {
      name: 'No BOM with any declaration',
      bom: Buffer.from([]),
      declaration: 'UTF-8',
      conflict: false
    }
  ];

  for (const { name, bom, declaration, conflict } of conflictTests) {
    const startedAt = performance.now();

    // The combined document is assembled (and timed) but not inspected further.
    const xml = `<?xml version="1.0" encoding="${declaration}"?><invoice><id>CONFLICT-TEST</id></invoice>`;
    const fullContent = Buffer.concat([bom, Buffer.from(xml)]);

    const bomLabel = bom.length > 0 ? detectBOMType(bom) : 'None';
    const conflictLabel = conflict ? '✗ Yes' : '✓ No';

    console.log(`${name}:`);
    console.log(` BOM type: ${bomLabel}`);
    console.log(` Declaration: ${declaration}`);
    console.log(` Conflict: ${conflictLabel}`);

    if (conflict) {
      console.log(' Resolution: BOM takes precedence over declaration');
    }

    performanceTracker.recordMetric('bom-conflict', performance.now() - startedAt);
  }

  performanceTracker.endOperation('bom-conflicts');
});
|
||
|
|
||
|
// Subtest: samples up to 100 corpus files and tallies which kind of BOM (if
// any) each starts with, plus files containing repeated or misplaced UTF-8 BOM
// byte sequences. Purely statistical; nothing is asserted.
await t.test('BOM handling in corpus files', async () => {
  performanceTracker.startOperation('corpus-bom');

  const corpusLoader = new CorpusLoader();
  const files = await corpusLoader.getFiles(/\.(xml|cii|ubl)$/);

  console.log(`\nAnalyzing BOM usage in ${files.length} corpus files...`);

  const bomStats = {
    total: 0,
    withBOM: 0,
    utf8BOM: 0,
    utf16BOM: 0,
    otherBOM: 0,
    multipleBOM: 0,
    invalidPosition: 0
  };

  const sampleSize = Math.min(100, files.length);
  const sampledFiles = files.slice(0, sampleSize);

  for (const file of sampledFiles) {
    bomStats.total++;

    try {
      const content = await plugins.fs.readFile(file.path);

      // Classify the leading bytes with the shared helper so that UTF-32
      // signatures (which begin with the UTF-16 LE bytes FF FE) are not
      // miscounted as UTF-16 and actually land in otherBOM.
      switch (detectBOMType(content)) {
        case 'UTF-8':
          bomStats.withBOM++;
          bomStats.utf8BOM++;
          break;
        case 'UTF-16LE':
        case 'UTF-16BE':
          bomStats.withBOM++;
          bomStats.utf16BOM++;
          break;
        case 'UTF-32LE':
        case 'UTF-32BE':
          bomStats.withBOM++;
          bomStats.otherBOM++;
          break;
        default:
          // No recognised BOM at the start of the file
          break;
      }

      // Check for multiple BOMs or BOMs in wrong position
      const bomOccurrences = findBOMOccurrences(content);
      if (bomOccurrences.length > 1) {
        bomStats.multipleBOM++;
      }
      if (bomOccurrences.length > 0 && bomOccurrences[0] !== 0) {
        bomStats.invalidPosition++;
      }
    } catch {
      // Skip files that can't be read (deliberate best-effort; the file still
      // counts towards `total`)
    }
  }

  // Avoid printing "NaN%" when the corpus yields no files.
  const bomShare = bomStats.total > 0
    ? (bomStats.withBOM / bomStats.total * 100).toFixed(1)
    : '0.0';

  console.log('\nBOM Statistics:');
  console.log(`Total files analyzed: ${bomStats.total}`);
  console.log(`Files with BOM: ${bomStats.withBOM} (${bomShare}%)`);
  console.log(` UTF-8 BOM: ${bomStats.utf8BOM}`);
  console.log(` UTF-16 BOM: ${bomStats.utf16BOM}`);
  console.log(` Other BOM: ${bomStats.otherBOM}`);
  console.log(`Multiple BOMs: ${bomStats.multipleBOM}`);
  console.log(`Invalid BOM position: ${bomStats.invalidPosition}`);

  performanceTracker.endOperation('corpus-bom');
});
|
||
|
|
||
|
// Subtest: scans crafted documents for patterns that could be abused (repeated
// UTF-8 BOM sequences, embedded U+FEFF characters, BOM/declaration mismatch)
// and logs a warning when one is found. Nothing is asserted.
await t.test('BOM security implications', async () => {
  performanceTracker.startOperation('bom-security');

  const securityTests = [
    {
      name: 'BOM hiding malicious content',
      content: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from('<?xml version="1.0"?><!-- '),
        Buffer.from([0xEF, 0xBB, 0xBF]), // Hidden BOM in comment
        Buffer.from(' --><invoice><script>alert("XSS")</script></invoice>')
      ]),
      risk: 'BOM bytes could be used to bypass filters'
    },
    {
      name: 'Zero-width BOM characters',
      content: Buffer.from('<?xml version="1.0"?><invoice>\uFEFF<id>TEST</id></invoice>'),
      risk: 'Invisible characters could hide malicious content'
    },
    {
      name: 'BOM-based encoding confusion',
      content: Buffer.concat([
        Buffer.from([0xFF, 0xFE]), // UTF-16 LE BOM
        Buffer.from('<?xml version="1.0" encoding="UTF-8"?><invoice><id>TEST</id></invoice>')
      ]),
      risk: 'Encoding mismatch could lead to parsing errors'
    }
  ];

  // U+FEFF as raw UTF-16 code units in both byte orders. Buffer.includes()
  // with a *number* coerces it to a single byte (0xFEFF -> 0xFF), so the
  // two-byte sequences have to be searched for explicitly.
  const feffLittleEndian = Buffer.from([0xFF, 0xFE]);
  const feffBigEndian = Buffer.from([0xFE, 0xFF]);

  for (const test of securityTests) {
    const startTime = performance.now();

    console.log(`${test.name}:`);
    console.log(` Risk: ${test.risk}`);

    // Scan for suspicious patterns
    const bomCount = findBOMOccurrences(test.content).length;
    const hasMultipleBOMs = bomCount > 1;
    // Heuristic: U+FEFF either as UTF-16 bytes (any alignment) or as a
    // character in the UTF-8 decoded text (toString() keeps a leading BOM).
    const hasInvisibleChars = test.content.includes(feffLittleEndian) ||
      test.content.includes(feffBigEndian) ||
      test.content.toString().includes('\uFEFF');

    console.log(` BOM count: ${bomCount}`);
    console.log(` Multiple BOMs: ${hasMultipleBOMs ? '✗ Yes' : '✓ No'}`);
    console.log(` Invisible chars: ${hasInvisibleChars ? '✗ Yes' : '✓ No'}`);

    if (hasMultipleBOMs || hasInvisibleChars) {
      console.log(' ⚠️ Security risk detected');
    }

    performanceTracker.recordMetric('bom-security', performance.now() - startTime);
  }

  performanceTracker.endOperation('bom-security');
});
|
||
|
|
||
|
// Subtest: times 1000 rounds of UTF-8 BOM detection and of removeBOM() on
// payloads of increasing size, then logs the average cost per operation.
await t.test('BOM handling performance', async () => {
  performanceTracker.startOperation('bom-performance');

  const rounds = 1000;

  // Runs `work` `rounds` times and returns the elapsed wall time in ms.
  const elapsedFor = (work: () => void): number => {
    const begin = performance.now();
    for (let round = 0; round < rounds; round++) {
      work();
    }
    return performance.now() - begin;
  };

  // 1KB, 10KB, 100KB of filler data
  for (const size of [1000, 10000, 100000]) {
    // Generate content with BOM
    const withBOM = Buffer.concat([
      Buffer.from([0xEF, 0xBB, 0xBF]),
      Buffer.from(`<?xml version="1.0"?><invoice><data>${'x'.repeat(size)}</data></invoice>`)
    ]);

    // Measure BOM detection time (result is intentionally discarded)
    const detectTime = elapsedFor(() => {
      const hasBOM = withBOM.length >= 3 &&
        withBOM[0] === 0xEF &&
        withBOM[1] === 0xBB &&
        withBOM[2] === 0xBF;
    });

    // Measure BOM removal time (result is intentionally discarded)
    const removeTime = elapsedFor(() => {
      const cleaned = removeBOM(withBOM);
    });

    console.log(`File size ${size} bytes:`);
    console.log(` BOM detection: ${(detectTime / rounds).toFixed(3)}ms per operation`);
    console.log(` BOM removal: ${(removeTime / rounds).toFixed(3)}ms per operation`);

    performanceTracker.recordMetric(`bom-perf-${size}`, detectTime + removeTime);
  }

  performanceTracker.endOperation('bom-performance');
});
|
||
|
|
||
|
// Helper functions
|
||
|
/**
 * Strips a leading Unicode byte order mark from `buffer`, if one is present.
 *
 * Recognises UTF-8 (EF BB BF), UTF-32 LE (FF FE 00 00), UTF-32 BE
 * (00 00 FE FF), UTF-16 LE (FF FE) and UTF-16 BE (FE FF).
 *
 * @param buffer Raw bytes that may start with a BOM.
 * @returns A view of `buffer` without the BOM bytes (shares memory with the
 *   input), or `buffer` itself when no BOM is found.
 */
function removeBOM(buffer: Buffer): Buffer {
  // Longest signatures first: the UTF-32 LE BOM starts with the same two
  // bytes as the UTF-16 LE BOM, so testing the 2-byte form first would strip
  // only half of a UTF-32 LE BOM.
  const signatures: ReadonlyArray<readonly number[]> = [
    [0xFF, 0xFE, 0x00, 0x00], // UTF-32 LE
    [0x00, 0x00, 0xFE, 0xFF], // UTF-32 BE
    [0xEF, 0xBB, 0xBF],       // UTF-8
    [0xFF, 0xFE],             // UTF-16 LE
    [0xFE, 0xFF]              // UTF-16 BE
  ];

  for (const signature of signatures) {
    const matches = buffer.length >= signature.length &&
      signature.every((byte, index) => buffer[index] === byte);
    if (matches) {
      return buffer.subarray(signature.length);
    }
  }

  return buffer;
}
|
||
|
|
||
|
/**
 * Locates every occurrence of the UTF-8 BOM byte sequence (EF BB BF) in
 * `buffer`, wherever it appears.
 *
 * @param buffer Bytes to scan.
 * @returns Zero-based byte offsets of each match, in ascending order; empty
 *   when the sequence does not occur.
 */
function findBOMOccurrences(buffer: Buffer): number[] {
  const marker = Buffer.from([0xEF, 0xBB, 0xBF]);
  const offsets: number[] = [];

  // Resume the search just past each hit so matches never overlap.
  let hit = buffer.indexOf(marker);
  while (hit !== -1) {
    offsets.push(hit);
    hit = buffer.indexOf(marker, hit + marker.length);
  }

  return offsets;
}
|
||
|
|
||
|
/**
 * Names the encoding indicated by the BOM at the start of `bom`.
 *
 * @param bom Bytes whose leading signature should be identified.
 * @returns 'UTF-8', 'UTF-32LE', 'UTF-16LE', 'UTF-16BE' or 'UTF-32BE' for a
 *   recognised signature, otherwise 'Unknown'.
 */
function detectBOMType(bom: Buffer): string {
  // Order matters: UTF-32LE must be tried before UTF-16LE because the former
  // begins with the latter's two bytes.
  const knownSignatures: ReadonlyArray<readonly [string, readonly number[]]> = [
    ['UTF-8', [0xEF, 0xBB, 0xBF]],
    ['UTF-32LE', [0xFF, 0xFE, 0x00, 0x00]],
    ['UTF-16LE', [0xFF, 0xFE]],
    ['UTF-16BE', [0xFE, 0xFF]],
    ['UTF-32BE', [0x00, 0x00, 0xFE, 0xFF]]
  ];

  for (const [label, bytes] of knownSignatures) {
    const isPrefix = bom.length >= bytes.length &&
      bytes.every((expected, offset) => bom[offset] === expected);
    if (isPrefix) {
      return label;
    }
  }

  return 'Unknown';
}
|
||
|
|
||
|
// Performance summary collected by the tracker across all subtests
console.log('\n' + performanceTracker.getSummary());

// BOM handling best practices, printed as a numbered checklist
const bestPracticeLines = [
  '\nBOM Handling Best Practices:',
  '1. Always check for BOM before parsing XML',
  '2. Remove BOM after detection to avoid parsing issues',
  '3. Preserve BOM information for round-trip operations if needed',
  '4. Handle conflicts between BOM and encoding declarations',
  '5. Be aware of security implications of multiple/hidden BOMs',
  '6. Test with files both with and without BOM',
  '7. Consider BOM handling in performance-critical paths',
  '8. Support all common BOM types (UTF-8, UTF-16, UTF-32)'
];
for (const line of bestPracticeLines) {
  console.log(line);
}
|
||
|
});
|
||
|
|
||
|
tap.start();
|