einvoice/test/suite/einvoice_parsing/test.parse-04.bom-handling.ts

435 lines
14 KiB
TypeScript
Raw Normal View History

2025-05-28 08:40:26 +00:00
import { tap } from '@git.zone/tstest/tapbundle';
2025-05-25 19:45:37 +00:00
import * as einvoice from '../../../ts/index.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
2025-05-28 08:40:26 +00:00
// Helper function to remove BOM from buffer
/**
 * Strips a leading Unicode byte-order mark from `buffer`, if one is present.
 *
 * Recognised signatures: UTF-8 (EF BB BF), UTF-16 LE (FF FE), UTF-16 BE (FE FF),
 * UTF-32 LE (FF FE 00 00) and UTF-32 BE (00 00 FE FF).
 *
 * @param buffer - Raw bytes that may start with a BOM.
 * @returns A view on `buffer` (via `subarray`, so no bytes are copied) that
 *   starts right after the BOM, or `buffer` itself when no BOM is found.
 */
const removeBOM = (buffer: Buffer): Buffer => {
  // Order matters: the UTF-32 LE signature begins with the UTF-16 LE signature
  // (FF FE), so the 4-byte marks must be tested before the 2-byte ones.
  // Otherwise a UTF-32 LE BOM would only lose its first two bytes.
  const signatures: ReadonlyArray<readonly number[]> = [
    [0xFF, 0xFE, 0x00, 0x00], // UTF-32 LE
    [0x00, 0x00, 0xFE, 0xFF], // UTF-32 BE
    [0xEF, 0xBB, 0xBF], // UTF-8
    [0xFF, 0xFE], // UTF-16 LE
    [0xFE, 0xFF], // UTF-16 BE
  ];

  for (const signature of signatures) {
    const matches =
      buffer.length >= signature.length &&
      signature.every((byte, index) => buffer[index] === byte);
    if (matches) {
      return buffer.subarray(signature.length);
    }
  }

  // No known BOM (this also covers empty and too-short buffers).
  return buffer;
};
/**
 * PARSE-04: prefixes a small XML payload with each known BOM signature, runs
 * it through `removeBOM`, and logs whether exactly the BOM bytes were dropped.
 * Results are only logged; nothing is asserted.
 */
tap.test('PARSE-04: Standard BOM detection and removal', async () => {
  const bomTypes = [
    {
      name: 'UTF-8 BOM',
      bom: Buffer.from([0xEF, 0xBB, 0xBF]),
      encoding: 'UTF-8',
      description: 'Most common BOM in XML files'
    },
    {
      name: 'UTF-16 LE BOM',
      bom: Buffer.from([0xFF, 0xFE]),
      encoding: 'UTF-16LE',
      description: 'Little-endian UTF-16'
    },
    {
      name: 'UTF-16 BE BOM',
      bom: Buffer.from([0xFE, 0xFF]),
      encoding: 'UTF-16BE',
      description: 'Big-endian UTF-16'
    },
    {
      name: 'UTF-32 LE BOM',
      bom: Buffer.from([0xFF, 0xFE, 0x00, 0x00]),
      encoding: 'UTF-32LE',
      description: 'Little-endian UTF-32'
    },
    {
      name: 'UTF-32 BE BOM',
      bom: Buffer.from([0x00, 0x00, 0xFE, 0xFF]),
      encoding: 'UTF-32BE',
      description: 'Big-endian UTF-32'
    }
  ];

  // Renders one byte as an upper-case, zero-padded hex literal such as 0xEF.
  const toHex = (byte: number): string =>
    '0x' + byte.toString(16).toUpperCase().padStart(2, '0');

  // Builds the XML body for the given encoding label. Only UTF-8 and UTF-16LE
  // are really encoded; the other encodings get a default-encoded buffer and
  // are flagged as unsupported.
  const buildPayload = (encoding: string): { xmlContent: Buffer; encodingSupported: boolean } => {
    try {
      if (encoding === 'UTF-16BE') {
        // Node.js doesn't support UTF-16 BE directly: create UTF-8 content instead for testing
        return {
          xmlContent: Buffer.from('<?xml version="1.0" encoding="UTF-16"?><invoice><id>TEST-BOM</id></invoice>'),
          encodingSupported: false
        };
      }
      if (encoding.startsWith('UTF-16')) {
        const nodeEncoding = encoding.replace('-', '').toLowerCase();
        return {
          xmlContent: Buffer.from(
            '<?xml version="1.0" encoding="UTF-16"?><invoice><id>TEST-BOM</id></invoice>',
            nodeEncoding as BufferEncoding
          ),
          encodingSupported: true
        };
      }
      if (encoding.startsWith('UTF-32')) {
        // UTF-32 not directly supported by Node.js, simulate
        return {
          xmlContent: Buffer.from('<?xml version="1.0" encoding="UTF-32"?><invoice><id>TEST-BOM</id></invoice>'),
          encodingSupported: false
        };
      }
      return {
        xmlContent: Buffer.from('<?xml version="1.0" encoding="UTF-8"?><invoice><id>TEST-BOM</id></invoice>'),
        encodingSupported: true
      };
    } catch (e) {
      // Fallback to UTF-8 if encoding not supported
      return {
        xmlContent: Buffer.from('<?xml version="1.0" encoding="UTF-8"?><invoice><id>TEST-BOM</id></invoice>'),
        encodingSupported: false
      };
    }
  };

  for (const bomType of bomTypes) {
    const { result, metric } = await PerformanceTracker.track(
      'bom-processing',
      async () => {
        const { xmlContent, encodingSupported } = buildPayload(bomType.encoding);
        const fullContent = Buffer.concat([bomType.bom, xmlContent]);

        // Removal counts as successful when exactly the BOM's length was dropped.
        const stripped = removeBOM(fullContent);
        const expectedLength = fullContent.length - bomType.bom.length;

        return {
          bomBytes: Array.from(bomType.bom).map(toHex).join(' '),
          totalSize: fullContent.length,
          bomRemoved: stripped.length === expectedLength,
          encodingSupported
        };
      }
    );

    const mark = result.bomRemoved ? '✓' : '✗';
    const outcome = result.bomRemoved ? 'removed successfully' : 'removal failed';

    console.log(`${bomType.name}:`);
    console.log(` BOM: ${result.bomBytes}`);
    console.log(` Encoding: ${bomType.encoding}`);
    console.log(` Description: ${bomType.description}`);
    console.log(` Total size: ${result.totalSize} bytes`);
    console.log(` ${mark} BOM ${outcome}`);
    console.log(` Processing time: ${metric.duration.toFixed(2)}ms`);
  }
});
/**
 * PARSE-04: feeds XML documents with a UTF-8 BOM at various positions (start,
 * after the XML declaration, absent, duplicated) to `EInvoice.fromXmlString`
 * and logs whether each one parsed.
 *
 * The `valid` flag on each fixture is only echoed in the log as the expected
 * outcome; it is not asserted.
 */
tap.test('PARSE-04: BOM in different positions', async () => {
  const positionTests = [
    {
      name: 'BOM at start (correct)',
      content: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from('<?xml version="1.0"?><invoice><id>TEST-001</id></invoice>')
      ]),
      valid: true
    },
    {
      name: 'BOM after XML declaration',
      content: Buffer.concat([
        Buffer.from('<?xml version="1.0"?>'),
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from('<invoice><id>TEST-002</id></invoice>')
      ]),
      valid: false
    },
    {
      name: 'No BOM',
      content: Buffer.from('<?xml version="1.0"?><invoice><id>TEST-003</id></invoice>'),
      valid: true
    },
    {
      name: 'Multiple BOMs',
      content: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from([0xEF, 0xBB, 0xBF]),
        Buffer.from('<?xml version="1.0"?><invoice><id>TEST-004</id></invoice>')
      ]),
      valid: false
    }
  ];

  for (const test of positionTests) {
    const { result } = await PerformanceTracker.track(
      'bom-position',
      async () => {
        const invoice = new einvoice.EInvoice();
        try {
          await invoice.fromXmlString(test.content.toString('utf8'));
          return { parsed: true, error: null };
        } catch (error: unknown) {
          // A thrown value is not guaranteed to be an Error; narrow before
          // reading `.message` so non-Error throwables are still reported.
          const message = error instanceof Error ? error.message : String(error);
          return { parsed: false, error: message };
        }
      }
    );

    console.log(`${test.name}: ${result.parsed ? '✓' : '✗'}`);
    console.log(` Expected ${test.valid ? 'valid' : 'invalid'}, got ${result.parsed ? 'parsed' : 'error'}`);
    if (!result.parsed) {
      console.log(` Error: ${result.error}`);
    }
  }
});
/**
 * PARSE-04: parses a UBL and a ZUGFeRD/CII sample document, each prefixed
 * with a UTF-8 BOM, through `EInvoice.fromXmlString`. It logs the resulting
 * invoice id and detected format, or the error message when parsing throws.
 * Outcomes are only logged; nothing is asserted.
 */
tap.test('PARSE-04: Real invoice files with BOM', async () => {
  // Test with actual invoice formats that might have BOM
  const realWorldTests = [
    {
      name: 'UBL with UTF-8 BOM',
      xml: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
        Buffer.from(`<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2">
<cbc:ID>BOM-UBL-001</cbc:ID>
<cbc:IssueDate>2024-01-01</cbc:IssueDate>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Test Supplier</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Berlin</cbc:CityName>
<cbc:PostalZone>10115</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Test Customer</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Munich</cbc:CityName>
<cbc:PostalZone>80331</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:InvoicedQuantity unitCode="EA">1</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">100.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Test Product</cbc:Name>
</cac:Item>
<cac:Price>
<cbc:PriceAmount currencyID="EUR">100.00</cbc:PriceAmount>
</cac:Price>
</cac:InvoiceLine>
</ubl:Invoice>`)
      ])
    },
    {
      name: 'ZUGFeRD with UTF-8 BOM',
      xml: Buffer.concat([
        Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
        Buffer.from(`<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
<rsm:ExchangedDocument>
<ram:ID>BOM-ZUGFERD-001</ram:ID>
</rsm:ExchangedDocument>
</rsm:CrossIndustryInvoice>`)
      ])
    }
  ];

  for (const test of realWorldTests) {
    const { result } = await PerformanceTracker.track(
      'real-world-bom',
      async () => {
        const invoice = new einvoice.EInvoice();

        try {
          await invoice.fromXmlString(test.xml.toString('utf8'));
          return {
            success: true,
            id: invoice.id,
            format: invoice.getFormat()
          };
        } catch (error: unknown) {
          // A thrown value is not guaranteed to be an Error; narrow before
          // reading `.message` so non-Error throwables are still reported.
          return {
            success: false,
            error: error instanceof Error ? error.message : String(error)
          };
        }
      }
    );

    console.log(`${test.name}: ${result.success ? '✓' : '✗'}`);
    if (result.success) {
      console.log(` Invoice ID: ${result.id}`);
      console.log(` Format: ${einvoice.InvoiceFormat[result.format]}`);
    } else {
      console.log(` Error: ${result.error}`);
    }
  }
});
/**
 * PARSE-04: builds documents whose BOM disagrees with the encoding named in
 * the XML declaration and logs whether `EInvoice.fromXmlString` rejects them.
 * Outcomes are only logged; nothing is asserted.
 *
 * NOTE(review): the bytes are decoded with `toString('utf8')` before parsing,
 * so the parser receives a string rather than the raw BOM bytes. Confirm this
 * is the intended way to exercise the UTF-16 conflict case.
 */
tap.test('PARSE-04: BOM encoding conflicts', async () => {
  const conflictTests = [
    {
      name: 'UTF-16 BOM with UTF-8 declaration',
      bom: Buffer.from([0xFF, 0xFE]), // UTF-16 LE BOM
      xml: '<?xml version="1.0" encoding="UTF-8"?><invoice><id>CONFLICT-001</id></invoice>',
      issue: 'BOM indicates UTF-16 but declaration says UTF-8'
    },
    {
      name: 'UTF-8 BOM with ISO-8859-1 declaration',
      bom: Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
      xml: '<?xml version="1.0" encoding="ISO-8859-1"?><invoice><id>CONFLICT-002</id></invoice>',
      issue: 'BOM indicates UTF-8 but declaration says ISO-8859-1'
    }
  ];

  for (const test of conflictTests) {
    const content = Buffer.concat([test.bom, Buffer.from(test.xml)]);

    const { result } = await PerformanceTracker.track(
      'bom-conflict',
      async () => {
        const invoice = new einvoice.EInvoice();
        try {
          await invoice.fromXmlString(content.toString('utf8'));
          return { parsed: true };
        } catch (error: unknown) {
          // A thrown value is not guaranteed to be an Error; narrow before
          // reading `.message` so non-Error throwables are still reported.
          const message = error instanceof Error ? error.message : String(error);
          // Lower-case once and reuse for both keyword checks.
          const lowered = message.toLowerCase();
          return {
            parsed: false,
            error: message,
            isEncodingError: lowered.includes('encoding') || lowered.includes('bom')
          };
        }
      }
    );

    console.log(`${test.name}: ${!result.parsed ? '✓ (correctly rejected)' : '✗ (should have failed)'}`);
    console.log(` Issue: ${test.issue}`);
    if (!result.parsed) {
      console.log(` ${result.isEncodingError ? 'Encoding error detected' : 'Other error'}`);
    }
  }
});
/**
 * PARSE-04: generates UBL invoices with 1, 10 and 100 line items, prefixes
 * each with a UTF-8 BOM, parses them through `EInvoice.fromXmlString`, and
 * logs item count, parse time and throughput. Outcomes are only logged;
 * nothing is asserted.
 */
tap.test('PARSE-04: Performance with BOM', async () => {
  const sizes = [1, 10, 100];

  for (const size of sizes) {
    // Generate invoice with many line items (IDs are 1-based)
    const lines: string[] = Array.from({ length: size }, (_, index) => {
      const i = index + 1;
      return `
<cac:InvoiceLine>
<cbc:ID>${i}</cbc:ID>
<cbc:InvoicedQuantity unitCode="EA">1</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">${i * 10}.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Product ${i}</cbc:Name>
</cac:Item>
</cac:InvoiceLine>`;
    });

    const xmlWithBom = Buffer.concat([
      Buffer.from([0xEF, 0xBB, 0xBF]), // UTF-8 BOM
      Buffer.from(`<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2">
<cbc:ID>PERF-BOM-${size}</cbc:ID>
<cbc:IssueDate>2024-01-01</cbc:IssueDate>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Performance Test Supplier</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Berlin</cbc:CityName>
<cbc:PostalZone>10115</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Performance Test Customer</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Munich</cbc:CityName>
<cbc:PostalZone>80331</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingCustomerParty>
${lines.join('')}
</ubl:Invoice>`)
    ]);

    const { result, metric } = await PerformanceTracker.track(
      `bom-performance-${size}`,
      async () => {
        const invoice = new einvoice.EInvoice();
        try {
          await invoice.fromXmlString(xmlWithBom.toString('utf8'));
          return {
            success: true,
            // `??` rather than `||`: only a missing `items` should fall back to 0.
            itemCount: invoice.items?.length ?? 0
          };
        } catch (error: unknown) {
          // A thrown value is not guaranteed to be an Error; narrow before
          // reading `.message` so non-Error throwables are still reported.
          return {
            success: false,
            error: error instanceof Error ? error.message : String(error)
          };
        }
      }
    );

    const xmlSize = xmlWithBom.length / 1024; // KB
    console.log(`Parse ${size} items with BOM (${xmlSize.toFixed(1)}KB): ${result.success ? '✓' : '✗'}`);
    if (result.success) {
      console.log(` Items parsed: ${result.itemCount}`);
      console.log(` Parse time: ${metric.duration.toFixed(2)}ms`);
      // metric.duration is printed with an "ms" suffix above, so scale by 1000 for KB/s.
      console.log(` Speed: ${(xmlSize / metric.duration * 1000).toFixed(2)}KB/s`);
    }
  }
});
/**
 * PARSE-04: prints a fixed list of BOM-handling recommendations, followed by
 * the average and maximum timings recorded under the 'bom-processing' key
 * when `PerformanceTracker.getStats` returns any.
 */
tap.test('PARSE-04: BOM handling summary', async () => {
  const bestPractices = [
    '\nBOM Handling Best Practices:',
    '1. Always check for BOM at the beginning of XML files',
    '2. Remove BOM before parsing if present',
    '3. Handle conflicts between BOM and encoding declaration',
    '4. Support UTF-8, UTF-16, and UTF-32 BOMs',
    '5. Validate that BOM matches the actual encoding'
  ];
  for (const line of bestPractices) {
    console.log(line);
  }

  const stats = PerformanceTracker.getStats('bom-processing');
  if (!stats) {
    // Nothing was tracked under this key, so there is nothing to report.
    return;
  }

  console.log(`\nBOM Processing Performance:`);
  console.log(` Average: ${stats.avg.toFixed(2)}ms`);
  console.log(` Max: ${stats.max.toFixed(2)}ms`);
});
2025-05-28 08:40:26 +00:00
// Run the tests
2025-05-25 19:45:37 +00:00
// NOTE(review): the tap.test() calls above presumably only register tests and
// this call triggers their execution — confirm against the tapbundle docs.
tap.start();