import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as einvoice from '../../../ts/index.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
tap.test('PARSE-03: Encoding declaration detection', async () => {
const encodingTests = [
{
name: 'UTF-8 declaration',
xml: '\nTEST-001',
expectedEncoding: 'UTF-8',
actualEncoding: 'UTF-8'
},
{
name: 'UTF-16 declaration',
xml: '\nTEST-002',
expectedEncoding: 'UTF-16',
actualEncoding: 'UTF-8' // Mismatch test
},
{
name: 'ISO-8859-1 declaration',
xml: '\nMüller',
expectedEncoding: 'ISO-8859-1',
actualEncoding: 'ISO-8859-1'
},
{
name: 'Windows-1252 declaration',
xml: '\nSpecial – chars',
expectedEncoding: 'Windows-1252',
actualEncoding: 'Windows-1252'
},
{
name: 'Case variations',
xml: '\nTEST-003',
expectedEncoding: 'UTF-8',
actualEncoding: 'UTF-8'
},
{
name: 'No encoding declaration',
xml: '\nTEST-004',
expectedEncoding: 'UTF-8', // Default
actualEncoding: 'UTF-8'
}
];
for (const test of encodingTests) {
const { result, metric } = await PerformanceTracker.track(
'encoding-detection',
async () => {
// Extract declared encoding
const encodingMatch = test.xml.match(/encoding=["']([^"']+)["']/i);
const declaredEncoding = encodingMatch ? encodingMatch[1].toUpperCase() : 'UTF-8';
return {
declaredEncoding,
matches: declaredEncoding.replace(/-/g, '').toUpperCase() ===
test.expectedEncoding.replace(/-/g, '').toUpperCase()
};
}
);
console.log(`${test.name}:`);
console.log(` Declared: ${result.declaredEncoding}`);
console.log(` Expected: ${test.expectedEncoding}`);
console.log(` ${result.matches ? '✓' : '✗'} Declaration ${result.matches ? 'matches' : 'mismatch'}`);
}
});
tap.test('PARSE-03: BOM (Byte Order Mark) detection', async () => {
const bomTests = [
{
name: 'UTF-8 with BOM',
bom: Buffer.from([0xEF, 0xBB, 0xBF]),
encoding: 'UTF-8',
xml: 'TEST-005'
},
{
name: 'UTF-16 LE BOM',
bom: Buffer.from([0xFF, 0xFE]),
encoding: 'UTF-16LE',
xml: 'TEST-006'
},
{
name: 'UTF-16 BE BOM',
bom: Buffer.from([0xFE, 0xFF]),
encoding: 'UTF-16BE',
xml: 'TEST-007'
}
];
for (const test of bomTests) {
const xmlWithBom = Buffer.concat([test.bom, Buffer.from(test.xml)]);
const { result } = await PerformanceTracker.track(
'bom-detection',
async () => {
const invoice = new einvoice.EInvoice();
try {
// Try parsing with BOM
await invoice.fromXmlString(xmlWithBom.toString('utf8'));
return { success: true, parsed: true };
} catch (error) {
return {
success: false,
error: error.message,
// Check if it's an encoding issue
encodingError: error.message.toLowerCase().includes('encoding') ||
error.message.toLowerCase().includes('utf')
};
}
}
);
console.log(`${test.name}: ${result.parsed ? '✓' : '✗'}`);
if (!result.parsed) {
console.log(` Error: ${result.error}`);
if (result.encodingError) {
console.log(` Likely encoding issue detected`);
}
}
}
});
tap.test('PARSE-03: Special character handling', async () => {
const charTests = [
{
name: 'German umlauts',
xml: `
UMLAUT-TEST
Müller, Schäfer, Köln, Größe
`,
chars: 'üäöß',
expectedChars: 'Müller, Schäfer, Köln, Größe'
},
{
name: 'French accents',
xml: `
ACCENT-TEST
Café, naïve, façade, à côté
`,
chars: 'éèêëàçï',
expectedChars: 'Café, naïve, façade, à côté'
},
{
name: 'Currency symbols',
xml: `
CURRENCY-TEST
€ 100, £ 50, ¥ 1000, $ 75
`,
chars: '€£¥$',
expectedChars: '€ 100, £ 50, ¥ 1000, $ 75'
},
{
name: 'Emoji and Unicode',
xml: `
UNICODE-TEST
Invoice 📄 Payment 💰 Delivered 📦
`,
chars: '📄💰📦',
expectedChars: 'Invoice 📄 Payment 💰 Delivered 📦'
}
];
for (const test of charTests) {
const { result } = await PerformanceTracker.track(
'special-chars',
async () => {
const invoice = new einvoice.EInvoice();
try {
await invoice.fromXmlString(test.xml);
return {
success: true,
notes: invoice.notes,
preserved: invoice.notes && invoice.notes[0] === test.expectedChars
};
} catch (error) {
return { success: false, error: error.message };
}
}
);
console.log(`${test.name}: ${result.success ? '✓' : '✗'}`);
if (result.success && result.notes) {
console.log(` Characters ${result.preserved ? 'preserved' : 'not preserved'}`);
if (result.notes[0]) {
console.log(` Content: ${result.notes[0]}`);
}
}
}
});
tap.test('PARSE-03: XML entities and escaping', async () => {
const entityTests = [
{
name: 'Basic XML entities',
xml: `
ENTITY-TEST-1
Less than < Greater than > Ampersand & Quote " Apostrophe '
`,
expected: 'Less than < Greater than > Ampersand & Quote " Apostrophe \''
},
{
name: 'Numeric entities',
xml: `
ENTITY-TEST-2
Euro € Copyright © Registered ®
`,
expected: 'Euro € Copyright © Registered ®'
},
{
name: 'CDATA sections',
xml: `
CDATA-TEST
Price > 100 & quantity < 50
]]>
`,
expected: 'HTML content: Price > 100 & quantity < 50
'
}
];
for (const test of entityTests) {
const { result } = await PerformanceTracker.track(
'entity-handling',
async () => {
const invoice = new einvoice.EInvoice();
try {
await invoice.fromXmlString(test.xml);
return {
success: true,
notes: invoice.notes,
correct: invoice.notes && invoice.notes[0] === test.expected
};
} catch (error) {
return { success: false, error: error.message };
}
}
);
console.log(`${test.name}: ${result.success && result.correct ? '✓' : '✗'}`);
if (result.success && result.notes) {
console.log(` Expected: ${test.expected}`);
console.log(` Got: ${result.notes[0] || '(empty)'}`);
}
}
});
tap.test('PARSE-03: Mixed encoding scenarios', async () => {
// Test real-world scenarios where encoding might be problematic
const scenarios = [
{
name: 'Mislabeled encoding',
// Says UTF-8 but contains ISO-8859-1 characters
xml: 'Müller GmbH',
issue: 'Declared UTF-8 but might have ISO-8859-1 content'
},
{
name: 'Double-encoded UTF-8',
// UTF-8 encoded twice
xml: 'Müller',
issue: 'Possible double UTF-8 encoding'
},
{
name: 'Mixed line endings with special chars',
xml: '\r\n\nSpecial–chars\r',
issue: 'Mixed CRLF/LF with special characters'
}
];
for (const scenario of scenarios) {
const { result } = await PerformanceTracker.track(
'mixed-encoding',
async () => {
const invoice = new einvoice.EInvoice();
try {
await invoice.fromXmlString(scenario.xml);
return { success: true, handled: true };
} catch (error) {
return {
success: false,
error: error.message,
isEncodingError: error.message.includes('encoding') ||
error.message.includes('character')
};
}
}
);
console.log(`${scenario.name}: ${result.handled || !result.isEncodingError ? '✓' : '✗'}`);
console.log(` Issue: ${scenario.issue}`);
if (!result.success) {
console.log(` Result: ${result.isEncodingError ? 'Encoding error' : 'Other error'}`);
}
}
});
tap.test('PARSE-03: Encoding performance', async () => {
const stats = PerformanceTracker.getStats('encoding-detection');
if (stats) {
console.log('\nEncoding Detection Performance:');
console.log(` Total operations: ${stats.count}`);
console.log(` Average time: ${stats.avg.toFixed(2)}ms`);
console.log(` Max time: ${stats.max.toFixed(2)}ms`);
// Encoding detection should be fast
expect(stats.avg).toBeLessThan(5); // Should detect encoding in < 5ms on average
}
});
// Run the tests
tap.start();