2025-05-25 19:45:37 +00:00
|
|
|
|
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
|
|
|
|
import * as einvoice from '../../../ts/index.js';
|
|
|
|
|
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
|
|
|
|
|
|
2025-05-28 08:40:26 +00:00
|
|
|
|
/**
 * PARSE-03: verifies that the `encoding` pseudo-attribute of the XML
 * declaration is extracted correctly and that a missing declaration falls
 * back to UTF-8.
 *
 * The comparison ignores case and hyphens, so `utf-8`, `UTF-8` and `UTF8`
 * are all treated as the same label.
 */
tap.test('PARSE-03: Encoding declaration detection', async () => {
  // `actualEncoding` documents how the payload is really encoded; it is
  // informational only and is not consulted by the detection logic below.
  const encodingTests = [
    {
      name: 'UTF-8 declaration',
      xml: '<?xml version="1.0" encoding="UTF-8"?>\n<invoice><id>TEST-001</id></invoice>',
      expectedEncoding: 'UTF-8',
      actualEncoding: 'UTF-8',
    },
    {
      name: 'UTF-16 declaration',
      xml: '<?xml version="1.0" encoding="UTF-16"?>\n<invoice><id>TEST-002</id></invoice>',
      expectedEncoding: 'UTF-16',
      actualEncoding: 'UTF-8', // Mismatch test
    },
    {
      name: 'ISO-8859-1 declaration',
      xml: '<?xml version="1.0" encoding="ISO-8859-1"?>\n<invoice><supplier>Müller</supplier></invoice>',
      expectedEncoding: 'ISO-8859-1',
      actualEncoding: 'ISO-8859-1',
    },
    {
      name: 'Windows-1252 declaration',
      xml: '<?xml version="1.0" encoding="Windows-1252"?>\n<invoice><note>Special – chars</note></invoice>',
      expectedEncoding: 'Windows-1252',
      actualEncoding: 'Windows-1252',
    },
    {
      name: 'Case variations',
      xml: '<?xml version="1.0" encoding="utf-8"?>\n<invoice><id>TEST-003</id></invoice>',
      expectedEncoding: 'UTF-8',
      actualEncoding: 'UTF-8',
    },
    {
      name: 'No encoding declaration',
      xml: '<?xml version="1.0"?>\n<invoice><id>TEST-004</id></invoice>',
      expectedEncoding: 'UTF-8', // Default
      actualEncoding: 'UTF-8',
    },
  ];

  // Anchored to the XML declaration so that an `encoding="..."` attribute on
  // some element in the document body can never be mistaken for it.
  const declarationEncoding = /^<\?xml[^>]*\sencoding\s*=\s*(["'])([^"']+)\1/i;

  // Canonical form used for comparison: upper-case, hyphens removed.
  const normalize = (label) => label.replace(/-/g, '').toUpperCase();

  for (const test of encodingTests) {
    const { result } = await PerformanceTracker.track(
      'encoding-detection',
      async () => {
        // Extract declared encoding (UTF-8 is the default when absent)
        const encodingMatch = declarationEncoding.exec(test.xml);
        const declaredEncoding = encodingMatch ? encodingMatch[2].toUpperCase() : 'UTF-8';

        return {
          declaredEncoding,
          matches: normalize(declaredEncoding) === normalize(test.expectedEncoding),
        };
      }
    );

    console.log(`${test.name}:`);
    console.log(` Declared: ${result.declaredEncoding}`);
    console.log(` Expected: ${test.expectedEncoding}`);
    console.log(` ${result.matches ? '✓' : '✗'} Declaration ${result.matches ? 'matches' : 'mismatch'}`);

    // Without this assertion the test could only ever log, never fail.
    expect(result.matches).toBeTrue();
  }
});
|
|
|
|
|
|
|
|
|
|
/**
 * PARSE-03: feeds documents that start with a byte order mark to the parser
 * and reports whether they can be parsed.
 *
 * Each payload is encoded in the encoding its BOM announces and decoded with
 * the same encoding before being handed to `fromXmlString`, so the string
 * begins with U+FEFF followed by intact XML. Previously every body was
 * encoded as UTF-8 and decoded as UTF-8 regardless of the BOM, which turned
 * the UTF-16 BOM bytes into U+FFFD and never exercised UTF-16 input at all.
 */
tap.test('PARSE-03: BOM (Byte Order Mark) detection', async () => {
  const bomTests = [
    {
      name: 'UTF-8 with BOM',
      bom: Buffer.from([0xEF, 0xBB, 0xBF]),
      encoding: 'UTF-8',
      xml: '<?xml version="1.0" encoding="UTF-8"?><invoice><id>TEST-005</id></invoice>',
    },
    {
      name: 'UTF-16 LE BOM',
      bom: Buffer.from([0xFF, 0xFE]),
      encoding: 'UTF-16LE',
      xml: '<?xml version="1.0" encoding="UTF-16"?><invoice><id>TEST-006</id></invoice>',
    },
    {
      name: 'UTF-16 BE BOM',
      bom: Buffer.from([0xFE, 0xFF]),
      encoding: 'UTF-16BE',
      xml: '<?xml version="1.0" encoding="UTF-16"?><invoice><id>TEST-007</id></invoice>',
    },
  ];

  // Buffer has no UTF-16BE codec: go through UTF-16LE and swap each byte pair.
  const encodeBody = (xml, encoding) => {
    switch (encoding) {
      case 'UTF-16LE':
        return Buffer.from(xml, 'utf16le');
      case 'UTF-16BE':
        return Buffer.from(xml, 'utf16le').swap16();
      default:
        return Buffer.from(xml, 'utf8');
    }
  };

  // Buffer#toString does not strip the BOM, so the decoded string keeps U+FEFF.
  const decodeBytes = (bytes, encoding) => {
    switch (encoding) {
      case 'UTF-16LE':
        return bytes.toString('utf16le');
      case 'UTF-16BE':
        // swap16() mutates in place, so work on a copy.
        return Buffer.from(bytes).swap16().toString('utf16le');
      default:
        return bytes.toString('utf8');
    }
  };

  for (const test of bomTests) {
    const xmlWithBom = Buffer.concat([test.bom, encodeBody(test.xml, test.encoding)]);

    const { result } = await PerformanceTracker.track(
      'bom-detection',
      async () => {
        const invoice = new einvoice.EInvoice();

        try {
          // Try parsing with BOM
          await invoice.fromXmlString(decodeBytes(xmlWithBom, test.encoding));
          return { success: true, parsed: true };
        } catch (error) {
          const message = error instanceof Error ? error.message : String(error);
          const lowered = message.toLowerCase();
          return {
            success: false,
            parsed: false,
            error: message,
            // Check if it's an encoding issue
            encodingError: lowered.includes('encoding') || lowered.includes('utf'),
          };
        }
      }
    );

    console.log(`${test.name}: ${result.parsed ? '✓' : '✗'}`);
    if (!result.parsed) {
      console.log(` Error: ${result.error}`);
      if (result.encodingError) {
        console.log(` Likely encoding issue detected`);
      }
    }
  }
});
|
|
|
|
|
|
|
|
|
|
/**
 * PARSE-03: parses minimal UBL invoices whose note contains non-ASCII text
 * and logs whether the first parsed note equals the original text.
 */
tap.test('PARSE-03: Special character handling', async () => {
  // Builds the five-line UBL document shared by every case below.
  const invoiceWithNote = (id, note) =>
    [
      '<?xml version="1.0" encoding="UTF-8"?>',
      '<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2" xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">',
      `<cbc:ID>${id}</cbc:ID>`,
      `<cbc:Note>${note}</cbc:Note>`,
      '</ubl:Invoice>',
    ].join('\n');

  // [name, invoice id, characters under test, note text]
  const charTests = [
    ['German umlauts', 'UMLAUT-TEST', 'üäöß', 'Müller, Schäfer, Köln, Größe'],
    ['French accents', 'ACCENT-TEST', 'éèêëàçï', 'Café, naïve, façade, à côté'],
    ['Currency symbols', 'CURRENCY-TEST', '€£¥$', '€ 100, £ 50, ¥ 1000, $ 75'],
    ['Emoji and Unicode', 'UNICODE-TEST', '📄💰📦', 'Invoice 📄 Payment 💰 Delivered 📦'],
  ].map(([name, id, chars, expectedChars]) => ({
    name,
    xml: invoiceWithNote(id, expectedChars),
    chars,
    expectedChars,
  }));

  // Parses one case; failures are reported as a result object, never thrown.
  const parseCase = async (testCase) => {
    const invoice = new einvoice.EInvoice();
    try {
      await invoice.fromXmlString(testCase.xml);
    } catch (error) {
      return { success: false, error: error.message };
    }
    const notes = invoice.notes;
    return {
      success: true,
      notes,
      preserved: notes && notes[0] === testCase.expectedChars,
    };
  };

  for (const test of charTests) {
    const { result } = await PerformanceTracker.track('special-chars', () => parseCase(test));

    const mark = result.success ? '✓' : '✗';
    console.log(`${test.name}: ${mark}`);

    if (!result.success || !result.notes) {
      continue;
    }

    const verdict = result.preserved ? 'preserved' : 'not preserved';
    console.log(` Characters ${verdict}`);

    const firstNote = result.notes[0];
    if (firstNote) {
      console.log(` Content: ${firstNote}`);
    }
  }
});
|
|
|
|
|
|
|
|
|
|
/**
 * PARSE-03: checks that predefined entities, numeric character references
 * and CDATA sections inside a note are decoded to their literal text.
 *
 * The fixtures must contain the *escaped* forms: a raw `<` or `&` in element
 * content makes the document malformed, and a literal `€` exercises no
 * character-reference handling at all.
 */
tap.test('PARSE-03: XML entities and escaping', async () => {
  // Wraps already-escaped note markup in a minimal UBL invoice.
  const invoiceWithNote = (id, noteMarkup) =>
    [
      '<?xml version="1.0" encoding="UTF-8"?>',
      '<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2" xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">',
      `<cbc:ID>${id}</cbc:ID>`,
      `<cbc:Note>${noteMarkup}</cbc:Note>`,
      '</ubl:Invoice>',
    ].join('\n');

  const entityTests = [
    {
      name: 'Basic XML entities',
      // The five predefined XML entities.
      xml: invoiceWithNote(
        'ENTITY-TEST-1',
        'Less than &lt; Greater than &gt; Ampersand &amp; Quote &quot; Apostrophe &apos;'
      ),
      expected: 'Less than < Greater than > Ampersand & Quote " Apostrophe \'',
    },
    {
      name: 'Numeric entities',
      // Decimal character references: U+20AC (8364), U+00A9 (169), U+00AE (174).
      xml: invoiceWithNote('ENTITY-TEST-2', 'Euro &#8364; Copyright &#169; Registered &#174;'),
      expected: 'Euro € Copyright © Registered ®',
    },
    {
      name: 'CDATA sections',
      // Inside CDATA, markup characters are literal and need no escaping.
      xml: invoiceWithNote(
        'CDATA-TEST',
        '<![CDATA[HTML content: <p>Price > 100 & quantity < 50</p>]]>'
      ),
      expected: 'HTML content: <p>Price > 100 & quantity < 50</p>',
    },
  ];

  for (const test of entityTests) {
    const { result } = await PerformanceTracker.track(
      'entity-handling',
      async () => {
        const invoice = new einvoice.EInvoice();

        try {
          await invoice.fromXmlString(test.xml);
          return {
            success: true,
            notes: invoice.notes,
            correct: invoice.notes && invoice.notes[0] === test.expected,
          };
        } catch (error) {
          return { success: false, error: error.message };
        }
      }
    );

    console.log(`${test.name}: ${result.success && result.correct ? '✓' : '✗'}`);
    if (result.success && result.notes) {
      console.log(` Expected: ${test.expected}`);
      console.log(` Got: ${result.notes[0] || '(empty)'}`);
    }
  }
});
|
|
|
|
|
|
|
|
|
|
/**
 * PARSE-03: runs the parser against inputs that imitate real-world encoding
 * problems and logs whether each one was parsed or failed with an
 * encoding-related error.
 *
 * A scenario is marked ✓ when it parsed, or when it failed for a reason that
 * does not look encoding-related.
 */
tap.test('PARSE-03: Mixed encoding scenarios', async () => {
  // Test real-world scenarios where encoding might be problematic
  const scenarios = [
    {
      name: 'Mislabeled encoding',
      // Says UTF-8 but contains ISO-8859-1 characters
      // NOTE(review): as a JS string this is ordinary text; a byte-level
      // mislabel cannot be expressed through fromXmlString — confirm intent.
      xml: '<?xml version="1.0" encoding="UTF-8"?><invoice><supplier>Müller GmbH</supplier></invoice>',
      issue: 'Declared UTF-8 but might have ISO-8859-1 content',
    },
    {
      name: 'Double-encoded UTF-8',
      // UTF-8 encoded twice: the UTF-8 bytes of "ü" (C3 BC) were read as
      // Latin-1 and encoded again, which yields the mojibake "Ã¼".
      xml: '<?xml version="1.0" encoding="UTF-8"?><invoice><note>MÃ¼ller</note></invoice>',
      issue: 'Possible double UTF-8 encoding',
    },
    {
      name: 'Mixed line endings with special chars',
      xml: '<?xml version="1.0" encoding="UTF-8"?>\r\n<invoice>\n<note>Special–chars</note>\r</invoice>',
      issue: 'Mixed CRLF/LF with special characters',
    },
  ];

  for (const scenario of scenarios) {
    const { result } = await PerformanceTracker.track(
      'mixed-encoding',
      async () => {
        const invoice = new einvoice.EInvoice();

        try {
          await invoice.fromXmlString(scenario.xml);
          return { success: true, handled: true };
        } catch (error) {
          const message = error instanceof Error ? error.message : String(error);
          // Case-insensitive so "Encoding"/"Character" in a message still
          // counts, matching the classification used by the BOM test.
          const lowered = message.toLowerCase();
          return {
            success: false,
            error: message,
            isEncodingError: lowered.includes('encoding') || lowered.includes('character'),
          };
        }
      }
    );

    console.log(`${scenario.name}: ${result.handled || !result.isEncodingError ? '✓' : '✗'}`);
    console.log(` Issue: ${scenario.issue}`);
    if (!result.success) {
      console.log(` Result: ${result.isEncodingError ? 'Encoding error' : 'Other error'}`);
    }
  }
});
|
|
|
|
|
|
|
|
|
|
/**
 * PARSE-03: prints the timing statistics recorded under the
 * 'encoding-detection' key and asserts that the average stays below 5 ms.
 * When no statistics are available the test does nothing.
 */
tap.test('PARSE-03: Encoding performance', async () => {
  const stats = PerformanceTracker.getStats('encoding-detection');

  // Nothing recorded: nothing to report or assert.
  if (!stats) {
    return;
  }

  const summary = [
    '\nEncoding Detection Performance:',
    ` Total operations: ${stats.count}`,
    ` Average time: ${stats.avg.toFixed(2)}ms`,
    ` Max time: ${stats.max.toFixed(2)}ms`,
  ];
  for (const line of summary) {
    console.log(line);
  }

  // Encoding detection should be fast
  const maxAverageMs = 5; // Should detect encoding in < 5ms on average
  expect(stats.avg).toBeLessThan(maxAverageMs);
});
|
|
|
|
|
|
2025-05-28 08:40:26 +00:00
|
|
|
|
// Run the tests
|
2025-05-25 19:45:37 +00:00
|
|
|
|
// NOTE(review): presumably this triggers execution of the tap.test cases
// registered above — confirm against the tstest/tapbundle documentation.
tap.start();
|