// einvoice/test/suite/einvoice_parsing/test.parse-08.xpath-evaluation.ts
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as einvoice from '../../../ts/index.js';
import * as plugins from '../../plugins.js';
/**
 * PARSE-08: parses one UBL and one CII sample invoice through
 * `EInvoice.fromXmlString` and compares the fields exposed on the parsed
 * object (id, dates, party names, line count, gross total) with the values
 * written into the sample XML.
 *
 * Only fields that were actually extracted (i.e. are not `undefined`) are
 * compared, so a format that does not populate a field is tolerated.
 */
tap.test('PARSE-08: XPath evaluation for e-invoice data extraction', async () => {
  console.log('Testing XPath-like data extraction from e-invoices...\n');

  // Date instances are reduced to their YYYY-MM-DD part (taken from the ISO
  // string); any other value is passed through untouched.
  const toIsoDay = (value: unknown): unknown =>
    value instanceof Date ? value.toISOString().split('T')[0] : value;

  // Test extracting specific fields from different invoice formats
  const invoiceExtractionTests = [
    {
      name: 'UBL Invoice field extraction',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>UBL-XPATH-001</cbc:ID>
<cbc:IssueDate>2024-01-15</cbc:IssueDate>
<cbc:DueDate>2024-02-15</cbc:DueDate>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>XPath Test Supplier</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>123 Test Street</cbc:StreetName>
<cbc:CityName>Berlin</cbc:CityName>
<cbc:PostalZone>10115</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>XPath Test Customer</cbc:Name>
</cac:PartyName>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:InvoicedQuantity unitCode="EA">10</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">100.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Test Product A</cbc:Name>
<cbc:Description>Detailed description of product A</cbc:Description>
</cac:Item>
</cac:InvoiceLine>
<cac:InvoiceLine>
<cbc:ID>2</cbc:ID>
<cbc:InvoicedQuantity unitCode="KG">5.5</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">55.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Test Product B</cbc:Name>
</cac:Item>
</cac:InvoiceLine>
<cac:LegalMonetaryTotal>
<cbc:TaxInclusiveAmount currencyID="EUR">184.45</cbc:TaxInclusiveAmount>
</cac:LegalMonetaryTotal>
</ubl:Invoice>`,
      expectedData: {
        id: 'UBL-XPATH-001',
        issueDate: '2024-01-15',
        dueDate: '2024-02-15',
        supplierName: 'XPath Test Supplier',
        customerName: 'XPath Test Customer',
        lineItemCount: 2,
        totalAmount: 184.45
      }
    },
    {
      name: 'CII Invoice field extraction',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice
xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
xmlns:qdt="urn:un:unece:uncefact:data:standard:QualifiedDataType:100"
xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100"
xmlns:udt="urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100">
<rsm:ExchangedDocument>
<ram:ID>CII-XPATH-001</ram:ID>
<ram:TypeCode>380</ram:TypeCode>
<ram:IssueDateTime>
<udt:DateTimeString format="102">20240115</udt:DateTimeString>
</ram:IssueDateTime>
</rsm:ExchangedDocument>
<rsm:SupplyChainTradeTransaction>
<ram:ApplicableHeaderTradeAgreement>
<ram:SellerTradeParty>
<ram:Name>CII XPath Supplier</ram:Name>
</ram:SellerTradeParty>
<ram:BuyerTradeParty>
<ram:Name>CII XPath Customer</ram:Name>
</ram:BuyerTradeParty>
</ram:ApplicableHeaderTradeAgreement>
</rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>`,
      expectedData: {
        id: 'CII-XPATH-001',
        supplierName: 'CII XPath Supplier',
        customerName: 'CII XPath Customer'
      }
    }
  ];

  for (const test of invoiceExtractionTests) {
    console.log(`\n${test.name}:`);

    try {
      const invoice = new einvoice.EInvoice();
      await invoice.fromXmlString(test.xml);

      console.log(' ✓ Invoice parsed successfully');

      // Extract and verify data
      const extractedData: Record<string, unknown> = {
        id: invoice.id,
        issueDate: toIsoDay(invoice.issueDate),
        supplierName: invoice.from?.name,
        customerName: invoice.to?.name,
        lineItemCount: invoice.items?.length ?? 0
      };

      // Optional fields are only recorded when the parser produced a truthy value.
      if (invoice.dueDate) {
        extractedData.dueDate = toIsoDay(invoice.dueDate);
      }
      if (invoice.totalGross) {
        extractedData.totalAmount = invoice.totalGross;
      }

      console.log(' Extracted data:');
      for (const [key, value] of Object.entries(extractedData)) {
        if (value !== undefined) {
          console.log(` ${key}: ${value}`);
        }
      }

      // Verify expected data
      if (test.expectedData) {
        for (const [key, expectedValue] of Object.entries(test.expectedData)) {
          // Fields the parser did not populate are skipped rather than failed.
          if (extractedData[key] !== undefined) {
            expect(extractedData[key]).toEqual(expectedValue);
          }
        }
      }
    } catch (error: unknown) {
      // NOTE(review): this handler also receives anything thrown by the
      // expect(...) calls above, so a mismatch is logged instead of failing
      // the test — confirm that this leniency is intended.
      // The caught value is narrowed first: it is not guaranteed to be an Error.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ✗ Error: ${message}`);
    }
  }
});
/**
 * PARSE-08: parses a UBL invoice containing repeated notes, two party
 * identifiers, a contact block and five generated invoice lines, then logs
 * what the parsed object exposes (notes, supplier identifiers, line items).
 *
 * The test makes no assertions; it only reports what was extracted, and a
 * parse error is logged rather than rethrown.
 */
tap.test('PARSE-08: Complex data extraction scenarios', async () => {
  console.log('\nTesting complex data extraction scenarios...\n');

  // Test extracting nested and repeated data
  const complexInvoice = `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>COMPLEX-001</cbc:ID>
<cbc:IssueDate>2024-01-01</cbc:IssueDate>
<cbc:Note>First note</cbc:Note>
<cbc:Note>Second note</cbc:Note>
<cbc:Note>Third note with special chars: , ñ, </cbc:Note>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyIdentification>
<cbc:ID schemeID="GLN">1234567890123</cbc:ID>
</cac:PartyIdentification>
<cac:PartyIdentification>
<cbc:ID schemeID="DUNS">123456789</cbc:ID>
</cac:PartyIdentification>
<cac:PartyName>
<cbc:Name>Complex Supplier Corp</cbc:Name>
</cac:PartyName>
<cac:Contact>
<cbc:Name>John Doe</cbc:Name>
<cbc:Telephone>+49 30 12345678</cbc:Telephone>
<cbc:ElectronicMail>john.doe@supplier.com</cbc:ElectronicMail>
</cac:Contact>
</cac:Party>
</cac:AccountingSupplierParty>
${Array.from({length: 5}, (_, i) => `
<cac:InvoiceLine>
<cbc:ID>${i + 1}</cbc:ID>
<cbc:InvoicedQuantity unitCode="${i % 2 === 0 ? 'EA' : 'KG'}">${(i + 1) * 2}</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">${((i + 1) * 50).toFixed(2)}</cbc:LineExtensionAmount>
<cac:AllowanceCharge>
<cbc:ChargeIndicator>false</cbc:ChargeIndicator>
<cbc:Amount currencyID="EUR">${(i * 5).toFixed(2)}</cbc:Amount>
<cbc:AllowanceChargeReason>Discount ${i + 1}</cbc:AllowanceChargeReason>
</cac:AllowanceCharge>
<cac:Item>
<cbc:Name>Product ${String.fromCharCode(65 + i)}</cbc:Name>
<cac:CommodityClassification>
<cbc:ItemClassificationCode listID="CPV">12345678-${i}</cbc:ItemClassificationCode>
</cac:CommodityClassification>
</cac:Item>
</cac:InvoiceLine>`).join('')}
</ubl:Invoice>`;

  try {
    const invoice = new einvoice.EInvoice();
    await invoice.fromXmlString(complexInvoice);

    console.log('Complex invoice extraction results:');
    console.log(` Invoice ID: ${invoice.id}`);
    console.log(` Notes count: ${invoice.notes?.length ?? 0}`);

    if (invoice.notes && invoice.notes.length > 0) {
      console.log(' Notes:');
      invoice.notes.forEach((note, index) => {
        console.log(` ${index + 1}: ${note}`);
      });
    }

    console.log(` Supplier identifiers: ${invoice.from?.identifiers?.length ?? 0}`);
    console.log(` Line items: ${invoice.items?.length ?? 0}`);

    if (invoice.items && invoice.items.length > 0) {
      console.log(' Line item details:');
      invoice.items.forEach((item, index) => {
        // `||` is deliberate here: an empty name or a zero/absent quantity
        // falls back to the placeholder as well.
        console.log(` Item ${index + 1}: ${item.name || 'Unknown'} - Qty: ${item.quantity || 0}`);
      });
    }

    console.log(' ✓ Complex data extraction successful');
  } catch (error: unknown) {
    // The caught value is narrowed first: it is not guaranteed to be an Error.
    const message = error instanceof Error ? error.message : String(error);
    console.log(` ✗ Error: ${message}`);
  }
});
/**
 * PARSE-08: generates UBL invoices of three sizes (5/50/200 lines with
 * 3/10/30 notes), parses each one and logs how long parsing plus reading a
 * handful of fields took, together with the extracted note and item counts.
 *
 * The measured time starts before `fromXmlString`, so it includes parsing.
 * No timing threshold is asserted; errors are logged rather than rethrown.
 */
tap.test('PARSE-08: Performance of data extraction', async () => {
  console.log('\nTesting data extraction performance...\n');

  // Generate invoice with many fields to extract.
  // Any complexity other than 'simple' or 'medium' yields the largest document.
  const generateDataRichInvoice = (complexity: string) => {
    const itemCount = complexity === 'simple' ? 5 : complexity === 'medium' ? 50 : 200;
    const noteCount = complexity === 'simple' ? 3 : complexity === 'medium' ? 10 : 30;

    return `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>PERF-${complexity.toUpperCase()}</cbc:ID>
<cbc:IssueDate>2024-01-01</cbc:IssueDate>
${Array.from({length: noteCount}, (_, i) => `
<cbc:Note>Note ${i + 1} with some content to extract</cbc:Note>`).join('')}
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Performance Test Supplier</cbc:Name>
</cac:PartyName>
</cac:Party>
</cac:AccountingSupplierParty>
${Array.from({length: itemCount}, (_, i) => `
<cac:InvoiceLine>
<cbc:ID>${i + 1}</cbc:ID>
<cbc:InvoicedQuantity unitCode="EA">${i + 1}</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">${((i + 1) * 10).toFixed(2)}</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Item ${i + 1}</cbc:Name>
</cac:Item>
</cac:InvoiceLine>`).join('')}
</ubl:Invoice>`;
  };

  const complexityLevels = ['simple', 'medium', 'complex'];

  for (const complexity of complexityLevels) {
    const xml = generateDataRichInvoice(complexity);
    const startTime = Date.now();

    try {
      const invoice = new einvoice.EInvoice();
      await invoice.fromXmlString(xml);

      // Resolve the possibly-missing item list once so the first/last lookups
      // below never dereference an undefined array.
      const items = invoice.items ?? [];

      // Extract various data points
      const extractedData = {
        id: invoice.id,
        issueDate: invoice.issueDate,
        supplierName: invoice.from?.name,
        noteCount: invoice.notes?.length ?? 0,
        itemCount: items.length,
        firstItemName: items[0]?.name,
        lastItemName: items[items.length - 1]?.name
      };

      const extractTime = Date.now() - startTime;

      console.log(`${complexity.charAt(0).toUpperCase() + complexity.slice(1)} invoice extraction:`);
      console.log(` Extraction time: ${extractTime}ms`);
      console.log(` Notes extracted: ${extractedData.noteCount}`);
      console.log(` Items extracted: ${extractedData.itemCount}`);
      console.log(` ✓ All data points extracted successfully`);
    } catch (error: unknown) {
      // The caught value is narrowed first: it is not guaranteed to be an Error.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ✗ Error: ${message}`);
    }
  }
});
/**
 * PARSE-08: feeds three edge-case UBL documents (empty/whitespace-only
 * fields, a CDATA note, elements carrying attributes) to
 * `EInvoice.fromXmlString` and logs the extracted id and notes.
 *
 * `expectedBehavior` is descriptive text that is only printed; nothing is
 * asserted, and a parse failure is reported as a "Parse result" line.
 */
tap.test('PARSE-08: Special extraction scenarios', async () => {
  console.log('\nTesting special extraction scenarios...\n');

  // Test extracting data with special characters and edge cases
  const specialCases = [
    {
      name: 'Invoice with empty fields',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID></cbc:ID>
<cbc:Note></cbc:Note>
<cbc:Note> </cbc:Note>
</ubl:Invoice>`,
      expectedBehavior: 'Handle empty/whitespace fields gracefully'
    },
    {
      name: 'Invoice with CDATA sections',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>CDATA-001</cbc:ID>
<cbc:Note><![CDATA[This contains <special> characters & symbols]]></cbc:Note>
</ubl:Invoice>`,
      expectedBehavior: 'Extract CDATA content correctly'
    },
    {
      name: 'Invoice with attributes',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID schemeName="Invoice" schemeID="INT">ATTR-001</cbc:ID>
<cbc:DocumentCurrencyCode listID="ISO4217">EUR</cbc:DocumentCurrencyCode>
</ubl:Invoice>`,
      expectedBehavior: 'Consider attribute values in extraction'
    }
  ];

  for (const testCase of specialCases) {
    console.log(`${testCase.name}:`);
    console.log(` Expected: ${testCase.expectedBehavior}`);

    try {
      const invoice = new einvoice.EInvoice();
      await invoice.fromXmlString(testCase.xml);

      // `||` is deliberate: an empty-string id is reported as '(empty)' too.
      console.log(` ID extracted: ${invoice.id || '(empty)'}`);
      console.log(` Notes: ${invoice.notes?.length ?? 0} found`);

      if (invoice.notes && invoice.notes.length > 0) {
        invoice.notes.forEach((note, i) => {
          console.log(` Note ${i + 1}: "${note}"`);
        });
      }

      console.log(' ✓ Special case handled successfully');
    } catch (error: unknown) {
      // The caught value is narrowed first: it is not guaranteed to be an Error.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` Parse result: ${message}`);
    }
  }
});
// Run the tests: hand control to the tap runner now that every tap.test
// above has been registered.
tap.start();