einvoice/test/suite/einvoice_parsing/test.parse-08.xpath-evaluation.ts

374 lines
14 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as einvoice from '../../../ts/index.js';
import * as plugins from '../../plugins.js';
// PARSE-08 (field extraction): parses one UBL and one CII sample invoice with
// `EInvoice.fromXmlString`, reads a handful of fields back from the parsed
// object and compares them with the values embedded in the XML.
//
// Parsing/extraction stays best-effort: an exception there is logged and the
// sample is skipped. The comparisons themselves run OUTSIDE the try/catch so a
// mismatching value fails the test instead of being swallowed by the catch.
tap.test('PARSE-08: XPath evaluation for e-invoice data extraction', async () => {
  console.log('Testing XPath-like data extraction from e-invoices...\n');
  // Test extracting specific fields from different invoice formats
  const invoiceExtractionTests = [
    {
      name: 'UBL Invoice field extraction',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>UBL-XPATH-001</cbc:ID>
<cbc:IssueDate>2024-01-15</cbc:IssueDate>
<cbc:DueDate>2024-02-15</cbc:DueDate>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>XPath Test Supplier</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>123 Test Street</cbc:StreetName>
<cbc:CityName>Berlin</cbc:CityName>
<cbc:PostalZone>10115</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>XPath Test Customer</cbc:Name>
</cac:PartyName>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:InvoicedQuantity unitCode="EA">10</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">100.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Test Product A</cbc:Name>
<cbc:Description>Detailed description of product A</cbc:Description>
</cac:Item>
</cac:InvoiceLine>
<cac:InvoiceLine>
<cbc:ID>2</cbc:ID>
<cbc:InvoicedQuantity unitCode="KG">5.5</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">55.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Test Product B</cbc:Name>
</cac:Item>
</cac:InvoiceLine>
<cac:LegalMonetaryTotal>
<cbc:TaxInclusiveAmount currencyID="EUR">184.45</cbc:TaxInclusiveAmount>
</cac:LegalMonetaryTotal>
</ubl:Invoice>`,
      expectedData: {
        id: 'UBL-XPATH-001',
        issueDate: '2024-01-15',
        dueDate: '2024-02-15',
        supplierName: 'XPath Test Supplier',
        customerName: 'XPath Test Customer',
        lineItemCount: 2,
        totalAmount: 184.45
      }
    },
    {
      name: 'CII Invoice field extraction',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice
xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
xmlns:qdt="urn:un:unece:uncefact:data:standard:QualifiedDataType:100"
xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100"
xmlns:udt="urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100">
<rsm:ExchangedDocument>
<ram:ID>CII-XPATH-001</ram:ID>
<ram:TypeCode>380</ram:TypeCode>
<ram:IssueDateTime>
<udt:DateTimeString format="102">20240115</udt:DateTimeString>
</ram:IssueDateTime>
</rsm:ExchangedDocument>
<rsm:SupplyChainTradeTransaction>
<ram:ApplicableHeaderTradeAgreement>
<ram:SellerTradeParty>
<ram:Name>CII XPath Supplier</ram:Name>
</ram:SellerTradeParty>
<ram:BuyerTradeParty>
<ram:Name>CII XPath Customer</ram:Name>
</ram:BuyerTradeParty>
</ram:ApplicableHeaderTradeAgreement>
</rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>`,
      expectedData: {
        id: 'CII-XPATH-001',
        supplierName: 'CII XPath Supplier',
        customerName: 'CII XPath Customer'
      }
    }
  ];
  for (const test of invoiceExtractionTests) {
    console.log(`\n${test.name}:`);
    // Remains undefined when parsing or field extraction throws.
    let extractedData: Record<string, unknown> | undefined;
    try {
      const invoice = new einvoice.EInvoice();
      await invoice.fromXmlString(test.xml);
      console.log(' ✓ Invoice parsed successfully');
      // Extract the fields to verify; dates are reduced to their YYYY-MM-DD part
      const extracted: Record<string, unknown> = {
        id: invoice.id,
        issueDate: invoice.issueDate instanceof Date ?
          invoice.issueDate.toISOString().split('T')[0] :
          invoice.issueDate,
        supplierName: invoice.from?.name,
        customerName: invoice.to?.name,
        lineItemCount: invoice.items?.length ?? 0
      };
      if (invoice.dueDate) {
        extracted.dueDate = invoice.dueDate instanceof Date ?
          invoice.dueDate.toISOString().split('T')[0] :
          invoice.dueDate;
      }
      // Only recorded when truthy, so an absent or zero total is never compared.
      if (invoice.totalGross) {
        extracted.totalAmount = invoice.totalGross;
      }
      console.log(' Extracted data:');
      for (const [key, value] of Object.entries(extracted)) {
        if (value !== undefined) {
          console.log(` ${key}: ${value}`);
        }
      }
      extractedData = extracted;
    } catch (error: unknown) {
      // Thrown values are not guaranteed to be Error instances.
      console.log(` ✗ Error: ${error instanceof Error ? error.message : String(error)}`);
    }
    if (extractedData === undefined) {
      continue;
    }
    // Verify expected data. Fields that were not extracted at all are skipped;
    // fields that were extracted must match exactly.
    for (const [key, expectedValue] of Object.entries(test.expectedData)) {
      if (extractedData[key] !== undefined) {
        expect(extractedData[key]).toEqual(expectedValue);
      }
    }
  }
});
// PARSE-08 (complex extraction): builds a UBL invoice with repeated notes, two
// supplier identifiers, a contact and five generated invoice lines, parses it
// and logs what the parsed object exposes. This test makes no assertions; a
// parse error is logged rather than failing the test.
tap.test('PARSE-08: Complex data extraction scenarios', async () => {
  console.log('\nTesting complex data extraction scenarios...\n');
  // Test extracting nested and repeated data
  const complexInvoice = `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>COMPLEX-001</cbc:ID>
<cbc:IssueDate>2024-01-01</cbc:IssueDate>
<cbc:Note>First note</cbc:Note>
<cbc:Note>Second note</cbc:Note>
<cbc:Note>Third note with special chars: €, ñ, 中文</cbc:Note>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyIdentification>
<cbc:ID schemeID="GLN">1234567890123</cbc:ID>
</cac:PartyIdentification>
<cac:PartyIdentification>
<cbc:ID schemeID="DUNS">123456789</cbc:ID>
</cac:PartyIdentification>
<cac:PartyName>
<cbc:Name>Complex Supplier Corp</cbc:Name>
</cac:PartyName>
<cac:Contact>
<cbc:Name>John Doe</cbc:Name>
<cbc:Telephone>+49 30 12345678</cbc:Telephone>
<cbc:ElectronicMail>john.doe@supplier.com</cbc:ElectronicMail>
</cac:Contact>
</cac:Party>
</cac:AccountingSupplierParty>
${Array.from({length: 5}, (_, i) => `
<cac:InvoiceLine>
<cbc:ID>${i + 1}</cbc:ID>
<cbc:InvoicedQuantity unitCode="${i % 2 === 0 ? 'EA' : 'KG'}">${(i + 1) * 2}</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">${((i + 1) * 50).toFixed(2)}</cbc:LineExtensionAmount>
<cac:AllowanceCharge>
<cbc:ChargeIndicator>false</cbc:ChargeIndicator>
<cbc:Amount currencyID="EUR">${(i * 5).toFixed(2)}</cbc:Amount>
<cbc:AllowanceChargeReason>Discount ${i + 1}</cbc:AllowanceChargeReason>
</cac:AllowanceCharge>
<cac:Item>
<cbc:Name>Product ${String.fromCharCode(65 + i)}</cbc:Name>
<cac:CommodityClassification>
<cbc:ItemClassificationCode listID="CPV">12345678-${i}</cbc:ItemClassificationCode>
</cac:CommodityClassification>
</cac:Item>
</cac:InvoiceLine>`).join('')}
</ubl:Invoice>`;
  try {
    const invoice = new einvoice.EInvoice();
    await invoice.fromXmlString(complexInvoice);
    console.log('Complex invoice extraction results:');
    console.log(` Invoice ID: ${invoice.id}`);
    console.log(` Notes count: ${invoice.notes?.length ?? 0}`);
    if (invoice.notes && invoice.notes.length > 0) {
      console.log(' Notes:');
      invoice.notes.forEach((note, index) => {
        console.log(` ${index + 1}: ${note}`);
      });
    }
    console.log(` Supplier identifiers: ${invoice.from?.identifiers?.length ?? 0}`);
    console.log(` Line items: ${invoice.items?.length ?? 0}`);
    if (invoice.items && invoice.items.length > 0) {
      console.log(' Line item details:');
      invoice.items.forEach((item, index) => {
        // `||` is intentional: empty names and zero quantities fall back too.
        console.log(` Item ${index + 1}: ${item.name || 'Unknown'} - Qty: ${item.quantity || 0}`);
      });
    }
    console.log(' ✓ Complex data extraction successful');
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances.
    console.log(` ✗ Error: ${error instanceof Error ? error.message : String(error)}`);
  }
});
// PARSE-08 (performance): generates UBL invoices at three sizes, parses each
// one, reads a fixed set of fields and logs the elapsed wall-clock time. The
// measured interval covers both `fromXmlString` and the field reads. This test
// makes no assertions; a parse error is logged rather than failing the test.
tap.test('PARSE-08: Performance of data extraction', async () => {
  console.log('\nTesting data extraction performance...\n');
  // Generate invoice with many fields to extract.
  // 'simple' -> 5 items / 3 notes, 'medium' -> 50 / 10, anything else -> 200 / 30.
  const generateDataRichInvoice = (complexity: string): string => {
    const itemCount = complexity === 'simple' ? 5 : complexity === 'medium' ? 50 : 200;
    const noteCount = complexity === 'simple' ? 3 : complexity === 'medium' ? 10 : 30;
    return `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>PERF-${complexity.toUpperCase()}</cbc:ID>
<cbc:IssueDate>2024-01-01</cbc:IssueDate>
${Array.from({length: noteCount}, (_, i) => `
<cbc:Note>Note ${i + 1} with some content to extract</cbc:Note>`).join('')}
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Performance Test Supplier</cbc:Name>
</cac:PartyName>
</cac:Party>
</cac:AccountingSupplierParty>
${Array.from({length: itemCount}, (_, i) => `
<cac:InvoiceLine>
<cbc:ID>${i + 1}</cbc:ID>
<cbc:InvoicedQuantity unitCode="EA">${i + 1}</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">${((i + 1) * 10).toFixed(2)}</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Item ${i + 1}</cbc:Name>
</cac:Item>
</cac:InvoiceLine>`).join('')}
</ubl:Invoice>`;
  };
  const complexityLevels = ['simple', 'medium', 'complex'];
  for (const complexity of complexityLevels) {
    const xml = generateDataRichInvoice(complexity);
    const startTime = Date.now();
    try {
      const invoice = new einvoice.EInvoice();
      await invoice.fromXmlString(xml);
      // Extract various data points (all reads are part of the timed interval)
      const extractedData = {
        id: invoice.id,
        issueDate: invoice.issueDate,
        supplierName: invoice.from?.name,
        noteCount: invoice.notes?.length ?? 0,
        itemCount: invoice.items?.length ?? 0,
        firstItemName: invoice.items?.[0]?.name,
        lastItemName: invoice.items?.[invoice.items.length - 1]?.name
      };
      const extractTime = Date.now() - startTime;
      console.log(`${complexity.charAt(0).toUpperCase() + complexity.slice(1)} invoice extraction:`);
      console.log(` Extraction time: ${extractTime}ms`);
      console.log(` Notes extracted: ${extractedData.noteCount}`);
      console.log(` Items extracted: ${extractedData.itemCount}`);
      console.log(` ✓ All data points extracted successfully`);
    } catch (error: unknown) {
      // Thrown values are not guaranteed to be Error instances.
      console.log(` ✗ Error: ${error instanceof Error ? error.message : String(error)}`);
    }
  }
});
// PARSE-08 (special cases): feeds three small UBL documents to the parser
// (empty/whitespace fields, a CDATA note, attributes on elements) and logs the
// ID and notes that come back. `expectedBehavior` is a description printed
// alongside each case; it is not checked. This test makes no assertions; a
// parse error is logged as the case's result rather than failing the test.
tap.test('PARSE-08: Special extraction scenarios', async () => {
  console.log('\nTesting special extraction scenarios...\n');
  // Test extracting data with special characters and edge cases
  const specialCases = [
    {
      name: 'Invoice with empty fields',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID></cbc:ID>
<cbc:Note></cbc:Note>
<cbc:Note> </cbc:Note>
</ubl:Invoice>`,
      expectedBehavior: 'Handle empty/whitespace fields gracefully'
    },
    {
      name: 'Invoice with CDATA sections',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>CDATA-001</cbc:ID>
<cbc:Note><![CDATA[This contains <special> characters & symbols]]></cbc:Note>
</ubl:Invoice>`,
      expectedBehavior: 'Extract CDATA content correctly'
    },
    {
      name: 'Invoice with attributes',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID schemeName="Invoice" schemeID="INT">ATTR-001</cbc:ID>
<cbc:DocumentCurrencyCode listID="ISO4217">EUR</cbc:DocumentCurrencyCode>
</ubl:Invoice>`,
      expectedBehavior: 'Consider attribute values in extraction'
    }
  ];
  for (const testCase of specialCases) {
    console.log(`${testCase.name}:`);
    console.log(` Expected: ${testCase.expectedBehavior}`);
    try {
      const invoice = new einvoice.EInvoice();
      await invoice.fromXmlString(testCase.xml);
      // `||` is intentional: an empty-string ID should print the placeholder.
      console.log(` ID extracted: ${invoice.id || '(empty)'}`);
      console.log(` Notes: ${invoice.notes?.length ?? 0} found`);
      if (invoice.notes && invoice.notes.length > 0) {
        invoice.notes.forEach((note, i) => {
          console.log(` Note ${i + 1}: "${note}"`);
        });
      }
      console.log(' ✓ Special case handled successfully');
    } catch (error: unknown) {
      // Thrown values are not guaranteed to be Error instances.
      console.log(` Parse result: ${error instanceof Error ? error.message : String(error)}`);
    }
  }
});
// Run the tests: start the tap runner once all tap.test() cases above are registered.
tap.start();