374 lines
14 KiB
TypeScript
374 lines
14 KiB
TypeScript
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||
import * as einvoice from '../../../ts/index.js';
|
||
import * as plugins from '../../plugins.js';
|
||
|
||
/**
 * PARSE-08 (field extraction): parses one UBL and one CII sample document with
 * `EInvoice.fromXmlString` and compares a handful of fields read from the
 * resulting object (id, dates, party names, item count, gross total) with the
 * values written into the XML fixtures.
 *
 * The comparison is lenient by design: a field is only checked when the parsed
 * invoice actually exposes a value for it, and any exception raised while
 * handling a fixture is logged rather than rethrown.
 *
 * NOTE(review): because the `expect` calls sit inside the same `try`, a failed
 * expectation is presumably caught and logged as well, so this test cannot go
 * red on a value mismatch — confirm whether that is intended.
 */
tap.test('PARSE-08: XPath evaluation for e-invoice data extraction', async () => {
  console.log('Testing XPath-like data extraction from e-invoices...\n');

  // Test extracting specific fields from different invoice formats
  const invoiceExtractionTests = [
    {
      name: 'UBL Invoice field extraction',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>UBL-XPATH-001</cbc:ID>
<cbc:IssueDate>2024-01-15</cbc:IssueDate>
<cbc:DueDate>2024-02-15</cbc:DueDate>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>XPath Test Supplier</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:StreetName>123 Test Street</cbc:StreetName>
<cbc:CityName>Berlin</cbc:CityName>
<cbc:PostalZone>10115</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>XPath Test Customer</cbc:Name>
</cac:PartyName>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:InvoicedQuantity unitCode="EA">10</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">100.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Test Product A</cbc:Name>
<cbc:Description>Detailed description of product A</cbc:Description>
</cac:Item>
</cac:InvoiceLine>
<cac:InvoiceLine>
<cbc:ID>2</cbc:ID>
<cbc:InvoicedQuantity unitCode="KG">5.5</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">55.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Test Product B</cbc:Name>
</cac:Item>
</cac:InvoiceLine>
<cac:LegalMonetaryTotal>
<cbc:TaxInclusiveAmount currencyID="EUR">184.45</cbc:TaxInclusiveAmount>
</cac:LegalMonetaryTotal>
</ubl:Invoice>`,
      expectedData: {
        id: 'UBL-XPATH-001',
        issueDate: '2024-01-15',
        dueDate: '2024-02-15',
        supplierName: 'XPath Test Supplier',
        customerName: 'XPath Test Customer',
        lineItemCount: 2,
        totalAmount: 184.45
      }
    },
    {
      name: 'CII Invoice field extraction',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<rsm:CrossIndustryInvoice
xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
xmlns:qdt="urn:un:unece:uncefact:data:standard:QualifiedDataType:100"
xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100"
xmlns:udt="urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100">
<rsm:ExchangedDocument>
<ram:ID>CII-XPATH-001</ram:ID>
<ram:TypeCode>380</ram:TypeCode>
<ram:IssueDateTime>
<udt:DateTimeString format="102">20240115</udt:DateTimeString>
</ram:IssueDateTime>
</rsm:ExchangedDocument>
<rsm:SupplyChainTradeTransaction>
<ram:ApplicableHeaderTradeAgreement>
<ram:SellerTradeParty>
<ram:Name>CII XPath Supplier</ram:Name>
</ram:SellerTradeParty>
<ram:BuyerTradeParty>
<ram:Name>CII XPath Customer</ram:Name>
</ram:BuyerTradeParty>
</ram:ApplicableHeaderTradeAgreement>
</rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>`,
      expectedData: {
        id: 'CII-XPATH-001',
        supplierName: 'CII XPath Supplier',
        customerName: 'CII XPath Customer'
      }
    }
  ];

  // Dates are compared as `YYYY-MM-DD`; anything that is not a Date instance
  // is passed through untouched.
  const toIsoDay = (value: unknown): unknown =>
    value instanceof Date ? value.toISOString().split('T')[0] : value;

  for (const test of invoiceExtractionTests) {
    console.log(`\n${test.name}:`);

    try {
      const invoice = new einvoice.EInvoice();
      await invoice.fromXmlString(test.xml);

      console.log(' ✓ Invoice parsed successfully');

      // Extract and verify data
      const extractedData: Record<string, unknown> = {
        id: invoice.id,
        issueDate: toIsoDay(invoice.issueDate),
        supplierName: invoice.from?.name,
        customerName: invoice.to?.name,
        lineItemCount: invoice.items?.length ?? 0
      };

      // Optional fields are only recorded when the parser produced a truthy value.
      if (invoice.dueDate) {
        extractedData.dueDate = toIsoDay(invoice.dueDate);
      }

      if (invoice.totalGross) {
        extractedData.totalAmount = invoice.totalGross;
      }

      console.log(' Extracted data:');
      for (const [key, value] of Object.entries(extractedData)) {
        if (value !== undefined) {
          console.log(` ${key}: ${value}`);
        }
      }

      // Verify expected data; fields the parser did not populate are skipped.
      for (const [key, expectedValue] of Object.entries(test.expectedData)) {
        if (extractedData[key] !== undefined) {
          expect(extractedData[key]).toEqual(expectedValue);
        }
      }
    } catch (error: unknown) {
      // Narrow before reading `.message`: a thrown value need not be an Error.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ✗ Error: ${message}`);
    }
  }
});
|
||
|
||
/**
 * PARSE-08 (complex document): builds a UBL invoice containing repeated notes,
 * two supplier party identifications, a contact block and five generated
 * invoice lines (each with an allowance and a commodity classification), parses
 * it with `EInvoice.fromXmlString`, and logs what the parsed object exposes.
 *
 * This test only reports; it makes no assertions, and any exception raised
 * while parsing or reading fields is logged instead of failing the test.
 */
tap.test('PARSE-08: Complex data extraction scenarios', async () => {
  console.log('\nTesting complex data extraction scenarios...\n');

  // Test extracting nested and repeated data
  const complexInvoice = `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>COMPLEX-001</cbc:ID>
<cbc:IssueDate>2024-01-01</cbc:IssueDate>
<cbc:Note>First note</cbc:Note>
<cbc:Note>Second note</cbc:Note>
<cbc:Note>Third note with special chars: €, ñ, 中文</cbc:Note>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyIdentification>
<cbc:ID schemeID="GLN">1234567890123</cbc:ID>
</cac:PartyIdentification>
<cac:PartyIdentification>
<cbc:ID schemeID="DUNS">123456789</cbc:ID>
</cac:PartyIdentification>
<cac:PartyName>
<cbc:Name>Complex Supplier Corp</cbc:Name>
</cac:PartyName>
<cac:Contact>
<cbc:Name>John Doe</cbc:Name>
<cbc:Telephone>+49 30 12345678</cbc:Telephone>
<cbc:ElectronicMail>john.doe@supplier.com</cbc:ElectronicMail>
</cac:Contact>
</cac:Party>
</cac:AccountingSupplierParty>
${Array.from({length: 5}, (_, i) => `
<cac:InvoiceLine>
<cbc:ID>${i + 1}</cbc:ID>
<cbc:InvoicedQuantity unitCode="${i % 2 === 0 ? 'EA' : 'KG'}">${(i + 1) * 2}</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">${((i + 1) * 50).toFixed(2)}</cbc:LineExtensionAmount>
<cac:AllowanceCharge>
<cbc:ChargeIndicator>false</cbc:ChargeIndicator>
<cbc:Amount currencyID="EUR">${(i * 5).toFixed(2)}</cbc:Amount>
<cbc:AllowanceChargeReason>Discount ${i + 1}</cbc:AllowanceChargeReason>
</cac:AllowanceCharge>
<cac:Item>
<cbc:Name>Product ${String.fromCharCode(65 + i)}</cbc:Name>
<cac:CommodityClassification>
<cbc:ItemClassificationCode listID="CPV">12345678-${i}</cbc:ItemClassificationCode>
</cac:CommodityClassification>
</cac:Item>
</cac:InvoiceLine>`).join('')}
</ubl:Invoice>`;

  try {
    const invoice = new einvoice.EInvoice();
    await invoice.fromXmlString(complexInvoice);

    console.log('Complex invoice extraction results:');
    console.log(` Invoice ID: ${invoice.id}`);
    console.log(` Notes count: ${invoice.notes?.length ?? 0}`);

    if (invoice.notes && invoice.notes.length > 0) {
      console.log(' Notes:');
      invoice.notes.forEach((note, index) => {
        console.log(` ${index + 1}: ${note}`);
      });
    }

    console.log(` Supplier identifiers: ${invoice.from?.identifiers?.length ?? 0}`);
    console.log(` Line items: ${invoice.items?.length ?? 0}`);

    if (invoice.items && invoice.items.length > 0) {
      console.log(' Line item details:');
      invoice.items.forEach((item, index) => {
        // `||` is kept on purpose here: an empty name or a zero/absent quantity
        // is reported with the placeholder value.
        console.log(` Item ${index + 1}: ${item.name || 'Unknown'} - Qty: ${item.quantity || 0}`);
      });
    }

    console.log(' ✓ Complex data extraction successful');
  } catch (error: unknown) {
    // Narrow before reading `.message`: a thrown value need not be an Error.
    const message = error instanceof Error ? error.message : String(error);
    console.log(` ✗ Error: ${message}`);
  }
});
|
||
|
||
/**
 * PARSE-08 (timing): generates UBL invoices of three sizes, parses each with
 * `EInvoice.fromXmlString`, reads a fixed set of fields from the result and
 * logs the elapsed wall-clock time together with the note and item counts.
 *
 * No thresholds are asserted; the output is informational. Exceptions are
 * logged per size level so the remaining levels still run.
 */
tap.test('PARSE-08: Performance of data extraction', async () => {
  console.log('\nTesting data extraction performance...\n');

  /**
   * Generate invoice with many fields to extract.
   *
   * @param complexity 'simple' yields 5 lines / 3 notes, 'medium' yields
   *   50 lines / 10 notes, and any other value yields 200 lines / 30 notes.
   * @returns the UBL invoice document as an XML string.
   */
  const generateDataRichInvoice = (complexity: string) => {
    const itemCount = complexity === 'simple' ? 5 : complexity === 'medium' ? 50 : 200;
    const noteCount = complexity === 'simple' ? 3 : complexity === 'medium' ? 10 : 30;

    return `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>PERF-${complexity.toUpperCase()}</cbc:ID>
<cbc:IssueDate>2024-01-01</cbc:IssueDate>
${Array.from({length: noteCount}, (_, i) => `
<cbc:Note>Note ${i + 1} with some content to extract</cbc:Note>`).join('')}
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Performance Test Supplier</cbc:Name>
</cac:PartyName>
</cac:Party>
</cac:AccountingSupplierParty>
${Array.from({length: itemCount}, (_, i) => `
<cac:InvoiceLine>
<cbc:ID>${i + 1}</cbc:ID>
<cbc:InvoicedQuantity unitCode="EA">${i + 1}</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">${((i + 1) * 10).toFixed(2)}</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Item ${i + 1}</cbc:Name>
</cac:Item>
</cac:InvoiceLine>`).join('')}
</ubl:Invoice>`;
  };

  const complexityLevels = ['simple', 'medium', 'complex'];

  for (const complexity of complexityLevels) {
    // The document is generated before the clock starts, so the measured span
    // covers parsing plus the field reads below, not XML generation.
    const xml = generateDataRichInvoice(complexity);
    const startTime = Date.now();

    try {
      const invoice = new einvoice.EInvoice();
      await invoice.fromXmlString(xml);

      // Extract various data points
      const extractedData = {
        id: invoice.id,
        issueDate: invoice.issueDate,
        supplierName: invoice.from?.name,
        noteCount: invoice.notes?.length ?? 0,
        itemCount: invoice.items?.length ?? 0,
        firstItemName: invoice.items?.[0]?.name,
        lastItemName: invoice.items?.[invoice.items.length - 1]?.name
      };

      const extractTime = Date.now() - startTime;

      console.log(`${complexity.charAt(0).toUpperCase() + complexity.slice(1)} invoice extraction:`);
      console.log(` Extraction time: ${extractTime}ms`);
      console.log(` Notes extracted: ${extractedData.noteCount}`);
      console.log(` Items extracted: ${extractedData.itemCount}`);
      console.log(` ✓ All data points extracted successfully`);
    } catch (error: unknown) {
      // Narrow before reading `.message`: a thrown value need not be an Error.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ✗ Error: ${message}`);
    }
  }
});
|
||
|
||
/**
 * PARSE-08 (edge cases): feeds three small UBL documents to
 * `EInvoice.fromXmlString` — one with empty/whitespace-only elements, one with
 * a CDATA note, and one whose elements carry attributes — and logs the id and
 * notes the parsed object exposes for each.
 *
 * `expectedBehavior` is a human-readable description that is printed only; it
 * is not checked. A thrown exception is reported as an informational parse
 * result rather than a failure.
 */
tap.test('PARSE-08: Special extraction scenarios', async () => {
  console.log('\nTesting special extraction scenarios...\n');

  // Test extracting data with special characters and edge cases
  const specialCases = [
    {
      name: 'Invoice with empty fields',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID></cbc:ID>
<cbc:Note></cbc:Note>
<cbc:Note> </cbc:Note>
</ubl:Invoice>`,
      expectedBehavior: 'Handle empty/whitespace fields gracefully'
    },
    {
      name: 'Invoice with CDATA sections',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>CDATA-001</cbc:ID>
<cbc:Note><![CDATA[This contains <special> characters & symbols]]></cbc:Note>
</ubl:Invoice>`,
      expectedBehavior: 'Extract CDATA content correctly'
    },
    {
      name: 'Invoice with attributes',
      xml: `<?xml version="1.0" encoding="UTF-8"?>
<ubl:Invoice xmlns:ubl="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID schemeName="Invoice" schemeID="INT">ATTR-001</cbc:ID>
<cbc:DocumentCurrencyCode listID="ISO4217">EUR</cbc:DocumentCurrencyCode>
</ubl:Invoice>`,
      expectedBehavior: 'Consider attribute values in extraction'
    }
  ];

  for (const testCase of specialCases) {
    console.log(`${testCase.name}:`);
    console.log(` Expected: ${testCase.expectedBehavior}`);

    try {
      const invoice = new einvoice.EInvoice();
      await invoice.fromXmlString(testCase.xml);

      // `||` is intentional: an empty-string id is shown as "(empty)".
      console.log(` ID extracted: ${invoice.id || '(empty)'}`);
      console.log(` Notes: ${invoice.notes?.length ?? 0} found`);

      if (invoice.notes && invoice.notes.length > 0) {
        invoice.notes.forEach((note, i) => {
          console.log(` Note ${i + 1}: "${note}"`);
        });
      }

      console.log(' ✓ Special case handled successfully');
    } catch (error: unknown) {
      // Narrow before reading `.message`: a thrown value need not be an Error.
      const message = error instanceof Error ? error.message : String(error);
      console.log(` ℹ Parse result: ${message}`);
    }
  }
});
|
||
|
||
// Run the tests
tap.start();