feat(invoices): add line_items extraction and normalization for invoice parsing
This commit is contained in:
@@ -1,5 +1,14 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 2026-01-20 - 1.16.0 - feat(invoices)
|
||||||
|
add line_items extraction and normalization for invoice parsing
|
||||||
|
|
||||||
|
- Introduce ILineItem interface and add line_items array to IInvoice.
|
||||||
|
- Add extractLineItems helper to normalize item fields (position, product, description, quantity, unit_price, total_price).
|
||||||
|
- Include line_items in parsed invoice output and sample JSON in test, defaulting to [] when absent.
|
||||||
|
- Update logging to include extracted line item count.
|
||||||
|
- Clarify test instructions to extract items from invoice tables and skip subtotal/total rows.
|
||||||
|
|
||||||
## 2026-01-20 - 1.15.3 - fix(tests(nanonets))
|
## 2026-01-20 - 1.15.3 - fix(tests(nanonets))
|
||||||
allow '/' when normalizing invoice strings in tests
|
allow '/' when normalizing invoice strings in tests
|
||||||
|
|
||||||
|
|||||||
@@ -42,6 +42,15 @@ const smartAi = new SmartAi({
|
|||||||
// DualAgentOrchestrator for structured task execution
|
// DualAgentOrchestrator for structured task execution
|
||||||
let orchestrator: DualAgentOrchestrator;
|
let orchestrator: DualAgentOrchestrator;
|
||||||
|
|
||||||
|
/**
 * One row of an invoice's item table, as produced by `extractLineItems`.
 *
 * Field names mirror the keys of the `line_items` entries in the JSON format
 * given in the extraction prompt.
 */
interface ILineItem {
  /** Row position; `extractLineItems` falls back to array index + 1 when the model gives no number. */
  position: number;
  /** Product column of the row, trimmed ('' when absent). */
  product: string;
  /** Description column of the row, trimmed ('' when absent). */
  description: string;
  /** Quantity; `extractLineItems` substitutes 1 when the parsed value is falsy. */
  quantity: number;
  /** Price per unit, as parsed by `parseAmount`. */
  unit_price: number;
  /** Price for the whole row, as parsed by `parseAmount`. */
  total_price: number;
}
|
||||||
|
|
||||||
interface IInvoice {
|
interface IInvoice {
|
||||||
invoice_number: string;
|
invoice_number: string;
|
||||||
invoice_date: string;
|
invoice_date: string;
|
||||||
@@ -50,6 +59,7 @@ interface IInvoice {
|
|||||||
net_amount: number;
|
net_amount: number;
|
||||||
vat_amount: number;
|
vat_amount: number;
|
||||||
total_amount: number;
|
total_amount: number;
|
||||||
|
line_items: ILineItem[];
|
||||||
}
|
}
|
||||||
|
|
||||||
interface IImageData {
|
interface IImageData {
|
||||||
@@ -80,6 +90,7 @@ const JSON_EXTRACTION_PROMPT = `Extract key fields from the invoice. Return ONLY
|
|||||||
WHERE TO FIND DATA:
|
WHERE TO FIND DATA:
|
||||||
- invoice_number, invoice_date, vendor_name: Look in the HEADER section at the TOP of PAGE 1 (near "Invoice no.", "Invoice date:", "Rechnungsnummer"). Use common sense. Btw. an invoice number might start on INV* . Also be sure to not omit special chars like / - and sp on. They are part of the invoice number.
|
- invoice_number, invoice_date, vendor_name: Look in the HEADER section at the TOP of PAGE 1 (near "Invoice no.", "Invoice date:", "Rechnungsnummer"). Use common sense. Btw. an invoice number might start on INV* . Also be sure to not omit special chars like / - and sp on. They are part of the invoice number.
|
||||||
- net_amount, vat_amount, total_amount: Look in the SUMMARY section at the BOTTOM (look for "Total", "Amount due", "Gesamtbetrag")
|
- net_amount, vat_amount, total_amount: Look in the SUMMARY section at the BOTTOM (look for "Total", "Amount due", "Gesamtbetrag")
|
||||||
|
- line_items: Look in the TABLE(s) with columns like Pos, Product, Description, Quantity, Unit Price, Price
|
||||||
|
|
||||||
RULES:
|
RULES:
|
||||||
1. Use common sense.
|
1. Use common sense.
|
||||||
@@ -89,9 +100,21 @@ RULES:
|
|||||||
5. net_amount: Total before tax
|
5. net_amount: Total before tax
|
||||||
6. vat_amount: Tax amount
|
6. vat_amount: Tax amount
|
||||||
7. total_amount: Final total with tax
|
7. total_amount: Final total with tax
|
||||||
|
8. line_items: Array of items from the invoice table. Skip subtotal/total rows.
|
||||||
|
|
||||||
JSON only:
|
JSON format:
|
||||||
{"invoice_number":"X","invoice_date":"YYYY-MM-DD","vendor_name":"X","currency":"EUR","net_amount":0,"vat_amount":0,"total_amount":0}
|
{
|
||||||
|
"invoice_number": "X",
|
||||||
|
"invoice_date": "YYYY-MM-DD",
|
||||||
|
"vendor_name": "X",
|
||||||
|
"currency": "EUR",
|
||||||
|
"net_amount": 0,
|
||||||
|
"vat_amount": 0,
|
||||||
|
"total_amount": 0,
|
||||||
|
"line_items": [
|
||||||
|
{"position": 1, "product": "X", "description": "X", "quantity": 1, "unit_price": 0, "total_price": 0}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
Double check for valid JSON syntax. use the json validate tool.
|
Double check for valid JSON syntax. use the json validate tool.
|
||||||
|
|
||||||
@@ -340,6 +363,21 @@ function extractCurrency(s: string | undefined): string {
|
|||||||
return 'EUR';
|
return 'EUR';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extract and normalize the line items array from parsed invoice JSON.
 *
 * The input comes from model-generated JSON, so nothing about its shape is
 * trusted:
 * - a non-array `items` yields `[]`;
 * - entries that are not objects (null, strings, numbers, nested arrays) are
 *   skipped instead of throwing on property access;
 * - `position` is used only when it is a finite number, otherwise it falls
 *   back to the entry's index in the original array + 1;
 * - `product` / `description` are stringified and trimmed ('' when falsy);
 * - `quantity` falls back to 1 when `parseAmount` returns a falsy value
 *   (0 or NaN); prices are passed through `parseAmount` unchanged.
 *
 * @param items - Raw `line_items` value from the parsed JSON.
 * @returns Normalized line items, in their original order.
 */
function extractLineItems(items: unknown): ILineItem[] {
  if (!Array.isArray(items)) return [];

  const rawItems: unknown[] = items;
  const normalized: ILineItem[] = [];

  rawItems.forEach((rawItem: unknown, index: number) => {
    // Guard before touching properties: `null.position` would throw, and
    // primitives/arrays cannot be meaningful line items.
    if (typeof rawItem !== 'object' || rawItem === null || Array.isArray(rawItem)) {
      return;
    }
    const item = rawItem as Record<string, unknown>;
    const rawPosition = item.position;

    normalized.push({
      // typeof NaN === 'number', so also require a finite value.
      position:
        typeof rawPosition === 'number' && Number.isFinite(rawPosition)
          ? rawPosition
          : index + 1,
      product: String(item.product || '').trim(),
      description: String(item.description || '').trim(),
      quantity: parseAmount(item.quantity as string | number) || 1,
      unit_price: parseAmount(item.unit_price as string | number),
      total_price: parseAmount(item.total_price as string | number),
    });
  });

  return normalized;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Try to extract valid JSON from a response string
|
* Try to extract valid JSON from a response string
|
||||||
*/
|
*/
|
||||||
@@ -448,6 +486,7 @@ ${JSON_EXTRACTION_PROMPT}`;
|
|||||||
net_amount: parseAmount(jsonData.net_amount as string | number),
|
net_amount: parseAmount(jsonData.net_amount as string | number),
|
||||||
vat_amount: parseAmount(jsonData.vat_amount as string | number),
|
vat_amount: parseAmount(jsonData.vat_amount as string | number),
|
||||||
total_amount: parseAmount(jsonData.total_amount as string | number),
|
total_amount: parseAmount(jsonData.total_amount as string | number),
|
||||||
|
line_items: extractLineItems(jsonData.line_items),
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||||
@@ -471,6 +510,7 @@ async function extractInvoice(markdown: string, docName: string): Promise<IInvoi
|
|||||||
net_amount: 0,
|
net_amount: 0,
|
||||||
vat_amount: 0,
|
vat_amount: 0,
|
||||||
total_amount: 0,
|
total_amount: 0,
|
||||||
|
line_items: [],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
console.log(` [${docName}] Extracted: ${invoice.invoice_number}`);
|
console.log(` [${docName}] Extracted: ${invoice.invoice_number}`);
|
||||||
@@ -703,7 +743,7 @@ for (const tc of testCases) {
|
|||||||
const elapsedMs = Date.now() - startTime;
|
const elapsedMs = Date.now() - startTime;
|
||||||
processingTimes.push(elapsedMs);
|
processingTimes.push(elapsedMs);
|
||||||
|
|
||||||
console.log(` Extracted: ${extracted.invoice_number} | ${extracted.invoice_date} | ${extracted.total_amount} ${extracted.currency}`);
|
console.log(` Extracted: ${extracted.invoice_number} | ${extracted.invoice_date} | ${extracted.total_amount} ${extracted.currency} | ${extracted.line_items.length} items`);
|
||||||
|
|
||||||
const result = compareInvoice(extracted, expected);
|
const result = compareInvoice(extracted, expected);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user