|
|
|
@@ -42,6 +42,15 @@ const smartAi = new SmartAi({
|
|
|
|
// DualAgentOrchestrator for structured task execution
|
|
|
|
// DualAgentOrchestrator for structured task execution
|
|
|
|
let orchestrator: DualAgentOrchestrator;
|
|
|
|
let orchestrator: DualAgentOrchestrator;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * One entry of an invoice's `line_items` array (see `IInvoice.line_items`).
 * The field names match the keys of the line-item object shown in the JSON
 * format of the extraction prompt.
 */
interface ILineItem {
|
|
|
|
|
|
|
|
position: number; // row position; extractLineItems uses the 1-based array index when the parsed value is not a number
|
|
|
|
|
|
|
|
product: string; // product text; extractLineItems stringifies and trims it
|
|
|
|
|
|
|
|
description: string; // description text; extractLineItems stringifies and trims it
|
|
|
|
|
|
|
|
quantity: number; // extractLineItems falls back to 1 when the parsed quantity is falsy
|
|
|
|
|
|
|
|
unit_price: number; // presumably the table's "Unit Price" column, run through parseAmount — confirm
|
|
|
|
|
|
|
|
total_price: number; // presumably the table's per-row "Price" column, run through parseAmount — confirm
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
interface IInvoice {
|
|
|
|
interface IInvoice {
|
|
|
|
invoice_number: string;
|
|
|
|
invoice_number: string;
|
|
|
|
invoice_date: string;
|
|
|
|
invoice_date: string;
|
|
|
|
@@ -50,6 +59,7 @@ interface IInvoice {
|
|
|
|
net_amount: number;
|
|
|
|
net_amount: number;
|
|
|
|
vat_amount: number;
|
|
|
|
vat_amount: number;
|
|
|
|
total_amount: number;
|
|
|
|
total_amount: number;
|
|
|
|
|
|
|
|
line_items: ILineItem[];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
interface IImageData {
|
|
|
|
interface IImageData {
|
|
|
|
@@ -80,6 +90,7 @@ const JSON_EXTRACTION_PROMPT = `Extract key fields from the invoice. Return ONLY
|
|
|
|
WHERE TO FIND DATA:
|
|
|
|
WHERE TO FIND DATA:
|
|
|
|
- invoice_number, invoice_date, vendor_name: Look in the HEADER section at the TOP of PAGE 1 (near "Invoice no.", "Invoice date:", "Rechnungsnummer"). Use common sense. Btw. an invoice number might start on INV* . Also be sure to not omit special chars like / - and sp on. They are part of the invoice number.
|
|
|
|
- invoice_number, invoice_date, vendor_name: Look in the HEADER section at the TOP of PAGE 1 (near "Invoice no.", "Invoice date:", "Rechnungsnummer"). Use common sense. Btw. an invoice number might start on INV* . Also be sure to not omit special chars like / - and sp on. They are part of the invoice number.
|
|
|
|
- net_amount, vat_amount, total_amount: Look in the SUMMARY section at the BOTTOM (look for "Total", "Amount due", "Gesamtbetrag")
|
|
|
|
- net_amount, vat_amount, total_amount: Look in the SUMMARY section at the BOTTOM (look for "Total", "Amount due", "Gesamtbetrag")
|
|
|
|
|
|
|
|
- line_items: Look in the TABLE(s) with columns like Pos, Product, Description, Quantity, Unit Price, Price
|
|
|
|
|
|
|
|
|
|
|
|
RULES:
|
|
|
|
RULES:
|
|
|
|
1. Use common sense.
|
|
|
|
1. Use common sense.
|
|
|
|
@@ -89,9 +100,21 @@ RULES:
|
|
|
|
5. net_amount: Total before tax
|
|
|
|
5. net_amount: Total before tax
|
|
|
|
6. vat_amount: Tax amount
|
|
|
|
6. vat_amount: Tax amount
|
|
|
|
7. total_amount: Final total with tax
|
|
|
|
7. total_amount: Final total with tax
|
|
|
|
|
|
|
|
8. line_items: Array of items from the invoice table. Skip subtotal/total rows.
|
|
|
|
|
|
|
|
|
|
|
|
JSON only:
|
|
|
|
JSON format:
|
|
|
|
{"invoice_number":"X","invoice_date":"YYYY-MM-DD","vendor_name":"X","currency":"EUR","net_amount":0,"vat_amount":0,"total_amount":0}
|
|
|
|
{
|
|
|
|
|
|
|
|
"invoice_number": "X",
|
|
|
|
|
|
|
|
"invoice_date": "YYYY-MM-DD",
|
|
|
|
|
|
|
|
"vendor_name": "X",
|
|
|
|
|
|
|
|
"currency": "EUR",
|
|
|
|
|
|
|
|
"net_amount": 0,
|
|
|
|
|
|
|
|
"vat_amount": 0,
|
|
|
|
|
|
|
|
"total_amount": 0,
|
|
|
|
|
|
|
|
"line_items": [
|
|
|
|
|
|
|
|
{"position": 1, "product": "X", "description": "X", "quantity": 1, "unit_price": 0, "total_price": 0}
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Double check for valid JSON syntax. use the json validate tool.
|
|
|
|
Double check for valid JSON syntax. use the json validate tool.
|
|
|
|
|
|
|
|
|
|
|
|
@@ -310,7 +333,7 @@ function extractInvoiceNumber(s: string | undefined): string {
|
|
|
|
const match = clean.match(pattern);
|
|
|
|
const match = clean.match(pattern);
|
|
|
|
if (match) return match[1];
|
|
|
|
if (match) return match[1];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return clean.replace(/[^A-Z0-9-]/gi, '').trim() || clean;
|
|
|
|
return clean.replace(/[^A-Z0-9\/-]/gi, '').trim() || clean;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
/**
|
|
|
|
@@ -340,6 +363,21 @@ function extractCurrency(s: string | undefined): string {
|
|
|
|
return 'EUR';
|
|
|
|
return 'EUR';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Extract and normalize the line items array taken from the parsed JSON
 * (`jsonData.line_items`).
 *
 * The value originates from model-generated JSON, so its shape is not trusted:
 * - a non-array yields an empty list;
 * - entries that are not plain objects (null, primitives, nested arrays) are
 *   skipped, instead of throwing on property access or producing empty rows.
 *
 * @param items - Raw `line_items` value; may be anything.
 * @returns The normalized line items, empty when nothing usable is present.
 */
function extractLineItems(items: unknown): ILineItem[] {
  if (!Array.isArray(items)) return [];

  // Keep only real objects: `null.position` would throw, and strings/numbers
  // would otherwise turn into line items with no content.
  const rows = items.filter(
    (entry: unknown): entry is Record<string, unknown> =>
      typeof entry === 'object' && entry !== null && !Array.isArray(entry),
  );

  return rows.map((item, index) => ({
    // Fall back to the 1-based index when no numeric position was supplied.
    position: typeof item.position === 'number' ? item.position : index + 1,
    product: String(item.product || '').trim(),
    description: String(item.description || '').trim(),
    // A falsy parsed quantity (e.g. 0) defaults to a single unit.
    quantity: parseAmount(item.quantity as string | number) || 1,
    unit_price: parseAmount(item.unit_price as string | number),
    total_price: parseAmount(item.total_price as string | number),
  }));
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
/**
|
|
|
|
* Try to extract valid JSON from a response string
|
|
|
|
* Try to extract valid JSON from a response string
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
@@ -448,6 +486,7 @@ ${JSON_EXTRACTION_PROMPT}`;
|
|
|
|
net_amount: parseAmount(jsonData.net_amount as string | number),
|
|
|
|
net_amount: parseAmount(jsonData.net_amount as string | number),
|
|
|
|
vat_amount: parseAmount(jsonData.vat_amount as string | number),
|
|
|
|
vat_amount: parseAmount(jsonData.vat_amount as string | number),
|
|
|
|
total_amount: parseAmount(jsonData.total_amount as string | number),
|
|
|
|
total_amount: parseAmount(jsonData.total_amount as string | number),
|
|
|
|
|
|
|
|
line_items: extractLineItems(jsonData.line_items),
|
|
|
|
};
|
|
|
|
};
|
|
|
|
} catch (error) {
|
|
|
|
} catch (error) {
|
|
|
|
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
|
|
|
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
|
|
|
@@ -471,6 +510,7 @@ async function extractInvoice(markdown: string, docName: string): Promise<IInvoi
|
|
|
|
net_amount: 0,
|
|
|
|
net_amount: 0,
|
|
|
|
vat_amount: 0,
|
|
|
|
vat_amount: 0,
|
|
|
|
total_amount: 0,
|
|
|
|
total_amount: 0,
|
|
|
|
|
|
|
|
line_items: [],
|
|
|
|
};
|
|
|
|
};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
console.log(` [${docName}] Extracted: ${invoice.invoice_number}`);
|
|
|
|
console.log(` [${docName}] Extracted: ${invoice.invoice_number}`);
|
|
|
|
@@ -703,7 +743,7 @@ for (const tc of testCases) {
|
|
|
|
const elapsedMs = Date.now() - startTime;
|
|
|
|
const elapsedMs = Date.now() - startTime;
|
|
|
|
processingTimes.push(elapsedMs);
|
|
|
|
processingTimes.push(elapsedMs);
|
|
|
|
|
|
|
|
|
|
|
|
console.log(` Extracted: ${extracted.invoice_number} | ${extracted.invoice_date} | ${extracted.total_amount} ${extracted.currency}`);
|
|
|
|
console.log(` Extracted: ${extracted.invoice_number} | ${extracted.invoice_date} | ${extracted.total_amount} ${extracted.currency} | ${extracted.line_items.length} items`);
|
|
|
|
|
|
|
|
|
|
|
|
const result = compareInvoice(extracted, expected);
|
|
|
|
const result = compareInvoice(extracted, expected);
|
|
|
|
|
|
|
|
|
|
|
|
|