Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 70913c4b3e | |||
| 2ed419f6e4 | |||
| 45cb87e9e7 | |||
| 74a5b37e92 | |||
| 2bdcc74df0 | |||
| 981c031c6e |
23
changelog.md
23
changelog.md
@@ -1,5 +1,28 @@
|
||||
# Changelog
|
||||
|
||||
## 2026-01-20 - 1.16.0 - feat(invoices)
|
||||
add line_items extraction and normalization for invoice parsing
|
||||
|
||||
- Introduce ILineItem interface and add line_items array to IInvoice.
|
||||
- Add extractLineItems helper to normalize item fields (position, product, description, quantity, unit_price, total_price).
|
||||
- Include line_items in parsed invoice output and sample JSON in test, defaulting to [] when absent.
|
||||
- Update logging to include extracted line item count.
|
||||
- Clarify test instructions to extract items from invoice tables and skip subtotal/total rows.
|
||||
|
||||
## 2026-01-20 - 1.15.3 - fix(tests(nanonets))
|
||||
allow '/' when normalizing invoice strings in tests
|
||||
|
||||
- Adjust regex in test/test.invoices.nanonets.ts to preserve forward slashes when cleaning invoice values
|
||||
- Changed pattern from [^A-Z0-9-] to [^A-Z0-9\/-] to prevent accidental removal of '/' characters in invoice identifiers
|
||||
|
||||
## 2026-01-20 - 1.15.2 - fix(dev-deps)
|
||||
bump devDependencies @push.rocks/smartagent to ^1.6.2 and @push.rocks/smartai to ^0.13.3
|
||||
|
||||
- Bumped @push.rocks/smartagent from ^1.5.4 to ^1.6.2 in devDependencies
|
||||
- Bumped @push.rocks/smartai from ^0.13.2 to ^0.13.3 in devDependencies
|
||||
- Updated test/test.invoices.nanonets.ts JSON extraction prompt: instruct not to omit special characters in invoice_number and to use the json validate tool
|
||||
- No breaking changes; only dev dependency updates and test prompt adjustments
|
||||
|
||||
## 2026-01-20 - 1.15.1 - fix(tests)
|
||||
enable progress events in invoice tests and bump @push.rocks/smartagent devDependency to ^1.5.4
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@host.today/ht-docker-ai",
|
||||
"version": "1.15.1",
|
||||
"version": "1.16.0",
|
||||
"type": "module",
|
||||
"private": false,
|
||||
"description": "Docker images for AI vision-language models including MiniCPM-V 4.5",
|
||||
@@ -15,8 +15,8 @@
|
||||
"devDependencies": {
|
||||
"@git.zone/tsrun": "^2.0.1",
|
||||
"@git.zone/tstest": "^3.1.5",
|
||||
"@push.rocks/smartagent": "^1.5.4",
|
||||
"@push.rocks/smartai": "^0.13.2"
|
||||
"@push.rocks/smartagent": "^1.6.2",
|
||||
"@push.rocks/smartai": "^0.13.3"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
||||
22
pnpm-lock.yaml
generated
22
pnpm-lock.yaml
generated
@@ -19,11 +19,11 @@ importers:
|
||||
specifier: ^3.1.5
|
||||
version: 3.1.6(socks@2.8.7)(typescript@5.9.3)
|
||||
'@push.rocks/smartagent':
|
||||
specifier: ^1.5.4
|
||||
version: 1.5.4(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)
|
||||
specifier: ^1.6.2
|
||||
version: 1.6.2(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)
|
||||
'@push.rocks/smartai':
|
||||
specifier: ^0.13.2
|
||||
version: 0.13.2(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)
|
||||
specifier: ^0.13.3
|
||||
version: 0.13.3(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)
|
||||
|
||||
packages:
|
||||
|
||||
@@ -868,11 +868,11 @@ packages:
|
||||
'@push.rocks/qenv@6.1.3':
|
||||
resolution: {integrity: sha512-+z2hsAU/7CIgpYLFqvda8cn9rUBMHqLdQLjsFfRn5jPoD7dJ5rFlpkbhfM4Ws8mHMniwWaxGKo+q/YBhtzRBLg==}
|
||||
|
||||
'@push.rocks/smartagent@1.5.4':
|
||||
resolution: {integrity: sha512-W5DoG0MUY6oAvxMcMC0M0ZmMmjs6zs8WcDnz5jVcg74vD3oaSguJjcokjB4L4Tv1dzEhq0ArDOnaC2C0Lf3z7w==}
|
||||
'@push.rocks/smartagent@1.6.2':
|
||||
resolution: {integrity: sha512-JaYZ7tRbmS0fVrF73Z+RF9plJ/Va0H+81zvEACT8YZRf+WhhIT+P7kKh7IcTRrgudtA7aw6eVXUmOGCMeszm3Q==}
|
||||
|
||||
'@push.rocks/smartai@0.13.2':
|
||||
resolution: {integrity: sha512-FqnHh31tU0Nkr/g25UMAjjE7gZVN8cuioRlSq1xo19rS9kyMiux+UzpylO2tgXF5S+lTsw5cGtfP0BUfrxlTGg==}
|
||||
'@push.rocks/smartai@0.13.3':
|
||||
resolution: {integrity: sha512-VDZzHs101hpGMmUaectuLfcME4kHpuOS7o5ffuGk5lYl383foyAN71+5v441jpk/gLDNf2KhDACR/d2O4n90Ag==}
|
||||
|
||||
'@push.rocks/smartarchive@5.2.1':
|
||||
resolution: {integrity: sha512-TNv5q6QuBRX7jrzffiyb6A8AALNAr0kyAcJswa0l3ahBP1Q6zszNo9xOVXmW2gKX2KShtO/Y+Cn0i46n8lbnaQ==}
|
||||
@@ -5206,9 +5206,9 @@ snapshots:
|
||||
'@push.rocks/smartlog': 3.1.10
|
||||
'@push.rocks/smartpath': 6.0.0
|
||||
|
||||
'@push.rocks/smartagent@1.5.4(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)':
|
||||
'@push.rocks/smartagent@1.6.2(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)':
|
||||
dependencies:
|
||||
'@push.rocks/smartai': 0.13.2(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)
|
||||
'@push.rocks/smartai': 0.13.3(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)
|
||||
'@push.rocks/smartbrowser': 2.0.8(typescript@5.9.3)
|
||||
'@push.rocks/smartdeno': 1.2.0
|
||||
'@push.rocks/smartfs': 1.3.1
|
||||
@@ -5230,7 +5230,7 @@ snapshots:
|
||||
- ws
|
||||
- zod
|
||||
|
||||
'@push.rocks/smartai@0.13.2(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)':
|
||||
'@push.rocks/smartai@0.13.3(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)':
|
||||
dependencies:
|
||||
'@anthropic-ai/sdk': 0.71.2(zod@3.25.76)
|
||||
'@mistralai/mistralai': 1.12.0
|
||||
|
||||
@@ -42,6 +42,15 @@ const smartAi = new SmartAi({
|
||||
// DualAgentOrchestrator for structured task execution
|
||||
let orchestrator: DualAgentOrchestrator;
|
||||
|
||||
/**
 * One row of an invoice's item table, as requested by the JSON extraction
 * prompt (table columns like Pos, Product, Description, Quantity,
 * Unit Price, Price).
 */
interface ILineItem {
  /** Row position within the item table. */
  position: number;
  /** Product column value. */
  product: string;
  /** Description column value. */
  description: string;
  /** Quantity column value. */
  quantity: number;
  /** Unit price column value. */
  unit_price: number;
  /** Price (row total) column value. */
  total_price: number;
}
|
||||
|
||||
interface IInvoice {
|
||||
invoice_number: string;
|
||||
invoice_date: string;
|
||||
@@ -50,6 +59,7 @@ interface IInvoice {
|
||||
net_amount: number;
|
||||
vat_amount: number;
|
||||
total_amount: number;
|
||||
line_items: ILineItem[];
|
||||
}
|
||||
|
||||
interface IImageData {
|
||||
@@ -78,8 +88,9 @@ Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number>.`;
|
||||
const JSON_EXTRACTION_PROMPT = `Extract key fields from the invoice. Return ONLY valid JSON.
|
||||
|
||||
WHERE TO FIND DATA:
|
||||
- invoice_number, invoice_date, vendor_name: Look in the HEADER section at the TOP of PAGE 1 (near "Invoice no.", "Invoice date:", "Rechnungsnummer"). Use common sense. Btw. an invoice number might start on INV* .
|
||||
- invoice_number, invoice_date, vendor_name: Look in the HEADER section at the TOP of PAGE 1 (near "Invoice no.", "Invoice date:", "Rechnungsnummer"). Use common sense. Btw. an invoice number might start on INV* . Also be sure to not omit special chars like / - and sp on. They are part of the invoice number.
|
||||
- net_amount, vat_amount, total_amount: Look in the SUMMARY section at the BOTTOM (look for "Total", "Amount due", "Gesamtbetrag")
|
||||
- line_items: Look in the TABLE(s) with columns like Pos, Product, Description, Quantity, Unit Price, Price
|
||||
|
||||
RULES:
|
||||
1. Use common sense.
|
||||
@@ -89,11 +100,23 @@ RULES:
|
||||
5. net_amount: Total before tax
|
||||
6. vat_amount: Tax amount
|
||||
7. total_amount: Final total with tax
|
||||
8. line_items: Array of items from the invoice table. Skip subtotal/total rows.
|
||||
|
||||
JSON only:
|
||||
{"invoice_number":"X","invoice_date":"YYYY-MM-DD","vendor_name":"X","currency":"EUR","net_amount":0,"vat_amount":0,"total_amount":0}
|
||||
JSON format:
|
||||
{
|
||||
"invoice_number": "X",
|
||||
"invoice_date": "YYYY-MM-DD",
|
||||
"vendor_name": "X",
|
||||
"currency": "EUR",
|
||||
"net_amount": 0,
|
||||
"vat_amount": 0,
|
||||
"total_amount": 0,
|
||||
"line_items": [
|
||||
{"position": 1, "product": "X", "description": "X", "quantity": 1, "unit_price": 0, "total_price": 0}
|
||||
]
|
||||
}
|
||||
|
||||
Double check for valid JSON syntax.
|
||||
Double check for valid JSON syntax. use the json validate tool.
|
||||
|
||||
`;
|
||||
|
||||
@@ -310,7 +333,7 @@ function extractInvoiceNumber(s: string | undefined): string {
|
||||
const match = clean.match(pattern);
|
||||
if (match) return match[1];
|
||||
}
|
||||
return clean.replace(/[^A-Z0-9-]/gi, '').trim() || clean;
|
||||
return clean.replace(/[^A-Z0-9\/-]/gi, '').trim() || clean;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -340,6 +363,21 @@ function extractCurrency(s: string | undefined): string {
|
||||
return 'EUR';
|
||||
}
|
||||
|
||||
/**
 * Extract and normalize the line items array from parsed model output.
 *
 * @param items - Untrusted `line_items` value taken from the parsed JSON.
 *   Anything that is not an array yields an empty list.
 * @returns One normalized ILineItem per object-like array element.
 *
 * Entries that are not objects (null, undefined, bare strings/numbers) are
 * skipped: dereferencing `null`/`undefined` would throw and discard the whole
 * invoice, and primitives carry no item fields to normalize.
 */
function extractLineItems(items: unknown): ILineItem[] {
  if (!Array.isArray(items)) return [];

  const isRecord = (item: unknown): item is Record<string, unknown> =>
    typeof item === 'object' && item !== null;

  return items.filter(isRecord).map((item, index) => ({
    // Fall back to the 1-based index among the kept rows when no numeric position is given.
    position: typeof item.position === 'number' ? item.position : index + 1,
    product: String(item.product || '').trim(),
    description: String(item.description || '').trim(),
    // A missing or zero quantity defaults to 1.
    quantity: parseAmount(item.quantity as string | number) || 1,
    unit_price: parseAmount(item.unit_price as string | number),
    total_price: parseAmount(item.total_price as string | number),
  }));
}
|
||||
|
||||
/**
|
||||
* Try to extract valid JSON from a response string
|
||||
*/
|
||||
@@ -448,6 +486,7 @@ ${JSON_EXTRACTION_PROMPT}`;
|
||||
net_amount: parseAmount(jsonData.net_amount as string | number),
|
||||
vat_amount: parseAmount(jsonData.vat_amount as string | number),
|
||||
total_amount: parseAmount(jsonData.total_amount as string | number),
|
||||
line_items: extractLineItems(jsonData.line_items),
|
||||
};
|
||||
} catch (error) {
|
||||
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
@@ -471,6 +510,7 @@ async function extractInvoice(markdown: string, docName: string): Promise<IInvoi
|
||||
net_amount: 0,
|
||||
vat_amount: 0,
|
||||
total_amount: 0,
|
||||
line_items: [],
|
||||
};
|
||||
}
|
||||
console.log(` [${docName}] Extracted: ${invoice.invoice_number}`);
|
||||
@@ -703,7 +743,7 @@ for (const tc of testCases) {
|
||||
const elapsedMs = Date.now() - startTime;
|
||||
processingTimes.push(elapsedMs);
|
||||
|
||||
console.log(` Extracted: ${extracted.invoice_number} | ${extracted.invoice_date} | ${extracted.total_amount} ${extracted.currency}`);
|
||||
console.log(` Extracted: ${extracted.invoice_number} | ${extracted.invoice_date} | ${extracted.total_amount} ${extracted.currency} | ${extracted.line_items.length} items`);
|
||||
|
||||
const result = compareInvoice(extracted, expected);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user