diff --git a/changelog.md b/changelog.md index 71ec8be..40deb90 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,14 @@ # Changelog +## 2026-01-20 - 1.15.0 - feat(tests) +integrate SmartAi/DualAgentOrchestrator into extraction tests and add JSON self-validation + +- Integrate SmartAi and DualAgentOrchestrator into bankstatement and invoice tests to perform structured extraction with streaming +- Register and use JsonValidatorTool to validate outputs (json.validate) and enforce validation before task completion +- Add tryExtractJson parsing fallback, improved extraction prompts, retries and clearer parsing/logging +- Initialize and teardown SmartAi and orchestrator in test setup/summary, and enable onToken streaming handlers for real-time output +- Bump devDependencies: @push.rocks/smartagent to ^1.3.0 and @push.rocks/smartai to ^0.12.0 + ## 2026-01-20 - 1.14.3 - fix(repo) no changes detected in the diff; no files modified and no release required diff --git a/package.json b/package.json index 64f3305..3127978 100644 --- a/package.json +++ b/package.json @@ -15,8 +15,8 @@ "devDependencies": { "@git.zone/tsrun": "^2.0.1", "@git.zone/tstest": "^3.1.5", - "@push.rocks/smartagent": "^1.2.8", - "@push.rocks/smartai": "^0.11.1" + "@push.rocks/smartagent": "^1.3.0", + "@push.rocks/smartai": "^0.12.0" }, "repository": { "type": "git", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a3d55e7..0c87b95 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -17,13 +17,13 @@ importers: version: 2.0.1 '@git.zone/tstest': specifier: ^3.1.5 - version: 3.1.5(socks@2.8.7)(typescript@5.9.3) + version: 3.1.6(socks@2.8.7)(typescript@5.9.3) '@push.rocks/smartagent': - specifier: ^1.2.8 - version: 1.2.8(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76) + specifier: ^1.3.0 + version: 1.3.0(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76) '@push.rocks/smartai': - specifier: ^0.11.1 - version: 0.11.1(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76) + specifier: ^0.12.0 + version: 0.12.0(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76) packages: @@ -231,11 +231,15 @@ packages: resolution: {integrity: sha512-Q/N6JNWvIvPnLDvjlE1OUBLPQHH6l3CltCEsHIujp45zQUSSh8K+gHnaEX45yAT1nyngnINhvWtzN+Nb9D8RAQ==} engines: {node: '>=6.9.0'} + '@babel/runtime@7.28.6': + resolution: {integrity: sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==} + engines: {node: '>=6.9.0'} + '@borewit/text-codec@0.2.1': resolution: {integrity: sha512-k7vvKPbf7J2fZ5klGRD9AeKfUvojuZIQ3BT5u7Jfv+puwXkUBUT5PVyMDfJZpy30CBDXGMgw7fguK/lpOMBvgw==} - '@cloudflare/workers-types@4.20260118.0': - resolution: {integrity: sha512-t+2Q421kAQqwBzMUDvgg2flp8zFVxOpiAyZPbyNcnPxMDHf0z3B7LqBIVQawwI6ntZinbk9f4oUmaA5bGeYwlg==} + '@cloudflare/workers-types@4.20260120.0': + resolution: {integrity: sha512-B8pueG+a5S+mdK3z8oKu1ShcxloZ7qWb68IEyLLaepvdryIbNC7JVPcY0bWsjS56UQVKc5fnyRge3yZIwc9bxw==} '@configvault.io/interfaces@1.0.17': resolution: {integrity: sha512-bEcCUR2VBDJsTin8HQh8Uw/mlYl2v8A3jMIaQ+MTB9Hrqd6CZL2dL7iJdWyFl/3EIX+LDxWFR+Oq7liIq7w+1Q==} @@ -422,8 +426,8 @@ packages: resolution: {integrity: sha512-NEcnsjvlC1o3Z6SS3VhKCf6Ev+Sh4EAinmggslrIR/ppMrvjDbXNFXoyr3PB+GLeSAR0JRZ1fGvVYjpEzjBdIg==} hasBin: true - '@git.zone/tstest@3.1.5': - resolution: {integrity: sha512-gXDFaZczq3ulwJVB4RN9wjV5bbF5cdibjuVesncYvhQtEO6zf6AwLHeN07atIgy18iBJgpvsCEwLGsO5XrtiIg==} + '@git.zone/tstest@3.1.6': + resolution: {integrity: sha512-xRGc6wO4rJ6mohPCMIBDRH+oNjiIvX6Jeo8v/Y5o5VyKSHFmqol7FCKSBrojMcqgBpESnLHFPJAAOmT9W3JV8Q==} hasBin: true '@happy-dom/global-registrator@15.11.7': @@ -864,11 +868,11 @@ packages: '@push.rocks/qenv@6.1.3': resolution: {integrity: sha512-+z2hsAU/7CIgpYLFqvda8cn9rUBMHqLdQLjsFfRn5jPoD7dJ5rFlpkbhfM4Ws8mHMniwWaxGKo+q/YBhtzRBLg==} - '@push.rocks/smartagent@1.2.8': - resolution: {integrity: sha512-mnvZNmdS7Kg7uUJVDlVlfWls7KjWT/lXd76HP805ih3OIjujpIgYBjepZlZ1jp+xoAVGA80vBDtbWoxE6IZKiw==} + '@push.rocks/smartagent@1.3.0': + resolution: {integrity: sha512-MuiJVJcl9Pdr03k1zVwgxTqprbIHKwqPqXdOmYFYn0xYnixOX1tBUYkGsu6xIntXq8t4WazBJiF9hCiMpDTiRA==} - '@push.rocks/smartai@0.11.1': - resolution: {integrity: sha512-GPOfy4h0ItHfQabfpmzmZYiSYAtXHpGdY6IJkjyW+IN10sgMQEbCpr/2u7MkgEDJHJYQZ49BWZH7/7GdtSxwrg==} + '@push.rocks/smartai@0.12.0': + resolution: {integrity: sha512-T4HRaSSxO6TQGGXlQeswX2eYkB+gMu0FbKF9qCUri6FdRlYzmPDn19jgPrPJxyg5m3oj6TzflvfYwcBCFlWo/A==} '@push.rocks/smartarchive@5.2.1': resolution: {integrity: sha512-TNv5q6QuBRX7jrzffiyb6A8AALNAr0kyAcJswa0l3ahBP1Q6zszNo9xOVXmW2gKX2KShtO/Y+Cn0i46n8lbnaQ==} @@ -2018,8 +2022,8 @@ packages: supports-color: optional: true - decode-named-character-reference@1.2.0: - resolution: {integrity: sha512-c6fcElNV6ShtZXmsgNgFFV5tVX2PaV4g+MOAkb8eXHvn6sryJBrZa9r0zV6+dtTyoCKxtDy5tyQ5ZwQuidtd+Q==} + decode-named-character-reference@1.3.0: + resolution: {integrity: sha512-GtpQYB283KrPp6nRw50q3U9/VfOutZOe103qlN7BPP6Ad27xYnOIWv4lPzo8HCAL+mMZofJ9KEy30fq6MfaK6Q==} define-data-property@1.1.4: resolution: {integrity: sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==} @@ -3751,7 +3755,7 @@ snapshots: '@api.global/typedrequest': 3.2.5 '@api.global/typedrequest-interfaces': 3.0.19 '@api.global/typedsocket': 3.1.1 - '@cloudflare/workers-types': 4.20260118.0 + '@cloudflare/workers-types': 4.20260120.0 '@design.estate/dees-comms': 1.0.30 '@push.rocks/lik': 6.2.2 '@push.rocks/smartchok': 1.2.0 @@ -4307,9 +4311,11 @@ snapshots: '@babel/runtime@7.28.4': {} + '@babel/runtime@7.28.6': {} + '@borewit/text-codec@0.2.1': {} - '@cloudflare/workers-types@4.20260118.0': {} + '@cloudflare/workers-types@4.20260120.0': {} '@configvault.io/interfaces@1.0.17': dependencies: @@ -4486,7 +4492,7 @@ snapshots: '@push.rocks/smartshell': 3.3.0 tsx: 4.21.0 - '@git.zone/tstest@3.1.5(socks@2.8.7)(typescript@5.9.3)': + '@git.zone/tstest@3.1.6(socks@2.8.7)(typescript@5.9.3)': dependencies: '@api.global/typedserver': 3.0.80 '@git.zone/tsbundle': 2.8.1 @@ -5200,9 +5206,9 @@ snapshots: '@push.rocks/smartlog': 3.1.10 '@push.rocks/smartpath': 6.0.0 - '@push.rocks/smartagent@1.2.8(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)': + '@push.rocks/smartagent@1.3.0(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)': dependencies: - '@push.rocks/smartai': 0.11.1(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76) + '@push.rocks/smartai': 0.12.0(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76) '@push.rocks/smartbrowser': 2.0.8(typescript@5.9.3) '@push.rocks/smartdeno': 1.2.0 '@push.rocks/smartfs': 1.3.1 @@ -5224,7 +5230,7 @@ snapshots: - ws - zod - '@push.rocks/smartai@0.11.1(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)': + '@push.rocks/smartai@0.12.0(typescript@5.9.3)(ws@8.19.0)(zod@3.25.76)': dependencies: '@anthropic-ai/sdk': 0.71.2(zod@3.25.76) '@mistralai/mistralai': 1.12.0 @@ -6905,7 +6911,7 @@ snapshots: dependencies: ms: 2.1.3 - decode-named-character-reference@1.2.0: + decode-named-character-reference@1.3.0: dependencies: character-entities: 2.0.2 @@ -7565,7 +7571,7 @@ snapshots: json-schema-to-ts@3.1.1: dependencies: - '@babel/runtime': 7.28.4 + '@babel/runtime': 7.28.6 ts-algebra: 2.0.0 jsonfile@6.2.0: @@ -7676,7 +7682,7 @@ snapshots: dependencies: '@types/mdast': 4.0.4 '@types/unist': 3.0.3 - decode-named-character-reference: 1.2.0 + decode-named-character-reference: 1.3.0 devlop: 1.1.0 mdast-util-to-string: 4.0.0 micromark: 4.0.2 @@ -7798,7 +7804,7 @@ snapshots: micromark-core-commonmark@2.0.3: dependencies: - decode-named-character-reference: 1.2.0 + decode-named-character-reference: 1.3.0 devlop: 1.1.0 micromark-factory-destination: 2.0.1 micromark-factory-label: 2.0.1 @@ -7938,7 +7944,7 @@ snapshots: micromark-util-decode-string@2.0.1: dependencies: - decode-named-character-reference: 1.2.0 + decode-named-character-reference: 1.3.0 micromark-util-character: 2.1.1 micromark-util-decode-numeric-character-reference: 2.0.2 micromark-util-symbol: 2.0.1 @@ -7976,7 +7982,7 @@ snapshots: dependencies: '@types/debug': 4.1.12 debug: 4.4.3 - decode-named-character-reference: 1.2.0 + decode-named-character-reference: 1.3.0 devlop: 1.1.0 micromark-core-commonmark: 2.0.3 micromark-factory-space: 2.0.1 diff --git a/test/test.bankstatements.minicpm.ts b/test/test.bankstatements.minicpm.ts index b16fa27..7d0b9b1 100644 --- a/test/test.bankstatements.minicpm.ts +++ b/test/test.bankstatements.minicpm.ts @@ -1,9 +1,10 @@ /** - * Bank statement extraction using MiniCPM-V (visual extraction) + * Bank statement extraction using MiniCPM-V via smartagent DualAgentOrchestrator * - * JSON per-page approach with streaming output: - * 1. Ask for structured JSON of all transactions per page - * 2. Single pass extraction (no consensus) + * Uses vision-capable orchestrator with JsonValidatorTool for self-validation: + * 1. Process each page with the orchestrator + * 2. Driver extracts transactions and validates JSON before completing + * 3. Streaming output during extraction */ import { tap, expect } from '@git.zone/tstest/tapbundle'; import * as fs from 'fs'; @@ -11,6 +12,8 @@ import * as path from 'path'; import { execSync } from 'child_process'; import * as os from 'os'; import { ensureMiniCpm } from './helpers/docker.js'; +import { SmartAi } from '@push.rocks/smartai'; +import { DualAgentOrchestrator, JsonValidatorTool } from '@push.rocks/smartagent'; const OLLAMA_URL = 'http://localhost:11434'; const MODEL = 'openbmb/minicpm-v4.5:q8_0'; @@ -21,21 +24,9 @@ interface ITransaction { amount: number; } -const JSON_PROMPT = `Extract ALL transactions from this bank statement page as a JSON array. - -IMPORTANT RULES: -1. Each transaction has: date, description/counterparty, and an amount -2. Amount is NEGATIVE for money going OUT (debits, payments, withdrawals) -3. Amount is POSITIVE for money coming IN (credits, deposits, refunds) -4. Date format: YYYY-MM-DD -5. Do NOT include: opening balance, closing balance, subtotals, headers, or summary rows -6. Only include actual transactions with a specific date and amount - -Return ONLY this JSON format, no explanation: -[ - {"date": "2021-06-01", "counterparty": "COMPANY NAME", "amount": -25.99}, - {"date": "2021-06-02", "counterparty": "DEPOSIT FROM", "amount": 100.00} -]`; +// SmartAi instance and orchestrator (initialized in setup) +let smartAi: SmartAi; +let orchestrator: DualAgentOrchestrator; /** * Convert PDF to PNG images using ImageMagick @@ -65,231 +56,31 @@ function convertPdfToImages(pdfPath: string): string[] { } } -/** - * Query for JSON extraction with streaming output - */ -async function queryJson(image: string, queryId: string): Promise { - const startTime = Date.now(); - process.stdout.write(` [${queryId}] `); +const EXTRACTION_PROMPT = `Extract ALL transactions from this bank statement page as a JSON array. - const response = await fetch(`${OLLAMA_URL}/api/chat`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - model: MODEL, - messages: [{ - role: 'user', - content: JSON_PROMPT, - images: [image], - }], - stream: true, - options: { - num_ctx: 32768, - num_predict: 4000, - temperature: 0.1, - }, - }), - }); +IMPORTANT RULES: +1. Each transaction has: date, counterparty (description), and an amount +2. Amount is NEGATIVE for money going OUT (debits, payments, withdrawals) +3. Amount is POSITIVE for money coming IN (credits, deposits, refunds) +4. Date format: YYYY-MM-DD +5. Do NOT include: opening balance, closing balance, subtotals, headers, or summary rows +6. Only include actual transactions with a specific date and amount - if (!response.ok) { - const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); - process.stdout.write(`ERROR: ${response.status} (${elapsed}s)\n`); - throw new Error(`Ollama API error: ${response.status}`); - } +Before completing, validate your JSON output: - let content = ''; - const reader = response.body!.getReader(); - const decoder = new TextDecoder(); + + json + validate + {"jsonString": "YOUR_JSON_ARRAY_HERE"} + - try { - while (true) { - const { done, value } = await reader.read(); - if (done) break; +Output format (must be a valid JSON array): +[ + {"date": "2021-06-01", "counterparty": "COMPANY NAME", "amount": -25.99}, + {"date": "2021-06-02", "counterparty": "DEPOSIT FROM", "amount": 100.00} +] - const chunk = decoder.decode(value, { stream: true }); - for (const line of chunk.split('\n').filter(l => l.trim())) { - try { - const json = JSON.parse(line); - const token = json.message?.content || ''; - if (token) { - process.stdout.write(token); - content += token; - } - } catch { - // Ignore parse errors for partial chunks - } - } - } - } finally { - const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); - process.stdout.write(` (${elapsed}s)\n`); - } - - return content.trim(); -} - -/** - * Sanitize JSON string - fix common issues from vision model output - */ -function sanitizeJson(jsonStr: string): string { - let s = jsonStr; - - // Fix +number (e.g., +93.80 -> 93.80) - JSON doesn't allow + prefix - // Handle various whitespace patterns - s = s.replace(/"amount"\s*:\s*\+/g, '"amount": '); - s = s.replace(/:\s*\+(\d)/g, ': $1'); - - // Fix European number format with thousands separator (e.g., 1.000.00 -> 1000.00) - // Pattern: "amount": X.XXX.XX where X.XXX is thousands and .XX is decimal - s = s.replace(/"amount"\s*:\s*(-?)(\d{1,3})\.(\d{3})\.(\d{2})\b/g, '"amount": $1$2$3.$4'); - // Also handle larger numbers like 10.000.00 - s = s.replace(/"amount"\s*:\s*(-?)(\d{1,3})\.(\d{3})\.(\d{3})\.(\d{2})\b/g, '"amount": $1$2$3$4.$5'); - - // Fix trailing commas before ] or } - s = s.replace(/,\s*([}\]])/g, '$1'); - - // Fix unescaped newlines inside strings (replace with space) - s = s.replace(/"([^"\\]*)\n([^"]*)"/g, '"$1 $2"'); - - // Fix unescaped tabs inside strings - s = s.replace(/"([^"\\]*)\t([^"]*)"/g, '"$1 $2"'); - - // Fix unescaped backslashes (but not already escaped ones) - s = s.replace(/\\(?!["\\/bfnrtu])/g, '\\\\'); - - // Fix common issues with counterparty names containing special chars - s = s.replace(/"counterparty":\s*"([^"]*)'([^"]*)"/g, '"counterparty": "$1$2"'); - - // Remove control characters except newlines (which we handle above) - s = s.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, ' '); - - return s; -} - -/** - * Parse JSON response into transactions - */ -function parseJsonResponse(response: string, queryId: string): ITransaction[] { - console.log(` [${queryId}] Parsing response...`); - - // Try to find JSON in markdown code block - const codeBlockMatch = response.match(/```(?:json)?\s*([\s\S]*?)```/); - let jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : response.trim(); - - if (codeBlockMatch) { - console.log(` [${queryId}] Found JSON in code block`); - } - - // Sanitize JSON (fix +number issue) - jsonStr = sanitizeJson(jsonStr); - - try { - const parsed = JSON.parse(jsonStr); - if (Array.isArray(parsed)) { - const txs = parsed.map(tx => ({ - date: String(tx.date || ''), - counterparty: String(tx.counterparty || tx.description || ''), - amount: parseAmount(tx.amount), - })); - console.log(` [${queryId}] Parsed ${txs.length} transactions (direct)`); - return txs; - } - console.log(` [${queryId}] Parsed JSON is not an array`); - } catch (e) { - const errMsg = (e as Error).message; - console.log(` [${queryId}] Direct parse failed: ${errMsg}`); - - // Log problematic section with context - const posMatch = errMsg.match(/position (\d+)/); - if (posMatch) { - const pos = parseInt(posMatch[1]); - const start = Math.max(0, pos - 40); - const end = Math.min(jsonStr.length, pos + 40); - const context = jsonStr.substring(start, end); - const marker = ' '.repeat(pos - start) + '^'; - console.log(` [${queryId}] Context around error position ${pos}:`); - console.log(` [${queryId}] ...${context}...`); - console.log(` [${queryId}] ${marker}`); - } - - // Try to find JSON array pattern - const arrayMatch = jsonStr.match(/\[[\s\S]*\]/); - if (arrayMatch) { - console.log(` [${queryId}] Found array pattern, trying to parse...`); - const sanitizedArray = sanitizeJson(arrayMatch[0]); - try { - const parsed = JSON.parse(sanitizedArray); - if (Array.isArray(parsed)) { - const txs = parsed.map(tx => ({ - date: String(tx.date || ''), - counterparty: String(tx.counterparty || tx.description || ''), - amount: parseAmount(tx.amount), - })); - console.log(` [${queryId}] Parsed ${txs.length} transactions (array match)`); - return txs; - } - } catch (e2) { - const errMsg2 = (e2 as Error).message; - console.log(` [${queryId}] Array parse failed: ${errMsg2}`); - const posMatch2 = errMsg2.match(/position (\d+)/); - if (posMatch2) { - const pos2 = parseInt(posMatch2[1]); - console.log(` [${queryId}] Context around error: ...${sanitizedArray.substring(Math.max(0, pos2 - 30), pos2 + 30)}...`); - } - - // Try to extract individual objects from the malformed array - console.log(` [${queryId}] Attempting object-by-object extraction...`); - const extracted = extractTransactionsFromMalformedJson(sanitizedArray, queryId); - if (extracted.length > 0) { - console.log(` [${queryId}] Recovered ${extracted.length} transactions via object extraction`); - return extracted; - } - } - } else { - console.log(` [${queryId}] No array pattern found in response`); - console.log(` [${queryId}] Raw response preview: ${response.substring(0, 200)}...`); - } - } - - console.log(` [${queryId}] PARSE FAILED - returning empty array`); - return []; -} - -/** - * Extract transactions from malformed JSON by parsing objects individually - */ -function extractTransactionsFromMalformedJson(jsonStr: string, queryId: string): ITransaction[] { - const transactions: ITransaction[] = []; - - // Match individual transaction objects - const objectPattern = /\{\s*"date"\s*:\s*"([^"]+)"\s*,\s*"counterparty"\s*:\s*"([^"]+)"\s*,\s*"amount"\s*:\s*([+-]?\d+\.?\d*)\s*\}/g; - let match; - - while ((match = objectPattern.exec(jsonStr)) !== null) { - transactions.push({ - date: match[1], - counterparty: match[2], - amount: parseFloat(match[3]), - }); - } - - // Also try with different field orders (amount before counterparty, etc.) - if (transactions.length === 0) { - const altPattern = /\{\s*"date"\s*:\s*"([^"]+)"[^}]*"amount"\s*:\s*([+-]?\d+\.?\d*)[^}]*\}/g; - while ((match = altPattern.exec(jsonStr)) !== null) { - // Try to extract counterparty from the match - const counterpartyMatch = match[0].match(/"counterparty"\s*:\s*"([^"]+)"/); - const descMatch = match[0].match(/"description"\s*:\s*"([^"]+)"/); - transactions.push({ - date: match[1], - counterparty: counterpartyMatch?.[1] || descMatch?.[1] || 'UNKNOWN', - amount: parseFloat(match[2]), - }); - } - } - - return transactions; -} +Only complete after validation passes. Output the final JSON array in tags.`; /** * Parse amount from various formats @@ -309,20 +100,92 @@ function parseAmount(value: unknown): number { } /** - * Extract transactions from a single page (single pass) + * Extract JSON from response (handles markdown code blocks and task_complete tags) + */ +function extractJsonFromResponse(response: string): unknown[] | null { + // Try to find JSON in task_complete tags + const completeMatch = response.match(/([\s\S]*?)<\/task_complete>/); + if (completeMatch) { + const content = completeMatch[1].trim(); + // Try to find JSON in the content + const codeBlockMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/); + const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : content; + try { + const parsed = JSON.parse(jsonStr); + if (Array.isArray(parsed)) return parsed; + } catch { + // Try to find JSON array pattern + const jsonMatch = jsonStr.match(/\[[\s\S]*\]/); + if (jsonMatch) { + try { + const parsed = JSON.parse(jsonMatch[0]); + if (Array.isArray(parsed)) return parsed; + } catch { + return null; + } + } + } + } + + // Try to find JSON in markdown code block + const codeBlockMatch = response.match(/```(?:json)?\s*([\s\S]*?)```/); + const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : response.trim(); + + try { + const parsed = JSON.parse(jsonStr); + if (Array.isArray(parsed)) return parsed; + } catch { + // Try to find JSON array pattern + const jsonMatch = jsonStr.match(/\[[\s\S]*\]/); + if (jsonMatch) { + try { + const parsed = JSON.parse(jsonMatch[0]); + if (Array.isArray(parsed)) return parsed; + } catch { + return null; + } + } + } + return null; +} + +/** + * Parse JSON response into transactions + */ +function parseJsonToTransactions(response: string): ITransaction[] { + const parsed = extractJsonFromResponse(response); + if (!parsed || !Array.isArray(parsed)) return []; + + return parsed.map((tx: any) => ({ + date: String(tx.date || ''), + counterparty: String(tx.counterparty || tx.description || ''), + amount: parseAmount(tx.amount), + })); +} + +/** + * Extract transactions from a single page using smartagent orchestrator */ async function extractTransactionsFromPage(image: string, pageNum: number): Promise { console.log(`\n ======== Page ${pageNum} ========`); - const queryId = `P${pageNum}`; - const response = await queryJson(image, queryId); - const transactions = parseJsonResponse(response, queryId); + const startTime = Date.now(); + + const result = await orchestrator.run(EXTRACTION_PROMPT, { images: [image] }); + + const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); + console.log(`\n [Page ${pageNum}] Completed in ${elapsed}s (${result.iterations} iterations, status: ${result.status})`); + + const transactions = parseJsonToTransactions(result.result); console.log(` [Page ${pageNum}] Extracted ${transactions.length} transactions:`); - for (let i = 0; i < transactions.length; i++) { + for (let i = 0; i < Math.min(transactions.length, 10); i++) { const tx = transactions[i]; console.log(` ${(i + 1).toString().padStart(2)}. ${tx.date} | ${tx.counterparty.substring(0, 30).padEnd(30)} | ${tx.amount >= 0 ? '+' : ''}${tx.amount.toFixed(2)}`); } + if (transactions.length > 10) { + console.log(` ... and ${transactions.length - 10} more transactions`); + } return transactions; } @@ -331,7 +194,7 @@ async function extractTransactionsFromPage(image: string, pageNum: number): Prom * Extract all transactions from bank statement */ async function extractTransactions(images: string[]): Promise { - console.log(` [Vision] Processing ${images.length} page(s) with ${MODEL} (single pass)`); + console.log(` [Vision] Processing ${images.length} page(s) with smartagent DualAgentOrchestrator`); const allTransactions: ITransaction[] = []; @@ -426,6 +289,80 @@ tap.test('setup: ensure Docker containers are running', async () => { console.log('\n[Setup] All containers ready!\n'); }); +tap.test('setup: initialize smartagent orchestrator', async () => { + console.log('[Setup] Initializing SmartAi and DualAgentOrchestrator...'); + + smartAi = new SmartAi({ + ollama: { + baseUrl: OLLAMA_URL, + model: MODEL, + defaultOptions: { + num_ctx: 32768, + num_predict: 4000, + temperature: 0.1, + }, + defaultTimeout: 300000, // 5 minutes for vision tasks + }, + }); + + await smartAi.start(); + + orchestrator = new DualAgentOrchestrator({ + smartAiInstance: smartAi, + defaultProvider: 'ollama', + guardianPolicyPrompt: `You are a Guardian agent overseeing bank statement extraction tasks. + +APPROVE all tool calls that: +- Use the json.validate action to verify JSON output +- Are reasonable attempts to complete the extraction task + +REJECT tool calls that: +- Attempt to access external resources +- Try to execute arbitrary code +- Are clearly unrelated to bank statement extraction`, + driverSystemMessage: `You are an AI assistant that extracts bank transactions from statement images. + +Your task is to analyze bank statement images and extract transaction data. +You have access to a json.validate tool to verify your JSON output. + +IMPORTANT: Always validate your JSON before completing the task. + +## Tool Usage Format +When you need to validate JSON, output: + + + json + validate + {"jsonString": "YOUR_JSON_ARRAY"} + + +## Completion Format +After validation passes, complete the task: + + +[{"date": "YYYY-MM-DD", "counterparty": "...", "amount": -123.45}, ...] +`, + maxIterations: 5, + maxConsecutiveRejections: 3, + onToken: (token, source) => { + if (source === 'driver') { + process.stdout.write(token); + } + }, + onProgress: (event) => { + if (event.logLevel === 'error') { + console.error(event.logMessage); + } + }, + }); + + // Register the JsonValidatorTool + orchestrator.registerTool(new JsonValidatorTool()); + + await orchestrator.start(); + console.log('[Setup] Orchestrator initialized!\n'); +}); + tap.test('should have MiniCPM-V model loaded', async () => { const response = await fetch(`${OLLAMA_URL}/api/tags`); const data = await response.json(); @@ -434,7 +371,7 @@ tap.test('should have MiniCPM-V model loaded', async () => { }); const testCases = findTestCases(); -console.log(`\nFound ${testCases.length} bank statement test cases (MiniCPM-V)\n`); +console.log(`\nFound ${testCases.length} bank statement test cases (smartagent + MiniCPM-V)\n`); let passedCount = 0; let failedCount = 0; @@ -466,7 +403,10 @@ for (const testCase of testCases) { // Log counterparty variations (names that differ but date/amount matched) if (result.variations.length > 0) { console.log(` Counterparty variations (${result.variations.length}):`); - result.variations.forEach((v) => console.log(` ${v}`)); + result.variations.slice(0, 5).forEach((v) => console.log(` ${v}`)); + if (result.variations.length > 5) { + console.log(` ... and ${result.variations.length - 5} more variations`); + } } expect(result.matches).toEqual(result.total); @@ -474,12 +414,20 @@ for (const testCase of testCases) { }); } +tap.test('cleanup: stop orchestrator', async () => { + if (orchestrator) { + await orchestrator.stop(); + } + console.log('[Cleanup] Orchestrator stopped'); +}); + tap.test('summary', async () => { const total = testCases.length; console.log(`\n======================================================`); - console.log(` Bank Statement Summary (${MODEL})`); + console.log(` Bank Statement Summary`); + console.log(` (smartagent + ${MODEL})`); console.log(`======================================================`); - console.log(` Method: JSON per-page (single pass)`); + console.log(` Method: DualAgentOrchestrator with vision`); console.log(` Passed: ${passedCount}/${total}`); console.log(` Failed: ${failedCount}/${total}`); console.log(`======================================================\n`); diff --git a/test/test.bankstatements.nanonets.ts b/test/test.bankstatements.nanonets.ts index b3254fc..337a581 100644 --- a/test/test.bankstatements.nanonets.ts +++ b/test/test.bankstatements.nanonets.ts @@ -11,7 +11,9 @@ import * as fs from 'fs'; import * as path from 'path'; import { execSync } from 'child_process'; import * as os from 'os'; -import { ensureNanonetsOcr, ensureMiniCpm, removeContainer, isContainerRunning } from './helpers/docker.js'; +import { ensureNanonetsOcr, ensureMiniCpm, isContainerRunning } from './helpers/docker.js'; +import { SmartAi } from '@push.rocks/smartai'; +import { DualAgentOrchestrator, JsonValidatorTool } from '@push.rocks/smartagent'; const NANONETS_URL = 'http://localhost:8000/v1'; const NANONETS_MODEL = 'nanonets/Nanonets-OCR2-3B'; @@ -22,6 +24,22 @@ const EXTRACTION_MODEL = 'gpt-oss:20b'; // Temp directory for storing markdown between stages const TEMP_MD_DIR = path.join(os.tmpdir(), 'nanonets-markdown'); +// SmartAi instance for Ollama with optimized settings +const smartAi = new SmartAi({ + ollama: { + baseUrl: OLLAMA_URL, + model: EXTRACTION_MODEL, + defaultOptions: { + num_ctx: 32768, // Larger context for long statements + thinking + temperature: 0, // Deterministic for JSON extraction + }, + defaultTimeout: 600000, // 10 minute timeout for large documents + }, +}); + +// DualAgentOrchestrator for structured task execution +let orchestrator: DualAgentOrchestrator; + interface ITransaction { date: string; counterparty: string; @@ -252,95 +270,107 @@ async function ensureExtractionModel(): Promise { } /** - * Extract transactions from markdown using GPT-OSS 20B (streaming) + * Try to extract valid JSON from a response string + */ +function tryExtractJson(response: string): unknown[] | null { + // Remove thinking tags + let clean = response.replace(/[\s\S]*?<\/think>/g, '').trim(); + + // Try task_complete tags first + const completeMatch = clean.match(/([\s\S]*?)<\/task_complete>/); + if (completeMatch) { + clean = completeMatch[1].trim(); + } + + // Try code block + const codeBlockMatch = clean.match(/```(?:json)?\s*([\s\S]*?)```/); + const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : clean; + + try { + const parsed = JSON.parse(jsonStr); + if (Array.isArray(parsed)) return parsed; + } catch { + // Try to find JSON array + const jsonMatch = jsonStr.match(/\[[\s\S]*\]/); + if (jsonMatch) { + try { + const parsed = JSON.parse(sanitizeJson(jsonMatch[0])); + if (Array.isArray(parsed)) return parsed; + } catch { + return null; + } + } + return null; + } + return null; +} + +/** + * Extract transactions from markdown using smartagent DualAgentOrchestrator + * Validates JSON and retries if invalid */ async function extractTransactionsFromMarkdown(markdown: string, queryId: string): Promise { const startTime = Date.now(); - console.log(` [${queryId}] Statement: ${markdown.length} chars, Prompt: ${JSON_EXTRACTION_PROMPT.length} chars`); + console.log(` [${queryId}] Statement: ${markdown.length} chars`); - const response = await fetch(`${OLLAMA_URL}/api/chat`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - model: EXTRACTION_MODEL, - messages: [ - { role: 'user', content: 'Hi there, how are you?' }, - { role: 'assistant', content: 'Good, how can I help you today?' }, - { role: 'user', content: `Here is a bank statement document:\n\n${markdown}` }, - { role: 'assistant', content: 'I have read the bank statement document you provided. I can see all the transaction data. What would you like me to do with it?' }, - { role: 'user', content: JSON_EXTRACTION_PROMPT }, - ], - stream: true, - options: { - num_ctx: 32768, // Larger context for long statements + thinking - temperature: 0, // Deterministic for JSON extraction - }, - }), - signal: AbortSignal.timeout(600000), // 10 minute timeout - }); + // Build the extraction task with document context + const taskPrompt = `Extract all transactions from this bank statement document and output ONLY the JSON array: - if (!response.ok) { - const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); - console.log(` [${queryId}] ERROR: ${response.status} (${elapsed}s)`); - throw new Error(`Ollama API error: ${response.status}`); - } +${markdown} - // Stream the response - let content = ''; - let thinkingContent = ''; - let thinkingStarted = false; - let outputStarted = false; - const reader = response.body!.getReader(); - const decoder = new TextDecoder(); +${JSON_EXTRACTION_PROMPT} + +Before completing, validate your JSON using the json.validate tool: + + + json + validate + {"jsonString": "YOUR_JSON_ARRAY_HERE"} + + +Only complete after validation passes. Output the final JSON array in tags.`; try { - while (true) { - const { done, value } = await reader.read(); - if (done) break; + const result = await orchestrator.run(taskPrompt); + const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); + console.log(` [${queryId}] Status: ${result.status}, Iterations: ${result.iterations} (${elapsed}s)`); - const chunk = decoder.decode(value, { stream: true }); + // Try to parse JSON from result + let jsonData: unknown[] | null = null; + let responseText = result.result || ''; - // Each line is a JSON object - for (const line of chunk.split('\n').filter(l => l.trim())) { - try { - const json = JSON.parse(line); + if (result.success && responseText) { + jsonData = tryExtractJson(responseText); + } - // Stream thinking tokens - const thinking = json.message?.thinking || ''; - if (thinking) { - if (!thinkingStarted) { - process.stdout.write(` [${queryId}] THINKING: `); - thinkingStarted = true; - } - process.stdout.write(thinking); - thinkingContent += thinking; - } - - // Stream content tokens - const token = json.message?.content || ''; - if (token) { - if (!outputStarted) { - if (thinkingStarted) process.stdout.write('\n'); - process.stdout.write(` [${queryId}] OUTPUT: `); - outputStarted = true; - } - process.stdout.write(token); - content += token; - } - } catch { - // Ignore parse errors for partial chunks - } + // Fallback: try parsing from history + if (!jsonData && result.history?.length > 0) { + const lastMessage = result.history[result.history.length - 1]; + if (lastMessage?.content) { + responseText = lastMessage.content; + jsonData = tryExtractJson(responseText); } } - } finally { - if (thinkingStarted || outputStarted) process.stdout.write('\n'); + + if (!jsonData) { + console.log(` [${queryId}] Failed to parse JSON`); + return []; + } + + // Convert to transactions + const txs = jsonData.map((tx: any) => ({ + date: String(tx.date || ''), + counterparty: String(tx.counterparty || tx.description || ''), + amount: parseAmount(tx.amount), + })); + console.log(` [${queryId}] Parsed ${txs.length} transactions`); + return txs; + } catch (error) { + const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); + console.log(` [${queryId}] ERROR: ${error} (${elapsed}s)`); + throw error; } - - const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); - console.log(` [${queryId}] Done: ${thinkingContent.length} thinking chars, ${content.length} output chars (${elapsed}s)`); - - return parseJsonResponse(content, queryId); } /** @@ -591,6 +621,53 @@ tap.test('Stage 2: Setup Ollama + GPT-OSS 20B', async () => { const extractionOk = await ensureExtractionModel(); expect(extractionOk).toBeTrue(); + + // Initialize SmartAi and DualAgentOrchestrator + console.log(' [SmartAgent] Starting SmartAi...'); + await smartAi.start(); + + console.log(' [SmartAgent] Creating DualAgentOrchestrator...'); + orchestrator = new DualAgentOrchestrator({ + smartAiInstance: smartAi, + defaultProvider: 'ollama', + guardianPolicyPrompt: ` + JSON EXTRACTION POLICY: + - APPROVE all JSON extraction tasks + - APPROVE all json.validate tool calls + - This is a read-only operation - no file system or network access needed + - The task is to extract structured transaction data from document text + `, + driverSystemMessage: `You are a precise JSON extraction assistant. Your only job is to extract transaction data from bank statements. + +CRITICAL RULES: +1. Output valid JSON array with the exact format requested +2. Amounts should be NEGATIVE for debits/withdrawals, POSITIVE for credits/deposits +3. IMPORTANT: Before completing, validate your JSON using the json.validate tool: + + + json + validate + {"jsonString": "YOUR_JSON_ARRAY"} + + +4. Only complete after validation passes + +When done, wrap your JSON array in tags.`, + maxIterations: 5, + // Enable streaming for real-time progress visibility + onToken: (token, source) => { + if (source === 'driver') { + process.stdout.write(token); + } + }, + }); + + // Register JsonValidatorTool for self-validation + orchestrator.registerTool(new JsonValidatorTool()); + + console.log(' [SmartAgent] Starting orchestrator...'); + await orchestrator.start(); + console.log(' [SmartAgent] Ready for extraction'); }); let passedCount = 0; @@ -642,11 +719,19 @@ for (const tc of testCases) { } tap.test('Summary', async () => { + // Cleanup orchestrator and SmartAi + if (orchestrator) { + console.log('\n [SmartAgent] Stopping orchestrator...'); + await orchestrator.stop(); + } + console.log(' [SmartAgent] Stopping SmartAi...'); + await smartAi.stop(); + console.log(`\n======================================================`); - console.log(` Bank Statement Summary (Nanonets + GPT-OSS 20B Sequential)`); + console.log(` Bank Statement Summary (Nanonets + SmartAgent)`); console.log(`======================================================`); console.log(` Stage 1: Nanonets-OCR-s (document -> markdown)`); - console.log(` Stage 2: GPT-OSS 20B (markdown -> JSON)`); + console.log(` Stage 2: GPT-OSS 20B + SmartAgent (markdown -> JSON)`); console.log(` Passed: ${passedCount}/${testCases.length}`); console.log(` Failed: ${failedCount}/${testCases.length}`); console.log(`======================================================\n`); diff --git a/test/test.invoices.minicpm.ts b/test/test.invoices.minicpm.ts index a319f36..0d0dbc1 100644 --- a/test/test.invoices.minicpm.ts +++ b/test/test.invoices.minicpm.ts @@ -1,10 +1,10 @@ /** - * Invoice extraction test using MiniCPM-V (visual extraction) + * Invoice extraction test using MiniCPM-V via smartagent DualAgentOrchestrator * - * Consensus approach: - * 1. Pass 1: Fast JSON extraction - * 2. Pass 2: Confirm with thinking enabled - * 3. If mismatch: repeat until consensus or max attempts + * Uses vision-capable orchestrator with JsonValidatorTool for self-validation: + * 1. Pass images to the orchestrator + * 2. Driver extracts invoice data and validates JSON before completing + * 3. If validation fails, driver retries within the same task */ import { tap, expect } from '@git.zone/tstest/tapbundle'; import * as fs from 'fs'; @@ -12,6 +12,8 @@ import * as path from 'path'; import { execSync } from 'child_process'; import * as os from 'os'; import { ensureMiniCpm } from './helpers/docker.js'; +import { SmartAi } from '@push.rocks/smartai'; +import { DualAgentOrchestrator, JsonValidatorTool } from '@push.rocks/smartagent'; const OLLAMA_URL = 'http://localhost:11434'; const MODEL = 'openbmb/minicpm-v4.5:q8_0'; @@ -26,6 +28,10 @@ interface IInvoice { total_amount: number; } +// SmartAi instance and orchestrator (initialized in setup) +let smartAi: SmartAi; +let orchestrator: DualAgentOrchestrator; + /** * Convert PDF to PNG images using ImageMagick */ @@ -54,7 +60,9 @@ function convertPdfToImages(pdfPath: string): string[] { } } -const JSON_PROMPT = `Extract invoice data from this image. Return ONLY a JSON object with these exact fields: +const EXTRACTION_PROMPT = `Extract invoice data from the provided image(s). + +IMPORTANT: You must output a valid JSON object with these exact fields: { "invoice_number": "the invoice number (not VAT ID, not customer ID)", "invoice_date": "YYYY-MM-DD format", @@ -64,150 +72,16 @@ const JSON_PROMPT = `Extract invoice data from this image. Return ONLY a JSON ob "vat_amount": 0.00, "total_amount": 0.00 } -Return only the JSON, no explanation.`; -/** - * Query MiniCPM-V for JSON output (fast, no thinking) with streaming - */ -async function queryJsonFast(images: string[]): Promise { - const startTime = Date.now(); - process.stdout.write(` [Fast] `); +Before completing, use the json.validate tool to verify your output is valid JSON with all required fields. - const response = await fetch(`${OLLAMA_URL}/api/chat`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - model: MODEL, - messages: [{ - role: 'user', - content: JSON_PROMPT, - images: images, - }], - stream: true, - options: { - num_ctx: 32768, - num_predict: 1000, - temperature: 0.1, - }, - }), - }); + + json + validate + {"jsonString": "YOUR_JSON_HERE", "requiredFields": ["invoice_number", "invoice_date", "vendor_name", "currency", "net_amount", "vat_amount", "total_amount"]} + - if (!response.ok) { - throw new Error(`Ollama API error: ${response.status}`); - } - - let content = ''; - const reader = response.body!.getReader(); - const decoder = new TextDecoder(); - - try { - while (true) { - const { done, value } = await reader.read(); - if (done) break; - - const chunk = decoder.decode(value, { stream: true }); - for (const line of chunk.split('\n').filter(l => l.trim())) { - try { - const json = JSON.parse(line); - const token = json.message?.content || ''; - if (token) { - process.stdout.write(token); - content += token; - } - } catch { - // Ignore parse errors for partial chunks - } - } - } - } finally { - const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); - process.stdout.write(` (${elapsed}s)\n`); - } - - return content.trim(); -} - -/** - * Query MiniCPM-V for JSON output with thinking enabled (slower, more accurate) with streaming - */ -async function queryJsonWithThinking(images: string[]): Promise { - const startTime = Date.now(); - process.stdout.write(` [Think] `); - - const response = await fetch(`${OLLAMA_URL}/api/chat`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - model: MODEL, - messages: [{ - role: 'user', - content: `Think carefully about this invoice image, then ${JSON_PROMPT}`, - images: images, - }], - stream: true, - options: { - num_ctx: 32768, - num_predict: 2000, - temperature: 0.1, - }, - }), - }); - - if (!response.ok) { - throw new Error(`Ollama API error: ${response.status}`); - } - - let content = ''; - let thinkingContent = ''; - let thinkingStarted = false; - let outputStarted = false; - const reader = response.body!.getReader(); - const decoder = new TextDecoder(); - - try { - while (true) { - const { done, value } = await reader.read(); - if (done) break; - - const chunk = decoder.decode(value, { stream: true }); - for (const line of chunk.split('\n').filter(l => l.trim())) { - try { - const json = JSON.parse(line); - - // Stream thinking tokens - const thinking = json.message?.thinking || ''; - if (thinking) { - if (!thinkingStarted) { - process.stdout.write(`THINKING: `); - thinkingStarted = true; - } - process.stdout.write(thinking); - thinkingContent += thinking; - } - - // Stream content tokens - const token = json.message?.content || ''; - if (token) { - if (!outputStarted) { - if (thinkingStarted) process.stdout.write('\n [Think] '); - process.stdout.write(`OUTPUT: `); - outputStarted = true; - } - process.stdout.write(token); - content += token; - } - } catch { - // Ignore parse errors for partial chunks - } - } - } - } finally { - const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); - process.stdout.write(` (${elapsed}s)\n`); - } - - return content.trim(); -} +Only complete the task after validation passes. Output the final JSON in tags.`; /** * Parse amount from string (handles European format) @@ -273,9 +147,31 @@ function extractCurrency(s: string | undefined): string { } /** - * Extract JSON from response (handles markdown code blocks) + * Extract JSON from response (handles markdown code blocks and task_complete tags) */ function extractJsonFromResponse(response: string): Record | null { + // Try to find JSON in task_complete tags + const completeMatch = response.match(/([\s\S]*?)<\/task_complete>/); + if (completeMatch) { + const content = completeMatch[1].trim(); + // Try to find JSON in the content + const codeBlockMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/); + const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : content; + try { + return JSON.parse(jsonStr); + } catch { + // Try to find JSON object pattern + const jsonMatch = jsonStr.match(/\{[\s\S]*\}/); + if (jsonMatch) { + try { + return JSON.parse(jsonMatch[0]); + } catch { + return null; + } + } + } + } + // Try to find JSON in markdown code block const codeBlockMatch = response.match(/```(?:json)?\s*([\s\S]*?)```/); const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : response.trim(); @@ -315,76 +211,27 @@ function parseJsonToInvoice(response: string): IInvoice | null { } /** - * Compare two invoices for consensus (key fields must match) - */ -function invoicesMatch(a: IInvoice, b: IInvoice): boolean { - const numMatch = a.invoice_number.toLowerCase() === b.invoice_number.toLowerCase(); - const dateMatch = a.invoice_date === b.invoice_date; - const totalMatch = Math.abs(a.total_amount - b.total_amount) < 0.02; - return numMatch && dateMatch && totalMatch; -} - -/** - * Extract invoice data using consensus approach: - * 1. Pass 1: Fast JSON extraction - * 2. Pass 2: Confirm with thinking enabled - * 3. If mismatch: repeat until consensus or max 5 attempts + * Extract invoice data using smartagent orchestrator with vision */ async function extractInvoiceFromImages(images: string[]): Promise { - console.log(` [Vision] Processing ${images.length} page(s) with ${MODEL} (consensus)`); + console.log(` [Vision] Processing ${images.length} page(s) with smartagent DualAgentOrchestrator`); - const MAX_ATTEMPTS = 5; - let attempt = 0; + const startTime = Date.now(); - while (attempt < MAX_ATTEMPTS) { - attempt++; - console.log(` [Attempt ${attempt}/${MAX_ATTEMPTS}]`); + const result = await orchestrator.run(EXTRACTION_PROMPT, { images }); - // PASS 1: Fast JSON extraction - console.log(` [Pass 1] Fast extraction...`); - const fastResponse = await queryJsonFast(images); - const fastInvoice = parseJsonToInvoice(fastResponse); + const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); + console.log(` [Vision] Completed in ${elapsed}s (${result.iterations} iterations, status: ${result.status})`); - if (!fastInvoice) { - console.log(` [Pass 1] JSON parsing failed, retrying...`); - continue; - } - console.log(` [Pass 1] Result: ${fastInvoice.invoice_number} | ${fastInvoice.invoice_date} | ${fastInvoice.total_amount} ${fastInvoice.currency}`); + const invoice = parseJsonToInvoice(result.result); - // PASS 2: Confirm with thinking - console.log(` [Pass 2] Thinking confirmation...`); - const thinkResponse = await queryJsonWithThinking(images); - const thinkInvoice = parseJsonToInvoice(thinkResponse); - - if (!thinkInvoice) { - console.log(` [Pass 2] JSON parsing failed, retrying...`); - continue; - } - console.log(` [Pass 2] Result: ${thinkInvoice.invoice_number} | ${thinkInvoice.invoice_date} | ${thinkInvoice.total_amount} ${thinkInvoice.currency}`); - - // Check consensus - if (invoicesMatch(fastInvoice, thinkInvoice)) { - console.log(` [Consensus] MATCH - using result`); - return thinkInvoice; // Prefer thinking result - } - - console.log(` [Consensus] MISMATCH - repeating...`); - console.log(` Fast: ${fastInvoice.invoice_number} | ${fastInvoice.invoice_date} | ${fastInvoice.total_amount}`); - console.log(` Think: ${thinkInvoice.invoice_number} | ${thinkInvoice.invoice_date} | ${thinkInvoice.total_amount}`); + if (invoice) { + console.log(` [Result] ${invoice.invoice_number} | ${invoice.invoice_date} | ${invoice.total_amount} ${invoice.currency}`); + return invoice; } - // Max attempts reached - do one final thinking pass and use that - console.log(` [Final] Max attempts reached, using final thinking pass`); - const finalResponse = await queryJsonWithThinking(images); - const finalInvoice = parseJsonToInvoice(finalResponse); - - if (finalInvoice) { - console.log(` [Final] Result: ${finalInvoice.invoice_number} | ${finalInvoice.invoice_date} | ${finalInvoice.total_amount} ${finalInvoice.currency}`); - return finalInvoice; - } - - // Return empty invoice if all else fails - console.log(` [Final] All parsing failed, returning empty`); + // Return empty invoice if parsing failed + console.log(` [Result] Parsing failed, returning empty invoice`); return { invoice_number: '', invoice_date: '', @@ -493,6 +340,79 @@ tap.test('setup: ensure Docker containers are running', async () => { console.log('\n[Setup] All containers ready!\n'); }); +tap.test('setup: initialize smartagent orchestrator', async () => { + console.log('[Setup] Initializing SmartAi and DualAgentOrchestrator...'); + + smartAi = new SmartAi({ + ollama: { + baseUrl: OLLAMA_URL, + model: MODEL, + defaultOptions: { + num_ctx: 32768, + temperature: 0.1, + }, + defaultTimeout: 300000, // 5 minutes for vision tasks + }, + }); + + await smartAi.start(); + + orchestrator = new DualAgentOrchestrator({ + smartAiInstance: smartAi, + defaultProvider: 'ollama', + guardianPolicyPrompt: `You are a Guardian agent overseeing invoice extraction tasks. + +APPROVE all tool calls that: +- Use the json.validate action to verify JSON output +- Are reasonable attempts to complete the extraction task + +REJECT tool calls that: +- Attempt to access external resources +- Try to execute arbitrary code +- Are clearly unrelated to invoice extraction`, + driverSystemMessage: `You are an AI assistant that extracts invoice data from images. + +Your task is to analyze invoice images and extract structured data. +You have access to a json.validate tool to verify your JSON output. + +IMPORTANT: Always validate your JSON before completing the task. + +## Tool Usage Format +When you need to validate JSON, output: + + + json + validate + {"jsonString": "YOUR_JSON", "requiredFields": ["invoice_number", "invoice_date", "vendor_name", "currency", "net_amount", "vat_amount", "total_amount"]} + + +## Completion Format +After validation passes, complete the task: + + +{"invoice_number": "...", "invoice_date": "YYYY-MM-DD", ...} +`, + maxIterations: 5, + maxConsecutiveRejections: 3, + onToken: (token, source) => { + if (source === 'driver') { + process.stdout.write(token); + } + }, + onProgress: (event) => { + if (event.logLevel === 'error') { + console.error(event.logMessage); + } + }, + }); + + // Register the JsonValidatorTool + orchestrator.registerTool(new JsonValidatorTool()); + + await orchestrator.start(); + console.log('[Setup] Orchestrator initialized!\n'); +}); + tap.test('should have MiniCPM-V model loaded', async () => { const response = await fetch(`${OLLAMA_URL}/api/tags`); const data = await response.json(); @@ -501,7 +421,7 @@ tap.test('should have MiniCPM-V model loaded', async () => { }); const testCases = findTestCases(); -console.log(`\nFound ${testCases.length} invoice test cases (MiniCPM-V)\n`); +console.log(`\nFound ${testCases.length} invoice test cases (smartagent + MiniCPM-V)\n`); let passedCount = 0; let failedCount = 0; @@ -538,6 +458,13 @@ for (const testCase of testCases) { }); } +tap.test('cleanup: stop orchestrator', async () => { + if (orchestrator) { + await orchestrator.stop(); + } + console.log('[Cleanup] Orchestrator stopped'); +}); + tap.test('summary', async () => { const totalInvoices = testCases.length; const accuracy = totalInvoices > 0 ? (passedCount / totalInvoices) * 100 : 0; @@ -545,9 +472,10 @@ tap.test('summary', async () => { const avgTimeSec = processingTimes.length > 0 ? totalTimeMs / processingTimes.length / 1000 : 0; console.log(`\n========================================`); - console.log(` Invoice Extraction Summary (${MODEL})`); + console.log(` Invoice Extraction Summary`); + console.log(` (smartagent + ${MODEL})`); console.log(`========================================`); - console.log(` Method: Consensus (fast + thinking)`); + console.log(` Method: DualAgentOrchestrator with vision`); console.log(` Passed: ${passedCount}/${totalInvoices}`); console.log(` Failed: ${failedCount}/${totalInvoices}`); console.log(` Accuracy: ${accuracy.toFixed(1)}%`); diff --git a/test/test.invoices.nanonets.ts b/test/test.invoices.nanonets.ts index 107cc5a..f0c8dae 100644 --- a/test/test.invoices.nanonets.ts +++ b/test/test.invoices.nanonets.ts @@ -13,7 +13,7 @@ import { execSync } from 'child_process'; import * as os from 'os'; import { ensureNanonetsOcr, ensureMiniCpm, isContainerRunning } from './helpers/docker.js'; import { SmartAi } from '@push.rocks/smartai'; -import { DualAgentOrchestrator } from '@push.rocks/smartagent'; +import { DualAgentOrchestrator, JsonValidatorTool } from '@push.rocks/smartagent'; const NANONETS_URL = 'http://localhost:8000/v1'; const NANONETS_MODEL = 'nanonets/Nanonets-OCR2-3B'; @@ -76,11 +76,11 @@ Page numbers should be wrapped in brackets. Ex: 14.`; const JSON_EXTRACTION_PROMPT = `Extract key fields from the invoice. Return ONLY valid JSON. WHERE TO FIND DATA: -- invoice_number, invoice_date, vendor_name: Look in the HEADER section at the TOP of PAGE 1 (near "Invoice no.", "Invoice date:", "Rechnungsnummer") +- invoice_number, invoice_date, vendor_name: Look in the HEADER section at the TOP of PAGE 1 (near "Invoice no.", "Invoice date:", "Rechnungsnummer"). Use common sense. Btw. an invoice number might start on INV* . - net_amount, vat_amount, total_amount: Look in the SUMMARY section at the BOTTOM (look for "Total", "Amount due", "Gesamtbetrag") RULES: -1. invoice_number: Extract ONLY the value (e.g., "R0015632540"), NOT the label "Invoice no." +1. Use common sense. 2. invoice_date: Convert to YYYY-MM-DD format (e.g., "14/04/2022" → "2022-04-14") 3. vendor_name: The company issuing the invoice 4. currency: EUR, USD, or GBP @@ -643,18 +643,27 @@ tap.test('Stage 2: Setup Ollama + GPT-OSS 20B', async () => { guardianPolicyPrompt: ` JSON EXTRACTION POLICY: - APPROVE all JSON extraction tasks + - APPROVE all json.validate tool calls - This is a read-only operation - no file system or network access needed - The task is to extract structured data from document text `, driverSystemMessage: `You are a precise JSON extraction assistant. Your only job is to extract invoice data from documents. CRITICAL RULES: -1. Output ONLY valid JSON - no markdown, no explanations, no thinking -2. Use the exact format requested -3. If you cannot find a value, use empty string "" or 0 for numbers +1. Output valid JSON with the exact format requested +2. If you cannot find a value, use empty string "" or 0 for numbers +3. IMPORTANT: Before completing, validate your JSON using the json.validate tool: + + + json + validate + {"jsonString": "YOUR_JSON", "requiredFields": ["invoice_number", "invoice_date", "vendor_name", "currency", "net_amount", "vat_amount", "total_amount"]} + + +4. Only complete after validation passes When done, wrap your JSON in tags.`, - maxIterations: 3, + maxIterations: 5, // Enable streaming for real-time progress visibility onToken: (token, source) => { if (source === 'driver') { @@ -663,7 +672,9 @@ When done, wrap your JSON in tags.`, }, }); - // No tools needed for JSON extraction + // Register JsonValidatorTool for self-validation + orchestrator.registerTool(new JsonValidatorTool()); + console.log(' [SmartAgent] Starting orchestrator...'); await orchestrator.start(); console.log(' [SmartAgent] Ready for extraction');