/**
 * Bank statement extraction using MiniCPM-V via smartagent DualAgentOrchestrator
 *
 * Uses vision-capable orchestrator with JsonValidatorTool for self-validation:
 * 1. Process each page with the orchestrator
 * 2. Driver extracts transactions and validates JSON before completing
 * 3. Streaming output during extraction
 */
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as fs from 'fs';
import * as path from 'path';
import { execFileSync } from 'child_process';
import * as os from 'os';
import { ensureMiniCpm } from './helpers/docker.js';
import { SmartAi } from '@push.rocks/smartai';
import { DualAgentOrchestrator, JsonValidatorTool } from '@push.rocks/smartagent';

const OLLAMA_URL = 'http://localhost:11434';
const MODEL = 'openbmb/minicpm-v4.5:q8_0';

/** One bank statement row as compared against the expected JSON fixtures. */
interface ITransaction {
  date: string;
  counterparty: string;
  amount: number;
}

// SmartAi instance and orchestrator (initialized in setup)
let smartAi: SmartAi;
let orchestrator: DualAgentOrchestrator;

/**
 * Convert PDF to PNG images using ImageMagick.
 *
 * Renders the PDF into a temporary directory, reads every produced PNG and
 * removes the directory again before returning (also when conversion throws).
 *
 * @param pdfPath - Path of the PDF to render.
 * @returns One base64-encoded PNG per produced file, in page order.
 * @throws Whatever `execFileSync` throws when `convert` fails or is missing.
 */
function convertPdfToImages(pdfPath: string): string[] {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pdf-convert-'));
  const outputPattern = path.join(tempDir, 'page-%d.png');

  // Numeric page index taken from the file name; 0 when the name has no digits.
  const pageIndex = (file: string): number => {
    const match = /(\d+)\.png$/.exec(file);
    return match ? Number(match[1]) : 0;
  };

  try {
    // Arguments are passed as an argv array (no shell), so paths containing
    // quotes, spaces or `$` reach ImageMagick unmodified.
    execFileSync(
      'convert',
      [
        '-density', '300',
        '-quality', '100',
        pdfPath,
        '-background', 'white',
        '-alpha', 'remove',
        outputPattern,
      ],
      { stdio: 'pipe' }
    );

    // Sort by page number: a plain string sort would put page-10 before page-2.
    const files = fs
      .readdirSync(tempDir)
      .filter((f: string) => f.endsWith('.png'))
      .sort((a, b) => pageIndex(a) - pageIndex(b) || a.localeCompare(b));

    return files.map((file) => fs.readFileSync(path.join(tempDir, file)).toString('base64'));
  } finally {
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
}

// NOTE(review): the <tool_call> markup below was reconstructed; the words
// "json", "validate" and the params object are from the original prompt, the
// surrounding tag names are assumed — confirm against the smartagent docs.
const EXTRACTION_PROMPT = `Extract ALL transactions from this bank statement page as a JSON array.

IMPORTANT RULES:
1. Each transaction has: date, counterparty (description), and an amount
2. Amount is NEGATIVE for money going OUT (debits, payments, withdrawals)
3. Amount is POSITIVE for money coming IN (credits, deposits, refunds)
4. Date format: YYYY-MM-DD
5. Do NOT include: opening balance, closing balance, subtotals, headers, or summary rows
6. Only include actual transactions with a specific date and amount

Before completing, validate your JSON output:

<tool_call>
  <tool>json</tool>
  <action>validate</action>
  <params>{"jsonString": "YOUR_JSON_ARRAY_HERE"}</params>
</tool_call>

Output format (must be a valid JSON array):
[
  {"date": "2021-06-01", "counterparty": "COMPANY NAME", "amount": -25.99},
  {"date": "2021-06-02", "counterparty": "DEPOSIT FROM", "amount": 100.00}
]

Only complete after validation passes. Output the final JSON array in <task_complete></task_complete> tags.`;

/**
 * Parse amount from various formats.
 *
 * @param value - A number (returned as-is) or a string such as "€1.234,56",
 *   "$1,234.56" or "−25.99". Any other type yields 0.
 * @returns The parsed amount, or 0 when the string is not numeric.
 */
function parseAmount(value: unknown): number {
  if (typeof value === 'number') return value;
  if (typeof value !== 'string') return 0;

  // Drop currency symbols/whitespace and normalise typographic minus signs.
  let s = value.replace(/[€$£\s]/g, '').replace('−', '-').replace('–', '-');

  // European format: comma is decimal
  if (s.includes(',') && s.indexOf(',') > s.lastIndexOf('.')) {
    s = s.replace(/\./g, '').replace(',', '.');
  } else {
    s = s.replace(/,/g, '');
  }
  return parseFloat(s) || 0;
}

/**
 * Try to read a JSON array out of a text fragment.
 *
 * Unwraps a markdown code fence when present, parses the remainder, and as a
 * last resort parses the span from the first `[` to the last `]`.
 *
 * @returns The parsed array, or null when no array could be parsed.
 */
function tryParseJsonArray(text: string): unknown[] | null {
  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/);
  const candidate = (fenced ? fenced[1] ?? '' : text).trim();

  try {
    const parsed: unknown = JSON.parse(candidate);
    if (Array.isArray(parsed)) return parsed;
  } catch {
    // Not pure JSON; fall back to scanning for an array below.
  }

  // Try to find JSON array pattern
  const arrayMatch = candidate.match(/\[[\s\S]*\]/);
  if (arrayMatch) {
    try {
      const parsed: unknown = JSON.parse(arrayMatch[0]);
      if (Array.isArray(parsed)) return parsed;
    } catch {
      // Bracketed span is not valid JSON either.
    }
  }
  return null;
}

/**
 * Extract JSON from response (handles markdown code blocks and task_complete tags).
 *
 * The content of the first `<task_complete>…</task_complete>` pair is tried
 * first; when it yields no array the whole response is tried.
 *
 * @returns The parsed array, or null when the response contains none.
 */
function extractJsonFromResponse(response: string): unknown[] | null {
  const completeMatch = response.match(/<task_complete>([\s\S]*?)<\/task_complete>/);
  if (completeMatch) {
    const fromTags = tryParseJsonArray(completeMatch[1] ?? '');
    if (fromTags) return fromTags;
  }
  return tryParseJsonArray(response);
}

/**
 * Parse JSON response into transactions.
 *
 * Entries that are not objects (e.g. `null` or bare strings) are skipped.
 * `description` is accepted as a fallback for `counterparty`.
 *
 * @returns The normalised transactions; empty when no JSON array was found.
 */
function parseJsonToTransactions(response: string): ITransaction[] {
  const parsed = extractJsonFromResponse(response);
  if (!parsed) return [];

  return parsed
    .filter((tx): tx is Record<string, unknown> => typeof tx === 'object' && tx !== null)
    .map((tx) => ({
      date: String(tx.date || ''),
      counterparty: String(tx.counterparty || tx.description || ''),
      amount: parseAmount(tx.amount),
    }));
}

/**
 * Extract transactions from a single page using smartagent orchestrator.
 *
 * @param image - Base64-encoded page image.
 * @param pageNum - 1-based page number, used for logging only.
 * @returns The transactions parsed from the orchestrator's result text.
 */
async function extractTransactionsFromPage(image: string, pageNum: number): Promise<ITransaction[]> {
  console.log(`\n ======== Page ${pageNum} ========`);

  const startTime = Date.now();
  const result = await orchestrator.run(EXTRACTION_PROMPT, { images: [image] });
  const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
  console.log(`\n [Page ${pageNum}] Completed in ${elapsed}s (${result.iterations} iterations, status: ${result.status})`);

  // NOTE(review): guards against a missing result text; confirm whether
  // `result.result` can actually be undefined on non-completed runs.
  const transactions = parseJsonToTransactions(result.result ?? '');

  console.log(` [Page ${pageNum}] Extracted ${transactions.length} transactions:`);
  // Only the first ten rows are printed to keep the log readable.
  for (const [i, tx] of transactions.slice(0, 10).entries()) {
    console.log(` ${(i + 1).toString().padStart(2)}. ${tx.date} | ${tx.counterparty.substring(0, 30).padEnd(30)} | ${tx.amount >= 0 ? '+' : ''}${tx.amount.toFixed(2)}`);
  }
  if (transactions.length > 10) {
    console.log(` ... and ${transactions.length - 10} more transactions`);
  }

  return transactions;
}

/**
 * Extract all transactions from bank statement.
 *
 * Pages are processed one after another and their transactions concatenated
 * in page order.
 *
 * @param images - Base64-encoded page images in page order.
 */
async function extractTransactions(images: string[]): Promise<ITransaction[]> {
  console.log(` [Vision] Processing ${images.length} page(s) with smartagent DualAgentOrchestrator`);

  const allTransactions: ITransaction[] = [];
  for (const [i, image] of images.entries()) {
    const pageTransactions = await extractTransactionsFromPage(image, i + 1);
    allTransactions.push(...pageTransactions);
  }

  console.log(` [Vision] Total: ${allTransactions.length} transactions`);
  return allTransactions;
}

/**
 * Compare extracted transactions against expected.
 *
 * Transactions are compared position by position. A pair matches when the
 * dates are equal and the amounts differ by less than 0.01; a differing
 * counterparty on a matching pair is reported as a variation, not an error.
 *
 * @returns Match count, expected total, error messages and variations.
 */
function compareTransactions(
  extracted: ITransaction[],
  expected: ITransaction[]
): { matches: number; total: number; errors: string[]; variations: string[] } {
  const errors: string[] = [];
  const variations: string[] = [];
  let matches = 0;

  for (let i = 0; i < expected.length; i++) {
    const exp = expected[i];
    const ext = extracted[i];

    if (!ext) {
      errors.push(`Missing transaction ${i}: ${exp.date} ${exp.counterparty}`);
      continue;
    }

    const dateMatch = ext.date === exp.date;
    const amountMatch = Math.abs(ext.amount - exp.amount) < 0.01;

    if (dateMatch && amountMatch) {
      matches++;
      // Track counterparty variations (date and amount match but name differs)
      if (ext.counterparty !== exp.counterparty) {
        variations.push(
          `[${i}] "${exp.counterparty}" → "${ext.counterparty}"`
        );
      }
    } else {
      errors.push(
        `Mismatch at ${i}: expected ${exp.date}/${exp.amount}, got ${ext.date}/${ext.amount}`
      );
    }
  }

  if (extracted.length > expected.length) {
    errors.push(`Extra transactions: ${extracted.length - expected.length}`);
  }

  return { matches, total: expected.length, errors, variations };
}

/**
 * Find all test cases (PDF + JSON pairs) in .nogit/
 *
 * A PDF is a test case only when a JSON file with the same base name sits next
 * to it. Returns an empty list when the directory does not exist.
 *
 * @returns Test cases sorted by name.
 */
function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: string }> {
  const testDir = path.join(process.cwd(), '.nogit');
  if (!fs.existsSync(testDir)) {
    return [];
  }

  const files = fs.readdirSync(testDir);
  const pdfFiles = files.filter((f: string) => f.endsWith('.pdf'));
  const testCases: Array<{ name: string; pdfPath: string; jsonPath: string }> = [];

  for (const pdf of pdfFiles) {
    // Strip only the trailing extension; `replace('.pdf', '')` would remove the
    // first occurrence, which is wrong for names like "a.pdf.scan.pdf".
    const baseName = pdf.slice(0, -'.pdf'.length);
    const jsonFile = `${baseName}.json`;
    if (files.includes(jsonFile)) {
      testCases.push({
        name: baseName,
        pdfPath: path.join(testDir, pdf),
        jsonPath: path.join(testDir, jsonFile),
      });
    }
  }

  return testCases.sort((a, b) => a.name.localeCompare(b.name));
}

// Tests

tap.test('setup: ensure Docker containers are running', async () => {
  console.log('\n[Setup] Checking Docker containers...\n');

  const minicpmOk = await ensureMiniCpm();
  expect(minicpmOk).toBeTrue();

  console.log('\n[Setup] All containers ready!\n');
});

tap.test('setup: initialize smartagent orchestrator', async () => {
  console.log('[Setup] Initializing SmartAi and DualAgentOrchestrator...');

  smartAi = new SmartAi({
    ollama: {
      baseUrl: OLLAMA_URL,
      model: MODEL,
      defaultOptions: {
        num_ctx: 32768,
        num_predict: 4000,
        temperature: 0.1,
      },
      defaultTimeout: 300000, // 5 minutes for vision tasks
    },
  });
  await smartAi.start();

  orchestrator = new DualAgentOrchestrator({
    smartAiInstance: smartAi,
    defaultProvider: 'ollama',
    guardianPolicyPrompt: `You are a Guardian agent overseeing bank statement extraction tasks.

APPROVE all tool calls that:
- Use the json.validate action to verify JSON output
- Are reasonable attempts to complete the extraction task

REJECT tool calls that:
- Attempt to access external resources
- Try to execute arbitrary code
- Are clearly unrelated to bank statement extraction`,
    // NOTE(review): the <tool_call> markup in this message was reconstructed
    // (tag names assumed) — confirm against the smartagent docs.
    driverSystemMessage: `You are an AI assistant that extracts bank transactions from statement images.

Your task is to analyze bank statement images and extract transaction data.
You have access to a json.validate tool to verify your JSON output.

IMPORTANT: Always validate your JSON before completing the task.

## Tool Usage Format
When you need to validate JSON, output:

<tool_call>
  <tool>json</tool>
  <action>validate</action>
  <params>{"jsonString": "YOUR_JSON_ARRAY"}</params>
</tool_call>

## Completion Format
After validation passes, complete the task:

<task_complete>
[{"date": "YYYY-MM-DD", "counterparty": "...", "amount": -123.45}, ...]
</task_complete>`,
    maxIterations: 5,
    maxConsecutiveRejections: 3,
    // Stream only the driver's tokens to stdout.
    onToken: (token, source) => {
      if (source === 'driver') {
        process.stdout.write(token);
      }
    },
    onProgress: (event) => {
      if (event.logLevel === 'error') {
        console.error(event.logMessage);
      }
    },
  });

  // Register the JsonValidatorTool
  orchestrator.registerTool(new JsonValidatorTool());

  await orchestrator.start();
  console.log('[Setup] Orchestrator initialized!\n');
});

tap.test('should have MiniCPM-V model loaded', async () => {
  const response = await fetch(`${OLLAMA_URL}/api/tags`);
  // Fail with a clear assertion instead of a TypeError on an error payload.
  expect(response.ok).toBeTrue();
  const data = (await response.json()) as { models?: Array<{ name: string }> };
  const modelNames = (data.models ?? []).map((m) => m.name);
  expect(modelNames.some((name: string) => name.includes('minicpm'))).toBeTrue();
});

const testCases = findTestCases();
console.log(`\nFound ${testCases.length} bank statement test cases (smartagent + MiniCPM-V)\n`);

let passedCount = 0;
let failedCount = 0;

for (const testCase of testCases) {
  tap.test(`should extract: ${testCase.name}`, async () => {
    let passed = false;
    try {
      const expected: ITransaction[] = JSON.parse(fs.readFileSync(testCase.jsonPath, 'utf-8'));
      console.log(`\n=== ${testCase.name} ===`);
      console.log(`Expected: ${expected.length} transactions`);

      const images = convertPdfToImages(testCase.pdfPath);
      console.log(` Pages: ${images.length}`);

      const extracted = await extractTransactions(images);
      console.log(` Extracted: ${extracted.length} transactions`);

      const result = compareTransactions(extracted, expected);
      passed = result.matches === result.total && extracted.length === expected.length;

      if (passed) {
        console.log(` Result: PASS (${result.matches}/${result.total})`);
      } else {
        console.log(` Result: FAIL (${result.matches}/${result.total})`);
        result.errors.slice(0, 10).forEach((e) => console.log(` - ${e}`));
      }

      // Log counterparty variations (names that differ but date/amount matched)
      if (result.variations.length > 0) {
        console.log(` Counterparty variations (${result.variations.length}):`);
        result.variations.slice(0, 5).forEach((v) => console.log(` ${v}`));
        if (result.variations.length > 5) {
          console.log(` ... and ${result.variations.length - 5} more variations`);
        }
      }

      expect(result.matches).toEqual(result.total);
      expect(extracted.length).toEqual(expected.length);
    } finally {
      // Count in `finally` so a test that throws (conversion or model error)
      // still shows up as failed in the summary.
      if (passed) {
        passedCount++;
      } else {
        failedCount++;
      }
    }
  });
}

tap.test('cleanup: stop orchestrator', async () => {
  if (orchestrator) {
    await orchestrator.stop();
  }
  console.log('[Cleanup] Orchestrator stopped');
});

tap.test('summary', async () => {
  const total = testCases.length;
  console.log(`\n======================================================`);
  console.log(` Bank Statement Summary`);
  console.log(` (smartagent + ${MODEL})`);
  console.log(`======================================================`);
  console.log(` Method: DualAgentOrchestrator with vision`);
  console.log(` Passed: ${passedCount}/${total}`);
  console.log(` Failed: ${failedCount}/${total}`);
  console.log(`======================================================\n`);
});

export default tap.start();