Files
ht-docker-ai/test/test.bankstatements.minicpm.ts

437 lines
14 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Bank statement extraction using MiniCPM-V via smartagent DualAgentOrchestrator
*
* Uses vision-capable orchestrator with JsonValidatorTool for self-validation:
* 1. Process each page with the orchestrator
* 2. Driver extracts transactions and validates JSON before completing
* 3. Streaming output during extraction
*/
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as fs from 'fs';
import * as path from 'path';
import { execSync } from 'child_process';
import * as os from 'os';
import { ensureMiniCpm } from './helpers/docker.js';
import { SmartAi } from '@push.rocks/smartai';
import { DualAgentOrchestrator, JsonValidatorTool } from '@push.rocks/smartagent';
// Base URL of the Ollama HTTP API; used for the SmartAi provider config and the /api/tags check.
const OLLAMA_URL = 'http://localhost:11434';
// Model tag handed to the Ollama provider and printed in the final summary.
const MODEL = 'openbmb/minicpm-v4.5:q8_0';
/**
 * A single bank-statement transaction, both as extracted from a page and as
 * read from the expected-result JSON fixtures.
 */
interface ITransaction {
// Compared by exact string equality; the extraction prompt asks for YYYY-MM-DD.
date: string;
// Free-text name/description; differences here are reported as variations, not errors.
counterparty: string;
// Signed amount; the extraction prompt asks for negative = money out, positive = money in.
amount: number;
}
// Shared SmartAi provider and orchestrator. Both are assigned in the
// 'setup: initialize smartagent orchestrator' test and used by the extraction helpers.
let smartAi: SmartAi;
let orchestrator: DualAgentOrchestrator;
/**
 * Convert a PDF into PNG page images using the ImageMagick `convert` CLI.
 *
 * Pages are rendered at 300 DPI on a white background into a temporary
 * directory, read back and returned as base64 strings. The temporary
 * directory is always removed, even when the conversion fails.
 *
 * @param pdfPath Path of the PDF file to convert.
 * @returns Base64-encoded PNG data, one entry per rendered file, ordered by
 *          the numeric page index in the file name.
 * @throws If the `convert` command fails or a rendered file cannot be read.
 */
function convertPdfToImages(pdfPath: string): string[] {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pdf-convert-'));
  const outputPattern = path.join(tempDir, 'page-%d.png');

  // Numeric index embedded in a rendered file name ("page-12.png" -> 12).
  // Names without an index sort last.
  const pageIndex = (fileName: string): number => {
    const match = /(\d+)\.png$/.exec(fileName);
    return match ? Number(match[1]) : Number.MAX_SAFE_INTEGER;
  };

  try {
    execSync(
      `convert -density 300 -quality 100 "${pdfPath}" -background white -alpha remove "${outputPattern}"`,
      { stdio: 'pipe' }
    );
    const files = fs
      .readdirSync(tempDir)
      .filter((f: string) => f.endsWith('.png'))
      // Sort by page number. A plain lexicographic sort() would place
      // "page-10.png" before "page-2.png" and scramble statements with
      // ten or more pages.
      .sort((a: string, b: string) => pageIndex(a) - pageIndex(b) || a.localeCompare(b));
    return files.map((file: string) =>
      fs.readFileSync(path.join(tempDir, file)).toString('base64')
    );
  } finally {
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
}
// Task prompt passed to orchestrator.run() for every page image. It defines the
// transaction fields and sign convention, shows the json.validate tool-call
// syntax, and asks for the final array inside <task_complete> tags, which is
// the wrapper extractJsonFromResponse() looks for first.
const EXTRACTION_PROMPT = `Extract ALL transactions from this bank statement page as a JSON array.
IMPORTANT RULES:
1. Each transaction has: date, counterparty (description), and an amount
2. Amount is NEGATIVE for money going OUT (debits, payments, withdrawals)
3. Amount is POSITIVE for money coming IN (credits, deposits, refunds)
4. Date format: YYYY-MM-DD
5. Do NOT include: opening balance, closing balance, subtotals, headers, or summary rows
6. Only include actual transactions with a specific date and amount
Before completing, validate your JSON output:
<tool_call>
<tool>json</tool>
<action>validate</action>
<params>{"jsonString": "YOUR_JSON_ARRAY_HERE"}</params>
</tool_call>
Output format (must be a valid JSON array):
[
{"date": "2021-06-01", "counterparty": "COMPANY NAME", "amount": -25.99},
{"date": "2021-06-02", "counterparty": "DEPOSIT FROM", "amount": 100.00}
]
Only complete after validation passes. Output the final JSON array in <task_complete> tags.`;
/**
 * Parse a monetary amount from a number or a formatted string.
 *
 * Numbers are returned unchanged. Strings have currency symbols (€, $, £)
 * and whitespace removed and typographic minus/dash characters normalised to
 * ASCII '-'. Both "1.234,56" (comma decimal) and "1,234.56" (dot decimal)
 * are understood.
 *
 * @param value Raw amount as found in the model output.
 * @returns The parsed amount, or 0 when the value is neither a number nor a
 *          parseable string.
 */
function parseAmount(value: unknown): number {
  if (typeof value === 'number') return value;
  if (typeof value !== 'string') return 0;

  // The dash characters are written as escapes on purpose: U+2212 (minus sign),
  // U+2012 (figure dash), U+2013 (en dash) and U+2014 (em dash). A string
  // pattern of '' would instead prepend '-' to every input and make all
  // string amounts unparseable.
  let s = value
    .replace(/[€$£\s]/g, '')
    .replace(/[\u2212\u2012\u2013\u2014]/g, '-');

  // A comma that appears after the last dot (or with no dot at all) is the
  // decimal separator; dots are then thousands separators.
  if (s.includes(',') && s.indexOf(',') > s.lastIndexOf('.')) {
    s = s.replace(/\./g, '').replace(',', '.');
  } else {
    // Otherwise commas are thousands separators.
    s = s.replace(/,/g, '');
  }
  return parseFloat(s) || 0;
}
/**
 * Try to read a JSON array out of one text fragment.
 *
 * If the fragment contains a markdown code fence, only the fenced content is
 * considered. The candidate is parsed as-is first; when that throws, the
 * span from the first '[' to the last ']' is parsed instead.
 *
 * @returns The parsed array, or null when no JSON array could be read.
 */
function parseJsonArrayCandidate(text: string): unknown[] | null {
  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/);
  const candidate = fenced ? fenced[1].trim() : text.trim();
  try {
    const parsed: unknown = JSON.parse(candidate);
    return Array.isArray(parsed) ? parsed : null;
  } catch {
    // Not valid JSON as a whole: look for an array embedded in other text.
    const bracketed = candidate.match(/\[[\s\S]*\]/);
    if (!bracketed) return null;
    try {
      const parsed: unknown = JSON.parse(bracketed[0]);
      return Array.isArray(parsed) ? parsed : null;
    } catch {
      return null;
    }
  }
}

/**
 * Extract a JSON array from a model response.
 *
 * The content of the first <task_complete>…</task_complete> pair is tried
 * first. If it yields no array (including when it is malformed), the whole
 * response is searched, so a valid array elsewhere in the response is still
 * found.
 *
 * @param response Raw response text from the orchestrator.
 * @returns The parsed array, or null when the response contains no JSON array.
 */
function extractJsonFromResponse(response: string): unknown[] | null {
  const completeMatch = response.match(/<task_complete>([\s\S]*?)<\/task_complete>/);
  if (completeMatch) {
    const fromTags = parseJsonArrayCandidate(completeMatch[1]);
    if (fromTags) return fromTags;
  }
  return parseJsonArrayCandidate(response);
}
/**
 * Turn a model response into a list of transactions.
 *
 * The JSON array is located with extractJsonFromResponse(); each object in
 * it is mapped to an ITransaction. `description` is accepted as a fallback
 * for `counterparty`, and amounts go through parseAmount().
 *
 * @param response Raw response text from the orchestrator.
 * @returns The mapped transactions; an empty list when no JSON array is found.
 */
function parseJsonToTransactions(response: string): ITransaction[] {
  const parsed = extractJsonFromResponse(response);
  if (!parsed) return [];

  const transactions: ITransaction[] = [];
  for (const entry of parsed) {
    // Model output may contain null or primitive entries. Skip them rather
    // than throwing a TypeError on property access.
    if (typeof entry !== 'object' || entry === null) continue;
    const tx = entry as Record<string, unknown>;
    transactions.push({
      date: String(tx.date || ''),
      counterparty: String(tx.counterparty || tx.description || ''),
      amount: parseAmount(tx.amount),
    });
  }
  return transactions;
}
/**
 * Run the orchestrator on one page image and return the transactions it reports.
 *
 * Logs a page banner, the elapsed time with iteration count and status, and
 * a preview of at most ten extracted transactions.
 *
 * @param image Base64-encoded page image.
 * @param pageNum 1-based page number, used only for log output.
 * @returns Transactions parsed from the orchestrator result.
 */
async function extractTransactionsFromPage(image: string, pageNum: number): Promise<ITransaction[]> {
  const previewLimit = 10;
  console.log(`\n ======== Page ${pageNum} ========`);

  const startedAt = Date.now();
  const outcome = await orchestrator.run(EXTRACTION_PROMPT, { images: [image] });
  const seconds = ((Date.now() - startedAt) / 1000).toFixed(1);
  console.log(`\n [Page ${pageNum}] Completed in ${seconds}s (${outcome.iterations} iterations, status: ${outcome.status})`);

  const pageTransactions = parseJsonToTransactions(outcome.result);
  console.log(` [Page ${pageNum}] Extracted ${pageTransactions.length} transactions:`);

  // Print only the first few rows to keep the log readable.
  pageTransactions.slice(0, previewLimit).forEach((tx, index) => {
    const ordinal = (index + 1).toString().padStart(2);
    const name = tx.counterparty.substring(0, 30).padEnd(30);
    const sign = tx.amount >= 0 ? '+' : '';
    console.log(` ${ordinal}. ${tx.date} | ${name} | ${sign}${tx.amount.toFixed(2)}`);
  });

  const hidden = pageTransactions.length - previewLimit;
  if (hidden > 0) {
    console.log(` ... and ${hidden} more transactions`);
  }
  return pageTransactions;
}
/**
 * Extract the transactions of a whole statement, page by page.
 *
 * @param images Base64-encoded page images in page order.
 * @returns All transactions, concatenated in page order.
 */
async function extractTransactions(images: string[]): Promise<ITransaction[]> {
  console.log(` [Vision] Processing ${images.length} page(s) with smartagent DualAgentOrchestrator`);

  const collected: ITransaction[] = [];
  let pageNumber = 0;
  for (const image of images) {
    pageNumber += 1;
    // Each page is awaited before the next one starts.
    const found = await extractTransactionsFromPage(image, pageNumber);
    for (const tx of found) {
      collected.push(tx);
    }
  }

  console.log(` [Vision] Total: ${collected.length} transactions`);
  return collected;
}
/**
 * Compare extracted transactions with the expected ones, position by position.
 *
 * A position counts as a match when the dates are identical strings and the
 * amounts differ by less than 0.01. Counterparty differences on matching
 * positions are collected as variations and do not affect the match count.
 *
 * @param extracted Transactions produced by the extraction.
 * @param expected Transactions from the fixture.
 * @returns Match count, expected total, error messages and counterparty variations.
 */
function compareTransactions(
  extracted: ITransaction[],
  expected: ITransaction[]
): { matches: number; total: number; errors: string[]; variations: string[] } {
  const errors: string[] = [];
  const variations: string[] = [];
  let matches = 0;

  for (const [index, want] of expected.entries()) {
    const got = extracted[index];
    if (!got) {
      errors.push(`Missing transaction ${index}: ${want.date} ${want.counterparty}`);
      continue;
    }

    const sameDate = got.date === want.date;
    const sameAmount = Math.abs(got.amount - want.amount) < 0.01;
    if (!(sameDate && sameAmount)) {
      errors.push(
        `Mismatch at ${index}: expected ${want.date}/${want.amount}, got ${got.date}/${got.amount}`
      );
      continue;
    }

    matches += 1;
    // Date and amount agree; a differing name is only worth a note.
    if (got.counterparty !== want.counterparty) {
      variations.push(`[${index}] "${want.counterparty}" → "${got.counterparty}"`);
    }
  }

  const surplus = extracted.length - expected.length;
  if (surplus > 0) {
    errors.push(`Extra transactions: ${surplus}`);
  }

  return { matches, total: expected.length, errors, variations };
}
/**
 * Find all test cases in the `.nogit/` directory of the current working directory.
 *
 * A test case is a `<name>.pdf` file with a `<name>.json` file next to it.
 * PDFs without a matching JSON file are ignored.
 *
 * @returns Test cases sorted by name; an empty list when `.nogit/` does not exist.
 */
function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: string }> {
  const pdfExtension = '.pdf';
  const testDir = path.join(process.cwd(), '.nogit');
  if (!fs.existsSync(testDir)) {
    return [];
  }

  const files = fs.readdirSync(testDir);
  const available = new Set<string>(files);
  const testCases: Array<{ name: string; pdfPath: string; jsonPath: string }> = [];

  for (const pdf of files) {
    if (!pdf.endsWith(pdfExtension)) continue;
    // Strip the trailing extension only. String.replace('.pdf', '') would
    // remove the first ".pdf" anywhere in the name, e.g. inside "scan.pdf.v2.pdf".
    const baseName = pdf.slice(0, -pdfExtension.length);
    const jsonFile = `${baseName}.json`;
    if (available.has(jsonFile)) {
      testCases.push({
        name: baseName,
        pdfPath: path.join(testDir, pdf),
        jsonPath: path.join(testDir, jsonFile),
      });
    }
  }

  return testCases.sort((a, b) => a.name.localeCompare(b.name));
}
// Tests

// Setup, step 1: the run only makes sense when ensureMiniCpm() reports success.
tap.test('setup: ensure Docker containers are running', async () => {
  console.log('\n[Setup] Checking Docker containers...\n');
  const containerReady = await ensureMiniCpm();
  expect(containerReady).toBeTrue();
  console.log('\n[Setup] All containers ready!\n');
});
// Setup, step 2: create the shared SmartAi provider and the DualAgentOrchestrator
// used by extractTransactionsFromPage(). Both are stored in module-level variables.
tap.test('setup: initialize smartagent orchestrator', async () => {
console.log('[Setup] Initializing SmartAi and DualAgentOrchestrator...');
// Ollama-backed provider pointing at the local server and the MiniCPM-V model tag.
smartAi = new SmartAi({
ollama: {
baseUrl: OLLAMA_URL,
model: MODEL,
// Generation options handed to the provider as defaults.
// NOTE(review): presumably forwarded to Ollama as request options — confirm against smartai.
defaultOptions: {
num_ctx: 32768,
num_predict: 4000,
temperature: 0.1,
},
defaultTimeout: 300000, // 5 minutes for vision tasks
},
});
await smartAi.start();
orchestrator = new DualAgentOrchestrator({
smartAiInstance: smartAi,
defaultProvider: 'ollama',
// Policy text for the guardian agent: approve json.validate calls, reject anything unrelated.
guardianPolicyPrompt: `You are a Guardian agent overseeing bank statement extraction tasks.
APPROVE all tool calls that:
- Use the json.validate action to verify JSON output
- Are reasonable attempts to complete the extraction task
REJECT tool calls that:
- Attempt to access external resources
- Try to execute arbitrary code
- Are clearly unrelated to bank statement extraction`,
// System message for the driver agent: describes the task, the tool-call syntax and
// the <task_complete> wrapper that extractJsonFromResponse() parses.
driverSystemMessage: `You are an AI assistant that extracts bank transactions from statement images.
Your task is to analyze bank statement images and extract transaction data.
You have access to a json.validate tool to verify your JSON output.
IMPORTANT: Always validate your JSON before completing the task.
## Tool Usage Format
When you need to validate JSON, output:
<tool_call>
<tool>json</tool>
<action>validate</action>
<params>{"jsonString": "YOUR_JSON_ARRAY"}</params>
</tool_call>
## Completion Format
After validation passes, complete the task:
<task_complete>
[{"date": "YYYY-MM-DD", "counterparty": "...", "amount": -123.45}, ...]
</task_complete>`,
maxIterations: 5,
maxConsecutiveRejections: 3,
// Stream only the driver's tokens to stdout; tokens from other sources are dropped.
onToken: (token, source) => {
if (source === 'driver') {
process.stdout.write(token);
}
},
// Surface only error-level progress events.
onProgress: (event) => {
if (event.logLevel === 'error') {
console.error(event.logMessage);
}
},
});
// Register the JsonValidatorTool
orchestrator.registerTool(new JsonValidatorTool());
await orchestrator.start();
console.log('[Setup] Orchestrator initialized!\n');
});
// Check via Ollama's /api/tags listing that a MiniCPM model is available locally.
tap.test('should have MiniCPM-V model loaded', async () => {
  const response = await fetch(`${OLLAMA_URL}/api/tags`);
  // Fail with a clear assertion when Ollama answers with an error status,
  // instead of an opaque error while reading the body.
  expect(response.ok).toBeTrue();

  const data = (await response.json()) as { models?: Array<{ name: string }> };
  // A payload without a `models` array is treated as "no models" so the
  // assertion below reports the problem rather than a TypeError.
  const models = Array.isArray(data.models) ? data.models : [];
  const modelNames = models.map((m) => m.name);
  expect(modelNames.some((name) => name.includes('minicpm'))).toBeTrue();
});
// Discover fixtures once at load time and register one extraction test per PDF/JSON pair.
const testCases = findTestCases();
console.log(`\nFound ${testCases.length} bank statement test cases (smartagent + MiniCPM-V)\n`);

// Tallies reported by the 'summary' test.
let passedCount = 0;
let failedCount = 0;

testCases.forEach((testCase) => {
  tap.test(`should extract: ${testCase.name}`, async () => {
    const fixtureText = fs.readFileSync(testCase.jsonPath, 'utf-8');
    const expected: ITransaction[] = JSON.parse(fixtureText);
    console.log(`\n=== ${testCase.name} ===`);
    console.log(`Expected: ${expected.length} transactions`);

    const images = convertPdfToImages(testCase.pdfPath);
    console.log(` Pages: ${images.length}`);

    const extracted = await extractTransactions(images);
    console.log(` Extracted: ${extracted.length} transactions`);

    const { matches, total, errors, variations } = compareTransactions(extracted, expected);
    const perfectMatch = matches === total && extracted.length === expected.length;

    if (!perfectMatch) {
      failedCount++;
      console.log(` Result: FAIL (${matches}/${total})`);
      // Cap the error listing at ten lines.
      for (const message of errors.slice(0, 10)) {
        console.log(` - ${message}`);
      }
    } else {
      passedCount++;
      console.log(` Result: PASS (${matches}/${total})`);
    }

    // Names that differ although date and amount matched are informational only.
    if (variations.length > 0) {
      console.log(` Counterparty variations (${variations.length}):`);
      for (const variation of variations.slice(0, 5)) {
        console.log(` ${variation}`);
      }
      const remaining = variations.length - 5;
      if (remaining > 0) {
        console.log(` ... and ${remaining} more variations`);
      }
    }

    expect(matches).toEqual(total);
    expect(extracted.length).toEqual(expected.length);
  });
});
// Teardown: stop the orchestrator if the setup test got far enough to create it.
tap.test('cleanup: stop orchestrator', async () => {
  const active = orchestrator;
  if (active) {
    await active.stop();
  }
  console.log('[Cleanup] Orchestrator stopped');
});
// Final report: print the pass/fail tallies collected by the extraction tests.
tap.test('summary', async () => {
  const total = testCases.length;
  const reportLines = [
    `\n======================================================`,
    ` Bank Statement Summary`,
    ` (smartagent + ${MODEL})`,
    `======================================================`,
    ` Method: DualAgentOrchestrator with vision`,
    ` Passed: ${passedCount}/${total}`,
    ` Failed: ${failedCount}/${total}`,
    `======================================================\n`,
  ];
  for (const line of reportLines) {
    console.log(line);
  }
});

// Start the tap run; the resulting value is the module's default export.
export default tap.start();