/** * Bank statement extraction using MiniCPM-V (visual extraction) * * Multi-query approach with thinking DISABLED for speed: * 1. First ask how many transactions on each page * 2. Then query each transaction individually * Single pass, no consensus voting. */ import { tap, expect } from '@git.zone/tstest/tapbundle'; import * as fs from 'fs'; import * as path from 'path'; import { execSync } from 'child_process'; import * as os from 'os'; import { ensureMiniCpm } from './helpers/docker.js'; const OLLAMA_URL = 'http://localhost:11434'; const MODEL = 'minicpm-v:latest'; interface ITransaction { date: string; counterparty: string; amount: number; } /** * Convert PDF to PNG images using ImageMagick */ function convertPdfToImages(pdfPath: string): string[] { const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pdf-convert-')); const outputPattern = path.join(tempDir, 'page-%d.png'); try { execSync( `convert -density 300 -quality 100 "${pdfPath}" -background white -alpha remove "${outputPattern}"`, { stdio: 'pipe' } ); const files = fs.readdirSync(tempDir).filter((f: string) => f.endsWith('.png')).sort(); const images: string[] = []; for (const file of files) { const imagePath = path.join(tempDir, file); const imageData = fs.readFileSync(imagePath); images.push(imageData.toString('base64')); } return images; } finally { fs.rmSync(tempDir, { recursive: true, force: true }); } } /** * Query MiniCPM-V with a prompt (thinking disabled for speed) */ async function queryVision(image: string, prompt: string): Promise { const response = await fetch(`${OLLAMA_URL}/api/generate`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model: MODEL, prompt: `/no_think\n${prompt}`, images: [image], stream: false, options: { num_predict: 500, temperature: 0.1, }, }), }); if (!response.ok) { throw new Error(`Ollama API error: ${response.status}`); } const data = await response.json(); return (data.response || '').trim(); } /** * Count transactions on a page */ async function countTransactions(image: string, pageNum: number): Promise { const response = await queryVision(image, `Count the transaction rows in this bank statement table. Each transaction has a date, description, and amount (debit or credit). Do not count headers or totals. How many transaction rows are there? Answer with just the number.` ); console.log(` [Page ${pageNum}] Count response: "${response}"`); const match = response.match(/(\d+)/); const count = match ? parseInt(match[1], 10) : 0; console.log(` [Page ${pageNum}] Parsed count: ${count}`); return count; } /** * Get a single transaction by index (logs immediately) */ async function getTransaction(image: string, index: number, pageNum: number): Promise { const response = await queryVision(image, `Look at transaction row #${index} in the bank statement table (row 1 is the first transaction after the header). Extract: - DATE: in YYYY-MM-DD format - COUNTERPARTY: the description/name - AMOUNT: as a number (negative for debits like "- 21,47 €" = -21.47, positive for credits) Format your answer as: DATE|COUNTERPARTY|AMOUNT Example: 2024-01-15|Amazon|-25.99` ); // Parse the response const lines = response.split('\n').filter(l => l.includes('|')); const line = lines[lines.length - 1] || response; const parts = line.split('|').map(p => p.trim()); if (parts.length >= 3) { // Parse amount - handle various formats let amountStr = parts[2].replace(/[€$£\s]/g, '').replace('−', '-').replace('–', '-'); // European format: comma is decimal if (amountStr.includes(',')) { amountStr = amountStr.replace(/\./g, '').replace(',', '.'); } const amount = parseFloat(amountStr) || 0; const tx = { date: parts[0], counterparty: parts[1], amount: amount, }; // Log immediately as this transaction completes console.log(` [P${pageNum} Tx${index.toString().padStart(2, ' ')}] ${tx.date} | ${tx.counterparty.substring(0, 25).padEnd(25)} | ${tx.amount >= 0 ? '+' : ''}${tx.amount.toFixed(2)}`); return tx; } // Log raw response on parse failure console.log(` [P${pageNum} Tx${index.toString().padStart(2, ' ')}] PARSE FAILED: "${response.replace(/\n/g, ' ').substring(0, 60)}..."`); return null; } /** * Extract transactions from a single page using multi-query approach */ async function extractTransactionsFromPage(image: string, pageNum: number): Promise { // Step 1: Count transactions const count = await countTransactions(image, pageNum); if (count === 0) { return []; } // Step 2: Query each transaction (in batches to avoid overwhelming) // Each transaction logs itself as it completes const transactions: ITransaction[] = []; const batchSize = 5; for (let start = 1; start <= count; start += batchSize) { const end = Math.min(start + batchSize - 1, count); const indices = Array.from({ length: end - start + 1 }, (_, i) => start + i); // Query batch in parallel - each logs as it completes const results = await Promise.all( indices.map(i => getTransaction(image, i, pageNum)) ); for (const tx of results) { if (tx) { transactions.push(tx); } } } console.log(` [Page ${pageNum}] Complete: ${transactions.length}/${count} extracted`); return transactions; } /** * Extract all transactions from bank statement */ async function extractTransactions(images: string[]): Promise { console.log(` [Vision] Processing ${images.length} page(s) with MiniCPM-V (multi-query, deep think)`); const allTransactions: ITransaction[] = []; for (let i = 0; i < images.length; i++) { const pageTransactions = await extractTransactionsFromPage(images[i], i + 1); allTransactions.push(...pageTransactions); } console.log(` [Vision] Total: ${allTransactions.length} transactions`); return allTransactions; } /** * Compare extracted transactions against expected */ function compareTransactions( extracted: ITransaction[], expected: ITransaction[] ): { matches: number; total: number; errors: string[] } { const errors: string[] = []; let matches = 0; for (let i = 0; i < expected.length; i++) { const exp = expected[i]; const ext = extracted[i]; if (!ext) { errors.push(`Missing transaction ${i}: ${exp.date} ${exp.counterparty}`); continue; } const dateMatch = ext.date === exp.date; const amountMatch = Math.abs(ext.amount - exp.amount) < 0.01; if (dateMatch && amountMatch) { matches++; } else { errors.push( `Mismatch at ${i}: expected ${exp.date}/${exp.amount}, got ${ext.date}/${ext.amount}` ); } } if (extracted.length > expected.length) { errors.push(`Extra transactions: ${extracted.length - expected.length}`); } return { matches, total: expected.length, errors }; } /** * Find all test cases (PDF + JSON pairs) in .nogit/ */ function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: string }> { const testDir = path.join(process.cwd(), '.nogit'); if (!fs.existsSync(testDir)) { return []; } const files = fs.readdirSync(testDir); const pdfFiles = files.filter((f: string) => f.endsWith('.pdf')); const testCases: Array<{ name: string; pdfPath: string; jsonPath: string }> = []; for (const pdf of pdfFiles) { const baseName = pdf.replace('.pdf', ''); const jsonFile = `${baseName}.json`; if (files.includes(jsonFile)) { testCases.push({ name: baseName, pdfPath: path.join(testDir, pdf), jsonPath: path.join(testDir, jsonFile), }); } } return testCases.sort((a, b) => a.name.localeCompare(b.name)); } // Tests tap.test('setup: ensure Docker containers are running', async () => { console.log('\n[Setup] Checking Docker containers...\n'); const minicpmOk = await ensureMiniCpm(); expect(minicpmOk).toBeTrue(); console.log('\n[Setup] All containers ready!\n'); }); tap.test('should have MiniCPM-V model loaded', async () => { const response = await fetch(`${OLLAMA_URL}/api/tags`); const data = await response.json(); const modelNames = data.models.map((m: { name: string }) => m.name); expect(modelNames.some((name: string) => name.includes('minicpm'))).toBeTrue(); }); const testCases = findTestCases(); console.log(`\nFound ${testCases.length} bank statement test cases (MiniCPM-V)\n`); let passedCount = 0; let failedCount = 0; for (const testCase of testCases) { tap.test(`should extract: ${testCase.name}`, async () => { const expected: ITransaction[] = JSON.parse(fs.readFileSync(testCase.jsonPath, 'utf-8')); console.log(`\n=== ${testCase.name} ===`); console.log(`Expected: ${expected.length} transactions`); const images = convertPdfToImages(testCase.pdfPath); console.log(` Pages: ${images.length}`); const extracted = await extractTransactions(images); console.log(` Extracted: ${extracted.length} transactions`); const result = compareTransactions(extracted, expected); const accuracy = result.total > 0 ? result.matches / result.total : 0; if (accuracy >= 0.95 && extracted.length === expected.length) { passedCount++; console.log(` Result: PASS (${result.matches}/${result.total})`); } else { failedCount++; console.log(` Result: FAIL (${result.matches}/${result.total})`); result.errors.slice(0, 5).forEach((e) => console.log(` - ${e}`)); } expect(accuracy).toBeGreaterThan(0.95); expect(extracted.length).toEqual(expected.length); }); } tap.test('summary', async () => { const total = testCases.length; console.log(`\n======================================================`); console.log(` Bank Statement Summary (MiniCPM-V)`); console.log(`======================================================`); console.log(` Method: Multi-query (no_think)`); console.log(` Passed: ${passedCount}/${total}`); console.log(` Failed: ${failedCount}/${total}`); console.log(`======================================================\n`); }); export default tap.start();