Files
ht-docker-ai/test/test.bankstatements.minicpm.ts

325 lines
10 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Bank statement extraction using MiniCPM-V (visual extraction)
*
* Multi-query approach with thinking DISABLED for speed:
* 1. First ask how many transactions on each page
* 2. Then query each transaction individually
* Single pass, no consensus voting.
*/
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as fs from 'fs';
import * as path from 'path';
import { execSync } from 'child_process';
import * as os from 'os';
import { ensureMiniCpm } from './helpers/docker.js';
const OLLAMA_URL = 'http://localhost:11434';
const MODEL = 'minicpm-v:latest';
interface ITransaction {
date: string;
counterparty: string;
amount: number;
}
/**
* Convert PDF to PNG images using ImageMagick
*/
function convertPdfToImages(pdfPath: string): string[] {
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pdf-convert-'));
const outputPattern = path.join(tempDir, 'page-%d.png');
try {
execSync(
`convert -density 300 -quality 100 "${pdfPath}" -background white -alpha remove "${outputPattern}"`,
{ stdio: 'pipe' }
);
const files = fs.readdirSync(tempDir).filter((f: string) => f.endsWith('.png')).sort();
const images: string[] = [];
for (const file of files) {
const imagePath = path.join(tempDir, file);
const imageData = fs.readFileSync(imagePath);
images.push(imageData.toString('base64'));
}
return images;
} finally {
fs.rmSync(tempDir, { recursive: true, force: true });
}
}
/**
* Query MiniCPM-V with a prompt (thinking disabled for speed)
*/
async function queryVision(image: string, prompt: string): Promise<string> {
const response = await fetch(`${OLLAMA_URL}/api/generate`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: MODEL,
prompt: `/no_think\n${prompt}`,
images: [image],
stream: false,
options: {
num_predict: 500,
temperature: 0.1,
},
}),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.status}`);
}
const data = await response.json();
return (data.response || '').trim();
}
/**
* Count transactions on a page
*/
async function countTransactions(image: string, pageNum: number): Promise<number> {
const response = await queryVision(image,
`Count the transaction rows in this bank statement table.
Each transaction has a date, description, and amount (debit or credit).
Do not count headers or totals.
How many transaction rows are there? Answer with just the number.`
);
console.log(` [Page ${pageNum}] Count response: "${response}"`);
const match = response.match(/(\d+)/);
const count = match ? parseInt(match[1], 10) : 0;
console.log(` [Page ${pageNum}] Parsed count: ${count}`);
return count;
}
/**
* Get a single transaction by index (logs immediately)
*/
async function getTransaction(image: string, index: number, pageNum: number): Promise<ITransaction | null> {
const response = await queryVision(image,
`Look at transaction row #${index} in the bank statement table (row 1 is the first transaction after the header).
Extract:
- DATE: in YYYY-MM-DD format
- COUNTERPARTY: the description/name
- AMOUNT: as a number (negative for debits like "- 21,47 €" = -21.47, positive for credits)
Format your answer as: DATE|COUNTERPARTY|AMOUNT
Example: 2024-01-15|Amazon|-25.99`
);
// Parse the response
const lines = response.split('\n').filter(l => l.includes('|'));
const line = lines[lines.length - 1] || response;
const parts = line.split('|').map(p => p.trim());
if (parts.length >= 3) {
// Parse amount - handle various formats
let amountStr = parts[2].replace(/[€$£\s]/g, '').replace('', '-').replace('', '-');
// European format: comma is decimal
if (amountStr.includes(',')) {
amountStr = amountStr.replace(/\./g, '').replace(',', '.');
}
const amount = parseFloat(amountStr) || 0;
const tx = {
date: parts[0],
counterparty: parts[1],
amount: amount,
};
// Log immediately as this transaction completes
console.log(` [P${pageNum} Tx${index.toString().padStart(2, ' ')}] ${tx.date} | ${tx.counterparty.substring(0, 25).padEnd(25)} | ${tx.amount >= 0 ? '+' : ''}${tx.amount.toFixed(2)}`);
return tx;
}
// Log raw response on parse failure
console.log(` [P${pageNum} Tx${index.toString().padStart(2, ' ')}] PARSE FAILED: "${response.replace(/\n/g, ' ').substring(0, 60)}..."`);
return null;
}
/**
* Extract transactions from a single page using multi-query approach
*/
async function extractTransactionsFromPage(image: string, pageNum: number): Promise<ITransaction[]> {
// Step 1: Count transactions
const count = await countTransactions(image, pageNum);
if (count === 0) {
return [];
}
// Step 2: Query each transaction (in batches to avoid overwhelming)
// Each transaction logs itself as it completes
const transactions: ITransaction[] = [];
const batchSize = 5;
for (let start = 1; start <= count; start += batchSize) {
const end = Math.min(start + batchSize - 1, count);
const indices = Array.from({ length: end - start + 1 }, (_, i) => start + i);
// Query batch in parallel - each logs as it completes
const results = await Promise.all(
indices.map(i => getTransaction(image, i, pageNum))
);
for (const tx of results) {
if (tx) {
transactions.push(tx);
}
}
}
console.log(` [Page ${pageNum}] Complete: ${transactions.length}/${count} extracted`);
return transactions;
}
/**
* Extract all transactions from bank statement
*/
async function extractTransactions(images: string[]): Promise<ITransaction[]> {
console.log(` [Vision] Processing ${images.length} page(s) with MiniCPM-V (multi-query, deep think)`);
const allTransactions: ITransaction[] = [];
for (let i = 0; i < images.length; i++) {
const pageTransactions = await extractTransactionsFromPage(images[i], i + 1);
allTransactions.push(...pageTransactions);
}
console.log(` [Vision] Total: ${allTransactions.length} transactions`);
return allTransactions;
}
/**
* Compare extracted transactions against expected
*/
function compareTransactions(
extracted: ITransaction[],
expected: ITransaction[]
): { matches: number; total: number; errors: string[] } {
const errors: string[] = [];
let matches = 0;
for (let i = 0; i < expected.length; i++) {
const exp = expected[i];
const ext = extracted[i];
if (!ext) {
errors.push(`Missing transaction ${i}: ${exp.date} ${exp.counterparty}`);
continue;
}
const dateMatch = ext.date === exp.date;
const amountMatch = Math.abs(ext.amount - exp.amount) < 0.01;
if (dateMatch && amountMatch) {
matches++;
} else {
errors.push(
`Mismatch at ${i}: expected ${exp.date}/${exp.amount}, got ${ext.date}/${ext.amount}`
);
}
}
if (extracted.length > expected.length) {
errors.push(`Extra transactions: ${extracted.length - expected.length}`);
}
return { matches, total: expected.length, errors };
}
/**
* Find all test cases (PDF + JSON pairs) in .nogit/
*/
function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: string }> {
const testDir = path.join(process.cwd(), '.nogit');
if (!fs.existsSync(testDir)) {
return [];
}
const files = fs.readdirSync(testDir);
const pdfFiles = files.filter((f: string) => f.endsWith('.pdf'));
const testCases: Array<{ name: string; pdfPath: string; jsonPath: string }> = [];
for (const pdf of pdfFiles) {
const baseName = pdf.replace('.pdf', '');
const jsonFile = `${baseName}.json`;
if (files.includes(jsonFile)) {
testCases.push({
name: baseName,
pdfPath: path.join(testDir, pdf),
jsonPath: path.join(testDir, jsonFile),
});
}
}
return testCases.sort((a, b) => a.name.localeCompare(b.name));
}
// Tests
tap.test('setup: ensure Docker containers are running', async () => {
console.log('\n[Setup] Checking Docker containers...\n');
const minicpmOk = await ensureMiniCpm();
expect(minicpmOk).toBeTrue();
console.log('\n[Setup] All containers ready!\n');
});
tap.test('should have MiniCPM-V model loaded', async () => {
const response = await fetch(`${OLLAMA_URL}/api/tags`);
const data = await response.json();
const modelNames = data.models.map((m: { name: string }) => m.name);
expect(modelNames.some((name: string) => name.includes('minicpm'))).toBeTrue();
});
const testCases = findTestCases();
console.log(`\nFound ${testCases.length} bank statement test cases (MiniCPM-V)\n`);
let passedCount = 0;
let failedCount = 0;
for (const testCase of testCases) {
tap.test(`should extract: ${testCase.name}`, async () => {
const expected: ITransaction[] = JSON.parse(fs.readFileSync(testCase.jsonPath, 'utf-8'));
console.log(`\n=== ${testCase.name} ===`);
console.log(`Expected: ${expected.length} transactions`);
const images = convertPdfToImages(testCase.pdfPath);
console.log(` Pages: ${images.length}`);
const extracted = await extractTransactions(images);
console.log(` Extracted: ${extracted.length} transactions`);
const result = compareTransactions(extracted, expected);
const accuracy = result.total > 0 ? result.matches / result.total : 0;
if (accuracy >= 0.95 && extracted.length === expected.length) {
passedCount++;
console.log(` Result: PASS (${result.matches}/${result.total})`);
} else {
failedCount++;
console.log(` Result: FAIL (${result.matches}/${result.total})`);
result.errors.slice(0, 5).forEach((e) => console.log(` - ${e}`));
}
expect(accuracy).toBeGreaterThan(0.95);
expect(extracted.length).toEqual(expected.length);
});
}
tap.test('summary', async () => {
const total = testCases.length;
console.log(`\n======================================================`);
console.log(` Bank Statement Summary (MiniCPM-V)`);
console.log(`======================================================`);
console.log(` Method: Multi-query (no_think)`);
console.log(` Passed: ${passedCount}/${total}`);
console.log(` Failed: ${failedCount}/${total}`);
console.log(`======================================================\n`);
});
export default tap.start();