feat(tests): switch vision tests to multi-query extraction (count then per-row/field queries) and add logging/summaries
This commit is contained in:
@@ -1,12 +1,10 @@
|
||||
/**
|
||||
* Bank statement extraction using Qwen3-VL 8B Vision (Direct)
|
||||
*
|
||||
* Single-step pipeline: PDF → Images → Qwen3-VL → JSON
|
||||
*
|
||||
* Key insights:
|
||||
* - Use /no_think in prompt + think:false in API to disable reasoning
|
||||
* - Need high num_predict (8000+) for many transactions
|
||||
* - Single pass extraction, no consensus needed
|
||||
* Multi-query approach:
|
||||
* 1. First ask how many transactions on each page
|
||||
* 2. Then query each transaction individually
|
||||
* Single pass, no consensus voting.
|
||||
*/
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as fs from 'fs';
|
||||
@@ -53,15 +51,9 @@ function convertPdfToImages(pdfPath: string): string[] {
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract transactions from a single page
|
||||
* Processes one page at a time to minimize thinking tokens
|
||||
* Query Qwen3-VL with a simple prompt
|
||||
*/
|
||||
async function extractTransactionsFromPage(image: string, pageNum: number): Promise<ITransaction[]> {
|
||||
const prompt = `/no_think
|
||||
Extract transactions from this bank statement page.
|
||||
Amount: "- 21,47 €" = -21.47, "+ 1.000,00 €" = 1000.00 (European format)
|
||||
Return JSON array only: [{"date":"YYYY-MM-DD","counterparty":"NAME","amount":-21.47},...]`;
|
||||
|
||||
async function queryVision(image: string, prompt: string): Promise<string> {
|
||||
const response = await fetch(`${OLLAMA_URL}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
@@ -73,9 +65,8 @@ Return JSON array only: [{"date":"YYYY-MM-DD","counterparty":"NAME","amount":-21
|
||||
images: [image],
|
||||
}],
|
||||
stream: false,
|
||||
think: false,
|
||||
options: {
|
||||
num_predict: 4000,
|
||||
num_predict: 500,
|
||||
temperature: 0.1,
|
||||
},
|
||||
}),
|
||||
@@ -86,47 +77,116 @@ Return JSON array only: [{"date":"YYYY-MM-DD","counterparty":"NAME","amount":-21
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
let content = data.message?.content || '';
|
||||
|
||||
if (!content) {
|
||||
console.log(` [Page ${pageNum}] Empty response`);
|
||||
return [];
|
||||
}
|
||||
|
||||
// Parse JSON array
|
||||
if (content.startsWith('```json')) content = content.slice(7);
|
||||
else if (content.startsWith('```')) content = content.slice(3);
|
||||
if (content.endsWith('```')) content = content.slice(0, -3);
|
||||
content = content.trim();
|
||||
|
||||
const startIdx = content.indexOf('[');
|
||||
const endIdx = content.lastIndexOf(']') + 1;
|
||||
|
||||
if (startIdx < 0 || endIdx <= startIdx) {
|
||||
console.log(` [Page ${pageNum}] No JSON array found`);
|
||||
return [];
|
||||
}
|
||||
|
||||
try {
|
||||
const transactions = JSON.parse(content.substring(startIdx, endIdx));
|
||||
console.log(` [Page ${pageNum}] Found ${transactions.length} transactions`);
|
||||
return transactions;
|
||||
} catch {
|
||||
console.log(` [Page ${pageNum}] JSON parse error`);
|
||||
return [];
|
||||
}
|
||||
return (data.message?.content || '').trim();
|
||||
}
|
||||
|
||||
/**
 * Ask the vision model how many transaction rows are visible on one page.
 *
 * The model is told to reply with a bare number; the first run of digits in
 * its reply is taken as the count. A reply without any digits counts as 0.
 *
 * @param image - Page image passed through to `queryVision`.
 * @param pageNum - Page number, used only to label log output.
 * @returns The parsed row count, or 0 when no number was found in the reply.
 */
async function countTransactions(image: string, pageNum: number): Promise<number> {
  const question = `How many transaction rows are in this bank statement table?
Count only the data rows (with dates like "01.01.2024" and amounts like "- 50,00 €").
Do NOT count the header row or summary/total rows.
Answer with just the number, for example: 7`;

  const reply = await queryVision(image, question);
  console.log(` [Page ${pageNum}] Count query response: "${reply}"`);

  // First digit sequence wins; anything else in the reply is ignored.
  const digits = /(\d+)/.exec(reply);
  let rowCount = 0;
  if (digits) {
    rowCount = parseInt(digits[1], 10);
  }

  console.log(` [Page ${pageNum}] Parsed count: ${rowCount}`);
  return rowCount;
}
|
||||
|
||||
/**
 * Convert an amount string into a number.
 *
 * Currency symbols and whitespace are stripped and Unicode minus/dash
 * characters are mapped to an ASCII hyphen. Whichever of `,` or `.` occurs
 * last is treated as the decimal separator, so both "1.000,00" (European) and
 * "1,000.00" parse to 1000. A string containing only a comma (e.g. "21,47")
 * is read with the comma as the decimal separator.
 *
 * @param raw - Amount text such as "- 21,47 €" or "+1,234.56".
 * @returns The numeric amount, or NaN when no number can be read.
 */
function parseTransactionAmount(raw: string): number {
  const cleaned = raw.replace(/[€$£\s]/g, '').replace(/[−–—]/g, '-');
  const lastComma = cleaned.lastIndexOf(',');
  const lastDot = cleaned.lastIndexOf('.');

  let normalized = cleaned;
  if (lastComma > lastDot) {
    // Comma is the decimal mark: dots are thousands separators.
    normalized = cleaned.replace(/\./g, '').replace(',', '.');
  } else if (lastComma !== -1) {
    // Dot is the decimal mark: commas are thousands separators.
    normalized = cleaned.replace(/,/g, '');
  }
  return parseFloat(normalized);
}

/**
 * Parse a model reply of the form `DATE|COUNTERPARTY|AMOUNT`.
 *
 * When the reply spans several lines, the last line containing a `|` is used;
 * if no line contains one, the whole reply is tried as-is.
 *
 * @param response - Raw text returned by the model.
 * @returns The parsed fields, or `null` when fewer than three fields are
 *   present or the amount is not a number.
 */
function parseTransactionResponse(
  response: string,
): { date: string; counterparty: string; amount: number } | null {
  const candidates = response.split('\n').filter(l => l.includes('|'));
  const line = candidates[candidates.length - 1] || response;
  const [date, counterparty, rawAmount] = line.split('|').map(p => p.trim());

  if (date === undefined || counterparty === undefined || rawAmount === undefined) {
    return null;
  }

  const amount = parseTransactionAmount(rawAmount);
  // An unreadable amount is a parse failure, not a 0.00 transaction.
  if (!Number.isFinite(amount)) {
    return null;
  }
  return { date, counterparty, amount };
}

/**
 * Get a single transaction by index (logs immediately when complete).
 *
 * @param image - Page image passed through to `queryVision`.
 * @param index - 1-based row number as phrased in the prompt ("counting from top, excluding headers").
 * @param pageNum - Page number, used only to label log output.
 * @returns The parsed transaction, or `null` when the reply could not be parsed.
 */
async function getTransaction(image: string, index: number, pageNum: number): Promise<ITransaction | null> {
  const response = await queryVision(image,
    `This is a bank statement. Look at transaction row #${index} in the table (counting from top, excluding headers).

Extract this transaction's details:
- Date in YYYY-MM-DD format
- Counterparty/description name
- Amount as number (negative for debits like "- 21,47 €" = -21.47, positive for credits like "+ 100,00 €" = 100.00)

Answer in format: DATE|COUNTERPARTY|AMOUNT
Example: 2024-01-15|Amazon|−25.99`
  );

  const label = `P${pageNum} Tx${index.toString().padStart(2, ' ')}`;
  const tx = parseTransactionResponse(response);

  if (tx) {
    // Log immediately as this transaction completes
    console.log(` [${label}] ${tx.date} | ${tx.counterparty.substring(0, 25).padEnd(25)} | ${tx.amount >= 0 ? '+' : ''}${tx.amount.toFixed(2)}`);
    return tx;
  }

  // Log raw response on parse failure
  console.log(` [${label}] PARSE FAILED: "${response.replace(/\n/g, ' ').substring(0, 60)}..."`);
  return null;
}
|
||||
|
||||
/**
 * Pull every transaction off one page with the count-then-query strategy.
 *
 * The row count is requested first; each row is then fetched with its own
 * `getTransaction` call. Rows are requested in groups of five that run
 * concurrently, and the next group starts only after the previous one has
 * finished. Rows that come back as `null` are dropped.
 *
 * @param image - Page image passed through to the query helpers.
 * @param pageNum - Page number, used only to label log output.
 * @returns The transactions that were parsed, in row order.
 */
async function extractTransactionsFromPage(image: string, pageNum: number): Promise<ITransaction[]> {
  const expected = await countTransactions(image, pageNum);
  if (expected === 0) {
    return [];
  }

  const groupSize = 5;
  const collected: ITransaction[] = [];

  let firstRow = 1;
  while (firstRow <= expected) {
    const lastRow = Math.min(firstRow + groupSize - 1, expected);

    // Start every query of this group before awaiting any of them.
    const pending: Promise<ITransaction | null>[] = [];
    for (let row = firstRow; row <= lastRow; row++) {
      pending.push(getTransaction(image, row, pageNum));
    }

    const settled = await Promise.all(pending);
    collected.push(...settled.filter((tx): tx is ITransaction => Boolean(tx)));

    firstRow += groupSize;
  }

  console.log(` [Page ${pageNum}] Complete: ${collected.length}/${expected} extracted`);
  return collected;
}
|
||||
|
||||
/**
|
||||
* Extract all transactions from bank statement
|
||||
*/
|
||||
async function extractTransactions(images: string[]): Promise<ITransaction[]> {
|
||||
console.log(` [Vision] Processing ${images.length} page(s) with Qwen3-VL`);
|
||||
console.log(` [Vision] Processing ${images.length} page(s) with Qwen3-VL (multi-query)`);
|
||||
|
||||
const allTransactions: ITransaction[] = [];
|
||||
|
||||
// Process pages sequentially to avoid overwhelming the model
|
||||
for (let i = 0; i < images.length; i++) {
|
||||
const pageTransactions = await extractTransactionsFromPage(images[i], i + 1);
|
||||
allTransactions.push(...pageTransactions);
|
||||
@@ -276,8 +336,9 @@ tap.test('summary', async () => {
|
||||
console.log(`\n======================================================`);
|
||||
console.log(` Bank Statement Summary (Qwen3-VL Vision)`);
|
||||
console.log(`======================================================`);
|
||||
console.log(` Passed: ${passedCount}/${total}`);
|
||||
console.log(` Failed: ${failedCount}/${total}`);
|
||||
console.log(` Method: Multi-query (count then extract each)`);
|
||||
console.log(` Passed: ${passedCount}/${total}`);
|
||||
console.log(` Failed: ${failedCount}/${total}`);
|
||||
console.log(`======================================================\n`);
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user