feat(vision): process pages separately and make Qwen3-VL vision extraction more robust; add per-page parsing, safer JSON handling, reduced token usage, and multi-query invoice extraction
This commit is contained in:
@@ -53,23 +53,14 @@ function convertPdfToImages(pdfPath: string): string[] {
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract transactions using Qwen3-VL vision
|
||||
* Extract transactions from a single page
|
||||
* Processes one page at a time to minimize thinking tokens
|
||||
*/
|
||||
async function extractTransactions(images: string[]): Promise<ITransaction[]> {
|
||||
console.log(` [Vision] Processing ${images.length} page(s) with Qwen3-VL`);
|
||||
|
||||
async function extractTransactionsFromPage(image: string, pageNum: number): Promise<ITransaction[]> {
|
||||
const prompt = `/no_think
|
||||
Extract ALL transactions from this bank statement.
|
||||
|
||||
Amount format:
|
||||
- "- 21,47 €" = DEBIT = -21.47
|
||||
- "+ 1.000,00 €" = CREDIT = 1000.00
|
||||
- European format: comma is decimal separator
|
||||
|
||||
For each transaction: {"date":"YYYY-MM-DD","counterparty":"NAME","amount":-21.47}
|
||||
|
||||
Return ONLY a JSON array, no explanation:
|
||||
[{"date":"...","counterparty":"...","amount":0},...]`;
|
||||
Extract transactions from this bank statement page.
|
||||
Amount: "- 21,47 €" = -21.47, "+ 1.000,00 €" = 1000.00 (European format)
|
||||
Return JSON array only: [{"date":"YYYY-MM-DD","counterparty":"NAME","amount":-21.47},...]`;
|
||||
|
||||
const response = await fetch(`${OLLAMA_URL}/api/chat`, {
|
||||
method: 'POST',
|
||||
@@ -79,26 +70,28 @@ Return ONLY a JSON array, no explanation:
|
||||
messages: [{
|
||||
role: 'user',
|
||||
content: prompt,
|
||||
images: images,
|
||||
images: [image],
|
||||
}],
|
||||
stream: false,
|
||||
think: false,
|
||||
options: {
|
||||
num_predict: 8000,
|
||||
num_predict: 4000,
|
||||
temperature: 0.1,
|
||||
},
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const err = await response.text();
|
||||
throw new Error(`Ollama API error: ${response.status} - ${err}`);
|
||||
throw new Error(`Ollama API error: ${response.status}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
let content = data.message?.content || '';
|
||||
|
||||
console.log(` [Vision] Got ${content.length} chars`);
|
||||
if (!content) {
|
||||
console.log(` [Page ${pageNum}] Empty response`);
|
||||
return [];
|
||||
}
|
||||
|
||||
// Parse JSON array
|
||||
if (content.startsWith('```json')) content = content.slice(7);
|
||||
@@ -110,10 +103,37 @@ Return ONLY a JSON array, no explanation:
|
||||
const endIdx = content.lastIndexOf(']') + 1;
|
||||
|
||||
if (startIdx < 0 || endIdx <= startIdx) {
|
||||
throw new Error(`No JSON array found: ${content.substring(0, 300)}`);
|
||||
console.log(` [Page ${pageNum}] No JSON array found`);
|
||||
return [];
|
||||
}
|
||||
|
||||
return JSON.parse(content.substring(startIdx, endIdx));
|
||||
try {
|
||||
const transactions = JSON.parse(content.substring(startIdx, endIdx));
|
||||
console.log(` [Page ${pageNum}] Found ${transactions.length} transactions`);
|
||||
return transactions;
|
||||
} catch {
|
||||
console.log(` [Page ${pageNum}] JSON parse error`);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Extract transactions from every page image using Qwen3-VL vision.
 *
 * Each page is passed on its own to `extractTransactionsFromPage`, together
 * with its 1-based page number. Pages are awaited one after another, never
 * in parallel, so the model handles a single page at a time. Per-page
 * results are appended in page order.
 *
 * @param images - One entry per page, as accepted by `extractTransactionsFromPage`.
 * @returns The transactions of all pages combined, in page order.
 */
async function extractTransactions(images: string[]): Promise<ITransaction[]> {
  console.log(` [Vision] Processing ${images.length} page(s) with Qwen3-VL`);

  const collected: ITransaction[] = [];

  // Deliberately sequential: each page must finish before the next request starts.
  for (const [index, pageImage] of images.entries()) {
    const pageNumber = index + 1;
    const found = await extractTransactionsFromPage(pageImage, pageNumber);
    collected.push(...found);
  }

  console.log(` [Vision] Total: ${collected.length} transactions`);
  return collected;
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user