feat(ocr): add PaddleOCR GPU Docker image and FastAPI OCR server with entrypoint; implement OCR endpoints and consensus extraction testing
This commit is contained in:
@@ -7,7 +7,7 @@ import * as os from 'os';
|
||||
const OLLAMA_URL = 'http://localhost:11434';
|
||||
const MODEL = 'openbmb/minicpm-v4.5:q8_0';
|
||||
|
||||
const BANK_STATEMENT_PROMPT = `You are a bank statement parser. Extract EVERY transaction from the table.
|
||||
const EXTRACT_PROMPT = `You are a bank statement parser. Extract EVERY transaction from the table.
|
||||
|
||||
Read the Amount column carefully:
|
||||
- "- 21,47 €" means DEBIT, output as: -21.47
|
||||
@@ -16,7 +16,7 @@ Read the Amount column carefully:
|
||||
|
||||
For each row output: {"date":"YYYY-MM-DD","counterparty":"NAME","amount":-21.47}
|
||||
|
||||
Do not skip any rows. Return complete JSON array:`;
|
||||
Do not skip any rows. Return ONLY the JSON array, no explanation.`;
|
||||
|
||||
interface ITransaction {
|
||||
date: string;
|
||||
@@ -53,12 +53,12 @@ function convertPdfToImages(pdfPath: string): string[] {
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract transactions from images using Ollama with streaming
|
||||
* Single extraction pass
|
||||
*/
|
||||
async function extractTransactionsStreaming(images: string[]): Promise<ITransaction[]> {
|
||||
async function extractOnce(images: string[], passNum: number): Promise<ITransaction[]> {
|
||||
const payload = {
|
||||
model: MODEL,
|
||||
prompt: BANK_STATEMENT_PROMPT,
|
||||
prompt: EXTRACT_PROMPT,
|
||||
images,
|
||||
stream: true,
|
||||
options: {
|
||||
@@ -86,7 +86,8 @@ async function extractTransactionsStreaming(images: string[]): Promise<ITransact
|
||||
let fullText = '';
|
||||
let lineBuffer = '';
|
||||
|
||||
// Stream and print output (buffer until newline for cleaner display)
|
||||
console.log(`[Pass ${passNum}] Extracting...`);
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
@@ -116,13 +117,11 @@ async function extractTransactionsStreaming(images: string[]): Promise<ITransact
|
||||
}
|
||||
}
|
||||
|
||||
// Print any remaining buffer
|
||||
if (lineBuffer) {
|
||||
console.log(lineBuffer);
|
||||
}
|
||||
console.log('');
|
||||
|
||||
// Parse JSON from response
|
||||
const startIdx = fullText.indexOf('[');
|
||||
const endIdx = fullText.lastIndexOf(']') + 1;
|
||||
|
||||
@@ -133,6 +132,60 @@ async function extractTransactionsStreaming(images: string[]): Promise<ITransact
|
||||
return JSON.parse(fullText.substring(startIdx, endIdx));
|
||||
}
|
||||
|
||||
/**
 * Build an order-independent fingerprint of a transaction list so the results
 * of two extraction passes can be compared with a plain string equality check.
 *
 * Only `date` and `amount` (rendered with two decimals) take part in the
 * fingerprint; every other field is ignored.
 *
 * @param transactions - Transactions to fingerprint. The array is not mutated.
 * @returns The `date|amount` entries, sorted and joined with `;`
 *          (an empty string for an empty list).
 * @throws TypeError if an entry's `amount` is neither a number nor a string.
 */
function hashTransactions(transactions: ITransaction[]): string {
  return transactions
    .map((t, index) => {
      // Callers may pass values straight from JSON.parse, where `amount` is not
      // guaranteed to be a number at runtime (e.g. "-21.47" as a string).
      const raw: unknown = t.amount;
      let amount: number;
      if (typeof raw === 'number') {
        amount = raw;
      } else if (typeof raw === 'string') {
        amount = Number(raw);
      } else {
        throw new TypeError(
          `Transaction at index ${index} has a non-numeric amount (${typeof raw})`,
        );
      }
      return `${t.date}|${amount.toFixed(2)}`;
    })
    .sort()
    .join(';');
}
|
||||
|
||||
/**
 * Run repeated extraction passes until two of them agree.
 *
 * Passes run one after another through `extractOnce`, and each result is
 * fingerprinted with `hashTransactions`. As soon as a fingerprint has been
 * seen twice, the transactions from the pass that produced the second match
 * are returned. If no two passes agree within `maxPasses`, the most frequent
 * result is returned; on a tie the earliest pass wins.
 *
 * @param images - Page images, forwarded unchanged to `extractOnce`.
 * @param maxPasses - Upper bound on the number of extraction passes (at least 1).
 * @returns The agreed-upon transactions, or the fallback result when no
 *          consensus was reached.
 * @throws RangeError if `maxPasses` is less than 1 or not a number.
 */
async function extractWithConsensus(images: string[], maxPasses: number = 5): Promise<ITransaction[]> {
  // Without at least one pass there is nothing to return; fail with a clear
  // message instead of dereferencing an undefined fallback result later.
  if (!(maxPasses >= 1)) {
    throw new RangeError(`maxPasses must be at least 1, got ${maxPasses}`);
  }

  // Per fingerprint: how often it was seen and the first result that produced it.
  const tally = new Map<string, { count: number; transactions: ITransaction[] }>();

  for (let pass = 1; pass <= maxPasses; pass++) {
    const transactions = await extractOnce(images, pass);
    const hash = hashTransactions(transactions);

    let entry = tally.get(hash);
    if (entry === undefined) {
      entry = { count: 0, transactions };
      tally.set(hash, entry);
    }
    entry.count += 1;

    console.log(`[Pass ${pass}] Got ${transactions.length} transactions (hash: ${hash.substring(0, 20)}...)`);

    // Check if we have consensus (2+ matching)
    if (entry.count >= 2) {
      console.log(`[Consensus] Reached after ${pass} passes (${entry.count} matching results)`);
      return transactions;
    }

    // Only announce a retry when another pass will actually run.
    if (pass >= 2 && pass < maxPasses) {
      console.log(`[Pass ${pass}] No consensus yet, trying again...`);
    }
  }

  // No consensus reached - return the most common result. Map iteration
  // follows insertion order, so the strict `>` keeps the earliest pass on ties.
  let best: { count: number; transactions: ITransaction[] } | undefined;
  for (const entry of tally.values()) {
    if (best === undefined || entry.count > best.count) {
      best = entry;
    }
  }

  if (best === undefined) {
    // Not expected after the maxPasses guard; kept so the type narrows
    // without a non-null assertion.
    throw new Error('No extraction pass produced a result');
  }

  console.log(`[No consensus] Using most common result (${best.count}/${maxPasses} passes)`);
  return best.transactions;
}
|
||||
|
||||
/**
|
||||
* Compare extracted transactions against expected
|
||||
*/
|
||||
@@ -227,16 +280,15 @@ for (const testCase of testCases) {
|
||||
// Convert PDF to images
|
||||
console.log('Converting PDF to images...');
|
||||
const images = convertPdfToImages(testCase.pdfPath);
|
||||
console.log(`Converted: ${images.length} pages`);
|
||||
console.log(`Converted: ${images.length} pages\n`);
|
||||
|
||||
// Extract transactions with streaming output
|
||||
console.log('Extracting transactions (streaming)...\n');
|
||||
const extracted = await extractTransactionsStreaming(images);
|
||||
console.log(`Extracted: ${extracted.length} transactions`);
|
||||
// Extract with consensus voting
|
||||
const extracted = await extractWithConsensus(images);
|
||||
console.log(`\nFinal: ${extracted.length} transactions`);
|
||||
|
||||
// Compare results
|
||||
const result = compareTransactions(extracted, expected);
|
||||
console.log(`Matches: ${result.matches}/${result.total}`);
|
||||
console.log(`Accuracy: ${result.matches}/${result.total}`);
|
||||
|
||||
if (result.errors.length > 0) {
|
||||
console.log('Errors:');
|
||||
|
||||
Reference in New Issue
Block a user