feat(tests): switch vision tests to multi-query extraction (count then per-row/field queries) and add logging/summaries
This commit is contained in:
@@ -1,10 +1,8 @@
|
||||
/**
|
||||
* Invoice extraction using Qwen3-VL 8B Vision (Direct)
|
||||
*
|
||||
* Single-step pipeline: PDF → Images → Qwen3-VL → JSON
|
||||
* Uses /no_think to disable reasoning mode for fast, direct responses.
|
||||
*
|
||||
* Qwen3-VL outperforms PaddleOCR-VL on certain invoice formats.
|
||||
* Multi-query approach: 7 parallel single-field queries to avoid token exhaustion.
|
||||
* Single pass, no consensus voting.
|
||||
*/
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as fs from 'fs';
|
||||
@@ -67,11 +65,10 @@ async function queryField(images: string[], question: string): Promise<string> {
|
||||
model: VISION_MODEL,
|
||||
messages: [{
|
||||
role: 'user',
|
||||
content: `/no_think\n${question} Reply with just the value, nothing else.`,
|
||||
content: `${question} Reply with just the value, nothing else.`,
|
||||
images: images,
|
||||
}],
|
||||
stream: false,
|
||||
think: false,
|
||||
options: {
|
||||
num_predict: 500,
|
||||
temperature: 0.1,
|
||||
@@ -96,35 +93,80 @@ async function extractInvoiceFromImages(images: string[]): Promise<IInvoice> {
|
||||
console.log(` [Vision] Processing ${images.length} page(s) with Qwen3-VL (multi-query)`);
|
||||
|
||||
// Query each field separately to avoid excessive thinking tokens
|
||||
const [invoiceNum, invoiceDate, vendor, currency, amounts] = await Promise.all([
|
||||
queryField(images, 'What is the invoice number on this document?'),
|
||||
queryField(images, 'What is the invoice date? Format as YYYY-MM-DD.'),
|
||||
queryField(images, 'What company issued this invoice?'),
|
||||
queryField(images, 'What currency is used? Answer EUR, USD, or GBP.'),
|
||||
queryField(images, 'What are the net amount, VAT amount, and total amount? Format: net,vat,total'),
|
||||
// Use explicit questions to avoid confusion between similar fields
|
||||
// Log each result as it comes in (not waiting for all to complete)
|
||||
// Thin wrapper around queryField: asks one question about the current page
// images, prints the raw answer under the given label as soon as that single
// request settles (independently of any sibling requests), and passes the
// answer through unchanged.
const queryAndLog = (name: string, question: string): Promise<string> =>
  queryField(images, question).then((answer) => {
    console.log(` [Query] ${name}: "${answer}"`);
    return answer;
  });
|
||||
|
||||
const [invoiceNum, invoiceDate, vendor, currency, totalAmount, netAmount, vatAmount] = await Promise.all([
|
||||
queryAndLog('Invoice Number', 'What is the INVOICE NUMBER (not VAT number, not customer ID)? Look for "Invoice No", "Invoice #", "Rechnung Nr", "Facture". Just the number/code.'),
|
||||
queryAndLog('Invoice Date ', 'What is the INVOICE DATE (not due date, not delivery date)? The date the invoice was issued. Format: YYYY-MM-DD'),
|
||||
queryAndLog('Vendor ', 'What company ISSUED this invoice (the seller/vendor, not the buyer)? Look at the letterhead or "From" section.'),
|
||||
queryAndLog('Currency ', 'What CURRENCY is used? Look for € (EUR), $ (USD), or £ (GBP). Answer with 3-letter code: EUR, USD, or GBP'),
|
||||
queryAndLog('Total Amount ', 'What is the TOTAL AMOUNT INCLUDING TAX (the final amount to pay, with VAT/tax included)? Just the number, e.g. 24.99'),
|
||||
queryAndLog('Net Amount ', 'What is the NET AMOUNT (subtotal before VAT/tax)? Just the number, e.g. 20.99'),
|
||||
queryAndLog('VAT Amount ', 'What is the VAT/TAX AMOUNT? Just the number, e.g. 4.00'),
|
||||
]);
|
||||
|
||||
console.log(` [Vision] Got: ${invoiceNum} | ${invoiceDate} | ${vendor} | ${currency}`);
|
||||
|
||||
// Parse amounts (format: "net,vat,total" or similar)
|
||||
const amountMatch = amounts.match(/([\d.,]+)/g) || [];
|
||||
// Parse amount from string (handles European format)
|
||||
/**
 * Parses a monetary amount out of a free-form model reply.
 *
 * Handles both "1,234.56" and European "1.234,56" notation, tolerates
 * surrounding text and sentence punctuation, and returns 0 when no number can
 * be found.
 *
 * @param s Raw reply text, e.g. "24.99", "1.234,56 EUR" or "Total: 24,99."
 * @returns The amount as a number, or 0 if the reply contains no digits.
 */
const parseAmount = (s: string): number => {
  if (!s) return 0;

  // Capture the first run that both starts and ends with a digit, so commas or
  // periods belonging to the surrounding sentence are never part of the number.
  const match = s.match(/\d(?:[\d.,]*\d)?/);
  if (!match) return 0;
  const numStr = match[0];

  const lastComma = numStr.lastIndexOf(',');
  const lastDot = numStr.lastIndexOf('.');
  if (lastComma === -1 && lastDot === -1) return parseFloat(numStr) || 0;

  // Whichever separator appears last is the decimal-point candidate.
  const decimalSep = lastComma > lastDot ? ',' : '.';

  // A separator that occurs more than once can only be a grouping separator
  // (1,234,567 or 1.234.567), so the value has no fractional part.
  if (numStr.indexOf(decimalSep) !== numStr.lastIndexOf(decimalSep)) {
    return parseFloat(numStr.replace(/[.,]/g, '')) || 0;
  }

  // Drop grouping separators from the integer part and use '.' as the decimal point.
  const cut = numStr.lastIndexOf(decimalSep);
  const integerPart = numStr.slice(0, cut).replace(/[.,]/g, '');
  const fractionPart = numStr.slice(cut + 1);
  return parseFloat(`${integerPart}.${fractionPart}`) || 0;
};
|
||||
|
||||
// Extract invoice number from potentially verbose response
|
||||
/**
 * Pulls the invoice number out of a possibly verbose model reply.
 *
 * Strategy, in order:
 *  1. strip markdown emphasis/backticks;
 *  2. return the first match of a known invoice-number shape;
 *  3. otherwise drop a leading label phrase ("Invoice No:", "Rechnung Nr.",
 *     "The invoice number is ...") and keep what follows, if it has a digit;
 *  4. otherwise reduce the whole reply to its letters, digits and dashes.
 *
 * @param s Raw reply text.
 * @returns The best-guess invoice number; '' for an empty reply.
 */
const extractInvoiceNumber = (s: string): string => {
  const clean = s.replace(/\*\*/g, '').replace(/`/g, '').trim();

  // Known invoice-number shapes, most specific first.
  const patterns: readonly RegExp[] = [
    /\b([A-Z]{2,3}\d{10,})\b/i, // IEE2022006460244
    /\b([A-Z]\d{8,})\b/i, // R0014359508
    /\b(INV[-\s]?\d{4}[-\s]?\d+)\b/i, // INV-2024-001
    /\b(\d{7,})\b/, // 1579087430
  ];
  for (const pattern of patterns) {
    const match = clean.match(pattern);
    if (match) return match[1];
  }

  // Keeps only characters that can be part of an invoice number.
  const compact = (text: string): string => text.replace(/[^A-Z0-9-]/gi, '').trim();

  // A label must be followed by whitespace, ':' or '#', so values that merely
  // start with the word (e.g. "INVOICE2024-01", "Invoice-2024-01") stay intact.
  const labelPrefix =
    /^.*\b(?:invoice|rechnung(?:s?nummer|s?nr)?|facture)(?:\s+(?:number|no|nr)\b\.?)?(?:\s+is\b)?(?:\s*[:#]\s*|\s+)/is;
  const afterLabel = compact(clean.replace(labelPrefix, ''));
  // Only trust the label-stripped remainder when it still carries a digit;
  // otherwise the number preceded the label and must not be thrown away.
  if (/\d/.test(afterLabel)) return afterLabel;

  return compact(clean) || clean;
};
|
||||
|
||||
// Extract date (YYYY-MM-DD) from response
|
||||
/**
 * Extracts a date from a model reply and returns it as YYYY-MM-DD.
 *
 * Accepts, in order of preference:
 *  - an ISO date anywhere in the text (2024-03-15);
 *  - a year-first date with '/', '.' or '-' separators, padded or not
 *    (2024/3/5, 2024.03.15);
 *  - a dotted day-first date (15.03.2024, 5.3.2024).
 * Slash-separated day/month dates are NOT reinterpreted because DD/MM and
 * MM/DD cannot be told apart; those fall through to the digit/dash fallback.
 *
 * @param s Raw reply text.
 * @returns The normalized date, or the reply reduced to digits and dashes when
 *          no supported date shape is found.
 */
const extractDate = (s: string): string => {
  const clean = s.replace(/\*\*/g, '').replace(/`/g, '').trim();
  const pad = (part: string): string => part.padStart(2, '0');

  const isoMatch = clean.match(/(\d{4}-\d{2}-\d{2})/);
  if (isoMatch) return isoMatch[1];

  // Year-first is unambiguous regardless of separator or zero padding.
  const yearFirst = clean.match(/(?<!\d)(\d{4})[-./](\d{1,2})[-./](\d{1,2})(?!\d)/);
  if (yearFirst) return `${yearFirst[1]}-${pad(yearFirst[2])}-${pad(yearFirst[3])}`;

  // Dotted dates are day-first (DD.MM.YYYY).
  const dayFirst = clean.match(/(?<!\d)(\d{1,2})\.(\d{1,2})\.(\d{4})(?!\d)/);
  if (dayFirst) return `${dayFirst[3]}-${pad(dayFirst[2])}-${pad(dayFirst[1])}`;

  return clean.replace(/[^\d-]/g, '').trim();
};
|
||||
|
||||
// Extract currency
|
||||
/**
 * Maps a model reply to one of the supported currency codes.
 *
 * The first currency marker that appears in the reply wins, on the assumption
 * that the model leads with its answer; a later mention (e.g. "USD (converted
 * from EUR)") therefore does not override it. Matching is case-insensitive and
 * recognizes both ISO codes and symbols.
 *
 * @param s Raw reply text.
 * @returns 'EUR', 'USD' or 'GBP'; defaults to 'EUR' when no marker is found.
 */
const extractCurrency = (s: string): string => {
  // Leftmost match across all codes and symbols.
  const hit = s.toUpperCase().match(/EUR|€|USD|\$|GBP|£/);
  if (!hit) return 'EUR';

  switch (hit[0]) {
    case 'USD':
    case '$':
      return 'USD';
    case 'GBP':
    case '£':
      return 'GBP';
    default:
      return 'EUR';
  }
};
|
||||
|
||||
return {
|
||||
invoice_number: invoiceNum || '',
|
||||
invoice_date: invoiceDate || '',
|
||||
vendor_name: vendor || '',
|
||||
currency: (currency || 'EUR').toUpperCase().replace(/[^A-Z]/g, '').slice(0, 3) || 'EUR',
|
||||
net_amount: parseAmount(amountMatch[0] || ''),
|
||||
vat_amount: parseAmount(amountMatch[1] || ''),
|
||||
total_amount: parseAmount(amountMatch[2] || amountMatch[0] || ''),
|
||||
invoice_number: extractInvoiceNumber(invoiceNum),
|
||||
invoice_date: extractDate(invoiceDate),
|
||||
vendor_name: vendor.replace(/\*\*/g, '').replace(/`/g, '').trim() || '',
|
||||
currency: extractCurrency(currency),
|
||||
net_amount: parseAmount(netAmount),
|
||||
vat_amount: parseAmount(vatAmount),
|
||||
total_amount: parseAmount(totalAmount),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -296,7 +338,7 @@ tap.test('summary', async () => {
|
||||
console.log(`\n======================================================`);
|
||||
console.log(` Invoice Extraction Summary (Qwen3-VL Vision)`);
|
||||
console.log(`======================================================`);
|
||||
console.log(` Method: Qwen3-VL 8B Direct Vision (/no_think)`);
|
||||
console.log(` Method: Multi-query (single pass)`);
|
||||
console.log(` Passed: ${passedCount}/${total}`);
|
||||
console.log(` Failed: ${failedCount}/${total}`);
|
||||
console.log(` Accuracy: ${accuracy.toFixed(1)}%`);
|
||||
|
||||
Reference in New Issue
Block a user