feat(tests): switch vision tests to multi-query extraction (count then per-row/field queries) and add logging/summaries

This commit is contained in:
2026-01-18 11:26:38 +00:00
parent 4c368dfef9
commit 76b21f1f7b
5 changed files with 624 additions and 367 deletions

View File

@@ -1,10 +1,8 @@
/**
* Invoice extraction using Qwen3-VL 8B Vision (Direct)
*
* Single-step pipeline: PDF → Images → Qwen3-VL → JSON
* Uses /no_think to disable reasoning mode for fast, direct responses.
*
* Qwen3-VL outperforms PaddleOCR-VL on certain invoice formats.
* Multi-query approach: 7 parallel simple queries (one per field) to avoid token exhaustion.
* Single pass, no consensus voting.
*/
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as fs from 'fs';
@@ -67,11 +65,10 @@ async function queryField(images: string[], question: string): Promise<string> {
model: VISION_MODEL,
messages: [{
role: 'user',
content: `/no_think\n${question} Reply with just the value, nothing else.`,
content: `${question} Reply with just the value, nothing else.`,
images: images,
}],
stream: false,
think: false,
options: {
num_predict: 500,
temperature: 0.1,
@@ -96,35 +93,80 @@ async function extractInvoiceFromImages(images: string[]): Promise<IInvoice> {
console.log(` [Vision] Processing ${images.length} page(s) with Qwen3-VL (multi-query)`);
// Query each field separately to avoid excessive thinking tokens
const [invoiceNum, invoiceDate, vendor, currency, amounts] = await Promise.all([
queryField(images, 'What is the invoice number on this document?'),
queryField(images, 'What is the invoice date? Format as YYYY-MM-DD.'),
queryField(images, 'What company issued this invoice?'),
queryField(images, 'What currency is used? Answer EUR, USD, or GBP.'),
queryField(images, 'What are the net amount, VAT amount, and total amount? Format: net,vat,total'),
// Use explicit questions to avoid confusion between similar fields
// Log each result as it comes in (not waiting for all to complete)
/**
 * Ask the vision model a single question about the invoice pages and log
 * the answer as soon as that individual query resolves.
 *
 * @param name - Label printed in front of the answer in the log line.
 * @param question - Prompt forwarded unchanged to `queryField`.
 * @returns The answer string exactly as returned by `queryField`.
 */
const queryAndLog = async (name: string, question: string): Promise<string> => {
  const answer = await queryField(images, question);
  // Logged here rather than after Promise.all so each field shows up when it is ready.
  console.log(` [Query] ${name}: "${answer}"`);
  return answer;
};
const [invoiceNum, invoiceDate, vendor, currency, totalAmount, netAmount, vatAmount] = await Promise.all([
queryAndLog('Invoice Number', 'What is the INVOICE NUMBER (not VAT number, not customer ID)? Look for "Invoice No", "Invoice #", "Rechnung Nr", "Facture". Just the number/code.'),
queryAndLog('Invoice Date ', 'What is the INVOICE DATE (not due date, not delivery date)? The date the invoice was issued. Format: YYYY-MM-DD'),
queryAndLog('Vendor ', 'What company ISSUED this invoice (the seller/vendor, not the buyer)? Look at the letterhead or "From" section.'),
queryAndLog('Currency ', 'What CURRENCY is used? Look for € (EUR), $ (USD), or £ (GBP). Answer with 3-letter code: EUR, USD, or GBP'),
queryAndLog('Total Amount ', 'What is the TOTAL AMOUNT INCLUDING TAX (the final amount to pay, with VAT/tax included)? Just the number, e.g. 24.99'),
queryAndLog('Net Amount ', 'What is the NET AMOUNT (subtotal before VAT/tax)? Just the number, e.g. 20.99'),
queryAndLog('VAT Amount ', 'What is the VAT/TAX AMOUNT? Just the number, e.g. 4.00'),
]);
console.log(` [Vision] Got: ${invoiceNum} | ${invoiceDate} | ${vendor} | ${currency}`);
// Parse amounts (format: "net,vat,total" or similar)
const amountMatch = amounts.match(/([\d.,]+)/g) || [];
/**
 * Parse a monetary amount out of a free-form model response.
 *
 * Handles both European ("1.234,56") and US ("1,234.56") formatting. The
 * separator that appears last in the number is treated as the decimal mark;
 * the other one is treated as a grouping separator and removed. A separator
 * that occurs more than once cannot be a decimal mark, so in that case it is
 * treated as grouping as well ("1,234,567" → 1234567).
 *
 * @param s - Raw answer text, possibly containing words, currency symbols or
 *            trailing punctuation around the number.
 * @returns The parsed amount, or 0 when no number can be found.
 */
const parseAmount = (s: string): number => {
  if (!s) return 0;
  // The token must start and end on a digit so that stray punctuation
  // ("approx.", "e.g.") or a sentence-ending period is never captured.
  const match = s.match(/\d(?:[\d.,]*\d)?/);
  if (!match) return 0;
  const numStr = match[0];

  // Whichever separator comes last is the decimal candidate.
  const decimalSep = numStr.lastIndexOf(',') > numStr.lastIndexOf('.') ? ',' : '.';
  const groupSep = decimalSep === ',' ? '.' : ',';

  const withoutGrouping = numStr.split(groupSep).join('');
  const parts = withoutGrouping.split(decimalSep);
  // More than one "decimal" separator means it was really a grouping separator.
  const normalized = parts.length > 2 ? parts.join('') : parts.join('.');

  return parseFloat(normalized) || 0;
};
/**
 * Pull an invoice number out of a possibly verbose model answer.
 *
 * Markdown bold markers and backticks are stripped first. The cleaned text is
 * then tested against a list of known invoice-number shapes, in priority
 * order; the first shape that matches wins. If none match, every character
 * other than letters, digits and hyphens is dropped, and if that leaves
 * nothing the cleaned text is returned as-is.
 *
 * @param s - Raw answer text from the model.
 * @returns The best-guess invoice number.
 */
const extractInvoiceNumber = (s: string): string => {
  const cleaned = s.replace(/\*\*/g, '').replace(/`/g, '').trim();

  // Ordered from most to least specific.
  const knownFormats: RegExp[] = [
    /\b([A-Z]{2,3}\d{10,})\b/i, // IEE2022006460244
    /\b([A-Z]\d{8,})\b/i, // R0014359508
    /\b(INV[-\s]?\d{4}[-\s]?\d+)\b/i, // INV-2024-001
    /\b(\d{7,})\b/, // 1579087430
  ];

  const hit = knownFormats
    .map((format) => format.exec(cleaned))
    .find((result) => result !== null);
  if (hit) {
    return hit[1];
  }

  const alphanumericOnly = cleaned.replace(/[^A-Z0-9-]/gi, '').trim();
  if (alphanumericOnly) {
    return alphanumericOnly;
  }
  return cleaned;
};
/**
 * Extract a date from a model answer.
 *
 * Markdown bold markers and backticks are removed, then the first
 * `YYYY-MM-DD` shaped substring is returned. When no such substring exists,
 * everything except digits and hyphens is stripped and the remainder is
 * returned (which may be an empty string).
 *
 * @param s - Raw answer text from the model.
 * @returns The ISO-style date if present, otherwise the digits/hyphens left over.
 */
const extractDate = (s: string): string => {
  const cleaned = s.replace(/\*\*/g, '').replace(/`/g, '').trim();
  const iso = /(\d{4}-\d{2}-\d{2})/.exec(cleaned);
  return iso !== null ? iso[1] : cleaned.replace(/[^\d-]/g, '').trim();
};
/**
 * Map a model answer to one of the supported ISO currency codes.
 *
 * The answer is upper-cased and checked for each supported currency in a
 * fixed order (EUR, then USD, then GBP), matching either the 3-letter code or
 * the currency symbol. The first currency found is returned; if none is
 * found the result defaults to 'EUR'.
 *
 * @param s - Raw answer text from the model.
 * @returns 'EUR', 'USD' or 'GBP'.
 */
const extractCurrency = (s: string): string => {
  const normalized = s.toUpperCase();
  // Order matters: earlier entries win when several currencies are mentioned.
  const supported: Array<[string, string]> = [
    ['EUR', '€'],
    ['USD', '$'],
    ['GBP', '£'],
  ];
  for (const [code, symbol] of supported) {
    if (normalized.includes(code) || normalized.includes(symbol)) {
      return code;
    }
  }
  return 'EUR';
};
return {
invoice_number: invoiceNum || '',
invoice_date: invoiceDate || '',
vendor_name: vendor || '',
currency: (currency || 'EUR').toUpperCase().replace(/[^A-Z]/g, '').slice(0, 3) || 'EUR',
net_amount: parseAmount(amountMatch[0] || ''),
vat_amount: parseAmount(amountMatch[1] || ''),
total_amount: parseAmount(amountMatch[2] || amountMatch[0] || ''),
invoice_number: extractInvoiceNumber(invoiceNum),
invoice_date: extractDate(invoiceDate),
vendor_name: vendor.replace(/\*\*/g, '').replace(/`/g, '').trim() || '',
currency: extractCurrency(currency),
net_amount: parseAmount(netAmount),
vat_amount: parseAmount(vatAmount),
total_amount: parseAmount(totalAmount),
};
}
@@ -296,7 +338,7 @@ tap.test('summary', async () => {
console.log(`\n======================================================`);
console.log(` Invoice Extraction Summary (Qwen3-VL Vision)`);
console.log(`======================================================`);
console.log(` Method: Qwen3-VL 8B Direct Vision (/no_think)`);
console.log(` Method: Multi-query (single pass)`);
console.log(` Passed: ${passedCount}/${total}`);
console.log(` Failed: ${failedCount}/${total}`);
console.log(` Accuracy: ${accuracy.toFixed(1)}%`);