/**
 * Invoice extraction using Qwen3-VL 8B Vision (Direct)
 *
 * Multi-query approach: 7 parallel simple queries (one per field) to avoid token exhaustion.
 * Single pass, no consensus voting.
 */
import { tap , expect } from '@git.zone/tstest/tapbundle' ;
import * as fs from 'fs' ;
import * as path from 'path' ;
import { execSync } from 'child_process' ;
import * as os from 'os' ;
2026-01-18 04:17:30 +00:00
import { ensureMiniCpm } from './helpers/docker.js' ;
2026-01-18 03:35:05 +00:00
// Base URL of the Ollama HTTP API; the chat, tags and pull requests in this file are sent to it.
const OLLAMA_URL = 'http://localhost:11434' ;
// Ollama model tag used for every vision query and for the availability check / pull.
const VISION_MODEL = 'qwen3-vl:8b' ;
/**
 * Shape of an invoice record, used both for the values extracted from the
 * model's answers and for the expected values loaded from the JSON fixtures.
 */
interface IInvoice {
  /** Invoice identifier as printed on the document. */
  invoice_number: string;
  /** Issue date of the invoice (the extraction prompt asks for YYYY-MM-DD). */
  invoice_date: string;
  /** Name of the company that issued the invoice. */
  vendor_name: string;
  /** Currency code (the extraction prompt asks for EUR, USD or GBP). */
  currency: string;
  /** Subtotal before VAT/tax. */
  net_amount: number;
  /** VAT/tax amount. */
  vat_amount: number;
  /** Total amount including VAT/tax. */
  total_amount: number;
}
/**
 * Convert a PDF to PNG page images using ImageMagick's `convert` command.
 *
 * Pages are rendered into a fresh temporary directory, read back as base64
 * strings, and the directory is removed before returning (also on failure).
 *
 * @param pdfPath - Path of the PDF file to render.
 * @returns One base64-encoded PNG per generated page file, in numeric page order.
 * @throws Whatever `execSync` throws when the `convert` command fails.
 */
function convertPdfToImages(pdfPath: string): string[] {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pdf-convert-'));
  const outputPattern = path.join(tempDir, 'page-%d.png');

  // Numeric index from "page-<n>.png". A plain string sort would order
  // page-10.png before page-2.png and scramble documents with 11+ pages.
  const pageIndex = (file: string): number => {
    const found = /(\d+)\.png$/.exec(file);
    return found ? Number(found[1]) : Number.MAX_SAFE_INTEGER;
  };

  try {
    // 150 DPI is sufficient for invoice extraction, reduces context size
    execSync(
      `convert -density 150 -quality 90 "${pdfPath}" -background white -alpha remove "${outputPattern}"`,
      { stdio: 'pipe' }
    );

    return fs
      .readdirSync(tempDir)
      .filter((f) => f.endsWith('.png'))
      .sort((a, b) => pageIndex(a) - pageIndex(b) || a.localeCompare(b))
      .map((file) => fs.readFileSync(path.join(tempDir, file)).toString('base64'));
  } finally {
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
}
/**
 * Query Qwen3-VL for a single field.
 * Uses simple prompts to minimize thinking tokens.
 *
 * Sends one non-streaming chat request to the Ollama `/api/chat` endpoint
 * with all page images attached to a single user message.
 *
 * @param images - Base64-encoded page images to attach to the message.
 * @param question - The question to ask; an instruction to reply with only the value is appended.
 * @returns The trimmed `message.content` of the response, or '' when it is absent or not a string.
 * @throws Error when the HTTP response status is not OK.
 */
async function queryField(images: string[], question: string): Promise<string> {
  const response = await fetch(`${OLLAMA_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: VISION_MODEL,
      messages: [
        {
          role: 'user',
          content: `${question} Reply with just the value, nothing else.`,
          images,
        },
      ],
      stream: false,
      options: {
        num_predict: 500,
        temperature: 0.1,
      },
    }),
  });

  if (!response.ok) {
    throw new Error(`Ollama API error: ${response.status}`);
  }

  // The payload is external input: narrow it instead of trusting its shape,
  // so a null body or non-string content yields '' rather than a TypeError.
  const data: unknown = await response.json();
  const content = (data as { message?: { content?: unknown } } | null)?.message?.content;
  return typeof content === 'string' ? content.trim() : '';
}
2026-01-18 03:35:05 +00:00
2026-01-18 04:50:57 +00:00
/** Remove Markdown bold markers and backticks from a model answer and trim it. */
function stripMarkdown(s: string): string {
  return s.replace(/\*\*/g, '').replace(/`/g, '').trim();
}

/**
 * Parse the first amount found in a model answer.
 *
 * Handles both "1,234.56" and European "1.234,56" notation. Whichever
 * separator occurs last is taken as the decimal separator, but only if it
 * occurs exactly once; a repeated separator can only be a grouping separator.
 *
 * @returns The parsed number, or 0 when no number can be found.
 */
function parseAmount(s: string): number {
  if (!s) return 0;
  // The token must start and end on a digit, so prose punctuation ("Sure, ..."
  // or a trailing sentence period) is never mistaken for part of the number.
  const match = s.match(/\d(?:[\d.,]*\d)?/);
  if (!match) return 0;
  const numStr = match[0];

  const decimalSep = numStr.lastIndexOf(',') > numStr.lastIndexOf('.') ? ',' : '.';
  const groupSep = decimalSep === ',' ? '.' : ',';
  const pieces = numStr.split(groupSep).join('').split(decimalSep);
  const normalized = pieces.length === 2 ? pieces.join('.') : pieces.join('');
  return parseFloat(normalized) || 0;
}

/** Extract an invoice number from a potentially verbose model answer. */
function extractInvoiceNumber(s: string): string {
  const clean = stripMarkdown(s);
  // Common invoice number patterns, tried in order
  const patterns = [
    /\b([A-Z]{2,3}\d{10,})\b/i, // IEE2022006460244
    /\b([A-Z]\d{8,})\b/i, // R0014359508
    /\b(INV[-\s]?\d{4}[-\s]?\d+)\b/i, // INV-2024-001
    /\b(\d{7,})\b/, // 1579087430
  ];
  for (const pattern of patterns) {
    const match = clean.match(pattern);
    if (match?.[1]) return match[1];
  }
  // No known pattern: keep only letters, digits and hyphens (or the raw text if that leaves nothing)
  return clean.replace(/[^A-Z0-9-]/gi, '').trim() || clean;
}

/**
 * Extract a date from a model answer.
 *
 * Prefers an ISO YYYY-MM-DD token. Otherwise a DD/MM/YYYY, DD.MM.YYYY or
 * DD-MON-YYYY token is returned unchanged, because normalizeDate() can convert
 * those and stripping their separators or letters would make them unreadable.
 */
function extractDate(s: string): string {
  const clean = stripMarkdown(s);
  const isoMatch = clean.match(/\d{4}-\d{2}-\d{2}/);
  if (isoMatch) return isoMatch[0];
  const otherMatch = clean.match(/\b\d{1,2}[\/.]\d{1,2}[\/.]\d{4}\b|\b\d{1,2}-[A-Za-z]{3}-\d{4}\b/);
  if (otherMatch) return otherMatch[0];
  return clean.replace(/[^\d-]/g, '').trim();
}

/** Map a model answer to EUR, USD or GBP; falls back to EUR when none is recognised. */
function extractCurrency(s: string): string {
  const upper = s.toUpperCase();
  if (upper.includes('EUR') || upper.includes('€')) return 'EUR';
  if (upper.includes('USD') || upper.includes('$')) return 'USD';
  if (upper.includes('GBP') || upper.includes('£')) return 'GBP';
  return 'EUR';
}

/**
 * Extract invoice data using multiple simple queries.
 * Each query asks for a single field to minimize thinking tokens
 * (Qwen3's thinking mode uses all tokens on complex prompts).
 *
 * All queries are started in parallel and each answer is logged as soon as it arrives.
 *
 * @param images - Base64-encoded page images of the invoice.
 * @returns The invoice fields parsed from the model's answers.
 * @throws If any of the underlying queryField() calls rejects.
 */
async function extractInvoiceFromImages(images: string[]): Promise<IInvoice> {
  console.log(`[Vision] Processing ${images.length} page(s) with Qwen3-VL (multi-query)`);

  // Query each field separately to avoid excessive thinking tokens.
  // Use explicit questions to avoid confusion between similar fields.
  // Log each result as it comes in (not waiting for all to complete).
  const queryAndLog = async (name: string, question: string): Promise<string> => {
    const result = await queryField(images, question);
    console.log(`[Query] ${name}: "${result}"`);
    return result;
  };

  const [invoiceNum, invoiceDate, vendor, currency, totalAmount, netAmount, vatAmount] = await Promise.all([
    queryAndLog('Invoice Number', 'What is the INVOICE NUMBER (not VAT number, not customer ID)? Look for "Invoice No", "Invoice #", "Rechnung Nr", "Facture". Just the number/code.'),
    queryAndLog('Invoice Date ', 'What is the INVOICE DATE (not due date, not delivery date)? The date the invoice was issued. Format: YYYY-MM-DD'),
    queryAndLog('Vendor ', 'What company ISSUED this invoice (the seller/vendor, not the buyer)? Look at the letterhead or "From" section.'),
    queryAndLog('Currency ', 'What CURRENCY is used? Look for € (EUR), $ (USD), or £ (GBP). Answer with 3-letter code: EUR, USD, or GBP'),
    queryAndLog('Total Amount ', 'What is the TOTAL AMOUNT INCLUDING TAX (the final amount to pay, with VAT/tax included)? Just the number, e.g. 24.99'),
    queryAndLog('Net Amount ', 'What is the NET AMOUNT (subtotal before VAT/tax)? Just the number, e.g. 20.99'),
    queryAndLog('VAT Amount ', 'What is the VAT/TAX AMOUNT? Just the number, e.g. 4.00'),
  ]);

  return {
    invoice_number: extractInvoiceNumber(invoiceNum),
    invoice_date: extractDate(invoiceDate),
    vendor_name: stripMarkdown(vendor),
    currency: extractCurrency(currency),
    net_amount: parseAmount(netAmount),
    vat_amount: parseAmount(vatAmount),
    total_amount: parseAmount(totalAmount),
  };
}
/**
 * Normalize a date string to YYYY-MM-DD.
 *
 * Recognised inputs (after trimming surrounding whitespace):
 *  - YYYY-MM-DD (returned as is)
 *  - DD-MON-YYYY with a three-letter English month abbreviation, any case
 *  - DD/MM/YYYY, DD.MM.YYYY and DD-MM-YYYY (day first)
 *
 * Anything else, including an unknown month abbreviation, is returned trimmed
 * but otherwise unchanged, so an unparseable date is never turned into a
 * plausible-looking wrong one.
 *
 * @param dateStr - The date text, or null.
 * @returns The normalized date, the trimmed input if unrecognised, or '' for null/empty input.
 */
function normalizeDate(dateStr: string | null): string {
  if (!dateStr) return '';
  const value = dateStr.trim();
  if (/^\d{4}-\d{2}-\d{2}$/.test(value)) return value;

  const monthMap: Record<string, string> = {
    JAN: '01', FEB: '02', MAR: '03', APR: '04', MAY: '05', JUN: '06',
    JUL: '07', AUG: '08', SEP: '09', OCT: '10', NOV: '11', DEC: '12',
  };

  // DD-MON-YYYY, e.g. 18-Jan-2026
  const named = /^(\d{1,2})-([A-Z]{3})-(\d{4})$/i.exec(value);
  if (named) {
    const [, day = '', mon = '', year = ''] = named;
    const month = monthMap[mon.toUpperCase()];
    return month ? `${year}-${month}-${day.padStart(2, '0')}` : value;
  }

  // DD/MM/YYYY, DD.MM.YYYY or DD-MM-YYYY
  const numeric = /^(\d{1,2})[\/.-](\d{1,2})[\/.-](\d{4})$/.exec(value);
  if (numeric) {
    const [, day = '', month = '', year = ''] = numeric;
    return `${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`;
  }

  return value;
}
/**
 * Compare extracted vs expected invoice data.
 *
 * Checks invoice number (ignoring whitespace and case), invoice date (after
 * normalizeDate on both sides), total amount (tolerance 0.02) and currency
 * (ignoring case). vendor_name, net_amount and vat_amount are not compared.
 *
 * @param extracted - Values produced by the extraction.
 * @param expected - Reference values.
 * @returns `match` is true when no check failed; `errors` has one message per failed check.
 */
function compareInvoice(extracted: IInvoice, expected: IInvoice): { match: boolean; errors: string[] } {
  const errors: string[] = [];

  const canonicalNumber = (value: string | null | undefined): string =>
    value?.replace(/\s/g, '').toLowerCase() || '';

  if (canonicalNumber(extracted.invoice_number) !== canonicalNumber(expected.invoice_number)) {
    errors.push(`invoice_number: expected "${expected.invoice_number}", got "${extracted.invoice_number}"`);
  }

  if (normalizeDate(extracted.invoice_date) !== normalizeDate(expected.invoice_date)) {
    errors.push(`invoice_date: expected "${expected.invoice_date}", got "${extracted.invoice_date}"`);
  }

  // A missing or non-numeric total makes the difference NaN, and NaN > 0.02 is
  // false, so it must be rejected explicitly or it would count as a match.
  const totalDiff = Math.abs(extracted.total_amount - expected.total_amount);
  if (!Number.isFinite(totalDiff) || totalDiff > 0.02) {
    errors.push(`total_amount: expected ${expected.total_amount}, got ${extracted.total_amount}`);
  }

  if (extracted.currency?.toUpperCase() !== expected.currency?.toUpperCase()) {
    errors.push(`currency: expected "${expected.currency}", got "${extracted.currency}"`);
  }

  return { match: errors.length === 0, errors };
}
/**
 * Find test cases.
 *
 * Scans `.nogit/invoices` under the current working directory and pairs every
 * `<name>.pdf` with a `<name>.json` in the same directory. PDFs without a
 * matching JSON file are skipped.
 *
 * @returns The pairs sorted by name; an empty array when the directory does not exist.
 */
function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: string }> {
  const testDir = path.join(process.cwd(), '.nogit/invoices');
  if (!fs.existsSync(testDir)) return [];

  const files = fs.readdirSync(testDir);
  const available = new Set(files);
  const testCases: Array<{ name: string; pdfPath: string; jsonPath: string }> = [];

  for (const pdf of files.filter((f) => f.endsWith('.pdf'))) {
    // Strip only the trailing extension. String.replace('.pdf', '') removes the
    // FIRST occurrence, which mangles names such as "a.pdf.v2.pdf".
    const baseName = path.basename(pdf, '.pdf');
    const jsonFile = `${baseName}.json`;
    if (available.has(jsonFile)) {
      testCases.push({
        name: baseName,
        pdfPath: path.join(testDir, pdf),
        jsonPath: path.join(testDir, jsonFile),
      });
    }
  }

  return testCases.sort((a, b) => a.name.localeCompare(b.name));
}
2026-01-18 04:17:30 +00:00
/**
 * Ensure the Qwen3-VL 8B model is available in Ollama.
 *
 * First asks `/api/tags` whether VISION_MODEL is already listed. If it is not
 * (or the listing request returns a non-OK status), a non-streaming pull is
 * requested via `/api/pull`.
 *
 * @returns true when the model is already listed or the pull request returned
 *          an OK status; false when the listing or the pull request failed.
 */
async function ensureQwen3Vl(): Promise<boolean> {
  try {
    const response = await fetch(`${OLLAMA_URL}/api/tags`);
    if (response.ok) {
      const data: unknown = await response.json();
      const listed = (data as { models?: unknown } | null)?.models;
      const models: Array<{ name?: unknown }> = Array.isArray(listed) ? listed : [];
      if (models.some((m) => m?.name === VISION_MODEL)) {
        console.log(`[Ollama] Model already available: ${VISION_MODEL}`);
        return true;
      }
    }
  } catch {
    console.log('[Ollama] Cannot check models');
    return false;
  }

  console.log(`[Ollama] Pulling model: ${VISION_MODEL}...`);
  try {
    const pullResponse = await fetch(`${OLLAMA_URL}/api/pull`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: VISION_MODEL, stream: false }),
    });
    if (!pullResponse.ok) {
      console.log(`[Ollama] Pull failed: HTTP ${pullResponse.status}`);
    }
    return pullResponse.ok;
  } catch (err: unknown) {
    // Report a failed pull as `false`, consistent with the listing failure
    // above, instead of letting the rejection escape to the caller.
    console.log(`[Ollama] Pull failed: ${err instanceof Error ? err.message : String(err)}`);
    return false;
  }
}
2026-01-18 03:35:05 +00:00
// Tests

// Setup: both the Ollama service and the vision model must be available
// before any extraction test can produce meaningful results.
tap.test('setup: ensure Qwen3-VL is running', async () => {
  console.log('\n[Setup] Checking Qwen3-VL 8B...\n');

  // Ensure Ollama service is running
  // NOTE(review): ensureMiniCpm comes from ./helpers/docker.js; presumably it
  // starts the Ollama service this test relies on. Confirm against the helper.
  const ollamaOk = await ensureMiniCpm();
  expect(ollamaOk).toBeTrue();

  // Ensure Qwen3-VL 8B model
  const visionOk = await ensureQwen3Vl();
  expect(visionOk).toBeTrue();

  console.log('\n[Setup] Ready!\n');
});
// Discover the fixtures once at module load and register one tap test per invoice.
const testCases = findTestCases();
console.log(`\nFound ${testCases.length} invoice test cases (Qwen3-VL Vision)\n`);

// Counters and timings shared with the summary test.
let passedCount = 0;
let failedCount = 0;
const times: number[] = [];

for (const testCase of testCases) {
  tap.test(`should extract invoice: ${testCase.name}`, async () => {
    const expected: IInvoice = JSON.parse(fs.readFileSync(testCase.jsonPath, 'utf-8'));
    console.log(`\n=== ${testCase.name} ===`);
    console.log(`Expected: ${expected.invoice_number} | ${expected.invoice_date} | ${expected.total_amount} ${expected.currency}`);

    const start = Date.now();
    let extracted: IInvoice;
    try {
      const images = convertPdfToImages(testCase.pdfPath);
      console.log(`Pages: ${images.length}`);
      extracted = await extractInvoiceFromImages(images);
    } catch (err: unknown) {
      // A conversion or API error must still be counted as a failure,
      // otherwise passed + failed no longer add up to the total.
      failedCount++;
      times.push(Date.now() - start);
      console.log(`Result: ERROR (${err instanceof Error ? err.message : String(err)})`);
      throw err;
    }

    const elapsed = Date.now() - start;
    times.push(elapsed);
    console.log(`Extracted: ${extracted.invoice_number} | ${extracted.invoice_date} | ${extracted.total_amount} ${extracted.currency}`);

    const result = compareInvoice(extracted, expected);
    if (result.match) {
      passedCount++;
      console.log(`Result: MATCH (${(elapsed / 1000).toFixed(1)}s)`);
    } else {
      failedCount++;
      console.log(`Result: MISMATCH (${(elapsed / 1000).toFixed(1)}s)`);
      result.errors.forEach((e) => console.log(`  - ${e}`));
    }

    expect(result.match).toBeTrue();
  });
}
// Summary: prints pass/fail counts, accuracy and timing collected by the invoice tests.
tap.test('summary', async () => {
  const total = testCases.length;
  const accuracy = total > 0 ? (passedCount / total) * 100 : 0;
  const totalTime = times.reduce((a, b) => a + b, 0) / 1000;
  const avgTime = times.length > 0 ? totalTime / times.length : 0;
  // Tests that neither passed nor were recorded as failed (e.g. they threw
  // before the comparison, or did not run at all).
  const unaccounted = total - passedCount - failedCount;

  console.log(`\n======================================================`);
  console.log(`  Invoice Extraction Summary (Qwen3-VL Vision)`);
  console.log(`======================================================`);
  console.log(`  Method:      Multi-query (single pass)`);
  console.log(`  Passed:      ${passedCount}/${total}`);
  console.log(`  Failed:      ${failedCount}/${total}`);
  if (unaccounted > 0) {
    console.log(`  Errored/not run: ${unaccounted}/${total}`);
  }
  console.log(`  Accuracy:    ${accuracy.toFixed(1)}%`);
  console.log(`------------------------------------------------------`);
  console.log(`  Total time:  ${totalTime.toFixed(1)}s`);
  console.log(`  Avg per inv: ${avgTime.toFixed(1)}s`);
  console.log(`======================================================\n`);
});
// Start the tap test run; the value returned by tap.start() is this module's default export.
export default tap . start ( ) ;