/**
 * Invoice extraction using Ministral 3 Vision (Direct)
 *
 * NO PaddleOCR needed - Ministral 3 has a built-in vision encoder:
 * 1. Convert PDF to images
 * 2. Send images directly to Ministral 3 via Ollama
 * 3. Extract structured JSON with native schema support
 *
 * This is the simplest possible pipeline.
 */
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as fs from 'fs';
import * as path from 'path';
import { execFileSync, execSync } from 'child_process';
import * as os from 'os';
import { ensureMinistral3 } from './helpers/docker.js';
/** Base URL of the Ollama HTTP API; the chat endpoint is reached at `${OLLAMA_URL}/api/chat`. */
const OLLAMA_URL = 'http://localhost:11434';

/** Model tag sent as `model` in every chat request. */
const VISION_MODEL = 'ministral-3:8b';
/**
 * Invoice fields handled by this test: the shape of the expected-value JSON
 * fixtures and of the record built from the model's answer.
 */
interface IInvoice {
  /** Invoice identifier as printed on the document. */
  invoice_number: string;
  /** Invoice date; compared after normalization to YYYY-MM-DD. */
  invoice_date: string;
  /** Name of the company issuing the invoice. */
  vendor_name: string;
  /** Currency code such as "EUR", "USD" or "GBP". */
  currency: string;
  /** Subtotal before tax. */
  net_amount: number;
  /** Tax amount. */
  vat_amount: number;
  /** Final amount due. */
  total_amount: number;
}
/**
 * Convert a PDF to PNG images using ImageMagick and return them base64-encoded.
 *
 * The pages are rendered by the `convert` binary into a fresh temporary
 * directory, read back into memory, and the directory is removed again before
 * the function returns (also when conversion fails).
 *
 * @param pdfPath - Path of the PDF file to rasterize.
 * @returns One base64-encoded PNG per generated page file, in numeric page order.
 * @throws If `convert` cannot be started or exits with a non-zero status.
 */
function convertPdfToImages(pdfPath: string): string[] {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pdf-convert-'));
  const outputPattern = path.join(tempDir, 'page-%d.png');

  // Numeric page index taken from "page-<n>.png"; files without one sort first.
  const pageIndex = (fileName: string): number => {
    const found = /(\d+)\.png$/.exec(fileName);
    return found ? Number(found[1]) : 0;
  };

  try {
    // High quality conversion: 300 DPI, max quality, sharpen for better OCR.
    // Arguments go in as an array (no shell involved), so quotes, `$` or
    // backticks in the PDF path cannot change the command being run.
    execFileSync(
      'convert',
      [
        '-density', '300',
        '-quality', '100',
        pdfPath,
        '-background', 'white',
        '-alpha', 'remove',
        '-sharpen', '0x1',
        outputPattern,
      ],
      { stdio: 'pipe' },
    );

    return fs
      .readdirSync(tempDir)
      .filter((f) => f.endsWith('.png'))
      // A plain string sort would put page-10 before page-2; order by page number.
      .sort((a, b) => pageIndex(a) - pageIndex(b) || a.localeCompare(b))
      .map((f) => fs.readFileSync(path.join(tempDir, f)).toString('base64'));
  } finally {
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
}
/**
 * Extract invoice data directly from page images using Ministral 3 Vision.
 *
 * Sends all images in a single user message to the Ollama chat endpoint with a
 * JSON schema as `format`, reads the streamed NDJSON answer, and parses the
 * concatenated message content as one JSON object.
 *
 * @param images - Base64-encoded page images, in page order.
 * @returns The extracted invoice. Missing string fields become `''`
 *   (`currency` falls back to `'EUR'`), missing or non-numeric amounts become `0`.
 * @throws If the HTTP status is not OK, the response has no body, no JSON
 *   object is found in the answer, or the found object is not valid JSON.
 */
async function extractInvoiceFromImages(images: string[]): Promise<IInvoice> {
  console.log(`  [Vision] Processing ${images.length} page(s) with Ministral 3`);

  // JSON schema for structured output
  const invoiceSchema = {
    type: 'object',
    properties: {
      invoice_number: { type: 'string' },
      invoice_date: { type: 'string' },
      vendor_name: { type: 'string' },
      currency: { type: 'string' },
      net_amount: { type: 'number' },
      vat_amount: { type: 'number' },
      total_amount: { type: 'number' },
    },
    required: ['invoice_number', 'invoice_date', 'vendor_name', 'currency', 'net_amount', 'vat_amount', 'total_amount'],
  };

  const prompt = `You are an expert invoice data extraction system. Carefully analyze this invoice document and extract the following fields with high precision.

INVOICE NUMBER:
- Look for labels: "Invoice No", "Invoice #", "Invoice Number", "Rechnung Nr", "Rechnungsnummer", "Document No", "Bill No", "Reference"
- Usually alphanumeric, often starts with letters (e.g., R0014359508, INV-2024-001)
- Located near the top of the invoice

INVOICE DATE:
- Look for labels: "Invoice Date", "Date", "Datum", "Rechnungsdatum", "Issue Date", "Bill Date"
- Convert ANY date format to YYYY-MM-DD (e.g., 14/10/2021 → 2021-10-14, Oct 14, 2021 → 2021-10-14)
- Usually near the invoice number

VENDOR NAME:
- The company ISSUING the invoice (not the recipient)
- Found in letterhead, logo area, or header - typically the largest/most prominent company name
- Examples: "Hetzner Online GmbH", "Adobe Inc", "DigitalOcean LLC"

CURRENCY:
- Detect from symbols: € = EUR, $ = USD, £ = GBP
- Or from text: "EUR", "USD", "GBP"
- Default to EUR if unclear

AMOUNTS (Critical - read carefully!):
- total_amount: The FINAL amount due/payable - look for "Total", "Grand Total", "Amount Due", "Balance Due", "Gesamtbetrag", "Endbetrag"
- net_amount: Subtotal BEFORE tax - look for "Subtotal", "Net", "Netto", "excl. VAT"
- vat_amount: Tax amount - look for "VAT", "Tax", "MwSt", "USt", "19%", "20%"
- For multi-page invoices: the FINAL totals are usually on the LAST page

Return ONLY valid JSON with the extracted values.`;

  const response = await fetch(`${OLLAMA_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: VISION_MODEL,
      messages: [
        {
          role: 'user',
          content: prompt,
          images: images, // Send all page images
        },
      ],
      format: invoiceSchema,
      stream: true,
      options: {
        num_predict: 1024,
        temperature: 0.0,
      },
    }),
  });

  if (!response.ok) {
    throw new Error(`Ollama API error: ${response.status}`);
  }

  const reader = response.body?.getReader();
  if (!reader) {
    throw new Error('No response body');
  }

  const decoder = new TextDecoder();
  let fullText = '';
  // Holds the trailing, not yet newline-terminated part of the stream. A
  // network chunk may end in the middle of an NDJSON line; parsing each chunk
  // on its own would drop such a line.
  let pending = '';

  const consumeLine = (line: string): void => {
    if (!line.trim()) return;
    try {
      const json = JSON.parse(line);
      if (json.message?.content) {
        fullText += json.message.content;
      }
    } catch {
      // Skip invalid JSON lines
    }
  };

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    pending += decoder.decode(value, { stream: true });
    const lines = pending.split('\n');
    // The last element is either '' (chunk ended on a newline) or a partial line.
    pending = lines.pop() ?? '';
    for (const line of lines) {
      consumeLine(line);
    }
  }
  // Flush bytes still buffered in the decoder and a final unterminated line.
  pending += decoder.decode();
  consumeLine(pending);

  // Parse JSON response: strip an optional Markdown code fence, then take the
  // span from the first '{' to the last '}'.
  let jsonStr = fullText.trim();
  if (jsonStr.startsWith('```json')) jsonStr = jsonStr.slice(7);
  else if (jsonStr.startsWith('```')) jsonStr = jsonStr.slice(3);
  if (jsonStr.endsWith('```')) jsonStr = jsonStr.slice(0, -3);
  jsonStr = jsonStr.trim();

  const startIdx = jsonStr.indexOf('{');
  const endIdx = jsonStr.lastIndexOf('}') + 1;
  if (startIdx < 0 || endIdx <= startIdx) {
    throw new Error(`No JSON found: ${fullText.substring(0, 200)}`);
  }

  const parsed = JSON.parse(jsonStr.substring(startIdx, endIdx));

  return {
    // The interface declares these as `string`, so absent values are '' rather than null.
    invoice_number: parsed.invoice_number || '',
    invoice_date: parsed.invoice_date || '',
    vendor_name: parsed.vendor_name || '',
    currency: parsed.currency || 'EUR',
    net_amount: parseFloat(parsed.net_amount) || 0,
    vat_amount: parseFloat(parsed.vat_amount) || 0,
    total_amount: parseFloat(parsed.total_amount) || 0,
  };
}
/**
 * Normalize a date string to YYYY-MM-DD.
 *
 * Recognized inputs (after trimming surrounding whitespace):
 * - `YYYY-MM-DD`: returned as is
 * - `D-MON-YYYY` with a three-letter English month abbreviation, any case
 * - `D/M/YYYY` or `D.M.YYYY`, read day-first
 *
 * @param dateStr - Date text to normalize; `null` or empty yields `''`.
 * @returns The date as YYYY-MM-DD, or the trimmed input when the format (or
 *   the month abbreviation) is not recognized.
 */
function normalizeDate(dateStr: string | null): string {
  if (!dateStr) return '';

  const input = dateStr.trim();
  if (/^\d{4}-\d{2}-\d{2}$/.test(input)) return input;

  const monthMap: Record<string, string> = {
    JAN: '01', FEB: '02', MAR: '03', APR: '04', MAY: '05', JUN: '06',
    JUL: '07', AUG: '08', SEP: '09', OCT: '10', NOV: '11', DEC: '12',
  };

  const named = /^(\d{1,2})-([A-Z]{3})-(\d{4})$/i.exec(input);
  if (named) {
    const [, day, monthAbbr, year] = named;
    const month = monthMap[monthAbbr.toUpperCase()];
    // An unknown abbreviation is left untouched instead of being turned into January.
    if (!month) return input;
    return `${year}-${month}-${day.padStart(2, '0')}`;
  }

  const numeric = /^(\d{1,2})[\/.](\d{1,2})[\/.](\d{4})$/.exec(input);
  if (numeric) {
    const [, day, month, year] = numeric;
    return `${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`;
  }

  return input;
}
/**
 * Compare an extracted invoice against the expected one.
 *
 * Checked fields:
 * - `invoice_number`: whitespace removed, case-insensitive
 * - `invoice_date`: both sides passed through `normalizeDate`
 * - `total_amount`: must agree within 0.02
 * - `currency`: case-insensitive
 *
 * `vendor_name`, `net_amount` and `vat_amount` are not compared.
 *
 * @param extracted - Invoice produced by the extraction.
 * @param expected - Reference invoice.
 * @returns `match` is true when no field differs; `errors` holds one message
 *   per differing field.
 */
function compareInvoice(extracted: IInvoice, expected: IInvoice): { match: boolean; errors: string[] } {
  const errors: string[] = [];

  const canonicalNumber = (value: string | null | undefined): string =>
    value?.replace(/\s/g, '').toLowerCase() || '';

  if (canonicalNumber(extracted.invoice_number) !== canonicalNumber(expected.invoice_number)) {
    errors.push(`invoice_number: expected "${expected.invoice_number}", got "${extracted.invoice_number}"`);
  }

  if (normalizeDate(extracted.invoice_date) !== normalizeDate(expected.invoice_date)) {
    errors.push(`invoice_date: expected "${expected.invoice_date}", got "${extracted.invoice_date}"`);
  }

  // Written as a negated `<=` so that a NaN difference (a missing or
  // non-numeric amount on either side) counts as a mismatch; `NaN > 0.02` is
  // false and would let such a case pass.
  const amountDelta = Math.abs(extracted.total_amount - expected.total_amount);
  if (!(amountDelta <= 0.02)) {
    errors.push(`total_amount: expected ${expected.total_amount}, got ${extracted.total_amount}`);
  }

  if (extracted.currency?.toUpperCase() !== expected.currency?.toUpperCase()) {
    errors.push(`currency: expected "${expected.currency}", got "${extracted.currency}"`);
  }

  return { match: errors.length === 0, errors };
}
/**
 * Find test cases in `.nogit/invoices` below the current working directory.
 *
 * A test case is a `<name>.pdf` file that has a `<name>.json` file next to it;
 * PDFs without a JSON companion are ignored.
 *
 * @returns The test cases sorted by name, or an empty array when the
 *   directory does not exist.
 */
function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: string }> {
  const testDir = path.join(process.cwd(), '.nogit/invoices');
  if (!fs.existsSync(testDir)) return [];

  const pdfExt = '.pdf';
  const available = new Set(fs.readdirSync(testDir));
  const testCases: Array<{ name: string; pdfPath: string; jsonPath: string }> = [];

  for (const pdf of available) {
    if (!pdf.endsWith(pdfExt)) continue;

    // Cut off the extension only; `replace('.pdf', '')` would remove the first
    // ".pdf" anywhere in the name (e.g. inside "scan.pdf.v2.pdf").
    const baseName = pdf.slice(0, -pdfExt.length);
    const jsonFile = `${baseName}.json`;
    if (!available.has(jsonFile)) continue;

    testCases.push({
      name: baseName,
      pdfPath: path.join(testDir, pdf),
      jsonPath: path.join(testDir, jsonFile),
    });
  }

  return testCases.sort((a, b) => a.name.localeCompare(b.name));
}
// Tests

// Make sure the model backend is available before any extraction test runs.
tap.test('setup: ensure Ministral 3 is running', async () => {
  console.log('\n[Setup] Checking Ministral 3...\n');
  const ok = await ensureMinistral3();
  expect(ok).toBeTrue();
  console.log('\n[Setup] Ready!\n');
});

const testCases = findTestCases();
console.log(`\nFound ${testCases.length} invoice test cases (Ministral 3 Vision Direct)\n`);

// Counters shared by the per-invoice tests and read by the summary test.
let passedCount = 0;
let failedCount = 0;
// Wall-clock milliseconds of every extraction that ran to completion.
const times: number[] = [];

// One tap test per PDF/JSON pair: convert, extract, compare with the fixture.
for (const testCase of testCases) {
  tap.test(`should extract invoice: ${testCase.name}`, async () => {
    let result: { match: boolean; errors: string[] };
    let elapsed = 0;

    try {
      const expected: IInvoice = JSON.parse(fs.readFileSync(testCase.jsonPath, 'utf-8'));
      console.log(`\n=== ${testCase.name} ===`);
      console.log(`Expected: ${expected.invoice_number} | ${expected.invoice_date} | ${expected.total_amount} ${expected.currency}`);

      const start = Date.now();
      const images = convertPdfToImages(testCase.pdfPath);
      console.log(`  Pages: ${images.length}`);

      const extracted = await extractInvoiceFromImages(images);
      console.log(`  Extracted: ${extracted.invoice_number} | ${extracted.invoice_date} | ${extracted.total_amount} ${extracted.currency}`);

      elapsed = Date.now() - start;
      times.push(elapsed);

      result = compareInvoice(extracted, expected);
    } catch (err: unknown) {
      // A crashed conversion or extraction is a failed invoice too; without
      // this, passed + failed would not add up to the total in the summary.
      failedCount++;
      console.log(`  Result: ERROR (${err instanceof Error ? err.message : String(err)})`);
      throw err;
    }

    if (result.match) {
      passedCount++;
      console.log(`  Result: MATCH (${(elapsed / 1000).toFixed(1)}s)`);
    } else {
      failedCount++;
      console.log(`  Result: MISMATCH (${(elapsed / 1000).toFixed(1)}s)`);
      result.errors.forEach((e) => console.log(`    - ${e}`));
    }

    expect(result.match).toBeTrue();
  });
}

// Registered last, so it reports the totals of the tests registered above.
tap.test('summary', async () => {
  const total = testCases.length;
  const accuracy = total > 0 ? (passedCount / total) * 100 : 0;
  const totalTime = times.reduce((a, b) => a + b, 0) / 1000;
  const avgTime = times.length > 0 ? totalTime / times.length : 0;

  console.log(`\n======================================================`);
  console.log(`  Invoice Extraction Summary (Ministral 3 Vision)`);
  console.log(`======================================================`);
  console.log(`  Method:      Ministral 3 8B Vision (Direct)`);
  console.log(`  Passed:      ${passedCount}/${total}`);
  console.log(`  Failed:      ${failedCount}/${total}`);
  console.log(`  Accuracy:    ${accuracy.toFixed(1)}%`);
  console.log(`------------------------------------------------------`);
  console.log(`  Total time:  ${totalTime.toFixed(1)}s`);
  console.log(`  Avg per inv: ${avgTime.toFixed(1)}s`);
  console.log(`======================================================\n`);
});

export default tap.start();