feat(paddleocr-vl): add PaddleOCR-VL full pipeline Docker image and API server, plus integration tests and docker helpers

2026-01-17 20:22:23 +00:00
parent addae20cbd
commit 80e6866442
12 changed files with 2414 additions and 21 deletions
--- a/test/helpers/docker.ts
+++ b/test/helpers/docker.ts
@@ -0,0 +1,297 @@
+import { execSync } from 'child_process';
+
+// Project container names (only manage these).
+// stopConflictingContainers() iterates this list, so containers that are not
+// named here are never inspected or stopped by these helpers.
+const PROJECT_CONTAINERS = [
+  'paddleocr-vl-test',
+  'paddleocr-vl-gpu-test',
+  'paddleocr-vl-cpu-test',
+  'paddleocr-vl-full-test',
+  'minicpm-test',
+];
+
+// Image configurations
+/** Everything needed to build an image, run it as a container and probe it. */
+export interface IImageConfig {
+  /** Image tag: passed to `docker build -t` and used as the image for `docker run`. */
+  name: string;
+  /** Dockerfile path passed to `docker build -f`. */
+  dockerfile: string;
+  /** Build context directory passed as the last `docker build` argument. */
+  buildContext: string;
+  /** Name given to the container via `docker run --name`. */
+  containerName: string;
+  /** Port mappings, each passed verbatim to `docker run -p`; the first one is used for conflict detection. */
+  ports: string[];
+  /** Volume mappings, each passed verbatim to `docker run -v`. */
+  volumes?: string[];
+  /** When true the container is started with `--gpus all`. */
+  gpus?: boolean;
+  /** URL polled with GET until it answers with an OK status; no health wait when omitted. */
+  healthEndpoint?: string;
+  /** Maximum time in milliseconds to wait for the health endpoint (waitForHealth's default applies when omitted). */
+  healthTimeout?: number;
+}
+
+// Service definitions consumed by ensureService(). Each entry names the image
+// to build/run, the container created from it and the endpoint used to probe it.
+// NOTE: paddleocrVlGpu and paddleocrVlCpu use the same container name and the
+// same host port (8000), so only one of the two can be up at a time.
+export const IMAGES = {
+  // PaddleOCR-VL, GPU build
+  paddleocrVlGpu: {
+    name: 'paddleocr-vl-gpu',
+    dockerfile: 'Dockerfile_paddleocr_vl_gpu',
+    buildContext: '.',
+    containerName: 'paddleocr-vl-test',
+    ports: ['8000:8000'],
+    volumes: ['ht-huggingface-cache:/root/.cache/huggingface'],
+    gpus: true,
+    healthEndpoint: 'http://localhost:8000/health',
+    healthTimeout: 300000, // 5 minutes for model loading
+  } as IImageConfig,
+
+  // PaddleOCR-VL, CPU build (same container name and port as the GPU entry)
+  paddleocrVlCpu: {
+    name: 'paddleocr-vl-cpu',
+    dockerfile: 'Dockerfile_paddleocr_vl_cpu',
+    buildContext: '.',
+    containerName: 'paddleocr-vl-test',
+    ports: ['8000:8000'],
+    volumes: ['ht-huggingface-cache:/root/.cache/huggingface'],
+    gpus: false,
+    healthEndpoint: 'http://localhost:8000/health',
+    healthTimeout: 300000, // 5 minutes
+  } as IImageConfig,
+
+  // MiniCPM served on port 11434; health is probed via /api/tags
+  minicpm: {
+    name: 'minicpm45v',
+    dockerfile: 'Dockerfile_minicpm45v',
+    buildContext: '.',
+    containerName: 'minicpm-test',
+    ports: ['11434:11434'],
+    volumes: ['ht-ollama-models:/root/.ollama'],
+    gpus: true,
+    healthEndpoint: 'http://localhost:11434/api/tags',
+    healthTimeout: 120000, // 2 minutes
+  } as IImageConfig,
+
+  // Full PaddleOCR-VL pipeline with PP-DocLayoutV2 + structured JSON output
+  paddleocrVlFull: {
+    name: 'paddleocr-vl-full',
+    dockerfile: 'Dockerfile_paddleocr_vl_full',
+    buildContext: '.',
+    containerName: 'paddleocr-vl-full-test',
+    ports: ['8000:8000'],
+    volumes: [
+      'ht-huggingface-cache:/root/.cache/huggingface',
+      'ht-paddleocr-cache:/root/.paddleocr',
+    ],
+    gpus: true,
+    healthEndpoint: 'http://localhost:8000/health',
+    healthTimeout: 600000, // 10 minutes for model loading (vLLM + PP-DocLayoutV2)
+  } as IImageConfig,
+};
+
+/**
+ * Run a shell command synchronously.
+ *
+ * @param command - Command line handed to the shell through `execSync`.
+ * @param silent - When true, output is captured and returned, and any failure
+ *   (non-zero exit, missing binary, ...) yields `''` instead of throwing.
+ *   When false, the child inherits this process's stdio so its output streams
+ *   straight to the terminal, and failures are rethrown.
+ * @returns The captured stdout in silent mode; always `''` in non-silent mode.
+ * @throws Whatever `execSync` throws, but only when `silent` is false.
+ */
+function exec(command: string, silent = false): string {
+  try {
+    const output = execSync(command, {
+      encoding: 'utf-8',
+      stdio: silent ? 'pipe' : 'inherit',
+    });
+    // With inherited stdio nothing is captured and execSync yields null,
+    // which would break the declared string return type for callers.
+    return output ?? '';
+  } catch (err: unknown) {
+    // Silent mode is deliberately best-effort: callers treat '' as "no result".
+    if (silent) return '';
+    throw err;
+  }
+}
+
+/**
+ * Report whether `docker images -q` lists at least one image for the given name.
+ */
+export function imageExists(imageName: string): boolean {
+  const imageIds = exec(`docker images -q ${imageName}`, true).trim();
+  return imageIds !== '';
+}
+
+/**
+ * Report whether a container with exactly this name is currently running.
+ * The name filter is anchored (^...$) so that only the full name matches.
+ */
+export function isContainerRunning(containerName: string): boolean {
+  const listed = exec(
+    `docker ps --filter "name=^${containerName}$" --format "{{.Names}}"`,
+    true
+  );
+  return containerName === listed.trim();
+}
+
+/**
+ * Report whether a container with exactly this name exists in any state
+ * (`docker ps -a` also lists stopped containers).
+ */
+export function containerExists(containerName: string): boolean {
+  const listed = exec(
+    `docker ps -a --filter "name=^${containerName}$" --format "{{.Names}}"`,
+    true
+  );
+  return containerName === listed.trim();
+}
+
+/**
+ * Force-remove a container (running or stopped); does nothing when no
+ * container with that name exists.
+ */
+export function removeContainer(containerName: string): void {
+  if (!containerExists(containerName)) {
+    return;
+  }
+
+  console.log(`[Docker] Removing container: ${containerName}`);
+  exec(`docker rm -f ${containerName}`, true);
+}
+
+/**
+ * Stop every other running project container that publishes the host port
+ * the required container needs.
+ *
+ * @param requiredContainer - Container that is about to be used; never stopped.
+ * @param requiredPort - Port mapping in `docker run -p` form (`host:container`
+ *   or `ip:host:container`); a bare port without a colon is used as is.
+ */
+export function stopConflictingContainers(requiredContainer: string, requiredPort: string): void {
+  const mappingParts = requiredPort.split(':');
+  // The host port sits directly before the container port; a bare port has no
+  // explicit host side, so fall back to the value itself.
+  const hostPort =
+    mappingParts.length >= 2 ? mappingParts[mappingParts.length - 2] : mappingParts[0];
+
+  for (const container of PROJECT_CONTAINERS) {
+    if (container === requiredContainer) continue;
+    if (!isContainerRunning(container)) continue;
+
+    // `docker port` prints one binding per line, e.g. "8000/tcp -> 0.0.0.0:8000".
+    // Compare the host side exactly: a substring test over the whole output
+    // lets "8000" match "18000" or the container-side port of another mapping.
+    const bindings = exec(`docker port ${container} 2>/dev/null || true`, true);
+    const usesHostPort = bindings
+      .split('\n')
+      .some((line) => line.trim().endsWith(`:${hostPort}`));
+
+    if (usesHostPort) {
+      console.log(`[Docker] Stopping conflicting container: ${container}`);
+      exec(`docker stop ${container}`, true);
+    }
+  }
+}
+
+/**
+ * Build the image described by `config` with `docker build --load`.
+ * Build output is streamed to the terminal; a failing build throws.
+ */
+export function buildImage(config: IImageConfig): void {
+  const { name, dockerfile, buildContext } = config;
+  console.log(`[Docker] Building image: ${name}`);
+  exec(`docker build --load -f ${dockerfile} -t ${name} ${buildContext}`);
+}
+
+/**
+ * Start a detached container for `config`, replacing any existing container
+ * that already carries the same name.
+ */
+export function startContainer(config: IImageConfig): void {
+  // Clear out a previous container first so the name is free for `docker run`.
+  removeContainer(config.containerName);
+
+  console.log(`[Docker] Starting container: ${config.containerName}`);
+
+  const gpuArgs = config.gpus ? '--gpus all' : '';
+  const portArgs = config.ports.map((mapping) => `-p ${mapping}`).join(' ');
+  let volumeArgs = '';
+  if (config.volumes) {
+    volumeArgs = config.volumes.map((mount) => `-v ${mount}`).join(' ');
+  }
+
+  exec(
+    `docker run -d --name ${config.containerName} ${gpuArgs} ${portArgs} ${volumeArgs} ${config.name}`
+  );
+}
+
+/**
+ * Poll an HTTP endpoint until it answers with an OK status or the timeout
+ * elapses.
+ *
+ * @param endpoint - URL requested with GET; each attempt is aborted after 5s.
+ * @param timeoutMs - Overall budget in milliseconds (default 120000).
+ * @param intervalMs - Pause between attempts in milliseconds (default 5000).
+ * @returns true as soon as one attempt succeeds, false once the budget is spent.
+ */
+export async function waitForHealth(
+  endpoint: string,
+  timeoutMs: number = 120000,
+  intervalMs: number = 5000
+): Promise<boolean> {
+  const startedAt = Date.now();
+  const elapsedMs = (): number => Date.now() - startedAt;
+  const pause = (ms: number): Promise<void> =>
+    new Promise((resolve) => setTimeout(resolve, ms));
+
+  console.log(`[Docker] Waiting for health: ${endpoint}`);
+
+  while (elapsedMs() < timeoutMs) {
+    let healthy = false;
+    try {
+      const response = await fetch(endpoint, {
+        method: 'GET',
+        signal: AbortSignal.timeout(5000),
+      });
+      healthy = response.ok;
+    } catch {
+      // Request failed or was aborted: the service is not ready yet.
+    }
+
+    if (healthy) {
+      console.log(`[Docker] Service healthy!`);
+      return true;
+    }
+
+    console.log(`[Docker] Waiting... (${Math.round(elapsedMs() / 1000)}s)`);
+    await pause(intervalMs);
+  }
+
+  console.log(`[Docker] Health check timeout after ${timeoutMs / 1000}s`);
+  return false;
+}
+
+/**
+ * Bring a service up and wait until it is healthy.
+ *
+ * Steps, in order:
+ *  1. build the image when it is not available locally
+ *  2. stop other project containers that occupy the first configured port
+ *  3. start the container unless it is already running
+ *  4. poll the health endpoint, when one is configured
+ *
+ * @returns The health-check outcome, or true when no health endpoint is set.
+ */
+export async function ensureService(config: IImageConfig): Promise<boolean> {
+  const { name, containerName, ports, healthEndpoint, healthTimeout } = config;
+  console.log(`\n[Docker] Ensuring service: ${name}`);
+
+  if (!imageExists(name)) {
+    console.log(`[Docker] Image not found, building...`);
+    buildImage(config);
+  }
+
+  // Only the first port mapping is considered for conflict detection.
+  stopConflictingContainers(containerName, ports[0]);
+
+  if (isContainerRunning(containerName)) {
+    console.log(`[Docker] Container already running: ${containerName}`);
+  } else {
+    startContainer(config);
+  }
+
+  if (!healthEndpoint) {
+    return true;
+  }
+  return waitForHealth(healthEndpoint, healthTimeout);
+}
+
+/**
+ * Bring up the GPU build of PaddleOCR-VL (IMAGES.paddleocrVlGpu).
+ */
+export async function ensurePaddleOcrVlGpu(): Promise<boolean> {
+  const { paddleocrVlGpu } = IMAGES;
+  return ensureService(paddleocrVlGpu);
+}
+
+/**
+ * Bring up the CPU build of PaddleOCR-VL (IMAGES.paddleocrVlCpu).
+ */
+export async function ensurePaddleOcrVlCpu(): Promise<boolean> {
+  const { paddleocrVlCpu } = IMAGES;
+  return ensureService(paddleocrVlCpu);
+}
+
+/**
+ * Bring up the MiniCPM service (IMAGES.minicpm).
+ */
+export async function ensureMiniCpm(): Promise<boolean> {
+  const { minicpm } = IMAGES;
+  return ensureService(minicpm);
+}
+
+/**
+ * Detect a GPU by asking `nvidia-smi` for device names; any non-empty output
+ * counts as "GPU available".
+ */
+export function isGpuAvailable(): boolean {
+  let gpuNames: string;
+  try {
+    gpuNames = exec('nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null', true);
+  } catch {
+    return false;
+  }
+  return gpuNames.trim() !== '';
+}
+
+/**
+ * Bring up PaddleOCR-VL, choosing the GPU image when a GPU is detected and
+ * the CPU image otherwise.
+ */
+export async function ensurePaddleOcrVl(): Promise<boolean> {
+  if (!isGpuAvailable()) {
+    console.log('[Docker] No GPU detected, using CPU image');
+    return ensurePaddleOcrVlCpu();
+  }
+
+  console.log('[Docker] GPU detected, using GPU image');
+  return ensurePaddleOcrVlGpu();
+}
+
+/**
+ * Bring up the PaddleOCR-VL Full Pipeline service (PP-DocLayoutV2 + structured output).
+ * A missing GPU only produces a warning; the service start is attempted anyway.
+ */
+export async function ensurePaddleOcrVlFull(): Promise<boolean> {
+  const gpuPresent = isGpuAvailable();
+  if (!gpuPresent) {
+    console.log('[Docker] WARNING: Full pipeline requires GPU, but none detected');
+  }
+
+  const { paddleocrVlFull } = IMAGES;
+  return ensureService(paddleocrVlFull);
+}
--- a/test/test.bankstatements.combined.ts
+++ b/test/test.bankstatements.combined.ts
@@ -1,15 +1,23 @@
+/**
+ * Bank statement extraction test using MiniCPM-V (visual) + PaddleOCR-VL (table recognition)
+ *
+ * This is the combined/dual-VLM approach that uses both models for consensus:
+ *   - MiniCPM-V for visual extraction
+ *   - PaddleOCR-VL for table recognition
+ */
 import { tap, expect } from '@git.zone/tstest/tapbundle';
 import * as fs from 'fs';
 import * as path from 'path';
 import { execSync } from 'child_process';
 import * as os from 'os';
+import { ensurePaddleOcrVl, ensureMiniCpm } from './helpers/docker.js';

 // Service URLs
 const OLLAMA_URL = 'http://localhost:11434';
 const PADDLEOCR_VL_URL = 'http://localhost:8000';

 // Models
-const MINICPM_MODEL = 'openbmb/minicpm-v4.5:q8_0';
+const MINICPM_MODEL = 'minicpm-v:latest';
 const PADDLEOCR_VL_MODEL = 'paddleocr-vl';

 // Prompt for MiniCPM-V visual extraction
@@ -477,11 +485,18 @@ function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: strin

 // Tests

-tap.test('should connect to Ollama API', async () => {
-  const response = await fetch(`${OLLAMA_URL}/api/tags`);
-  expect(response.ok).toBeTrue();
-  const data = await response.json();
-  expect(data.models).toBeArray();
+tap.test('setup: ensure Docker containers are running', async () => {
+  console.log('\n[Setup] Checking Docker containers...\n');
+
+  // Ensure PaddleOCR-VL is running (auto-detects GPU/CPU)
+  const paddleOk = await ensurePaddleOcrVl();
+  expect(paddleOk).toBeTrue();
+
+  // Ensure MiniCPM is running
+  const minicpmOk = await ensureMiniCpm();
+  expect(minicpmOk).toBeTrue();
+
+  console.log('\n[Setup] All containers ready!\n');
 });

 tap.test('should have MiniCPM-V 4.5 model loaded', async () => {
@@ -494,8 +509,7 @@ tap.test('should have MiniCPM-V 4.5 model loaded', async () => {
 tap.test('should check PaddleOCR-VL availability', async () => {
  const available = await isPaddleOCRVLAvailable();
  console.log(`PaddleOCR-VL available: ${available}`);
-  // This test passes regardless - PaddleOCR-VL is optional
-  expect(true).toBeTrue();
+  expect(available).toBeTrue();
 });

 // Dynamic test for each PDF/JSON pair
--- a/test/test.bankstatements.minicpm.ts
+++ b/test/test.bankstatements.minicpm.ts
@@ -0,0 +1,334 @@
+/**
+ * Bank statement extraction test using MiniCPM-V only (visual extraction)
+ *
+ * This tests MiniCPM-V's ability to extract bank transactions directly from images
+ * without any OCR augmentation.
+ */
+import { tap, expect } from '@git.zone/tstest/tapbundle';
+import * as fs from 'fs';
+import * as path from 'path';
+import { execSync } from 'child_process';
+import * as os from 'os';
+import { ensureMiniCpm } from './helpers/docker.js';
+
+// Base URL of the Ollama HTTP API (used below for /api/generate and /api/tags)
+const OLLAMA_URL = 'http://localhost:11434';
+
+// Model tag sent in the `model` field of every generate request
+const MINICPM_MODEL = 'minicpm-v:latest';
+
+// Prompt for MiniCPM-V visual extraction. It asks for a bare JSON array of
+// {date, counterparty, amount} rows with signed amounts (debits negative).
+const MINICPM_EXTRACT_PROMPT = `/nothink
+You are a bank statement parser. Extract EVERY transaction from the table.
+
+Read the Amount column carefully:
+- "- 21,47 €" means DEBIT, output as: -21.47
+- "+ 1.000,00 €" means CREDIT, output as: 1000.00
+- European format: comma = decimal point
+
+For each row output: {"date":"YYYY-MM-DD","counterparty":"NAME","amount":-21.47}
+
+Do not skip any rows. Return ONLY the JSON array, no explanation.`;
+
+/** One bank-statement row, in the shape the prompt asks the model to emit. */
+interface ITransaction {
+  /** Transaction date, requested from the model as YYYY-MM-DD. */
+  date: string;
+  /** Counterparty name as read from the statement. */
+  counterparty: string;
+  /** Signed amount: debits negative, credits positive. */
+  amount: number;
+}
+
+/**
+ * Render every page of a PDF to PNG with ImageMagick and return the pages as
+ * base64 strings in page order.
+ *
+ * @param pdfPath - Path of the PDF to render.
+ * @returns One base64-encoded PNG per page, ordered by page number.
+ * @throws When the `convert` command fails.
+ */
+function convertPdfToImages(pdfPath: string): string[] {
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pdf-convert-'));
+  const outputPattern = path.join(tempDir, 'page-%d.png');
+
+  // Page number encoded in a "page-<n>.png" file name (0 when absent).
+  const pageNumber = (fileName: string): number => {
+    const match = /(\d+)\.png$/.exec(fileName);
+    return match ? Number(match[1]) : 0;
+  };
+
+  try {
+    execSync(
+      `convert -density 300 -quality 100 "${pdfPath}" -background white -alpha remove "${outputPattern}"`,
+      { stdio: 'pipe' }
+    );
+
+    // Sort numerically: a plain string sort puts "page-10.png" before
+    // "page-2.png" and scrambles documents with more than ten pages.
+    return fs
+      .readdirSync(tempDir)
+      .filter((f: string) => f.endsWith('.png'))
+      .sort((a: string, b: string) => pageNumber(a) - pageNumber(b))
+      .map((file: string) => fs.readFileSync(path.join(tempDir, file)).toString('base64'));
+  } finally {
+    // Always drop the scratch directory, even when conversion failed.
+    fs.rmSync(tempDir, { recursive: true, force: true });
+  }
+}
+
+/**
+ * Send the page images to MiniCPM-V through Ollama's streaming generate
+ * endpoint and parse the transactions out of the generated text.
+ *
+ * @param images - Base64-encoded page images.
+ * @param passLabel - Prefix used in the progress log line.
+ * @returns The JSON array found in the model output.
+ * @throws On a non-OK HTTP status, a missing response body, output without a
+ *   JSON array, or output whose array is not valid JSON.
+ */
+async function extractWithMiniCPM(images: string[], passLabel: string): Promise<ITransaction[]> {
+  const payload = {
+    model: MINICPM_MODEL,
+    prompt: MINICPM_EXTRACT_PROMPT,
+    images,
+    stream: true,
+    options: {
+      num_predict: 16384,
+      temperature: 0.1,
+    },
+  };
+
+  const response = await fetch(`${OLLAMA_URL}/api/generate`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(payload),
+  });
+
+  if (!response.ok) {
+    throw new Error(`Ollama API error: ${response.status}`);
+  }
+
+  const reader = response.body?.getReader();
+  if (!reader) {
+    throw new Error('No response body');
+  }
+
+  const decoder = new TextDecoder();
+  let fullText = '';
+  let lineBuffer = ''; // model output not yet echoed to the console
+  let pendingRecord = ''; // tail of the stream that is not a complete line yet
+
+  // Handle one complete line of the newline-delimited JSON stream.
+  const consumeRecord = (record: string): void => {
+    if (!record.trim()) return;
+    try {
+      const json = JSON.parse(record);
+      if (json.response) {
+        fullText += json.response;
+        lineBuffer += json.response;
+
+        // Echo only completed output lines; keep the unfinished tail buffered.
+        if (lineBuffer.includes('\n')) {
+          const parts = lineBuffer.split('\n');
+          for (let i = 0; i < parts.length - 1; i++) {
+            console.log(parts[i]);
+          }
+          lineBuffer = parts[parts.length - 1];
+        }
+      }
+    } catch {
+      // Skip invalid JSON lines
+    }
+  };
+
+  console.log(`[${passLabel}] Extracting with MiniCPM-V...`);
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+
+    // Network chunks do not align with record boundaries: a record may be
+    // split across two reads. Only lines terminated by '\n' are parsed; the
+    // remainder waits for the next chunk instead of being dropped.
+    pendingRecord += decoder.decode(value, { stream: true });
+    const records = pendingRecord.split('\n');
+    pendingRecord = records.pop() ?? '';
+    records.forEach(consumeRecord);
+  }
+
+  // Flush the decoder and a final record that was not newline-terminated.
+  pendingRecord += decoder.decode();
+  consumeRecord(pendingRecord);
+
+  if (lineBuffer) {
+    console.log(lineBuffer);
+  }
+  console.log('');
+
+  // The model may wrap the array in extra text: take first '[' to last ']'.
+  const startIdx = fullText.indexOf('[');
+  const endIdx = fullText.lastIndexOf(']') + 1;
+
+  if (startIdx < 0 || endIdx <= startIdx) {
+    throw new Error('No JSON array found in response');
+  }
+
+  return JSON.parse(fullText.substring(startIdx, endIdx));
+}
+
+/**
+ * Build an order-independent fingerprint of a transaction list from each
+ * row's date and two-decimal amount (the counterparty is not part of it).
+ */
+function hashTransactions(transactions: ITransaction[]): string {
+  const keys: string[] = [];
+  for (const { date, amount } of transactions) {
+    keys.push(`${date}|${amount.toFixed(2)}`);
+  }
+  keys.sort();
+  return keys.join(';');
+}
+
+/**
+ * Run MiniCPM-V extraction repeatedly until two passes agree.
+ *
+ * Two passes agree when their transaction fingerprints (see
+ * hashTransactions) are equal. A failing pass is logged and skipped.
+ *
+ * @param images - Base64-encoded page images.
+ * @param maxPasses - Maximum number of extraction attempts (default 5).
+ * @returns The first result confirmed by a second pass, or, without
+ *   consensus, the most frequent result (earliest pass wins ties).
+ * @throws When not a single pass produced a result.
+ */
+async function extractWithConsensus(
+  images: string[],
+  maxPasses: number = 5
+): Promise<ITransaction[]> {
+  const results: Array<{ transactions: ITransaction[]; hash: string }> = [];
+  const hashCounts: Map<string, number> = new Map();
+
+  // Record one pass and return how often its fingerprint has been seen so far.
+  const addResult = (transactions: ITransaction[], passLabel: string): number => {
+    const hash = hashTransactions(transactions);
+    const seen = (hashCounts.get(hash) ?? 0) + 1;
+    results.push({ transactions, hash });
+    hashCounts.set(hash, seen);
+    console.log(
+      `[${passLabel}] Got ${transactions.length} transactions (hash: ${hash.substring(0, 20)}...)`
+    );
+    return seen;
+  };
+
+  console.log('[Setup] Using MiniCPM-V only');
+
+  for (let pass = 1; pass <= maxPasses; pass++) {
+    try {
+      const transactions = await extractWithMiniCPM(images, `Pass ${pass} MiniCPM-V`);
+      const count = addResult(transactions, `Pass ${pass} MiniCPM-V`);
+
+      if (count >= 2) {
+        console.log(`[Consensus] Reached after ${pass} passes`);
+        return transactions;
+      }
+
+      console.log(`[Pass ${pass}] No consensus yet, trying again...`);
+    } catch (err) {
+      console.log(`[Pass ${pass}] Error: ${err}`);
+    }
+  }
+
+  // Decide on "no results" by the number of recorded passes, not by the hash:
+  // an empty transaction list hashes to '' and is still a valid result.
+  if (results.length === 0) {
+    throw new Error('No valid results obtained');
+  }
+
+  // No consensus reached - return the most common result
+  let best = results[0];
+  let bestCount = 0;
+  for (const candidate of results) {
+    const count = hashCounts.get(candidate.hash) ?? 0;
+    if (count > bestCount) {
+      bestCount = count;
+      best = candidate;
+    }
+  }
+
+  console.log(`[No consensus] Using most common result (${bestCount}/${maxPasses} passes)`);
+  return best.transactions;
+}
+
+/**
+ * Compare extracted rows with the expected rows position by position.
+ *
+ * A position matches when the dates are equal and the amounts differ by less
+ * than 0.01. Every other position contributes a message to `errors`, as does
+ * a surplus of extracted rows.
+ *
+ * @returns Match count, number of expected rows, and the collected messages.
+ */
+function compareTransactions(
+  extracted: ITransaction[],
+  expected: ITransaction[]
+): { matches: number; total: number; errors: string[] } {
+  const errors: string[] = [];
+  let matches = 0;
+
+  for (const [index, want] of expected.entries()) {
+    const got = extracted[index];
+
+    if (!got) {
+      errors.push(`Missing transaction ${index}: ${want.date} ${want.counterparty}`);
+      continue;
+    }
+
+    const sameDate = got.date === want.date;
+    const sameAmount = Math.abs(got.amount - want.amount) < 0.01;
+
+    if (sameDate && sameAmount) {
+      matches += 1;
+      continue;
+    }
+
+    errors.push(
+      `Mismatch at ${index}: expected ${want.date}/${want.amount}, got ${got.date}/${got.amount}`
+    );
+  }
+
+  const surplus = extracted.length - expected.length;
+  if (surplus > 0) {
+    errors.push(`Extra transactions: ${surplus}`);
+  }
+
+  return { matches, total: expected.length, errors };
+}
+
+/**
+ * Find all test cases (PDF + JSON pairs) in .nogit/
+ *
+ * A PDF becomes a test case only when a JSON file with the same base name
+ * sits next to it.
+ *
+ * @returns The pairs sorted by name; empty when the directory does not exist.
+ */
+function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: string }> {
+  const testDir = path.join(process.cwd(), '.nogit');
+  if (!fs.existsSync(testDir)) {
+    return [];
+  }
+
+  const pdfExtension = '.pdf';
+  const files = fs.readdirSync(testDir);
+  const available = new Set<string>(files);
+  const testCases: Array<{ name: string; pdfPath: string; jsonPath: string }> = [];
+
+  for (const pdf of files) {
+    if (!pdf.endsWith(pdfExtension)) continue;
+
+    // Cut the extension off the END of the name. String.replace would remove
+    // the first ".pdf" it finds, which is wrong for names like "my.pdfs.a.pdf".
+    const baseName = pdf.slice(0, -pdfExtension.length);
+    const jsonFile = `${baseName}.json`;
+    if (!available.has(jsonFile)) continue;
+
+    testCases.push({
+      name: baseName,
+      pdfPath: path.join(testDir, pdf),
+      jsonPath: path.join(testDir, jsonFile),
+    });
+  }
+
+  // Directory listing order is not guaranteed; sort for a stable test order.
+  testCases.sort((a, b) => a.name.localeCompare(b.name));
+  return testCases;
+}
+
+// Tests
+
+// Setup step: passes only when ensureMiniCpm() resolves to true.
+tap.test('setup: ensure Docker containers are running', async () => {
+  console.log('\n[Setup] Checking Docker containers...\n');
+
+  const miniCpmReady = await ensureMiniCpm();
+  expect(miniCpmReady).toBeTrue();
+
+  console.log('\n[Setup] All containers ready!\n');
+});
+
+// Every generate request in this file is sent with MINICPM_MODEL, so that
+// exact tag is the one that has to appear in Ollama's model list. (Looking
+// for the substring 'minicpm-v4.5' can never match 'minicpm-v:latest'.)
+tap.test('should have MiniCPM-V 4.5 model loaded', async () => {
+  const response = await fetch(`${OLLAMA_URL}/api/tags`);
+  expect(response.ok).toBeTrue();
+
+  const data = await response.json();
+  const modelNames: string[] = data.models.map((m: { name: string }) => m.name);
+  expect(modelNames.includes(MINICPM_MODEL)).toBeTrue();
+});
+
+// One test is registered per PDF/JSON pair found by findTestCases()
+const testCases = findTestCases();
+console.log(`\nFound ${testCases.length} bank statement test cases (MiniCPM-V only)\n`);
+
+for (const { name, pdfPath, jsonPath } of testCases) {
+  tap.test(`should extract transactions from ${name}`, async () => {
+    // Ground truth for this statement
+    const expected: ITransaction[] = JSON.parse(fs.readFileSync(jsonPath, 'utf-8'));
+    console.log(`\n=== ${name} ===`);
+    console.log(`Expected: ${expected.length} transactions`);
+
+    // Render the PDF pages
+    console.log('Converting PDF to images...');
+    const images = convertPdfToImages(pdfPath);
+    console.log(`Converted: ${images.length} pages\n`);
+
+    // Consensus extraction (MiniCPM-V only)
+    const extracted = await extractWithConsensus(images);
+    console.log(`\nFinal: ${extracted.length} transactions`);
+
+    // Position-by-position comparison
+    const { matches, total, errors } = compareTransactions(extracted, expected);
+    console.log(`Accuracy: ${matches}/${total}`);
+
+    if (errors.length > 0) {
+      console.log('Errors:');
+      for (const message of errors) {
+        console.log(`  - ${message}`);
+      }
+    }
+
+    // More than 95% of rows must match and the row count must be exact
+    const accuracy = matches / total;
+    expect(accuracy).toBeGreaterThan(0.95);
+    expect(extracted.length).toEqual(expected.length);
+  });
+}
+
+export default tap.start();
--- a/test/test.bankstatements.paddleocr-vl.ts
+++ b/test/test.bankstatements.paddleocr-vl.ts
@@ -0,0 +1,346 @@
+/**
+ * Bank statement extraction test using PaddleOCR-VL Full Pipeline
+ *
+ * This tests the complete PaddleOCR-VL pipeline for bank statements:
+ *   1. PP-DocLayoutV2 for layout detection
+ *   2. PaddleOCR-VL for recognition (tables with proper structure)
+ *   3. Structured Markdown output with tables
+ *   4. MiniCPM extracts transactions from structured tables
+ *
+ * The structured Markdown has properly formatted tables,
+ * making it much easier for MiniCPM to extract transaction data.
+ */
+import { tap, expect } from '@git.zone/tstest/tapbundle';
+import * as fs from 'fs';
+import * as path from 'path';
+import { execSync } from 'child_process';
+import * as os from 'os';
+import { ensurePaddleOcrVlFull, ensureMiniCpm } from './helpers/docker.js';
+
+// Base URL of the PaddleOCR-VL service (its /parse endpoint is called below)
+const PADDLEOCR_VL_URL = 'http://localhost:8000';
+// Base URL of the Ollama HTTP API (its /api/generate endpoint is called below)
+const OLLAMA_URL = 'http://localhost:11434';
+// Model tag sent in the `model` field of every generate request
+const MINICPM_MODEL = 'minicpm-v:latest';
+
+/** One bank-statement row, in the shape the extraction prompt asks for. */
+interface ITransaction {
+  /** Transaction date, requested from the model as YYYY-MM-DD. */
+  date: string;
+  /** Counterparty name as read from the statement. */
+  counterparty: string;
+  /** Signed amount: debits negative, credits positive. */
+  amount: number;
+}
+
+/**
+ * Render every page of a PDF to PNG with ImageMagick and return the pages as
+ * base64 strings in page order.
+ *
+ * @param pdfPath - Path of the PDF to render.
+ * @returns One base64-encoded PNG per page, ordered by page number.
+ * @throws When the `convert` command fails.
+ */
+function convertPdfToImages(pdfPath: string): string[] {
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pdf-convert-'));
+  const outputPattern = path.join(tempDir, 'page-%d.png');
+
+  // Page number encoded in a "page-<n>.png" file name (0 when absent).
+  const pageNumber = (fileName: string): number => {
+    const match = /(\d+)\.png$/.exec(fileName);
+    return match ? Number(match[1]) : 0;
+  };
+
+  try {
+    execSync(
+      `convert -density 300 -quality 100 "${pdfPath}" -background white -alpha remove "${outputPattern}"`,
+      { stdio: 'pipe' }
+    );
+
+    // Sort numerically: a plain string sort puts "page-10.png" before
+    // "page-2.png" and scrambles documents with more than ten pages.
+    return fs
+      .readdirSync(tempDir)
+      .filter((f: string) => f.endsWith('.png'))
+      .sort((a: string, b: string) => pageNumber(a) - pageNumber(b))
+      .map((file: string) => fs.readFileSync(path.join(tempDir, file)).toString('base64'));
+  } finally {
+    // Always drop the scratch directory, even when conversion failed.
+    fs.rmSync(tempDir, { recursive: true, force: true });
+  }
+}
+
+/**
+ * Send one page image to the PaddleOCR-VL /parse endpoint and return the
+ * Markdown it produces.
+ *
+ * @param imageBase64 - Base64-encoded page image.
+ * @returns The `result.markdown` field of the reply, or '' when it is absent.
+ * @throws On a non-OK HTTP status or when the reply's `success` flag is falsy.
+ */
+async function parseDocument(imageBase64: string): Promise<string> {
+  const requestBody = JSON.stringify({
+    image: imageBase64,
+    output_format: 'markdown',
+  });
+
+  const response = await fetch(`${PADDLEOCR_VL_URL}/parse`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: requestBody,
+  });
+
+  if (!response.ok) {
+    const detail = await response.text();
+    throw new Error(`PaddleOCR-VL API error: ${response.status} - ${detail}`);
+  }
+
+  const reply = await response.json();
+  if (!reply.success) {
+    throw new Error(`PaddleOCR-VL error: ${reply.error}`);
+  }
+
+  const markdown = reply.result?.markdown;
+  return markdown || '';
+}
+
+/**
+ * Ask MiniCPM (through Ollama's streaming generate endpoint) to turn the
+ * Markdown of one page into a JSON array of transactions.
+ *
+ * @param markdown - Markdown text of a single page.
+ * @returns The JSON array found in the model output.
+ * @throws On a non-OK HTTP status, a missing response body, output without a
+ *   JSON array, or output whose array is not valid JSON.
+ */
+async function extractTransactionsFromMarkdown(markdown: string): Promise<ITransaction[]> {
+  console.log(`    [Extract] Processing ${markdown.length} chars of Markdown`);
+
+  const prompt = `/nothink
+Convert this bank statement to a JSON array of transactions.
+
+Read the Amount values carefully:
+- "- 21,47 €" means DEBIT, output as: -21.47
+- "+ 1.000,00 €" means CREDIT, output as: 1000.00
+- European format: comma = decimal point, dot = thousands
+
+For each transaction output: {"date":"YYYY-MM-DD","counterparty":"NAME","amount":-21.47}
+
+Return ONLY the JSON array, no explanation.
+
+Document:
+${markdown}`;
+
+  const payload = {
+    model: MINICPM_MODEL,
+    prompt,
+    stream: true,
+    options: {
+      num_predict: 16384,
+      temperature: 0.1,
+    },
+  };
+
+  const response = await fetch(`${OLLAMA_URL}/api/generate`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(payload),
+  });
+
+  if (!response.ok) {
+    throw new Error(`Ollama API error: ${response.status}`);
+  }
+
+  const reader = response.body?.getReader();
+  if (!reader) {
+    throw new Error('No response body');
+  }
+
+  const decoder = new TextDecoder();
+  let fullText = '';
+  let pendingRecord = ''; // tail of the stream that is not a complete line yet
+
+  // Handle one complete line of the newline-delimited JSON stream.
+  const consumeRecord = (record: string): void => {
+    if (!record.trim()) return;
+    try {
+      const json = JSON.parse(record);
+      if (json.response) {
+        fullText += json.response;
+      }
+    } catch {
+      // Skip invalid JSON lines
+    }
+  };
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+
+    // Network chunks do not align with record boundaries: a record may be
+    // split across two reads. Only lines terminated by '\n' are parsed; the
+    // remainder waits for the next chunk instead of being dropped.
+    pendingRecord += decoder.decode(value, { stream: true });
+    const records = pendingRecord.split('\n');
+    pendingRecord = records.pop() ?? '';
+    records.forEach(consumeRecord);
+  }
+
+  // Flush the decoder and a final record that was not newline-terminated.
+  pendingRecord += decoder.decode();
+  consumeRecord(pendingRecord);
+
+  // Extract JSON array from response: first '[' through last ']'
+  const startIdx = fullText.indexOf('[');
+  const endIdx = fullText.lastIndexOf(']') + 1;
+
+  if (startIdx < 0 || endIdx <= startIdx) {
+    throw new Error(`No JSON array found in response: ${fullText.substring(0, 200)}`);
+  }
+
+  const jsonStr = fullText.substring(startIdx, endIdx);
+  return JSON.parse(jsonStr);
+}
+
+/**
+ * Run the parse-then-extract pipeline on every page and concatenate the
+ * transactions in page order.
+ *
+ * A page whose transaction extraction fails is logged and contributes
+ * nothing; a failure while parsing a page is not caught here.
+ */
+async function extractAllTransactions(images: string[]): Promise<ITransaction[]> {
+  const collected: ITransaction[] = [];
+  const pageCount = images.length;
+
+  for (const [index, image] of images.entries()) {
+    console.log(`  Processing page ${index + 1}/${pageCount}...`);
+
+    // Step 1: page image -> Markdown
+    const markdown = await parseDocument(image);
+    const lineCount = markdown.split('\n').length;
+    console.log(`    [Parse] Got ${lineCount} lines of Markdown`);
+
+    // Step 2: Markdown -> transactions
+    try {
+      const pageTransactions = await extractTransactionsFromMarkdown(markdown);
+      console.log(`    [Extracted] ${pageTransactions.length} transactions`);
+      collected.push(...pageTransactions);
+    } catch (err) {
+      console.log(`    [Error] ${err}`);
+    }
+  }
+
+  return collected;
+}
+
+/**
+ * Return the first expected transaction that matches `tx`.
+ *
+ * A match needs an equal date, amounts less than 0.02 apart, and
+ * case-insensitive counterparty overlap: one name must contain the first ten
+ * characters of the other.
+ */
+function findMatchingTransaction(
+  tx: ITransaction,
+  expectedList: ITransaction[]
+): ITransaction | undefined {
+  for (const candidate of expectedList) {
+    const sameDate = tx.date === candidate.date;
+    const closeAmount = Math.abs(tx.amount - candidate.amount) < 0.02;
+
+    const txName = tx.counterparty?.toLowerCase();
+    const candidateName = candidate.counterparty?.toLowerCase();
+    const namesOverlap =
+      txName?.includes(candidateName?.slice(0, 10)) ||
+      candidateName?.includes(txName?.slice(0, 10));
+
+    if (sameDate && closeAmount && namesOverlap) {
+      return candidate;
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Measure how many expected transactions were found among the extracted ones.
+ *
+ * Each extracted row claims the first still-unclaimed expected row with an
+ * equal date and an amount less than 0.02 away; order does not matter and an
+ * expected row can be claimed only once.
+ *
+ * @returns Claimed count, number of expected rows, and the percentage (0 when
+ *   nothing is expected).
+ */
+function calculateAccuracy(
+  extracted: ITransaction[],
+  expected: ITransaction[]
+): { matched: number; total: number; accuracy: number } {
+  const claimed = new Set<number>();
+
+  for (const tx of extracted) {
+    const hit = expected.findIndex(
+      (exp, index) =>
+        !claimed.has(index) &&
+        tx.date === exp.date &&
+        Math.abs(tx.amount - exp.amount) < 0.02
+    );
+    if (hit !== -1) {
+      claimed.add(hit);
+    }
+  }
+
+  const matched = claimed.size;
+  const total = expected.length;
+  const accuracy = total > 0 ? (matched / total) * 100 : 0;
+  return { matched, total, accuracy };
+}
+
+/**
+ * Find all test cases (PDF + JSON pairs) in .nogit/bankstatements/
+ *
+ * A PDF becomes a test case only when a JSON file with the same base name
+ * sits next to it.
+ *
+ * @returns The pairs sorted by name; empty when the directory does not exist.
+ */
+function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: string }> {
+  const testDir = path.join(process.cwd(), '.nogit/bankstatements');
+  if (!fs.existsSync(testDir)) {
+    return [];
+  }
+
+  const pdfExtension = '.pdf';
+  const files = fs.readdirSync(testDir);
+  const available = new Set<string>(files);
+  const testCases: Array<{ name: string; pdfPath: string; jsonPath: string }> = [];
+
+  for (const pdf of files) {
+    if (!pdf.endsWith(pdfExtension)) continue;
+
+    // Cut the extension off the END of the name. String.replace would remove
+    // the first ".pdf" it finds, which is wrong for names like "my.pdfs.a.pdf".
+    const baseName = pdf.slice(0, -pdfExtension.length);
+    const jsonFile = `${baseName}.json`;
+    if (!available.has(jsonFile)) continue;
+
+    testCases.push({
+      name: baseName,
+      pdfPath: path.join(testDir, pdf),
+      jsonPath: path.join(testDir, jsonFile),
+    });
+  }
+
+  testCases.sort((a, b) => a.name.localeCompare(b.name));
+  return testCases;
+}
+
+// Tests
+
+// Setup step: passes only when both services report ready.
+tap.test('setup: ensure Docker containers are running', async () => {
+  console.log('\n[Setup] Checking Docker containers...\n');
+
+  // PaddleOCR-VL Full Pipeline turns page images into Markdown
+  const pipelineReady = await ensurePaddleOcrVlFull();
+  expect(pipelineReady).toBeTrue();
+
+  // MiniCPM extracts the transactions from that Markdown
+  const miniCpmReady = await ensureMiniCpm();
+  expect(miniCpmReady).toBeTrue();
+
+  console.log('\n[Setup] All containers ready!\n');
+});
+
+// One test is registered per PDF/JSON pair found by findTestCases()
+const testCases = findTestCases();
+console.log(`\nFound ${testCases.length} bank statement test cases (PaddleOCR-VL Full Pipeline)\n`);
+
+// Per-statement outcomes: filled by the tests below, read by the summary test
+const results: Array<{ name: string; accuracy: number; matched: number; total: number }> = [];
+
+for (const { name, pdfPath, jsonPath } of testCases) {
+  tap.test(`should extract bank statement: ${name}`, async () => {
+    // Ground truth for this statement
+    const expected: ITransaction[] = JSON.parse(fs.readFileSync(jsonPath, 'utf-8'));
+    console.log(`\n=== ${name} ===`);
+    console.log(`Expected: ${expected.length} transactions`);
+
+    const startedAt = Date.now();
+
+    // Render the PDF pages
+    const images = convertPdfToImages(pdfPath);
+    console.log(`  Pages: ${images.length}`);
+
+    // Run the full pipeline over all pages
+    const extracted = await extractAllTransactions(images);
+
+    const elapsedMs = Date.now() - startedAt;
+
+    // Score the extraction and keep the outcome for the summary
+    const { accuracy, matched, total } = calculateAccuracy(extracted, expected);
+    results.push({ name, accuracy, matched, total });
+
+    console.log(`  Extracted: ${extracted.length} transactions`);
+    console.log(`  Matched: ${matched}/${total} (${accuracy.toFixed(1)}%)`);
+    console.log(`  Time: ${(elapsedMs / 1000).toFixed(1)}s`);
+
+    // Pass threshold: strictly more than 50% of the expected rows matched
+    expect(accuracy).toBeGreaterThan(50);
+  });
+}
+
+// Prints aggregate figures over the collected `results`; asserts nothing.
+tap.test('summary', async () => {
+  let accuracySum = 0;
+  let totalMatched = 0;
+  let totalExpected = 0;
+  for (const entry of results) {
+    accuracySum += entry.accuracy;
+    totalMatched += entry.matched;
+    totalExpected += entry.total;
+  }
+
+  const totalStatements = results.length;
+  const avgAccuracy = totalStatements > 0 ? accuracySum / totalStatements : 0;
+
+  console.log(`\n======================================================`);
+  console.log(`  Bank Statement Extraction Summary (PaddleOCR-VL Full)`);
+  console.log(`======================================================`);
+  console.log(`  Method:      PaddleOCR-VL Full Pipeline -> MiniCPM`);
+  console.log(`  Statements:  ${totalStatements}`);
+  console.log(`  Transactions: ${totalMatched}/${totalExpected} matched`);
+  console.log(`  Avg accuracy: ${avgAccuracy.toFixed(1)}%`);
+  console.log(`======================================================\n`);
+});
+
+export default tap.start();
--- a/test/test.invoices.combined.ts
+++ b/test/test.invoices.combined.ts
@@ -1,11 +1,19 @@
+/**
+ * Invoice extraction test using MiniCPM-V (visual) + PaddleOCR-VL (OCR augmentation)
+ *
+ * This is the combined approach that uses both models for best accuracy:
+ *   - MiniCPM-V for visual understanding
+ *   - PaddleOCR-VL for OCR text to augment prompts
+ */
 import { tap, expect } from '@git.zone/tstest/tapbundle';
 import * as fs from 'fs';
 import * as path from 'path';
 import { execSync } from 'child_process';
 import * as os from 'os';
+import { ensurePaddleOcrVl, ensureMiniCpm } from './helpers/docker.js';

 const OLLAMA_URL = 'http://localhost:11434';
-const MODEL = 'openbmb/minicpm-v4.5:q8_0';
+const MODEL = 'minicpm-v:latest';
 const PADDLEOCR_VL_URL = 'http://localhost:8000';

 interface IInvoice {
@@ -358,11 +366,18 @@ function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: strin

 // Tests

-tap.test('should connect to Ollama API', async () => {
-  const response = await fetch(`${OLLAMA_URL}/api/tags`);
-  expect(response.ok).toBeTrue();
-  const data = await response.json();
-  expect(data.models).toBeArray();
+// Setup step: calls the shared docker helpers for PaddleOCR-VL and MiniCPM in turn
+// and asserts that each helper resolved to true.
+tap.test('setup: ensure Docker containers are running', async () => {
+  console.log('\n[Setup] Checking Docker containers...\n');
+
+  // Ensure PaddleOCR-VL is running (auto-detects GPU/CPU)
+  const paddleOk = await ensurePaddleOcrVl();
+  expect(paddleOk).toBeTrue();
+
+  // Ensure MiniCPM is running
+  const minicpmOk = await ensureMiniCpm();
+  expect(minicpmOk).toBeTrue();
+
+  console.log('\n[Setup] All containers ready!\n');
 });

 tap.test('should have MiniCPM-V 4.5 model loaded', async () => {
--- a/test/test.invoices.minicpm.ts
+++ b/test/test.invoices.minicpm.ts
@@ -0,0 +1,345 @@
+/**
+ * Invoice extraction test using MiniCPM-V only (visual extraction)
+ *
+ * This tests MiniCPM-V's ability to extract invoice data directly from images
+ * without any OCR augmentation.
+ */
+import { tap, expect } from '@git.zone/tstest/tapbundle';
+import * as fs from 'fs';
+import * as path from 'path';
+import { execSync } from 'child_process';
+import * as os from 'os';
+import { ensureMiniCpm } from './helpers/docker.js';
+
+// Base URL of the Ollama HTTP API used by this test (/api/generate and /api/tags).
+const OLLAMA_URL = 'http://localhost:11434';
+// Model name sent in the `model` field of every generate request.
+const MODEL = 'minicpm-v:latest';
+
+/**
+ * Invoice fields requested from the model; the expected-result JSON files are
+ * parsed into the same shape.
+ */
+interface IInvoice {
+  /** Invoice/receipt number. */
+  invoice_number: string;
+  /** Invoice date; the prompt asks for YYYY-MM-DD. */
+  invoice_date: string;
+  /** Company that issued the invoice. */
+  vendor_name: string;
+  /** Currency code such as EUR or USD. */
+  currency: string;
+  /** Amount before tax. */
+  net_amount: number;
+  /** Tax/VAT amount (the prompt asks for 0 on reverse charge or no tax). */
+  vat_amount: number;
+  /** Final amount due. */
+  total_amount: number;
+}
+
+/**
+ * Assemble the fixed instruction text sent to MiniCPM-V alongside the page images.
+ * No OCR text is included in this variant.
+ *
+ * @returns The prompt, one instruction per line, starting with the `/nothink` directive.
+ */
+function buildPrompt(): string {
+  const promptLines = [
+    '/nothink',
+    'You are an invoice parser. Extract the following fields from this invoice:',
+    '',
+    '1. invoice_number: The invoice/receipt number',
+    '2. invoice_date: Date in YYYY-MM-DD format',
+    '3. vendor_name: Company that issued the invoice',
+    '4. currency: EUR, USD, etc.',
+    '5. net_amount: Amount before tax (if shown)',
+    '6. vat_amount: Tax/VAT amount (if shown, 0 if reverse charge or no tax)',
+    '7. total_amount: Final amount due',
+    '',
+    'Return ONLY valid JSON in this exact format:',
+    '{"invoice_number":"XXX","invoice_date":"YYYY-MM-DD","vendor_name":"Company Name","currency":"EUR","net_amount":100.00,"vat_amount":19.00,"total_amount":119.00}',
+    '',
+    'If a field is not visible, use null for strings or 0 for numbers.',
+    'No explanation, just the JSON object.',
+  ];
+  return promptLines.join('\n');
+}
+
+/**
+ * Render a PDF to PNG pages with ImageMagick's `convert` and return the pages
+ * as base64 strings in page order.
+ *
+ * The pages are written to a fresh temporary directory that is always removed
+ * again, even when the conversion fails.
+ *
+ * @param pdfPath Path of the PDF to convert.
+ * @returns One base64-encoded PNG per generated page file.
+ * @throws If the `convert` command exits with an error.
+ */
+function convertPdfToImages(pdfPath: string): string[] {
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pdf-convert-'));
+  const outputPattern = path.join(tempDir, 'page-%d.png');
+
+  // Page index embedded in "page-<n>.png"; names without a number sort as 0.
+  const pageIndex = (fileName: string): number => {
+    const match = /(\d+)\.png$/.exec(fileName);
+    return match ? Number(match[1]) : 0;
+  };
+
+  try {
+    execSync(
+      `convert -density 200 -quality 90 "${pdfPath}" -background white -alpha remove "${outputPattern}"`,
+      { stdio: 'pipe' }
+    );
+
+    // Sort numerically: a plain string sort would place page-10 before page-2
+    // and scramble documents with more than ten pages.
+    return fs
+      .readdirSync(tempDir)
+      .filter((f) => f.endsWith('.png'))
+      .sort((a, b) => pageIndex(a) - pageIndex(b) || a.localeCompare(b))
+      .map((file) => fs.readFileSync(path.join(tempDir, file)).toString('base64'));
+  } finally {
+    fs.rmSync(tempDir, { recursive: true, force: true });
+  }
+}
+
+/**
+ * Run one extraction pass: send the prompt and all page images to Ollama,
+ * collect the streamed answer and parse the JSON object it contains.
+ *
+ * @param images Base64-encoded page images.
+ * @param passNum Pass number supplied by the caller; not used inside this function.
+ * @returns The object parsed from the model's answer (not validated against IInvoice).
+ * @throws If the HTTP call fails, the response has no body, no `{...}` span is
+ *         found in the answer, or that span is not valid JSON.
+ */
+async function extractOnce(images: string[], passNum: number): Promise<IInvoice> {
+  const payload = {
+    model: MODEL,
+    prompt: buildPrompt(),
+    images,
+    stream: true,
+    options: {
+      num_predict: 2048,
+      temperature: 0.1,
+    },
+  };
+
+  const response = await fetch(`${OLLAMA_URL}/api/generate`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(payload),
+  });
+
+  if (!response.ok) {
+    throw new Error(`Ollama API error: ${response.status}`);
+  }
+
+  const reader = response.body?.getReader();
+  if (!reader) {
+    throw new Error('No response body');
+  }
+
+  const decoder = new TextDecoder();
+  let fullText = '';
+  // Holds the trailing partial line of the previous chunk. Network chunks do not
+  // respect line boundaries, so a JSON line can be split across two reads.
+  let pending = '';
+
+  // Append the `response` fragment of one NDJSON line, ignoring blank or invalid lines.
+  const consumeLine = (line: string): void => {
+    if (!line.trim()) return;
+    try {
+      const json = JSON.parse(line);
+      if (json.response) {
+        fullText += json.response;
+      }
+    } catch {
+      // Skip invalid JSON lines
+    }
+  };
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+
+    pending += decoder.decode(value, { stream: true });
+    const lines = pending.split('\n');
+    // The last element is incomplete until its newline arrives; keep it for the next read.
+    pending = lines.pop() ?? '';
+    for (const line of lines) {
+      consumeLine(line);
+    }
+  }
+
+  // Flush the decoder and handle a final line that had no trailing newline.
+  pending += decoder.decode();
+  consumeLine(pending);
+
+  // Extract JSON from response: first '{' through last '}'
+  const startIdx = fullText.indexOf('{');
+  const endIdx = fullText.lastIndexOf('}') + 1;
+
+  if (startIdx < 0 || endIdx <= startIdx) {
+    throw new Error(`No JSON object found in response: ${fullText.substring(0, 200)}`);
+  }
+
+  const jsonStr = fullText.substring(startIdx, endIdx);
+  return JSON.parse(jsonStr);
+}
+
+/**
+ * Build the voting key for an invoice from its number, date and total.
+ *
+ * The invoice comes straight from parsed model output, so `total_amount` may be
+ * a numeric string, null or missing. It is coerced with `Number()` first so that
+ * such a pass produces a key instead of throwing; non-numeric values yield "NaN".
+ *
+ * @param invoice Invoice as returned by an extraction pass.
+ * @returns `<invoice_number>|<invoice_date>|<total with two decimals>`.
+ */
+function hashInvoice(invoice: IInvoice): string {
+  const total = Number(invoice.total_amount);
+  return `${invoice.invoice_number}|${invoice.invoice_date}|${total.toFixed(2)}`;
+}
+
+/**
+ * Repeat the extraction until two passes agree on the same invoice key.
+ *
+ * A pass that throws is logged and skipped. When all passes have run without
+ * agreement, the first result with the highest vote count is returned.
+ *
+ * @param images Base64-encoded page images handed to every pass.
+ * @param invoiceName Name used in the error message when no pass succeeds.
+ * @param maxPasses Upper bound on the number of passes.
+ * @returns The invoice of the pass that reached agreement, or the best fallback.
+ * @throws If no pass produced a result.
+ */
+async function extractWithConsensus(images: string[], invoiceName: string, maxPasses: number = 5): Promise<IInvoice> {
+  const attempts: Array<{ invoice: IInvoice; hash: string }> = [];
+  const votes = new Map<string, number>();
+
+  // Store one successful pass and report how many passes now share its key.
+  const recordAttempt = (invoice: IInvoice, passLabel: string): number => {
+    const key = hashInvoice(invoice);
+    attempts.push({ invoice, hash: key });
+    const tally = (votes.get(key) || 0) + 1;
+    votes.set(key, tally);
+    console.log(`  [${passLabel}] ${invoice.invoice_number} | ${invoice.invoice_date} | ${invoice.total_amount} ${invoice.currency}`);
+    return tally;
+  };
+
+  let pass = 0;
+  while (pass < maxPasses) {
+    pass += 1;
+    try {
+      const invoice = await extractOnce(images, pass);
+      if (recordAttempt(invoice, `Pass ${pass}`) >= 2) {
+        console.log(`  [Consensus] Reached after ${pass} passes`);
+        return invoice;
+      }
+    } catch (err) {
+      console.log(`  [Pass ${pass}] Error: ${err}`);
+    }
+  }
+
+  // No agreement: pick the key with the most votes (first one wins ties).
+  let topHash = '';
+  let topVotes = 0;
+  for (const [key, tally] of votes.entries()) {
+    if (tally > topVotes) {
+      topHash = key;
+      topVotes = tally;
+    }
+  }
+
+  if (topHash === '') {
+    throw new Error(`No valid results for ${invoiceName}`);
+  }
+
+  const fallback = attempts.find((attempt) => attempt.hash === topHash)!;
+  console.log(`  [No consensus] Using most common result (${topVotes}/${maxPasses} passes)`);
+  return fallback.invoice;
+}
+
+/**
+ * Compare an extracted invoice with the expected one.
+ *
+ * Checked fields: invoice number (whitespace and case ignored), date (exact),
+ * total amount (within 0.02) and currency (case ignored).
+ *
+ * @param extracted Invoice parsed from model output; fields may be missing or mistyped.
+ * @param expected Reference invoice.
+ * @returns `match` is true when no check failed; `errors` has one message per failed check.
+ */
+function compareInvoice(
+  extracted: IInvoice,
+  expected: IInvoice
+): { match: boolean; errors: string[] } {
+  const errors: string[] = [];
+
+  // The model may emit the number as a JSON number, so stringify before normalising
+  // instead of calling string methods on it directly.
+  const normalizeNumber = (value: unknown): string =>
+    String(value ?? '').replace(/\s/g, '').toLowerCase();
+
+  // Compare invoice number (normalize by removing spaces and case)
+  if (normalizeNumber(extracted.invoice_number) !== normalizeNumber(expected.invoice_number)) {
+    errors.push(`invoice_number: expected "${expected.invoice_number}", got "${extracted.invoice_number}"`);
+  }
+
+  // Compare date
+  if (extracted.invoice_date !== expected.invoice_date) {
+    errors.push(`invoice_date: expected "${expected.invoice_date}", got "${extracted.invoice_date}"`);
+  }
+
+  // Compare total amount (with tolerance). Written as "not within tolerance" so a
+  // NaN difference (missing or non-numeric total) counts as a mismatch.
+  const totalDelta = Math.abs(extracted.total_amount - expected.total_amount);
+  if (!(totalDelta <= 0.02)) {
+    errors.push(`total_amount: expected ${expected.total_amount}, got ${extracted.total_amount}`);
+  }
+
+  // Compare currency
+  if (extracted.currency?.toUpperCase() !== expected.currency?.toUpperCase()) {
+    errors.push(`currency: expected "${expected.currency}", got "${extracted.currency}"`);
+  }
+
+  return { match: errors.length === 0, errors };
+}
+
+/**
+ * Find all test cases (PDF + JSON pairs) in .nogit/invoices/ under the current
+ * working directory.
+ *
+ * A PDF becomes a test case only when a JSON file with the same base name sits
+ * next to it.
+ *
+ * @returns Test cases sorted by name; an empty array when the directory is missing.
+ */
+function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: string }> {
+  const testDir = path.join(process.cwd(), '.nogit/invoices');
+  if (!fs.existsSync(testDir)) {
+    return [];
+  }
+
+  const files = fs.readdirSync(testDir);
+  const available = new Set(files);
+  const testCases: Array<{ name: string; pdfPath: string; jsonPath: string }> = [];
+
+  for (const pdf of files) {
+    if (!pdf.endsWith('.pdf')) continue;
+
+    // Strip only the trailing extension; replace('.pdf', '') would remove the
+    // first ".pdf" inside names such as "scan.pdf.copy.pdf".
+    const baseName = path.basename(pdf, '.pdf');
+    const jsonFile = `${baseName}.json`;
+    if (available.has(jsonFile)) {
+      testCases.push({
+        name: baseName,
+        pdfPath: path.join(testDir, pdf),
+        jsonPath: path.join(testDir, jsonFile),
+      });
+    }
+  }
+
+  // Sort alphabetically
+  testCases.sort((a, b) => a.name.localeCompare(b.name));
+
+  return testCases;
+}
+
+// Tests
+
+// Setup step: asks the docker helper for MiniCPM and asserts that it reported success.
+tap.test('setup: ensure Docker containers are running', async () => {
+  console.log('\n[Setup] Checking Docker containers...\n');
+
+  // MiniCPM is the only container this variant needs.
+  expect(await ensureMiniCpm()).toBeTrue();
+
+  console.log('\n[Setup] All containers ready!\n');
+});
+
+// Verifies that Ollama lists the model this file actually sends requests to.
+// The earlier 'minicpm-v4.5' name is still accepted so existing setups keep passing.
+tap.test('should have MiniCPM-V 4.5 model loaded', async () => {
+  const response = await fetch(`${OLLAMA_URL}/api/tags`);
+  expect(response.ok).toBeTrue();
+
+  const data = await response.json();
+  // Tolerate a response without a `models` array so the assertion fails cleanly
+  // instead of throwing inside map().
+  const modelNames: string[] = (data.models ?? []).map((m: { name: string }) => m.name);
+  const hasModel = modelNames.some(
+    (name: string) => name === MODEL || name.includes('minicpm-v4.5')
+  );
+  expect(hasModel).toBeTrue();
+});
+
+// Dynamic test for each PDF/JSON pair.
+// The counters and timing list below are module-level because the summary test
+// registered after this loop reads them.
+const testCases = findTestCases();
+console.log(`\nFound ${testCases.length} invoice test cases (MiniCPM-V only)\n`);
+
+let passedCount = 0;
+let failedCount = 0;
+const processingTimes: number[] = [];
+
+for (const testCase of testCases) {
+  tap.test(`should extract invoice: ${testCase.name}`, async () => {
+    // Load expected data
+    const expected: IInvoice = JSON.parse(fs.readFileSync(testCase.jsonPath, 'utf-8'));
+    console.log(`\n=== ${testCase.name} ===`);
+    console.log(`Expected: ${expected.invoice_number} | ${expected.invoice_date} | ${expected.total_amount} ${expected.currency}`);
+
+    const startTime = Date.now();
+
+    let extracted: IInvoice;
+    try {
+      // Convert PDF to images
+      const images = convertPdfToImages(testCase.pdfPath);
+      console.log(`  Pages: ${images.length}`);
+
+      // Extract with consensus voting (MiniCPM-V only)
+      extracted = await extractWithConsensus(images, testCase.name);
+    } catch (err) {
+      // Conversion or extraction failed outright: count it as a failure so the
+      // summary's Passed + Failed adds up, then fail the test with the real error.
+      failedCount++;
+      console.log(`  Result: ERROR (${((Date.now() - startTime) / 1000).toFixed(1)}s) - ${err}`);
+      throw err;
+    }
+
+    const endTime = Date.now();
+    const elapsedMs = endTime - startTime;
+    processingTimes.push(elapsedMs);
+
+    // Compare results
+    const result = compareInvoice(extracted, expected);
+
+    if (result.match) {
+      passedCount++;
+      console.log(`  Result: MATCH (${(elapsedMs / 1000).toFixed(1)}s)`);
+    } else {
+      failedCount++;
+      console.log(`  Result: MISMATCH (${(elapsedMs / 1000).toFixed(1)}s)`);
+      result.errors.forEach((e) => console.log(`    - ${e}`));
+    }
+
+    // Assert match
+    expect(result.match).toBeTrue();
+  });
+}
+
+/**
+ * Closing pseudo-test: prints pass/fail counts, accuracy and timing gathered by
+ * the per-invoice tests. It contains no assertions.
+ */
+tap.test('summary', async () => {
+  const invoiceCount = testCases.length;
+  const timedRuns = processingTimes.length;
+
+  let elapsedTotalMs = 0;
+  for (const ms of processingTimes) {
+    elapsedTotalMs += ms;
+  }
+
+  // Both ratios fall back to 0 when there is nothing to divide by.
+  const accuracyPct = invoiceCount > 0 ? (passedCount / invoiceCount) * 100 : 0;
+  const meanSeconds = (timedRuns > 0 ? elapsedTotalMs / timedRuns : 0) / 1000;
+  const totalSeconds = elapsedTotalMs / 1000;
+
+  const banner = [
+    `\n========================================`,
+    `   Invoice Extraction Summary (MiniCPM)`,
+    `========================================`,
+    `  Passed:    ${passedCount}/${invoiceCount}`,
+    `  Failed:    ${failedCount}/${invoiceCount}`,
+    `  Accuracy:  ${accuracyPct.toFixed(1)}%`,
+    `----------------------------------------`,
+    `  Total time:   ${totalSeconds.toFixed(1)}s`,
+    `  Avg per inv:  ${meanSeconds.toFixed(1)}s`,
+    `========================================\n`,
+  ];
+  for (const line of banner) {
+    console.log(line);
+  }
+});
+
+export default tap.start();
--- a/test/test.invoices.paddleocr-vl.ts
+++ b/test/test.invoices.paddleocr-vl.ts
@@ -0,0 +1,393 @@
+/**
+ * Invoice extraction test using PaddleOCR-VL Full Pipeline
+ *
+ * This tests the complete PaddleOCR-VL pipeline:
+ *   1. PP-DocLayoutV2 for layout detection
+ *   2. PaddleOCR-VL for recognition
+ *   3. Structured Markdown output
+ *   4. MiniCPM extracts invoice fields from structured Markdown
+ *
+ * The structured Markdown has proper tables and formatting,
+ * making it much easier for MiniCPM to extract invoice data.
+ */
+import { tap, expect } from '@git.zone/tstest/tapbundle';
+import * as fs from 'fs';
+import * as path from 'path';
+import { execSync } from 'child_process';
+import * as os from 'os';
+import { ensurePaddleOcrVlFull, ensureMiniCpm } from './helpers/docker.js';
+
+// Base URL of the PaddleOCR-VL API server; this file posts page images to its /parse route.
+const PADDLEOCR_VL_URL = 'http://localhost:8000';
+// Base URL of the Ollama HTTP API used for /api/generate.
+const OLLAMA_URL = 'http://localhost:11434';
+// Model name sent in the `model` field of the Ollama generate request.
+const MINICPM_MODEL = 'minicpm-v:latest';
+
+/**
+ * Invoice fields requested from the model; the expected-result JSON files are
+ * parsed into the same shape.
+ */
+interface IInvoice {
+  /** Invoice/receipt number. */
+  invoice_number: string;
+  /** Invoice date; the prompt asks for YYYY-MM-DD. */
+  invoice_date: string;
+  /** Company that issued the invoice. */
+  vendor_name: string;
+  /** Currency code such as EUR or USD. */
+  currency: string;
+  /** Amount before tax. */
+  net_amount: number;
+  /** Tax/VAT amount (the prompt asks for 0 on reverse charge). */
+  vat_amount: number;
+  /** Final amount due. */
+  total_amount: number;
+}
+
+/**
+ * Render a PDF to PNG pages with ImageMagick's `convert` and return the pages
+ * as base64 strings in page order.
+ *
+ * The pages are written to a fresh temporary directory that is always removed
+ * again, even when the conversion fails.
+ *
+ * @param pdfPath Path of the PDF to convert.
+ * @returns One base64-encoded PNG per generated page file.
+ * @throws If the `convert` command exits with an error.
+ */
+function convertPdfToImages(pdfPath: string): string[] {
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pdf-convert-'));
+  const outputPattern = path.join(tempDir, 'page-%d.png');
+
+  // Page index embedded in "page-<n>.png"; names without a number sort as 0.
+  const pageIndex = (fileName: string): number => {
+    const match = /(\d+)\.png$/.exec(fileName);
+    return match ? Number(match[1]) : 0;
+  };
+
+  try {
+    execSync(
+      `convert -density 200 -quality 90 "${pdfPath}" -background white -alpha remove "${outputPattern}"`,
+      { stdio: 'pipe' }
+    );
+
+    // Sort numerically: a plain string sort would place page-10 before page-2
+    // and scramble documents with more than ten pages.
+    return fs
+      .readdirSync(tempDir)
+      .filter((f) => f.endsWith('.png'))
+      .sort((a, b) => pageIndex(a) - pageIndex(b) || a.localeCompare(b))
+      .map((file) => fs.readFileSync(path.join(tempDir, file)).toString('base64'));
+  } finally {
+    fs.rmSync(tempDir, { recursive: true, force: true });
+  }
+}
+
+/**
+ * Send one page image to the PaddleOCR-VL `/parse` endpoint and return the
+ * Markdown it produced.
+ *
+ * @param imageBase64 Base64-encoded page image.
+ * @returns The `result.markdown` field of the response, or '' when it is absent or empty.
+ * @throws If the HTTP status is not OK or the response reports `success` as falsy.
+ */
+async function parseDocument(imageBase64: string): Promise<string> {
+  const requestBody = JSON.stringify({
+    image: imageBase64,
+    output_format: 'markdown',
+  });
+
+  const response = await fetch(`${PADDLEOCR_VL_URL}/parse`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: requestBody,
+  });
+
+  if (!response.ok) {
+    const text = await response.text();
+    throw new Error(`PaddleOCR-VL API error: ${response.status} - ${text}`);
+  }
+
+  const data = await response.json();
+  if (data.success) {
+    return data.result?.markdown || '';
+  }
+
+  throw new Error(`PaddleOCR-VL error: ${data.error}`);
+}
+
+/**
+ * Ask MiniCPM for the invoice fields, giving it the page images plus the
+ * PaddleOCR-VL Markdown as reference text.
+ *
+ * @param markdown Markdown produced by the OCR pipeline; only the first 8000
+ *                 characters are placed in the prompt.
+ * @param images Base64-encoded page images sent with the request.
+ * @returns The object parsed from the model's answer (not validated against IInvoice).
+ * @throws If the HTTP call fails, the response has no body, no `{...}` span is
+ *         found in the answer, or that span is not valid JSON.
+ */
+async function extractInvoiceFromMarkdown(markdown: string, images: string[]): Promise<IInvoice> {
+  // Truncate if too long
+  const truncated = markdown.length > 8000 ? markdown.slice(0, 8000) : markdown;
+  console.log(`    [Extract] Processing ${truncated.length} chars of Markdown`);
+
+  const prompt = `/nothink
+You are an invoice parser. Extract fields from this invoice image.
+
+Required fields:
+- invoice_number: The invoice/receipt number
+- invoice_date: Date in YYYY-MM-DD format
+- vendor_name: Company that issued the invoice
+- currency: EUR, USD, etc.
+- net_amount: Amount before tax
+- vat_amount: Tax/VAT amount (0 if reverse charge)
+- total_amount: Final amount due
+
+Return ONLY a JSON object like:
+{"invoice_number":"123","invoice_date":"2022-01-28","vendor_name":"Adobe","currency":"EUR","net_amount":24.99,"vat_amount":0,"total_amount":24.99}
+
+Use null for missing strings, 0 for missing numbers. No explanation.
+
+OCR text from the invoice (for reference):
+---
+${truncated}
+---`;
+
+  const payload = {
+    model: MINICPM_MODEL,
+    prompt,
+    images,  // Send the actual image to MiniCPM
+    stream: true,
+    options: {
+      num_predict: 2048,
+      temperature: 0.1,
+    },
+  };
+
+  const response = await fetch(`${OLLAMA_URL}/api/generate`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(payload),
+  });
+
+  if (!response.ok) {
+    throw new Error(`Ollama API error: ${response.status}`);
+  }
+
+  const reader = response.body?.getReader();
+  if (!reader) {
+    throw new Error('No response body');
+  }
+
+  const decoder = new TextDecoder();
+  let fullText = '';
+  // Holds the trailing partial line of the previous chunk. Network chunks do not
+  // respect line boundaries, so a JSON line can be split across two reads.
+  let pending = '';
+
+  // Append the `response` fragment of one NDJSON line, ignoring blank or invalid lines.
+  const consumeLine = (line: string): void => {
+    if (!line.trim()) return;
+    try {
+      const json = JSON.parse(line);
+      if (json.response) {
+        fullText += json.response;
+      }
+    } catch {
+      // Skip invalid JSON lines
+    }
+  };
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+
+    pending += decoder.decode(value, { stream: true });
+    const lines = pending.split('\n');
+    // The last element is incomplete until its newline arrives; keep it for the next read.
+    pending = lines.pop() ?? '';
+    for (const line of lines) {
+      consumeLine(line);
+    }
+  }
+
+  // Flush the decoder and handle a final line that had no trailing newline.
+  pending += decoder.decode();
+  consumeLine(pending);
+
+  // Extract JSON from response: first '{' through last '}'
+  const startIdx = fullText.indexOf('{');
+  const endIdx = fullText.lastIndexOf('}') + 1;
+
+  if (startIdx < 0 || endIdx <= startIdx) {
+    throw new Error(`No JSON object found in response: ${fullText.substring(0, 200)}`);
+  }
+
+  const jsonStr = fullText.substring(startIdx, endIdx);
+  return JSON.parse(jsonStr);
+}
+
+/**
+ * One extraction pass: OCR the first page with PaddleOCR-VL, then let MiniCPM
+ * pull the invoice fields from that Markdown together with all page images.
+ *
+ * Only `images[0]` is sent to the OCR step; later pages reach MiniCPM as images only.
+ *
+ * @param images Base64-encoded page images.
+ * @param passNum Pass number supplied by the caller; not used inside this function.
+ * @returns The invoice produced by the MiniCPM step.
+ */
+async function extractOnce(images: string[], passNum: number): Promise<IInvoice> {
+  const [firstPage] = images;
+
+  // Parse document with full pipeline
+  const markdown = await parseDocument(firstPage);
+  const lineCount = markdown.split('\n').length;
+  console.log(`    [Parse] Got ${lineCount} lines of Markdown`);
+
+  // Extract invoice fields from Markdown with image context
+  return extractInvoiceFromMarkdown(markdown, images);
+}
+
+/**
+ * Build the voting key for an invoice from its number, date and total.
+ *
+ * The invoice comes straight from parsed model output, so `total_amount` may be
+ * a numeric string, null or missing. It is coerced with `Number()` first so that
+ * such a pass produces a key instead of throwing; non-numeric values yield "NaN".
+ *
+ * @param invoice Invoice as returned by an extraction pass.
+ * @returns `<invoice_number>|<invoice_date>|<total with two decimals>`.
+ */
+function hashInvoice(invoice: IInvoice): string {
+  const total = Number(invoice.total_amount);
+  return `${invoice.invoice_number}|${invoice.invoice_date}|${total.toFixed(2)}`;
+}
+
+/**
+ * Repeat the extraction until two passes agree on the same invoice key.
+ *
+ * A pass that throws is logged and skipped. When all passes have run without
+ * agreement, the first result with the highest vote count is returned.
+ *
+ * @param images Base64-encoded page images handed to every pass.
+ * @param invoiceName Name used in the error message when no pass succeeds.
+ * @param maxPasses Upper bound on the number of passes.
+ * @returns The invoice of the pass that reached agreement, or the best fallback.
+ * @throws If no pass produced a result.
+ */
+async function extractWithConsensus(images: string[], invoiceName: string, maxPasses: number = 5): Promise<IInvoice> {
+  const attempts: Array<{ invoice: IInvoice; hash: string }> = [];
+  const votes = new Map<string, number>();
+
+  // Store one successful pass and report how many passes now share its key.
+  const recordAttempt = (invoice: IInvoice, passLabel: string): number => {
+    const key = hashInvoice(invoice);
+    attempts.push({ invoice, hash: key });
+    const tally = (votes.get(key) || 0) + 1;
+    votes.set(key, tally);
+    console.log(`  [${passLabel}] ${invoice.invoice_number} | ${invoice.invoice_date} | ${invoice.total_amount} ${invoice.currency}`);
+    return tally;
+  };
+
+  let pass = 0;
+  while (pass < maxPasses) {
+    pass += 1;
+    try {
+      const invoice = await extractOnce(images, pass);
+      if (recordAttempt(invoice, `Pass ${pass}`) >= 2) {
+        console.log(`  [Consensus] Reached after ${pass} passes`);
+        return invoice;
+      }
+    } catch (err) {
+      console.log(`  [Pass ${pass}] Error: ${err}`);
+    }
+  }
+
+  // No agreement: pick the key with the most votes (first one wins ties).
+  let topHash = '';
+  let topVotes = 0;
+  for (const [key, tally] of votes.entries()) {
+    if (tally > topVotes) {
+      topHash = key;
+      topVotes = tally;
+    }
+  }
+
+  if (topHash === '') {
+    throw new Error(`No valid results for ${invoiceName}`);
+  }
+
+  const fallback = attempts.find((attempt) => attempt.hash === topHash)!;
+  console.log(`  [No consensus] Using most common result (${topVotes}/${maxPasses} passes)`);
+  return fallback.invoice;
+}
+
+/**
+ * Compare an extracted invoice with the expected one.
+ *
+ * Checked fields: invoice number (whitespace and case ignored), date (exact),
+ * total amount (within 0.02) and currency (case ignored).
+ *
+ * @param extracted Invoice parsed from model output; fields may be missing or mistyped.
+ * @param expected Reference invoice.
+ * @returns `match` is true when no check failed; `errors` has one message per failed check.
+ */
+function compareInvoice(
+  extracted: IInvoice,
+  expected: IInvoice
+): { match: boolean; errors: string[] } {
+  const errors: string[] = [];
+
+  // The model may emit the number as a JSON number, so stringify before normalising
+  // instead of calling string methods on it directly.
+  const normalizeNumber = (value: unknown): string =>
+    String(value ?? '').replace(/\s/g, '').toLowerCase();
+
+  // Compare invoice number (normalize by removing spaces and case)
+  if (normalizeNumber(extracted.invoice_number) !== normalizeNumber(expected.invoice_number)) {
+    errors.push(`invoice_number: expected "${expected.invoice_number}", got "${extracted.invoice_number}"`);
+  }
+
+  // Compare date
+  if (extracted.invoice_date !== expected.invoice_date) {
+    errors.push(`invoice_date: expected "${expected.invoice_date}", got "${extracted.invoice_date}"`);
+  }
+
+  // Compare total amount (with tolerance). Written as "not within tolerance" so a
+  // NaN difference (missing or non-numeric total) counts as a mismatch.
+  const totalDelta = Math.abs(extracted.total_amount - expected.total_amount);
+  if (!(totalDelta <= 0.02)) {
+    errors.push(`total_amount: expected ${expected.total_amount}, got ${extracted.total_amount}`);
+  }
+
+  // Compare currency
+  if (extracted.currency?.toUpperCase() !== expected.currency?.toUpperCase()) {
+    errors.push(`currency: expected "${expected.currency}", got "${extracted.currency}"`);
+  }
+
+  return { match: errors.length === 0, errors };
+}
+
+/**
+ * Find all test cases (PDF + JSON pairs) in .nogit/invoices/ under the current
+ * working directory.
+ *
+ * A PDF becomes a test case only when a JSON file with the same base name sits
+ * next to it.
+ *
+ * @returns Test cases sorted by name; an empty array when the directory is missing.
+ */
+function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: string }> {
+  const testDir = path.join(process.cwd(), '.nogit/invoices');
+  if (!fs.existsSync(testDir)) {
+    return [];
+  }
+
+  const files = fs.readdirSync(testDir);
+  const available = new Set(files);
+  const testCases: Array<{ name: string; pdfPath: string; jsonPath: string }> = [];
+
+  for (const pdf of files) {
+    if (!pdf.endsWith('.pdf')) continue;
+
+    // Strip only the trailing extension; replace('.pdf', '') would remove the
+    // first ".pdf" inside names such as "scan.pdf.copy.pdf".
+    const baseName = path.basename(pdf, '.pdf');
+    const jsonFile = `${baseName}.json`;
+    if (available.has(jsonFile)) {
+      testCases.push({
+        name: baseName,
+        pdfPath: path.join(testDir, pdf),
+        jsonPath: path.join(testDir, jsonFile),
+      });
+    }
+  }
+
+  // Sort alphabetically
+  testCases.sort((a, b) => a.name.localeCompare(b.name));
+
+  return testCases;
+}
+
+// Tests
+
+// Setup step: asks the docker helpers for both services, one after the other,
+// and asserts that each helper reported success.
+tap.test('setup: ensure Docker containers are running', async () => {
+  console.log('\n[Setup] Checking Docker containers...\n');
+
+  // PaddleOCR-VL Full Pipeline first
+  expect(await ensurePaddleOcrVlFull()).toBeTrue();
+
+  // then MiniCPM, which extracts the fields from the Markdown
+  expect(await ensureMiniCpm()).toBeTrue();
+
+  console.log('\n[Setup] All containers ready!\n');
+});
+
+// Dynamic test for each PDF/JSON pair.
+// The counters and timing list below are module-level because the summary test
+// registered after this loop reads them.
+const testCases = findTestCases();
+console.log(`\nFound ${testCases.length} invoice test cases (PaddleOCR-VL Full Pipeline)\n`);
+
+let passedCount = 0;
+let failedCount = 0;
+const processingTimes: number[] = [];
+
+for (const testCase of testCases) {
+  tap.test(`should extract invoice: ${testCase.name}`, async () => {
+    // Load expected data
+    const expected: IInvoice = JSON.parse(fs.readFileSync(testCase.jsonPath, 'utf-8'));
+    console.log(`\n=== ${testCase.name} ===`);
+    console.log(`Expected: ${expected.invoice_number} | ${expected.invoice_date} | ${expected.total_amount} ${expected.currency}`);
+
+    const startTime = Date.now();
+
+    let extracted: IInvoice;
+    try {
+      // Convert PDF to images
+      const images = convertPdfToImages(testCase.pdfPath);
+      console.log(`  Pages: ${images.length}`);
+
+      // Extract with consensus voting (PaddleOCR-VL Full -> MiniCPM)
+      extracted = await extractWithConsensus(images, testCase.name);
+    } catch (err) {
+      // Conversion or extraction failed outright: count it as a failure so the
+      // summary's Passed + Failed adds up, then fail the test with the real error.
+      failedCount++;
+      console.log(`  Result: ERROR (${((Date.now() - startTime) / 1000).toFixed(1)}s) - ${err}`);
+      throw err;
+    }
+
+    const endTime = Date.now();
+    const elapsedMs = endTime - startTime;
+    processingTimes.push(elapsedMs);
+
+    // Compare results
+    const result = compareInvoice(extracted, expected);
+
+    if (result.match) {
+      passedCount++;
+      console.log(`  Result: MATCH (${(elapsedMs / 1000).toFixed(1)}s)`);
+    } else {
+      failedCount++;
+      console.log(`  Result: MISMATCH (${(elapsedMs / 1000).toFixed(1)}s)`);
+      result.errors.forEach((e) => console.log(`    - ${e}`));
+    }
+
+    // Assert match
+    expect(result.match).toBeTrue();
+  });
+}
+
+/**
+ * Closing pseudo-test: prints pass/fail counts, accuracy and timing gathered by
+ * the per-invoice tests. It contains no assertions.
+ */
+tap.test('summary', async () => {
+  const invoiceCount = testCases.length;
+  const timedRuns = processingTimes.length;
+
+  let elapsedTotalMs = 0;
+  for (const ms of processingTimes) {
+    elapsedTotalMs += ms;
+  }
+
+  // Both ratios fall back to 0 when there is nothing to divide by.
+  const accuracyPct = invoiceCount > 0 ? (passedCount / invoiceCount) * 100 : 0;
+  const meanSeconds = (timedRuns > 0 ? elapsedTotalMs / timedRuns : 0) / 1000;
+  const totalSeconds = elapsedTotalMs / 1000;
+
+  const banner = [
+    `\n======================================================`,
+    `   Invoice Extraction Summary (PaddleOCR-VL Full)`,
+    `======================================================`,
+    `  Method:    PaddleOCR-VL Full Pipeline -> MiniCPM`,
+    `  Passed:    ${passedCount}/${invoiceCount}`,
+    `  Failed:    ${failedCount}/${invoiceCount}`,
+    `  Accuracy:  ${accuracyPct.toFixed(1)}%`,
+    `------------------------------------------------------`,
+    `  Total time:   ${totalSeconds.toFixed(1)}s`,
+    `  Avg per inv:  ${meanSeconds.toFixed(1)}s`,
+    `======================================================\n`,
+  ];
+  for (const line of banner) {
+    console.log(line);
+  }
+});
+
+export default tap.start();