feat(paddleocr-vl): add PaddleOCR-VL full pipeline Docker image and API server, plus integration tests and docker helpers

2026-01-17 20:22:23 +00:00
parent addae20cbd
commit 80e6866442
12 changed files with 2414 additions and 21 deletions
--- a/test/test.invoices.combined.ts
+++ b/test/test.invoices.combined.ts
@@ -1,11 +1,19 @@
+/**
+ * Invoice extraction test using MiniCPM-V (visual) + PaddleOCR-VL (OCR augmentation)
+ *
+ * This is the combined approach that uses both models for best accuracy:
+ *   - MiniCPM-V for visual understanding
+ *   - PaddleOCR-VL for OCR text to augment prompts
+ */
 import { tap, expect } from '@git.zone/tstest/tapbundle';
 import * as fs from 'fs';
 import * as path from 'path';
 import { execSync } from 'child_process';
 import * as os from 'os';
+import { ensurePaddleOcrVl, ensureMiniCpm } from './helpers/docker.js';

 const OLLAMA_URL = 'http://localhost:11434';
-const MODEL = 'openbmb/minicpm-v4.5:q8_0';
+const MODEL = 'minicpm-v:latest';
 const PADDLEOCR_VL_URL = 'http://localhost:8000';

 interface IInvoice {
@@ -358,11 +366,18 @@ function findTestCases(): Array<{ name: string; pdfPath: string; jsonPath: strin

 // Tests

-tap.test('should connect to Ollama API', async () => {
-  const response = await fetch(`${OLLAMA_URL}/api/tags`);
-  expect(response.ok).toBeTrue();
-  const data = await response.json();
-  expect(data.models).toBeArray();
+tap.test('setup: ensure Docker containers are running', async () => {
+  console.log('\n[Setup] Checking Docker containers...\n');
+
+  // Ensure PaddleOCR-VL is running (auto-detects GPU/CPU)
+  const paddleOk = await ensurePaddleOcrVl();
+  expect(paddleOk).toBeTrue();
+
+  // Ensure MiniCPM is running
+  const minicpmOk = await ensureMiniCpm();
+  expect(minicpmOk).toBeTrue();
+
+  console.log('\n[Setup] All containers ready!\n');
 });

 tap.test('should have MiniCPM-V 4.5 model loaded', async () => {