feat(paddleocr-vl): add PaddleOCR-VL GPU Dockerfile, pin vllm, update CPU image deps, and improve entrypoint and tests

2026-01-17 16:57:26 +00:00
parent 15ac1fcf67
commit 0482c35b69
9 changed files with 140 additions and 26 deletions
--- a/test/test.bankstatements.combined.ts
+++ b/test/test.bankstatements.combined.ts
--- a/test/test.invoices.combined.ts
+++ b/test/test.invoices.combined.ts
@@ -6,7 +6,7 @@ import * as os from 'os';

 const OLLAMA_URL = 'http://localhost:11434';
 const MODEL = 'openbmb/minicpm-v4.5:q8_0';
-const PADDLEOCR_URL = 'http://localhost:5000';
+const PADDLEOCR_VL_URL = 'http://localhost:8000';

 interface IInvoice {
  invoice_number: string;
@@ -19,24 +19,33 @@ interface IInvoice {
 }

 /**
- * Extract OCR text from an image using PaddleOCR
+ * Extract OCR text from an image using PaddleOCR-VL (OpenAI-compatible API)
 */
 async function extractOcrText(imageBase64: string): Promise<string> {
  try {
-    const response = await fetch(`${PADDLEOCR_URL}/ocr`, {
+    const response = await fetch(`${PADDLEOCR_VL_URL}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({ image: imageBase64 }),
+      body: JSON.stringify({
+        model: 'paddleocr-vl',
+        messages: [{
+          role: 'user',
+          content: [
+            { type: 'image_url', image_url: { url: `data:image/png;base64,${imageBase64}` } },
+            { type: 'text', text: 'OCR:' }
+          ]
+        }],
+        temperature: 0.0,
+        max_tokens: 4096
+      }),
    });

    if (!response.ok) return '';

    const data = await response.json();
-    if (data.success && data.results) {
-      return data.results.map((r: { text: string }) => r.text).join('\n');
-    }
+    return data.choices?.[0]?.message?.content || '';
  } catch {
-    // PaddleOCR unavailable
+    // PaddleOCR-VL unavailable
  }
  return '';
 }