v1.7.1

fix(docker): standardize Dockerfile and entrypoint filenames; add GPU-specific Dockerfiles and update build and test references
v1.7.0
2026-01-17 23:13:47 +00:00 · 2026-01-17 23:13:47 +00:00 · 2026-01-17 21:50:09 +00:00 · 2026-01-17 21:50:09 +00:00
13 changed files with 183 additions and 173 deletions
--- a/2
+++ b/2
@@ -14,7 +14,7 @@ ENV OLLAMA_ORIGINS="*"
 ENV CUDA_VISIBLE_DEVICES=""
 # Copy and setup entrypoint
-COPY image_support_files/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
+COPY image_support_files/minicpm45v_entrypoint.sh /usr/local/bin/docker-entrypoint.sh
 RUN chmod +x /usr/local/bin/docker-entrypoint.sh
 # Expose Ollama API port
--- a/2
+++ b/2
@@ -12,7 +12,7 @@ ENV OLLAMA_HOST="0.0.0.0"
 ENV OLLAMA_ORIGINS="*"
 # Copy and setup entrypoint
-COPY image_support_files/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
+COPY image_support_files/minicpm45v_entrypoint.sh /usr/local/bin/docker-entrypoint.sh
 RUN chmod +x /usr/local/bin/docker-entrypoint.sh
 # Expose Ollama API port
--- a/70
+++ b/70
@@ -1,70 +0,0 @@
 # PaddleOCR-VL GPU Variant
 # Vision-Language Model for document parsing using vLLM
 #
 # Build outline: CUDA 12.4 devel base -> Python 3.11 + virtualenv -> PyTorch (cu124)
 # -> vLLM -> extra Python packages -> entrypoint script. The container exposes
 # port 8000 and is started through paddleocr-vl-entrypoint.sh.
 FROM nvidia/cuda:12.4.0-devel-ubuntu22.04
 LABEL maintainer="Task Venture Capital GmbH <hello@task.vc>"
 LABEL description="PaddleOCR-VL 0.9B - Vision-Language Model for document parsing"
 LABEL org.opencontainers.image.source="https://code.foss.global/host.today/ht-docker-ai"
 # Environment configuration
 # DEBIAN_FRONTEND=noninteractive keeps apt from prompting during the build.
 # HF_HOME fixes the Hugging Face cache location inside the container.
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1
 ENV HF_HOME=/root/.cache/huggingface
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 # Set working directory
 WORKDIR /app
 # Install system dependencies
 # The apt lists are removed in the same layer to keep the image smaller, and
 # `python` / `python3` are registered as alternatives pointing at python3.11.
 RUN apt-get update && apt-get install -y --no-install-recommends \
    python3.11 \
    python3.11-venv \
    python3.11-dev \
    python3-pip \
    git \
    curl \
    build-essential \
    && rm -rf /var/lib/apt/lists/* \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
 # Create and activate virtual environment
 # Prepending /opt/venv/bin to PATH makes every later `pip` / `python` call
 # resolve to the virtualenv without an explicit activate step.
 RUN python -m venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"
 # Install PyTorch with CUDA support
 # NOTE(review): vllm declares its own torch requirement; confirm that the
 # vllm==0.11.1 install below does not replace this torch==2.5.1 build.
 RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
    torch==2.5.1 \
    torchvision \
    --index-url https://download.pytorch.org/whl/cu124
 # Install vLLM 0.11.1 (first stable release with PaddleOCR-VL support)
 RUN pip install --no-cache-dir \
    vllm==0.11.1 \
    --extra-index-url https://download.pytorch.org/whl/cu124
 # Install additional dependencies
 # These are unpinned, so the resolved versions depend on build time.
 RUN pip install --no-cache-dir \
    transformers \
    accelerate \
    safetensors \
    pillow \
    fastapi \
    uvicorn[standard] \
    python-multipart \
    openai \
    httpx
 # Copy entrypoint script
 COPY image_support_files/paddleocr-vl-entrypoint.sh /usr/local/bin/paddleocr-vl-entrypoint.sh
 RUN chmod +x /usr/local/bin/paddleocr-vl-entrypoint.sh
 # Expose vLLM API port
 EXPOSE 8000
 # Health check
 # Probes /health every 30s; failures during the first 300s are not counted
 # (presumably to cover model download/load time - confirm against real startup).
 HEALTHCHECK --interval=30s --timeout=10s --start-period=300s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1
 ENTRYPOINT ["/usr/local/bin/paddleocr-vl-entrypoint.sh"]
--- a/2
+++ b/2
@@ -44,7 +44,7 @@ RUN pip install --no-cache-dir --upgrade pip && \
 # Copy server files
 COPY image_support_files/paddleocr_vl_server.py /app/paddleocr_vl_server.py
-COPY image_support_files/paddleocr-vl-cpu-entrypoint.sh /usr/local/bin/paddleocr-vl-cpu-entrypoint.sh
+COPY image_support_files/paddleocr_vl_entrypoint.sh /usr/local/bin/paddleocr-vl-cpu-entrypoint.sh
 RUN chmod +x /usr/local/bin/paddleocr-vl-cpu-entrypoint.sh
 # Expose API port
--- a/2
+++ b/2
@@ -58,7 +58,7 @@ RUN pip install --no-cache-dir \
 # Copy server files (same as CPU variant - it auto-detects CUDA)
 COPY image_support_files/paddleocr_vl_server.py /app/paddleocr_vl_server.py
-COPY image_support_files/paddleocr-vl-cpu-entrypoint.sh /usr/local/bin/paddleocr-vl-entrypoint.sh
+COPY image_support_files/paddleocr_vl_entrypoint.sh /usr/local/bin/paddleocr-vl-entrypoint.sh
 RUN chmod +x /usr/local/bin/paddleocr-vl-entrypoint.sh
 # Expose API port
--- a/build-images.sh
+++ b/build-images.sh
@@ -16,7 +16,7 @@ echo -e "${BLUE}Building ht-docker-ai images...${NC}"
 # Build GPU variant
 echo -e "${GREEN}Building MiniCPM-V 4.5 GPU variant...${NC}"
 docker build \
-    -f Dockerfile_minicpm45v \
+    -f Dockerfile_minicpm45v_gpu \
    -t ${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}:minicpm45v \
    -t ${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}:minicpm45v-gpu \
    -t ${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}:latest \
@@ -29,10 +29,10 @@ docker build \
    -t ${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}:minicpm45v-cpu \
    .
-# Build PaddleOCR-VL GPU variant (vLLM)
+# Build PaddleOCR-VL GPU variant
-echo -e "${GREEN}Building PaddleOCR-VL GPU variant (vLLM)...${NC}"
+echo -e "${GREEN}Building PaddleOCR-VL GPU variant...${NC}"
 docker build \
-    -f Dockerfile_paddleocr_vl \
+    -f Dockerfile_paddleocr_vl_gpu \
    -t ${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}:paddleocr-vl \
    -t ${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}:paddleocr-vl-gpu \
    .
--- a/changelog.md
+++ b/changelog.md
@@ -1,5 +1,23 @@
 # Changelog
 ## 2026-01-17 - 1.7.1 - fix(docker)
 standardize Dockerfile and entrypoint filenames; add GPU-specific Dockerfiles and update build and test references
 - Added Dockerfile_minicpm45v_gpu and image_support_files/minicpm45v_entrypoint.sh; removed the old Dockerfile_minicpm45v and docker-entrypoint.sh
 - Renamed and simplified PaddleOCR entrypoint to image_support_files/paddleocr_vl_entrypoint.sh and updated CPU/GPU Dockerfile references
 - Updated build-images.sh to use *_gpu Dockerfiles and clarified PaddleOCR GPU build log
 - Updated test/helpers/docker.ts to point to Dockerfile_minicpm45v_gpu so tests build the GPU variant
 ## 2026-01-17 - 1.7.0 - feat(tests)
 use Qwen2.5 (Ollama) for invoice extraction tests and add helpers for model management; normalize dates and coerce numeric fields
 - Added ensureOllamaModel and ensureQwen25 test helpers to pull/check Ollama models via localhost:11434
 - Updated invoices test to use qwen2.5:7b instead of MiniCPM and removed image payload from the text-only extraction step
 - Increased Markdown truncate limit from 8000 to 12000 and reduced model num_predict from 2048 to 512
 - Rewrote extraction prompt to require strict JSON output and added post-processing to parse/convert numeric fields
 - Added normalizeDate and improved compareInvoice to normalize dates and handle numeric formatting/tolerance
 - Updated test setup to ensure Qwen2.5 is available and adjusted logging/messages to reflect the Qwen2.5-based workflow
 ## 2026-01-17 - 1.6.0 - feat(paddleocr-vl)
 add PaddleOCR-VL full pipeline Docker image and API server, plus integration tests and docker helpers
--- a/image_support_files/minicpm45v_entrypoint.sh
+++ b/image_support_files/minicpm45v_entrypoint.sh
--- a/image_support_files/paddleocr-vl-entrypoint.sh
+++ b/image_support_files/paddleocr-vl-entrypoint.sh
@@ -1,59 +0,0 @@
 #!/bin/bash
 # Entrypoint for the PaddleOCR-VL GPU image: prints the effective configuration,
 # reports GPU status when nvidia-smi is present, then hands over to `vllm serve`.
 # Abort on the first failing command.
 set -e
 echo "==================================="
 echo "PaddleOCR-VL Server"
 echo "==================================="
 # Configuration
 # Every value can be overridden through an environment variable of the same
 # name; the value after ":-" is the default used when it is unset or empty.
 MODEL_NAME="${MODEL_NAME:-PaddlePaddle/PaddleOCR-VL}"
 HOST="${HOST:-0.0.0.0}"
 PORT="${PORT:-8000}"
 MAX_BATCHED_TOKENS="${MAX_BATCHED_TOKENS:-16384}"
 GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.9}"
 MAX_MODEL_LEN="${MAX_MODEL_LEN:-8192}"
 ENFORCE_EAGER="${ENFORCE_EAGER:-false}"
 echo "Model: ${MODEL_NAME}"
 echo "Host: ${HOST}"
 echo "Port: ${PORT}"
 echo "Max batched tokens: ${MAX_BATCHED_TOKENS}"
 echo "GPU memory utilization: ${GPU_MEMORY_UTILIZATION}"
 echo "Max model length: ${MAX_MODEL_LEN}"
 echo "Enforce eager: ${ENFORCE_EAGER}"
 echo ""
 # Check GPU availability
 # Informational only: a missing nvidia-smi prints a warning and startup continues.
 if command -v nvidia-smi &> /dev/null; then
    echo "GPU Information:"
    nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv
    echo ""
 else
    echo "WARNING: nvidia-smi not found. GPU may not be available."
 fi
 echo "Starting vLLM server..."
 echo "==================================="
 # Build vLLM command
 # Held in an array so each flag and value stays a separate argument; the JSON
 # passed to --limit-mm-per-prompt is single-quoted to reach vllm unmodified.
 VLLM_ARGS=(
    serve "${MODEL_NAME}"
    --trust-remote-code
    --host "${HOST}"
    --port "${PORT}"
    --max-num-batched-tokens "${MAX_BATCHED_TOKENS}"
    --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION}"
    --max-model-len "${MAX_MODEL_LEN}"
    --no-enable-prefix-caching
    --mm-processor-cache-gb 0
    --served-model-name "paddleocr-vl"
    --limit-mm-per-prompt '{"image": 1}'
 )
 # Add enforce-eager if enabled (disables CUDA graphs, saves memory)
 # Only the exact string "true" enables it; any other value leaves it off.
 if [ "${ENFORCE_EAGER}" = "true" ]; then
    VLLM_ARGS+=(--enforce-eager)
 fi
 # Start vLLM server with PaddleOCR-VL
 # exec replaces this shell with the vllm process, so no wrapper shell remains.
 exec vllm "${VLLM_ARGS[@]}"
--- a/image_support_files/paddleocr-vl-cpu-entrypoint.sh
+++ b/image_support_files/paddleocr-vl-cpu-entrypoint.sh
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@host.today/ht-docker-ai",
-  "version": "1.6.0",
+  "version": "1.7.1",
  "type": "module",
  "private": false,
  "description": "Docker images for AI vision-language models including MiniCPM-V 4.5",
--- a/test/helpers/docker.ts
+++ b/test/helpers/docker.ts
@@ -49,7 +49,7 @@ export const IMAGES = {
  minicpm: {
    name: 'minicpm45v',
-    dockerfile: 'Dockerfile_minicpm45v',
+    dockerfile: 'Dockerfile_minicpm45v_gpu',
    buildContext: '.',
    containerName: 'minicpm-test',
    ports: ['11434:11434'],
@@ -295,3 +295,66 @@ export async function ensurePaddleOcrVlFull(): Promise<boolean> {
  }
  return ensureService(IMAGES.paddleocrVlFull);
 }
 /**
 * Ensure an Ollama model is pulled and available.
 * Uses the MiniCPM container (which runs Ollama) to pull the model.
 *
 * The installed-model check compares full `name:tag` strings. A request without
 * a tag is treated as `<name>:latest`. Another tag or sibling of the same model
 * family (e.g. `qwen2.5:0.5b` or `qwen2.5-coder:7b` when `qwen2.5:7b` is
 * requested) does NOT count as available and triggers a pull.
 *
 * @param modelName Ollama model reference, e.g. `qwen2.5:7b`
 * @returns true when the model is already listed or the pull request succeeded;
 *          false when Ollama cannot be reached or the pull fails
 */
 export async function ensureOllamaModel(modelName: string): Promise<boolean> {
  const OLLAMA_URL = 'http://localhost:11434';
  // /api/tags reports names with an explicit tag, so normalise the request the same way.
  const wantedName = modelName.includes(':') ? modelName : `${modelName}:latest`;
  console.log(`\n[Ollama] Ensuring model: ${modelName}`);
  // Check if model exists
  try {
    const response = await fetch(`${OLLAMA_URL}/api/tags`);
    if (response.ok) {
      const data = await response.json();
      const models = data.models || [];
      // Exact match only: a prefix match would accept a different size/variant.
      const exists = models.some((m: { name: string }) =>
        m.name === modelName || m.name === wantedName
      );
      if (exists) {
        console.log(`[Ollama] Model already available: ${modelName}`);
        return true;
      }
    }
    // A non-OK listing response falls through to the pull attempt below.
  } catch {
    console.log(`[Ollama] Cannot check models, Ollama may not be running`);
    return false;
  }
  // Pull the model
  console.log(`[Ollama] Pulling model: ${modelName} (this may take a while)...`);
  try {
    // stream: false makes the request resolve once, after the pull has finished.
    const response = await fetch(`${OLLAMA_URL}/api/pull`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: modelName, stream: false }),
    });
    if (response.ok) {
      console.log(`[Ollama] Model pulled successfully: ${modelName}`);
      return true;
    } else {
      console.log(`[Ollama] Failed to pull model: ${response.status}`);
      return false;
    }
  } catch (err) {
    console.log(`[Ollama] Error pulling model: ${err}`);
    return false;
  }
 }
 /**
 * Make the Qwen2.5 7B model usable for text-only JSON extraction.
 *
 * Brings up the Ollama service first (it lives in the MiniCPM container) and,
 * only if that succeeded, makes sure the `qwen2.5:7b` model is present.
 *
 * @returns false when the Ollama service could not be ensured, otherwise the
 *          result of ensuring the model
 */
 export async function ensureQwen25(): Promise<boolean> {
  const serviceUp = await ensureMiniCpm();
  // No point asking for a model when the service hosting it is not running.
  return serviceUp ? ensureOllamaModel('qwen2.5:7b') : false;
 }
--- a/test/test.invoices.paddleocr-vl.ts
+++ b/test/test.invoices.paddleocr-vl.ts
@@ -15,11 +15,12 @@ import * as fs from 'fs';
 import * as path from 'path';
 import { execSync } from 'child_process';
 import * as os from 'os';
-import { ensurePaddleOcrVlFull, ensureMiniCpm } from './helpers/docker.js';
+import { ensurePaddleOcrVlFull, ensureQwen25 } from './helpers/docker.js';
 const PADDLEOCR_VL_URL = 'http://localhost:8000';
 const OLLAMA_URL = 'http://localhost:11434';
-const MINICPM_MODEL = 'minicpm-v:latest';
+// Use Qwen2.5 for text-only JSON extraction (not MiniCPM which is vision-focused)
 const TEXT_MODEL = 'qwen2.5:7b';
 interface IInvoice {
  invoice_number: string;
@@ -87,42 +88,45 @@ async function parseDocument(imageBase64: string): Promise<string> {
 }
 /**
- * Extract invoice fields from structured Markdown using MiniCPM with image context
+ * Extract invoice fields from structured Markdown using Qwen2.5 (text-only model)
 */
-async function extractInvoiceFromMarkdown(markdown: string, images: string[]): Promise<IInvoice> {
+async function extractInvoiceFromMarkdown(markdown: string): Promise<IInvoice> {
  // Truncate if too long
-  const truncated = markdown.length > 8000 ? markdown.slice(0, 8000) : markdown;
+  const truncated = markdown.length > 12000 ? markdown.slice(0, 12000) : markdown;
  console.log(`    [Extract] Processing ${truncated.length} chars of Markdown`);
-  const prompt = `/nothink
+  const prompt = `You are an invoice data extractor. Extract the following fields from this OCR text and return ONLY a valid JSON object.
 You are an invoice parser. Extract fields from this invoice image.
 Required fields:
- invoice_number: The invoice/receipt number
+- invoice_number: The invoice/receipt/document number
- invoice_date: Date in YYYY-MM-DD format
+- invoice_date: Date in YYYY-MM-DD format (convert from any format)
 - vendor_name: Company that issued the invoice
- currency: EUR, USD, etc.
+- currency: EUR, USD, GBP, etc.
- net_amount: Amount before tax
+- net_amount: Amount before tax (number)
- vat_amount: Tax/VAT amount (0 if reverse charge)
+- vat_amount: Tax/VAT amount (number, use 0 if reverse charge or not shown)
- total_amount: Final amount due
+- total_amount: Final total amount (number)
-Return ONLY a JSON object like:
+Example output format:
-{"invoice_number":"123","invoice_date":"2022-01-28","vendor_name":"Adobe","currency":"EUR","net_amount":24.99,"vat_amount":0,"total_amount":24.99}
+{"invoice_number":"INV-123","invoice_date":"2022-01-28","vendor_name":"Adobe","currency":"EUR","net_amount":24.99,"vat_amount":0,"total_amount":24.99}
-Use null for missing strings, 0 for missing numbers. No explanation.
+Rules:
 - Return ONLY the JSON object, no explanation or markdown
 - Use null for missing string fields
 - Use 0 for missing numeric fields
 - Convert dates to YYYY-MM-DD format (e.g., "28-JAN-2022" becomes "2022-01-28")
 - Extract numbers without currency symbols
-OCR text from the invoice (for reference):
+OCR Text:
 ---
 ${truncated}
---`;
+
 JSON:`;
  const payload = {
-    model: MINICPM_MODEL,
+    model: TEXT_MODEL,
    prompt,
    images,  // Send the actual image to MiniCPM
    stream: true,
    options: {
-      num_predict: 2048,
+      num_predict: 512,
      temperature: 0.1,
    },
  };
@@ -173,26 +177,41 @@ ${truncated}
  }
  const jsonStr = fullText.substring(startIdx, endIdx);
-  return JSON.parse(jsonStr);
+  const parsed = JSON.parse(jsonStr);
  // Ensure numeric fields are actually numbers
  return {
    invoice_number: parsed.invoice_number || null,
    invoice_date: parsed.invoice_date || null,
    vendor_name: parsed.vendor_name || null,
    currency: parsed.currency || 'EUR',
    net_amount: parseFloat(parsed.net_amount) || 0,
    vat_amount: parseFloat(parsed.vat_amount) || 0,
    total_amount: parseFloat(parsed.total_amount) || 0,
  };
 }
 /**
- * Single extraction pass: Parse with PaddleOCR-VL Full, extract with MiniCPM
+ * Single extraction pass: Parse with PaddleOCR-VL Full, extract with Qwen2.5 (text-only)
 */
 async function extractOnce(images: string[], passNum: number): Promise<IInvoice> {
-  // Parse document with full pipeline
+  // Parse document with full pipeline (PaddleOCR-VL)
  const markdown = await parseDocument(images[0]);
  console.log(`    [Parse] Got ${markdown.split('\n').length} lines of Markdown`);
-  // Extract invoice fields from Markdown with image context
+  // Extract invoice fields from Markdown using text-only model (no images)
-  return extractInvoiceFromMarkdown(markdown, images);
+  return extractInvoiceFromMarkdown(markdown);
 }
 /**
 * Create a hash of invoice for comparison (using key fields)
 */
 function hashInvoice(invoice: IInvoice): string {
-  return `${invoice.invoice_number}|${invoice.invoice_date}|${invoice.total_amount.toFixed(2)}`;
+  // Ensure total_amount is a number
  const amount = typeof invoice.total_amount === 'number'
    ? invoice.total_amount.toFixed(2)
    : String(invoice.total_amount || 0);
  return `${invoice.invoice_number}|${invoice.invoice_date}|${amount}`;
 }
 /**
@@ -243,6 +262,43 @@ async function extractWithConsensus(images: string[], invoiceName: string, maxPa
  return best.invoice;
 }
 /**
 * Normalize date to YYYY-MM-DD format.
 *
 * Recognised inputs (surrounding whitespace is ignored):
 * - `YYYY-MM-DD`               returned as-is
 * - `DD-MMM-YYYY`              English three-letter month, case-insensitive
 * - `DD/MM/YYYY`, `DD.MM.YYYY` day first, then month
 *
 * Anything else, including a `DD-MMM-YYYY` string whose month abbreviation is
 * not recognised, is returned unchanged (trimmed) rather than guessed at, so a
 * bad date shows up as a mismatch instead of silently becoming January.
 *
 * @param dateStr raw date string, or null
 * @returns the normalized date, the trimmed input when the format is unknown,
 *          or '' for null/empty input
 */
 function normalizeDate(dateStr: string | null): string {
  if (!dateStr) return '';
  const text = dateStr.trim();
  // Already in correct format
  if (/^\d{4}-\d{2}-\d{2}$/.test(text)) {
    return text;
  }
  // Handle DD-MMM-YYYY format (e.g., "28-JUN-2022")
  const monthMap: Record<string, string> = {
    JAN: '01', FEB: '02', MAR: '03', APR: '04', MAY: '05', JUN: '06',
    JUL: '07', AUG: '08', SEP: '09', OCT: '10', NOV: '11', DEC: '12',
  };
  const match = text.match(/^(\d{1,2})-([A-Z]{3})-(\d{4})$/i);
  if (match) {
    const month = monthMap[match[2].toUpperCase()];
    // Unknown abbreviation: leave the value alone instead of inventing a month.
    if (!month) return text;
    const day = match[1].padStart(2, '0');
    const year = match[3];
    return `${year}-${month}-${day}`;
  }
  // Handle DD/MM/YYYY or DD.MM.YYYY
  const match2 = text.match(/^(\d{1,2})[\/.](\d{1,2})[\/.](\d{4})$/);
  if (match2) {
    const day = match2[1].padStart(2, '0');
    const month = match2[2].padStart(2, '0');
    const year = match2[3];
    return `${year}-${month}-${day}`;
  }
  return text;
 }
 /**
 * Compare extracted invoice against expected
 */
@@ -259,8 +315,10 @@ function compareInvoice(
    errors.push(`invoice_number: expected "${expected.invoice_number}", got "${extracted.invoice_number}"`);
  }
-  // Compare date
+  // Compare date (normalize format first)
-  if (extracted.invoice_date !== expected.invoice_date) {
+  const extDate = normalizeDate(extracted.invoice_date);
  const expDate = normalizeDate(expected.invoice_date);
  if (extDate !== expDate) {
    errors.push(`invoice_date: expected "${expected.invoice_date}", got "${extracted.invoice_date}"`);
  }
@@ -317,9 +375,9 @@ tap.test('setup: ensure Docker containers are running', async () => {
  const paddleOk = await ensurePaddleOcrVlFull();
  expect(paddleOk).toBeTrue();
-  // Ensure MiniCPM is running (for field extraction from Markdown)
+  // Ensure Qwen2.5 is available (for text-only JSON extraction)
-  const minicpmOk = await ensureMiniCpm();
+  const qwenOk = await ensureQwen25();
-  expect(minicpmOk).toBeTrue();
+  expect(qwenOk).toBeTrue();
  console.log('\n[Setup] All containers ready!\n');
 });
@@ -380,7 +438,7 @@ tap.test('summary', async () => {
  console.log(`\n======================================================`);
  console.log(`   Invoice Extraction Summary (PaddleOCR-VL Full)`);
  console.log(`======================================================`);
-  console.log(`  Method:    PaddleOCR-VL Full Pipeline -> MiniCPM`);
+  console.log(`  Method:    PaddleOCR-VL Full Pipeline -> Qwen2.5 (text-only)`);
  console.log(`  Passed:    ${passedCount}/${totalInvoices}`);
  console.log(`  Failed:    ${failedCount}/${totalInvoices}`);
  console.log(`  Accuracy:  ${accuracy.toFixed(1)}%`);
Author	SHA1	Message	Date
Juergen Kunz	0d8a1ebac2	v1.7.1 Some checks failed Docker (tags) / security (push) Successful in 31s Details Docker (tags) / test (push) Failing after 39s Details Docker (tags) / release (push) Has been skipped Details Docker (tags) / metadata (push) Has been skipped Details	2026-01-17 23:13:47 +00:00
Juergen Kunz	5a311dca2d	fix(docker): standardize Dockerfile and entrypoint filenames; add GPU-specific Dockerfiles and update build and test references	2026-01-17 23:13:47 +00:00
Juergen Kunz	ab288380f1	v1.7.0 Some checks failed Docker (tags) / security (push) Successful in 30s Details Docker (tags) / test (push) Failing after 40s Details Docker (tags) / release (push) Has been skipped Details Docker (tags) / metadata (push) Has been skipped Details	2026-01-17 21:50:09 +00:00
Juergen Kunz	30c73b24c1	feat(tests): use Qwen2.5 (Ollama) for invoice extraction tests and add helpers for model management; normalize dates and coerce numeric fields	2026-01-17 21:50:09 +00:00