feat(paddleocr-vl): add PaddleOCR-VL GPU Dockerfile, pin vllm, update CPU image deps, and improve entrypoint and tests
Dockerfile_paddleocr_vl

@@ -39,11 +39,9 @@ RUN pip install --no-cache-dir --upgrade pip && \
     torchvision \
     --index-url https://download.pytorch.org/whl/cu124
 
-# Install vLLM (nightly for PaddleOCR-VL support)
+# Install vLLM 0.11.1 (first stable release with PaddleOCR-VL support)
 RUN pip install --no-cache-dir \
-    vllm \
-    --pre \
-    --extra-index-url https://wheels.vllm.ai/nightly \
+    vllm==0.11.1 \
    --extra-index-url https://download.pytorch.org/whl/cu124
 
 # Install additional dependencies
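Assuming the image builds from this Dockerfile, a quick smoke test can confirm the pin and the served alias. The image tag, container name, and host port below are illustrative assumptions, not part of this commit:

    # Build, then print the vLLM version baked into the image (expect 0.11.1)
    docker build -f Dockerfile_paddleocr_vl -t paddleocr-vl:vllm .
    docker run --rm --entrypoint pip paddleocr-vl:vllm show vllm | grep '^Version'

    # Start the server and list served models (should include "paddleocr-vl")
    docker run -d --gpus all -p 8000:8000 --name paddleocr-vl-smoke paddleocr-vl:vllm
    curl -s http://localhost:8000/v1/models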
CPU Dockerfile

@@ -28,7 +28,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 # Install Python dependencies
 RUN pip install --no-cache-dir --upgrade pip && \
     pip install --no-cache-dir \
-    torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu && \
+    torch==2.5.1 torchvision==0.20.1 --index-url https://download.pytorch.org/whl/cpu && \
     pip install --no-cache-dir \
     transformers \
     accelerate \
@@ -37,7 +37,10 @@ RUN pip install --no-cache-dir --upgrade pip && \
     fastapi \
     uvicorn[standard] \
     python-multipart \
-    httpx
+    httpx \
+    protobuf \
+    sentencepiece \
+    einops
 
 # Copy server files
 COPY image_support_files/paddleocr_vl_server.py /app/paddleocr_vl_server.py
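A minimal check that the newly added CPU-image dependencies import cleanly, assuming a locally built tag such as paddleocr-vl:cpu (hypothetical):

    docker run --rm --entrypoint python paddleocr-vl:cpu \
      -c "import google.protobuf, sentencepiece, einops, torchvision; print('deps ok')"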
Dockerfile_paddleocr_vl_gpu (new file, 71 lines)

@@ -0,0 +1,71 @@
+# PaddleOCR-VL GPU Variant (Transformers-based, not vLLM)
+# Vision-Language Model for document parsing using transformers with CUDA
+FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04
+
+LABEL maintainer="Task Venture Capital GmbH <hello@task.vc>"
+LABEL description="PaddleOCR-VL 0.9B GPU - Vision-Language Model using transformers"
+LABEL org.opencontainers.image.source="https://code.foss.global/host.today/ht-docker-ai"
+
+# Environment configuration
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+ENV HF_HOME=/root/.cache/huggingface
+ENV SERVER_PORT=8000
+ENV SERVER_HOST=0.0.0.0
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3.11 \
+    python3.11-venv \
+    python3.11-dev \
+    python3-pip \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    libgomp1 \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/* \
+    && update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
+
+# Create and activate virtual environment
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+# Install PyTorch with CUDA support
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir \
+    torch==2.5.1 \
+    torchvision \
+    --index-url https://download.pytorch.org/whl/cu124
+
+# Install Python dependencies (transformers-based, not vLLM)
+RUN pip install --no-cache-dir \
+    transformers \
+    accelerate \
+    safetensors \
+    pillow \
+    fastapi \
+    uvicorn[standard] \
+    python-multipart \
+    httpx \
+    protobuf \
+    sentencepiece \
+    einops
+
+# Copy server files (same as CPU variant - it auto-detects CUDA)
+COPY image_support_files/paddleocr_vl_server.py /app/paddleocr_vl_server.py
+COPY image_support_files/paddleocr-vl-cpu-entrypoint.sh /usr/local/bin/paddleocr-vl-entrypoint.sh
+RUN chmod +x /usr/local/bin/paddleocr-vl-entrypoint.sh
+
+# Expose API port
+EXPOSE 8000
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=300s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+
+ENTRYPOINT ["/usr/local/bin/paddleocr-vl-entrypoint.sh"]
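A sketch of building and running the new GPU variant. The tag, HF cache mount, and port mapping are assumptions for illustration; the 300 s start period in the HEALTHCHECK above covers the initial model download:

    docker build -f Dockerfile_paddleocr_vl_gpu -t paddleocr-vl:gpu .
    docker run -d --gpus all -p 8000:8000 \
      -v ~/.cache/huggingface:/root/.cache/huggingface \
      --name paddleocr-vl-gpu paddleocr-vl:gpu

    # Poll the same endpoint the HEALTHCHECK uses
    curl -f http://localhost:8000/health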
changelog.md

@@ -1,5 +1,15 @@
 # Changelog
 
+## 2026-01-17 - 1.5.0 - feat(paddleocr-vl)
+add PaddleOCR-VL GPU Dockerfile, pin vllm, update CPU image deps, and improve entrypoint and tests
+
+- Add a new GPU Dockerfile for PaddleOCR-VL (transformers-based) with CUDA support, healthcheck, and entrypoint.
+- Pin vllm to 0.11.1 in Dockerfile_paddleocr_vl to use the first stable release with PaddleOCR-VL support.
+- Update CPU image: add torchvision==0.20.1 and extra Python deps (protobuf, sentencepiece, einops) required by the transformers-based server.
+- Rewrite paddleocr-vl-entrypoint.sh to build vllm args array, add MAX_MODEL_LEN and ENFORCE_EAGER env vars, include --limit-mm-per-prompt and optional --enforce-eager, and switch to exec vllm with constructed args.
+- Update tests to use the OpenAI-compatible PaddleOCR-VL chat completions API (/v1/chat/completions) with image+text message payload and model 'paddleocr-vl'.
+- Add @types/node to package.json dependencies and tidy devDependencies ordering.
+
 ## 2026-01-16 - 1.4.0 - feat(invoices)
 add hybrid OCR + vision invoice/document parsing with PaddleOCR, consensus voting, and prompt/test refactors
 
paddleocr-vl-entrypoint.sh

@@ -11,12 +11,16 @@ HOST="${HOST:-0.0.0.0}"
 PORT="${PORT:-8000}"
 MAX_BATCHED_TOKENS="${MAX_BATCHED_TOKENS:-16384}"
 GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.9}"
+MAX_MODEL_LEN="${MAX_MODEL_LEN:-8192}"
+ENFORCE_EAGER="${ENFORCE_EAGER:-false}"
 
 echo "Model: ${MODEL_NAME}"
 echo "Host: ${HOST}"
 echo "Port: ${PORT}"
 echo "Max batched tokens: ${MAX_BATCHED_TOKENS}"
 echo "GPU memory utilization: ${GPU_MEMORY_UTILIZATION}"
+echo "Max model length: ${MAX_MODEL_LEN}"
+echo "Enforce eager: ${ENFORCE_EAGER}"
 echo ""
 
 # Check GPU availability
@@ -31,13 +35,25 @@ fi
 echo "Starting vLLM server..."
 echo "==================================="
 
-# Start vLLM server with PaddleOCR-VL
-exec vllm serve "${MODEL_NAME}" \
-    --trust-remote-code \
-    --host "${HOST}" \
-    --port "${PORT}" \
-    --max-num-batched-tokens "${MAX_BATCHED_TOKENS}" \
-    --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION}" \
-    --no-enable-prefix-caching \
-    --mm-processor-cache-gb 0 \
-    --served-model-name "paddleocr-vl"
+# Build vLLM command
+VLLM_ARGS=(
+    serve "${MODEL_NAME}"
+    --trust-remote-code
+    --host "${HOST}"
+    --port "${PORT}"
+    --max-num-batched-tokens "${MAX_BATCHED_TOKENS}"
+    --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION}"
+    --max-model-len "${MAX_MODEL_LEN}"
+    --no-enable-prefix-caching
+    --mm-processor-cache-gb 0
+    --served-model-name "paddleocr-vl"
+    --limit-mm-per-prompt '{"image": 1}'
+)
+
+# Add enforce-eager if enabled (disables CUDA graphs, saves memory)
+if [ "${ENFORCE_EAGER}" = "true" ]; then
+    VLLM_ARGS+=(--enforce-eager)
+fi
+
+# Start vLLM server with PaddleOCR-VL
+exec vllm "${VLLM_ARGS[@]}"
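Because the new knobs are plain environment variables with defaults, they can be overridden per container without rebuilding. The values below are illustrative, not tuned recommendations; --enforce-eager disables CUDA graphs to save memory at some throughput cost:

    docker run -d --gpus all -p 8000:8000 \
      -e MAX_MODEL_LEN=16384 \
      -e ENFORCE_EAGER=true \
      -e GPU_MEMORY_UTILIZATION=0.85 \
      paddleocr-vl:vllm   # hypothetical tag from the earlier build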
package.json

@@ -13,8 +13,8 @@
     "test": "tstest test/ --verbose"
   },
   "devDependencies": {
-    "@git.zone/tstest": "^1.0.90",
-    "@git.zone/tsrun": "^1.3.3"
+    "@git.zone/tsrun": "^1.3.3",
+    "@git.zone/tstest": "^1.0.90"
   },
   "repository": {
     "type": "git",
@@ -28,5 +28,8 @@
     "minicpm",
     "ollama",
     "multimodal"
-  ]
+  ],
+  "dependencies": {
+    "@types/node": "^25.0.9"
+  }
 }
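The lockfile changes below are generated, so after this package.json edit they come from re-running the package manager rather than manual edits (pnpm, as the lockfile name indicates):

    pnpm install   # refreshes pnpm-lock.yaml to match package.json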
pnpm-lock.yaml (generated)

@@ -7,6 +7,10 @@ settings:
 importers:
 
   .:
+    dependencies:
+      '@types/node':
+        specifier: ^25.0.9
+        version: 25.0.9
     devDependencies:
       '@git.zone/tsrun':
         specifier: ^1.3.3
TypeScript test file

@@ -6,7 +6,7 @@ import * as os from 'os';
 
 const OLLAMA_URL = 'http://localhost:11434';
 const MODEL = 'openbmb/minicpm-v4.5:q8_0';
-const PADDLEOCR_URL = 'http://localhost:5000';
+const PADDLEOCR_VL_URL = 'http://localhost:8000';
 
 interface IInvoice {
   invoice_number: string;
@@ -19,24 +19,33 @@ interface IInvoice {
 }
 
 /**
- * Extract OCR text from an image using PaddleOCR
+ * Extract OCR text from an image using PaddleOCR-VL (OpenAI-compatible API)
  */
 async function extractOcrText(imageBase64: string): Promise<string> {
   try {
-    const response = await fetch(`${PADDLEOCR_URL}/ocr`, {
+    const response = await fetch(`${PADDLEOCR_VL_URL}/v1/chat/completions`, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({ image: imageBase64 }),
+      body: JSON.stringify({
+        model: 'paddleocr-vl',
+        messages: [{
+          role: 'user',
+          content: [
+            { type: 'image_url', image_url: { url: `data:image/png;base64,${imageBase64}` } },
+            { type: 'text', text: 'OCR:' }
+          ]
+        }],
+        temperature: 0.0,
+        max_tokens: 4096
+      }),
     });
 
     if (!response.ok) return '';
 
     const data = await response.json();
-    if (data.success && data.results) {
-      return data.results.map((r: { text: string }) => r.text).join('\n');
-    }
+    return data.choices?.[0]?.message?.content || '';
   } catch {
-    // PaddleOCR unavailable
+    // PaddleOCR-VL unavailable
   }
   return '';
 }
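For debugging outside the test runner, the request the test now sends can be reproduced from the shell. This assumes a local invoice.png, the default port, and that jq is available; base64 -w0 is the GNU coreutils spelling (use base64 -i on macOS):

    # Build the same image+text chat payload the test sends, then POST it
    IMG_B64=$(base64 -w0 invoice.png)
    jq -n --arg img "$IMG_B64" '{
      model: "paddleocr-vl",
      messages: [{
        role: "user",
        content: [
          {type: "image_url", image_url: {url: ("data:image/png;base64," + $img)}},
          {type: "text", text: "OCR:"}
        ]
      }],
      temperature: 0.0,
      max_tokens: 4096
    }' | curl -s http://localhost:8000/v1/chat/completions \
          -H 'Content-Type: application/json' \
          --data-binary @-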