fix(docker): standardize Dockerfile and entrypoint filenames; add GPU-specific Dockerfiles and update build and test references
This commit is contained in:
@@ -14,7 +14,7 @@ ENV OLLAMA_ORIGINS="*"
|
||||
ENV CUDA_VISIBLE_DEVICES=""
|
||||
|
||||
# Copy and setup entrypoint
|
||||
-COPY image_support_files/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
|
||||
+COPY image_support_files/minicpm45v_entrypoint.sh /usr/local/bin/docker-entrypoint.sh
|
||||
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
|
||||
|
||||
# Expose Ollama API port
|
||||
|
||||
@@ -12,7 +12,7 @@ ENV OLLAMA_HOST="0.0.0.0"
|
||||
ENV OLLAMA_ORIGINS="*"
|
||||
|
||||
# Copy and setup entrypoint
|
||||
-COPY image_support_files/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
|
||||
+COPY image_support_files/minicpm45v_entrypoint.sh /usr/local/bin/docker-entrypoint.sh
|
||||
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
|
||||
|
||||
# Expose Ollama API port
|
||||
@@ -1,70 +0,0 @@
|
||||
# PaddleOCR-VL GPU Variant
|
||||
# Vision-Language Model for document parsing using vLLM
|
||||
FROM nvidia/cuda:12.4.0-devel-ubuntu22.04
|
||||
|
||||
LABEL maintainer="Task Venture Capital GmbH <hello@task.vc>"
|
||||
LABEL description="PaddleOCR-VL 0.9B - Vision-Language Model for document parsing"
|
||||
LABEL org.opencontainers.image.source="https://code.foss.global/host.today/ht-docker-ai"
|
||||
|
||||
# Environment configuration
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV HF_HOME=/root/.cache/huggingface
|
||||
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3.11 \
|
||||
python3.11-venv \
|
||||
python3.11-dev \
|
||||
python3-pip \
|
||||
git \
|
||||
curl \
|
||||
build-essential \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \
|
||||
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
|
||||
|
||||
# Create and activate virtual environment
|
||||
RUN python -m venv /opt/venv
|
||||
ENV PATH="/opt/venv/bin:$PATH"
|
||||
|
||||
# Install PyTorch with CUDA support
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir \
|
||||
torch==2.5.1 \
|
||||
torchvision \
|
||||
--index-url https://download.pytorch.org/whl/cu124
|
||||
|
||||
# Install vLLM 0.11.1 (first stable release with PaddleOCR-VL support)
|
||||
RUN pip install --no-cache-dir \
|
||||
vllm==0.11.1 \
|
||||
--extra-index-url https://download.pytorch.org/whl/cu124
|
||||
|
||||
# Install additional dependencies
|
||||
RUN pip install --no-cache-dir \
|
||||
transformers \
|
||||
accelerate \
|
||||
safetensors \
|
||||
pillow \
|
||||
fastapi \
|
||||
uvicorn[standard] \
|
||||
python-multipart \
|
||||
openai \
|
||||
httpx
|
||||
|
||||
# Copy entrypoint script
|
||||
COPY image_support_files/paddleocr-vl-entrypoint.sh /usr/local/bin/paddleocr-vl-entrypoint.sh
|
||||
RUN chmod +x /usr/local/bin/paddleocr-vl-entrypoint.sh
|
||||
|
||||
# Expose vLLM API port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=300s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/paddleocr-vl-entrypoint.sh"]
|
||||
@@ -44,7 +44,7 @@ RUN pip install --no-cache-dir --upgrade pip && \
|
||||
|
||||
# Copy server files
|
||||
COPY image_support_files/paddleocr_vl_server.py /app/paddleocr_vl_server.py
|
||||
-COPY image_support_files/paddleocr-vl-cpu-entrypoint.sh /usr/local/bin/paddleocr-vl-cpu-entrypoint.sh
|
||||
+COPY image_support_files/paddleocr_vl_entrypoint.sh /usr/local/bin/paddleocr-vl-cpu-entrypoint.sh
|
||||
RUN chmod +x /usr/local/bin/paddleocr-vl-cpu-entrypoint.sh
|
||||
|
||||
# Expose API port
|
||||
|
||||
@@ -58,7 +58,7 @@ RUN pip install --no-cache-dir \
|
||||
|
||||
# Copy server files (same as CPU variant - it auto-detects CUDA)
|
||||
COPY image_support_files/paddleocr_vl_server.py /app/paddleocr_vl_server.py
|
||||
-COPY image_support_files/paddleocr-vl-cpu-entrypoint.sh /usr/local/bin/paddleocr-vl-entrypoint.sh
|
||||
+COPY image_support_files/paddleocr_vl_entrypoint.sh /usr/local/bin/paddleocr-vl-entrypoint.sh
|
||||
RUN chmod +x /usr/local/bin/paddleocr-vl-entrypoint.sh
|
||||
|
||||
# Expose API port
|
||||
|
||||
@@ -16,7 +16,7 @@ echo -e "${BLUE}Building ht-docker-ai images...${NC}"
|
||||
# Build GPU variant
|
||||
echo -e "${GREEN}Building MiniCPM-V 4.5 GPU variant...${NC}"
|
||||
docker build \
|
||||
--f Dockerfile_minicpm45v \
|
||||
+-f Dockerfile_minicpm45v_gpu \
|
||||
-t ${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}:minicpm45v \
|
||||
-t ${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}:minicpm45v-gpu \
|
||||
-t ${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}:latest \
|
||||
@@ -29,10 +29,10 @@ docker build \
|
||||
-t ${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}:minicpm45v-cpu \
|
||||
.
|
||||
|
||||
-# Build PaddleOCR-VL GPU variant (vLLM)
|
||||
-echo -e "${GREEN}Building PaddleOCR-VL GPU variant (vLLM)...${NC}"
|
||||
+# Build PaddleOCR-VL GPU variant
|
||||
+echo -e "${GREEN}Building PaddleOCR-VL GPU variant...${NC}"
|
||||
docker build \
|
||||
--f Dockerfile_paddleocr_vl \
|
||||
+-f Dockerfile_paddleocr_vl_gpu \
|
||||
-t ${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}:paddleocr-vl \
|
||||
-t ${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}:paddleocr-vl-gpu \
|
||||
.
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
# Changelog
|
||||
|
||||
## 2026-01-17 - 1.7.1 - fix(docker)
|
||||
standardize Dockerfile and entrypoint filenames; add GPU-specific Dockerfiles and update build and test references
|
||||
|
||||
- Added Dockerfile_minicpm45v_gpu and image_support_files/minicpm45v_entrypoint.sh; removed the old Dockerfile_minicpm45v and docker-entrypoint.sh
|
||||
- Renamed and simplified PaddleOCR entrypoint to image_support_files/paddleocr_vl_entrypoint.sh and updated CPU/GPU Dockerfile references
|
||||
- Updated build-images.sh to use *_gpu Dockerfiles and clarified PaddleOCR GPU build log
|
||||
- Updated test/helpers/docker.ts to point to Dockerfile_minicpm45v_gpu so tests build the GPU variant
|
||||
|
||||
## 2026-01-17 - 1.7.0 - feat(tests)
|
||||
use Qwen2.5 (Ollama) for invoice extraction tests and add helpers for model management; normalize dates and coerce numeric fields
|
||||
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
echo "==================================="
|
||||
echo "PaddleOCR-VL Server"
|
||||
echo "==================================="
|
||||
|
||||
# Configuration
|
||||
MODEL_NAME="${MODEL_NAME:-PaddlePaddle/PaddleOCR-VL}"
|
||||
HOST="${HOST:-0.0.0.0}"
|
||||
PORT="${PORT:-8000}"
|
||||
MAX_BATCHED_TOKENS="${MAX_BATCHED_TOKENS:-16384}"
|
||||
GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.9}"
|
||||
MAX_MODEL_LEN="${MAX_MODEL_LEN:-8192}"
|
||||
ENFORCE_EAGER="${ENFORCE_EAGER:-false}"
|
||||
|
||||
echo "Model: ${MODEL_NAME}"
|
||||
echo "Host: ${HOST}"
|
||||
echo "Port: ${PORT}"
|
||||
echo "Max batched tokens: ${MAX_BATCHED_TOKENS}"
|
||||
echo "GPU memory utilization: ${GPU_MEMORY_UTILIZATION}"
|
||||
echo "Max model length: ${MAX_MODEL_LEN}"
|
||||
echo "Enforce eager: ${ENFORCE_EAGER}"
|
||||
echo ""
|
||||
|
||||
# Check GPU availability
|
||||
if command -v nvidia-smi &> /dev/null; then
|
||||
echo "GPU Information:"
|
||||
nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv
|
||||
echo ""
|
||||
else
|
||||
echo "WARNING: nvidia-smi not found. GPU may not be available."
|
||||
fi
|
||||
|
||||
echo "Starting vLLM server..."
|
||||
echo "==================================="
|
||||
|
||||
# Build vLLM command
|
||||
VLLM_ARGS=(
|
||||
serve "${MODEL_NAME}"
|
||||
--trust-remote-code
|
||||
--host "${HOST}"
|
||||
--port "${PORT}"
|
||||
--max-num-batched-tokens "${MAX_BATCHED_TOKENS}"
|
||||
--gpu-memory-utilization "${GPU_MEMORY_UTILIZATION}"
|
||||
--max-model-len "${MAX_MODEL_LEN}"
|
||||
--no-enable-prefix-caching
|
||||
--mm-processor-cache-gb 0
|
||||
--served-model-name "paddleocr-vl"
|
||||
--limit-mm-per-prompt '{"image": 1}'
|
||||
)
|
||||
|
||||
# Add enforce-eager if enabled (disables CUDA graphs, saves memory)
|
||||
if [ "${ENFORCE_EAGER}" = "true" ]; then
|
||||
VLLM_ARGS+=(--enforce-eager)
|
||||
fi
|
||||
|
||||
# Start vLLM server with PaddleOCR-VL
|
||||
exec vllm "${VLLM_ARGS[@]}"
|
||||
@@ -49,7 +49,7 @@ export const IMAGES = {
|
||||
|
||||
minicpm: {
|
||||
name: 'minicpm45v',
|
||||
-dockerfile: 'Dockerfile_minicpm45v',
|
||||
+dockerfile: 'Dockerfile_minicpm45v_gpu',
|
||||
buildContext: '.',
|
||||
containerName: 'minicpm-test',
|
||||
ports: ['11434:11434'],
|
||||
|
||||
Reference in New Issue
Block a user