2026-01-16 10:22:15 +00:00
|
|
|
# PaddleOCR CPU Variant
|
|
|
|
|
# OCR processing optimized for CPU-only inference
|
2026-01-16 13:23:01 +00:00
|
|
|
# Base image: Python 3.10 on the Debian bookworm "slim" variant.
# NOTE(review): the tag is not pinned by digest, so rebuilds may pick up a
# newer base image; consider pinning for fully reproducible builds.
FROM python:3.10-slim-bookworm
|
2026-01-16 10:22:15 +00:00
|
|
|
|
|
|
|
|
# Image metadata, declared together in a single LABEL instruction.
LABEL maintainer="Task Venture Capital GmbH <hello@task.vc>" \
      description="PaddleOCR PP-OCRv4 - CPU optimized" \
      org.opencontainers.image.source="https://code.foss.global/host.today/ht-docker-ai"
|
|
|
|
|
|
|
|
|
|
# Runtime defaults for the CPU-only variant, grouped in one ENV instruction.
#   OCR_LANGUAGE / SERVER_PORT / SERVER_HOST  - defaults exported to the container
#   PYTHONUNBUFFERED=1                        - Python writes stdout/stderr unbuffered
#   CUDA_VISIBLE_DEVICES=-1                   - disable GPU usage for this variant
ENV OCR_LANGUAGE="en" \
    SERVER_PORT="5000" \
    SERVER_HOST="0.0.0.0" \
    PYTHONUNBUFFERED=1 \
    CUDA_VISIBLE_DEVICES="-1"
|
|
|
|
|
|
|
|
|
|
# Set working directory
|
|
|
|
|
# /app is created if missing and becomes the working directory for the
# instructions that follow and for the container's main process.
WORKDIR /app
|
|
|
|
|
|
|
|
|
|
# Install system dependencies (packages sorted alphabetically).
#   curl          - used by the HEALTHCHECK probe
#   libgl1        - OpenGL runtime library; replaces the transitional
#                   libgl1-mesa-glx dummy package, which newer Debian releases
#                   no longer ship
#   libglib2.0-0  - GLib runtime (presumably needed by OpenCV - confirm)
#   libgomp1      - GNU OpenMP runtime
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
2026-01-16 13:23:01 +00:00
|
|
|
# Python packages. PaddlePaddle (CPU build) and PaddleOCR are pinned to stable
# 2.x releases; the web-server and imaging packages are installed unpinned.
# Requirement specifiers containing shell-special characters are quoted.
# NOTE(review): consider pinning the unpinned packages for reproducible builds.
RUN pip install --no-cache-dir \
        "paddlepaddle==2.6.2" \
        "paddleocr==2.8.1" \
        fastapi \
        "uvicorn[standard]" \
        python-multipart \
        opencv-python-headless \
        pillow
|
|
|
|
|
|
|
|
|
|
# Copy the server script and the entrypoint script into the image.
COPY image_support_files/paddleocr_server.py /app/paddleocr_server.py
# --chmod marks the entrypoint executable during the copy itself, so no extra
# RUN chmod layer (which would store a second copy of the file) is needed.
# Requires BuildKit.
COPY --chmod=0755 image_support_files/paddleocr-entrypoint.sh /usr/local/bin/paddleocr-entrypoint.sh
|
|
|
|
|
|
2026-01-16 13:23:01 +00:00
|
|
|
# Note: OCR models will be downloaded on first run
|
|
|
|
|
# This avoids build-time segfaults with certain CPU architectures
|
2026-01-16 10:22:15 +00:00
|
|
|
|
|
|
|
|
# Document the API port (matches the SERVER_PORT default); EXPOSE does not
# publish the port by itself.
EXPOSE 5000/tcp
|
|
|
|
|
|
|
|
|
|
# Health check (longer start-period for CPU variant).
# The probe is written in shell form so ${SERVER_PORT} is expanded inside the
# container each time the check runs: it follows a runtime SERVER_PORT override
# and falls back to 5000 when the variable is unset or empty.
# NOTE(review): assumes the server listens on SERVER_PORT - confirm against the
# entrypoint/server scripts.
# curl flags: -f fail on HTTP errors, -s no progress output, -S still print errors.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -fsS "http://localhost:${SERVER_PORT:-5000}/health" || exit 1
|
|
|
|
|
|
|
|
|
|
# Exec (JSON-array) form: the entrypoint script is executed directly rather
# than through `/bin/sh -c`.
# NOTE(review): presumably the script launches /app/paddleocr_server.py -
# confirm against image_support_files/paddleocr-entrypoint.sh.
ENTRYPOINT ["/usr/local/bin/paddleocr-entrypoint.sh"]
|