# PaddleOCR CPU Variant
# OCR processing optimized for CPU-only inference
#
# The build context must provide:
#   image_support_files/paddleocr-server.py
#   image_support_files/paddleocr-entrypoint.sh
FROM python:3.10-slim

LABEL maintainer="Task Venture Capital GmbH "
LABEL description="PaddleOCR PP-OCRv4 - CPU optimized"
LABEL org.opencontainers.image.source="https://code.foss.global/host.today/ht-docker-ai"

# Environment configuration for CPU-only mode.
# CUDA_VISIBLE_DEVICES="-1" disables GPU usage for this CPU-only variant.
ENV OCR_LANGUAGE="en" \
    SERVER_PORT="5000" \
    SERVER_HOST="0.0.0.0" \
    PYTHONUNBUFFERED=1 \
    CUDA_VISIBLE_DEVICES="-1"

# Set working directory
WORKDIR /app

# Install system dependencies.
# libgl1 replaces libgl1-mesa-glx, a transitional package that is no longer
# available on the Debian releases current python:*-slim images are built on.
# curl is kept in the image because the HEALTHCHECK below uses it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        libgl1 \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies (CPU version of PaddlePaddle).
# paddleocr/paddlepaddle are constrained to the 2.x series: the model
# pre-download step below passes use_gpu= and show_log=, which are 2.x
# constructor arguments, and this image targets PP-OCRv4.
# "uvicorn[standard]" is quoted so the shell never treats [] as a glob.
RUN pip install --no-cache-dir \
        fastapi \
        opencv-python-headless \
        "paddleocr>=2.7,<3" \
        "paddlepaddle>=2.6,<3" \
        pillow \
        python-multipart \
        "uvicorn[standard]"

# Pre-download OCR models during build (PP-OCRv4).
# This runs before the server files are copied so that editing the server or
# entrypoint script does not invalidate this expensive, network-bound layer.
RUN python -c "from paddleocr import PaddleOCR; \
ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=True); \
print('English model downloaded')"

# Copy server files
COPY image_support_files/paddleocr-server.py /app/paddleocr-server.py
COPY image_support_files/paddleocr-entrypoint.sh /usr/local/bin/paddleocr-entrypoint.sh
RUN chmod +x /usr/local/bin/paddleocr-entrypoint.sh

# NOTE(review): the container still runs as root. Switching to a dedicated
# USER would be preferable, but needs checking against the entrypoint script
# and the location of the pre-downloaded models first.

# Expose API port
EXPOSE 5000

# Health check (longer start-period for CPU variant).
# The shell expands SERVER_PORT at container runtime, so the probe follows a
# port overridden with `docker run -e SERVER_PORT=...` and falls back to 5000.
# Assumes the entrypoint binds the server to SERVER_PORT - confirm.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -fsS "http://localhost:${SERVER_PORT:-5000}/health" || exit 1

ENTRYPOINT ["/usr/local/bin/paddleocr-entrypoint.sh"]