# PaddleOCR GPU Variant
# OCR processing with NVIDIA GPU support using PaddlePaddle
# Base image: PaddlePaddle 3.0.0 GPU build (tag pins CUDA 11.8 / cuDNN 8.9 / TensorRT 8.6)
FROM paddlepaddle/paddle:3.0.0-gpu-cuda11.8-cudnn8.9-trt8.6

# Image metadata, declared in a single LABEL instruction
LABEL maintainer="Task Venture Capital GmbH <hello@task.vc>" \
      description="PaddleOCR PP-OCRv4 - GPU optimized" \
      org.opencontainers.image.source="https://code.foss.global/host.today/ht-docker-ai"

# Runtime defaults baked into the image; each can be overridden with
# `docker run -e NAME=value`.
ENV OCR_LANGUAGE="en" \
    SERVER_PORT="5000" \
    SERVER_HOST="0.0.0.0" \
    PYTHONUNBUFFERED=1

# All application files live under /app
WORKDIR /app

# OS-level packages (alphabetical for easier diffs):
#   curl            - used by the HEALTHCHECK probe
#   libgl1-mesa-glx - libGL shared library (presumably for OpenCV; verify before removing)
#   libglib2.0-0    - GLib shared library (presumably for OpenCV; verify before removing)
# The apt package index is removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        libgl1-mesa-glx \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies.
#
# paddleocr is constrained to the 2.x line (>=2.7 for PP-OCRv4): the model
# pre-download step in this file constructs PaddleOCR with `use_gpu` and
# `show_log`, which are 2.x constructor arguments. An unpinned install resolves
# to PaddleOCR 3.x, which is understood to reject those arguments and would
# break the build. Lift the upper bound only together with a migration of the
# PaddleOCR(...) call sites.
#
# Specifiers containing shell metacharacters (<, >, [, ]) are quoted so the
# shell passes them to pip verbatim.
RUN pip install --no-cache-dir \
    "paddleocr>=2.7,<3" \
    fastapi \
    "uvicorn[standard]" \
    python-multipart \
    opencv-python-headless \
    pillow

# Copy server files.
# The entrypoint script is made executable at copy time via --chmod (needs
# BuildKit, the default builder in current Docker releases) instead of a
# follow-up `RUN chmod`, which would store the script in a second layer.
COPY image_support_files/paddleocr-server.py /app/paddleocr-server.py
COPY --chmod=0755 image_support_files/paddleocr-entrypoint.sh /usr/local/bin/paddleocr-entrypoint.sh

# Fetch the English OCR models (PP-OCRv4) while building the image by
# constructing a PaddleOCR instance once. The instance itself is discarded;
# use_gpu=False keeps this step on the CPU (presumably because no GPU is
# available during `docker build`).
RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=True); print('English model downloaded')"

# Document the API port (matches the default SERVER_PORT); EXPOSE does not
# publish the port by itself.
EXPOSE 5000/tcp

# Health check: GET /health on the configured server port.
# The shell-form CMD is run by /bin/sh inside the container, so SERVER_PORT is
# expanded from the container environment at probe time. This keeps the probe
# aligned when the port is overridden with `docker run -e SERVER_PORT=...`
# (assuming the entrypoint binds the server to SERVER_PORT - confirm against
# paddleocr-entrypoint.sh). Falls back to 5000 if the variable is unset.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f "http://localhost:${SERVER_PORT:-5000}/health" || exit 1

# Start the container through the entrypoint script copied to /usr/local/bin.
# Exec (JSON-array) form: the script is run directly, without a /bin/sh -c wrapper.
ENTRYPOINT ["/usr/local/bin/paddleocr-entrypoint.sh"]