tag/v1.4.0/Dockerfile_paddleocr

# PaddleOCR GPU Variant
# OCR processing with NVIDIA GPU support using PaddlePaddle.
# The base image tag pins PaddlePaddle 2.6.2 together with CUDA 11.7,
# cuDNN 8.4 and TensorRT 8.4.
# NOTE(review): the tag is not pinned by digest -- consider adding one for fully reproducible builds.
FROM paddlepaddle/paddle:2.6.2-gpu-cuda11.7-cudnn8.4-trt8.4

# Image metadata (maintainer, short description, source repository)
LABEL maintainer="Task Venture Capital GmbH <hello@task.vc>" \
      description="PaddleOCR PP-OCRv4 - GPU optimized" \
      org.opencontainers.image.source="https://code.foss.global/host.today/ht-docker-ai"

# Runtime defaults; each value can be overridden with `docker run -e NAME=value`.
# PYTHONUNBUFFERED=1 makes Python write stdout/stderr without buffering.
ENV OCR_LANGUAGE="en" \
    SERVER_PORT="5000" \
    SERVER_HOST="0.0.0.0" \
    PYTHONUNBUFFERED=1

# Set working directory: later instructions run relative to /app, and the
# server script is copied into this directory further down.
WORKDIR /app

# System packages: curl (used by the HEALTHCHECK probe) plus the libGL and
# GLib shared libraries. The package index is refreshed and removed in this
# same layer so it never ends up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        libgl1-mesa-glx \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies (using stable paddleocr 2.x).
# "uvicorn[standard]" is quoted so the shell can never interpret the brackets
# as a glob character class (ShellCheck SC2102).
# NOTE(review): only paddleocr is version-pinned; consider pinning the other
# packages as well for reproducible builds (hadolint DL3013).
RUN pip install --no-cache-dir \
    paddleocr==2.8.1 \
    fastapi \
    "uvicorn[standard]" \
    python-multipart \
    opencv-python-headless \
    pillow

# Copy server files.
# --chmod marks the entrypoint executable during the copy itself, so no
# follow-up `RUN chmod` layer (which would store the script a second time)
# is needed. COPY --chmod requires BuildKit.
COPY image_support_files/paddleocr_server.py /app/paddleocr_server.py
COPY --chmod=755 image_support_files/paddleocr-entrypoint.sh /usr/local/bin/paddleocr-entrypoint.sh

# Note: OCR models will be downloaded on first run
# This ensures compatibility across different GPU architectures

# Expose API port. EXPOSE is documentation only and does not publish the port;
# 5000 matches the SERVER_PORT default declared earlier in this file.
EXPOSE 5000

# Health check: probe the server's /health endpoint.
# The port is taken from SERVER_PORT at probe time (falling back to 5000), so
# the check keeps working when the port is overridden via `docker run -e`.
# -sS silences curl's progress meter but still prints errors to the health log.
# NOTE(review): assumes the entrypoint binds the server to SERVER_PORT -- confirm.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -fsS "http://localhost:${SERVER_PORT:-5000}/health" || exit 1

# Start the container through the entrypoint script installed in /usr/local/bin.
# Exec (JSON-array) form: the script is run directly, without a wrapping shell.
ENTRYPOINT ["/usr/local/bin/paddleocr-entrypoint.sh"]
feat(ocr): add PaddleOCR GPU Docker image and FastAPI OCR server with entrypoint; implement OCR endpoints and consensus extraction testing 2026-01-16 10:22:15 +00:00			`# PaddleOCR GPU Variant`
			`# OCR processing with NVIDIA GPU support using PaddlePaddle`
feat(paddleocr): add PaddleOCR OCR service (Docker images, server, tests, docs) and CI workflows 2026-01-16 13:23:01 +00:00			`FROM paddlepaddle/paddle:2.6.2-gpu-cuda11.7-cudnn8.4-trt8.4`
feat(ocr): add PaddleOCR GPU Docker image and FastAPI OCR server with entrypoint; implement OCR endpoints and consensus extraction testing 2026-01-16 10:22:15 +00:00
			`LABEL maintainer="Task Venture Capital GmbH <hello@task.vc>"`
			`LABEL description="PaddleOCR PP-OCRv4 - GPU optimized"`
			`LABEL org.opencontainers.image.source="https://code.foss.global/host.today/ht-docker-ai"`

			`# Environment configuration`
			`ENV OCR_LANGUAGE="en"`
			`ENV SERVER_PORT="5000"`
			`ENV SERVER_HOST="0.0.0.0"`
			`ENV PYTHONUNBUFFERED=1`

			`# Set working directory`
			`WORKDIR /app`

			`# Install system dependencies`
			`RUN apt-get update && apt-get install -y --no-install-recommends \`
			`libgl1-mesa-glx \`
			`libglib2.0-0 \`
			`curl \`
			`&& rm -rf /var/lib/apt/lists/*`

feat(paddleocr): add PaddleOCR OCR service (Docker images, server, tests, docs) and CI workflows 2026-01-16 13:23:01 +00:00			`# Install Python dependencies (using stable paddleocr 2.x)`
feat(ocr): add PaddleOCR GPU Docker image and FastAPI OCR server with entrypoint; implement OCR endpoints and consensus extraction testing 2026-01-16 10:22:15 +00:00			`RUN pip install --no-cache-dir \`
feat(paddleocr): add PaddleOCR OCR service (Docker images, server, tests, docs) and CI workflows 2026-01-16 13:23:01 +00:00			`paddleocr==2.8.1 \`
feat(ocr): add PaddleOCR GPU Docker image and FastAPI OCR server with entrypoint; implement OCR endpoints and consensus extraction testing 2026-01-16 10:22:15 +00:00			`fastapi \`
			`uvicorn[standard] \`
			`python-multipart \`
			`opencv-python-headless \`
			`pillow`

			`# Copy server files`
feat(paddleocr): add PaddleOCR support: Docker images, FastAPI server, entrypoint and tests 2026-01-16 10:23:32 +00:00			`COPY image_support_files/paddleocr_server.py /app/paddleocr_server.py`
feat(ocr): add PaddleOCR GPU Docker image and FastAPI OCR server with entrypoint; implement OCR endpoints and consensus extraction testing 2026-01-16 10:22:15 +00:00			`COPY image_support_files/paddleocr-entrypoint.sh /usr/local/bin/paddleocr-entrypoint.sh`
			`RUN chmod +x /usr/local/bin/paddleocr-entrypoint.sh`

feat(paddleocr): add PaddleOCR OCR service (Docker images, server, tests, docs) and CI workflows 2026-01-16 13:23:01 +00:00			`# Note: OCR models will be downloaded on first run`
			`# This ensures compatibility across different GPU architectures`
feat(ocr): add PaddleOCR GPU Docker image and FastAPI OCR server with entrypoint; implement OCR endpoints and consensus extraction testing 2026-01-16 10:22:15 +00:00
			`# Expose API port`
			`EXPOSE 5000`

			`# Health check`
			`HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \`
			`CMD curl -f http://localhost:5000/health \|\| exit 1`

			`ENTRYPOINT ["/usr/local/bin/paddleocr-entrypoint.sh"]`