feat(ocr): add PaddleOCR GPU Docker image and FastAPI OCR server with entrypoint; implement OCR endpoints and consensus extraction testing
This commit is contained in:
51
Dockerfile_paddleocr
Normal file
51
Dockerfile_paddleocr
Normal file
@@ -0,0 +1,51 @@
|
||||
# PaddleOCR GPU Variant
|
||||
# OCR processing with NVIDIA GPU support using PaddlePaddle
|
||||
# Base image: PaddlePaddle 3.0.0 GPU build; the tag names CUDA 11.8, cuDNN 8.9 and TensorRT 8.6.
FROM paddlepaddle/paddle:3.0.0-gpu-cuda11.8-cudnn8.9-trt8.6
|
||||
|
||||
# Image metadata, declared in a single LABEL instruction.
LABEL maintainer="Task Venture Capital GmbH <hello@task.vc>" \
      description="PaddleOCR PP-OCRv4 - GPU optimized" \
      org.opencontainers.image.source="https://code.foss.global/host.today/ht-docker-ai"
|
||||
|
||||
# Runtime defaults; each can be overridden at `docker run` time with -e.
#   OCR_LANGUAGE, SERVER_HOST, SERVER_PORT - presumably consumed by the
#       entrypoint/server scripts (not visible in this file; verify there)
#   PYTHONUNBUFFERED - makes Python emit stdout/stderr unbuffered
ENV OCR_LANGUAGE="en" \
    SERVER_HOST="0.0.0.0" \
    SERVER_PORT="5000" \
    PYTHONUNBUFFERED=1
|
||||
|
||||
# Set working directory
|
||||
# Relative paths in later instructions, and the container's start directory, resolve against /app.
WORKDIR /app
|
||||
|
||||
# Install system dependencies (sorted alphabetically):
#   curl         - used by the container HEALTHCHECK
#   libgl1       - provides libGL.so.1; replaces the transitional
#                  libgl1-mesa-glx package, which newer Debian/Ubuntu
#                  releases no longer ship
#   libglib2.0-0 - GLib runtime; presumably required by OpenCV - verify
# The apt package lists are removed in the same layer so they never end up
# in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        libgl1 \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Python dependencies.
# paddleocr is capped below 3.0: the model pre-download step in this file
# calls PaddleOCR(use_gpu=..., show_log=...), which is the 2.x constructor
# API. The 3.x releases reportedly reject those arguments, so an unpinned
# install can break the build.
# NOTE(review): confirm the newest 2.x release works with the Paddle 3.0.0
# runtime shipped in the base image.
# Requirement specifiers are quoted so the shell never treats `<` as a
# redirection or `[standard]` as a glob pattern.
RUN pip install --no-cache-dir \
        "paddleocr<3" \
        fastapi \
        "uvicorn[standard]" \
        python-multipart \
        opencv-python-headless \
        pillow
|
||||
|
||||
# Add the entrypoint wrapper (and mark it executable), then the OCR server script.
COPY image_support_files/paddleocr-entrypoint.sh /usr/local/bin/paddleocr-entrypoint.sh
RUN chmod +x /usr/local/bin/paddleocr-entrypoint.sh
COPY image_support_files/paddleocr-server.py /app/paddleocr-server.py
|
||||
|
||||
# Pre-fetch the English OCR models (PP-OCRv4) while the image is being built.
# Constructing PaddleOCR is what triggers the download; the instance itself
# is not needed afterwards.
# use_gpu=False: presumably because no GPU is available during `docker build`
# - verify.
# Only lang='en' is fetched here, regardless of OCR_LANGUAGE.
RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=True); print('English model downloaded')"
|
||||
|
||||
# Expose API port
|
||||
# Documents the API port (EXPOSE does not publish it); 5000 matches the default SERVER_PORT.
EXPOSE 5000
|
||||
|
||||
# Health check: probe the server's /health endpoint every 30s
# (10s timeout, 60s start-up grace period, unhealthy after 3 failures).
# The probe runs through the shell inside the container, so it follows the
# runtime value of SERVER_PORT instead of a hard-coded 5000; it falls back
# to 5000 when the variable is unset or empty.
# NOTE(review): assumes the entrypoint binds the server to SERVER_PORT - confirm.
# -sS keeps curl quiet on success but still prints the error on failure.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -fsS "http://localhost:${SERVER_PORT:-5000}/health" || exit 1
|
||||
|
||||
# Exec-form entrypoint: the wrapper script is started directly, without an intermediate /bin/sh -c.
ENTRYPOINT ["/usr/local/bin/paddleocr-entrypoint.sh"]
|
||||
Reference in New Issue
Block a user