feat(paddleocr): add PaddleOCR OCR service (Docker images, server, tests, docs) and CI workflows

This commit is contained in:
2026-01-16 13:23:01 +00:00
parent 67c38eeb67
commit bec379e9ca
10 changed files with 624 additions and 71 deletions

View File

@@ -1,6 +1,6 @@
# PaddleOCR GPU Variant
# OCR processing with NVIDIA GPU support using PaddlePaddle
FROM paddlepaddle/paddle:3.0.0-gpu-cuda11.8-cudnn8.9-trt8.6
FROM paddlepaddle/paddle:2.6.2-gpu-cuda11.7-cudnn8.4-trt8.4
LABEL maintainer="Task Venture Capital GmbH <hello@task.vc>"
LABEL description="PaddleOCR PP-OCRv4 - GPU optimized"
@@ -22,9 +22,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies
# Install Python dependencies (using stable paddleocr 2.x)
RUN pip install --no-cache-dir \
paddleocr \
paddleocr==2.8.1 \
fastapi \
uvicorn[standard] \
python-multipart \
@@ -36,10 +36,8 @@ COPY image_support_files/paddleocr_server.py /app/paddleocr_server.py
COPY image_support_files/paddleocr-entrypoint.sh /usr/local/bin/paddleocr-entrypoint.sh
RUN chmod +x /usr/local/bin/paddleocr-entrypoint.sh
# Pre-download OCR models during build (PP-OCRv4)
RUN python -c "from paddleocr import PaddleOCR; \
ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=True); \
print('English model downloaded')"
# Note: OCR models are not pre-downloaded at build time; they are downloaded on first run.
# Deferring the download is intended to keep the image compatible across different GPU architectures.
# Expose API port
EXPOSE 5000