feat(paddleocr): add PaddleOCR OCR service (Docker images, server, tests, docs) and CI workflows
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
# PaddleOCR CPU Variant
|
||||
# OCR processing optimized for CPU-only inference
|
||||
FROM python:3.10-slim
|
||||
FROM python:3.10-slim-bookworm
|
||||
|
||||
LABEL maintainer="Task Venture Capital GmbH <hello@task.vc>"
|
||||
LABEL description="PaddleOCR PP-OCRv4 - CPU optimized"
|
||||
@@ -21,13 +21,14 @@ WORKDIR /app
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libgl1-mesa-glx \
|
||||
libglib2.0-0 \
|
||||
libgomp1 \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Python dependencies (CPU version of PaddlePaddle)
|
||||
# Install Python dependencies (CPU version of PaddlePaddle - using stable 2.x versions)
|
||||
RUN pip install --no-cache-dir \
|
||||
paddlepaddle \
|
||||
paddleocr \
|
||||
paddlepaddle==2.6.2 \
|
||||
paddleocr==2.8.1 \
|
||||
fastapi \
|
||||
uvicorn[standard] \
|
||||
python-multipart \
|
||||
@@ -39,10 +40,8 @@ COPY image_support_files/paddleocr_server.py /app/paddleocr_server.py
|
||||
COPY image_support_files/paddleocr-entrypoint.sh /usr/local/bin/paddleocr-entrypoint.sh
|
||||
RUN chmod +x /usr/local/bin/paddleocr-entrypoint.sh
|
||||
|
||||
# Pre-download OCR models during build (PP-OCRv4)
|
||||
RUN python -c "from paddleocr import PaddleOCR; \
|
||||
ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=True); \
|
||||
print('English model downloaded')"
|
||||
# Note: OCR models will be downloaded on first run
|
||||
# This avoids build-time segfaults with certain CPU architectures
|
||||
|
||||
# Expose API port
|
||||
EXPOSE 5000
|
||||
|
||||
Reference in New Issue
Block a user