feat(paddleocr): add PaddleOCR support: Docker images, FastAPI server, entrypoint and tests

This commit is contained in:
2026-01-16 10:23:32 +00:00
parent bc65ea4ece
commit ae4bb26931
7 changed files with 112 additions and 12 deletions

View File

@@ -0,0 +1,258 @@
#!/usr/bin/env python3
"""
PaddleOCR FastAPI Server
Provides REST API for OCR operations using PaddleOCR
"""
import os
import io
import base64
import logging
from typing import Optional, List, Any
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import numpy as np
from PIL import Image
from paddleocr import PaddleOCR
# Logging: timestamped records at INFO level and above.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Runtime settings read from the environment.
# Language passed to PaddleOCR when a request does not name one.
OCR_LANGUAGE = os.getenv('OCR_LANGUAGE', 'en')
# GPU use is requested unless CUDA_VISIBLE_DEVICES is exactly '-1';
# an unset variable therefore counts as GPU-enabled.
USE_GPU = os.getenv('CUDA_VISIBLE_DEVICES', '') != '-1'

# FastAPI application object that the route decorators attach to.
app = FastAPI(
    title="PaddleOCR Server",
    description="REST API for OCR operations using PaddleOCR PP-OCRv4",
    version="1.0.0",
)

# Shared OCR engine; stays None until get_ocr() builds it.
ocr_instance: Optional[PaddleOCR] = None
class OCRRequest(BaseModel):
    """JSON body accepted by the base64 OCR endpoint."""

    # Base64-encoded image data.
    image: str
    # Optional language code; None means "use the server default".
    language: Optional[str] = None
class BoundingBox(BaseModel):
    """Outline of a detected text region, stored as a list of points."""

    # Each inner list is one point given as floats.
    points: List[List[float]]
class OCRResult(BaseModel):
    """One recognized piece of text together with its score and location."""

    # Recognized text content.
    text: str
    # Recognition confidence reported by the OCR engine.
    confidence: float
    # Corner points of the text region, each as [x, y].
    box: List[List[float]]
class OCRResponse(BaseModel):
    """Envelope returned by the OCR endpoints."""

    # True when processing finished without raising.
    success: bool
    # Detections found in the image; empty on failure.
    results: List[OCRResult]
    # Error message text when success is False, otherwise None.
    error: Optional[str] = None
class HealthResponse(BaseModel):
    """Payload returned by the health check endpoint."""

    # Overall service state label.
    status: str
    # Name of the OCR model being served.
    model: str
    # Default OCR language of this server.
    language: str
    # Whether the server is configured to use the GPU.
    gpu_enabled: bool
def get_ocr() -> PaddleOCR:
    """Return the shared PaddleOCR engine, building it on first use.

    The engine is kept in the module-level ``ocr_instance`` so that later
    calls reuse it instead of constructing a new one.
    """
    global ocr_instance

    # Fast path: the engine already exists.
    if ocr_instance is not None:
        return ocr_instance

    logger.info(f"Initializing PaddleOCR with language={OCR_LANGUAGE}, use_gpu={USE_GPU}")
    engine_options = dict(
        use_angle_cls=True,
        lang=OCR_LANGUAGE,
        use_gpu=USE_GPU,
        show_log=False,
    )
    ocr_instance = PaddleOCR(**engine_options)
    logger.info("PaddleOCR initialized successfully")
    return ocr_instance
def decode_base64_image(base64_string: str) -> np.ndarray:
    """Turn a base64-encoded image into an RGB numpy array.

    Args:
        base64_string: Base64 image data, optionally preceded by a data-URL
            prefix; when a comma is present, the segment after the first
            comma is decoded.

    Returns:
        The decoded image as a numpy array, converted to RGB mode first if
        it was stored in any other mode.
    """
    # Drop a leading "data:...;base64," style prefix when there is one.
    payload = base64_string.split(',')[1] if ',' in base64_string else base64_string

    raw_bytes = base64.b64decode(payload)
    picture = Image.open(io.BytesIO(raw_bytes))

    # Normalise every other mode to RGB before converting to an array.
    rgb_picture = picture if picture.mode == 'RGB' else picture.convert('RGB')
    return np.array(rgb_picture)
def process_ocr_result(result: Any) -> List[OCRResult]:
    """Convert raw PaddleOCR output into a list of ``OCRResult`` models.

    Args:
        result: Value returned by ``PaddleOCR.ocr``. Only the first element
            (the detections for the first image) is read; each detection is
            expected to look like ``[box, (text, confidence)]``.

    Returns:
        One ``OCRResult`` per non-None detection, or an empty list when
        there is nothing to report.
    """
    if result is None or len(result) == 0:
        return []

    # The engine returns one entry per image; only the first is used here.
    first_image = result[0]
    detections = first_image if first_image else []

    return [
        OCRResult(
            text=entry[1][0],
            confidence=float(entry[1][1]),
            # entry[0] holds the corner points [[x1, y1], ..., [x4, y4]].
            box=[[float(corner[0]), float(corner[1])] for corner in entry[0]],
        )
        for entry in detections
        if entry is not None
    ]
@app.on_event("startup")
async def startup_event():
    """Load the OCR model at startup and run it once on a blank image.

    Failures are logged and swallowed, so the application still starts when
    the warm-up does not succeed.
    """
    logger.info("Pre-warming OCR model...")
    try:
        engine = get_ocr()
        # A plain white 100x100 RGB image is enough to exercise the model.
        blank_page = np.full((100, 100, 3), 255, dtype=np.uint8)
        engine.ocr(blank_page, cls=True)
        logger.info("OCR model pre-warmed successfully")
    except Exception as e:
        logger.error(f"Failed to pre-warm OCR model: {e}")
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report service health, initializing the OCR engine if necessary.

    Returns:
        HealthResponse describing the model, default language and GPU flag.

    Raises:
        HTTPException: With status 503 when the OCR engine cannot be
            obtained or the response cannot be built.
    """
    try:
        # Obtaining the engine proves the model can be loaded.
        get_ocr()
        report = {
            "status": "healthy",
            "model": "PP-OCRv4",
            "language": OCR_LANGUAGE,
            "gpu_enabled": USE_GPU,
        }
        return HealthResponse(**report)
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        raise HTTPException(status_code=503, detail=str(e))
@app.post("/ocr", response_model=OCRResponse)
async def ocr_base64(request: OCRRequest):
    """
    Run OCR on an image supplied as a base64 string.

    Args:
        request: OCRRequest with base64 image and optional language

    Returns:
        OCRResponse with detected text, confidence scores, and bounding
        boxes. Any failure is reported as ``success=False`` with the error
        message instead of being raised.
    """
    try:
        pixels = decode_base64_image(request.image)

        # The shared engine is always obtained, even if a request-specific
        # one ends up being used below.
        shared_engine = get_ocr()

        wants_other_language = bool(request.language) and request.language != OCR_LANGUAGE
        if not wants_other_language:
            engine = shared_engine
        else:
            # NOTE(review): a fresh engine is constructed for every request
            # that names a non-default language.
            logger.info(f"Creating OCR instance for language: {request.language}")
            engine = PaddleOCR(
                use_angle_cls=True,
                lang=request.language,
                use_gpu=USE_GPU,
                show_log=False,
            )

        raw_output = engine.ocr(pixels, cls=True)
        detections = process_ocr_result(raw_output)
        return OCRResponse(success=True, results=detections)
    except Exception as e:
        logger.error(f"OCR processing failed: {e}")
        return OCRResponse(success=False, results=[], error=str(e))
@app.post("/ocr/upload", response_model=OCRResponse)
async def ocr_upload(
    img: UploadFile = File(...),
    language: Optional[str] = Form(None),
):
    """
    Run OCR on an image sent as a multipart file upload.

    Args:
        img: Uploaded image file
        language: Optional language code (default: env OCR_LANGUAGE)

    Returns:
        OCRResponse with detected text, confidence scores, and bounding
        boxes. Any failure is reported as ``success=False`` with the error
        message instead of being raised.
    """
    try:
        uploaded_bytes = await img.read()
        picture = Image.open(io.BytesIO(uploaded_bytes))

        # Normalise every other mode to RGB before converting to an array.
        rgb_picture = picture if picture.mode == 'RGB' else picture.convert('RGB')
        image_array = np.array(rgb_picture)

        # The shared engine is always obtained, even if a request-specific
        # one ends up being used below.
        shared_engine = get_ocr()

        wants_other_language = bool(language) and language != OCR_LANGUAGE
        if not wants_other_language:
            engine = shared_engine
        else:
            # NOTE(review): a fresh engine is constructed for every request
            # that names a non-default language.
            logger.info(f"Creating OCR instance for language: {language}")
            engine = PaddleOCR(
                use_angle_cls=True,
                lang=language,
                use_gpu=USE_GPU,
                show_log=False,
            )

        raw_output = engine.ocr(image_array, cls=True)
        detections = process_ocr_result(raw_output)
        return OCRResponse(success=True, results=detections)
    except Exception as e:
        logger.error(f"OCR processing failed: {e}")
        return OCRResponse(success=False, results=[], error=str(e))
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when run as a script.
    import uvicorn

    # Listen on all interfaces, port 5000.
    server_options = {"host": "0.0.0.0", "port": 5000}
    uvicorn.run(app, **server_options)