259 lines
7.0 KiB
Python
259 lines
7.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
PaddleOCR FastAPI Server
|
|
Provides REST API for OCR operations using PaddleOCR
|
|
"""
|
|
|
|
import os
|
|
import io
|
|
import base64
|
|
import logging
|
|
from typing import Optional, List, Any
|
|
|
|
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
|
|
from fastapi.responses import JSONResponse
|
|
from pydantic import BaseModel
|
|
import numpy as np
|
|
from PIL import Image
|
|
from paddleocr import PaddleOCR
|
|
|
|
# Root logging setup: INFO and above, each record stamped with time,
# logger name and severity.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
|
|
|
|
# Environment configuration
def _gpu_requested(cuda_visible_devices: Optional[str]) -> bool:
    """Decide whether GPU inference should be requested.

    Args:
        cuda_visible_devices: Raw value of the ``CUDA_VISIBLE_DEVICES``
            environment variable, or ``None`` when it is not set.

    Returns:
        ``True`` when the variable is unset or names at least one device;
        ``False`` when it is ``-1`` or empty/blank, since both settings
        hide every CUDA device from the process.
    """
    if cuda_visible_devices is None:
        return True
    return cuda_visible_devices.strip() not in ('', '-1')


# Default OCR language; an unset *or empty* variable falls back to English
# so that an empty value is never handed to PaddleOCR as a language code.
OCR_LANGUAGE = os.environ.get('OCR_LANGUAGE') or 'en'

# Whether PaddleOCR engines are created with use_gpu=True.
USE_GPU = _gpu_requested(os.environ.get('CUDA_VISIBLE_DEVICES'))
|
|
|
|
# The ASGI application object; the route decorators below register on it.
app = FastAPI(
    title="PaddleOCR Server",
    description="REST API for OCR operations using PaddleOCR PP-OCRv4",
    version="1.0.0",
)

# Lazily created default-language engine; populated by get_ocr() on first use.
ocr_instance: Optional[PaddleOCR] = None
|
|
|
|
|
|
class OCRRequest(BaseModel):
    """JSON body accepted by the base64 OCR endpoint.

    Attributes:
        image: Base64-encoded image data. A ``<prefix>,`` header such as a
            data-URL prefix is tolerated and stripped before decoding.
        language: Optional language code. When omitted, or equal to the
            server's ``OCR_LANGUAGE``, the default engine is used.
    """

    image: str
    language: Optional[str] = None
|
|
|
|
|
|
class BoundingBox(BaseModel):
    """Outline of a detected text region.

    NOTE(review): not referenced anywhere else in the visible part of this
    file; ``OCRResult.box`` carries its points as a plain nested list.

    Attributes:
        points: List of float lists -- presumably ``[x, y]`` corner
            coordinates, matching ``OCRResult.box``; confirm with callers.
    """

    points: List[List[float]]
|
|
|
|
|
|
class OCRResult(BaseModel):
    """One recognised piece of text within an image.

    Attributes:
        text: The recognised string.
        confidence: Recognition score reported by the OCR engine.
        box: Corner points of the text region as ``[x, y]`` float pairs
            (``[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]``).
    """

    text: str
    confidence: float
    box: List[List[float]]
|
|
|
|
|
|
class OCRResponse(BaseModel):
    """Envelope returned by both OCR endpoints.

    Attributes:
        success: ``True`` when OCR ran to completion, ``False`` when an
            exception was caught while handling the request.
        results: Detected text entries; empty on failure.
        error: Text of the caught exception on failure, otherwise ``None``.
    """

    success: bool
    results: List[OCRResult]
    error: Optional[str] = None
|
|
|
|
|
|
class HealthResponse(BaseModel):
    """Payload of the ``/health`` endpoint.

    Attributes:
        status: Health indicator string (``"healthy"`` on success).
        model: Name of the OCR model family reported to clients.
        language: The server's default OCR language code.
        gpu_enabled: Whether engines are created with GPU use requested.
    """

    status: str
    model: str
    language: str
    gpu_enabled: bool
|
|
|
|
|
|
def get_ocr() -> PaddleOCR:
    """Return the shared default-language engine, creating it on first call.

    The engine is stored in the module-level ``ocr_instance`` so later calls
    reuse it instead of constructing another one.

    Returns:
        The ``PaddleOCR`` instance configured with ``OCR_LANGUAGE`` and
        ``USE_GPU``.
    """
    global ocr_instance

    # Fast path: already built by an earlier call.
    if ocr_instance is not None:
        return ocr_instance

    logger.info(f"Initializing PaddleOCR with language={OCR_LANGUAGE}, use_gpu={USE_GPU}")
    ocr_instance = PaddleOCR(
        use_angle_cls=True,
        lang=OCR_LANGUAGE,
        use_gpu=USE_GPU,
        show_log=False,
    )
    logger.info("PaddleOCR initialized successfully")
    return ocr_instance
|
|
|
|
|
|
def decode_base64_image(base64_string: str) -> np.ndarray:
    """Decode a base64-encoded image into an RGB numpy array.

    Args:
        base64_string: Base64 image data, optionally preceded by a
            comma-terminated header such as a data-URL prefix.

    Returns:
        The image, converted to RGB mode if it was in another mode, as
        produced by ``np.array``.

    Raises:
        Whatever ``base64.b64decode`` or ``PIL.Image.open`` raise for input
        that is not valid base64 or not a readable image.
    """
    # With a "<header>,<payload>" string, keep the segment that follows the
    # first comma; a string without commas is used as-is.
    segments = base64_string.split(',')
    payload = segments[1] if len(segments) > 1 else base64_string

    raw_bytes = base64.b64decode(payload)
    picture = Image.open(io.BytesIO(raw_bytes))

    # Normalise palette / grayscale / alpha images to three channels.
    rgb_picture = picture if picture.mode == 'RGB' else picture.convert('RGB')

    return np.array(rgb_picture)
|
|
|
|
|
|
def process_ocr_result(result: Any) -> List[OCRResult]:
    """Convert raw PaddleOCR output into a list of ``OCRResult`` models.

    Args:
        result: Value returned by ``PaddleOCR.ocr``. It is expected to hold
            one entry per image, each entry being a list of
            ``[box, (text, confidence)]`` detections. Only the first
            image's entry is read.

    Returns:
        One ``OCRResult`` per detection; an empty list when ``result`` is
        ``None``, empty, or its first entry is falsy.
    """
    if result is None or len(result) == 0:
        return []

    def _to_model(detection: Any) -> OCRResult:
        # detection[0]: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
        # detection[1]: (text, confidence)
        corners, recognised = detection[0], detection[1]
        return OCRResult(
            text=recognised[0],
            confidence=float(recognised[1]),
            box=[[float(corner[0]), float(corner[1])] for corner in corners],
        )

    # A falsy first entry (e.g. None for an image without text) means
    # there is nothing to convert.
    detections = result[0] or []
    return [_to_model(item) for item in detections if item is not None]
|
|
|
|
|
|
@app.on_event("startup")
|
|
async def startup_event():
|
|
"""Pre-warm the OCR model on startup"""
|
|
logger.info("Pre-warming OCR model...")
|
|
try:
|
|
ocr = get_ocr()
|
|
# Create a small test image to warm up the model
|
|
test_image = np.zeros((100, 100, 3), dtype=np.uint8)
|
|
test_image.fill(255) # White image
|
|
ocr.ocr(test_image, cls=True)
|
|
logger.info("OCR model pre-warmed successfully")
|
|
except Exception as e:
|
|
logger.error(f"Failed to pre-warm OCR model: {e}")
|
|
|
|
|
|
@app.get("/health", response_model=HealthResponse)
|
|
async def health_check():
|
|
"""Health check endpoint"""
|
|
try:
|
|
# Ensure OCR is initialized
|
|
get_ocr()
|
|
return HealthResponse(
|
|
status="healthy",
|
|
model="PP-OCRv4",
|
|
language=OCR_LANGUAGE,
|
|
gpu_enabled=USE_GPU
|
|
)
|
|
except Exception as e:
|
|
logger.error(f"Health check failed: {e}")
|
|
raise HTTPException(status_code=503, detail=str(e))
|
|
|
|
|
|
@app.post("/ocr", response_model=OCRResponse)
|
|
async def ocr_base64(request: OCRRequest):
|
|
"""
|
|
Perform OCR on a base64-encoded image
|
|
|
|
Args:
|
|
request: OCRRequest with base64 image and optional language
|
|
|
|
Returns:
|
|
OCRResponse with detected text, confidence scores, and bounding boxes
|
|
"""
|
|
try:
|
|
# Decode image
|
|
image = decode_base64_image(request.image)
|
|
|
|
# Get OCR instance (use request language if provided)
|
|
ocr = get_ocr()
|
|
|
|
# If a different language is requested, create a new instance
|
|
if request.language and request.language != OCR_LANGUAGE:
|
|
logger.info(f"Creating OCR instance for language: {request.language}")
|
|
temp_ocr = PaddleOCR(
|
|
use_angle_cls=True,
|
|
lang=request.language,
|
|
use_gpu=USE_GPU,
|
|
show_log=False
|
|
)
|
|
result = temp_ocr.ocr(image, cls=True)
|
|
else:
|
|
result = ocr.ocr(image, cls=True)
|
|
|
|
# Process results
|
|
results = process_ocr_result(result)
|
|
|
|
return OCRResponse(success=True, results=results)
|
|
|
|
except Exception as e:
|
|
logger.error(f"OCR processing failed: {e}")
|
|
return OCRResponse(success=False, results=[], error=str(e))
|
|
|
|
|
|
@app.post("/ocr/upload", response_model=OCRResponse)
|
|
async def ocr_upload(
|
|
img: UploadFile = File(...),
|
|
language: Optional[str] = Form(None)
|
|
):
|
|
"""
|
|
Perform OCR on an uploaded image file
|
|
|
|
Args:
|
|
img: Uploaded image file
|
|
language: Optional language code (default: env OCR_LANGUAGE)
|
|
|
|
Returns:
|
|
OCRResponse with detected text, confidence scores, and bounding boxes
|
|
"""
|
|
try:
|
|
# Read image
|
|
contents = await img.read()
|
|
image = Image.open(io.BytesIO(contents))
|
|
|
|
# Convert to RGB if necessary
|
|
if image.mode != 'RGB':
|
|
image = image.convert('RGB')
|
|
|
|
image_array = np.array(image)
|
|
|
|
# Get OCR instance
|
|
ocr = get_ocr()
|
|
|
|
# If a different language is requested, create a new instance
|
|
if language and language != OCR_LANGUAGE:
|
|
logger.info(f"Creating OCR instance for language: {language}")
|
|
temp_ocr = PaddleOCR(
|
|
use_angle_cls=True,
|
|
lang=language,
|
|
use_gpu=USE_GPU,
|
|
show_log=False
|
|
)
|
|
result = temp_ocr.ocr(image_array, cls=True)
|
|
else:
|
|
result = ocr.ocr(image_array, cls=True)
|
|
|
|
# Process results
|
|
results = process_ocr_result(result)
|
|
|
|
return OCRResponse(success=True, results=results)
|
|
|
|
except Exception as e:
|
|
logger.error(f"OCR processing failed: {e}")
|
|
return OCRResponse(success=False, results=[], error=str(e))
|
|
|
|
|
|
if __name__ == "__main__":
|
|
import uvicorn
|
|
uvicorn.run(app, host="0.0.0.0", port=5000)
|