#!/usr/bin/env python3
"""
PaddleOCR-VL FastAPI Server (CPU variant)

Provides an OpenAI-compatible REST API for document parsing using PaddleOCR-VL.
"""

import os
import io
import base64
import logging
import time
from typing import Optional, List, Any, Dict, Union

from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import torch
from PIL import Image

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Environment configuration
SERVER_HOST = os.environ.get('SERVER_HOST', '0.0.0.0')
SERVER_PORT = int(os.environ.get('SERVER_PORT', '8000'))
MODEL_NAME = os.environ.get('MODEL_NAME', 'PaddlePaddle/PaddleOCR-VL')

# Device configuration
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
logger.info(f"Using device: {DEVICE}")

# Task prompts for PaddleOCR-VL
TASK_PROMPTS = {
    "ocr": "OCR:",
    "table": "Table Recognition:",
    "formula": "Formula Recognition:",
    "chart": "Chart Recognition:",
}

# Initialize FastAPI app
app = FastAPI(
    title="PaddleOCR-VL Server",
    description="OpenAI-compatible REST API for document parsing using PaddleOCR-VL",
    version="1.0.0"
)

# Global model instances
model = None
processor = None


# Request/Response models (OpenAI-compatible)
class ImageUrl(BaseModel):
    url: str


class ContentItem(BaseModel):
    type: str
    text: Optional[str] = None
    image_url: Optional[ImageUrl] = None


class Message(BaseModel):
    role: str
    content: Union[str, List[ContentItem]]


class ChatCompletionRequest(BaseModel):
    model: str = "paddleocr-vl"
    messages: List[Message]
    temperature: Optional[float] = 0.0
    max_tokens: Optional[int] = 4096


class Choice(BaseModel):
    index: int
    message: Message
    finish_reason: str


class Usage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class ChatCompletionResponse(BaseModel):
    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[Choice]
    usage: Usage


class HealthResponse(BaseModel):
    status: str
    model: str
    device: str


def load_model():
    """Load the PaddleOCR-VL model and processor"""
    global model, processor

    if model is not None:
        return

    logger.info(f"Loading PaddleOCR-VL model: {MODEL_NAME}")

    from transformers import AutoModelForCausalLM, AutoProcessor

    # Load processor
    processor = AutoProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)

    # Load model with appropriate settings for CPU/GPU
    if DEVICE == "cuda":
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
        ).to(DEVICE).eval()
    else:
        # CPU mode - use float32 for compatibility
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
        ).eval()

    logger.info("PaddleOCR-VL model loaded successfully")


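# Illustrative examples of the resolution policy implemented by
# optimize_image_resolution below, using its default thresholds
# (max_size=2048, min_size=1080); the input sizes are hypothetical:
#   3840x2160 (4K)  -> scaled down to roughly 2048x1152
#   640x480 (VGA)   -> scaled up to 1080x810
#   1600x1200       -> left unchanged (already in the optimal range)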
def optimize_image_resolution(image: Image.Image, max_size: int = 2048, min_size: int = 1080) -> Image.Image:
    """
    Optimize image resolution for PaddleOCR-VL.

    Best results are achieved with images in the 1080p-2K range.
    - Images larger than max_size are scaled down
    - Very small images are scaled up to min_size
    """
    width, height = image.size
    max_dim = max(width, height)
    min_dim = min(width, height)

    # Scale down if too large (4K+ images often miss text)
    if max_dim > max_size:
        scale = max_size / max_dim
        new_width = int(width * scale)
        new_height = int(height * scale)
        logger.info(f"Scaling down image from {width}x{height} to {new_width}x{new_height}")
        image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
    # Scale up if too small
    elif max_dim < min_size and min_dim < min_size:
        scale = min_size / max_dim
        new_width = int(width * scale)
        new_height = int(height * scale)
        logger.info(f"Scaling up image from {width}x{height} to {new_width}x{new_height}")
        image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
    else:
        logger.info(f"Image size {width}x{height} is optimal, no scaling needed")

    return image


def decode_image(image_source: str, optimize: bool = True) -> Image.Image:
    """
    Decode image from various sources.

    Supported formats:
    - Base64 data URL: data:image/png;base64,... or data:image/jpeg;base64,...
    - HTTP/HTTPS URL: https://example.com/image.png
    - Raw base64 string
    - Local file path

    Supported image types: PNG, JPEG, WebP, BMP, GIF, TIFF
    """
    image = None

    if image_source.startswith("data:"):
        # Base64 encoded image with MIME type header
        # Supports: data:image/png;base64,... data:image/jpeg;base64,... etc.
        header, data = image_source.split(",", 1)
        image_data = base64.b64decode(data)
        image = Image.open(io.BytesIO(image_data)).convert("RGB")
        logger.debug(f"Decoded base64 image with header: {header}")
    elif image_source.startswith("http://") or image_source.startswith("https://"):
        # URL - fetch image
        import httpx
        response = httpx.get(image_source, timeout=30.0)
        response.raise_for_status()
        image = Image.open(io.BytesIO(response.content)).convert("RGB")
        logger.debug(f"Fetched image from URL: {image_source[:50]}...")
    else:
        # Assume it's a file path or raw base64
        try:
            image_data = base64.b64decode(image_source)
            image = Image.open(io.BytesIO(image_data)).convert("RGB")
            logger.debug("Decoded raw base64 image")
        except Exception:
            # Try as file path
            image = Image.open(image_source).convert("RGB")
            logger.debug(f"Loaded image from file: {image_source}")

    # Optimize resolution for best OCR results
    if optimize:
        image = optimize_image_resolution(image)

    return image


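# Illustrative calls to decode_image above; the URLs and paths are hypothetical
# placeholders, not real resources:
#   decode_image("data:image/png;base64,iVBORw0KGgo...")  # base64 data URL
#   decode_image("https://example.com/scan.jpg")           # remote image URL
#   decode_image("iVBORw0KGgo...")                         # raw base64 string
#   decode_image("/data/scans/page_001.png")               # local file path

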
def extract_image_and_text(content: Union[str, List[ContentItem]]) -> tuple:
    """Extract image and text prompt from message content"""
    if isinstance(content, str):
        return None, content

    image = None
    text = ""

    for item in content:
        if item.type == "image_url" and item.image_url:
            image = decode_image(item.image_url.url)
        elif item.type == "text" and item.text:
            text = item.text

    return image, text


def generate_response(image: Image.Image, prompt: str, max_tokens: int = 4096) -> str:
    """Generate response using PaddleOCR-VL"""
    load_model()

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt},
            ]
        }
    ]

    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt"
    )

    if DEVICE == "cuda":
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=False,
            use_cache=True
        )

    response = processor.batch_decode(outputs, skip_special_tokens=True)[0]

    # Extract the assistant's response (after the prompt)
    if "assistant" in response.lower():
        parts = response.split("assistant")
        if len(parts) > 1:
            response = parts[-1].strip()

    return response


@app.on_event("startup")
async def startup_event():
    """Pre-load the model on startup"""
    logger.info("Pre-loading PaddleOCR-VL model...")
    try:
        load_model()
        logger.info("Model pre-loaded successfully")
    except Exception as e:
        logger.error(f"Failed to pre-load model: {e}")
        # Don't fail startup - model will be loaded on first request


@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check endpoint"""
    return HealthResponse(
        status="healthy" if model is not None else "loading",
        model=MODEL_NAME,
        device=DEVICE
    )


@app.get("/formats")
async def supported_formats():
    """List supported image formats and input methods"""
    return {
        "image_formats": {
            "supported": ["PNG", "JPEG", "WebP", "BMP", "GIF", "TIFF"],
            "recommended": ["PNG", "JPEG"],
            "mime_types": [
                "image/png", "image/jpeg", "image/webp",
                "image/bmp", "image/gif", "image/tiff"
            ]
        },
        "input_methods": {
            "base64_data_url": {
                "description": "Base64 encoded image with MIME type header",
                "example": "data:image/png;base64,iVBORw0KGgo..."
            },
            "http_url": {
                "description": "Direct HTTP/HTTPS URL to image",
                "example": "https://example.com/image.png"
            },
            "raw_base64": {
                "description": "Raw base64 string without header",
                "example": "iVBORw0KGgo..."
            }
        },
        "resolution": {
            "optimal_range": "1080p to 2K (1080-2048 pixels on longest side)",
            "auto_scaling": True,
            "note": "Images are automatically scaled to optimal range. 4K+ images are scaled down for better accuracy."
        },
        "task_prompts": TASK_PROMPTS
    }


@app.get("/v1/models")
async def list_models():
    """List available models (OpenAI-compatible)"""
    return {
        "object": "list",
        "data": [
            {
                "id": "paddleocr-vl",
                "object": "model",
                "created": int(time.time()),
                "owned_by": "paddlepaddle"
            }
        ]
    }


@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(request: ChatCompletionRequest):
    """
    OpenAI-compatible chat completions endpoint for PaddleOCR-VL

    Supports tasks:
    - "OCR:" - Text recognition
    - "Table Recognition:" - Table extraction
    - "Formula Recognition:" - Formula extraction
    - "Chart Recognition:" - Chart extraction
    """
    try:
        # Get the last user message
        user_message = None
        for msg in reversed(request.messages):
            if msg.role == "user":
                user_message = msg
                break

        if not user_message:
            raise HTTPException(status_code=400, detail="No user message found")

        # Extract image and prompt
        image, prompt = extract_image_and_text(user_message.content)

        if image is None:
            raise HTTPException(status_code=400, detail="No image provided in message")

        # Default to OCR if no specific prompt
        if not prompt or prompt.strip() == "":
            prompt = "OCR:"

        logger.info(f"Processing request with prompt: {prompt[:50]}...")

        # Generate response
        start_time = time.time()
        response_text = generate_response(image, prompt, request.max_tokens or 4096)
        elapsed = time.time() - start_time

        logger.info(f"Generated response in {elapsed:.2f}s ({len(response_text)} chars)")

        # Build OpenAI-compatible response
        return ChatCompletionResponse(
            id=f"chatcmpl-{int(time.time()*1000)}",
            created=int(time.time()),
            model=request.model,
            choices=[
                Choice(
                    index=0,
                    message=Message(role="assistant", content=response_text),
                    finish_reason="stop"
                )
            ],
            usage=Usage(
                prompt_tokens=100,  # Approximate
                completion_tokens=len(response_text) // 4,
                total_tokens=100 + len(response_text) // 4
            )
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error processing request: {e}")
        raise HTTPException(status_code=500, detail=str(e))


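# Example client request against /v1/chat/completions (a sketch; assumes the
# server is reachable at http://localhost:8000 and that "page.png" is a local
# image file - both are placeholder values):
#
#   import base64
#   import httpx
#
#   with open("page.png", "rb") as f:
#       b64 = base64.b64encode(f.read()).decode()
#   payload = {
#       "model": "paddleocr-vl",
#       "messages": [{
#           "role": "user",
#           "content": [
#               {"type": "text", "text": "Table Recognition:"},
#               {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
#           ],
#       }],
#   }
#   r = httpx.post("http://localhost:8000/v1/chat/completions", json=payload, timeout=120.0)
#   print(r.json()["choices"][0]["message"]["content"])
#
# The generous timeout allows for slow CPU-only generation.

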
# Legacy endpoint for compatibility with old PaddleOCR API
class LegacyOCRRequest(BaseModel):
    image: str
    task: Optional[str] = "ocr"


class LegacyOCRResponse(BaseModel):
    success: bool
    result: str
    task: str
    error: Optional[str] = None


@app.post("/ocr", response_model=LegacyOCRResponse)
async def legacy_ocr(request: LegacyOCRRequest):
    """
    Legacy OCR endpoint for backwards compatibility

    Tasks: ocr, table, formula, chart
    """
    try:
        image = decode_image(request.image)
        prompt = TASK_PROMPTS.get(request.task, TASK_PROMPTS["ocr"])
        result = generate_response(image, prompt)

        return LegacyOCRResponse(
            success=True,
            result=result,
            task=request.task
        )
    except Exception as e:
        logger.error(f"Legacy OCR error: {e}")
        return LegacyOCRResponse(
            success=False,
            result="",
            task=request.task,
            error=str(e)
        )


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host=SERVER_HOST, port=SERVER_PORT)

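# Example client request against the legacy /ocr endpoint (a sketch; assumes the
# server is reachable at http://localhost:8000 and that "receipt.jpg" is a local
# image file - both are placeholder values):
#
#   import base64
#   import httpx
#
#   with open("receipt.jpg", "rb") as f:
#       b64 = base64.b64encode(f.read()).decode()
#   r = httpx.post("http://localhost:8000/ocr", json={"image": b64, "task": "table"}, timeout=120.0)
#   print(r.json()["result"])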