44 lines
1.2 KiB
Bash
44 lines
1.2 KiB
Bash
|
|
#!/usr/bin/env bash
#
# Launch a vLLM OpenAI-compatible server hosting the PaddleOCR-VL model.
#
# Configurable via environment variables (defaults in parentheses):
#   MODEL_NAME              model to serve (PaddlePaddle/PaddleOCR-VL)
#   HOST                    bind address (0.0.0.0)
#   PORT                    listen port (8000)
#   MAX_BATCHED_TOKENS      vLLM --max-num-batched-tokens (16384)
#   GPU_MEMORY_UTILIZATION  vLLM --gpu-memory-utilization (0.9)

# Fail fast: abort on command errors, unset variables, and pipeline failures.
set -euo pipefail

echo "==================================="
echo "PaddleOCR-VL Server"
echo "==================================="

# Configuration — environment overrides with sane defaults.
MODEL_NAME="${MODEL_NAME:-PaddlePaddle/PaddleOCR-VL}"
HOST="${HOST:-0.0.0.0}"
PORT="${PORT:-8000}"
MAX_BATCHED_TOKENS="${MAX_BATCHED_TOKENS:-16384}"
GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.9}"

echo "Model: ${MODEL_NAME}"
echo "Host: ${HOST}"
echo "Port: ${PORT}"
echo "Max batched tokens: ${MAX_BATCHED_TOKENS}"
echo "GPU memory utilization: ${GPU_MEMORY_UTILIZATION}"
echo ""

# Check GPU availability (informational only; vLLM performs the real check).
if command -v nvidia-smi > /dev/null 2>&1; then
  echo "GPU Information:"
  nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv
  echo ""
else
  # Diagnostics go to stderr so stdout stays clean for log scraping.
  echo "WARNING: nvidia-smi not found. GPU may not be available." >&2
fi

# Fail with a clear message if vLLM is missing, rather than letting the
# final exec die with a cryptic "command not found".
if ! command -v vllm > /dev/null 2>&1; then
  echo "ERROR: 'vllm' not found in PATH. Install it with: pip install vllm" >&2
  exit 1
fi

echo "Starting vLLM server..."
echo "==================================="

# Start vLLM server with PaddleOCR-VL. 'exec' replaces this shell so
# signals (e.g. SIGTERM from Docker/Kubernetes) reach the server directly.
exec vllm serve "${MODEL_NAME}" \
  --trust-remote-code \
  --host "${HOST}" \
  --port "${PORT}" \
  --max-num-batched-tokens "${MAX_BATCHED_TOKENS}" \
  --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION}" \
  --no-enable-prefix-caching \
  --mm-processor-cache-gb 0 \
  --served-model-name "paddleocr-vl"