fix(docker): standardize Dockerfile and entrypoint filenames; add GPU-specific Dockerfiles and update build and test references
This commit is contained in:
@@ -1,59 +0,0 @@
#!/bin/bash
# Container entrypoint for the PaddleOCR-VL server: prints a startup banner,
# then the rest of this script resolves its configuration and launches vLLM.
#
# Strict mode:
#   -e           abort on the first command that fails
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if any of its stages fails
set -euo pipefail

echo "==================================="
echo "PaddleOCR-VL Server"
echo "==================================="

# Configuration
# Every setting can be overridden with an environment variable of the same
# name; an unset or empty variable falls back to the default given here.

# Model identifier handed to `vllm serve`.
MODEL_NAME="${MODEL_NAME:-PaddlePaddle/PaddleOCR-VL}"
# Value for --host.
HOST="${HOST:-0.0.0.0}"
# Value for --port.
PORT="${PORT:-8000}"
# Value for --max-num-batched-tokens.
MAX_BATCHED_TOKENS="${MAX_BATCHED_TOKENS:-16384}"
# Value for --gpu-memory-utilization.
GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.9}"
# Value for --max-model-len.
MAX_MODEL_LEN="${MAX_MODEL_LEN:-8192}"
# Set to "true" to add --enforce-eager to the vLLM command line.
ENFORCE_EAGER="${ENFORCE_EAGER:-false}"

# Print the effective configuration to stdout, one "Label: value" line per
# setting, followed by a single blank line.
# Globals: MODEL_NAME, HOST, PORT, MAX_BATCHED_TOKENS,
#          GPU_MEMORY_UTILIZATION, MAX_MODEL_LEN, ENFORCE_EAGER (all read)
print_config() {
  # printf '%s\n' emits every argument verbatim on its own line, so values
  # containing backslashes are never reinterpreted.
  printf '%s\n' \
    "Model: ${MODEL_NAME}" \
    "Host: ${HOST}" \
    "Port: ${PORT}" \
    "Max batched tokens: ${MAX_BATCHED_TOKENS}" \
    "GPU memory utilization: ${GPU_MEMORY_UTILIZATION}" \
    "Max model length: ${MAX_MODEL_LEN}" \
    "Enforce eager: ${ENFORCE_EAGER}" \
    ""
}

print_config

# Check GPU availability
# Informational only: prints the name and total/free memory of each GPU when
# nvidia-smi is usable, and otherwise warns on stderr. It never stops the
# script, so a missing or failing nvidia-smi cannot abort startup under
# `set -e`.
# Outputs: GPU table on stdout; warnings on stderr.
# Returns: always 0.
report_gpu_info() {
  if ! command -v nvidia-smi > /dev/null 2>&1; then
    echo "WARNING: nvidia-smi not found. GPU may not be available." >&2
    return 0
  fi

  echo "GPU Information:"
  # Run the query inside a condition so that a non-zero exit status is
  # reported as a warning instead of terminating the script.
  if ! nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv; then
    echo "WARNING: nvidia-smi failed to query the GPU. GPU may not be available." >&2
  fi
  echo ""
}

report_gpu_info

echo "Starting vLLM server..."
echo "==================================="

# Build vLLM command
# The arguments are collected in an array so that each value stays exactly
# one word when expanded as "${VLLM_ARGS[@]}", whatever characters it holds.
VLLM_ARGS=(
  serve "${MODEL_NAME}"
  --trust-remote-code
  --host "${HOST}"
  --port "${PORT}"
  --max-num-batched-tokens "${MAX_BATCHED_TOKENS}"
  --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION}"
  --max-model-len "${MAX_MODEL_LEN}"
  --no-enable-prefix-caching
  --mm-processor-cache-gb 0
  --served-model-name "paddleocr-vl"
  # Single quotes keep the JSON value a single literal argument.
  --limit-mm-per-prompt '{"image": 1}'
)

# Decide whether a configuration value means "enabled".
# Arguments: $1 - value to test
# Returns:   0 for true, 1, yes or on (letters in any case); 1 otherwise
is_truthy() {
  case "${1:-}" in
    [Tt][Rr][Uu][Ee] | 1 | [Yy][Ee][Ss] | [Oo][Nn]) return 0 ;;
    *) return 1 ;;
  esac
}

# Add enforce-eager if enabled (disables CUDA graphs, saves memory)
# Spellings such as "True" or "1" are honoured too, so they no longer leave
# the flag silently off.
if is_truthy "${ENFORCE_EAGER:-false}"; then
  VLLM_ARGS+=(--enforce-eager)
fi

# Start vLLM server with PaddleOCR-VL
# `exec` replaces this shell with the vllm process instead of running it as
# a child, so nothing after this line is executed.
exec vllm "${VLLM_ARGS[@]}"
Reference in New Issue
Block a user