# syntax=docker/dockerfile:1
#
# Qwen3-VL-30B-A3B Vision Language Model
# Q4_K_M quantization (~20GB model)
#
# Most powerful Qwen vision model:
# - 256K context (expandable to 1M)
# - Visual agent capabilities
# - Code generation from images
#
# Build: docker build -f Dockerfile_qwen3vl -t qwen3vl .
# Run:   docker run --gpus all -p 11434:11434 -v ht-ollama-models:/root/.ollama qwen3vl

# Base image tag is parameterized so builds can be pinned for reproducibility
# (hadolint DL3007 flags an unpinned :latest). Default preserves the original
# behavior; override with: docker build --build-arg OLLAMA_TAG=<version> ...
ARG OLLAMA_TAG=latest
FROM ollama/ollama:${OLLAMA_TAG}

# Pre-pull the model during build (optional - can also pull at runtime).
# This makes the image larger but faster to start.
# RUN ollama serve & sleep 5 && ollama pull qwen3-vl:30b-a3b && pkill ollama

# Expose Ollama API port. EXPOSE is documentation only — the port must still
# be published at run time with -p 11434:11434.
EXPOSE 11434

# Health check: probe the Ollama HTTP API for the model list.
# --start-period=60s gives the server time to start (and load models) before
# failed probes count against --retries.
# NOTE(review): assumes curl is available in the ollama/ollama base image —
# confirm; if absent, use the bundled binary instead: CMD ollama ls || exit 1
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
  CMD curl -f http://localhost:11434/api/tags || exit 1

# Start the Ollama server. The base image's ENTRYPOINT is the ollama binary,
# so this exec-form CMD supplies the "serve" subcommand as its argument.
CMD ["serve"]