# Qwen3-VL-30B-A3B Vision Language Model
# Q4_K_M quantization (~20GB model)
#
# Most powerful Qwen vision model:
# - 256K context (expandable to 1M)
# - Visual agent capabilities
# - Code generation from images
#
# Build: docker build -f Dockerfile_qwen3vl -t qwen3vl .
# Run:   docker run --gpus all -p 11434:11434 -v ht-ollama-models:/root/.ollama qwen3vl

FROM ollama/ollama:latest

# Pre-pull the model during build (optional; it can also be pulled at runtime).
# This makes the image larger but faster to start.
# RUN ollama serve & sleep 5 && ollama pull qwen3-vl:30b-a3b && pkill ollama

# Expose the Ollama API port
EXPOSE 11434

# Health check. The base image does not ship curl, so probe the server with
# the ollama CLI instead ("ollama list" queries the local API and fails if
# the server is not responding).
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD ollama list || exit 1

# Start the Ollama server (the base image's ENTRYPOINT is /bin/ollama,
# so only the subcommand is needed)
CMD ["serve"]
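
# With the build-time pull commented out above, the model must be pulled once
# the container is running. A minimal sketch, assuming the container was
# started with --name qwen3vl (the name is illustrative):
#
#   docker exec -it qwen3vl ollama pull qwen3-vl:30b-a3b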
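
# To confirm the HEALTHCHECK is passing from the host, standard Docker
# tooling can read the container's health state (container name assumed
# as above):
#
#   docker inspect --format '{{.State.Health.Status}}' qwen3vl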
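
# Example request once the model is pulled and the server is up: Ollama's
# /api/generate endpoint accepts base64-encoded images for vision models.
# The prompt and image payload below are placeholders:
#
#   curl http://localhost:11434/api/generate -d '{
#     "model": "qwen3-vl:30b-a3b",
#     "prompt": "Describe this image.",
#     "images": ["<base64-encoded image data>"]
#   }'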