# ht-docker-ai/Dockerfile_qwen3vl_ollama_gpu_VRAM20GB
# Qwen3-VL-30B-A3B Vision Language Model
# Q4_K_M quantization (~20GB model)
#
# Mixture-of-experts vision-language model (30B total, ~3B active params):
# - 256K context (expandable to 1M)
# - Visual agent capabilities
# - Code generation from images
#
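#
# The large context is opt-in per request; a sketch using the API's num_ctx
# option (model tag as used below, token count illustrative and VRAM-bound):
#   curl http://localhost:11434/api/generate -d '{
#     "model": "qwen3-vl:30b-a3b",
#     "prompt": "Describe this image.",
#     "images": ["<base64-encoded image>"],
#     "options": {"num_ctx": 262144}
#   }'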
# Build: docker build -f Dockerfile_qwen3vl_ollama_gpu_VRAM20GB -t qwen3vl .
# Run: docker run --gpus all -p 11434:11434 -v ht-ollama-models:/root/.ollama qwen3vl
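#
# --gpus all requires the NVIDIA Container Toolkit on the host; a quick,
# illustrative sanity check that the GPU is visible inside containers:
#   docker run --rm --gpus all ubuntu nvidia-smi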
FROM ollama/ollama:latest
# Pre-pull the model during build (optional - it can also be pulled at runtime).
# Baking the weights in adds ~20GB to the image but makes first start faster.
# The server must be up before `ollama pull` can run, so poll it instead of
# relying on a fixed sleep:
# RUN ollama serve & until ollama list >/dev/null 2>&1; do sleep 1; done && ollama pull qwen3-vl:30b-a3b && pkill ollama
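#
# To pull at runtime instead, exec into the running container (the container
# name here is a placeholder):
#   docker exec -it <container-name> ollama pull qwen3-vl:30b-a3b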
# Expose Ollama API port
EXPOSE 11434
# Health check - the ollama/ollama base image does not ship curl, so use the
# bundled CLI instead (ollama list queries the same HTTP API):
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD ollama list || exit 1
# Start the server ("serve" is passed to the base image's ENTRYPOINT,
# the ollama binary, so this runs "ollama serve")
CMD ["serve"]