# Nanonets-OCR2-3B Vision Language Model
# Based on Qwen2.5-VL-3B, fine-tuned for document OCR (Oct 2025 release)
# Improvements over OCR-s: better semantic tagging, LaTeX equations, flowcharts
# ~12-16GB VRAM with 30K context, outputs structured markdown with semantic tags
#
# Build: docker build -f Dockerfile_nanonets_vllm_gpu_VRAM10GB -t nanonets-ocr .
#        Pin the vLLM base image for reproducible builds with:
#          --build-arg VLLM_VERSION=<tag>
# Run:   docker run --gpus all -p 8000:8000 -v ht-huggingface-cache:/root/.cache/huggingface nanonets-ocr
#
# NOTE(review): the file name above says "VRAM10GB" while the estimate in this
# header is ~12-16GB - confirm which figure is current.

# Tag of the vllm/vllm-openai base image. Defaults to "latest" (the historical
# behaviour of this file); override at build time to get a reproducible image.
ARG VLLM_VERSION=latest
FROM vllm/vllm-openai:${VLLM_VERSION}

LABEL maintainer="Task Venture Capital GmbH " \
      description="Nanonets-OCR2-3B - Document OCR optimized Vision Language Model" \
      org.opencontainers.image.source="https://code.foss.global/host.today/ht-docker-ai"

# Environment configuration
# These variables document the effective settings and are visible inside the
# running container. They are NOT expanded into the exec-form CMD below (exec
# form performs no variable substitution), so keep both places in sync when
# changing a value.
ENV MODEL_NAME="nanonets/Nanonets-OCR2-3B" \
    HOST="0.0.0.0" \
    PORT="8000" \
    MAX_MODEL_LEN="30000" \
    GPU_MEMORY_UTILIZATION="0.9"

# Expose OpenAI-compatible API port
EXPOSE 8000

# Health check - vLLM exposes /health endpoint
# start-period leaves time for the server to come up before failed probes
# count against the retry budget.
# -f: fail on HTTP errors, -sS: hide the progress meter but still print errors.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=5 \
    CMD curl -fsS http://localhost:8000/health || exit 1

# Start vLLM server with Nanonets-OCR2-3B model
# These are default arguments appended to the base image's ENTRYPOINT (expected
# to be the vLLM OpenAI-compatible server); arguments given to `docker run`
# after the image name replace this list entirely.
CMD ["--model", "nanonets/Nanonets-OCR2-3B", \
     "--trust-remote-code", \
     "--max-model-len", "30000", \
     "--gpu-memory-utilization", "0.9", \
     "--host", "0.0.0.0", \
     "--port", "8000"]