/**
 * ModelGrid Constants
 *
 * Single source of truth for timeouts, intervals, default ports/paths,
 * and the built-in model catalog. Keeping these here makes tuning easy
 * and the rest of the codebase self-documenting.
 */

export const VERSION = '1.0.1';

/** Timing values. All durations are expressed in milliseconds. */
export const TIMING = {
  /** How often container health is probed while active (30 s). */
  CHECK_INTERVAL_MS: 30_000,
  /** Probe cadence when the system is idle (60 s). */
  IDLE_CHECK_INTERVAL_MS: 60_000,
  /** How often the config file is re-checked for changes (60 s). */
  CONFIG_CHECK_INTERVAL_MS: 60_000,
  /** Cadence of periodic status log lines (5 min). */
  LOG_INTERVAL_MS: 5 * 60 * 1000,
  /** Upper bound on GPU driver detection (10 s). */
  GPU_DETECTION_TIMEOUT_MS: 10_000,
  /** Upper bound on individual Docker CLI commands (30 s). */
  DOCKER_COMMAND_TIMEOUT_MS: 30_000,
  /** Upper bound on a container reaching running state (2 min). */
  CONTAINER_STARTUP_TIMEOUT_MS: 2 * 60 * 1000,
  /** Upper bound on a model finishing its load (10 min). */
  MODEL_LOAD_TIMEOUT_MS: 10 * 60 * 1000,
  /** How long the greenlit model list is cached before refetch (1 h). */
  GREENLIST_CACHE_DURATION_MS: 60 * 60 * 1000,
} as const;

/** Defaults for the built-in API server. */
export const API_SERVER = {
  /** Port the API server listens on by default. */
  DEFAULT_PORT: 8080,
  /** Interface the API server binds to by default. */
  DEFAULT_HOST: '0.0.0.0',
  /** Default rate limit, in requests per minute. */
  DEFAULT_RATE_LIMIT: 60,
  /** Per-request timeout (30 s). */
  REQUEST_TIMEOUT_MS: 30_000,
  /** Keep-alive ping interval for streaming responses (15 s). */
  STREAM_KEEPALIVE_MS: 15_000,
} as const;

/** Docker / container lifecycle defaults. */
export const DOCKER = {
  /** Name of the Docker network containers are attached to. */
  DEFAULT_NETWORK: 'modelgrid',
  /** Interval between container health checks (10 s). */
  HEALTH_CHECK_INTERVAL_MS: 10_000,
  /** Delay before attempting a container restart (5 s). */
  RESTART_DELAY_MS: 5000,
  /** Restart attempts before a container is given up on. */
  MAX_RESTART_ATTEMPTS: 3,
} as const;

/** GPU capacity thresholds. */
export const GPU = {
  /** Minimum VRAM most models need, in GB. */
  MIN_VRAM_GB: 8,
  /** Recommended VRAM for larger models, in GB. */
  RECOMMENDED_VRAM_GB: 24,
  /** Utilization percentage above which load balancing kicks in. */
  UTILIZATION_THRESHOLD_PERCENT: 80,
} as const;

/** Well-known in-container ports per inference engine. */
export const CONTAINER_PORTS = {
  /** Ollama's default listen port. */
  OLLAMA: 11434,
  /** vLLM's default listen port. */
  VLLM: 8000,
  /** Text Generation Inference's default listen port. */
  TGI: 8080,
} as const;

/** Default container images per inference engine. */
export const CONTAINER_IMAGES = {
  /** Official vLLM OpenAI-compatible server image. */
  VLLM: 'vllm/vllm-openai:latest',
  /** Official Hugging Face TGI image. */
  TGI: 'ghcr.io/huggingface/text-generation-inference:latest',
} as const;

/**
 * Model registry defaults: the remote catalog endpoint plus a small
 * built-in catalog used when the remote fetch fails.
 */
export const MODEL_REGISTRY = {
  /** Public catalog fetched at runtime. */
  DEFAULT_CATALOG_URL: 'https://list.modelgrid.com/catalog/models.json',
  /** Offline fallback catalog (subset of the remote one). */
  FALLBACK_CATALOG: [
    {
      id: 'Qwen/Qwen2.5-7B-Instruct',
      aliases: ['qwen2.5-7b-instruct'],
      engine: 'vllm',
      source: {
        repo: 'Qwen/Qwen2.5-7B-Instruct',
        license: 'apache-2.0',
      },
      capabilities: {
        chat: true,
        completions: true,
        tools: true,
      },
      requirements: {
        minVramGb: 16,
        recommendedVramGb: 24,
        minGpuCount: 1,
      },
      metadata: {
        family: 'Qwen2.5',
        parameterCount: '7B',
        contextWindow: 131072,
        summary: 'General purpose instruct model for chat and tool use.',
        tags: ['chat', 'tool-use', 'instruct'],
      },
    },
    {
      id: 'meta-llama/Llama-3.1-8B-Instruct',
      aliases: ['llama-3.1-8b-instruct'],
      engine: 'vllm',
      source: {
        repo: 'meta-llama/Llama-3.1-8B-Instruct',
        license: 'llama3.1',
      },
      capabilities: {
        chat: true,
        completions: true,
        tools: true,
      },
      requirements: {
        minVramGb: 18,
        recommendedVramGb: 24,
        minGpuCount: 1,
      },
      metadata: {
        family: 'Llama 3.1',
        parameterCount: '8B',
        contextWindow: 131072,
        summary: 'High quality instruct model with good ecosystem support.',
        tags: ['chat', 'tool-use', 'instruct'],
      },
    },
    {
      id: 'BAAI/bge-m3',
      aliases: ['bge-m3'],
      engine: 'vllm',
      source: {
        repo: 'BAAI/bge-m3',
        license: 'mit',
      },
      capabilities: {
        embeddings: true,
      },
      requirements: {
        minVramGb: 8,
        recommendedVramGb: 12,
        minGpuCount: 1,
      },
      metadata: {
        family: 'BGE',
        summary: 'Multilingual embedding model for retrieval workloads.',
        tags: ['embeddings', 'retrieval', 'multilingual'],
      },
    },
  ],
} as const;

/** Multi-node clustering defaults. */
export const CLUSTER = {
  /** Interface the cluster transport binds to. */
  DEFAULT_BIND_HOST: '0.0.0.0',
  /** Port used for gossip traffic between nodes. */
  DEFAULT_GOSSIP_PORT: 7946,
  /** Heartbeat send interval between peers (5 s). */
  DEFAULT_HEARTBEAT_INTERVAL_MS: 5000,
  /** A node missing heartbeats this long is considered stale (20 s). */
  NODE_STALE_AFTER_MS: 20_000,
  /** HTTP header carrying the shared cluster secret. */
  AUTH_HEADER_NAME: 'x-modelgrid-cluster-secret',
} as const;

/** Filesystem locations used by the installed service. */
export const PATHS = {
  /** Directory holding configuration. */
  CONFIG_DIR: '/etc/modelgrid',
  /** Primary configuration file. */
  CONFIG_FILE: '/etc/modelgrid/config.json',
  /** Persistent data (models, state). */
  DATA_DIR: '/var/lib/modelgrid',
  /** Log output directory. */
  LOG_DIR: '/var/log/modelgrid',
  /** Installed systemd unit file. */
  SYSTEMD_SERVICE: '/etc/systemd/system/modelgrid.service',
  /** Installed CLI binary. */
  BINARY_PATH: '/usr/local/bin/modelgrid',
  /** Service working directory. */
  WORK_DIR: '/opt/modelgrid',
} as const;

/** Terminal UI layout widths, in columns. */
export const UI = {
  /** Standard width for log boxes. */
  DEFAULT_BOX_WIDTH: 50,
  /** Wider box for status displays. */
  WIDE_BOX_WIDTH: 65,
  /** Widest box, for detailed info panels. */
  EXTRA_WIDE_BOX_WIDTH: 80,
} as const;