diff --git a/readme.md b/readme.md
index f6c155d..1cb36e6 100644
--- a/readme.md
+++ b/readme.md
@@ -318,15 +318,15 @@ modelgrid cluster activate NODE # Mark a node active again
 
 High-performance inference with PagedAttention and continuous batching.
 
-```bash
+```jsonc
 {
   "id": "vllm-1",
   "type": "vllm",
   "name": "vLLM Server",
-  "gpuIds": ["nvidia-0", "nvidia-1"], # Tensor parallelism
+  "gpuIds": ["nvidia-0", "nvidia-1"], // Tensor parallelism
   "port": 8000,
   "env": {
-    "HF_TOKEN": "your-huggingface-token" # For gated models
+    "HF_TOKEN": "your-huggingface-token" // For gated models
   }
 }
 ```