Compare commits
29 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4af9d3de69 | |||
| 405fff91af | |||
| 9022c8dbf3 | |||
| 703cceb512 | |||
| 9d925f9401 | |||
| fe4fdb32d7 | |||
| d6b4c0def1 | |||
| 9608540792 | |||
| 3762fc661e | |||
| 6541b2db1c | |||
| da7375c889 | |||
| 44eb9b9173 | |||
| 1f24df0d80 | |||
| c95961d596 | |||
| 0921dfbe5e | |||
| 5172002ec0 | |||
| 58eabba84d | |||
| 5e8ce6690d | |||
| 0ea98caed6 | |||
| 871afedbb7 | |||
| 1f6cf51794 | |||
| 054875abb5 | |||
| 3e341bbfda | |||
| 9f7308498c | |||
| 952bf394d3 | |||
| 3b2a16b151 | |||
| 9c9c0c90ae | |||
| 24bb6b3058 | |||
| cec102e54e |
@@ -1,6 +1,9 @@
|
|||||||
# Compiled Deno binaries (built by scripts/compile-all.sh)
|
# Compiled Deno binaries (built by scripts/compile-all.sh)
|
||||||
dist/binaries/
|
dist/binaries/
|
||||||
|
|
||||||
|
# Generated UI bundle (built by scripts/bundle-ui.ts)
|
||||||
|
ts_bundled/
|
||||||
|
|
||||||
# Deno cache and lock file
|
# Deno cache and lock file
|
||||||
.deno/
|
.deno/
|
||||||
deno.lock
|
deno.lock
|
||||||
|
|||||||
@@ -4,9 +4,10 @@
|
|||||||
"exports": "./mod.ts",
|
"exports": "./mod.ts",
|
||||||
"nodeModulesDir": "auto",
|
"nodeModulesDir": "auto",
|
||||||
"tasks": {
|
"tasks": {
|
||||||
"dev": "deno run --allow-all mod.ts",
|
"dev": "UI_ASSET_SOURCE=disk deno run --allow-all mod.ts",
|
||||||
|
"bundle:ui": "deno run --allow-read --allow-write scripts/bundle-ui.ts",
|
||||||
"compile": "deno task compile:all",
|
"compile": "deno task compile:all",
|
||||||
"compile:all": "bash scripts/compile-all.sh",
|
"compile:all": "deno task bundle:ui && bash scripts/compile-all.sh",
|
||||||
"test": "deno test --allow-all test/",
|
"test": "deno test --allow-all test/",
|
||||||
"test:watch": "deno test --allow-all --watch test/",
|
"test:watch": "deno test --allow-all --watch test/",
|
||||||
"check": "deno check mod.ts",
|
"check": "deno check mod.ts",
|
||||||
|
|||||||
+3
-2
@@ -37,8 +37,9 @@
|
|||||||
"scripts": {
|
"scripts": {
|
||||||
"postinstall": "node scripts/install-binary.js",
|
"postinstall": "node scripts/install-binary.js",
|
||||||
"prepublishOnly": "echo 'Publishing ModelGrid binaries to npm...'",
|
"prepublishOnly": "echo 'Publishing ModelGrid binaries to npm...'",
|
||||||
"test": "echo 'Tests are run with Deno: deno task test'",
|
"test": "deno task test",
|
||||||
"build": "echo 'no build needed'"
|
"check": "deno task check",
|
||||||
|
"build": "deno task bundle:ui"
|
||||||
},
|
},
|
||||||
"files": [
|
"files": [
|
||||||
"bin/",
|
"bin/",
|
||||||
|
|||||||
+15
-7
@@ -3,7 +3,7 @@
|
|||||||
## Project Overview
|
## Project Overview
|
||||||
|
|
||||||
ModelGrid is a root-level daemon that manages GPU infrastructure, Docker, and AI model containers
|
ModelGrid is a root-level daemon that manages GPU infrastructure, Docker, and AI model containers
|
||||||
(Ollama, vLLM, TGI) with an OpenAI-compatible API interface.
|
(vLLM, TGI) with an OpenAI-compatible API interface.
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
@@ -84,13 +84,12 @@ ts/
|
|||||||
|
|
||||||
### Greenlit Model System
|
### Greenlit Model System
|
||||||
|
|
||||||
- Only pre-approved models can be auto-pulled for security
|
- Only catalog-listed models can be auto-deployed on demand
|
||||||
- Greenlist fetched from remote URL (configurable)
|
- Catalog fetched from a remote URL (configurable)
|
||||||
- VRAM requirements checked before loading
|
- VRAM requirements checked before loading
|
||||||
|
|
||||||
### Container Types
|
### Container Types
|
||||||
|
|
||||||
- **Ollama**: Easy to use, native API converted to OpenAI format
|
|
||||||
- **vLLM**: High performance, natively OpenAI-compatible
|
- **vLLM**: High performance, natively OpenAI-compatible
|
||||||
- **TGI**: HuggingFace Text Generation Inference
|
- **TGI**: HuggingFace Text Generation Inference
|
||||||
|
|
||||||
@@ -111,12 +110,20 @@ interface IModelGridConfig {
|
|||||||
port: number; // Default: 8080
|
port: number; // Default: 8080
|
||||||
host: string; // Default: '0.0.0.0'
|
host: string; // Default: '0.0.0.0'
|
||||||
apiKeys: string[]; // Valid API keys
|
apiKeys: string[]; // Valid API keys
|
||||||
|
rateLimit?: number;
|
||||||
cors: boolean;
|
cors: boolean;
|
||||||
corsOrigins: string[];
|
corsOrigins: string[];
|
||||||
};
|
};
|
||||||
|
ui: {
|
||||||
|
enabled: boolean;
|
||||||
|
port: number; // Default: 8081
|
||||||
|
host: string; // Default: '0.0.0.0'
|
||||||
|
assetSource: 'bundle' | 'disk';
|
||||||
|
};
|
||||||
docker: {
|
docker: {
|
||||||
networkName: string; // Default: 'modelgrid'
|
networkName: string; // Default: 'modelgrid'
|
||||||
runtime: 'docker' | 'podman';
|
runtime: 'docker' | 'podman';
|
||||||
|
socketPath?: string;
|
||||||
};
|
};
|
||||||
gpus: {
|
gpus: {
|
||||||
autoDetect: boolean;
|
autoDetect: boolean;
|
||||||
@@ -124,11 +131,12 @@ interface IModelGridConfig {
|
|||||||
};
|
};
|
||||||
containers: IContainerConfig[];
|
containers: IContainerConfig[];
|
||||||
models: {
|
models: {
|
||||||
greenlistUrl: string;
|
registryUrl: string;
|
||||||
autoPull: boolean;
|
autoDeploy: boolean;
|
||||||
defaultContainer: string;
|
defaultEngine: 'vllm';
|
||||||
autoLoad: string[];
|
autoLoad: string[];
|
||||||
};
|
};
|
||||||
|
cluster: IClusterConfig;
|
||||||
checkInterval: number;
|
checkInterval: number;
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -318,15 +318,15 @@ modelgrid cluster activate NODE # Mark a node active again
|
|||||||
|
|
||||||
High-performance inference with PagedAttention and continuous batching.
|
High-performance inference with PagedAttention and continuous batching.
|
||||||
|
|
||||||
```bash
|
```jsonc
|
||||||
{
|
{
|
||||||
"id": "vllm-1",
|
"id": "vllm-1",
|
||||||
"type": "vllm",
|
"type": "vllm",
|
||||||
"name": "vLLM Server",
|
"name": "vLLM Server",
|
||||||
"gpuIds": ["nvidia-0", "nvidia-1"], # Tensor parallelism
|
"gpuIds": ["nvidia-0", "nvidia-1"], // Tensor parallelism
|
||||||
"port": 8000,
|
"port": 8000,
|
||||||
"env": {
|
"env": {
|
||||||
"HF_TOKEN": "your-huggingface-token" # For gated models
|
"HF_TOKEN": "your-huggingface-token" // For gated models
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
@@ -555,6 +555,12 @@ deno run --allow-all mod.ts help
|
|||||||
# Run tests
|
# Run tests
|
||||||
deno task test
|
deno task test
|
||||||
|
|
||||||
|
# Run the main regression suite used during focused changes
|
||||||
|
deno test --allow-all test/test.ts
|
||||||
|
|
||||||
|
# Run the full suite, including focused seam tests
|
||||||
|
deno test --allow-all test/
|
||||||
|
|
||||||
# Type check
|
# Type check
|
||||||
deno task check
|
deno task check
|
||||||
|
|
||||||
@@ -595,6 +601,14 @@ modelgrid/
|
|||||||
└── bin/ # npm wrapper
|
└── bin/ # npm wrapper
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Focused seam tests live alongside `test/test.ts`:
|
||||||
|
|
||||||
|
- `test/api-router_test.ts` covers routing, auth failures, and request-size handling
|
||||||
|
- `test/api-server_test.ts` covers health, metrics, and authenticated model listing
|
||||||
|
- `test/modelgrid-config_test.ts` covers config normalization and ignored-key warnings
|
||||||
|
- `test/model-registry_test.ts` covers fallback and file-backed catalog loading
|
||||||
|
- `test/cluster-manager-persistence_test.ts` covers persisted cluster state loading and pruning
|
||||||
|
|
||||||
## 🗑️ Uninstallation
|
## 🗑️ Uninstallation
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
+17
-7
@@ -26,9 +26,9 @@
|
|||||||
┌─────────────────────────────────────────────────────────────────┐
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
│ Container Runtime │
|
│ Container Runtime │
|
||||||
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
|
│  ┌──────────┐  ┌──────────┐  ┌──────────┐                  │
|
||||||
│ │ Ollama │ │ vLLM │ │ TGI │ │ Custom │ │
|
│ │ vLLM │ │ TGI │ │ Custom │ │
|
||||||
│ │Container │ │Container │ │Container │ │Container │ │
|
│ │Container │ │Container │ │Container │ │
|
||||||
│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │
|
│ └──────────┘ └──────────┘ └──────────┘ │
|
||||||
└─────────────────────────────────────────────────────────────────┘
|
└─────────────────────────────────────────────────────────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -61,7 +61,7 @@
|
|||||||
### Pending Tasks
|
### Pending Tasks
|
||||||
|
|
||||||
- [ ] Integration testing with real GPUs
|
- [ ] Integration testing with real GPUs
|
||||||
- [ ] End-to-end API testing
|
- [x] End-to-end API smoke testing for health, metrics, and authenticated model listings
|
||||||
- [ ] Documentation improvements
|
- [ ] Documentation improvements
|
||||||
- [ ] First release (v1.0.0)
|
- [ ] First release (v1.0.0)
|
||||||
|
|
||||||
@@ -116,8 +116,7 @@ modelgrid/
|
|||||||
│ │ │ └── embeddings.ts # /v1/embeddings
|
│ │ │ └── embeddings.ts # /v1/embeddings
|
||||||
│ │ └── middleware/ # Request processing
|
│ │ └── middleware/ # Request processing
|
||||||
│ │ ├── auth.ts # API key validation
|
│ │ ├── auth.ts # API key validation
|
||||||
│ │ ├── sanity.ts # Request validation
|
│ │ └── sanity.ts # Request validation
|
||||||
│ │ └── proxy.ts # Container proxy
|
|
||||||
│ ├── models/ # Model management
|
│ ├── models/ # Model management
|
||||||
│ │ ├── index.ts
|
│ │ ├── index.ts
|
||||||
│ │ ├── registry.ts # Model registry
|
│ │ ├── registry.ts # Model registry
|
||||||
@@ -134,6 +133,17 @@ modelgrid/
|
|||||||
└── docs/ # Documentation
|
└── docs/ # Documentation
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Test Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
deno task check
|
||||||
|
deno test --allow-all test/test.ts
|
||||||
|
deno test --allow-all test/
|
||||||
|
```
|
||||||
|
|
||||||
|
The focused seam tests currently cover API routing, API server endpoints, config normalization,
|
||||||
|
model registry loading, and cluster state persistence.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## CLI Commands
|
## CLI Commands
|
||||||
@@ -177,7 +187,7 @@ modelgrid config init # Initialize configuration
|
|||||||
|
|
||||||
## Greenlit Model System
|
## Greenlit Model System
|
||||||
|
|
||||||
Models are controlled via a remote greenlist to prevent arbitrary downloads:
|
Models are resolved through a remote catalog so deployments come from an explicit allowlist:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
|
|||||||
+414
@@ -0,0 +1,414 @@
|
|||||||
|
# 🖥️ ModelGrid — UI Concept
|
||||||
|
|
||||||
|
**A browser-based operations console for ModelGrid, served by the same daemon that
|
||||||
|
already exposes the OpenAI-compatible API.**
|
||||||
|
|
||||||
|
This document sketches the user interface that will sit on top of the ModelGrid
|
||||||
|
daemon: what it shows, how it is organized, how an operator moves through it,
|
||||||
|
and how it stays in sync with a running node or a small cluster. It is a
|
||||||
|
concept, not a final spec — the goal is to lock the shape of the product
|
||||||
|
before any frontend code is written.
|
||||||
|
|
||||||
|
The structural idioms (tabbed top-level views, route-origin awareness,
|
||||||
|
embedded ops dashboard on a dedicated port, API-first with a thin UI on top)
|
||||||
|
are adapted from `@serve.zone/dcrouter`'s Ops dashboard. ModelGrid's UI should
|
||||||
|
feel familiar to anyone who has operated dcrouter, while staying grounded in
|
||||||
|
ModelGrid's own domain: GPUs, vLLM deployments, a public model catalog, and a
|
||||||
|
cluster of gateway-capable nodes.
|
||||||
|
|
||||||
|
## 🎯 Purpose & Audience
|
||||||
|
|
||||||
|
- **Primary user:** the operator of one or a few ModelGrid nodes. Often the
|
||||||
|
same person who provisioned the GPU host and ran `modelgrid service enable`.
|
||||||
|
- **Secondary user:** a platform engineer wiring ModelGrid into an internal
|
||||||
|
AI platform who needs to manage API keys, audit deployments, and watch
|
||||||
|
request traffic.
|
||||||
|
- **Not an end-user chat UI.** Consumers of the OpenAI-compatible API keep
|
||||||
|
using their own SDKs and tools. The browser UI is for operating the fleet,
|
||||||
|
not for prompting models.
|
||||||
|
|
||||||
|
The UI should collapse gracefully from a full cluster view down to a
|
||||||
|
single-node, standalone deployment, because both shapes are first-class in
|
||||||
|
ModelGrid's `cluster.role` model (`standalone` / `control-plane` / `worker`).
|
||||||
|
|
||||||
|
## 🧭 Top-Level Information Architecture
|
||||||
|
|
||||||
|
URLs follow `/{view}` for flat views and `/{view}/{subview}` for tabbed
|
||||||
|
views, matching dcrouter's routing idiom.
|
||||||
|
|
||||||
|
```
|
||||||
|
/overview
|
||||||
|
/stats
|
||||||
|
/configuration
|
||||||
|
|
||||||
|
/cluster
|
||||||
|
/nodes
|
||||||
|
/placements
|
||||||
|
/desired
|
||||||
|
|
||||||
|
/gpus
|
||||||
|
/devices
|
||||||
|
/drivers
|
||||||
|
|
||||||
|
/deployments
|
||||||
|
/active
|
||||||
|
/history
|
||||||
|
|
||||||
|
/models
|
||||||
|
/catalog
|
||||||
|
/deployed
|
||||||
|
|
||||||
|
/access
|
||||||
|
/apikeys
|
||||||
|
/clients
|
||||||
|
|
||||||
|
/logs (flat)
|
||||||
|
/metrics (flat)
|
||||||
|
/settings (flat)
|
||||||
|
```
|
||||||
|
|
||||||
|
Rationale for the split:
|
||||||
|
|
||||||
|
- **Overview** is the landing page — one screen that answers "is the fleet
|
||||||
|
healthy right now?"
|
||||||
|
- **Cluster / GPUs / Deployments / Models** are the four nouns an operator
|
||||||
|
actually reasons about when running ModelGrid. Keeping them at the top
|
||||||
|
level matches the CLI verbs (`modelgrid cluster`, `modelgrid gpu`,
|
||||||
|
`modelgrid container`, `modelgrid model`) so muscle memory transfers.
|
||||||
|
- **Access** consolidates the authn/authz surface (API keys today,
|
||||||
|
user/OIDC later) into one place, the way dcrouter groups `apitokens` and
|
||||||
|
`users` under `access`.
|
||||||
|
- **Logs** and **Metrics** are flat because they are cross-cutting streams,
|
||||||
|
not noun-scoped tabs.
|
||||||
|
|
||||||
|
The navigation chrome itself is a persistent left rail on desktop, collapsing
|
||||||
|
into a top hamburger on narrow viewports. The selected view is indicated
|
||||||
|
there; subviews surface as a tab strip at the top of the content area.
|
||||||
|
|
||||||
|
```
|
||||||
|
┌────────────┬──────────────────────────────────────────────────────────────┐
|
||||||
|
│ ModelGrid │ Overview ▸ Stats Configuration │
|
||||||
|
│ ├──────────────────────────────────────────────────────────────┤
|
||||||
|
│ Overview ●│ │
|
||||||
|
│ Cluster │ ┌─ Fleet Health ─────────────────────────────────────┐ │
|
||||||
|
│ GPUs │ │ 2 nodes • 3 GPUs • 4 deployments • api OK │ │
|
||||||
|
│ Deploys │ └───────────────────────────────────────────────────┘ │
|
||||||
|
│ Models │ ┌─ Live Traffic ──────────────┐ ┌─ GPU Utilization ─┐ │
|
||||||
|
│ Access │ │ 42 req/s p95 820 ms │ │ ▁▂▄▅▇█▇▅▄▂▁ │ │
|
||||||
|
│ │ │ ▁▂▃▅▇▇▅▃▂▁▁▂▄▆ │ │ avg 64% │ │
|
||||||
|
│ Logs │ └─────────────────────────────┘ └───────────────────┘ │
|
||||||
|
│ Metrics │ ┌─ Deployments ────────────────────────────────────┐ │
|
||||||
|
│ Settings │ │ llama-3.1-8b running 2/2 nvidia-0,1 │ │
|
||||||
|
│ │ │ qwen2.5-7b running 1/1 nvidia-2 │ │
|
||||||
|
│ node: ctrl │ │ bge-m3 pending 0/1 (no capacity) │ │
|
||||||
|
│ v1.1.0 │ └──────────────────────────────────────────────────┘ │
|
||||||
|
└────────────┴──────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
The footer of the rail surfaces the local node's identity (`nodeName`,
|
||||||
|
`role`), the daemon version, and a small link to the API base URL —
|
||||||
|
equivalent to how dcrouter surfaces its runtime identity in the sidebar.
|
||||||
|
|
||||||
|
## 📄 Per-View Sketches
|
||||||
|
|
||||||
|
### Overview ▸ Stats (landing page)
|
||||||
|
|
||||||
|
A dashboard of the things that an on-call operator wants to see in under
|
||||||
|
two seconds:
|
||||||
|
|
||||||
|
- **Fleet health band**: green/yellow/red status tiles for nodes, GPUs,
|
||||||
|
deployments, API.
|
||||||
|
- **Live traffic**: requests/sec, p50/p95/p99 latency, error rate. Sparkline
|
||||||
|
for the last 15 minutes, streaming from `/metrics` and a server-pushed
|
||||||
|
channel.
|
||||||
|
- **GPU utilization strip**: one micro-sparkline per GPU, colored by VRAM
|
||||||
|
pressure.
|
||||||
|
- **Deployment summary**: the `modelgrid ps` output, but clickable. Each
|
||||||
|
row deep-links into Deployments ▸ Active.
|
||||||
|
- **Catalog drift**: a small callout when `list.modelgrid.com` has newer
|
||||||
|
model entries than the node's cached catalog.
|
||||||
|
|
||||||
|
### Overview ▸ Configuration
|
||||||
|
|
||||||
|
A read-only rendering of the resolved `/etc/modelgrid/config.json` with
|
||||||
|
section headers (`api`, `docker`, `gpus`, `models`, `cluster`). Operators
|
||||||
|
can copy the JSON; editing config is intentionally kept to the Settings view
|
||||||
|
(or the CLI) to avoid a "two sources of truth" problem.
|
||||||
|
|
||||||
|
### Cluster ▸ Nodes
|
||||||
|
|
||||||
|
Mirrors `modelgrid cluster nodes`. Each row: node name, role badge
|
||||||
|
(`standalone` / `control-plane` / `worker`), advertised URL, last heartbeat,
|
||||||
|
GPU inventory summary, status (`active` / `cordoned` / `draining`).
|
||||||
|
|
||||||
|
Row actions: `cordon`, `drain`, `activate` — the same verbs as the CLI.
|
||||||
|
Hitting an action fires the corresponding control-plane call and shows an
|
||||||
|
in-row toast on success.
|
||||||
|
|
||||||
|
```
|
||||||
|
┌ Nodes ───────────────────────────────────────────────────────────────────┐
|
||||||
|
│ Name Role Advertised URL Heartbeat │
|
||||||
|
│ ────────────────────────────────────────────────────────────────────── │
|
||||||
|
│ control-a control-plane http://ctrl.internal:8080 2s ago ● │
|
||||||
|
│ worker-a worker http://wa.internal:8080 3s ago ● │
|
||||||
|
│ worker-b worker http://wb.internal:8080 41s ago ◐ │
|
||||||
|
│                                        [cordon] [drain] [activate]      │
|
||||||
|
└──────────────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Cluster ▸ Placements
|
||||||
|
|
||||||
|
A live map of where every deployed model is currently running, read from
|
||||||
|
the control-plane's placement state. Grouped by model, with a column per
|
||||||
|
node. Cells show replica count and health. This is where the operator
|
||||||
|
answers "where did `llama-3.1-8b` actually end up?".
|
||||||
|
|
||||||
|
### Cluster ▸ Desired
|
||||||
|
|
||||||
|
The companion to Placements: the desired-state table. Each row is a model
|
||||||
|
with a target replica count. Rows can be added (`cluster ensure`), edited
|
||||||
|
(`cluster scale`), or removed (`cluster clear`). The reconciler's pending
|
||||||
|
work is surfaced as a diff badge: e.g. `+1 replica`, `moving from worker-b
|
||||||
|
→ worker-a`.
|
||||||
|
|
||||||
|
### GPUs ▸ Devices
|
||||||
|
|
||||||
|
Mirrors `modelgrid gpu list` / `gpu status`, rendered as a card per GPU:
|
||||||
|
vendor, model, VRAM free/total, driver version, temperature, current
|
||||||
|
utilization, and which deployment is using it. Cards stream their
|
||||||
|
utilization via the realtime channel; no full page reloads.
|
||||||
|
|
||||||
|
### GPUs ▸ Drivers
|
||||||
|
|
||||||
|
Status per vendor (NVIDIA / AMD / Intel): driver installed? version? any
|
||||||
|
known issue? Includes a button to run `modelgrid gpu install`
|
||||||
|
interactively — but since the install flow is privileged and interactive,
|
||||||
|
the UI only kicks off the CLI walk-through in a terminal session rather
|
||||||
|
than trying to reimplement it in the browser. A small "copy the command"
|
||||||
|
affordance makes this explicit.
|
||||||
|
|
||||||
|
### Deployments ▸ Active
|
||||||
|
|
||||||
|
The core operational table. One row per active vLLM deployment:
|
||||||
|
|
||||||
|
- container ID, display name, model, GPU bindings, port, uptime, request
|
||||||
|
rate, error rate
|
||||||
|
- status pill (`running`, `pending`, `restarting`, `failed`)
|
||||||
|
- row actions: `logs`, `stop`, `restart`, `remove`
|
||||||
|
|
||||||
|
Clicking a row opens a detail drawer with sub-tabs:
|
||||||
|
|
||||||
|
- **Summary** — the effective container config and the scheduling
|
||||||
|
decision that landed it on this node
|
||||||
|
- **Logs** — a live tail (SSE)
|
||||||
|
- **Metrics** — request latency histogram, token throughput, VRAM
|
||||||
|
occupancy
|
||||||
|
- **Events** — a timeline of lifecycle events (scheduled, pulled image,
|
||||||
|
started, health check, restart, stopped)
|
||||||
|
|
||||||
|
### Deployments ▸ History
|
||||||
|
|
||||||
|
Deployments that have been stopped or removed, with the reason and the
|
||||||
|
last-known logs. Useful for post-mortem on a failed deploy.
|
||||||
|
|
||||||
|
### Models ▸ Catalog
|
||||||
|
|
||||||
|
The current catalog resolved from `list.modelgrid.com`, with a "refresh"
|
||||||
|
action that calls `modelgrid model refresh`. Each entry shows canonical
|
||||||
|
ID, aliases, capabilities (chat / completions / embeddings), minimum
|
||||||
|
VRAM, default GPU count, and a `Deploy` button. Deploying opens a small
|
||||||
|
form that mirrors `modelgrid run`: target node (or auto), desired replica
|
||||||
|
count, optional env overrides (e.g. `HF_TOKEN`).
|
||||||
|
|
||||||
|
A visible "source" badge marks whether the entry came from the public
|
||||||
|
catalog or a custom `registryUrl`, so operators can tell at a glance which
|
||||||
|
models the cluster will actually trust for auto-deploy.
|
||||||
|
|
||||||
|
### Models ▸ Deployed
|
||||||
|
|
||||||
|
Shows the union of what is running across the cluster, with replica
|
||||||
|
counts, keyed by canonical model ID. This is the view a developer asks
|
||||||
|
the operator for when they want to know "what models can I hit on this
|
||||||
|
endpoint?". It is effectively a pretty rendering of `/v1/models`.
|
||||||
|
|
||||||
|
### Access ▸ API Keys
|
||||||
|
|
||||||
|
Mirrors `modelgrid config apikey list`. Columns: label, prefix (first
|
||||||
|
8 chars), created, last used, status. Actions: `generate`, `revoke`.
|
||||||
|
Generating a key shows the secret once in a modal with a copy button,
|
||||||
|
then never shows it again — the same contract as dcrouter's API tokens.
|
||||||
|
|
||||||
|
### Access ▸ Clients
|
||||||
|
|
||||||
|
Placeholder for per-consumer rate limits, quotas, and request labels.
|
||||||
|
This view is explicitly future work; it renders as "not yet configured"
|
||||||
|
until the daemon exposes client records. Listing it now reserves the IA
|
||||||
|
slot so it doesn't have to be retrofitted later.
|
||||||
|
|
||||||
|
### Logs
|
||||||
|
|
||||||
|
A unified tail across daemon, scheduler, and deployments, with filters
|
||||||
|
by source (`daemon`, `scheduler`, `deployment:<id>`), level, and
|
||||||
|
free-text. Streamed via SSE. A "pause" toggle freezes the view for
|
||||||
|
reading; a "download" action exports the current buffer as NDJSON.
|
||||||
|
|
||||||
|
### Metrics
|
||||||
|
|
||||||
|
The `/metrics` endpoint rendered as a small set of charts (request rate,
|
||||||
|
latency, error rate, VRAM occupancy, model throughput). This is
|
||||||
|
deliberately lightweight — serious monitoring is expected to come from
|
||||||
|
Prometheus scraping `/metrics` into Grafana, and the UI says so with a
|
||||||
|
link to the recommended dashboard snippet.
|
||||||
|
|
||||||
|
### Settings
|
||||||
|
|
||||||
|
Editable configuration, grouped to match the config file:
|
||||||
|
|
||||||
|
- **API** — port, bind host, CORS, rate limit
|
||||||
|
- **Docker** — runtime, network name, socket path
|
||||||
|
- **GPUs** — auto-detect toggle, per-GPU assignments
|
||||||
|
- **Models** — registry URL, auto-deploy, default engine, auto-load list
|
||||||
|
- **Cluster** — role, advertise URL, control-plane URL, shared secret,
|
||||||
|
heartbeat interval, seeds
|
||||||
|
|
||||||
|
Edits write through the daemon's config API (to be defined) and reload
|
||||||
|
without a restart wherever possible. Settings that require a restart are
|
||||||
|
marked with a `restart required` badge, and the UI surfaces a single
|
||||||
|
"restart daemon" action at the top of the view when any are pending.
|
||||||
|
|
||||||
|
## 🛤️ Key User Journeys
|
||||||
|
|
||||||
|
### Deploy a model from the catalog
|
||||||
|
|
||||||
|
1. Operator opens **Models ▸ Catalog**, filters for chat-capable models
|
||||||
|
with VRAM ≤ 24 GB.
|
||||||
|
2. Clicks `Deploy` on `meta-llama/Llama-3.1-8B-Instruct`.
|
||||||
|
3. Dialog appears with target node (`auto` / specific worker), replica
|
||||||
|
count (default from catalog), optional env (`HF_TOKEN`).
|
||||||
|
4. On submit, the UI calls the control plane (`cluster ensure` + `scale`
|
||||||
|
under the hood). The dialog closes and the new row appears in
|
||||||
|
**Deployments ▸ Active** in `pending` state.
|
||||||
|
5. SSE updates walk the row through `pulling image → starting → running`.
|
||||||
|
6. A toast links to the deployment detail drawer for logs.
|
||||||
|
|
||||||
|
### Add a worker node to an existing control plane
|
||||||
|
|
||||||
|
1. Operator opens **Cluster ▸ Nodes** on the control plane.
|
||||||
|
2. Clicks `Add node`, which opens a helper that pre-fills the worker's
|
||||||
|
expected `cluster` config block — role, control-plane URL, shared
|
||||||
|
secret — and exposes a one-liner install command.
|
||||||
|
3. The operator runs the install command on the worker host. The UI does
|
||||||
|
**not** SSH into anything; it just hands out the exact snippet.
|
||||||
|
4. Once the worker's daemon starts and registers, the new node appears
|
||||||
|
in the Nodes table with its first heartbeat. The helper closes
|
||||||
|
automatically.
|
||||||
|
|
||||||
|
### Rotate an API key
|
||||||
|
|
||||||
|
1. **Access ▸ API Keys** → `Generate`.
|
||||||
|
2. Name the key, pick a scope (today: single scope; later: per-model).
|
||||||
|
3. The secret is shown once in a modal; copy-to-clipboard and a clear
|
||||||
|
"you will not see this again" note.
|
||||||
|
4. Old key row gets a `revoke` action. Revoke is a confirm-then-apply
|
||||||
|
flow because it will break live traffic.
|
||||||
|
|
||||||
|
### Investigate a failing deployment
|
||||||
|
|
||||||
|
1. **Overview ▸ Stats** shows a red tile: `1 deployment failed`.
|
||||||
|
2. Click drills into **Deployments ▸ Active**, filtered to `failed`.
|
||||||
|
3. Open the row drawer → **Events** tab to see the lifecycle timeline.
|
||||||
|
4. Jump to **Logs** tab for the live tail. If the deployment is down,
|
||||||
|
fall back to the last 500 lines from its event buffer.
|
||||||
|
5. From the drawer, `restart` retries the deployment; if it fails again,
|
||||||
|
the `Summary` tab shows the scheduling decision so the operator can
|
||||||
|
see whether VRAM, GPU pinning, or image pull is the root cause.
|
||||||
|
|
||||||
|
## 📡 Realtime, Auth, and API Contract
|
||||||
|
|
||||||
|
- **Realtime updates.** Metrics, logs, GPU utilization, heartbeats, and
|
||||||
|
deployment state changes stream over Server-Sent Events. A single
|
||||||
|
`/v1/_ui/events?topics=...` endpoint is preferred over per-feature
|
||||||
|
sockets so the browser holds exactly one connection. WebSocket is
|
||||||
|
reserved for bidirectional features (e.g. an interactive install
|
||||||
|
walkthrough) that we do not need in v1.
|
||||||
|
- **Auth model.** The UI runs behind the same daemon process as the
|
||||||
|
OpenAI-compatible API, on a dedicated `uiPort` (default `8081`) to
|
||||||
|
keep the data-plane clean. Login uses a session cookie; the first-boot
|
||||||
|
bootstrap seeds an `admin` user with a one-time password printed to
|
||||||
|
`journalctl -u modelgrid`, the same way dcrouter prints its initial
|
||||||
|
`admin`/`admin`. SSO/OIDC is a later add-on.
|
||||||
|
- **API contract.** Every UI action maps to an HTTP endpoint on the
|
||||||
|
daemon (`/v1/_ui/...`). The UI must not talk to any private internals
|
||||||
|
directly; this keeps `@modelgrid.com/modelgrid-apiclient` (a future
|
||||||
|
sibling to `@serve.zone/dcrouter-apiclient`) able to do everything the
|
||||||
|
UI can do, from scripts.
|
||||||
|
- **Origin badges.** Similar to dcrouter's `config` / `email` / `dns` /
|
||||||
|
`api` route-origin model, ModelGrid should tag each deployment with
|
||||||
|
its origin: `config` (seeded via `containers` in config.json),
|
||||||
|
`catalog` (auto-deployed from `models.autoLoad`), `api` (created via
|
||||||
|
UI/API). Origin determines what the UI allows: `config`-origin
|
||||||
|
deployments are toggle-only, `api`-origin deployments are full CRUD.
|
||||||
|
|
||||||
|
## 🧱 Implementation Notes (non-binding)
|
||||||
|
|
||||||
|
- **Web component stack.** Match the dcrouter OpsServer approach:
|
||||||
|
component-per-view under `ts_web/elements/<area>/`, a tiny
|
||||||
|
SmartRouter-style client router (`ts_web/router.ts`), and a single
|
||||||
|
`appstate.ts` as the store.
|
||||||
|
- **Bundled into the binary via `ts_bundled/bundle.ts`.** ModelGrid is a
|
||||||
|
Deno project that ships as a `deno compile` single binary, so the UI
|
||||||
|
follows the `@stack.gallery/registry` pattern: a build step bundles
|
||||||
|
the `ts_web/` sources (HTML, JS, CSS, fonts, icons) into a single
|
||||||
|
generated `ts_bundled/bundle.ts` module that exports a
|
||||||
|
`{ path → bytes | string }` map. The daemon dynamically imports that
|
||||||
|
module at startup and hands the map to **typedserver**, which serves
|
||||||
|
it on the UI port. Result: no external asset directory, no runtime
|
||||||
|
filesystem dependency, one binary still ships the entire console.
|
||||||
|
- **Dev vs prod asset source.** In `deno task dev`, typedserver is
|
||||||
|
pointed at `ts_web/` on disk so UI edits are hot-reloadable without
|
||||||
|
re-running the bundler. In `deno task compile` / prod, the bundler
|
||||||
|
regenerates `ts_bundled/bundle.ts` first and the compiled binary
|
||||||
|
serves exclusively from the embedded map. A single flag
|
||||||
|
(`UI_ASSET_SOURCE=disk|bundle`, default `bundle`) picks the strategy
|
||||||
|
at runtime.
|
||||||
|
- **Bundler placement.** Mirrors `@stack.gallery/registry`: keep the
|
||||||
|
bundler in `scripts/bundle-ui.ts`, invoke it from a `deno task
|
||||||
|
bundle:ui` that the `compile:all` task depends on, and `.gitignore`
|
||||||
|
the generated `ts_bundled/bundle.ts` so it is only produced during
|
||||||
|
release builds (or regenerated on demand for local prod testing).
|
||||||
|
- **Packaging.** Follow dcrouter's module split: `@modelgrid.com/modelgrid`
|
||||||
|
ships the daemon and the embedded UI bundle; a future
|
||||||
|
`@modelgrid.com/modelgrid-web` can carve out the web sources as their
|
||||||
|
own publishable boundary if the bundle grows large or the UI needs to
|
||||||
|
be consumed independently.
|
||||||
|
- **Dark theme default** (black background, high-contrast foreground) to
|
||||||
|
match dcrouter and the expected server-ops environment. Light theme
|
||||||
|
is a later toggle.
|
||||||
|
- **No server-side rendering.** The UI is a static SPA; typedserver
|
||||||
|
returns the asset map's `index.html` for the app shell and the rest
|
||||||
|
of the state comes from the API. This keeps the runtime surface
|
||||||
|
small and makes the UI-less `curl` story identical to the UI story.
|
||||||
|
|
||||||
|
## ❓ Open Questions
|
||||||
|
|
||||||
|
- **Edit config from the UI or keep it CLI/file-first?** Current lean:
|
||||||
|
UI is authoritative only for API keys, deployments, and cluster
|
||||||
|
actions. Config editing is exposed but optional, with CLI still the
|
||||||
|
canonical path for reproducible installs.
|
||||||
|
- **Do we expose a model prompt playground?** Nice to have for smoke
|
||||||
|
tests, but it blurs the operator/consumer line. Defer to v2.
|
||||||
|
- **Cluster-wide vs per-node view.** On a worker node, should the UI
|
||||||
|
show only local state, or proxy the control plane's cluster view? The
|
||||||
|
current lean: workers show local-only, and link to the control plane
|
||||||
|
for cluster views. This avoids split-brain confusion.
|
||||||
|
- **Access control granularity.** API keys today are coarse (all or
|
||||||
|
nothing). A future model might scope keys per deployment or per
|
||||||
|
model. Reserve the column in the Access ▸ API Keys table now.
|
||||||
|
|
||||||
|
## 🛑 Out of Scope (for this concept)
|
||||||
|
|
||||||
|
- End-user chat or prompt UIs for the OpenAI-compatible API.
|
||||||
|
- Billing, quotas, or usage-based pricing dashboards.
|
||||||
|
- Multi-tenant isolation beyond per-API-key separation.
|
||||||
|
- Anything specific to non-vLLM runtimes — the UI assumes the v1.1.0
|
||||||
|
reorientation around vLLM as the only first-class runtime.
|
||||||
@@ -0,0 +1,88 @@
|
|||||||
|
#!/usr/bin/env -S deno run --allow-read --allow-write
|
||||||
|
|
||||||
|
/**
|
||||||
|
* bundle-ui.ts
|
||||||
|
*
|
||||||
|
* Walks `ts_web/` and emits `ts_bundled/bundle.ts`, a single TypeScript
|
||||||
|
* module that exports every UI asset as base64 in order. The daemon's
|
||||||
|
* UI server imports this module at runtime to serve the console without
|
||||||
|
* any external filesystem dependency — the entire browser app ends up
|
||||||
|
* embedded in the `deno compile` binary.
|
||||||
|
*
|
||||||
|
* The output shape matches the `@stack.gallery/registry` convention so
|
||||||
|
* a consumer can loop `files` as `{ path, contentBase64 }` entries.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { walk } from 'jsr:@std/fs@^1.0.0/walk';
|
||||||
|
import { fromFileUrl, join, relative } from 'jsr:@std/path@^1.0.0';
|
||||||
|
|
||||||
|
const here = fromFileUrl(new URL('./', import.meta.url));
|
||||||
|
const repoRoot = join(here, '..');
|
||||||
|
const sourceDir = join(repoRoot, 'ts_web');
|
||||||
|
const outDir = join(repoRoot, 'ts_bundled');
|
||||||
|
const outFile = join(outDir, 'bundle.ts');
|
||||||
|
|
||||||
|
async function main(): Promise<void> {
|
||||||
|
const entries: Array<{ path: string; contentBase64: string; size: number }> = [];
|
||||||
|
|
||||||
|
for await (
|
||||||
|
const entry of walk(sourceDir, {
|
||||||
|
includeDirs: false,
|
||||||
|
includeSymlinks: false,
|
||||||
|
})
|
||||||
|
) {
|
||||||
|
const rel = relative(sourceDir, entry.path).replaceAll('\\', '/');
|
||||||
|
const bytes = await Deno.readFile(entry.path);
|
||||||
|
entries.push({
|
||||||
|
path: rel,
|
||||||
|
contentBase64: encodeBase64(bytes),
|
||||||
|
size: bytes.byteLength,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
entries.sort((a, b) => a.path.localeCompare(b.path));
|
||||||
|
|
||||||
|
const generatedAt = new Date().toISOString();
|
||||||
|
const totalBytes = entries.reduce((sum, e) => sum + e.size, 0);
|
||||||
|
|
||||||
|
const header = [
|
||||||
|
'// AUTO-GENERATED — do not edit.',
|
||||||
|
'// Regenerate with: deno task bundle:ui',
|
||||||
|
`// Source: ts_web/ (${entries.length} files, ${totalBytes} bytes)`,
|
||||||
|
`// Generated: ${generatedAt}`,
|
||||||
|
'',
|
||||||
|
'export interface IBundledFile {',
|
||||||
|
' path: string;',
|
||||||
|
' contentBase64: string;',
|
||||||
|
'}',
|
||||||
|
'',
|
||||||
|
'export const files: IBundledFile[] = [',
|
||||||
|
].join('\n');
|
||||||
|
|
||||||
|
const body = entries.map((e) =>
|
||||||
|
` { path: ${JSON.stringify(e.path)}, contentBase64: ${JSON.stringify(e.contentBase64)} },`
|
||||||
|
).join('\n');
|
||||||
|
|
||||||
|
const footer = '\n];\n';
|
||||||
|
|
||||||
|
await Deno.mkdir(outDir, { recursive: true });
|
||||||
|
await Deno.writeTextFile(outFile, header + '\n' + body + footer);
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`bundle-ui: wrote ${entries.length} file(s), ${totalBytes} bytes → ${
|
||||||
|
relative(repoRoot, outFile)
|
||||||
|
}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function encodeBase64(bytes: Uint8Array): string {
|
||||||
|
let binary = '';
|
||||||
|
for (let i = 0; i < bytes.length; i++) {
|
||||||
|
binary += String.fromCharCode(bytes[i]);
|
||||||
|
}
|
||||||
|
return btoa(binary);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (import.meta.main) {
|
||||||
|
await main();
|
||||||
|
}
|
||||||
@@ -0,0 +1,131 @@
|
|||||||
|
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||||
|
import { EventEmitter } from 'node:events';
|
||||||
|
import { AuthMiddleware } from '../ts/api/middleware/auth.ts';
|
||||||
|
import { ApiRouter } from '../ts/api/router.ts';
|
||||||
|
|
||||||
|
class TestResponse {
|
||||||
|
public statusCode = 200;
|
||||||
|
public headers: Record<string, string> = {};
|
||||||
|
public body = '';
|
||||||
|
|
||||||
|
public writeHead(statusCode: number, headers: Record<string, string>): TestResponse {
|
||||||
|
this.statusCode = statusCode;
|
||||||
|
this.headers = headers;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public end(body = ''): TestResponse {
|
||||||
|
this.body = body;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class TestRequest extends EventEmitter {
|
||||||
|
public method: string;
|
||||||
|
public headers: Record<string, string>;
|
||||||
|
public destroyed = false;
|
||||||
|
public paused = false;
|
||||||
|
|
||||||
|
constructor(method: string, headers: Record<string, string>) {
|
||||||
|
super();
|
||||||
|
this.method = method;
|
||||||
|
this.headers = headers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public pause(): this {
|
||||||
|
this.paused = true;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public destroy(): this {
|
||||||
|
this.destroyed = true;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function createRouter(): ApiRouter {
|
||||||
|
return new ApiRouter(
|
||||||
|
{} as never,
|
||||||
|
{} as never,
|
||||||
|
{} as never,
|
||||||
|
{} as never,
|
||||||
|
['valid-key'],
|
||||||
|
{
|
||||||
|
authMiddleware: new AuthMiddleware(['valid-key']),
|
||||||
|
sanityMiddleware: {
|
||||||
|
validateChatRequest() {
|
||||||
|
return { valid: true };
|
||||||
|
},
|
||||||
|
sanitizeChatRequest(body: Record<string, unknown>) {
|
||||||
|
return body;
|
||||||
|
},
|
||||||
|
validateEmbeddingsRequest() {
|
||||||
|
return { valid: true };
|
||||||
|
},
|
||||||
|
sanitizeEmbeddingsRequest(body: Record<string, unknown>) {
|
||||||
|
return body;
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
chatHandler: {
|
||||||
|
async handleChatCompletion() {
|
||||||
|
throw new Error('chat handler should not run in this test');
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
modelsHandler: {
|
||||||
|
async handleListModels() {
|
||||||
|
throw new Error('models handler should not run in this test');
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
embeddingsHandler: {
|
||||||
|
async handleEmbeddings() {
|
||||||
|
throw new Error('embeddings handler should not run in this test');
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Deno.test('ApiRouter returns 404 for unknown endpoints', async () => {
|
||||||
|
const router = createRouter();
|
||||||
|
const response = new TestResponse();
|
||||||
|
|
||||||
|
await router.route(
|
||||||
|
{ method: 'GET', headers: {} } as never,
|
||||||
|
response as never,
|
||||||
|
'/does-not-exist',
|
||||||
|
);
|
||||||
|
|
||||||
|
assertEquals(response.statusCode, 404);
|
||||||
|
assertEquals(JSON.parse(response.body).error.type, 'invalid_request_error');
|
||||||
|
});
|
||||||
|
|
||||||
|
Deno.test('ApiRouter rejects protected endpoints without a bearer token', async () => {
|
||||||
|
const router = createRouter();
|
||||||
|
const response = new TestResponse();
|
||||||
|
|
||||||
|
await router.route(
|
||||||
|
{ method: 'GET', headers: {} } as never,
|
||||||
|
response as never,
|
||||||
|
'/v1/models',
|
||||||
|
);
|
||||||
|
|
||||||
|
assertEquals(response.statusCode, 401);
|
||||||
|
assertEquals(JSON.parse(response.body).error.type, 'authentication_error');
|
||||||
|
});
|
||||||
|
|
||||||
|
Deno.test('ApiRouter returns 413 for oversized request bodies', async () => {
|
||||||
|
const router = createRouter();
|
||||||
|
const request = new TestRequest('POST', {
|
||||||
|
authorization: 'Bearer valid-key',
|
||||||
|
});
|
||||||
|
const response = new TestResponse();
|
||||||
|
|
||||||
|
const routePromise = router.route(request as never, response as never, '/v1/chat/completions');
|
||||||
|
request.emit('data', 'x'.repeat(10 * 1024 * 1024 + 1));
|
||||||
|
await routePromise;
|
||||||
|
|
||||||
|
assertEquals(response.statusCode, 413);
|
||||||
|
assertEquals(request.paused, true);
|
||||||
|
assertEquals(request.destroyed, true);
|
||||||
|
assertEquals(JSON.parse(response.body).error.message, 'Request body too large');
|
||||||
|
});
|
||||||
@@ -0,0 +1,315 @@
|
|||||||
|
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||||
|
import { ApiServer } from '../ts/api/server.ts';
|
||||||
|
|
||||||
|
Deno.test('ApiServer serves health metrics and authenticated model listings', async () => {
|
||||||
|
const port = 18100 + Math.floor(Math.random() * 1000);
|
||||||
|
const server = new ApiServer(
|
||||||
|
{
|
||||||
|
host: '127.0.0.1',
|
||||||
|
port,
|
||||||
|
apiKeys: ['valid-key'],
|
||||||
|
cors: false,
|
||||||
|
corsOrigins: [],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
async getAllStatus() {
|
||||||
|
return new Map([
|
||||||
|
['vllm-1', { running: true, health: 'healthy' }],
|
||||||
|
]);
|
||||||
|
},
|
||||||
|
async getAllAvailableModels() {
|
||||||
|
return new Map([
|
||||||
|
['meta-llama/Llama-3.1-8B-Instruct', [{ type: 'vllm' }]],
|
||||||
|
]);
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
async getAllModels() {
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
id: 'meta-llama/Llama-3.1-8B-Instruct',
|
||||||
|
engine: 'vllm',
|
||||||
|
source: { repo: 'meta-llama/Llama-3.1-8B-Instruct' },
|
||||||
|
capabilities: { chat: true },
|
||||||
|
requirements: { minVramGb: 18 },
|
||||||
|
},
|
||||||
|
];
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{} as never,
|
||||||
|
{
|
||||||
|
getStatus() {
|
||||||
|
return {
|
||||||
|
localNode: null,
|
||||||
|
nodes: [],
|
||||||
|
models: {},
|
||||||
|
desiredDeployments: [],
|
||||||
|
};
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
gpuDetector: {
|
||||||
|
async detectGpus() {
|
||||||
|
return [{ id: 'nvidia-0' }];
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
await server.start();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const healthResponse = await fetch(`http://127.0.0.1:${port}/health`);
|
||||||
|
const healthBody = await healthResponse.json();
|
||||||
|
assertEquals(healthResponse.status, 200);
|
||||||
|
assertEquals(healthBody.status, 'ok');
|
||||||
|
assertEquals(healthBody.models, 1);
|
||||||
|
assertEquals(Array.isArray(healthBody.reasons), true);
|
||||||
|
assertEquals(healthBody.reasons.length, 0);
|
||||||
|
assertEquals(typeof healthResponse.headers.get('x-request-id'), 'string');
|
||||||
|
|
||||||
|
const metricsResponse = await fetch(`http://127.0.0.1:${port}/metrics`);
|
||||||
|
const metricsBody = await metricsResponse.text();
|
||||||
|
assertEquals(metricsResponse.status, 200);
|
||||||
|
assertEquals(metricsBody.includes('modelgrid_uptime_seconds'), true);
|
||||||
|
assertEquals(metricsBody.includes('modelgrid_models_available 1'), true);
|
||||||
|
|
||||||
|
const unauthenticatedModels = await fetch(`http://127.0.0.1:${port}/v1/models`);
|
||||||
|
const unauthenticatedBody = await unauthenticatedModels.json();
|
||||||
|
assertEquals(unauthenticatedModels.status, 401);
|
||||||
|
assertEquals(unauthenticatedBody.error.type, 'authentication_error');
|
||||||
|
|
||||||
|
const authenticatedModels = await fetch(`http://127.0.0.1:${port}/v1/models`, {
|
||||||
|
headers: {
|
||||||
|
Authorization: 'Bearer valid-key',
|
||||||
|
'X-Request-Id': 'req-test-models',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const authenticatedBody = await authenticatedModels.json();
|
||||||
|
assertEquals(authenticatedModels.status, 200);
|
||||||
|
assertEquals(authenticatedBody.object, 'list');
|
||||||
|
assertEquals(authenticatedBody.data[0].id, 'meta-llama/Llama-3.1-8B-Instruct');
|
||||||
|
assertEquals(authenticatedModels.headers.get('x-request-id'), 'req-test-models');
|
||||||
|
|
||||||
|
const metricsAfterRequests = await fetch(`http://127.0.0.1:${port}/metrics`);
|
||||||
|
const metricsAfterRequestsBody = await metricsAfterRequests.text();
|
||||||
|
assertEquals(
|
||||||
|
metricsAfterRequestsBody.includes('modelgrid_api_requests_total{path="/v1/models"} 2'),
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
assertEquals(
|
||||||
|
metricsAfterRequestsBody.includes('modelgrid_api_auth_failures_total{path="/v1/models"} 1'),
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
} finally {
|
||||||
|
await server.stop();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Deno.test('ApiServer metrics expose 5xx counts for failing endpoints', async () => {
|
||||||
|
const port = 19100 + Math.floor(Math.random() * 1000);
|
||||||
|
let failModelListing = true;
|
||||||
|
const server = new ApiServer(
|
||||||
|
{
|
||||||
|
host: '127.0.0.1',
|
||||||
|
port,
|
||||||
|
apiKeys: ['valid-key'],
|
||||||
|
cors: false,
|
||||||
|
corsOrigins: [],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
async getAllStatus() {
|
||||||
|
return new Map();
|
||||||
|
},
|
||||||
|
async getAllAvailableModels() {
|
||||||
|
if (failModelListing) {
|
||||||
|
failModelListing = false;
|
||||||
|
throw new Error('models unavailable');
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Map();
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
async getAllModels() {
|
||||||
|
return [];
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{} as never,
|
||||||
|
{
|
||||||
|
getStatus() {
|
||||||
|
return {
|
||||||
|
localNode: null,
|
||||||
|
nodes: [],
|
||||||
|
models: {},
|
||||||
|
desiredDeployments: [],
|
||||||
|
};
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
gpuDetector: {
|
||||||
|
async detectGpus() {
|
||||||
|
return [];
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
await server.start();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const failedModels = await fetch(`http://127.0.0.1:${port}/v1/models`, {
|
||||||
|
headers: {
|
||||||
|
Authorization: 'Bearer valid-key',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
assertEquals(failedModels.status, 500);
|
||||||
|
await failedModels.text();
|
||||||
|
|
||||||
|
const metricsResponse = await fetch(`http://127.0.0.1:${port}/metrics`);
|
||||||
|
const metricsBody = await metricsResponse.text();
|
||||||
|
assertEquals(
|
||||||
|
metricsBody.includes('modelgrid_api_server_errors_total{path="/v1/models"} 1'),
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
} finally {
|
||||||
|
await server.stop();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Deno.test('ApiServer health reports degraded reasons', async () => {
|
||||||
|
const port = 19300 + Math.floor(Math.random() * 1000);
|
||||||
|
const server = new ApiServer(
|
||||||
|
{
|
||||||
|
host: '127.0.0.1',
|
||||||
|
port,
|
||||||
|
apiKeys: ['valid-key'],
|
||||||
|
cors: false,
|
||||||
|
corsOrigins: [],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
async getAllStatus() {
|
||||||
|
return new Map([
|
||||||
|
['vllm-1', { running: false, health: 'unhealthy' }],
|
||||||
|
]);
|
||||||
|
},
|
||||||
|
async getAllAvailableModels() {
|
||||||
|
return new Map();
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
async getAllModels() {
|
||||||
|
return [];
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{} as never,
|
||||||
|
{
|
||||||
|
getStatus() {
|
||||||
|
return {
|
||||||
|
localNode: null,
|
||||||
|
nodes: [],
|
||||||
|
models: {},
|
||||||
|
desiredDeployments: [],
|
||||||
|
};
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
gpuDetector: {
|
||||||
|
async detectGpus() {
|
||||||
|
return [{ id: 'nvidia-0' }];
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
await server.start();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await fetch(`http://127.0.0.1:${port}/health`);
|
||||||
|
const body = await response.json();
|
||||||
|
|
||||||
|
assertEquals(response.status, 503);
|
||||||
|
assertEquals(body.status, 'degraded');
|
||||||
|
assertEquals(body.reasons.includes('unhealthy_container'), true);
|
||||||
|
assertEquals(body.reasons.includes('no_models_available'), true);
|
||||||
|
} finally {
|
||||||
|
await server.stop();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => {
|
||||||
|
const port = 19200 + Math.floor(Math.random() * 1000);
|
||||||
|
const server = new ApiServer(
|
||||||
|
{
|
||||||
|
host: '127.0.0.1',
|
||||||
|
port,
|
||||||
|
apiKeys: ['valid-key'],
|
||||||
|
rateLimit: 2,
|
||||||
|
cors: false,
|
||||||
|
corsOrigins: [],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
async getAllStatus() {
|
||||||
|
return new Map();
|
||||||
|
},
|
||||||
|
async getAllAvailableModels() {
|
||||||
|
return new Map([
|
||||||
|
['meta-llama/Llama-3.1-8B-Instruct', [{ type: 'vllm' }]],
|
||||||
|
]);
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
async getAllModels() {
|
||||||
|
return [];
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{} as never,
|
||||||
|
{
|
||||||
|
getStatus() {
|
||||||
|
return {
|
||||||
|
localNode: null,
|
||||||
|
nodes: [],
|
||||||
|
models: {},
|
||||||
|
desiredDeployments: [],
|
||||||
|
};
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
gpuDetector: {
|
||||||
|
async detectGpus() {
|
||||||
|
return [];
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
await server.start();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const requestHeaders = {
|
||||||
|
Authorization: 'Bearer valid-key',
|
||||||
|
};
|
||||||
|
|
||||||
|
const first = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
|
||||||
|
assertEquals(first.status, 200);
|
||||||
|
await first.text();
|
||||||
|
|
||||||
|
const second = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
|
||||||
|
assertEquals(second.status, 200);
|
||||||
|
await second.text();
|
||||||
|
|
||||||
|
const third = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
|
||||||
|
assertEquals(third.status, 429);
|
||||||
|
assertEquals((await third.json()).error.type, 'rate_limit_exceeded');
|
||||||
|
|
||||||
|
const health = await fetch(`http://127.0.0.1:${port}/health`);
|
||||||
|
assertEquals(health.status, 200);
|
||||||
|
await health.text();
|
||||||
|
|
||||||
|
const metrics = await fetch(`http://127.0.0.1:${port}/metrics`);
|
||||||
|
assertEquals(metrics.status, 200);
|
||||||
|
await metrics.text();
|
||||||
|
} finally {
|
||||||
|
await server.stop();
|
||||||
|
}
|
||||||
|
});
|
||||||
@@ -0,0 +1,120 @@
|
|||||||
|
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||||
|
import { ChatHandler } from '../ts/api/handlers/chat.ts';
|
||||||
|
import { EmbeddingsHandler } from '../ts/api/handlers/embeddings.ts';
|
||||||
|
import { UpstreamTimeoutError } from '../ts/containers/base-container.ts';
|
||||||
|
|
||||||
|
class TestResponse {
|
||||||
|
public statusCode = 200;
|
||||||
|
public headers: Record<string, string> = {};
|
||||||
|
public body = '';
|
||||||
|
|
||||||
|
public writeHead(statusCode: number, headers: Record<string, string>): TestResponse {
|
||||||
|
this.statusCode = statusCode;
|
||||||
|
this.headers = headers;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public end(body = ''): TestResponse {
|
||||||
|
this.body = body;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public write(_chunk: string | Uint8Array): boolean {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Deno.test('ChatHandler maps upstream timeouts to 504 responses', async () => {
|
||||||
|
const handler = new ChatHandler(
|
||||||
|
{
|
||||||
|
async findContainerForModel() {
|
||||||
|
return {
|
||||||
|
async chatCompletion() {
|
||||||
|
throw new UpstreamTimeoutError();
|
||||||
|
},
|
||||||
|
async chatCompletionStream() {
|
||||||
|
throw new UpstreamTimeoutError();
|
||||||
|
},
|
||||||
|
};
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
async getModel(modelName: string) {
|
||||||
|
return { id: modelName };
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
async loadModel() {
|
||||||
|
return { success: false };
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
shouldDeployLocallyFirst() {
|
||||||
|
return false;
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
);
|
||||||
|
|
||||||
|
const response = new TestResponse();
|
||||||
|
await handler.handleChatCompletion(
|
||||||
|
{ headers: {} } as never,
|
||||||
|
response as never,
|
||||||
|
{ model: 'meta-llama/Llama-3.1-8B-Instruct', messages: [{ role: 'user', content: 'hi' }] },
|
||||||
|
);
|
||||||
|
|
||||||
|
assertEquals(response.statusCode, 504);
|
||||||
|
assertEquals(JSON.parse(response.body).error.type, 'upstream_timeout');
|
||||||
|
});
|
||||||
|
|
||||||
|
Deno.test('EmbeddingsHandler maps upstream timeouts to 504 responses', async () => {
|
||||||
|
const originalFetch = globalThis.fetch;
|
||||||
|
globalThis.fetch = async () => {
|
||||||
|
const error = new Error('request aborted');
|
||||||
|
error.name = 'AbortError';
|
||||||
|
throw error;
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
const handler = new EmbeddingsHandler(
|
||||||
|
{
|
||||||
|
async findContainerForModel() {
|
||||||
|
return null;
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
async getModel(modelName: string) {
|
||||||
|
return { id: modelName };
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
{
|
||||||
|
async ensureModelViaControlPlane(modelName: string) {
|
||||||
|
return {
|
||||||
|
location: {
|
||||||
|
modelId: modelName,
|
||||||
|
nodeName: 'worker-a',
|
||||||
|
endpoint: 'http://worker-a:8080',
|
||||||
|
healthy: true,
|
||||||
|
engine: 'vllm',
|
||||||
|
containerId: 'remote',
|
||||||
|
},
|
||||||
|
};
|
||||||
|
},
|
||||||
|
getLocalNodeName() {
|
||||||
|
return 'control';
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
);
|
||||||
|
|
||||||
|
const response = new TestResponse();
|
||||||
|
await handler.handleEmbeddings(
|
||||||
|
{ headers: {} } as never,
|
||||||
|
response as never,
|
||||||
|
{ model: 'BAAI/bge-m3', input: 'hello' },
|
||||||
|
);
|
||||||
|
|
||||||
|
assertEquals(response.statusCode, 504);
|
||||||
|
assertEquals(JSON.parse(response.body).error.type, 'upstream_timeout');
|
||||||
|
} finally {
|
||||||
|
globalThis.fetch = originalFetch;
|
||||||
|
}
|
||||||
|
});
|
||||||
@@ -0,0 +1,134 @@
|
|||||||
|
import { assertEquals, assertExists } from 'jsr:@std/assert@^1.0.0';
|
||||||
|
import { CLUSTER, PATHS } from '../ts/constants.ts';
|
||||||
|
import { ClusterManager } from '../ts/cluster/cluster-manager.ts';
|
||||||
|
import type { IClusterNodeHeartbeat } from '../ts/interfaces/cluster.ts';
|
||||||
|
|
||||||
|
function createNode(nodeName: string, lastSeenAt: number): IClusterNodeHeartbeat {
|
||||||
|
return {
|
||||||
|
nodeName,
|
||||||
|
role: nodeName === 'control' ? 'control-plane' : 'worker',
|
||||||
|
endpoint: `http://${nodeName}:8080`,
|
||||||
|
healthy: true,
|
||||||
|
resources: {
|
||||||
|
gpuCount: 1,
|
||||||
|
totalVramGb: 24,
|
||||||
|
availableVramGb: 24,
|
||||||
|
maxSingleGpuVramGb: 24,
|
||||||
|
largestGpuGroupCount: 1,
|
||||||
|
largestGpuGroupVramGb: 24,
|
||||||
|
deploymentCount: 0,
|
||||||
|
topologyGroups: [
|
||||||
|
{
|
||||||
|
id: 'nvidia-0',
|
||||||
|
vendor: 'nvidia',
|
||||||
|
gpuIds: ['gpu-0'],
|
||||||
|
gpuCount: 1,
|
||||||
|
totalVramGb: 24,
|
||||||
|
maxSingleGpuVramGb: 24,
|
||||||
|
busNumbers: [1],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
deployments: [],
|
||||||
|
lastSeenAt,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async function waitForPersistence(): Promise<void> {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 25));
|
||||||
|
}
|
||||||
|
|
||||||
|
Deno.test('ClusterManager initialize loads persisted state and prunes stale nodes', async () => {
|
||||||
|
const originalDataDir = PATHS.DATA_DIR;
|
||||||
|
const tempDir = await Deno.makeTempDir();
|
||||||
|
(PATHS as { DATA_DIR: string }).DATA_DIR = tempDir;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const now = Date.now();
|
||||||
|
await Deno.writeTextFile(
|
||||||
|
`${tempDir}/cluster-state.json`,
|
||||||
|
JSON.stringify({
|
||||||
|
nodes: [
|
||||||
|
createNode('control', now),
|
||||||
|
createNode('worker-fresh', now),
|
||||||
|
createNode('worker-stale', now - CLUSTER.NODE_STALE_AFTER_MS - 1000),
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
await Deno.writeTextFile(
|
||||||
|
`${tempDir}/cluster-control-state.json`,
|
||||||
|
JSON.stringify({
|
||||||
|
desiredDeployments: [
|
||||||
|
{ modelId: 'meta-llama/Llama-3.1-8B-Instruct', desiredReplicas: 2, updatedAt: now },
|
||||||
|
],
|
||||||
|
nodeSchedulerStates: {
|
||||||
|
'worker-fresh': 'cordoned',
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
const clusterManager = new ClusterManager();
|
||||||
|
clusterManager.configure({
|
||||||
|
enabled: true,
|
||||||
|
nodeName: 'control',
|
||||||
|
role: 'control-plane',
|
||||||
|
bindHost: '0.0.0.0',
|
||||||
|
gossipPort: 7946,
|
||||||
|
heartbeatIntervalMs: 5000,
|
||||||
|
seedNodes: [],
|
||||||
|
});
|
||||||
|
|
||||||
|
await clusterManager.initialize();
|
||||||
|
|
||||||
|
assertEquals(clusterManager.getAllNodes().map((node) => node.nodeName), ['control', 'worker-fresh']);
|
||||||
|
assertExists(clusterManager.getLocalNode());
|
||||||
|
assertEquals(clusterManager.getDesiredDeployments().length, 1);
|
||||||
|
assertEquals(clusterManager.getNodeSchedulerState('worker-fresh'), 'cordoned');
|
||||||
|
} finally {
|
||||||
|
(PATHS as { DATA_DIR: string }).DATA_DIR = originalDataDir;
|
||||||
|
await Deno.remove(tempDir, { recursive: true });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Deno.test('ClusterManager persists state only after initialization completes', async () => {
|
||||||
|
const originalDataDir = PATHS.DATA_DIR;
|
||||||
|
const tempDir = await Deno.makeTempDir();
|
||||||
|
(PATHS as { DATA_DIR: string }).DATA_DIR = tempDir;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const clusterManager = new ClusterManager();
|
||||||
|
clusterManager.configure({
|
||||||
|
enabled: true,
|
||||||
|
nodeName: 'control',
|
||||||
|
role: 'control-plane',
|
||||||
|
bindHost: '0.0.0.0',
|
||||||
|
gossipPort: 7946,
|
||||||
|
heartbeatIntervalMs: 5000,
|
||||||
|
seedNodes: [],
|
||||||
|
});
|
||||||
|
|
||||||
|
clusterManager.updateLocalNode(createNode('control', Date.now()));
|
||||||
|
clusterManager.upsertDesiredDeployment('meta-llama/Llama-3.1-8B-Instruct', 1);
|
||||||
|
await waitForPersistence();
|
||||||
|
|
||||||
|
assertEquals(await Deno.stat(`${tempDir}/cluster-state.json`).catch(() => null), null);
|
||||||
|
assertEquals(await Deno.stat(`${tempDir}/cluster-control-state.json`).catch(() => null), null);
|
||||||
|
|
||||||
|
await clusterManager.initialize();
|
||||||
|
clusterManager.updateLocalNode(createNode('control', Date.now()));
|
||||||
|
clusterManager.setNodeSchedulerState('control', 'active');
|
||||||
|
clusterManager.upsertDesiredDeployment('meta-llama/Llama-3.1-8B-Instruct', 3);
|
||||||
|
await waitForPersistence();
|
||||||
|
|
||||||
|
const stateFile = JSON.parse(await Deno.readTextFile(`${tempDir}/cluster-state.json`));
|
||||||
|
const controlFile = JSON.parse(await Deno.readTextFile(`${tempDir}/cluster-control-state.json`));
|
||||||
|
|
||||||
|
assertEquals(stateFile.nodes.length, 1);
|
||||||
|
assertEquals(stateFile.nodes[0].nodeName, 'control');
|
||||||
|
assertEquals(controlFile.desiredDeployments[0].desiredReplicas, 3);
|
||||||
|
assertEquals(controlFile.nodeSchedulerStates.control, 'active');
|
||||||
|
} finally {
|
||||||
|
(PATHS as { DATA_DIR: string }).DATA_DIR = originalDataDir;
|
||||||
|
await Deno.remove(tempDir, { recursive: true });
|
||||||
|
}
|
||||||
|
});
|
||||||
@@ -0,0 +1,111 @@
|
|||||||
|
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||||
|
import { ConfigHandler } from '../ts/cli/config-handler.ts';
|
||||||
|
import { PATHS } from '../ts/constants.ts';
|
||||||
|
import { logger } from '../ts/logger.ts';
|
||||||
|
|
||||||
|
Deno.test('ConfigHandler init writes the current default config shape', async () => {
|
||||||
|
const tempDir = await Deno.makeTempDir();
|
||||||
|
const originalConfigDir = PATHS.CONFIG_DIR;
|
||||||
|
const originalConfigFile = PATHS.CONFIG_FILE;
|
||||||
|
(PATHS as { CONFIG_DIR: string }).CONFIG_DIR = tempDir;
|
||||||
|
(PATHS as { CONFIG_FILE: string }).CONFIG_FILE = `${tempDir}/config.json`;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const handler = new ConfigHandler();
|
||||||
|
await handler.init();
|
||||||
|
|
||||||
|
const config = JSON.parse(await Deno.readTextFile(`${tempDir}/config.json`));
|
||||||
|
assertEquals(config.ui.enabled, true);
|
||||||
|
assertEquals(config.ui.assetSource, 'bundle');
|
||||||
|
assertEquals(config.cluster.role, 'standalone');
|
||||||
|
assertEquals(config.models.registryUrl, 'https://list.modelgrid.com/catalog/models.json');
|
||||||
|
assertEquals(config.models.autoDeploy, true);
|
||||||
|
assertEquals(config.models.defaultEngine, 'vllm');
|
||||||
|
} finally {
|
||||||
|
(PATHS as { CONFIG_DIR: string }).CONFIG_DIR = originalConfigDir;
|
||||||
|
(PATHS as { CONFIG_FILE: string }).CONFIG_FILE = originalConfigFile;
|
||||||
|
await Deno.remove(tempDir, { recursive: true });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Deno.test('ConfigHandler show renders canonical model and ui settings', async () => {
|
||||||
|
const tempDir = await Deno.makeTempDir();
|
||||||
|
const originalConfigFile = PATHS.CONFIG_FILE;
|
||||||
|
const boxes: Array<{ title: string; lines: string[] }> = [];
|
||||||
|
const originalLog = logger.log;
|
||||||
|
const originalLogBox = logger.logBox;
|
||||||
|
|
||||||
|
(PATHS as { CONFIG_FILE: string }).CONFIG_FILE = `${tempDir}/config.json`;
|
||||||
|
|
||||||
|
logger.log = (_message: string) => {};
|
||||||
|
logger.logBox = (
|
||||||
|
title: string,
|
||||||
|
lines: string[],
|
||||||
|
) => {
|
||||||
|
boxes.push({ title, lines });
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
await Deno.writeTextFile(
|
||||||
|
`${tempDir}/config.json`,
|
||||||
|
JSON.stringify({
|
||||||
|
version: '1.0.0',
|
||||||
|
api: {
|
||||||
|
port: 8080,
|
||||||
|
host: '0.0.0.0',
|
||||||
|
apiKeys: ['sk-test'],
|
||||||
|
rateLimit: 60,
|
||||||
|
cors: true,
|
||||||
|
corsOrigins: ['*'],
|
||||||
|
},
|
||||||
|
ui: {
|
||||||
|
enabled: true,
|
||||||
|
port: 8081,
|
||||||
|
host: '0.0.0.0',
|
||||||
|
assetSource: 'bundle',
|
||||||
|
},
|
||||||
|
docker: {
|
||||||
|
networkName: 'modelgrid',
|
||||||
|
runtime: 'docker',
|
||||||
|
},
|
||||||
|
gpus: {
|
||||||
|
autoDetect: true,
|
||||||
|
assignments: {},
|
||||||
|
},
|
||||||
|
containers: [],
|
||||||
|
models: {
|
||||||
|
registryUrl: 'https://example.com/catalog.json',
|
||||||
|
autoDeploy: false,
|
||||||
|
defaultEngine: 'vllm',
|
||||||
|
autoLoad: ['meta-llama/Llama-3.1-8B-Instruct'],
|
||||||
|
},
|
||||||
|
cluster: {
|
||||||
|
enabled: false,
|
||||||
|
nodeName: 'modelgrid-local',
|
||||||
|
role: 'standalone',
|
||||||
|
bindHost: '0.0.0.0',
|
||||||
|
gossipPort: 7946,
|
||||||
|
heartbeatIntervalMs: 5000,
|
||||||
|
seedNodes: [],
|
||||||
|
},
|
||||||
|
checkInterval: 30000,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
const handler = new ConfigHandler();
|
||||||
|
await handler.show();
|
||||||
|
|
||||||
|
const modelsBox = boxes.find((box) => box.title === 'Models');
|
||||||
|
assertEquals(modelsBox?.lines.some((line) => line.includes('Auto Deploy:')), true);
|
||||||
|
assertEquals(modelsBox?.lines.some((line) => line.includes('Default Engine: vllm')), true);
|
||||||
|
assertEquals(modelsBox?.lines.some((line) => line.includes('https://example.com/catalog.json')), true);
|
||||||
|
|
||||||
|
const apiBox = boxes.find((box) => box.title === 'API Server');
|
||||||
|
assertEquals(apiBox?.lines.some((line) => line.includes('Rate Limit: 60 req/min')), true);
|
||||||
|
} finally {
|
||||||
|
logger.log = originalLog;
|
||||||
|
logger.logBox = originalLogBox;
|
||||||
|
(PATHS as { CONFIG_FILE: string }).CONFIG_FILE = originalConfigFile;
|
||||||
|
await Deno.remove(tempDir, { recursive: true });
|
||||||
|
}
|
||||||
|
});
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||||
|
import { ModelRegistry } from '../ts/models/registry.ts';
|
||||||
|
|
||||||
|
Deno.test('ModelRegistry falls back to the built-in catalog when the source is unavailable', async () => {
|
||||||
|
const registry = new ModelRegistry('http://127.0.0.1:9/catalog.json');
|
||||||
|
const catalog = await registry.fetchCatalog(true);
|
||||||
|
|
||||||
|
assertEquals(catalog.version, '1.0');
|
||||||
|
assertEquals(catalog.models.length > 0, true);
|
||||||
|
});
|
||||||
|
|
||||||
|
Deno.test('ModelRegistry reads catalog entries from a local file source', async () => {
|
||||||
|
const filePath = await Deno.makeTempFile({ suffix: '.json' });
|
||||||
|
await Deno.writeTextFile(
|
||||||
|
filePath,
|
||||||
|
JSON.stringify({
|
||||||
|
version: '1.0',
|
||||||
|
generatedAt: '2026-01-01T00:00:00.000Z',
|
||||||
|
models: [
|
||||||
|
{
|
||||||
|
id: 'Qwen/Qwen2.5-7B-Instruct',
|
||||||
|
aliases: ['qwen-local'],
|
||||||
|
engine: 'vllm',
|
||||||
|
source: { repo: 'Qwen/Qwen2.5-7B-Instruct' },
|
||||||
|
capabilities: { chat: true },
|
||||||
|
requirements: { minVramGb: 16 },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const registry = new ModelRegistry(filePath);
|
||||||
|
const model = await registry.getModel('qwen-local');
|
||||||
|
|
||||||
|
assertEquals(model?.id, 'Qwen/Qwen2.5-7B-Instruct');
|
||||||
|
} finally {
|
||||||
|
await Deno.remove(filePath);
|
||||||
|
}
|
||||||
|
});
|
||||||
@@ -0,0 +1,119 @@
|
|||||||
|
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||||
|
import { ConfigManager } from '../ts/config/config-manager.ts';
|
||||||
|
import type { IModelGridConfig } from '../ts/interfaces/config.ts';
|
||||||
|
import { logger } from '../ts/logger.ts';
|
||||||
|
|
||||||
|
Deno.test('ConfigManager normalizes current config defaults', () => {
|
||||||
|
const configManager = new ConfigManager();
|
||||||
|
|
||||||
|
const normalized = configManager.normalizeConfig({
|
||||||
|
version: '1.0.0',
|
||||||
|
api: {
|
||||||
|
port: 9000,
|
||||||
|
host: '127.0.0.1',
|
||||||
|
apiKeys: ['test-key'],
|
||||||
|
},
|
||||||
|
docker: {
|
||||||
|
networkName: 'modelgrid',
|
||||||
|
runtime: 'docker',
|
||||||
|
},
|
||||||
|
gpus: {
|
||||||
|
autoDetect: true,
|
||||||
|
assignments: {},
|
||||||
|
},
|
||||||
|
containers: [],
|
||||||
|
models: {
|
||||||
|
registryUrl: 'https://example.com/catalog.json',
|
||||||
|
autoDeploy: false,
|
||||||
|
defaultEngine: 'vllm',
|
||||||
|
autoLoad: ['Qwen/Qwen2.5-7B-Instruct'],
|
||||||
|
},
|
||||||
|
cluster: {
|
||||||
|
enabled: false,
|
||||||
|
nodeName: 'modelgrid-local',
|
||||||
|
role: 'standalone',
|
||||||
|
bindHost: '0.0.0.0',
|
||||||
|
gossipPort: 7946,
|
||||||
|
heartbeatIntervalMs: 5000,
|
||||||
|
seedNodes: [],
|
||||||
|
},
|
||||||
|
checkInterval: 15000,
|
||||||
|
});
|
||||||
|
|
||||||
|
assertEquals(normalized.models.registryUrl, 'https://example.com/catalog.json');
|
||||||
|
assertEquals(normalized.models.autoDeploy, false);
|
||||||
|
assertEquals(normalized.models.defaultEngine, 'vllm');
|
||||||
|
assertEquals(normalized.ui.enabled, true);
|
||||||
|
assertEquals(normalized.ui.port, 8081);
|
||||||
|
assertEquals(normalized.ui.assetSource, 'bundle');
|
||||||
|
});
|
||||||
|
|
||||||
|
Deno.test('ConfigManager warns when config contains ignored keys', () => {
|
||||||
|
const configManager = new ConfigManager();
|
||||||
|
const warnings: string[] = [];
|
||||||
|
const originalWarn = logger.warn;
|
||||||
|
logger.warn = (message: string) => {
|
||||||
|
warnings.push(message);
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
configManager.normalizeConfig({
|
||||||
|
version: '1.0.0',
|
||||||
|
api: {
|
||||||
|
port: 8080,
|
||||||
|
host: '127.0.0.1',
|
||||||
|
apiKeys: [],
|
||||||
|
},
|
||||||
|
docker: {
|
||||||
|
networkName: 'modelgrid',
|
||||||
|
runtime: 'docker',
|
||||||
|
},
|
||||||
|
gpus: {
|
||||||
|
autoDetect: true,
|
||||||
|
assignments: {},
|
||||||
|
},
|
||||||
|
containers: [
|
||||||
|
{ id: 'legacy', type: 'ollama' } as never,
|
||||||
|
],
|
||||||
|
models: {
|
||||||
|
registryUrl: 'https://example.com/catalog.json',
|
||||||
|
autoDeploy: true,
|
||||||
|
defaultEngine: 'vllm',
|
||||||
|
autoLoad: [],
|
||||||
|
greenlistUrl: 'https://legacy.example.com/catalog.json',
|
||||||
|
autoPull: true,
|
||||||
|
defaultContainer: 'legacy-container',
|
||||||
|
} as IModelGridConfig['models'] & {
|
||||||
|
greenlistUrl: string;
|
||||||
|
autoPull: boolean;
|
||||||
|
defaultContainer: string;
|
||||||
|
},
|
||||||
|
cluster: {
|
||||||
|
enabled: false,
|
||||||
|
nodeName: 'modelgrid-local',
|
||||||
|
role: 'standalone',
|
||||||
|
bindHost: '0.0.0.0',
|
||||||
|
gossipPort: 7946,
|
||||||
|
heartbeatIntervalMs: 5000,
|
||||||
|
seedNodes: [],
|
||||||
|
},
|
||||||
|
checkInterval: 30000,
|
||||||
|
legacySection: true,
|
||||||
|
} as Partial<IModelGridConfig> & {
|
||||||
|
legacySection: boolean;
|
||||||
|
models: IModelGridConfig['models'] & {
|
||||||
|
greenlistUrl: string;
|
||||||
|
autoPull: boolean;
|
||||||
|
defaultContainer: string;
|
||||||
|
};
|
||||||
|
});
|
||||||
|
} finally {
|
||||||
|
logger.warn = originalWarn;
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(warnings.includes('Ignoring unknown config key: legacySection'), true);
|
||||||
|
assertEquals(warnings.includes('Ignoring removed config key: models.greenlistUrl'), true);
|
||||||
|
assertEquals(warnings.includes('Ignoring removed config key: models.autoPull'), true);
|
||||||
|
assertEquals(warnings.includes('Ignoring removed config key: models.defaultContainer'), true);
|
||||||
|
assertEquals(warnings.includes('Ignoring unsupported container type: ollama'), true);
|
||||||
|
});
|
||||||
@@ -0,0 +1,67 @@
|
|||||||
|
// Smoke test for the UI server: bundle mode serves /index.html,
|
||||||
|
// disk mode serves /app.js, /_ui/overview returns structured JSON.
|
||||||
|
// Run with: deno run --allow-all test/ui-server.smoke.ts
|
||||||
|
|
||||||
|
import { UiServer } from '../ts/ui/server.ts';
|
||||||
|
import { ContainerManager } from '../ts/containers/container-manager.ts';
|
||||||
|
import { ClusterManager } from '../ts/cluster/cluster-manager.ts';
|
||||||
|
|
||||||
|
async function probe(source: 'bundle' | 'disk', port: number): Promise<void> {
|
||||||
|
const cm = new ContainerManager();
|
||||||
|
const cluster = new ClusterManager();
|
||||||
|
cluster.configure({
|
||||||
|
enabled: false,
|
||||||
|
nodeName: 'test-node',
|
||||||
|
role: 'standalone',
|
||||||
|
bindHost: '127.0.0.1',
|
||||||
|
gossipPort: 7946,
|
||||||
|
heartbeatIntervalMs: 5000,
|
||||||
|
seedNodes: [],
|
||||||
|
});
|
||||||
|
|
||||||
|
const server = new UiServer(
|
||||||
|
{ enabled: true, port, host: '127.0.0.1', assetSource: source },
|
||||||
|
cm,
|
||||||
|
cluster,
|
||||||
|
);
|
||||||
|
await server.start();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const index = await fetch(`http://127.0.0.1:${port}/`);
|
||||||
|
const indexBody = await index.text();
|
||||||
|
if (!index.ok || !indexBody.includes('ModelGrid')) {
|
||||||
|
throw new Error(`[${source}] index.html missing expected content (status=${index.status})`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const app = await fetch(`http://127.0.0.1:${port}/app.js`);
|
||||||
|
const appBody = await app.text();
|
||||||
|
if (!app.ok || !appBody.includes('ModelGrid UI')) {
|
||||||
|
throw new Error(`[${source}] app.js missing expected content (status=${app.status})`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const spa = await fetch(`http://127.0.0.1:${port}/cluster/nodes`);
|
||||||
|
const spaBody = await spa.text();
|
||||||
|
if (!spa.ok || !spaBody.includes('ModelGrid')) {
|
||||||
|
throw new Error(`[${source}] SPA fallback did not return index.html (status=${spa.status})`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const overview = await fetch(`http://127.0.0.1:${port}/_ui/overview`);
|
||||||
|
const data = await overview.json();
|
||||||
|
if (!overview.ok || data.node?.name !== 'test-node' || !data.health?.status) {
|
||||||
|
throw new Error(`[${source}] /_ui/overview unexpected: ${JSON.stringify(data)}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const missing = await fetch(`http://127.0.0.1:${port}/nope.png`);
|
||||||
|
if (missing.status !== 404) {
|
||||||
|
throw new Error(`[${source}] expected 404 for missing asset, got ${missing.status}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`ok: ${source} mode — index, app.js, SPA fallback, /_ui/overview, 404`);
|
||||||
|
} finally {
|
||||||
|
await server.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
await probe('bundle', 18081);
|
||||||
|
await probe('disk', 18082);
|
||||||
|
console.log('UI server smoke test passed');
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||||
|
import { ClusterManager } from '../ts/cluster/cluster-manager.ts';
|
||||||
|
import { UiServer } from '../ts/ui/server.ts';
|
||||||
|
|
||||||
|
Deno.test('UiServer overview mirrors degraded API health semantics', async () => {
|
||||||
|
const port = 20300 + Math.floor(Math.random() * 1000);
|
||||||
|
const cluster = new ClusterManager();
|
||||||
|
cluster.configure({
|
||||||
|
enabled: false,
|
||||||
|
nodeName: 'ui-test-node',
|
||||||
|
role: 'standalone',
|
||||||
|
bindHost: '127.0.0.1',
|
||||||
|
gossipPort: 7946,
|
||||||
|
heartbeatIntervalMs: 5000,
|
||||||
|
seedNodes: [],
|
||||||
|
});
|
||||||
|
|
||||||
|
const server = new UiServer(
|
||||||
|
{ enabled: true, port, host: '127.0.0.1', assetSource: 'disk' },
|
||||||
|
{
|
||||||
|
async getAllStatus() {
|
||||||
|
return new Map([
|
||||||
|
['vllm-1', { running: false, health: 'unhealthy' }],
|
||||||
|
]);
|
||||||
|
},
|
||||||
|
async getAllAvailableModels() {
|
||||||
|
return new Map();
|
||||||
|
},
|
||||||
|
} as never,
|
||||||
|
cluster,
|
||||||
|
);
|
||||||
|
|
||||||
|
(server as unknown as {
|
||||||
|
gpuDetector: { detectGpus: () => Promise<unknown[]> };
|
||||||
|
}).gpuDetector = {
|
||||||
|
async detectGpus() {
|
||||||
|
return [{ id: 'nvidia-0' }];
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
await server.start();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await fetch(`http://127.0.0.1:${port}/_ui/overview`);
|
||||||
|
const body = await response.json();
|
||||||
|
|
||||||
|
assertEquals(response.status, 200);
|
||||||
|
assertEquals(body.health.status, 'degraded');
|
||||||
|
assertEquals(body.health.reasons.includes('unhealthy_container'), true);
|
||||||
|
assertEquals(body.health.reasons.includes('no_models_available'), true);
|
||||||
|
assertEquals(body.node.name, 'ui-test-node');
|
||||||
|
} finally {
|
||||||
|
await server.stop();
|
||||||
|
}
|
||||||
|
});
|
||||||
+16
-1
@@ -6,6 +6,8 @@ import * as http from 'node:http';
|
|||||||
import type { IApiError, IChatCompletionRequest } from '../../interfaces/api.ts';
|
import type { IApiError, IChatCompletionRequest } from '../../interfaces/api.ts';
|
||||||
import { ClusterCoordinator } from '../../cluster/coordinator.ts';
|
import { ClusterCoordinator } from '../../cluster/coordinator.ts';
|
||||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||||
|
import { UpstreamTimeoutError } from '../../containers/base-container.ts';
|
||||||
|
import { API_SERVER } from '../../constants.ts';
|
||||||
import { logger } from '../../logger.ts';
|
import { logger } from '../../logger.ts';
|
||||||
import { ModelRegistry } from '../../models/registry.ts';
|
import { ModelRegistry } from '../../models/registry.ts';
|
||||||
import { ModelLoader } from '../../models/loader.ts';
|
import { ModelLoader } from '../../models/loader.ts';
|
||||||
@@ -85,6 +87,11 @@ export class ChatHandler {
|
|||||||
|
|
||||||
await this.proxyChatRequest(req, res, ensured.location.endpoint, requestBody);
|
await this.proxyChatRequest(req, res, ensured.location.endpoint, requestBody);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
if (error instanceof UpstreamTimeoutError) {
|
||||||
|
this.sendError(res, 504, error.message, 'upstream_timeout');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const message = error instanceof Error ? error.message : String(error);
|
const message = error instanceof Error ? error.message : String(error);
|
||||||
logger.error(`Chat completion error: ${message}`);
|
logger.error(`Chat completion error: ${message}`);
|
||||||
this.sendError(res, 500, `Chat completion failed: ${message}`, 'server_error');
|
this.sendError(res, 500, `Chat completion failed: ${message}`, 'server_error');
|
||||||
@@ -158,11 +165,19 @@ export class ChatHandler {
|
|||||||
targetEndpoint: string,
|
targetEndpoint: string,
|
||||||
body: IChatCompletionRequest,
|
body: IChatCompletionRequest,
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
|
const controller = new AbortController();
|
||||||
|
const timeout = setTimeout(() => controller.abort(), API_SERVER.REQUEST_TIMEOUT_MS);
|
||||||
const response = await fetch(`${targetEndpoint}/v1/chat/completions`, {
|
const response = await fetch(`${targetEndpoint}/v1/chat/completions`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: this.buildForwardHeaders(req),
|
headers: this.buildForwardHeaders(req),
|
||||||
body: JSON.stringify(body),
|
body: JSON.stringify(body),
|
||||||
});
|
signal: controller.signal,
|
||||||
|
}).catch((error) => {
|
||||||
|
if (error instanceof Error && error.name === 'AbortError') {
|
||||||
|
throw new UpstreamTimeoutError();
|
||||||
|
}
|
||||||
|
throw error;
|
||||||
|
}).finally(() => clearTimeout(timeout));
|
||||||
|
|
||||||
if (body.stream) {
|
if (body.stream) {
|
||||||
res.writeHead(response.status, {
|
res.writeHead(response.status, {
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ import type {
|
|||||||
} from '../../interfaces/api.ts';
|
} from '../../interfaces/api.ts';
|
||||||
import { ClusterCoordinator } from '../../cluster/coordinator.ts';
|
import { ClusterCoordinator } from '../../cluster/coordinator.ts';
|
||||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||||
|
import { UpstreamTimeoutError } from '../../containers/base-container.ts';
|
||||||
|
import { API_SERVER } from '../../constants.ts';
|
||||||
import { logger } from '../../logger.ts';
|
import { logger } from '../../logger.ts';
|
||||||
import { ModelRegistry } from '../../models/registry.ts';
|
import { ModelRegistry } from '../../models/registry.ts';
|
||||||
|
|
||||||
@@ -80,7 +82,7 @@ export class EmbeddingsHandler {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const response = await fetch(`${ensured.location.endpoint}/v1/embeddings`, {
|
const response = await this.fetchWithTimeout(`${ensured.location.endpoint}/v1/embeddings`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: this.buildForwardHeaders(req),
|
headers: this.buildForwardHeaders(req),
|
||||||
body: JSON.stringify(requestBody),
|
body: JSON.stringify(requestBody),
|
||||||
@@ -92,6 +94,11 @@ export class EmbeddingsHandler {
|
|||||||
});
|
});
|
||||||
res.end(text);
|
res.end(text);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
if (error instanceof UpstreamTimeoutError) {
|
||||||
|
this.sendError(res, 504, error.message, 'upstream_timeout');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const message = error instanceof Error ? error.message : String(error);
|
const message = error instanceof Error ? error.message : String(error);
|
||||||
logger.error(`Embeddings error: ${message}`);
|
logger.error(`Embeddings error: ${message}`);
|
||||||
this.sendError(res, 500, `Embeddings generation failed: ${message}`, 'server_error');
|
this.sendError(res, 500, `Embeddings generation failed: ${message}`, 'server_error');
|
||||||
@@ -159,7 +166,7 @@ export class EmbeddingsHandler {
|
|||||||
model: string,
|
model: string,
|
||||||
input: string,
|
input: string,
|
||||||
): Promise<{ vector: number[]; tokenCount: number }> {
|
): Promise<{ vector: number[]; tokenCount: number }> {
|
||||||
const response = await fetch(`${endpoint}/v1/embeddings`, {
|
const response = await this.fetchWithTimeout(`${endpoint}/v1/embeddings`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ model, input }),
|
body: JSON.stringify({ model, input }),
|
||||||
@@ -181,7 +188,7 @@ export class EmbeddingsHandler {
|
|||||||
_model: string,
|
_model: string,
|
||||||
input: string,
|
input: string,
|
||||||
): Promise<{ vector: number[]; tokenCount: number }> {
|
): Promise<{ vector: number[]; tokenCount: number }> {
|
||||||
const response = await fetch(`${endpoint}/embed`, {
|
const response = await this.fetchWithTimeout(`${endpoint}/embed`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ inputs: input }),
|
body: JSON.stringify({ inputs: input }),
|
||||||
@@ -214,6 +221,25 @@ export class EmbeddingsHandler {
|
|||||||
return headers;
|
return headers;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private async fetchWithTimeout(url: string, init: RequestInit): Promise<Response> {
|
||||||
|
const controller = new AbortController();
|
||||||
|
const timeout = setTimeout(() => controller.abort(), API_SERVER.REQUEST_TIMEOUT_MS);
|
||||||
|
|
||||||
|
try {
|
||||||
|
return await fetch(url, {
|
||||||
|
...init,
|
||||||
|
signal: controller.signal,
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
if (error instanceof Error && error.name === 'AbortError') {
|
||||||
|
throw new UpstreamTimeoutError();
|
||||||
|
}
|
||||||
|
throw error;
|
||||||
|
} finally {
|
||||||
|
clearTimeout(timeout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private sendError(
|
private sendError(
|
||||||
res: http.ServerResponse,
|
res: http.ServerResponse,
|
||||||
statusCode: number,
|
statusCode: number,
|
||||||
|
|||||||
+64
-17
@@ -17,6 +17,19 @@ import { EmbeddingsHandler } from './handlers/embeddings.ts';
|
|||||||
import { AuthMiddleware } from './middleware/auth.ts';
|
import { AuthMiddleware } from './middleware/auth.ts';
|
||||||
import { SanityMiddleware } from './middleware/sanity.ts';
|
import { SanityMiddleware } from './middleware/sanity.ts';
|
||||||
|
|
||||||
|
interface IParsedRequestBody {
|
||||||
|
kind: 'ok' | 'invalid' | 'too_large';
|
||||||
|
body?: unknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface IApiRouterOptions {
|
||||||
|
chatHandler?: ChatHandler;
|
||||||
|
modelsHandler?: ModelsHandler;
|
||||||
|
embeddingsHandler?: EmbeddingsHandler;
|
||||||
|
authMiddleware?: AuthMiddleware;
|
||||||
|
sanityMiddleware?: SanityMiddleware;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* API Router - routes requests to handlers
|
* API Router - routes requests to handlers
|
||||||
*/
|
*/
|
||||||
@@ -37,6 +50,7 @@ export class ApiRouter {
|
|||||||
modelLoader: ModelLoader,
|
modelLoader: ModelLoader,
|
||||||
clusterCoordinator: ClusterCoordinator,
|
clusterCoordinator: ClusterCoordinator,
|
||||||
apiKeys: string[],
|
apiKeys: string[],
|
||||||
|
options: IApiRouterOptions = {},
|
||||||
) {
|
) {
|
||||||
this.containerManager = containerManager;
|
this.containerManager = containerManager;
|
||||||
this.modelRegistry = modelRegistry;
|
this.modelRegistry = modelRegistry;
|
||||||
@@ -44,22 +58,23 @@ export class ApiRouter {
|
|||||||
this.clusterCoordinator = clusterCoordinator;
|
this.clusterCoordinator = clusterCoordinator;
|
||||||
|
|
||||||
// Initialize handlers
|
// Initialize handlers
|
||||||
this.chatHandler = new ChatHandler(
|
this.chatHandler = options.chatHandler || new ChatHandler(
|
||||||
containerManager,
|
containerManager,
|
||||||
modelRegistry,
|
modelRegistry,
|
||||||
modelLoader,
|
modelLoader,
|
||||||
clusterCoordinator,
|
clusterCoordinator,
|
||||||
);
|
);
|
||||||
this.modelsHandler = new ModelsHandler(containerManager, modelRegistry, clusterCoordinator);
|
this.modelsHandler =
|
||||||
this.embeddingsHandler = new EmbeddingsHandler(
|
options.modelsHandler || new ModelsHandler(containerManager, modelRegistry, clusterCoordinator);
|
||||||
|
this.embeddingsHandler = options.embeddingsHandler || new EmbeddingsHandler(
|
||||||
containerManager,
|
containerManager,
|
||||||
modelRegistry,
|
modelRegistry,
|
||||||
clusterCoordinator,
|
clusterCoordinator,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Initialize middleware
|
// Initialize middleware
|
||||||
this.authMiddleware = new AuthMiddleware(apiKeys);
|
this.authMiddleware = options.authMiddleware || new AuthMiddleware(apiKeys);
|
||||||
this.sanityMiddleware = new SanityMiddleware(modelRegistry);
|
this.sanityMiddleware = options.sanityMiddleware || new SanityMiddleware(modelRegistry);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -119,11 +134,16 @@ export class ApiRouter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Parse body
|
// Parse body
|
||||||
const body = await this.parseRequestBody(req);
|
const parsedBody = await this.parseRequestBody(req);
|
||||||
if (!body) {
|
if (parsedBody.kind === 'too_large') {
|
||||||
|
this.sendError(res, 413, 'Request body too large', 'invalid_request_error');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (parsedBody.kind !== 'ok') {
|
||||||
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
const body = parsedBody.body;
|
||||||
|
|
||||||
// Validate request
|
// Validate request
|
||||||
const validation = this.sanityMiddleware.validateChatRequest(body);
|
const validation = this.sanityMiddleware.validateChatRequest(body);
|
||||||
@@ -155,11 +175,16 @@ export class ApiRouter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Parse body
|
// Parse body
|
||||||
const body = await this.parseRequestBody(req);
|
const parsedBody = await this.parseRequestBody(req);
|
||||||
if (!body) {
|
if (parsedBody.kind === 'too_large') {
|
||||||
|
this.sendError(res, 413, 'Request body too large', 'invalid_request_error');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (parsedBody.kind !== 'ok') {
|
||||||
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
const body = parsedBody.body;
|
||||||
|
|
||||||
// Convert to chat format and handle
|
// Convert to chat format and handle
|
||||||
const chatBody = this.convertCompletionToChat(body as Record<string, unknown>);
|
const chatBody = this.convertCompletionToChat(body as Record<string, unknown>);
|
||||||
@@ -229,11 +254,16 @@ export class ApiRouter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Parse body
|
// Parse body
|
||||||
const body = await this.parseRequestBody(req);
|
const parsedBody = await this.parseRequestBody(req);
|
||||||
if (!body) {
|
if (parsedBody.kind === 'too_large') {
|
||||||
|
this.sendError(res, 413, 'Request body too large', 'invalid_request_error');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (parsedBody.kind !== 'ok') {
|
||||||
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
const body = parsedBody.body;
|
||||||
|
|
||||||
const validation = this.sanityMiddleware.validateEmbeddingsRequest(body);
|
const validation = this.sanityMiddleware.validateEmbeddingsRequest(body);
|
||||||
if (!validation.valid) {
|
if (!validation.valid) {
|
||||||
@@ -250,28 +280,45 @@ export class ApiRouter {
|
|||||||
/**
|
/**
|
||||||
* Parse request body
|
* Parse request body
|
||||||
*/
|
*/
|
||||||
private async parseRequestBody(req: http.IncomingMessage): Promise<unknown | null> {
|
private async parseRequestBody(req: http.IncomingMessage): Promise<IParsedRequestBody> {
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
let body = '';
|
let body = '';
|
||||||
|
let resolved = false;
|
||||||
|
|
||||||
|
const finish = (result: IParsedRequestBody): void => {
|
||||||
|
if (resolved) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
resolved = true;
|
||||||
|
resolve(result);
|
||||||
|
};
|
||||||
|
|
||||||
req.on('data', (chunk) => {
|
req.on('data', (chunk) => {
|
||||||
|
if (resolved) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
body += chunk.toString();
|
body += chunk.toString();
|
||||||
// Limit body size
|
|
||||||
if (body.length > 10 * 1024 * 1024) {
|
if (body.length > 10 * 1024 * 1024) {
|
||||||
resolve(null);
|
req.pause();
|
||||||
|
req.destroy();
|
||||||
|
finish({ kind: 'too_large' });
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
req.on('end', () => {
|
req.on('end', () => {
|
||||||
try {
|
try {
|
||||||
resolve(JSON.parse(body));
|
finish({ kind: 'ok', body: JSON.parse(body) });
|
||||||
} catch {
|
} catch {
|
||||||
resolve(null);
|
finish({ kind: 'invalid' });
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
req.on('error', () => {
|
req.on('error', () => {
|
||||||
resolve(null);
|
if (!resolved) {
|
||||||
|
finish({ kind: 'invalid' });
|
||||||
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
+126
-35
@@ -16,6 +16,13 @@ import { ModelRegistry } from '../models/registry.ts';
|
|||||||
import { ModelLoader } from '../models/loader.ts';
|
import { ModelLoader } from '../models/loader.ts';
|
||||||
import { GpuDetector } from '../hardware/gpu-detector.ts';
|
import { GpuDetector } from '../hardware/gpu-detector.ts';
|
||||||
import { ClusterHandler } from './handlers/cluster.ts';
|
import { ClusterHandler } from './handlers/cluster.ts';
|
||||||
|
import { buildHealthSnapshot } from '../helpers/health.ts';
|
||||||
|
|
||||||
|
interface IApiServerOptions {
|
||||||
|
gpuDetector?: GpuDetector;
|
||||||
|
router?: ApiRouter;
|
||||||
|
clusterHandler?: ClusterHandler;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* API Server for ModelGrid
|
* API Server for ModelGrid
|
||||||
@@ -31,6 +38,10 @@ export class ApiServer {
|
|||||||
private clusterCoordinator: ClusterCoordinator;
|
private clusterCoordinator: ClusterCoordinator;
|
||||||
private clusterHandler: ClusterHandler;
|
private clusterHandler: ClusterHandler;
|
||||||
private startTime: number = 0;
|
private startTime: number = 0;
|
||||||
|
private requestCounts = new Map<string, number>();
|
||||||
|
private authFailureCounts = new Map<string, number>();
|
||||||
|
private serverErrorCounts = new Map<string, number>();
|
||||||
|
private rateLimitBuckets = new Map<string, { count: number; windowStart: number }>();
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
config: IApiConfig,
|
config: IApiConfig,
|
||||||
@@ -38,15 +49,16 @@ export class ApiServer {
|
|||||||
modelRegistry: ModelRegistry,
|
modelRegistry: ModelRegistry,
|
||||||
modelLoader: ModelLoader,
|
modelLoader: ModelLoader,
|
||||||
clusterCoordinator: ClusterCoordinator,
|
clusterCoordinator: ClusterCoordinator,
|
||||||
|
options: IApiServerOptions = {},
|
||||||
) {
|
) {
|
||||||
this.config = config;
|
this.config = config;
|
||||||
this.containerManager = containerManager;
|
this.containerManager = containerManager;
|
||||||
this.modelRegistry = modelRegistry;
|
this.modelRegistry = modelRegistry;
|
||||||
this.gpuDetector = new GpuDetector();
|
this.gpuDetector = options.gpuDetector || new GpuDetector();
|
||||||
this.modelLoader = modelLoader;
|
this.modelLoader = modelLoader;
|
||||||
this.clusterCoordinator = clusterCoordinator;
|
this.clusterCoordinator = clusterCoordinator;
|
||||||
this.clusterHandler = new ClusterHandler(clusterCoordinator);
|
this.clusterHandler = options.clusterHandler || new ClusterHandler(clusterCoordinator);
|
||||||
this.router = new ApiRouter(
|
this.router = options.router || new ApiRouter(
|
||||||
containerManager,
|
containerManager,
|
||||||
modelRegistry,
|
modelRegistry,
|
||||||
this.modelLoader,
|
this.modelLoader,
|
||||||
@@ -112,6 +124,7 @@ export class ApiServer {
|
|||||||
res: http.ServerResponse,
|
res: http.ServerResponse,
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
|
const requestId = this.ensureRequestId(req, res);
|
||||||
|
|
||||||
// Set CORS headers if enabled
|
// Set CORS headers if enabled
|
||||||
if (this.config.cors) {
|
if (this.config.cors) {
|
||||||
@@ -131,18 +144,27 @@ export class ApiServer {
|
|||||||
|
|
||||||
if (path.startsWith('/_cluster')) {
|
if (path.startsWith('/_cluster')) {
|
||||||
await this.clusterHandler.handle(req, res, path, url);
|
await this.clusterHandler.handle(req, res, path, url);
|
||||||
|
this.recordRequest(path, res.statusCode);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Health check endpoint (no auth required)
|
// Health check endpoint (no auth required)
|
||||||
if (path === '/health' || path === '/healthz') {
|
if (path === '/health' || path === '/healthz') {
|
||||||
await this.handleHealthCheck(res);
|
await this.handleHealthCheck(res);
|
||||||
|
this.recordRequest(path, res.statusCode);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Metrics endpoint (no auth required)
|
// Metrics endpoint (no auth required)
|
||||||
if (path === '/metrics') {
|
if (path === '/metrics') {
|
||||||
await this.handleMetrics(res);
|
await this.handleMetrics(res);
|
||||||
|
this.recordRequest(path, res.statusCode);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!this.isRequestWithinRateLimit(req)) {
|
||||||
|
this.sendError(res, 429, 'Rate limit exceeded', 'rate_limit_exceeded');
|
||||||
|
this.recordRequest(path, res.statusCode);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -156,7 +178,8 @@ export class ApiServer {
|
|||||||
|
|
||||||
// Log request
|
// Log request
|
||||||
const duration = Date.now() - startTime;
|
const duration = Date.now() - startTime;
|
||||||
logger.dim(`${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
|
this.recordRequest(path, res.statusCode);
|
||||||
|
logger.dim(`[${requestId}] ${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -187,44 +210,21 @@ export class ApiServer {
|
|||||||
const gpus = await this.gpuDetector.detectGpus();
|
const gpus = await this.gpuDetector.detectGpus();
|
||||||
const models = await this.containerManager.getAllAvailableModels();
|
const models = await this.containerManager.getAllAvailableModels();
|
||||||
|
|
||||||
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
const response: IHealthResponse = buildHealthSnapshot({
|
||||||
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
statuses,
|
||||||
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
modelCount: models.size,
|
||||||
|
gpus,
|
||||||
// Check container health
|
startTime: this.startTime,
|
||||||
for (const [id, containerStatus] of statuses) {
|
|
||||||
if (containerStatus.running && containerStatus.health === 'healthy') {
|
|
||||||
containerHealth[id] = 'healthy';
|
|
||||||
} else {
|
|
||||||
containerHealth[id] = 'unhealthy';
|
|
||||||
status = 'degraded';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check GPU status
|
|
||||||
for (const gpu of gpus) {
|
|
||||||
gpuStatus[gpu.id] = 'available';
|
|
||||||
}
|
|
||||||
|
|
||||||
const response: IHealthResponse = {
|
|
||||||
status,
|
|
||||||
version: VERSION,
|
version: VERSION,
|
||||||
uptime: Math.floor((Date.now() - this.startTime) / 1000),
|
});
|
||||||
containers: statuses.size,
|
|
||||||
models: models.size,
|
|
||||||
gpus: gpus.length,
|
|
||||||
details: {
|
|
||||||
containers: containerHealth,
|
|
||||||
gpus: gpuStatus,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
res.writeHead(status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
|
res.writeHead(response.status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
|
||||||
res.end(JSON.stringify(response, null, 2));
|
res.end(JSON.stringify(response, null, 2));
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
res.writeHead(500, { 'Content-Type': 'application/json' });
|
res.writeHead(500, { 'Content-Type': 'application/json' });
|
||||||
res.end(JSON.stringify({
|
res.end(JSON.stringify({
|
||||||
status: 'error',
|
status: 'error',
|
||||||
|
reasons: ['gpu_detection_failed'],
|
||||||
error: error instanceof Error ? error.message : String(error),
|
error: error instanceof Error ? error.message : String(error),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
@@ -268,6 +268,28 @@ export class ApiServer {
|
|||||||
metrics.push(`# TYPE modelgrid_gpus_total gauge`);
|
metrics.push(`# TYPE modelgrid_gpus_total gauge`);
|
||||||
metrics.push(`modelgrid_gpus_total ${gpus.length}`);
|
metrics.push(`modelgrid_gpus_total ${gpus.length}`);
|
||||||
|
|
||||||
|
for (const [path, count] of this.requestCounts.entries()) {
|
||||||
|
metrics.push(`# HELP modelgrid_api_requests_total Total API requests by path`);
|
||||||
|
metrics.push(`# TYPE modelgrid_api_requests_total counter`);
|
||||||
|
metrics.push(`modelgrid_api_requests_total{path="${this.escapeMetricLabel(path)}"} ${count}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const [path, count] of this.authFailureCounts.entries()) {
|
||||||
|
metrics.push(`# HELP modelgrid_api_auth_failures_total Total authentication failures by path`);
|
||||||
|
metrics.push(`# TYPE modelgrid_api_auth_failures_total counter`);
|
||||||
|
metrics.push(
|
||||||
|
`modelgrid_api_auth_failures_total{path="${this.escapeMetricLabel(path)}"} ${count}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const [path, count] of this.serverErrorCounts.entries()) {
|
||||||
|
metrics.push(`# HELP modelgrid_api_server_errors_total Total 5xx responses by path`);
|
||||||
|
metrics.push(`# TYPE modelgrid_api_server_errors_total counter`);
|
||||||
|
metrics.push(
|
||||||
|
`modelgrid_api_server_errors_total{path="${this.escapeMetricLabel(path)}"} ${count}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' });
|
res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' });
|
||||||
res.end(metrics.join('\n') + '\n');
|
res.end(metrics.join('\n') + '\n');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -310,4 +332,73 @@ export class ApiServer {
|
|||||||
uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
|
uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private recordRequest(path: string, statusCode: number): void {
|
||||||
|
this.incrementMetric(this.requestCounts, path);
|
||||||
|
|
||||||
|
if (statusCode === 401) {
|
||||||
|
this.incrementMetric(this.authFailureCounts, path);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (statusCode >= 500) {
|
||||||
|
this.incrementMetric(this.serverErrorCounts, path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private isRequestWithinRateLimit(req: http.IncomingMessage): boolean {
|
||||||
|
const configuredLimit = this.config.rateLimit;
|
||||||
|
if (!configuredLimit || configuredLimit <= 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const key = this.getRateLimitKey(req);
|
||||||
|
const now = Date.now();
|
||||||
|
const windowMs = 60 * 1000;
|
||||||
|
const bucket = this.rateLimitBuckets.get(key);
|
||||||
|
|
||||||
|
if (!bucket || now - bucket.windowStart >= windowMs) {
|
||||||
|
this.rateLimitBuckets.set(key, { count: 1, windowStart: now });
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bucket.count >= configuredLimit) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bucket.count += 1;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private getRateLimitKey(req: http.IncomingMessage): string {
|
||||||
|
if (typeof req.headers.authorization === 'string') {
|
||||||
|
const match = req.headers.authorization.match(/^Bearer\s+(.+)$/i);
|
||||||
|
if (match) {
|
||||||
|
return `api_key:${match[1]}`;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return `ip:${req.socket.remoteAddress || 'unknown'}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
private incrementMetric(metric: Map<string, number>, path: string): void {
|
||||||
|
metric.set(path, (metric.get(path) || 0) + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ensureRequestId(req: http.IncomingMessage, res: http.ServerResponse): string {
|
||||||
|
const existing = typeof req.headers['x-request-id'] === 'string'
|
||||||
|
? req.headers['x-request-id']
|
||||||
|
: undefined;
|
||||||
|
const requestId = existing || this.generateRequestId();
|
||||||
|
req.headers['x-request-id'] = requestId;
|
||||||
|
res.setHeader('X-Request-Id', requestId);
|
||||||
|
return requestId;
|
||||||
|
}
|
||||||
|
|
||||||
|
private generateRequestId(): string {
|
||||||
|
return `req-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
private escapeMetricLabel(value: string): string {
|
||||||
|
return value.replaceAll('\\', '\\\\').replaceAll('"', '\\"');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -26,11 +26,8 @@ export class ConfigHandler {
|
|||||||
const configContent = await fs.readFile(configPath, 'utf-8');
|
const configContent = await fs.readFile(configPath, 'utf-8');
|
||||||
const config = JSON.parse(configContent) as IModelGridConfig;
|
const config = JSON.parse(configContent) as IModelGridConfig;
|
||||||
const modelConfig = {
|
const modelConfig = {
|
||||||
registryUrl: config.models.registryUrl ||
|
registryUrl: config.models.registryUrl || 'https://list.modelgrid.com/catalog/models.json',
|
||||||
(config.models as { greenlistUrl?: string }).greenlistUrl ||
|
autoDeploy: config.models.autoDeploy ?? true,
|
||||||
'https://list.modelgrid.com/catalog/models.json',
|
|
||||||
autoDeploy: config.models.autoDeploy ??
|
|
||||||
(config.models as { autoPull?: boolean }).autoPull ?? true,
|
|
||||||
defaultEngine: config.models.defaultEngine || 'vllm',
|
defaultEngine: config.models.defaultEngine || 'vllm',
|
||||||
autoLoad: config.models.autoLoad || [],
|
autoLoad: config.models.autoLoad || [],
|
||||||
};
|
};
|
||||||
@@ -218,6 +215,12 @@ export class ConfigHandler {
|
|||||||
cors: true,
|
cors: true,
|
||||||
corsOrigins: ['*'],
|
corsOrigins: ['*'],
|
||||||
},
|
},
|
||||||
|
ui: {
|
||||||
|
enabled: true,
|
||||||
|
port: 8081,
|
||||||
|
host: '0.0.0.0',
|
||||||
|
assetSource: 'bundle',
|
||||||
|
},
|
||||||
docker: {
|
docker: {
|
||||||
networkName: 'modelgrid',
|
networkName: 'modelgrid',
|
||||||
runtime: 'docker',
|
runtime: 'docker',
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
import os from 'node:os';
|
|
||||||
import * as fs from 'node:fs/promises';
|
import * as fs from 'node:fs/promises';
|
||||||
import type { IModelCatalogEntry } from '../interfaces/catalog.ts';
|
import type { IModelCatalogEntry } from '../interfaces/catalog.ts';
|
||||||
import type {
|
import type {
|
||||||
@@ -14,9 +13,10 @@ import type {
|
|||||||
import { CLUSTER, PATHS } from '../constants.ts';
|
import { CLUSTER, PATHS } from '../constants.ts';
|
||||||
|
|
||||||
export class ClusterManager {
|
export class ClusterManager {
|
||||||
|
private initialized = false;
|
||||||
private config: IClusterConfig = {
|
private config: IClusterConfig = {
|
||||||
enabled: false,
|
enabled: false,
|
||||||
nodeName: os.hostname(),
|
nodeName: 'modelgrid-local',
|
||||||
role: 'standalone',
|
role: 'standalone',
|
||||||
bindHost: CLUSTER.DEFAULT_BIND_HOST,
|
bindHost: CLUSTER.DEFAULT_BIND_HOST,
|
||||||
gossipPort: CLUSTER.DEFAULT_GOSSIP_PORT,
|
gossipPort: CLUSTER.DEFAULT_GOSSIP_PORT,
|
||||||
@@ -64,6 +64,8 @@ export class ClusterManager {
|
|||||||
} catch {
|
} catch {
|
||||||
// No persisted control state yet.
|
// No persisted control state yet.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.initialized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public configure(config: IClusterConfig): void {
|
public configure(config: IClusterConfig): void {
|
||||||
@@ -385,6 +387,10 @@ export class ClusterManager {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private schedulePersist(): void {
|
private schedulePersist(): void {
|
||||||
|
if (!this.initialized) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (this.persistQueued) {
|
if (this.persistQueued) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -397,6 +403,10 @@ export class ClusterManager {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private scheduleControlPersist(): void {
|
private scheduleControlPersist(): void {
|
||||||
|
if (!this.initialized) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (this.controlPersistQueued) {
|
if (this.controlPersistQueued) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,112 @@
|
|||||||
|
import * as fs from 'node:fs/promises';
|
||||||
|
import { PATHS, VERSION } from '../constants.ts';
|
||||||
|
import type { IModelGridConfig } from '../interfaces/config.ts';
|
||||||
|
import { logger } from '../logger.ts';
|
||||||
|
|
||||||
|
export class ConfigManager {
|
||||||
|
public async loadConfig(): Promise<IModelGridConfig> {
|
||||||
|
try {
|
||||||
|
const configContent = await fs.readFile(PATHS.CONFIG_FILE, 'utf-8');
|
||||||
|
return this.normalizeConfig(JSON.parse(configContent) as Partial<IModelGridConfig>);
|
||||||
|
} catch (error) {
|
||||||
|
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
|
||||||
|
throw new Error(`Configuration file not found: ${PATHS.CONFIG_FILE}`);
|
||||||
|
}
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public async saveConfig(config: IModelGridConfig): Promise<void> {
|
||||||
|
await fs.mkdir(PATHS.CONFIG_DIR, { recursive: true });
|
||||||
|
await fs.writeFile(PATHS.CONFIG_FILE, JSON.stringify(config, null, 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
public normalizeConfig(config: Partial<IModelGridConfig>): IModelGridConfig {
|
||||||
|
this.logIgnoredConfigKeys(config);
|
||||||
|
|
||||||
|
const filteredContainers = (config.containers || []).filter(
|
||||||
|
(container) => (container as { type?: string }).type !== 'ollama',
|
||||||
|
);
|
||||||
|
|
||||||
|
return {
|
||||||
|
version: config.version || VERSION,
|
||||||
|
api: {
|
||||||
|
port: config.api?.port || 8080,
|
||||||
|
host: config.api?.host || '0.0.0.0',
|
||||||
|
apiKeys: config.api?.apiKeys || [],
|
||||||
|
rateLimit: config.api?.rateLimit,
|
||||||
|
cors: config.api?.cors ?? true,
|
||||||
|
corsOrigins: config.api?.corsOrigins || ['*'],
|
||||||
|
},
|
||||||
|
ui: {
|
||||||
|
enabled: config.ui?.enabled ?? true,
|
||||||
|
port: config.ui?.port || 8081,
|
||||||
|
host: config.ui?.host || '0.0.0.0',
|
||||||
|
assetSource: config.ui?.assetSource === 'disk' ? 'disk' : 'bundle',
|
||||||
|
},
|
||||||
|
docker: {
|
||||||
|
networkName: config.docker?.networkName || 'modelgrid',
|
||||||
|
runtime: config.docker?.runtime || 'docker',
|
||||||
|
socketPath: config.docker?.socketPath,
|
||||||
|
},
|
||||||
|
gpus: {
|
||||||
|
autoDetect: config.gpus?.autoDetect ?? true,
|
||||||
|
assignments: config.gpus?.assignments || {},
|
||||||
|
},
|
||||||
|
containers: filteredContainers,
|
||||||
|
models: {
|
||||||
|
registryUrl: config.models?.registryUrl || 'https://list.modelgrid.com/catalog/models.json',
|
||||||
|
autoDeploy: config.models?.autoDeploy ?? true,
|
||||||
|
defaultEngine: 'vllm',
|
||||||
|
autoLoad: config.models?.autoLoad || [],
|
||||||
|
},
|
||||||
|
cluster: {
|
||||||
|
enabled: config.cluster?.enabled ?? false,
|
||||||
|
nodeName: config.cluster?.nodeName || 'modelgrid-local',
|
||||||
|
role: config.cluster?.role || 'standalone',
|
||||||
|
bindHost: config.cluster?.bindHost || '0.0.0.0',
|
||||||
|
gossipPort: config.cluster?.gossipPort || 7946,
|
||||||
|
sharedSecret: config.cluster?.sharedSecret,
|
||||||
|
advertiseUrl: config.cluster?.advertiseUrl,
|
||||||
|
controlPlaneUrl: config.cluster?.controlPlaneUrl,
|
||||||
|
heartbeatIntervalMs: config.cluster?.heartbeatIntervalMs || 5000,
|
||||||
|
seedNodes: config.cluster?.seedNodes || [],
|
||||||
|
},
|
||||||
|
checkInterval: config.checkInterval || 30000,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private logIgnoredConfigKeys(config: Partial<IModelGridConfig>): void {
|
||||||
|
const unknownTopLevelKeys = Object.keys(config).filter((key) =>
|
||||||
|
!['version', 'api', 'ui', 'docker', 'gpus', 'containers', 'models', 'cluster', 'checkInterval']
|
||||||
|
.includes(key)
|
||||||
|
);
|
||||||
|
|
||||||
|
for (const key of unknownTopLevelKeys) {
|
||||||
|
logger.warn(`Ignoring unknown config key: ${key}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const legacyModelConfig = config.models as {
|
||||||
|
greenlistUrl?: string;
|
||||||
|
autoPull?: boolean;
|
||||||
|
defaultContainer?: string;
|
||||||
|
} | undefined;
|
||||||
|
|
||||||
|
if (legacyModelConfig?.greenlistUrl) {
|
||||||
|
logger.warn('Ignoring removed config key: models.greenlistUrl');
|
||||||
|
}
|
||||||
|
if (legacyModelConfig?.autoPull !== undefined) {
|
||||||
|
logger.warn('Ignoring removed config key: models.autoPull');
|
||||||
|
}
|
||||||
|
if (legacyModelConfig?.defaultContainer) {
|
||||||
|
logger.warn('Ignoring removed config key: models.defaultContainer');
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const container of config.containers || []) {
|
||||||
|
const containerType = (container as { type?: string }).type;
|
||||||
|
if (containerType === 'ollama') {
|
||||||
|
logger.warn('Ignoring unsupported container type: ollama');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -11,6 +11,7 @@ import type {
|
|||||||
TContainerType,
|
TContainerType,
|
||||||
} from '../interfaces/container.ts';
|
} from '../interfaces/container.ts';
|
||||||
import type { IChatCompletionRequest, IChatCompletionResponse } from '../interfaces/api.ts';
|
import type { IChatCompletionRequest, IChatCompletionResponse } from '../interfaces/api.ts';
|
||||||
|
import { API_SERVER } from '../constants.ts';
|
||||||
import { ContainerRuntime } from '../docker/container-runtime.ts';
|
import { ContainerRuntime } from '../docker/container-runtime.ts';
|
||||||
import { logger } from '../logger.ts';
|
import { logger } from '../logger.ts';
|
||||||
|
|
||||||
@@ -23,6 +24,13 @@ export type TModelPullProgress = (progress: {
|
|||||||
percent?: number;
|
percent?: number;
|
||||||
}) => void;
|
}) => void;
|
||||||
|
|
||||||
|
export class UpstreamTimeoutError extends Error {
|
||||||
|
constructor(message: string = 'Upstream request timed out') {
|
||||||
|
super(message);
|
||||||
|
this.name = 'UpstreamTimeoutError';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract base class for AI model containers
|
* Abstract base class for AI model containers
|
||||||
*/
|
*/
|
||||||
@@ -165,7 +173,7 @@ export abstract class BaseContainer {
|
|||||||
const url = `${endpoint}${path}`;
|
const url = `${endpoint}${path}`;
|
||||||
|
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
const timeout = options.timeout || 30000;
|
const timeout = options.timeout || API_SERVER.REQUEST_TIMEOUT_MS;
|
||||||
const timeoutId = setTimeout(() => controller.abort(), timeout);
|
const timeoutId = setTimeout(() => controller.abort(), timeout);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -180,6 +188,11 @@ export abstract class BaseContainer {
|
|||||||
});
|
});
|
||||||
|
|
||||||
return response;
|
return response;
|
||||||
|
} catch (error) {
|
||||||
|
if (error instanceof Error && error.name === 'AbortError') {
|
||||||
|
throw new UpstreamTimeoutError();
|
||||||
|
}
|
||||||
|
throw error;
|
||||||
} finally {
|
} finally {
|
||||||
clearTimeout(timeoutId);
|
clearTimeout(timeoutId);
|
||||||
}
|
}
|
||||||
|
|||||||
+33
-4
@@ -9,6 +9,7 @@ import { logger } from './logger.ts';
|
|||||||
import { TIMING } from './constants.ts';
|
import { TIMING } from './constants.ts';
|
||||||
import type { ModelGrid } from './modelgrid.ts';
|
import type { ModelGrid } from './modelgrid.ts';
|
||||||
import { ApiServer } from './api/server.ts';
|
import { ApiServer } from './api/server.ts';
|
||||||
|
import { UiServer } from './ui/server.ts';
|
||||||
import type { IModelGridConfig } from './interfaces/config.ts';
|
import type { IModelGridConfig } from './interfaces/config.ts';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -18,6 +19,7 @@ export class Daemon {
|
|||||||
private modelgrid: ModelGrid;
|
private modelgrid: ModelGrid;
|
||||||
private isRunning: boolean = false;
|
private isRunning: boolean = false;
|
||||||
private apiServer?: ApiServer;
|
private apiServer?: ApiServer;
|
||||||
|
private uiServer?: UiServer;
|
||||||
|
|
||||||
constructor(modelgrid: ModelGrid) {
|
constructor(modelgrid: ModelGrid) {
|
||||||
this.modelgrid = modelgrid;
|
this.modelgrid = modelgrid;
|
||||||
@@ -48,6 +50,9 @@ export class Daemon {
|
|||||||
// Start API server
|
// Start API server
|
||||||
await this.startApiServer(config);
|
await this.startApiServer(config);
|
||||||
|
|
||||||
|
// Start UI server (runs on its own port, serves the operations console)
|
||||||
|
await this.startUiServer(config);
|
||||||
|
|
||||||
// Start containers
|
// Start containers
|
||||||
await this.startContainers();
|
await this.startContainers();
|
||||||
|
|
||||||
@@ -67,10 +72,9 @@ export class Daemon {
|
|||||||
await this.monitor();
|
await this.monitor();
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this.isRunning = false;
|
this.isRunning = false;
|
||||||
logger.error(
|
const message = error instanceof Error ? error.message : String(error);
|
||||||
`Daemon failed to start: ${error instanceof Error ? error.message : String(error)}`,
|
logger.error(`Daemon failed to start: ${message}`);
|
||||||
);
|
throw error;
|
||||||
process.exit(1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -86,6 +90,11 @@ export class Daemon {
|
|||||||
|
|
||||||
this.isRunning = false;
|
this.isRunning = false;
|
||||||
|
|
||||||
|
// Stop UI server
|
||||||
|
if (this.uiServer) {
|
||||||
|
await this.uiServer.stop();
|
||||||
|
}
|
||||||
|
|
||||||
// Stop API server
|
// Stop API server
|
||||||
if (this.apiServer) {
|
if (this.apiServer) {
|
||||||
await this.apiServer.stop();
|
await this.apiServer.stop();
|
||||||
@@ -114,6 +123,26 @@ export class Daemon {
|
|||||||
await this.apiServer.start();
|
await this.apiServer.start();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start the UI server, if enabled.
|
||||||
|
*/
|
||||||
|
private async startUiServer(config: IModelGridConfig): Promise<void> {
|
||||||
|
if (!config.ui.enabled) {
|
||||||
|
logger.dim('UI server disabled in configuration');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info('Starting UI server...');
|
||||||
|
|
||||||
|
this.uiServer = new UiServer(
|
||||||
|
config.ui,
|
||||||
|
this.modelgrid.getContainerManager(),
|
||||||
|
this.modelgrid.getClusterManager(),
|
||||||
|
);
|
||||||
|
|
||||||
|
await this.uiServer.start();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start configured containers
|
* Start configured containers
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
import type { IHealthResponse } from '../interfaces/api.ts';
|
||||||
|
import type { IContainerStatus } from '../interfaces/container.ts';
|
||||||
|
import type { IGpuInfo } from '../interfaces/gpu.ts';
|
||||||
|
|
||||||
|
export function buildHealthSnapshot(options: {
|
||||||
|
statuses: Map<string, IContainerStatus>;
|
||||||
|
modelCount: number;
|
||||||
|
gpus: IGpuInfo[];
|
||||||
|
startTime: number;
|
||||||
|
version: string;
|
||||||
|
}): IHealthResponse {
|
||||||
|
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
||||||
|
const reasons = new Set<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>();
|
||||||
|
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
||||||
|
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
||||||
|
|
||||||
|
for (const [id, containerStatus] of options.statuses) {
|
||||||
|
if (containerStatus.running && containerStatus.health === 'healthy') {
|
||||||
|
containerHealth[id] = 'healthy';
|
||||||
|
} else {
|
||||||
|
containerHealth[id] = 'unhealthy';
|
||||||
|
status = 'degraded';
|
||||||
|
reasons.add('unhealthy_container');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const gpu of options.gpus) {
|
||||||
|
gpuStatus[gpu.id] = 'available';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (options.modelCount === 0) {
|
||||||
|
status = 'degraded';
|
||||||
|
reasons.add('no_models_available');
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
status,
|
||||||
|
reasons: Array.from(reasons),
|
||||||
|
version: options.version,
|
||||||
|
uptime: Math.floor((Date.now() - options.startTime) / 1000),
|
||||||
|
containers: options.statuses.size,
|
||||||
|
models: options.modelCount,
|
||||||
|
gpus: options.gpus.length,
|
||||||
|
details: {
|
||||||
|
containers: containerHealth,
|
||||||
|
gpus: gpuStatus,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -309,6 +309,8 @@ export interface IApiError {
|
|||||||
export interface IHealthResponse {
|
export interface IHealthResponse {
|
||||||
/** Status */
|
/** Status */
|
||||||
status: 'ok' | 'degraded' | 'error';
|
status: 'ok' | 'degraded' | 'error';
|
||||||
|
/** Machine-readable reasons for degraded or error states */
|
||||||
|
reasons?: Array<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>;
|
||||||
/** Version */
|
/** Version */
|
||||||
version: string;
|
version: string;
|
||||||
/** Uptime in seconds */
|
/** Uptime in seconds */
|
||||||
|
|||||||
@@ -60,6 +60,28 @@ export interface IModelConfig {
|
|||||||
autoLoad: string[];
|
autoLoad: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Browser-based operations console (UI) configuration.
|
||||||
|
* The UI is served on its own port, distinct from the OpenAI API port,
|
||||||
|
* so that the data plane stays clean.
|
||||||
|
*/
|
||||||
|
export interface IUiConfig {
|
||||||
|
/** Whether to start the UI server alongside the API */
|
||||||
|
enabled: boolean;
|
||||||
|
/** Port to bind the UI server to (default: 8081) */
|
||||||
|
port: number;
|
||||||
|
/** Host to bind the UI server to (default: '0.0.0.0') */
|
||||||
|
host: string;
|
||||||
|
/**
|
||||||
|
* Where UI assets come from.
|
||||||
|
* - 'bundle': from the compiled-in `ts_bundled/bundle.ts` (default, required
|
||||||
|
* for `deno compile` single-binary builds)
|
||||||
|
* - 'disk': read on demand from `ts_web/` for the dev loop
|
||||||
|
* Overridden at runtime by the `UI_ASSET_SOURCE` env var.
|
||||||
|
*/
|
||||||
|
assetSource: 'bundle' | 'disk';
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Main ModelGrid configuration interface
|
* Main ModelGrid configuration interface
|
||||||
*/
|
*/
|
||||||
@@ -68,6 +90,8 @@ export interface IModelGridConfig {
|
|||||||
version: string;
|
version: string;
|
||||||
/** API server configuration */
|
/** API server configuration */
|
||||||
api: IApiConfig;
|
api: IApiConfig;
|
||||||
|
/** UI server configuration */
|
||||||
|
ui: IUiConfig;
|
||||||
/** Docker configuration */
|
/** Docker configuration */
|
||||||
docker: IDockerConfig;
|
docker: IDockerConfig;
|
||||||
/** GPU configuration */
|
/** GPU configuration */
|
||||||
|
|||||||
+7
-76
@@ -24,7 +24,7 @@ import { ClusterHandler } from './cli/cluster-handler.ts';
|
|||||||
import { ModelHandler } from './cli/model-handler.ts';
|
import { ModelHandler } from './cli/model-handler.ts';
|
||||||
import { ConfigHandler } from './cli/config-handler.ts';
|
import { ConfigHandler } from './cli/config-handler.ts';
|
||||||
import { ServiceHandler } from './cli/service-handler.ts';
|
import { ServiceHandler } from './cli/service-handler.ts';
|
||||||
import * as fs from 'node:fs/promises';
|
import { ConfigManager } from './config/config-manager.ts';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ModelGrid - Main application coordinator
|
* ModelGrid - Main application coordinator
|
||||||
@@ -42,6 +42,7 @@ export class ModelGrid {
|
|||||||
private clusterCoordinator?: ClusterCoordinator;
|
private clusterCoordinator?: ClusterCoordinator;
|
||||||
private modelRegistry: ModelRegistry;
|
private modelRegistry: ModelRegistry;
|
||||||
private modelLoader?: ModelLoader;
|
private modelLoader?: ModelLoader;
|
||||||
|
private configManager: ConfigManager;
|
||||||
|
|
||||||
// CLI Handlers
|
// CLI Handlers
|
||||||
private gpuHandler: GpuHandler;
|
private gpuHandler: GpuHandler;
|
||||||
@@ -60,6 +61,7 @@ export class ModelGrid {
|
|||||||
this.containerManager = new ContainerManager();
|
this.containerManager = new ContainerManager();
|
||||||
this.clusterManager = new ClusterManager();
|
this.clusterManager = new ClusterManager();
|
||||||
this.modelRegistry = new ModelRegistry();
|
this.modelRegistry = new ModelRegistry();
|
||||||
|
this.configManager = new ConfigManager();
|
||||||
this.systemd = new Systemd();
|
this.systemd = new Systemd();
|
||||||
this.daemon = new Daemon(this);
|
this.daemon = new Daemon(this);
|
||||||
|
|
||||||
@@ -80,23 +82,8 @@ export class ModelGrid {
|
|||||||
* Load configuration from file
|
* Load configuration from file
|
||||||
*/
|
*/
|
||||||
public async loadConfig(): Promise<void> {
|
public async loadConfig(): Promise<void> {
|
||||||
try {
|
this.config = await this.configManager.loadConfig();
|
||||||
const configContent = await fs.readFile(PATHS.CONFIG_FILE, 'utf-8');
|
logger.dim(`Configuration loaded from ${PATHS.CONFIG_FILE}`);
|
||||||
this.config = this.normalizeConfig(
|
|
||||||
JSON.parse(configContent) as Partial<IModelGridConfig> & {
|
|
||||||
models?: {
|
|
||||||
greenlistUrl?: string;
|
|
||||||
autoPull?: boolean;
|
|
||||||
} & Partial<IModelGridConfig['models']>;
|
|
||||||
},
|
|
||||||
);
|
|
||||||
logger.dim(`Configuration loaded from ${PATHS.CONFIG_FILE}`);
|
|
||||||
} catch (error) {
|
|
||||||
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
|
|
||||||
throw new Error(`Configuration file not found: ${PATHS.CONFIG_FILE}`);
|
|
||||||
}
|
|
||||||
throw error;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -107,8 +94,7 @@ export class ModelGrid {
|
|||||||
throw new Error('No configuration to save');
|
throw new Error('No configuration to save');
|
||||||
}
|
}
|
||||||
|
|
||||||
await fs.mkdir(PATHS.CONFIG_DIR, { recursive: true });
|
await this.configManager.saveConfig(this.config);
|
||||||
await fs.writeFile(PATHS.CONFIG_FILE, JSON.stringify(this.config, null, 2));
|
|
||||||
logger.dim(`Configuration saved to ${PATHS.CONFIG_FILE}`);
|
logger.dim(`Configuration saved to ${PATHS.CONFIG_FILE}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -270,7 +256,7 @@ export class ModelGrid {
|
|||||||
|
|
||||||
// Initialize containers from config
|
// Initialize containers from config
|
||||||
for (const containerConfig of this.config.containers) {
|
for (const containerConfig of this.config.containers) {
|
||||||
await this.containerManager.addContainer(containerConfig);
|
this.containerManager.addContainer(containerConfig);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize model registry
|
// Initialize model registry
|
||||||
@@ -294,61 +280,6 @@ export class ModelGrid {
|
|||||||
logger.success('ModelGrid initialized');
|
logger.success('ModelGrid initialized');
|
||||||
}
|
}
|
||||||
|
|
||||||
private normalizeConfig(
|
|
||||||
config: Partial<IModelGridConfig> & {
|
|
||||||
models?: {
|
|
||||||
greenlistUrl?: string;
|
|
||||||
autoPull?: boolean;
|
|
||||||
} & Partial<IModelGridConfig['models']>;
|
|
||||||
},
|
|
||||||
): IModelGridConfig {
|
|
||||||
const filteredContainers = (config.containers || []).filter(
|
|
||||||
(container) => (container as { type?: string }).type !== 'ollama',
|
|
||||||
);
|
|
||||||
|
|
||||||
return {
|
|
||||||
version: config.version || VERSION,
|
|
||||||
api: {
|
|
||||||
port: config.api?.port || 8080,
|
|
||||||
host: config.api?.host || '0.0.0.0',
|
|
||||||
apiKeys: config.api?.apiKeys || [],
|
|
||||||
rateLimit: config.api?.rateLimit,
|
|
||||||
cors: config.api?.cors ?? true,
|
|
||||||
corsOrigins: config.api?.corsOrigins || ['*'],
|
|
||||||
},
|
|
||||||
docker: {
|
|
||||||
networkName: config.docker?.networkName || 'modelgrid',
|
|
||||||
runtime: config.docker?.runtime || 'docker',
|
|
||||||
socketPath: config.docker?.socketPath,
|
|
||||||
},
|
|
||||||
gpus: {
|
|
||||||
autoDetect: config.gpus?.autoDetect ?? true,
|
|
||||||
assignments: config.gpus?.assignments || {},
|
|
||||||
},
|
|
||||||
containers: filteredContainers,
|
|
||||||
models: {
|
|
||||||
registryUrl: config.models?.registryUrl || config.models?.greenlistUrl ||
|
|
||||||
'https://list.modelgrid.com/catalog/models.json',
|
|
||||||
autoDeploy: config.models?.autoDeploy ?? config.models?.autoPull ?? true,
|
|
||||||
defaultEngine: 'vllm',
|
|
||||||
autoLoad: config.models?.autoLoad || [],
|
|
||||||
},
|
|
||||||
cluster: {
|
|
||||||
enabled: config.cluster?.enabled ?? false,
|
|
||||||
nodeName: config.cluster?.nodeName || 'modelgrid-local',
|
|
||||||
role: config.cluster?.role || 'standalone',
|
|
||||||
bindHost: config.cluster?.bindHost || '0.0.0.0',
|
|
||||||
gossipPort: config.cluster?.gossipPort || 7946,
|
|
||||||
sharedSecret: config.cluster?.sharedSecret,
|
|
||||||
advertiseUrl: config.cluster?.advertiseUrl,
|
|
||||||
controlPlaneUrl: config.cluster?.controlPlaneUrl,
|
|
||||||
heartbeatIntervalMs: config.cluster?.heartbeatIntervalMs || 5000,
|
|
||||||
seedNodes: config.cluster?.seedNodes || [],
|
|
||||||
},
|
|
||||||
checkInterval: config.checkInterval || 30000,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shutdown the ModelGrid system
|
* Shutdown the ModelGrid system
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
import * as fs from 'node:fs/promises';
|
import * as fs from 'node:fs/promises';
|
||||||
import type { IModelCatalog, IModelCatalogEntry } from '../interfaces/catalog.ts';
|
import type { IModelCatalog, IModelCatalogEntry } from '../interfaces/catalog.ts';
|
||||||
import { MODEL_REGISTRY, TIMING } from '../constants.ts';
|
import { API_SERVER, MODEL_REGISTRY, TIMING } from '../constants.ts';
|
||||||
import { logger } from '../logger.ts';
|
import { logger } from '../logger.ts';
|
||||||
|
|
||||||
export class ModelRegistry {
|
export class ModelRegistry {
|
||||||
@@ -167,7 +167,7 @@ export class ModelRegistry {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
const timeout = setTimeout(() => controller.abort(), 30000);
|
const timeout = setTimeout(() => controller.abort(), API_SERVER.REQUEST_TIMEOUT_MS);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch(source, {
|
const response = await fetch(source, {
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
export { UiServer } from './server.ts';
|
||||||
+317
@@ -0,0 +1,317 @@
|
|||||||
|
/**
|
||||||
|
* UI Server
|
||||||
|
*
|
||||||
|
* Serves the ModelGrid operations console on its own port, separate from
|
||||||
|
* the OpenAI-compatible API. Assets come from one of two sources:
|
||||||
|
* - 'disk': read on demand from `ts_web/` (dev loop, hot edits)
|
||||||
|
* - 'bundle': from the generated `ts_bundled/bundle.ts` module
|
||||||
|
* (default, required for `deno compile` single-binary builds)
|
||||||
|
*
|
||||||
|
* Plus a single JSON endpoint `/_ui/overview` that the SPA calls to render
|
||||||
|
* the Overview view without cross-origin fetches into the API server.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import * as http from 'node:http';
|
||||||
|
import * as fs from 'node:fs/promises';
|
||||||
|
import { dirname, extname, join, resolve } from 'node:path';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
import type { IUiConfig } from '../interfaces/config.ts';
|
||||||
|
import type { IHealthResponse } from '../interfaces/api.ts';
|
||||||
|
import { logger } from '../logger.ts';
|
||||||
|
import { VERSION } from '../constants.ts';
|
||||||
|
import type { ContainerManager } from '../containers/container-manager.ts';
|
||||||
|
import type { ClusterManager } from '../cluster/cluster-manager.ts';
|
||||||
|
import { GpuDetector } from '../hardware/gpu-detector.ts';
|
||||||
|
import { buildHealthSnapshot } from '../helpers/health.ts';
|
||||||
|
|
||||||
|
/** Shape of one entry in the generated `ts_bundled/bundle.ts` module. */
interface IBundledFile {
  // Asset path relative to the web root, without a leading slash.
  path: string;
  // File contents, base64-encoded by the bundler.
  contentBase64: string;
}

/** A decoded asset ready to serve: raw bytes plus their MIME type. */
interface IAssetEntry {
  bytes: Uint8Array;
  contentType: string;
}

// Resolve the repository root relative to this module so disk mode can
// locate the ts_web/ directory regardless of the process working directory.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const REPO_ROOT = resolve(__dirname, '..', '..');
const TS_WEB_DIR = join(REPO_ROOT, 'ts_web');
|
||||||
|
|
||||||
|
/**
 * HTTP server for the ModelGrid operations console.
 *
 * Serves static UI assets — from disk in dev mode or from the generated
 * bundle in compiled builds — plus the `/_ui/overview` JSON endpoint the
 * SPA polls. Runs on its own port, separate from the OpenAI-compatible API.
 */
export class UiServer {
  // Underlying Node HTTP server; undefined while stopped.
  private server?: http.Server;
  private config: IUiConfig;
  private containerManager: ContainerManager;
  private clusterManager: ClusterManager;
  private gpuDetector: GpuDetector;
  // Path -> asset lookup, populated only when serving from the generated bundle.
  private bundleMap: Map<string, IAssetEntry> | null = null;
  // Effective asset source after env/config resolution and bundle fallback.
  private activeAssetSource: 'disk' | 'bundle' = 'bundle';
  // Epoch ms captured in start(); feeds the uptime reported by /_ui/overview.
  private startTime = 0;

  constructor(
    config: IUiConfig,
    containerManager: ContainerManager,
    clusterManager: ClusterManager,
  ) {
    this.config = config;
    this.containerManager = containerManager;
    this.clusterManager = clusterManager;
    this.gpuDetector = new GpuDetector();
  }

  /**
   * Start listening on the configured host/port.
   *
   * Resolves the asset source first; if 'bundle' was requested but the
   * generated bundle module is missing, falls back to 'disk' with a warning.
   * The returned promise resolves once the socket is listening and rejects
   * on listen errors (e.g. port already in use).
   */
  public async start(): Promise<void> {
    if (this.server) {
      logger.warn('UI server is already running');
      return;
    }

    this.activeAssetSource = this.resolveAssetSource();
    if (this.activeAssetSource === 'bundle') {
      this.bundleMap = await this.loadBundleMap();
      if (!this.bundleMap) {
        logger.warn(
          'UI bundle not found (ts_bundled/bundle.ts missing). ' +
            'Falling back to disk mode — run `deno task bundle:ui` before `deno compile`.',
        );
        this.activeAssetSource = 'disk';
      }
    }

    this.startTime = Date.now();

    this.server = http.createServer(async (req, res) => {
      try {
        await this.handleRequest(req, res);
      } catch (err) {
        logger.error(`UI request error: ${err instanceof Error ? err.message : String(err)}`);
        // Only emit a 500 if nothing was written yet; otherwise the response
        // is already partially sent and there is nothing safe to add.
        if (!res.headersSent) {
          res.writeHead(500, { 'Content-Type': 'text/plain' });
          res.end('Internal server error');
        }
      }
    });

    await new Promise<void>((resolve, reject) => {
      this.server!.listen(this.config.port, this.config.host, () => {
        logger.success(
          `UI server started on ${this.config.host}:${this.config.port} ` +
            `(asset source: ${this.activeAssetSource})`,
        );
        resolve();
      });
      this.server!.on('error', (error) => {
        logger.error(`UI server error: ${error.message}`);
        reject(error);
      });
    });
  }

  /** Stop the server, waiting for in-flight connections to close. No-op when stopped. */
  public async stop(): Promise<void> {
    if (!this.server) return;
    await new Promise<void>((resolve) => {
      this.server!.close(() => resolve());
    });
    this.server = undefined;
    logger.log('UI server stopped');
  }

  /** Snapshot of the server's runtime state for status reporting. */
  public getInfo(): { running: boolean; host: string; port: number; assetSource: string } {
    return {
      running: !!this.server,
      host: this.config.host,
      port: this.config.port,
      assetSource: this.activeAssetSource,
    };
  }

  /**
   * Route a single request: only GET/HEAD are allowed; `/_ui/overview`
   * returns JSON, everything else is treated as a static asset path.
   */
  private async handleRequest(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
    const path = url.pathname;

    if (req.method !== 'GET' && req.method !== 'HEAD') {
      res.writeHead(405, { 'Content-Type': 'text/plain', 'Allow': 'GET, HEAD' });
      res.end('Method Not Allowed');
      return;
    }

    if (path === '/_ui/overview') {
      await this.handleOverview(res);
      return;
    }

    await this.serveAsset(path, res);
  }

  /**
   * Build and return the Overview JSON: the health snapshot plus this
   * node's identity (name/role/version). Marked no-store so the SPA always
   * sees fresh data.
   */
  private async handleOverview(res: http.ServerResponse): Promise<void> {
    const statuses = await this.containerManager.getAllStatus();
    const models = await this.containerManager.getAllAvailableModels();
    const gpus = await this.gpuDetector.detectGpus();

    const health: IHealthResponse = buildHealthSnapshot({
      statuses,
      modelCount: models.size,
      gpus,
      startTime: this.startTime,
      version: VERSION,
    });

    const clusterConfig = this.clusterManager.getConfig();

    const body = {
      health,
      node: {
        name: clusterConfig?.nodeName ?? 'modelgrid-local',
        role: clusterConfig?.role ?? 'standalone',
        version: VERSION,
      },
    };

    res.writeHead(200, {
      'Content-Type': 'application/json; charset=utf-8',
      'Cache-Control': 'no-store',
    });
    res.end(JSON.stringify(body));
  }

  /**
   * Serve a static asset from the active source.
   *
   * Bundle mode: look up the in-memory map; unknown extension-less paths
   * fall back to index.html (SPA routing). Disk mode: read from ts_web/,
   * rejecting traversal attempts, with the same SPA fallback on ENOENT.
   */
  private async serveAsset(path: string, res: http.ServerResponse): Promise<void> {
    const normalized = path === '/' ? '/index.html' : path;

    if (this.activeAssetSource === 'bundle' && this.bundleMap) {
      const hit = this.bundleMap.get(normalized);
      if (hit) {
        this.writeAsset(res, hit);
        return;
      }
      // SPA fallback: any unknown non-asset path gets index.html.
      if (!hasKnownAssetExtension(normalized)) {
        const shell = this.bundleMap.get('/index.html');
        if (shell) {
          this.writeAsset(res, shell);
          return;
        }
      }
      res.writeHead(404, { 'Content-Type': 'text/plain' });
      res.end('Not Found');
      return;
    }

    // Disk mode
    const safe = normalizePath(normalized);
    if (!safe) {
      res.writeHead(400, { 'Content-Type': 'text/plain' });
      res.end('Bad Request');
      return;
    }
    const full = join(TS_WEB_DIR, safe);
    try {
      const bytes = await fs.readFile(full);
      this.writeAsset(res, {
        bytes: new Uint8Array(bytes),
        contentType: contentTypeForPath(safe),
      });
      return;
    } catch (err) {
      if ((err as NodeJS.ErrnoException).code === 'ENOENT') {
        // Missing file with no extension: serve the SPA shell instead.
        if (!hasKnownAssetExtension(safe)) {
          try {
            const shell = await fs.readFile(join(TS_WEB_DIR, 'index.html'));
            this.writeAsset(res, {
              bytes: new Uint8Array(shell),
              contentType: 'text/html; charset=utf-8',
            });
            return;
          } catch {
            // fall through to 404
          }
        }
        res.writeHead(404, { 'Content-Type': 'text/plain' });
        res.end('Not Found');
        return;
      }
      throw err;
    }
  }

  /** Write a 200 response with the asset's bytes, type, and length. */
  private writeAsset(res: http.ServerResponse, asset: IAssetEntry): void {
    res.writeHead(200, {
      'Content-Type': asset.contentType,
      'Content-Length': asset.bytes.byteLength,
      'Cache-Control': 'no-cache',
    });
    res.end(asset.bytes);
  }

  /**
   * Decide where assets come from. Precedence: UI_ASSET_SOURCE env var,
   * then config.assetSource, then 'bundle'. Unknown values warn and
   * default to 'bundle'.
   */
  private resolveAssetSource(): 'disk' | 'bundle' {
    // typeof-guard so this module also loads outside a Deno runtime.
    const envOverride = typeof Deno !== 'undefined' ? Deno.env.get('UI_ASSET_SOURCE') : undefined;
    const picked = (envOverride || this.config.assetSource || 'bundle').toLowerCase();
    if (picked === 'disk' || picked === 'bundle') return picked;
    logger.warn(`Unknown UI_ASSET_SOURCE "${picked}", defaulting to bundle`);
    return 'bundle';
  }

  /**
   * Dynamically import the generated bundle and decode it into a path->asset
   * map. Returns null when the bundle module is absent or fails to load,
   * which start() treats as "fall back to disk mode".
   */
  private async loadBundleMap(): Promise<Map<string, IAssetEntry> | null> {
    try {
      // The bundle module is generated by `deno task bundle:ui`.
      // @ts-ignore — generated file may not exist until the bundle task runs.
      const mod = await import('../../ts_bundled/bundle.ts');
      const files: IBundledFile[] = mod.files ?? [];
      const map = new Map<string, IAssetEntry>();
      for (const file of files) {
        map.set(`/${file.path}`, {
          bytes: decodeBase64(file.contentBase64),
          contentType: contentTypeForPath(file.path),
        });
      }
      return map;
    } catch {
      return null;
    }
  }
}
|
||||||
|
|
||||||
|
function decodeBase64(input: string): Uint8Array {
|
||||||
|
const binary = atob(input);
|
||||||
|
const bytes = new Uint8Array(binary.length);
|
||||||
|
for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
|
||||||
|
return bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizePath(path: string): string | null {
|
||||||
|
// Strip leading slashes, reject traversal.
|
||||||
|
const stripped = path.replace(/^\/+/, '');
|
||||||
|
if (stripped.includes('..')) return null;
|
||||||
|
return stripped;
|
||||||
|
}
|
||||||
|
|
||||||
|
function hasKnownAssetExtension(path: string): boolean {
|
||||||
|
return extname(path) !== '';
|
||||||
|
}
|
||||||
|
|
||||||
|
function contentTypeForPath(path: string): string {
|
||||||
|
const ext = extname(path).toLowerCase().replace(/^\./, '');
|
||||||
|
const types: Record<string, string> = {
|
||||||
|
html: 'text/html; charset=utf-8',
|
||||||
|
js: 'application/javascript; charset=utf-8',
|
||||||
|
mjs: 'application/javascript; charset=utf-8',
|
||||||
|
css: 'text/css; charset=utf-8',
|
||||||
|
json: 'application/json; charset=utf-8',
|
||||||
|
map: 'application/json; charset=utf-8',
|
||||||
|
svg: 'image/svg+xml',
|
||||||
|
png: 'image/png',
|
||||||
|
jpg: 'image/jpeg',
|
||||||
|
jpeg: 'image/jpeg',
|
||||||
|
gif: 'image/gif',
|
||||||
|
ico: 'image/x-icon',
|
||||||
|
webp: 'image/webp',
|
||||||
|
woff: 'font/woff',
|
||||||
|
woff2: 'font/woff2',
|
||||||
|
ttf: 'font/ttf',
|
||||||
|
otf: 'font/otf',
|
||||||
|
txt: 'text/plain; charset=utf-8',
|
||||||
|
};
|
||||||
|
return types[ext] || 'application/octet-stream';
|
||||||
|
}
|
||||||
+187
@@ -0,0 +1,187 @@
|
|||||||
|
/* Design tokens: dark-theme palette shared by every view. */
:root {
  color-scheme: dark;
  --bg: #000;
  --bg-1: #0b0b0d;
  --bg-2: #14141a;
  --fg: #e6e6ea;
  --fg-dim: #8a8a92;
  --border: #23232b;
  --accent: #4357d9;
  --ok: #2ecc71;
  --warn: #f1c40f;
  --err: #e74c3c;
}

* { box-sizing: border-box; }

html, body {
  margin: 0;
  padding: 0;
  height: 100%;
  background: var(--bg);
  color: var(--fg);
  font-family: Inter, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
  font-size: 14px;
}

/* Two-column shell: fixed-width sidebar plus fluid main area. */
body {
  display: grid;
  grid-template-columns: 220px 1fr;
}

a { color: inherit; text-decoration: none; }

.dim { color: var(--fg-dim); }

/* Sidebar navigation column. */
.nav {
  background: var(--bg-1);
  border-right: 1px solid var(--border);
  display: flex;
  flex-direction: column;
  height: 100vh;
  position: sticky;
  top: 0;
}

.nav-brand {
  padding: 20px 16px 12px;
  font-size: 15px;
  font-weight: 600;
  letter-spacing: 0.02em;
  border-bottom: 1px solid var(--border);
}

.nav-items {
  display: flex;
  flex-direction: column;
  padding: 8px 0;
  flex: 1;
  overflow-y: auto;
}

.nav-items a {
  padding: 8px 16px;
  color: var(--fg-dim);
  border-left: 2px solid transparent;
  transition: color 0.1s, background 0.1s, border-color 0.1s;
}

.nav-items a:hover {
  color: var(--fg);
  background: var(--bg-2);
}

/* Accent bar marks the route currently rendered (toggled by app.js). */
.nav-items a.active {
  color: var(--fg);
  background: var(--bg-2);
  border-left-color: var(--accent);
}

/* Node identity/version footer at the bottom of the sidebar. */
.nav-footer {
  padding: 12px 16px;
  border-top: 1px solid var(--border);
  font-size: 12px;
}

main {
  padding: 24px 32px;
  overflow-y: auto;
  height: 100vh;
}

h1 {
  font-size: 18px;
  font-weight: 600;
  margin: 0 0 20px;
  letter-spacing: 0.01em;
}

/* Metric cards grid on the Overview view. */
.cards {
  display: grid;
  grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
  gap: 12px;
  margin-bottom: 24px;
}

.card {
  background: var(--bg-1);
  border: 1px solid var(--border);
  border-radius: 6px;
  padding: 16px;
}

.card-label {
  font-size: 11px;
  text-transform: uppercase;
  letter-spacing: 0.08em;
  color: var(--fg-dim);
  margin-bottom: 8px;
}

.card-value {
  font-size: 22px;
  font-weight: 600;
}

.card-sub {
  font-size: 12px;
  color: var(--fg-dim);
  margin-top: 4px;
}

/* Small colored health indicator (ok / warn / err variants below). */
.status-dot {
  display: inline-block;
  width: 8px;
  height: 8px;
  border-radius: 50%;
  margin-right: 6px;
  vertical-align: middle;
}
.status-dot.ok { background: var(--ok); }
.status-dot.warn{ background: var(--warn); }
.status-dot.err { background: var(--err); }

table {
  width: 100%;
  border-collapse: collapse;
  background: var(--bg-1);
  border: 1px solid var(--border);
  border-radius: 6px;
  overflow: hidden;
}

th, td {
  text-align: left;
  padding: 10px 14px;
  border-bottom: 1px solid var(--border);
  font-weight: normal;
}

th {
  color: var(--fg-dim);
  font-size: 11px;
  text-transform: uppercase;
  letter-spacing: 0.08em;
  background: var(--bg-2);
}

tr:last-child td { border-bottom: none; }

/* Dashed box used for empty states and unimplemented views. */
.placeholder {
  padding: 40px;
  text-align: center;
  color: var(--fg-dim);
  background: var(--bg-1);
  border: 1px dashed var(--border);
  border-radius: 6px;
}

/* Monospace banner shown when a data load fails. */
.error {
  background: var(--bg-1);
  border: 1px solid var(--err);
  color: var(--err);
  padding: 12px 16px;
  border-radius: 6px;
  font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
  font-size: 12px;
}
|
||||||
+161
@@ -0,0 +1,161 @@
|
|||||||
|
// ModelGrid UI — vanilla client. Bundled into ts_bundled/bundle.ts for
|
||||||
|
// the single-binary build, or served from disk in dev mode.
|
||||||
|
|
||||||
|
// Valid top-level hash routes; anything else falls back to 'overview'.
const VIEWS = [
  'overview',
  'cluster',
  'gpus',
  'deployments',
  'models',
  'access',
  'logs',
  'metrics',
  'settings',
];

// Cached DOM anchors: the router's render target and the two
// sidebar-footer identity fields filled in by renderOverview().
const view = document.getElementById('view');
const nodeIdent = document.getElementById('node-ident');
const nodeVersion = document.getElementById('node-version');
|
||||||
|
|
||||||
|
// Extract the top-level view name from location.hash ('#/x/y' -> 'x'),
// defaulting to 'overview' for empty or unrecognized routes.
function parseHash() {
  const segments = location.hash.replace(/^#\/?/, '').split('/').filter(Boolean);
  const top = segments[0] ?? 'overview';
  return VIEWS.includes(top) ? top : 'overview';
}
|
||||||
|
|
||||||
|
// Highlight the sidebar link whose data-view matches the current route.
function setActive(current) {
  for (const link of document.querySelectorAll('.nav-items a')) {
    link.classList.toggle('active', link.dataset.view === current);
  }
}
|
||||||
|
|
||||||
|
// Load the overview snapshot from the UI server's JSON endpoint.
// Throws on any non-2xx status so callers surface the failure.
async function fetchHealth() {
  const response = await fetch('/_ui/overview', { headers: { accept: 'application/json' } });
  if (!response.ok) {
    throw new Error(`HTTP ${response.status}`);
  }
  return response.json();
}
|
||||||
|
|
||||||
|
// Colored indicator dot: green for 'ok', yellow for 'degraded', red otherwise.
function statusDot(status) {
  let cls = 'err';
  if (status === 'ok') {
    cls = 'ok';
  } else if (status === 'degraded') {
    cls = 'warn';
  }
  return `<span class="status-dot ${cls}"></span>`;
}
|
||||||
|
|
||||||
|
// Render the Overview view: fetch the health snapshot, show the four
// summary cards plus a deployments table, and update the sidebar footer
// with the node's identity. On failure, show an error banner instead.
async function renderOverview() {
  // Immediate placeholder while the fetch is in flight.
  view.innerHTML = `<h1>Overview</h1><div id="ovstate" class="placeholder">Loading…</div>`;
  try {
    const data = await fetchHealth();
    const health = data.health;
    const containers = health.containers || 0;
    const models = health.models || 0;
    const gpus = health.gpus || 0;
    const uptime = health.uptime || 0;
    // Per-container health states, as [id, state] pairs.
    const detailEntries = Object.entries(health.details?.containers || {});
    const runningContainers = detailEntries.filter(([, v]) => v === 'healthy').length;

    view.innerHTML = `
      <h1>Overview</h1>
      <div class="cards">
        <div class="card">
          <div class="card-label">Fleet</div>
          <div class="card-value">${statusDot(health.status)}${health.status}</div>
          <div class="card-sub">v${health.version} · up ${formatUptime(uptime)}</div>
        </div>
        <div class="card">
          <div class="card-label">Deployments</div>
          <div class="card-value">${runningContainers} / ${containers}</div>
          <div class="card-sub">${containers === 0 ? 'no deployments' : `${runningContainers} healthy`}</div>
        </div>
        <div class="card">
          <div class="card-label">GPUs</div>
          <div class="card-value">${gpus}</div>
          <div class="card-sub">${gpus === 0 ? 'no GPU detected' : 'detected'}</div>
        </div>
        <div class="card">
          <div class="card-label">Models</div>
          <div class="card-value">${models}</div>
          <div class="card-sub">served via OpenAI API</div>
        </div>
      </div>
      <h1 style="margin-top:24px">Deployments</h1>
      ${renderContainerTable(detailEntries)}
    `;
    // Sidebar footer: node name/role and version.
    if (data.node) {
      nodeIdent.textContent = `${data.node.name} · ${data.node.role}`;
      nodeVersion.textContent = `v${data.node.version}`;
    }
  } catch (err) {
    view.innerHTML = `<h1>Overview</h1><div class="error">Failed to load: ${escapeHtml(String(err.message || err))}</div>`;
  }
}
|
||||||
|
|
||||||
|
// Render the deployments table from [containerId, healthState] pairs,
// or a placeholder when nothing is deployed.
function renderContainerTable(entries) {
  if (entries.length === 0) {
    // '<model>' must be HTML-escaped; a literal <model> would be parsed as
    // an unknown tag by the browser and silently dropped from the hint.
    return `<div class="placeholder">No deployments configured. Add one with <code>modelgrid run &lt;model&gt;</code>.</div>`;
  }
  const rows = entries.map(([id, state]) => `
    <tr>
      <td>${escapeHtml(id)}</td>
      <td>${statusDot(state === 'healthy' ? 'ok' : 'err')}${escapeHtml(state)}</td>
    </tr>
  `).join('');
  return `<table><thead><tr><th>Container</th><th>Health</th></tr></thead><tbody>${rows}</tbody></table>`;
}
|
||||||
|
|
||||||
|
// Stub renderer for views present in the nav but not implemented yet;
// points the operator at the equivalent CLI command via cliHint().
function renderPlaceholder(name) {
  view.innerHTML = `
    <h1>${name}</h1>
    <div class="placeholder">
      This view is part of the UI concept (see <code>readme.ui.md</code>) but is not implemented yet.
      Use the CLI for now: <code>modelgrid ${cliHint(name)}</code>.
    </div>
  `;
}
|
||||||
|
|
||||||
|
// Suggest the CLI subcommand covering a not-yet-implemented view.
// Unknown view names fall back to the generic '--help'.
function cliHint(viewName) {
  const hints = {
    Cluster: 'cluster status',
    GPUs: 'gpu list',
    Deployments: 'ps',
    Models: 'model list',
    Access: 'config apikey list',
    Logs: 'service logs',
    Metrics: 'service status',
    Settings: 'config show',
  };
  return hints[viewName] || '--help';
}
|
||||||
|
|
||||||
|
// Compact human-readable uptime: raw seconds under a minute, then
// whole minutes, hours, or days (largest unit only, floored).
function formatUptime(s) {
  if (s >= 86400) return `${Math.floor(s / 86400)}d`;
  if (s >= 3600) return `${Math.floor(s / 3600)}h`;
  if (s >= 60) return `${Math.floor(s / 60)}m`;
  return `${s}s`;
}
|
||||||
|
|
||||||
|
// Escape the five HTML-special characters so untrusted strings can be
// interpolated into innerHTML templates safely. The previous map was
// garbled (entities decoded to the raw characters, making it a no-op).
function escapeHtml(s) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  return s.replace(/[&<>"']/g, (c) => entities[c]);
}
|
||||||
|
|
||||||
|
// Dispatch the current hash route to its renderer and sync the nav highlight.
function route() {
  const current = parseHash();
  setActive(current);
  const renderers = {
    overview: renderOverview,
    cluster: () => renderPlaceholder('Cluster'),
    gpus: () => renderPlaceholder('GPUs'),
    deployments: () => renderPlaceholder('Deployments'),
    models: () => renderPlaceholder('Models'),
    access: () => renderPlaceholder('Access'),
    logs: () => renderPlaceholder('Logs'),
    metrics: () => renderPlaceholder('Metrics'),
    settings: () => renderPlaceholder('Settings'),
  };
  const render = renderers[current];
  if (render) return render();
}
|
||||||
|
|
||||||
|
// Re-render whenever the hash route changes; normalize an empty hash to
// the overview route, then render the initial view.
window.addEventListener('hashchange', route);
if (!location.hash) location.hash = '#/overview';
route();
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="theme-color" content="#000000">
  <title>ModelGrid</title>
  <link rel="stylesheet" href="/app.css">
</head>
<body>
  <!-- Sidebar: hash-route links; app.js toggles .active on the current one. -->
  <aside class="nav">
    <div class="nav-brand">ModelGrid</div>
    <nav class="nav-items">
      <a href="#/overview" data-view="overview">Overview</a>
      <a href="#/cluster" data-view="cluster">Cluster</a>
      <a href="#/gpus" data-view="gpus">GPUs</a>
      <a href="#/deployments" data-view="deployments">Deployments</a>
      <a href="#/models" data-view="models">Models</a>
      <a href="#/access" data-view="access">Access</a>
      <a href="#/logs" data-view="logs">Logs</a>
      <a href="#/metrics" data-view="metrics">Metrics</a>
      <a href="#/settings" data-view="settings">Settings</a>
    </nav>
    <!-- Node identity and version, populated by renderOverview() in app.js. -->
    <div class="nav-footer">
      <div id="node-ident">—</div>
      <div id="node-version" class="dim">—</div>
    </div>
  </aside>
  <!-- Router target: app.js injects the active view's markup here. -->
  <main id="view"></main>
  <script src="/app.js"></script>
</body>
</html>
|
||||||
Reference in New Issue
Block a user