Compare commits
29 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4af9d3de69 | |||
| 405fff91af | |||
| 9022c8dbf3 | |||
| 703cceb512 | |||
| 9d925f9401 | |||
| fe4fdb32d7 | |||
| d6b4c0def1 | |||
| 9608540792 | |||
| 3762fc661e | |||
| 6541b2db1c | |||
| da7375c889 | |||
| 44eb9b9173 | |||
| 1f24df0d80 | |||
| c95961d596 | |||
| 0921dfbe5e | |||
| 5172002ec0 | |||
| 58eabba84d | |||
| 5e8ce6690d | |||
| 0ea98caed6 | |||
| 871afedbb7 | |||
| 1f6cf51794 | |||
| 054875abb5 | |||
| 3e341bbfda | |||
| 9f7308498c | |||
| 952bf394d3 | |||
| 3b2a16b151 | |||
| 9c9c0c90ae | |||
| 24bb6b3058 | |||
| cec102e54e |
@@ -1,6 +1,9 @@
|
||||
# Compiled Deno binaries (built by scripts/compile-all.sh)
|
||||
dist/binaries/
|
||||
|
||||
# Generated UI bundle (built by scripts/bundle-ui.ts)
|
||||
ts_bundled/
|
||||
|
||||
# Deno cache and lock file
|
||||
.deno/
|
||||
deno.lock
|
||||
|
||||
@@ -4,9 +4,10 @@
|
||||
"exports": "./mod.ts",
|
||||
"nodeModulesDir": "auto",
|
||||
"tasks": {
|
||||
"dev": "deno run --allow-all mod.ts",
|
||||
"dev": "UI_ASSET_SOURCE=disk deno run --allow-all mod.ts",
|
||||
"bundle:ui": "deno run --allow-read --allow-write scripts/bundle-ui.ts",
|
||||
"compile": "deno task compile:all",
|
||||
"compile:all": "bash scripts/compile-all.sh",
|
||||
"compile:all": "deno task bundle:ui && bash scripts/compile-all.sh",
|
||||
"test": "deno test --allow-all test/",
|
||||
"test:watch": "deno test --allow-all --watch test/",
|
||||
"check": "deno check mod.ts",
|
||||
|
||||
+3
-2
@@ -37,8 +37,9 @@
|
||||
"scripts": {
|
||||
"postinstall": "node scripts/install-binary.js",
|
||||
"prepublishOnly": "echo 'Publishing ModelGrid binaries to npm...'",
|
||||
"test": "echo 'Tests are run with Deno: deno task test'",
|
||||
"build": "echo 'no build needed'"
|
||||
"test": "deno task test",
|
||||
"check": "deno task check",
|
||||
"build": "deno task bundle:ui"
|
||||
},
|
||||
"files": [
|
||||
"bin/",
|
||||
|
||||
+15
-7
@@ -3,7 +3,7 @@
|
||||
## Project Overview
|
||||
|
||||
ModelGrid is a root-level daemon that manages GPU infrastructure, Docker, and AI model containers
|
||||
(Ollama, vLLM, TGI) with an OpenAI-compatible API interface.
|
||||
(vLLM, TGI) with an OpenAI-compatible API interface.
|
||||
|
||||
## Architecture
|
||||
|
||||
@@ -84,13 +84,12 @@ ts/
|
||||
|
||||
### Greenlit Model System
|
||||
|
||||
- Only pre-approved models can be auto-pulled for security
|
||||
- Greenlist fetched from remote URL (configurable)
|
||||
- Only catalog-listed models can be auto-deployed on demand
|
||||
- Catalog fetched from a remote URL (configurable)
|
||||
- VRAM requirements checked before loading
|
||||
|
||||
### Container Types
|
||||
|
||||
- **Ollama**: Easy to use, native API converted to OpenAI format
|
||||
- **vLLM**: High performance, natively OpenAI-compatible
|
||||
- **TGI**: HuggingFace Text Generation Inference
|
||||
|
||||
@@ -111,12 +110,20 @@ interface IModelGridConfig {
|
||||
port: number; // Default: 8080
|
||||
host: string; // Default: '0.0.0.0'
|
||||
apiKeys: string[]; // Valid API keys
|
||||
rateLimit?: number;
|
||||
cors: boolean;
|
||||
corsOrigins: string[];
|
||||
};
|
||||
ui: {
|
||||
enabled: boolean;
|
||||
port: number; // Default: 8081
|
||||
host: string; // Default: '0.0.0.0'
|
||||
assetSource: 'bundle' | 'disk';
|
||||
};
|
||||
docker: {
|
||||
networkName: string; // Default: 'modelgrid'
|
||||
runtime: 'docker' | 'podman';
|
||||
socketPath?: string;
|
||||
};
|
||||
gpus: {
|
||||
autoDetect: boolean;
|
||||
@@ -124,11 +131,12 @@ interface IModelGridConfig {
|
||||
};
|
||||
containers: IContainerConfig[];
|
||||
models: {
|
||||
greenlistUrl: string;
|
||||
autoPull: boolean;
|
||||
defaultContainer: string;
|
||||
registryUrl: string;
|
||||
autoDeploy: boolean;
|
||||
defaultEngine: 'vllm';
|
||||
autoLoad: string[];
|
||||
};
|
||||
cluster: IClusterConfig;
|
||||
checkInterval: number;
|
||||
}
|
||||
```
|
||||
|
||||
@@ -318,15 +318,15 @@ modelgrid cluster activate NODE # Mark a node active again
|
||||
|
||||
High-performance inference with PagedAttention and continuous batching.
|
||||
|
||||
```bash
|
||||
```jsonc
|
||||
{
|
||||
"id": "vllm-1",
|
||||
"type": "vllm",
|
||||
"name": "vLLM Server",
|
||||
"gpuIds": ["nvidia-0", "nvidia-1"], # Tensor parallelism
|
||||
"gpuIds": ["nvidia-0", "nvidia-1"], // Tensor parallelism
|
||||
"port": 8000,
|
||||
"env": {
|
||||
"HF_TOKEN": "your-huggingface-token" # For gated models
|
||||
"HF_TOKEN": "your-huggingface-token" // For gated models
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -555,6 +555,12 @@ deno run --allow-all mod.ts help
|
||||
# Run tests
|
||||
deno task test
|
||||
|
||||
# Run the main regression suite used during focused changes
|
||||
deno test --allow-all test/test.ts
|
||||
|
||||
# Run the full suite, including focused seam tests
|
||||
deno test --allow-all test/
|
||||
|
||||
# Type check
|
||||
deno task check
|
||||
|
||||
@@ -595,6 +601,14 @@ modelgrid/
|
||||
└── bin/ # npm wrapper
|
||||
```
|
||||
|
||||
Focused seam tests live alongside `test/test.ts`:
|
||||
|
||||
- `test/api-router_test.ts` covers routing, auth failures, and request-size handling
|
||||
- `test/api-server_test.ts` covers health, metrics, and authenticated model listing
|
||||
- `test/modelgrid-config_test.ts` covers config normalization and ignored-key warnings
|
||||
- `test/model-registry_test.ts` covers fallback and file-backed catalog loading
|
||||
- `test/cluster-manager-persistence_test.ts` covers persisted cluster state loading and pruning
|
||||
|
||||
## 🗑️ Uninstallation
|
||||
|
||||
```bash
|
||||
|
||||
+17
-7
@@ -26,9 +26,9 @@
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Container Runtime │
|
||||
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
|
||||
│ │ Ollama │ │ vLLM │ │ TGI │ │ Custom │ │
|
||||
│ │Container │ │Container │ │Container │ │Container │ │
|
||||
│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │
|
||||
│ │ vLLM │ │ TGI │ │ Custom │ │
|
||||
│ │Container │ │Container │ │Container │ │
|
||||
│ └──────────┘ └──────────┘ └──────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
@@ -61,7 +61,7 @@
|
||||
### Pending Tasks
|
||||
|
||||
- [ ] Integration testing with real GPUs
|
||||
- [ ] End-to-end API testing
|
||||
- [x] End-to-end API smoke testing for health, metrics, and authenticated model listings
|
||||
- [ ] Documentation improvements
|
||||
- [ ] First release (v1.0.0)
|
||||
|
||||
@@ -116,8 +116,7 @@ modelgrid/
|
||||
│ │ │ └── embeddings.ts # /v1/embeddings
|
||||
│ │ └── middleware/ # Request processing
|
||||
│ │ ├── auth.ts # API key validation
|
||||
│ │ ├── sanity.ts # Request validation
|
||||
│ │ └── proxy.ts # Container proxy
|
||||
│ │ └── sanity.ts # Request validation
|
||||
│ ├── models/ # Model management
|
||||
│ │ ├── index.ts
|
||||
│ │ ├── registry.ts # Model registry
|
||||
@@ -134,6 +133,17 @@ modelgrid/
|
||||
└── docs/ # Documentation
|
||||
```
|
||||
|
||||
## Test Commands
|
||||
|
||||
```bash
|
||||
deno task check
|
||||
deno test --allow-all test/test.ts
|
||||
deno test --allow-all test/
|
||||
```
|
||||
|
||||
The focused seam tests currently cover API routing, API server endpoints, config normalization,
|
||||
model registry loading, and cluster state persistence.
|
||||
|
||||
---
|
||||
|
||||
## CLI Commands
|
||||
@@ -177,7 +187,7 @@ modelgrid config init # Initialize configuration
|
||||
|
||||
## Greenlit Model System
|
||||
|
||||
Models are controlled via a remote greenlist to prevent arbitrary downloads:
|
||||
Models are resolved through a remote catalog so deployments come from an explicit allowlist:
|
||||
|
||||
```json
|
||||
{
|
||||
|
||||
+414
@@ -0,0 +1,414 @@
|
||||
# 🖥️ ModelGrid — UI Concept
|
||||
|
||||
**A browser-based operations console for ModelGrid, served by the same daemon that
|
||||
already exposes the OpenAI-compatible API.**
|
||||
|
||||
This document sketches the user interface that will sit on top of the ModelGrid
|
||||
daemon: what it shows, how it is organized, how an operator moves through it,
|
||||
and how it stays in sync with a running node or a small cluster. It is a
|
||||
concept, not a final spec — the goal is to lock the shape of the product
|
||||
before any frontend code is written.
|
||||
|
||||
The structural idioms (tabbed top-level views, route-origin awareness,
|
||||
embedded ops dashboard on a dedicated port, API-first with a thin UI on top)
|
||||
are adapted from `@serve.zone/dcrouter`'s Ops dashboard. ModelGrid's UI should
|
||||
feel familiar to anyone who has operated dcrouter, while staying grounded in
|
||||
ModelGrid's own domain: GPUs, vLLM deployments, a public model catalog, and a
|
||||
cluster of gateway-capable nodes.
|
||||
|
||||
## 🎯 Purpose & Audience
|
||||
|
||||
- **Primary user:** the operator of one or a few ModelGrid nodes. Often the
|
||||
same person who provisioned the GPU host and ran `modelgrid service enable`.
|
||||
- **Secondary user:** a platform engineer wiring ModelGrid into an internal
|
||||
AI platform who needs to manage API keys, audit deployments, and watch
|
||||
request traffic.
|
||||
- **Not an end-user chat UI.** Consumers of the OpenAI-compatible API keep
|
||||
using their own SDKs and tools. The browser UI is for operating the fleet,
|
||||
not for prompting models.
|
||||
|
||||
The UI should collapse gracefully from a full cluster view down to a
|
||||
single-node, standalone deployment, because both shapes are first-class in
|
||||
ModelGrid's `cluster.role` model (`standalone` / `control-plane` / `worker`).
|
||||
|
||||
## 🧭 Top-Level Information Architecture
|
||||
|
||||
URLs follow `/{view}` for flat views and `/{view}/{subview}` for tabbed
|
||||
views, matching dcrouter's routing idiom.
|
||||
|
||||
```
|
||||
/overview
|
||||
/stats
|
||||
/configuration
|
||||
|
||||
/cluster
|
||||
/nodes
|
||||
/placements
|
||||
/desired
|
||||
|
||||
/gpus
|
||||
/devices
|
||||
/drivers
|
||||
|
||||
/deployments
|
||||
/active
|
||||
/history
|
||||
|
||||
/models
|
||||
/catalog
|
||||
/deployed
|
||||
|
||||
/access
|
||||
/apikeys
|
||||
/clients
|
||||
|
||||
/logs (flat)
|
||||
/metrics (flat)
|
||||
/settings (flat)
|
||||
```
|
||||
|
||||
Rationale for the split:
|
||||
|
||||
- **Overview** is the landing page — one screen that answers "is the fleet
|
||||
healthy right now?"
|
||||
- **Cluster / GPUs / Deployments / Models** are the four nouns an operator
|
||||
actually reasons about when running ModelGrid. Keeping them at the top
|
||||
level matches the CLI verbs (`modelgrid cluster`, `modelgrid gpu`,
|
||||
`modelgrid container`, `modelgrid model`) so muscle memory transfers.
|
||||
- **Access** consolidates the authn/authz surface (API keys today,
|
||||
user/OIDC later) into one place, the way dcrouter groups `apitokens` and
|
||||
`users` under `access`.
|
||||
- **Logs** and **Metrics** are flat because they are cross-cutting streams,
|
||||
not noun-scoped tabs.
|
||||
|
||||
The navigation chrome itself is a persistent left rail on desktop, collapsing
|
||||
into a top hamburger on narrow viewports. The selected view is indicated
|
||||
there; subviews surface as a tab strip at the top of the content area.
|
||||
|
||||
```
|
||||
┌────────────┬──────────────────────────────────────────────────────────────┐
|
||||
│ ModelGrid │ Overview ▸ Stats Configuration │
|
||||
│ ├──────────────────────────────────────────────────────────────┤
|
||||
│ Overview ●│ │
|
||||
│ Cluster │ ┌─ Fleet Health ─────────────────────────────────────┐ │
|
||||
│ GPUs │ │ 2 nodes • 3 GPUs • 4 deployments • api OK │ │
|
||||
│ Deploys │ └───────────────────────────────────────────────────┘ │
|
||||
│ Models │ ┌─ Live Traffic ──────────────┐ ┌─ GPU Utilization ─┐ │
|
||||
│ Access │ │ 42 req/s p95 820 ms │ │ ▁▂▄▅▇█▇▅▄▂▁ │ │
|
||||
│ │ │ ▁▂▃▅▇▇▅▃▂▁▁▂▄▆ │ │ avg 64% │ │
|
||||
│ Logs │ └─────────────────────────────┘ └───────────────────┘ │
|
||||
│ Metrics │ ┌─ Deployments ────────────────────────────────────┐ │
|
||||
│ Settings │ │ llama-3.1-8b running 2/2 nvidia-0,1 │ │
|
||||
│ │ │ qwen2.5-7b running 1/1 nvidia-2 │ │
|
||||
│ node: ctrl │ │ bge-m3 pending 0/1 (no capacity) │ │
|
||||
│ v1.1.0 │ └──────────────────────────────────────────────────┘ │
|
||||
└────────────┴──────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
The footer of the rail surfaces the local node's identity (`nodeName`,
|
||||
`role`), the daemon version, and a small link to the API base URL —
|
||||
equivalent to how dcrouter surfaces its runtime identity in the sidebar.
|
||||
|
||||
## 📄 Per-View Sketches
|
||||
|
||||
### Overview ▸ Stats (landing page)
|
||||
|
||||
A dashboard of the things that an on-call operator wants to see in under
|
||||
two seconds:
|
||||
|
||||
- **Fleet health band**: green/yellow/red status tiles for nodes, GPUs,
|
||||
deployments, API.
|
||||
- **Live traffic**: requests/sec, p50/p95/p99 latency, error rate. Sparkline
|
||||
for the last 15 minutes, streaming from `/metrics` and a server-pushed
|
||||
channel.
|
||||
- **GPU utilization strip**: one micro-sparkline per GPU, colored by VRAM
|
||||
pressure.
|
||||
- **Deployment summary**: the `modelgrid ps` output, but clickable. Each
|
||||
row deep-links into Deployments ▸ Active.
|
||||
- **Catalog drift**: a small callout when `list.modelgrid.com` has newer
|
||||
model entries than the node's cached catalog.
|
||||
|
||||
### Overview ▸ Configuration
|
||||
|
||||
A read-only rendering of the resolved `/etc/modelgrid/config.json` with
|
||||
section headers (`api`, `docker`, `gpus`, `models`, `cluster`). Operators
|
||||
can copy the JSON; editing config is intentionally kept to the Settings view
|
||||
(or the CLI) to avoid a "two sources of truth" problem.
|
||||
|
||||
### Cluster ▸ Nodes
|
||||
|
||||
Mirrors `modelgrid cluster nodes`. Each row: node name, role badge
|
||||
(`standalone` / `control-plane` / `worker`), advertised URL, last heartbeat,
|
||||
GPU inventory summary, status (`active` / `cordoned` / `draining`).
|
||||
|
||||
Row actions: `cordon`, `drain`, `activate` — the same verbs as the CLI.
|
||||
Hitting an action fires the corresponding control-plane call and shows an
|
||||
in-row toast on success.
|
||||
|
||||
```
|
||||
┌ Nodes ───────────────────────────────────────────────────────────────────┐
|
||||
│ Name Role Advertised URL Heartbeat │
|
||||
│ ────────────────────────────────────────────────────────────────────── │
|
||||
│ control-a control-plane http://ctrl.internal:8080 2s ago ● │
|
||||
│ worker-a worker http://wa.internal:8080 3s ago ● │
|
||||
│ worker-b worker http://wb.internal:8080 41s ago ◐ │
|
||||
│ [cordon] [drain]
|
||||
└──────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Cluster ▸ Placements
|
||||
|
||||
A live map of where every deployed model is currently running, read from
|
||||
the control-plane's placement state. Grouped by model, with a column per
|
||||
node. Cells show replica count and health. This is where the operator
|
||||
answers "where did `llama-3.1-8b` actually end up?".
|
||||
|
||||
### Cluster ▸ Desired
|
||||
|
||||
The companion to Placements: the desired-state table. Each row is a model
|
||||
with a target replica count. Rows can be added (`cluster ensure`), edited
|
||||
(`cluster scale`), or removed (`cluster clear`). The reconciler's pending
|
||||
work is surfaced as a diff badge: e.g. `+1 replica`, `moving from worker-b
|
||||
→ worker-a`.
|
||||
|
||||
### GPUs ▸ Devices
|
||||
|
||||
Mirrors `modelgrid gpu list` / `gpu status`, rendered as a card per GPU:
|
||||
vendor, model, VRAM free/total, driver version, temperature, current
|
||||
utilization, and which deployment is using it. Cards stream their
|
||||
utilization via the realtime channel; no full page reloads.
|
||||
|
||||
### GPUs ▸ Drivers
|
||||
|
||||
Status per vendor (NVIDIA / AMD / Intel): driver installed? version? any
|
||||
known issue? Includes a button to run `modelgrid gpu install`
|
||||
interactively — but since the install flow is privileged and interactive,
|
||||
the UI only kicks off the CLI walk-through in a terminal session rather
|
||||
than trying to reimplement it in the browser. A small "copy the command"
|
||||
affordance makes this explicit.
|
||||
|
||||
### Deployments ▸ Active
|
||||
|
||||
The core operational table. One row per active vLLM deployment:
|
||||
|
||||
- container ID, display name, model, GPU bindings, port, uptime, request
|
||||
rate, error rate
|
||||
- status pill (`running`, `pending`, `restarting`, `failed`)
|
||||
- row actions: `logs`, `stop`, `restart`, `remove`
|
||||
|
||||
Clicking a row opens a detail drawer with sub-tabs:
|
||||
|
||||
- **Summary** — the effective container config and the scheduling
|
||||
decision that landed it on this node
|
||||
- **Logs** — a live tail (SSE)
|
||||
- **Metrics** — request latency histogram, token throughput, VRAM
|
||||
occupancy
|
||||
- **Events** — a timeline of lifecycle events (scheduled, pulled image,
|
||||
started, health check, restart, stopped)
|
||||
|
||||
### Deployments ▸ History
|
||||
|
||||
Deployments that have been stopped or removed, with the reason and the
|
||||
last-known logs. Useful for post-mortem on a failed deploy.
|
||||
|
||||
### Models ▸ Catalog
|
||||
|
||||
The current catalog resolved from `list.modelgrid.com`, with a "refresh"
|
||||
action that calls `modelgrid model refresh`. Each entry shows canonical
|
||||
ID, aliases, capabilities (chat / completions / embeddings), minimum
|
||||
VRAM, default GPU count, and a `Deploy` button. Deploying opens a small
|
||||
form that mirrors `modelgrid run`: target node (or auto), desired replica
|
||||
count, optional env overrides (e.g. `HF_TOKEN`).
|
||||
|
||||
A visible "source" badge marks whether the entry came from the public
|
||||
catalog or a custom `registryUrl`, so operators can tell at a glance which
|
||||
models the cluster will actually trust for auto-deploy.
|
||||
|
||||
### Models ▸ Deployed
|
||||
|
||||
Shows the union of what is running across the cluster, with replica
|
||||
counts, keyed by canonical model ID. This is the view a developer asks
|
||||
the operator for when they want to know "what models can I hit on this
|
||||
endpoint?". It is effectively a pretty rendering of `/v1/models`.
|
||||
|
||||
### Access ▸ API Keys
|
||||
|
||||
Mirrors `modelgrid config apikey list`. Columns: label, prefix (first
|
||||
8 chars), created, last used, status. Actions: `generate`, `revoke`.
|
||||
Generating a key shows the secret once in a modal with a copy button,
|
||||
then never shows it again — the same contract as dcrouter's API tokens.
|
||||
|
||||
### Access ▸ Clients
|
||||
|
||||
Placeholder for per-consumer rate limits, quotas, and request labels.
|
||||
This view is explicitly future work; it renders as "not yet configured"
|
||||
until the daemon exposes client records. Listing it now reserves the IA
|
||||
slot so it doesn't have to be retrofitted later.
|
||||
|
||||
### Logs
|
||||
|
||||
A unified tail across daemon, scheduler, and deployments, with filters
|
||||
by source (`daemon`, `scheduler`, `deployment:<id>`), level, and
|
||||
free-text. Streamed via SSE. A "pause" toggle freezes the view for
|
||||
reading; a "download" action exports the current buffer as NDJSON.
|
||||
|
||||
### Metrics
|
||||
|
||||
The `/metrics` endpoint rendered as a small set of charts (request rate,
|
||||
latency, error rate, VRAM occupancy, model throughput). This is
|
||||
deliberately lightweight — serious monitoring is expected to come from
|
||||
Prometheus scraping `/metrics` into Grafana, and the UI says so with a
|
||||
link to the recommended dashboard snippet.
|
||||
|
||||
### Settings
|
||||
|
||||
Editable configuration, grouped to match the config file:
|
||||
|
||||
- **API** — port, bind host, CORS, rate limit
|
||||
- **Docker** — runtime, network name, socket path
|
||||
- **GPUs** — auto-detect toggle, per-GPU assignments
|
||||
- **Models** — registry URL, auto-deploy, default engine, auto-load list
|
||||
- **Cluster** — role, advertise URL, control-plane URL, shared secret,
|
||||
heartbeat interval, seeds
|
||||
|
||||
Edits write through the daemon's config API (to be defined) and reload
|
||||
without a restart wherever possible. Settings that require a restart are
|
||||
marked with a `restart required` badge, and the UI surfaces a single
|
||||
"restart daemon" action at the top of the view when any are pending.
|
||||
|
||||
## 🛤️ Key User Journeys
|
||||
|
||||
### Deploy a model from the catalog
|
||||
|
||||
1. Operator opens **Models ▸ Catalog**, filters for chat-capable models
|
||||
with VRAM ≤ 24 GB.
|
||||
2. Clicks `Deploy` on `meta-llama/Llama-3.1-8B-Instruct`.
|
||||
3. Dialog appears with target node (`auto` / specific worker), replica
|
||||
count (default from catalog), optional env (`HF_TOKEN`).
|
||||
4. On submit, the UI calls the control plane (`cluster ensure` + `scale`
|
||||
under the hood). The dialog closes and the new row appears in
|
||||
**Deployments ▸ Active** in `pending` state.
|
||||
5. SSE updates walk the row through `pulling image → starting → running`.
|
||||
6. A toast links to the deployment detail drawer for logs.
|
||||
|
||||
### Add a worker node to an existing control plane
|
||||
|
||||
1. Operator opens **Cluster ▸ Nodes** on the control plane.
|
||||
2. Clicks `Add node`, which opens a helper that pre-fills the worker's
|
||||
expected `cluster` config block — role, control-plane URL, shared
|
||||
secret — and exposes a one-liner install command.
|
||||
3. The operator runs the install command on the worker host. The UI does
|
||||
**not** SSH into anything; it just hands out the exact snippet.
|
||||
4. Once the worker's daemon starts and registers, the new node appears
|
||||
in the Nodes table with its first heartbeat. The helper closes
|
||||
automatically.
|
||||
|
||||
### Rotate an API key
|
||||
|
||||
1. **Access ▸ API Keys** → `Generate`.
|
||||
2. Name the key, pick a scope (today: single scope; later: per-model).
|
||||
3. The secret is shown once in a modal; copy-to-clipboard and a clear
|
||||
"you will not see this again" note.
|
||||
4. Old key row gets a `revoke` action. Revoke is a confirm-then-apply
|
||||
flow because it will break live traffic.
|
||||
|
||||
### Investigate a failing deployment
|
||||
|
||||
1. **Overview ▸ Stats** shows a red tile: `1 deployment failed`.
|
||||
2. Click drills into **Deployments ▸ Active**, filtered to `failed`.
|
||||
3. Open the row drawer → **Events** tab to see the lifecycle timeline.
|
||||
4. Jump to **Logs** tab for the live tail. If the deployment is down,
|
||||
fall back to the last 500 lines from its event buffer.
|
||||
5. From the drawer, `restart` retries the deployment; if it fails again,
|
||||
the `Summary` tab shows the scheduling decision so the operator can
|
||||
see whether VRAM, GPU pinning, or image pull is the root cause.
|
||||
|
||||
## 📡 Realtime, Auth, and API Contract
|
||||
|
||||
- **Realtime updates.** Metrics, logs, GPU utilization, heartbeats, and
|
||||
deployment state changes stream over Server-Sent Events. A single
|
||||
`/v1/_ui/events?topics=...` endpoint is preferred over per-feature
|
||||
sockets so the browser holds exactly one connection. WebSocket is
|
||||
reserved for bidirectional features (e.g. an interactive install
|
||||
walkthrough) that we do not need in v1.
|
||||
- **Auth model.** The UI runs behind the same daemon process as the
|
||||
OpenAI-compatible API, on a dedicated `uiPort` (default `8081`) to
|
||||
keep the data-plane clean. Login uses a session cookie; the first-boot
|
||||
bootstrap seeds an `admin` user with a one-time password printed to
|
||||
`journalctl -u modelgrid`, the same way dcrouter prints its initial
|
||||
`admin`/`admin`. SSO/OIDC is a later add-on.
|
||||
- **API contract.** Every UI action maps to an HTTP endpoint on the
|
||||
daemon (`/v1/_ui/...`). The UI must not talk to any private internals
|
||||
directly; this keeps `@modelgrid.com/modelgrid-apiclient` (a future
|
||||
sibling to `@serve.zone/dcrouter-apiclient`) able to do everything the
|
||||
UI can do, from scripts.
|
||||
- **Origin badges.** Similar to dcrouter's `config` / `email` / `dns` /
|
||||
`api` route-origin model, ModelGrid should tag each deployment with
|
||||
its origin: `config` (seeded via `containers` in config.json),
|
||||
`catalog` (auto-deployed from `models.autoLoad`), `api` (created via
|
||||
UI/API). Origin determines what the UI allows: `config`-origin
|
||||
deployments are toggle-only, `api`-origin deployments are full CRUD.
|
||||
|
||||
## 🧱 Implementation Notes (non-binding)
|
||||
|
||||
- **Web component stack.** Match the dcrouter OpsServer approach:
|
||||
component-per-view under `ts_web/elements/<area>/`, a tiny
|
||||
SmartRouter-style client router (`ts_web/router.ts`), and a single
|
||||
`appstate.ts` as the store.
|
||||
- **Bundled into the binary via `ts_bundled/bundle.ts`.** ModelGrid is a
|
||||
Deno project that ships as a `deno compile` single binary, so the UI
|
||||
follows the `@stack.gallery/registry` pattern: a build step bundles
|
||||
the `ts_web/` sources (HTML, JS, CSS, fonts, icons) into a single
|
||||
generated `ts_bundled/bundle.ts` module that exports a
|
||||
`{ path → bytes | string }` map. The daemon dynamically imports that
|
||||
module at startup and hands the map to **typedserver**, which serves
|
||||
it on the UI port. Result: no external asset directory, no runtime
|
||||
filesystem dependency, one binary still ships the entire console.
|
||||
- **Dev vs prod asset source.** In `deno task dev`, typedserver is
|
||||
pointed at `ts_web/` on disk so UI edits are hot-reloadable without
|
||||
re-running the bundler. In `deno task compile` / prod, the bundler
|
||||
regenerates `ts_bundled/bundle.ts` first and the compiled binary
|
||||
serves exclusively from the embedded map. A single flag
|
||||
(`UI_ASSET_SOURCE=disk|bundle`, default `bundle`) picks the strategy
|
||||
at runtime.
|
||||
- **Bundler placement.** Mirrors `@stack.gallery/registry`: keep the
|
||||
bundler in `scripts/bundle-ui.ts`, invoke it from a `deno task
|
||||
bundle:ui` that the `compile:all` task depends on, and `.gitignore`
|
||||
the generated `ts_bundled/bundle.ts` so it is only produced during
|
||||
release builds (or regenerated on demand for local prod testing).
|
||||
- **Packaging.** Follow dcrouter's module split: `@modelgrid.com/modelgrid`
|
||||
ships the daemon and the embedded UI bundle; a future
|
||||
`@modelgrid.com/modelgrid-web` can carve out the web sources as their
|
||||
own publishable boundary if the bundle grows large or the UI needs to
|
||||
be consumed independently.
|
||||
- **Dark theme default** (black background, high-contrast foreground) to
|
||||
match dcrouter and the expected server-ops environment. Light theme
|
||||
is a later toggle.
|
||||
- **No server-side rendering.** The UI is a static SPA; typedserver
|
||||
returns the asset map's `index.html` for the app shell and the rest
|
||||
of the state comes from the API. This keeps the runtime surface
|
||||
small and makes the UI-less `curl` story identical to the UI story.
|
||||
|
||||
## ❓ Open Questions
|
||||
|
||||
- **Edit config from the UI or keep it CLI/file-first?** Current lean:
|
||||
UI is authoritative only for API keys, deployments, and cluster
|
||||
actions. Config editing is exposed but optional, with CLI still the
|
||||
canonical path for reproducible installs.
|
||||
- **Do we expose a model prompt playground?** Nice to have for smoke
|
||||
tests, but it blurs the operator/consumer line. Defer to v2.
|
||||
- **Cluster-wide vs per-node view.** On a worker node, should the UI
|
||||
show only local state, or proxy the control plane's cluster view? The
|
||||
current lean: workers show local-only, and link to the control plane
|
||||
for cluster views. This avoids split-brain confusion.
|
||||
- **Access control granularity.** API keys today are coarse (all or
|
||||
nothing). A future model might scope keys per deployment or per
|
||||
model. Reserve the column in the Access ▸ API Keys table now.
|
||||
|
||||
## 🛑 Out of Scope (for this concept)
|
||||
|
||||
- End-user chat or prompt UIs for the OpenAI-compatible API.
|
||||
- Billing, quotas, or usage-based pricing dashboards.
|
||||
- Multi-tenant isolation beyond per-API-key separation.
|
||||
- Anything specific to non-vLLM runtimes — the UI assumes the v1.1.0
|
||||
reorientation around vLLM as the only first-class runtime.
|
||||
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env -S deno run --allow-read --allow-write
|
||||
|
||||
/**
|
||||
* bundle-ui.ts
|
||||
*
|
||||
* Walks `ts_web/` and emits `ts_bundled/bundle.ts`, a single TypeScript
|
||||
* module that exports every UI asset as base64, sorted by path. The daemon's
|
||||
* UI server imports this module at runtime to serve the console without
|
||||
* any external filesystem dependency — the entire browser app ends up
|
||||
* embedded in the `deno compile` binary.
|
||||
*
|
||||
* The output shape matches the `@stack.gallery/registry` convention so
|
||||
* a consumer can iterate `files` as `{ path, contentBase64 }` entries.
|
||||
*/
|
||||
|
||||
import { walk } from 'jsr:@std/fs@^1.0.0/walk';
|
||||
import { fromFileUrl, join, relative } from 'jsr:@std/path@^1.0.0';
|
||||
|
||||
const here = fromFileUrl(new URL('./', import.meta.url));
|
||||
const repoRoot = join(here, '..');
|
||||
const sourceDir = join(repoRoot, 'ts_web');
|
||||
const outDir = join(repoRoot, 'ts_bundled');
|
||||
const outFile = join(outDir, 'bundle.ts');
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const entries: Array<{ path: string; contentBase64: string; size: number }> = [];
|
||||
|
||||
for await (
|
||||
const entry of walk(sourceDir, {
|
||||
includeDirs: false,
|
||||
includeSymlinks: false,
|
||||
})
|
||||
) {
|
||||
const rel = relative(sourceDir, entry.path).replaceAll('\\', '/');
|
||||
const bytes = await Deno.readFile(entry.path);
|
||||
entries.push({
|
||||
path: rel,
|
||||
contentBase64: encodeBase64(bytes),
|
||||
size: bytes.byteLength,
|
||||
});
|
||||
}
|
||||
|
||||
entries.sort((a, b) => a.path.localeCompare(b.path));
|
||||
|
||||
const generatedAt = new Date().toISOString();
|
||||
const totalBytes = entries.reduce((sum, e) => sum + e.size, 0);
|
||||
|
||||
const header = [
|
||||
'// AUTO-GENERATED — do not edit.',
|
||||
'// Regenerate with: deno task bundle:ui',
|
||||
`// Source: ts_web/ (${entries.length} files, ${totalBytes} bytes)`,
|
||||
`// Generated: ${generatedAt}`,
|
||||
'',
|
||||
'export interface IBundledFile {',
|
||||
' path: string;',
|
||||
' contentBase64: string;',
|
||||
'}',
|
||||
'',
|
||||
'export const files: IBundledFile[] = [',
|
||||
].join('\n');
|
||||
|
||||
const body = entries.map((e) =>
|
||||
` { path: ${JSON.stringify(e.path)}, contentBase64: ${JSON.stringify(e.contentBase64)} },`
|
||||
).join('\n');
|
||||
|
||||
const footer = '\n];\n';
|
||||
|
||||
await Deno.mkdir(outDir, { recursive: true });
|
||||
await Deno.writeTextFile(outFile, header + '\n' + body + footer);
|
||||
|
||||
console.log(
|
||||
`bundle-ui: wrote ${entries.length} file(s), ${totalBytes} bytes → ${
|
||||
relative(repoRoot, outFile)
|
||||
}`,
|
||||
);
|
||||
}
|
||||
|
||||
function encodeBase64(bytes: Uint8Array): string {
|
||||
let binary = '';
|
||||
for (let i = 0; i < bytes.length; i++) {
|
||||
binary += String.fromCharCode(bytes[i]);
|
||||
}
|
||||
return btoa(binary);
|
||||
}
|
||||
|
||||
// Run the bundler only when this module is executed directly
// (deno run / the `bundle:ui` task), not when it is imported.
if (import.meta.main) {
  await main();
}
|
||||
@@ -0,0 +1,131 @@
|
||||
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||
import { EventEmitter } from 'node:events';
|
||||
import { AuthMiddleware } from '../ts/api/middleware/auth.ts';
|
||||
import { ApiRouter } from '../ts/api/router.ts';
|
||||
|
||||
class TestResponse {
|
||||
public statusCode = 200;
|
||||
public headers: Record<string, string> = {};
|
||||
public body = '';
|
||||
|
||||
public writeHead(statusCode: number, headers: Record<string, string>): TestResponse {
|
||||
this.statusCode = statusCode;
|
||||
this.headers = headers;
|
||||
return this;
|
||||
}
|
||||
|
||||
public end(body = ''): TestResponse {
|
||||
this.body = body;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
class TestRequest extends EventEmitter {
|
||||
public method: string;
|
||||
public headers: Record<string, string>;
|
||||
public destroyed = false;
|
||||
public paused = false;
|
||||
|
||||
constructor(method: string, headers: Record<string, string>) {
|
||||
super();
|
||||
this.method = method;
|
||||
this.headers = headers;
|
||||
}
|
||||
|
||||
public pause(): this {
|
||||
this.paused = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
public destroy(): this {
|
||||
this.destroyed = true;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
// Build an ApiRouter with inert stub collaborators: auth uses a real
// AuthMiddleware with one key so 401 paths are exercised end-to-end, sanity
// checks always pass, and every handler throws so a test fails loudly if
// routing ever reaches one unexpectedly.
// NOTE(review): the positional `{} as never` args mirror the ApiRouter
// constructor order — verify against ts/api/router.ts if that signature changes.
function createRouter(): ApiRouter {
  return new ApiRouter(
    {} as never,
    {} as never,
    {} as never,
    {} as never,
    ['valid-key'],
    {
      authMiddleware: new AuthMiddleware(['valid-key']),
      // Pass-through validation/sanitization stubs.
      sanityMiddleware: {
        validateChatRequest() {
          return { valid: true };
        },
        sanitizeChatRequest(body: Record<string, unknown>) {
          return body;
        },
        validateEmbeddingsRequest() {
          return { valid: true };
        },
        sanitizeEmbeddingsRequest(body: Record<string, unknown>) {
          return body;
        },
      } as never,
      // Handlers must never be reached by the routing tests below.
      chatHandler: {
        async handleChatCompletion() {
          throw new Error('chat handler should not run in this test');
        },
      } as never,
      modelsHandler: {
        async handleListModels() {
          throw new Error('models handler should not run in this test');
        },
      } as never,
      embeddingsHandler: {
        async handleEmbeddings() {
          throw new Error('embeddings handler should not run in this test');
        },
      } as never,
    },
  );
}
|
||||
|
||||
// Unknown paths fall through to a 404 with an OpenAI-style error envelope.
Deno.test('ApiRouter returns 404 for unknown endpoints', async () => {
  const router = createRouter();
  const response = new TestResponse();

  await router.route(
    { method: 'GET', headers: {} } as never,
    response as never,
    '/does-not-exist',
  );

  assertEquals(response.statusCode, 404);
  assertEquals(JSON.parse(response.body).error.type, 'invalid_request_error');
});

// A protected route with no Authorization header is rejected with 401
// before any handler runs (the stub handlers would throw otherwise).
Deno.test('ApiRouter rejects protected endpoints without a bearer token', async () => {
  const router = createRouter();
  const response = new TestResponse();

  await router.route(
    { method: 'GET', headers: {} } as never,
    response as never,
    '/v1/models',
  );

  assertEquals(response.statusCode, 401);
  assertEquals(JSON.parse(response.body).error.type, 'authentication_error');
});

// A body exceeding the size limit must abort the request (pause + destroy)
// and answer 413 without invoking the chat handler.
Deno.test('ApiRouter returns 413 for oversized request bodies', async () => {
  const router = createRouter();
  const request = new TestRequest('POST', {
    authorization: 'Bearer valid-key',
  });
  const response = new TestResponse();

  // Start routing, then push just over 10 MiB of body through the fake stream.
  const routePromise = router.route(request as never, response as never, '/v1/chat/completions');
  request.emit('data', 'x'.repeat(10 * 1024 * 1024 + 1));
  await routePromise;

  assertEquals(response.statusCode, 413);
  assertEquals(request.paused, true);
  assertEquals(request.destroyed, true);
  assertEquals(JSON.parse(response.body).error.message, 'Request body too large');
});
|
||||
@@ -0,0 +1,315 @@
|
||||
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||
import { ApiServer } from '../ts/api/server.ts';
|
||||
|
||||
// End-to-end pass over a live ApiServer: /health, /metrics, /v1/models with
// and without credentials, then the request/auth-failure counters.
Deno.test('ApiServer serves health metrics and authenticated model listings', async () => {
  // NOTE(review): a random high port can still collide with a busy port —
  // consider binding port 0 if ApiServer can report its bound address.
  const port = 18100 + Math.floor(Math.random() * 1000);
  const server = new ApiServer(
    {
      host: '127.0.0.1',
      port,
      apiKeys: ['valid-key'],
      cors: false,
      corsOrigins: [],
    },
    // Container-manager stub: one healthy container, one servable model.
    {
      async getAllStatus() {
        return new Map([
          ['vllm-1', { running: true, health: 'healthy' }],
        ]);
      },
      async getAllAvailableModels() {
        return new Map([
          ['meta-llama/Llama-3.1-8B-Instruct', [{ type: 'vllm' }]],
        ]);
      },
    } as never,
    // Registry stub backing the /v1/models listing.
    {
      async getAllModels() {
        return [
          {
            id: 'meta-llama/Llama-3.1-8B-Instruct',
            engine: 'vllm',
            source: { repo: 'meta-llama/Llama-3.1-8B-Instruct' },
            capabilities: { chat: true },
            requirements: { minVramGb: 18 },
          },
        ];
      },
    } as never,
    {} as never,
    // Cluster-manager stub: standalone node, no peers.
    {
      getStatus() {
        return {
          localNode: null,
          nodes: [],
          models: {},
          desiredDeployments: [],
        };
      },
    } as never,
    {
      gpuDetector: {
        async detectGpus() {
          return [{ id: 'nvidia-0' }];
        },
      } as never,
    },
  );

  await server.start();

  try {
    // Healthy setup → 200, no degradation reasons, generated request id.
    const healthResponse = await fetch(`http://127.0.0.1:${port}/health`);
    const healthBody = await healthResponse.json();
    assertEquals(healthResponse.status, 200);
    assertEquals(healthBody.status, 'ok');
    assertEquals(healthBody.models, 1);
    assertEquals(Array.isArray(healthBody.reasons), true);
    assertEquals(healthBody.reasons.length, 0);
    assertEquals(typeof healthResponse.headers.get('x-request-id'), 'string');

    // Prometheus-style exposition with the expected gauge names.
    const metricsResponse = await fetch(`http://127.0.0.1:${port}/metrics`);
    const metricsBody = await metricsResponse.text();
    assertEquals(metricsResponse.status, 200);
    assertEquals(metricsBody.includes('modelgrid_uptime_seconds'), true);
    assertEquals(metricsBody.includes('modelgrid_models_available 1'), true);

    // No bearer token → 401 with an authentication error envelope.
    const unauthenticatedModels = await fetch(`http://127.0.0.1:${port}/v1/models`);
    const unauthenticatedBody = await unauthenticatedModels.json();
    assertEquals(unauthenticatedModels.status, 401);
    assertEquals(unauthenticatedBody.error.type, 'authentication_error');

    // Valid token → model list; the caller-supplied request id is echoed back.
    const authenticatedModels = await fetch(`http://127.0.0.1:${port}/v1/models`, {
      headers: {
        Authorization: 'Bearer valid-key',
        'X-Request-Id': 'req-test-models',
      },
    });
    const authenticatedBody = await authenticatedModels.json();
    assertEquals(authenticatedModels.status, 200);
    assertEquals(authenticatedBody.object, 'list');
    assertEquals(authenticatedBody.data[0].id, 'meta-llama/Llama-3.1-8B-Instruct');
    assertEquals(authenticatedModels.headers.get('x-request-id'), 'req-test-models');

    // Both /v1/models hits counted; the unauthenticated one as an auth failure.
    const metricsAfterRequests = await fetch(`http://127.0.0.1:${port}/metrics`);
    const metricsAfterRequestsBody = await metricsAfterRequests.text();
    assertEquals(
      metricsAfterRequestsBody.includes('modelgrid_api_requests_total{path="/v1/models"} 2'),
      true,
    );
    assertEquals(
      metricsAfterRequestsBody.includes('modelgrid_api_auth_failures_total{path="/v1/models"} 1'),
      true,
    );
  } finally {
    await server.stop();
  }
});
|
||||
|
||||
// A handler failure should answer 500 and be recorded in the per-path
// server-error counter.
Deno.test('ApiServer metrics expose 5xx counts for failing endpoints', async () => {
  const port = 19100 + Math.floor(Math.random() * 1000);
  // Fail exactly the first model listing; later calls succeed so the
  // follow-up /metrics request is unaffected.
  let failModelListing = true;
  const server = new ApiServer(
    {
      host: '127.0.0.1',
      port,
      apiKeys: ['valid-key'],
      cors: false,
      corsOrigins: [],
    },
    {
      async getAllStatus() {
        return new Map();
      },
      async getAllAvailableModels() {
        if (failModelListing) {
          failModelListing = false;
          throw new Error('models unavailable');
        }

        return new Map();
      },
    } as never,
    {
      async getAllModels() {
        return [];
      },
    } as never,
    {} as never,
    {
      getStatus() {
        return {
          localNode: null,
          nodes: [],
          models: {},
          desiredDeployments: [],
        };
      },
    } as never,
    {
      gpuDetector: {
        async detectGpus() {
          return [];
        },
      } as never,
    },
  );

  await server.start();

  try {
    // The injected failure surfaces as a 500 on /v1/models...
    const failedModels = await fetch(`http://127.0.0.1:${port}/v1/models`, {
      headers: {
        Authorization: 'Bearer valid-key',
      },
    });
    assertEquals(failedModels.status, 500);
    await failedModels.text(); // drain the body so the connection is released

    // ...and increments the server-error counter for that path.
    const metricsResponse = await fetch(`http://127.0.0.1:${port}/metrics`);
    const metricsBody = await metricsResponse.text();
    assertEquals(
      metricsBody.includes('modelgrid_api_server_errors_total{path="/v1/models"} 1'),
      true,
    );
  } finally {
    await server.stop();
  }
});
|
||||
|
||||
// An unhealthy container plus zero available models should flip /health to a
// 503 'degraded' response carrying both reason codes.
Deno.test('ApiServer health reports degraded reasons', async () => {
  const port = 19300 + Math.floor(Math.random() * 1000);
  const server = new ApiServer(
    {
      host: '127.0.0.1',
      port,
      apiKeys: ['valid-key'],
      cors: false,
      corsOrigins: [],
    },
    // One stopped/unhealthy container and no models at all.
    {
      async getAllStatus() {
        return new Map([
          ['vllm-1', { running: false, health: 'unhealthy' }],
        ]);
      },
      async getAllAvailableModels() {
        return new Map();
      },
    } as never,
    {
      async getAllModels() {
        return [];
      },
    } as never,
    {} as never,
    {
      getStatus() {
        return {
          localNode: null,
          nodes: [],
          models: {},
          desiredDeployments: [],
        };
      },
    } as never,
    {
      gpuDetector: {
        async detectGpus() {
          return [{ id: 'nvidia-0' }];
        },
      } as never,
    },
  );

  await server.start();

  try {
    const response = await fetch(`http://127.0.0.1:${port}/health`);
    const body = await response.json();

    assertEquals(response.status, 503);
    assertEquals(body.status, 'degraded');
    assertEquals(body.reasons.includes('unhealthy_container'), true);
    assertEquals(body.reasons.includes('no_models_available'), true);
  } finally {
    await server.stop();
  }
});
|
||||
|
||||
// With rateLimit=2, the third authenticated API call must be rejected with
// 429 while /health and /metrics stay reachable (exempt from the limiter).
Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => {
  const port = 19200 + Math.floor(Math.random() * 1000);
  const server = new ApiServer(
    {
      host: '127.0.0.1',
      port,
      apiKeys: ['valid-key'],
      rateLimit: 2,
      cors: false,
      corsOrigins: [],
    },
    // Models are available so /health reports ok for the exemption check.
    {
      async getAllStatus() {
        return new Map();
      },
      async getAllAvailableModels() {
        return new Map([
          ['meta-llama/Llama-3.1-8B-Instruct', [{ type: 'vllm' }]],
        ]);
      },
    } as never,
    {
      async getAllModels() {
        return [];
      },
    } as never,
    {} as never,
    {
      getStatus() {
        return {
          localNode: null,
          nodes: [],
          models: {},
          desiredDeployments: [],
        };
      },
    } as never,
    {
      gpuDetector: {
        async detectGpus() {
          return [];
        },
      } as never,
    },
  );

  await server.start();

  try {
    const requestHeaders = {
      Authorization: 'Bearer valid-key',
    };

    // First two requests fit inside the limit of 2.
    const first = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
    assertEquals(first.status, 200);
    await first.text();

    const second = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
    assertEquals(second.status, 200);
    await second.text();

    // The third exceeds the window and is throttled.
    const third = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
    assertEquals(third.status, 429);
    assertEquals((await third.json()).error.type, 'rate_limit_exceeded');

    // Operational endpoints are not rate limited.
    const health = await fetch(`http://127.0.0.1:${port}/health`);
    assertEquals(health.status, 200);
    await health.text();

    const metrics = await fetch(`http://127.0.0.1:${port}/metrics`);
    assertEquals(metrics.status, 200);
    await metrics.text();
  } finally {
    await server.stop();
  }
});
|
||||
@@ -0,0 +1,120 @@
|
||||
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||
import { ChatHandler } from '../ts/api/handlers/chat.ts';
|
||||
import { EmbeddingsHandler } from '../ts/api/handlers/embeddings.ts';
|
||||
import { UpstreamTimeoutError } from '../ts/containers/base-container.ts';
|
||||
|
||||
class TestResponse {
|
||||
public statusCode = 200;
|
||||
public headers: Record<string, string> = {};
|
||||
public body = '';
|
||||
|
||||
public writeHead(statusCode: number, headers: Record<string, string>): TestResponse {
|
||||
this.statusCode = statusCode;
|
||||
this.headers = headers;
|
||||
return this;
|
||||
}
|
||||
|
||||
public end(body = ''): TestResponse {
|
||||
this.body = body;
|
||||
return this;
|
||||
}
|
||||
|
||||
public write(_chunk: string | Uint8Array): boolean {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// A container whose chat calls raise UpstreamTimeoutError must surface to the
// client as a 504 with the 'upstream_timeout' error type.
Deno.test('ChatHandler maps upstream timeouts to 504 responses', async () => {
  const handler = new ChatHandler(
    // Container manager: always finds a container that times out.
    {
      async findContainerForModel() {
        return {
          async chatCompletion() {
            throw new UpstreamTimeoutError();
          },
          async chatCompletionStream() {
            throw new UpstreamTimeoutError();
          },
        };
      },
    } as never,
    // Registry: resolves any model name.
    {
      async getModel(modelName: string) {
        return { id: modelName };
      },
    } as never,
    // Loader stub: reports failure (unused here — a container is found).
    {
      async loadModel() {
        return { success: false };
      },
    } as never,
    {
      shouldDeployLocallyFirst() {
        return false;
      },
    } as never,
  );

  const response = new TestResponse();
  await handler.handleChatCompletion(
    { headers: {} } as never,
    response as never,
    { model: 'meta-llama/Llama-3.1-8B-Instruct', messages: [{ role: 'user', content: 'hi' }] },
  );

  assertEquals(response.statusCode, 504);
  assertEquals(JSON.parse(response.body).error.type, 'upstream_timeout');
});
|
||||
|
||||
// The embeddings path proxies to a remote node via global fetch; an
// AbortError from that fetch (simulating a timeout) must map to a 504.
Deno.test('EmbeddingsHandler maps upstream timeouts to 504 responses', async () => {
  // Patch global fetch for the duration of the test; restored in finally.
  const originalFetch = globalThis.fetch;
  globalThis.fetch = async () => {
    const error = new Error('request aborted');
    error.name = 'AbortError';
    throw error;
  };

  try {
    const handler = new EmbeddingsHandler(
      // No local container → forces the remote (fetch-based) path.
      {
        async findContainerForModel() {
          return null;
        },
      } as never,
      {
        async getModel(modelName: string) {
          return { id: modelName };
        },
      } as never,
      // Control plane reports the model living on a healthy remote node.
      {
        async ensureModelViaControlPlane(modelName: string) {
          return {
            location: {
              modelId: modelName,
              nodeName: 'worker-a',
              endpoint: 'http://worker-a:8080',
              healthy: true,
              engine: 'vllm',
              containerId: 'remote',
            },
          };
        },
        getLocalNodeName() {
          return 'control';
        },
      } as never,
    );

    const response = new TestResponse();
    await handler.handleEmbeddings(
      { headers: {} } as never,
      response as never,
      { model: 'BAAI/bge-m3', input: 'hello' },
    );

    assertEquals(response.statusCode, 504);
    assertEquals(JSON.parse(response.body).error.type, 'upstream_timeout');
  } finally {
    globalThis.fetch = originalFetch;
  }
});
|
||||
@@ -0,0 +1,134 @@
|
||||
import { assertEquals, assertExists } from 'jsr:@std/assert@^1.0.0';
|
||||
import { CLUSTER, PATHS } from '../ts/constants.ts';
|
||||
import { ClusterManager } from '../ts/cluster/cluster-manager.ts';
|
||||
import type { IClusterNodeHeartbeat } from '../ts/interfaces/cluster.ts';
|
||||
|
||||
// Heartbeat fixture: a healthy node exposing a single 24 GB NVIDIA GPU in one
// topology group. The node named 'control' gets the control-plane role; any
// other name becomes a worker. `lastSeenAt` drives staleness pruning below.
function createNode(nodeName: string, lastSeenAt: number): IClusterNodeHeartbeat {
  return {
    nodeName,
    role: nodeName === 'control' ? 'control-plane' : 'worker',
    endpoint: `http://${nodeName}:8080`,
    healthy: true,
    resources: {
      gpuCount: 1,
      totalVramGb: 24,
      availableVramGb: 24,
      maxSingleGpuVramGb: 24,
      largestGpuGroupCount: 1,
      largestGpuGroupVramGb: 24,
      deploymentCount: 0,
      topologyGroups: [
        {
          id: 'nvidia-0',
          vendor: 'nvidia',
          gpuIds: ['gpu-0'],
          gpuCount: 1,
          totalVramGb: 24,
          maxSingleGpuVramGb: 24,
          busNumbers: [1],
        },
      ],
    },
    deployments: [],
    lastSeenAt,
  };
}
|
||||
|
||||
async function waitForPersistence(): Promise<void> {
|
||||
await new Promise((resolve) => setTimeout(resolve, 25));
|
||||
}
|
||||
|
||||
// Persisted cluster state should be loaded by initialize(), with nodes whose
// lastSeenAt exceeds the staleness window dropped on the way in.
Deno.test('ClusterManager initialize loads persisted state and prunes stale nodes', async () => {
  // Redirect PATHS.DATA_DIR at a temp dir; restored in finally.
  const originalDataDir = PATHS.DATA_DIR;
  const tempDir = await Deno.makeTempDir();
  (PATHS as { DATA_DIR: string }).DATA_DIR = tempDir;

  try {
    const now = Date.now();
    // Two fresh nodes plus one just past the stale cutoff.
    await Deno.writeTextFile(
      `${tempDir}/cluster-state.json`,
      JSON.stringify({
        nodes: [
          createNode('control', now),
          createNode('worker-fresh', now),
          createNode('worker-stale', now - CLUSTER.NODE_STALE_AFTER_MS - 1000),
        ],
      }),
    );
    // Control-plane state: one desired deployment and a cordoned worker.
    await Deno.writeTextFile(
      `${tempDir}/cluster-control-state.json`,
      JSON.stringify({
        desiredDeployments: [
          { modelId: 'meta-llama/Llama-3.1-8B-Instruct', desiredReplicas: 2, updatedAt: now },
        ],
        nodeSchedulerStates: {
          'worker-fresh': 'cordoned',
        },
      }),
    );

    const clusterManager = new ClusterManager();
    clusterManager.configure({
      enabled: true,
      nodeName: 'control',
      role: 'control-plane',
      bindHost: '0.0.0.0',
      gossipPort: 7946,
      heartbeatIntervalMs: 5000,
      seedNodes: [],
    });

    await clusterManager.initialize();

    // The stale node is pruned; everything else is restored intact.
    assertEquals(clusterManager.getAllNodes().map((node) => node.nodeName), ['control', 'worker-fresh']);
    assertExists(clusterManager.getLocalNode());
    assertEquals(clusterManager.getDesiredDeployments().length, 1);
    assertEquals(clusterManager.getNodeSchedulerState('worker-fresh'), 'cordoned');
  } finally {
    (PATHS as { DATA_DIR: string }).DATA_DIR = originalDataDir;
    await Deno.remove(tempDir, { recursive: true });
  }
});
|
||||
|
||||
// Mutations made before initialize() must not write any state files; the same
// mutations made after initialize() must be persisted to both files.
Deno.test('ClusterManager persists state only after initialization completes', async () => {
  const originalDataDir = PATHS.DATA_DIR;
  const tempDir = await Deno.makeTempDir();
  (PATHS as { DATA_DIR: string }).DATA_DIR = tempDir;

  try {
    const clusterManager = new ClusterManager();
    clusterManager.configure({
      enabled: true,
      nodeName: 'control',
      role: 'control-plane',
      bindHost: '0.0.0.0',
      gossipPort: 7946,
      heartbeatIntervalMs: 5000,
      seedNodes: [],
    });

    // Pre-init mutations: nothing should hit disk yet.
    clusterManager.updateLocalNode(createNode('control', Date.now()));
    clusterManager.upsertDesiredDeployment('meta-llama/Llama-3.1-8B-Instruct', 1);
    await waitForPersistence();

    // stat() rejects when the file does not exist → null via catch.
    assertEquals(await Deno.stat(`${tempDir}/cluster-state.json`).catch(() => null), null);
    assertEquals(await Deno.stat(`${tempDir}/cluster-control-state.json`).catch(() => null), null);

    // Post-init mutations must be flushed to both state files.
    await clusterManager.initialize();
    clusterManager.updateLocalNode(createNode('control', Date.now()));
    clusterManager.setNodeSchedulerState('control', 'active');
    clusterManager.upsertDesiredDeployment('meta-llama/Llama-3.1-8B-Instruct', 3);
    await waitForPersistence();

    const stateFile = JSON.parse(await Deno.readTextFile(`${tempDir}/cluster-state.json`));
    const controlFile = JSON.parse(await Deno.readTextFile(`${tempDir}/cluster-control-state.json`));

    assertEquals(stateFile.nodes.length, 1);
    assertEquals(stateFile.nodes[0].nodeName, 'control');
    assertEquals(controlFile.desiredDeployments[0].desiredReplicas, 3);
    assertEquals(controlFile.nodeSchedulerStates.control, 'active');
  } finally {
    (PATHS as { DATA_DIR: string }).DATA_DIR = originalDataDir;
    await Deno.remove(tempDir, { recursive: true });
  }
});
|
||||
@@ -0,0 +1,111 @@
|
||||
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||
import { ConfigHandler } from '../ts/cli/config-handler.ts';
|
||||
import { PATHS } from '../ts/constants.ts';
|
||||
import { logger } from '../ts/logger.ts';
|
||||
|
||||
// `config init` should write a file matching the current default schema
// (ui/bundle serving, standalone cluster role, vllm default engine).
Deno.test('ConfigHandler init writes the current default config shape', async () => {
  const tempDir = await Deno.makeTempDir();
  // Point the config paths at the temp dir; restored in finally.
  const originalConfigDir = PATHS.CONFIG_DIR;
  const originalConfigFile = PATHS.CONFIG_FILE;
  (PATHS as { CONFIG_DIR: string }).CONFIG_DIR = tempDir;
  (PATHS as { CONFIG_FILE: string }).CONFIG_FILE = `${tempDir}/config.json`;

  try {
    const handler = new ConfigHandler();
    await handler.init();

    const config = JSON.parse(await Deno.readTextFile(`${tempDir}/config.json`));
    assertEquals(config.ui.enabled, true);
    assertEquals(config.ui.assetSource, 'bundle');
    assertEquals(config.cluster.role, 'standalone');
    assertEquals(config.models.registryUrl, 'https://list.modelgrid.com/catalog/models.json');
    assertEquals(config.models.autoDeploy, true);
    assertEquals(config.models.defaultEngine, 'vllm');
  } finally {
    (PATHS as { CONFIG_DIR: string }).CONFIG_DIR = originalConfigDir;
    (PATHS as { CONFIG_FILE: string }).CONFIG_FILE = originalConfigFile;
    await Deno.remove(tempDir, { recursive: true });
  }
});
|
||||
|
||||
// `config show` renders settings into logger boxes; the test captures the
// logBox calls rather than asserting on formatted terminal output.
Deno.test('ConfigHandler show renders canonical model and ui settings', async () => {
  const tempDir = await Deno.makeTempDir();
  const originalConfigFile = PATHS.CONFIG_FILE;
  const boxes: Array<{ title: string; lines: string[] }> = [];
  const originalLog = logger.log;
  const originalLogBox = logger.logBox;

  (PATHS as { CONFIG_FILE: string }).CONFIG_FILE = `${tempDir}/config.json`;

  // Silence plain log lines and record boxed output for the assertions.
  logger.log = (_message: string) => {};
  logger.logBox = (
    title: string,
    lines: string[],
  ) => {
    boxes.push({ title, lines });
  };

  try {
    // A full, valid config file exercising every section `show` renders.
    await Deno.writeTextFile(
      `${tempDir}/config.json`,
      JSON.stringify({
        version: '1.0.0',
        api: {
          port: 8080,
          host: '0.0.0.0',
          apiKeys: ['sk-test'],
          rateLimit: 60,
          cors: true,
          corsOrigins: ['*'],
        },
        ui: {
          enabled: true,
          port: 8081,
          host: '0.0.0.0',
          assetSource: 'bundle',
        },
        docker: {
          networkName: 'modelgrid',
          runtime: 'docker',
        },
        gpus: {
          autoDetect: true,
          assignments: {},
        },
        containers: [],
        models: {
          registryUrl: 'https://example.com/catalog.json',
          autoDeploy: false,
          defaultEngine: 'vllm',
          autoLoad: ['meta-llama/Llama-3.1-8B-Instruct'],
        },
        cluster: {
          enabled: false,
          nodeName: 'modelgrid-local',
          role: 'standalone',
          bindHost: '0.0.0.0',
          gossipPort: 7946,
          heartbeatIntervalMs: 5000,
          seedNodes: [],
        },
        checkInterval: 30000,
      }),
    );

    const handler = new ConfigHandler();
    await handler.show();

    const modelsBox = boxes.find((box) => box.title === 'Models');
    assertEquals(modelsBox?.lines.some((line) => line.includes('Auto Deploy:')), true);
    assertEquals(modelsBox?.lines.some((line) => line.includes('Default Engine: vllm')), true);
    assertEquals(modelsBox?.lines.some((line) => line.includes('https://example.com/catalog.json')), true);

    const apiBox = boxes.find((box) => box.title === 'API Server');
    assertEquals(apiBox?.lines.some((line) => line.includes('Rate Limit: 60 req/min')), true);
  } finally {
    logger.log = originalLog;
    logger.logBox = originalLogBox;
    (PATHS as { CONFIG_FILE: string }).CONFIG_FILE = originalConfigFile;
    await Deno.remove(tempDir, { recursive: true });
  }
});
|
||||
@@ -0,0 +1,40 @@
|
||||
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||
import { ModelRegistry } from '../ts/models/registry.ts';
|
||||
|
||||
// Port 9 (discard) refuses connections, so the catalog fetch fails and the
// registry must fall back to its built-in catalog.
Deno.test('ModelRegistry falls back to the built-in catalog when the source is unavailable', async () => {
  const registry = new ModelRegistry('http://127.0.0.1:9/catalog.json');
  const catalog = await registry.fetchCatalog(true);

  assertEquals(catalog.version, '1.0');
  assertEquals(catalog.models.length > 0, true);
});
|
||||
|
||||
// A filesystem path as the registry source should be read directly, and an
// alias lookup should resolve to the canonical model id.
Deno.test('ModelRegistry reads catalog entries from a local file source', async () => {
  const filePath = await Deno.makeTempFile({ suffix: '.json' });
  await Deno.writeTextFile(
    filePath,
    JSON.stringify({
      version: '1.0',
      generatedAt: '2026-01-01T00:00:00.000Z',
      models: [
        {
          id: 'Qwen/Qwen2.5-7B-Instruct',
          aliases: ['qwen-local'],
          engine: 'vllm',
          source: { repo: 'Qwen/Qwen2.5-7B-Instruct' },
          capabilities: { chat: true },
          requirements: { minVramGb: 16 },
        },
      ],
    }),
  );

  try {
    const registry = new ModelRegistry(filePath);
    // Looked up via alias, resolved to the canonical id.
    const model = await registry.getModel('qwen-local');

    assertEquals(model?.id, 'Qwen/Qwen2.5-7B-Instruct');
  } finally {
    await Deno.remove(filePath);
  }
});
|
||||
@@ -0,0 +1,119 @@
|
||||
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||
import { ConfigManager } from '../ts/config/config-manager.ts';
|
||||
import type { IModelGridConfig } from '../ts/interfaces/config.ts';
|
||||
import { logger } from '../ts/logger.ts';
|
||||
|
||||
// normalizeConfig should fill missing sections (ui is omitted here) with
// defaults while passing explicitly-set model settings through untouched.
Deno.test('ConfigManager normalizes current config defaults', () => {
  const configManager = new ConfigManager();

  const normalized = configManager.normalizeConfig({
    version: '1.0.0',
    api: {
      port: 9000,
      host: '127.0.0.1',
      apiKeys: ['test-key'],
    },
    docker: {
      networkName: 'modelgrid',
      runtime: 'docker',
    },
    gpus: {
      autoDetect: true,
      assignments: {},
    },
    containers: [],
    models: {
      registryUrl: 'https://example.com/catalog.json',
      autoDeploy: false,
      defaultEngine: 'vllm',
      autoLoad: ['Qwen/Qwen2.5-7B-Instruct'],
    },
    cluster: {
      enabled: false,
      nodeName: 'modelgrid-local',
      role: 'standalone',
      bindHost: '0.0.0.0',
      gossipPort: 7946,
      heartbeatIntervalMs: 5000,
      seedNodes: [],
    },
    checkInterval: 15000,
  });

  // Explicit values survive; the omitted ui section gains its defaults.
  assertEquals(normalized.models.registryUrl, 'https://example.com/catalog.json');
  assertEquals(normalized.models.autoDeploy, false);
  assertEquals(normalized.models.defaultEngine, 'vllm');
  assertEquals(normalized.ui.enabled, true);
  assertEquals(normalized.ui.port, 8081);
  assertEquals(normalized.ui.assetSource, 'bundle');
});
|
||||
|
||||
// Legacy/unknown config keys must be dropped with a warning, not an error.
// The casts widen the literal so removed keys can be supplied for the test.
Deno.test('ConfigManager warns when config contains ignored keys', () => {
  const configManager = new ConfigManager();
  // Capture logger.warn output; restored in finally.
  const warnings: string[] = [];
  const originalWarn = logger.warn;
  logger.warn = (message: string) => {
    warnings.push(message);
  };

  try {
    configManager.normalizeConfig({
      version: '1.0.0',
      api: {
        port: 8080,
        host: '127.0.0.1',
        apiKeys: [],
      },
      docker: {
        networkName: 'modelgrid',
        runtime: 'docker',
      },
      gpus: {
        autoDetect: true,
        assignments: {},
      },
      containers: [
        // 'ollama' is not a supported container type anymore.
        { id: 'legacy', type: 'ollama' } as never,
      ],
      models: {
        registryUrl: 'https://example.com/catalog.json',
        autoDeploy: true,
        defaultEngine: 'vllm',
        autoLoad: [],
        // Removed keys retained from an older config schema.
        greenlistUrl: 'https://legacy.example.com/catalog.json',
        autoPull: true,
        defaultContainer: 'legacy-container',
      } as IModelGridConfig['models'] & {
        greenlistUrl: string;
        autoPull: boolean;
        defaultContainer: string;
      },
      cluster: {
        enabled: false,
        nodeName: 'modelgrid-local',
        role: 'standalone',
        bindHost: '0.0.0.0',
        gossipPort: 7946,
        heartbeatIntervalMs: 5000,
        seedNodes: [],
      },
      checkInterval: 30000,
      legacySection: true,
    } as Partial<IModelGridConfig> & {
      legacySection: boolean;
      models: IModelGridConfig['models'] & {
        greenlistUrl: string;
        autoPull: boolean;
        defaultContainer: string;
      };
    });
  } finally {
    logger.warn = originalWarn;
  }

  // One warning per removed/unknown key and per unsupported container type.
  assertEquals(warnings.includes('Ignoring unknown config key: legacySection'), true);
  assertEquals(warnings.includes('Ignoring removed config key: models.greenlistUrl'), true);
  assertEquals(warnings.includes('Ignoring removed config key: models.autoPull'), true);
  assertEquals(warnings.includes('Ignoring removed config key: models.defaultContainer'), true);
  assertEquals(warnings.includes('Ignoring unsupported container type: ollama'), true);
});
|
||||
@@ -0,0 +1,67 @@
|
||||
// Smoke test for the UI server: bundle mode serves /index.html,
|
||||
// disk mode serves /app.js, /_ui/overview returns structured JSON.
|
||||
// Run with: deno run --allow-all test/ui-server.smoke.ts
|
||||
|
||||
import { UiServer } from '../ts/ui/server.ts';
|
||||
import { ContainerManager } from '../ts/containers/container-manager.ts';
|
||||
import { ClusterManager } from '../ts/cluster/cluster-manager.ts';
|
||||
|
||||
// Boot a UiServer in the given asset mode and probe its key routes:
// index.html, app.js, SPA fallback, /_ui/overview JSON, and a 404.
// Throws (failing the smoke test) on any unexpected response.
async function probe(source: 'bundle' | 'disk', port: number): Promise<void> {
  const cm = new ContainerManager();
  const cluster = new ClusterManager();
  cluster.configure({
    enabled: false,
    nodeName: 'test-node',
    role: 'standalone',
    bindHost: '127.0.0.1',
    gossipPort: 7946,
    heartbeatIntervalMs: 5000,
    seedNodes: [],
  });

  const server = new UiServer(
    { enabled: true, port, host: '127.0.0.1', assetSource: source },
    cm,
    cluster,
  );
  await server.start();

  try {
    // Root must serve the app shell.
    const index = await fetch(`http://127.0.0.1:${port}/`);
    const indexBody = await index.text();
    if (!index.ok || !indexBody.includes('ModelGrid')) {
      throw new Error(`[${source}] index.html missing expected content (status=${index.status})`);
    }

    // Static JS bundle must be served in both asset modes.
    const app = await fetch(`http://127.0.0.1:${port}/app.js`);
    const appBody = await app.text();
    if (!app.ok || !appBody.includes('ModelGrid UI')) {
      throw new Error(`[${source}] app.js missing expected content (status=${app.status})`);
    }

    // Client-side routes fall back to index.html.
    const spa = await fetch(`http://127.0.0.1:${port}/cluster/nodes`);
    const spaBody = await spa.text();
    if (!spa.ok || !spaBody.includes('ModelGrid')) {
      throw new Error(`[${source}] SPA fallback did not return index.html (status=${spa.status})`);
    }

    // Overview endpoint returns structured node + health JSON.
    const overview = await fetch(`http://127.0.0.1:${port}/_ui/overview`);
    const data = await overview.json();
    if (!overview.ok || data.node?.name !== 'test-node' || !data.health?.status) {
      throw new Error(`[${source}] /_ui/overview unexpected: ${JSON.stringify(data)}`);
    }

    // A missing asset must 404 (not fall back to the SPA shell).
    const missing = await fetch(`http://127.0.0.1:${port}/nope.png`);
    if (missing.status !== 404) {
      throw new Error(`[${source}] expected 404 for missing asset, got ${missing.status}`);
    }

    console.log(`ok: ${source} mode — index, app.js, SPA fallback, /_ui/overview, 404`);
  } finally {
    await server.stop();
  }
}
|
||||
|
||||
await probe('bundle', 18081);
|
||||
await probe('disk', 18082);
|
||||
console.log('UI server smoke test passed');
|
||||
@@ -0,0 +1,55 @@
|
||||
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||
import { ClusterManager } from '../ts/cluster/cluster-manager.ts';
|
||||
import { UiServer } from '../ts/ui/server.ts';
|
||||
|
||||
Deno.test('UiServer overview mirrors degraded API health semantics', async () => {
|
||||
const port = 20300 + Math.floor(Math.random() * 1000);
|
||||
const cluster = new ClusterManager();
|
||||
cluster.configure({
|
||||
enabled: false,
|
||||
nodeName: 'ui-test-node',
|
||||
role: 'standalone',
|
||||
bindHost: '127.0.0.1',
|
||||
gossipPort: 7946,
|
||||
heartbeatIntervalMs: 5000,
|
||||
seedNodes: [],
|
||||
});
|
||||
|
||||
const server = new UiServer(
|
||||
{ enabled: true, port, host: '127.0.0.1', assetSource: 'disk' },
|
||||
{
|
||||
async getAllStatus() {
|
||||
return new Map([
|
||||
['vllm-1', { running: false, health: 'unhealthy' }],
|
||||
]);
|
||||
},
|
||||
async getAllAvailableModels() {
|
||||
return new Map();
|
||||
},
|
||||
} as never,
|
||||
cluster,
|
||||
);
|
||||
|
||||
(server as unknown as {
|
||||
gpuDetector: { detectGpus: () => Promise<unknown[]> };
|
||||
}).gpuDetector = {
|
||||
async detectGpus() {
|
||||
return [{ id: 'nvidia-0' }];
|
||||
},
|
||||
};
|
||||
|
||||
await server.start();
|
||||
|
||||
try {
|
||||
const response = await fetch(`http://127.0.0.1:${port}/_ui/overview`);
|
||||
const body = await response.json();
|
||||
|
||||
assertEquals(response.status, 200);
|
||||
assertEquals(body.health.status, 'degraded');
|
||||
assertEquals(body.health.reasons.includes('unhealthy_container'), true);
|
||||
assertEquals(body.health.reasons.includes('no_models_available'), true);
|
||||
assertEquals(body.node.name, 'ui-test-node');
|
||||
} finally {
|
||||
await server.stop();
|
||||
}
|
||||
});
|
||||
+16
-1
@@ -6,6 +6,8 @@ import * as http from 'node:http';
|
||||
import type { IApiError, IChatCompletionRequest } from '../../interfaces/api.ts';
|
||||
import { ClusterCoordinator } from '../../cluster/coordinator.ts';
|
||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||
import { UpstreamTimeoutError } from '../../containers/base-container.ts';
|
||||
import { API_SERVER } from '../../constants.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import { ModelRegistry } from '../../models/registry.ts';
|
||||
import { ModelLoader } from '../../models/loader.ts';
|
||||
@@ -85,6 +87,11 @@ export class ChatHandler {
|
||||
|
||||
await this.proxyChatRequest(req, res, ensured.location.endpoint, requestBody);
|
||||
} catch (error) {
|
||||
if (error instanceof UpstreamTimeoutError) {
|
||||
this.sendError(res, 504, error.message, 'upstream_timeout');
|
||||
return;
|
||||
}
|
||||
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error(`Chat completion error: ${message}`);
|
||||
this.sendError(res, 500, `Chat completion failed: ${message}`, 'server_error');
|
||||
@@ -158,11 +165,19 @@ export class ChatHandler {
|
||||
targetEndpoint: string,
|
||||
body: IChatCompletionRequest,
|
||||
): Promise<void> {
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), API_SERVER.REQUEST_TIMEOUT_MS);
|
||||
const response = await fetch(`${targetEndpoint}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: this.buildForwardHeaders(req),
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
signal: controller.signal,
|
||||
}).catch((error) => {
|
||||
if (error instanceof Error && error.name === 'AbortError') {
|
||||
throw new UpstreamTimeoutError();
|
||||
}
|
||||
throw error;
|
||||
}).finally(() => clearTimeout(timeout));
|
||||
|
||||
if (body.stream) {
|
||||
res.writeHead(response.status, {
|
||||
|
||||
@@ -11,6 +11,8 @@ import type {
|
||||
} from '../../interfaces/api.ts';
|
||||
import { ClusterCoordinator } from '../../cluster/coordinator.ts';
|
||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||
import { UpstreamTimeoutError } from '../../containers/base-container.ts';
|
||||
import { API_SERVER } from '../../constants.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import { ModelRegistry } from '../../models/registry.ts';
|
||||
|
||||
@@ -80,7 +82,7 @@ export class EmbeddingsHandler {
|
||||
return;
|
||||
}
|
||||
|
||||
const response = await fetch(`${ensured.location.endpoint}/v1/embeddings`, {
|
||||
const response = await this.fetchWithTimeout(`${ensured.location.endpoint}/v1/embeddings`, {
|
||||
method: 'POST',
|
||||
headers: this.buildForwardHeaders(req),
|
||||
body: JSON.stringify(requestBody),
|
||||
@@ -92,6 +94,11 @@ export class EmbeddingsHandler {
|
||||
});
|
||||
res.end(text);
|
||||
} catch (error) {
|
||||
if (error instanceof UpstreamTimeoutError) {
|
||||
this.sendError(res, 504, error.message, 'upstream_timeout');
|
||||
return;
|
||||
}
|
||||
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error(`Embeddings error: ${message}`);
|
||||
this.sendError(res, 500, `Embeddings generation failed: ${message}`, 'server_error');
|
||||
@@ -159,7 +166,7 @@ export class EmbeddingsHandler {
|
||||
model: string,
|
||||
input: string,
|
||||
): Promise<{ vector: number[]; tokenCount: number }> {
|
||||
const response = await fetch(`${endpoint}/v1/embeddings`, {
|
||||
const response = await this.fetchWithTimeout(`${endpoint}/v1/embeddings`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ model, input }),
|
||||
@@ -181,7 +188,7 @@ export class EmbeddingsHandler {
|
||||
_model: string,
|
||||
input: string,
|
||||
): Promise<{ vector: number[]; tokenCount: number }> {
|
||||
const response = await fetch(`${endpoint}/embed`, {
|
||||
const response = await this.fetchWithTimeout(`${endpoint}/embed`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ inputs: input }),
|
||||
@@ -214,6 +221,25 @@ export class EmbeddingsHandler {
|
||||
return headers;
|
||||
}
|
||||
|
||||
private async fetchWithTimeout(url: string, init: RequestInit): Promise<Response> {
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), API_SERVER.REQUEST_TIMEOUT_MS);
|
||||
|
||||
try {
|
||||
return await fetch(url, {
|
||||
...init,
|
||||
signal: controller.signal,
|
||||
});
|
||||
} catch (error) {
|
||||
if (error instanceof Error && error.name === 'AbortError') {
|
||||
throw new UpstreamTimeoutError();
|
||||
}
|
||||
throw error;
|
||||
} finally {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
}
|
||||
|
||||
private sendError(
|
||||
res: http.ServerResponse,
|
||||
statusCode: number,
|
||||
|
||||
+64
-17
@@ -17,6 +17,19 @@ import { EmbeddingsHandler } from './handlers/embeddings.ts';
|
||||
import { AuthMiddleware } from './middleware/auth.ts';
|
||||
import { SanityMiddleware } from './middleware/sanity.ts';
|
||||
|
||||
interface IParsedRequestBody {
|
||||
kind: 'ok' | 'invalid' | 'too_large';
|
||||
body?: unknown;
|
||||
}
|
||||
|
||||
interface IApiRouterOptions {
|
||||
chatHandler?: ChatHandler;
|
||||
modelsHandler?: ModelsHandler;
|
||||
embeddingsHandler?: EmbeddingsHandler;
|
||||
authMiddleware?: AuthMiddleware;
|
||||
sanityMiddleware?: SanityMiddleware;
|
||||
}
|
||||
|
||||
/**
|
||||
* API Router - routes requests to handlers
|
||||
*/
|
||||
@@ -37,6 +50,7 @@ export class ApiRouter {
|
||||
modelLoader: ModelLoader,
|
||||
clusterCoordinator: ClusterCoordinator,
|
||||
apiKeys: string[],
|
||||
options: IApiRouterOptions = {},
|
||||
) {
|
||||
this.containerManager = containerManager;
|
||||
this.modelRegistry = modelRegistry;
|
||||
@@ -44,22 +58,23 @@ export class ApiRouter {
|
||||
this.clusterCoordinator = clusterCoordinator;
|
||||
|
||||
// Initialize handlers
|
||||
this.chatHandler = new ChatHandler(
|
||||
this.chatHandler = options.chatHandler || new ChatHandler(
|
||||
containerManager,
|
||||
modelRegistry,
|
||||
modelLoader,
|
||||
clusterCoordinator,
|
||||
);
|
||||
this.modelsHandler = new ModelsHandler(containerManager, modelRegistry, clusterCoordinator);
|
||||
this.embeddingsHandler = new EmbeddingsHandler(
|
||||
this.modelsHandler =
|
||||
options.modelsHandler || new ModelsHandler(containerManager, modelRegistry, clusterCoordinator);
|
||||
this.embeddingsHandler = options.embeddingsHandler || new EmbeddingsHandler(
|
||||
containerManager,
|
||||
modelRegistry,
|
||||
clusterCoordinator,
|
||||
);
|
||||
|
||||
// Initialize middleware
|
||||
this.authMiddleware = new AuthMiddleware(apiKeys);
|
||||
this.sanityMiddleware = new SanityMiddleware(modelRegistry);
|
||||
this.authMiddleware = options.authMiddleware || new AuthMiddleware(apiKeys);
|
||||
this.sanityMiddleware = options.sanityMiddleware || new SanityMiddleware(modelRegistry);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -119,11 +134,16 @@ export class ApiRouter {
|
||||
}
|
||||
|
||||
// Parse body
|
||||
const body = await this.parseRequestBody(req);
|
||||
if (!body) {
|
||||
const parsedBody = await this.parseRequestBody(req);
|
||||
if (parsedBody.kind === 'too_large') {
|
||||
this.sendError(res, 413, 'Request body too large', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
if (parsedBody.kind !== 'ok') {
|
||||
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
const body = parsedBody.body;
|
||||
|
||||
// Validate request
|
||||
const validation = this.sanityMiddleware.validateChatRequest(body);
|
||||
@@ -155,11 +175,16 @@ export class ApiRouter {
|
||||
}
|
||||
|
||||
// Parse body
|
||||
const body = await this.parseRequestBody(req);
|
||||
if (!body) {
|
||||
const parsedBody = await this.parseRequestBody(req);
|
||||
if (parsedBody.kind === 'too_large') {
|
||||
this.sendError(res, 413, 'Request body too large', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
if (parsedBody.kind !== 'ok') {
|
||||
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
const body = parsedBody.body;
|
||||
|
||||
// Convert to chat format and handle
|
||||
const chatBody = this.convertCompletionToChat(body as Record<string, unknown>);
|
||||
@@ -229,11 +254,16 @@ export class ApiRouter {
|
||||
}
|
||||
|
||||
// Parse body
|
||||
const body = await this.parseRequestBody(req);
|
||||
if (!body) {
|
||||
const parsedBody = await this.parseRequestBody(req);
|
||||
if (parsedBody.kind === 'too_large') {
|
||||
this.sendError(res, 413, 'Request body too large', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
if (parsedBody.kind !== 'ok') {
|
||||
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
const body = parsedBody.body;
|
||||
|
||||
const validation = this.sanityMiddleware.validateEmbeddingsRequest(body);
|
||||
if (!validation.valid) {
|
||||
@@ -250,28 +280,45 @@ export class ApiRouter {
|
||||
/**
|
||||
* Parse request body
|
||||
*/
|
||||
private async parseRequestBody(req: http.IncomingMessage): Promise<unknown | null> {
|
||||
private async parseRequestBody(req: http.IncomingMessage): Promise<IParsedRequestBody> {
|
||||
return new Promise((resolve) => {
|
||||
let body = '';
|
||||
let resolved = false;
|
||||
|
||||
const finish = (result: IParsedRequestBody): void => {
|
||||
if (resolved) {
|
||||
return;
|
||||
}
|
||||
resolved = true;
|
||||
resolve(result);
|
||||
};
|
||||
|
||||
req.on('data', (chunk) => {
|
||||
if (resolved) {
|
||||
return;
|
||||
}
|
||||
|
||||
body += chunk.toString();
|
||||
// Limit body size
|
||||
|
||||
if (body.length > 10 * 1024 * 1024) {
|
||||
resolve(null);
|
||||
req.pause();
|
||||
req.destroy();
|
||||
finish({ kind: 'too_large' });
|
||||
}
|
||||
});
|
||||
|
||||
req.on('end', () => {
|
||||
try {
|
||||
resolve(JSON.parse(body));
|
||||
finish({ kind: 'ok', body: JSON.parse(body) });
|
||||
} catch {
|
||||
resolve(null);
|
||||
finish({ kind: 'invalid' });
|
||||
}
|
||||
});
|
||||
|
||||
req.on('error', () => {
|
||||
resolve(null);
|
||||
if (!resolved) {
|
||||
finish({ kind: 'invalid' });
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
+126
-35
@@ -16,6 +16,13 @@ import { ModelRegistry } from '../models/registry.ts';
|
||||
import { ModelLoader } from '../models/loader.ts';
|
||||
import { GpuDetector } from '../hardware/gpu-detector.ts';
|
||||
import { ClusterHandler } from './handlers/cluster.ts';
|
||||
import { buildHealthSnapshot } from '../helpers/health.ts';
|
||||
|
||||
interface IApiServerOptions {
|
||||
gpuDetector?: GpuDetector;
|
||||
router?: ApiRouter;
|
||||
clusterHandler?: ClusterHandler;
|
||||
}
|
||||
|
||||
/**
|
||||
* API Server for ModelGrid
|
||||
@@ -31,6 +38,10 @@ export class ApiServer {
|
||||
private clusterCoordinator: ClusterCoordinator;
|
||||
private clusterHandler: ClusterHandler;
|
||||
private startTime: number = 0;
|
||||
private requestCounts = new Map<string, number>();
|
||||
private authFailureCounts = new Map<string, number>();
|
||||
private serverErrorCounts = new Map<string, number>();
|
||||
private rateLimitBuckets = new Map<string, { count: number; windowStart: number }>();
|
||||
|
||||
constructor(
|
||||
config: IApiConfig,
|
||||
@@ -38,15 +49,16 @@ export class ApiServer {
|
||||
modelRegistry: ModelRegistry,
|
||||
modelLoader: ModelLoader,
|
||||
clusterCoordinator: ClusterCoordinator,
|
||||
options: IApiServerOptions = {},
|
||||
) {
|
||||
this.config = config;
|
||||
this.containerManager = containerManager;
|
||||
this.modelRegistry = modelRegistry;
|
||||
this.gpuDetector = new GpuDetector();
|
||||
this.gpuDetector = options.gpuDetector || new GpuDetector();
|
||||
this.modelLoader = modelLoader;
|
||||
this.clusterCoordinator = clusterCoordinator;
|
||||
this.clusterHandler = new ClusterHandler(clusterCoordinator);
|
||||
this.router = new ApiRouter(
|
||||
this.clusterHandler = options.clusterHandler || new ClusterHandler(clusterCoordinator);
|
||||
this.router = options.router || new ApiRouter(
|
||||
containerManager,
|
||||
modelRegistry,
|
||||
this.modelLoader,
|
||||
@@ -112,6 +124,7 @@ export class ApiServer {
|
||||
res: http.ServerResponse,
|
||||
): Promise<void> {
|
||||
const startTime = Date.now();
|
||||
const requestId = this.ensureRequestId(req, res);
|
||||
|
||||
// Set CORS headers if enabled
|
||||
if (this.config.cors) {
|
||||
@@ -131,18 +144,27 @@ export class ApiServer {
|
||||
|
||||
if (path.startsWith('/_cluster')) {
|
||||
await this.clusterHandler.handle(req, res, path, url);
|
||||
this.recordRequest(path, res.statusCode);
|
||||
return;
|
||||
}
|
||||
|
||||
// Health check endpoint (no auth required)
|
||||
if (path === '/health' || path === '/healthz') {
|
||||
await this.handleHealthCheck(res);
|
||||
this.recordRequest(path, res.statusCode);
|
||||
return;
|
||||
}
|
||||
|
||||
// Metrics endpoint (no auth required)
|
||||
if (path === '/metrics') {
|
||||
await this.handleMetrics(res);
|
||||
this.recordRequest(path, res.statusCode);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!this.isRequestWithinRateLimit(req)) {
|
||||
this.sendError(res, 429, 'Rate limit exceeded', 'rate_limit_exceeded');
|
||||
this.recordRequest(path, res.statusCode);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -156,7 +178,8 @@ export class ApiServer {
|
||||
|
||||
// Log request
|
||||
const duration = Date.now() - startTime;
|
||||
logger.dim(`${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
|
||||
this.recordRequest(path, res.statusCode);
|
||||
logger.dim(`[${requestId}] ${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -187,44 +210,21 @@ export class ApiServer {
|
||||
const gpus = await this.gpuDetector.detectGpus();
|
||||
const models = await this.containerManager.getAllAvailableModels();
|
||||
|
||||
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
||||
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
||||
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
||||
|
||||
// Check container health
|
||||
for (const [id, containerStatus] of statuses) {
|
||||
if (containerStatus.running && containerStatus.health === 'healthy') {
|
||||
containerHealth[id] = 'healthy';
|
||||
} else {
|
||||
containerHealth[id] = 'unhealthy';
|
||||
status = 'degraded';
|
||||
}
|
||||
}
|
||||
|
||||
// Check GPU status
|
||||
for (const gpu of gpus) {
|
||||
gpuStatus[gpu.id] = 'available';
|
||||
}
|
||||
|
||||
const response: IHealthResponse = {
|
||||
status,
|
||||
const response: IHealthResponse = buildHealthSnapshot({
|
||||
statuses,
|
||||
modelCount: models.size,
|
||||
gpus,
|
||||
startTime: this.startTime,
|
||||
version: VERSION,
|
||||
uptime: Math.floor((Date.now() - this.startTime) / 1000),
|
||||
containers: statuses.size,
|
||||
models: models.size,
|
||||
gpus: gpus.length,
|
||||
details: {
|
||||
containers: containerHealth,
|
||||
gpus: gpuStatus,
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
res.writeHead(status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
|
||||
res.writeHead(response.status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(response, null, 2));
|
||||
} catch (error) {
|
||||
res.writeHead(500, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({
|
||||
status: 'error',
|
||||
reasons: ['gpu_detection_failed'],
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
}));
|
||||
}
|
||||
@@ -268,6 +268,28 @@ export class ApiServer {
|
||||
metrics.push(`# TYPE modelgrid_gpus_total gauge`);
|
||||
metrics.push(`modelgrid_gpus_total ${gpus.length}`);
|
||||
|
||||
for (const [path, count] of this.requestCounts.entries()) {
|
||||
metrics.push(`# HELP modelgrid_api_requests_total Total API requests by path`);
|
||||
metrics.push(`# TYPE modelgrid_api_requests_total counter`);
|
||||
metrics.push(`modelgrid_api_requests_total{path="${this.escapeMetricLabel(path)}"} ${count}`);
|
||||
}
|
||||
|
||||
for (const [path, count] of this.authFailureCounts.entries()) {
|
||||
metrics.push(`# HELP modelgrid_api_auth_failures_total Total authentication failures by path`);
|
||||
metrics.push(`# TYPE modelgrid_api_auth_failures_total counter`);
|
||||
metrics.push(
|
||||
`modelgrid_api_auth_failures_total{path="${this.escapeMetricLabel(path)}"} ${count}`,
|
||||
);
|
||||
}
|
||||
|
||||
for (const [path, count] of this.serverErrorCounts.entries()) {
|
||||
metrics.push(`# HELP modelgrid_api_server_errors_total Total 5xx responses by path`);
|
||||
metrics.push(`# TYPE modelgrid_api_server_errors_total counter`);
|
||||
metrics.push(
|
||||
`modelgrid_api_server_errors_total{path="${this.escapeMetricLabel(path)}"} ${count}`,
|
||||
);
|
||||
}
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' });
|
||||
res.end(metrics.join('\n') + '\n');
|
||||
} catch (error) {
|
||||
@@ -310,4 +332,73 @@ export class ApiServer {
|
||||
uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
|
||||
};
|
||||
}
|
||||
|
||||
private recordRequest(path: string, statusCode: number): void {
|
||||
this.incrementMetric(this.requestCounts, path);
|
||||
|
||||
if (statusCode === 401) {
|
||||
this.incrementMetric(this.authFailureCounts, path);
|
||||
}
|
||||
|
||||
if (statusCode >= 500) {
|
||||
this.incrementMetric(this.serverErrorCounts, path);
|
||||
}
|
||||
}
|
||||
|
||||
private isRequestWithinRateLimit(req: http.IncomingMessage): boolean {
|
||||
const configuredLimit = this.config.rateLimit;
|
||||
if (!configuredLimit || configuredLimit <= 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const key = this.getRateLimitKey(req);
|
||||
const now = Date.now();
|
||||
const windowMs = 60 * 1000;
|
||||
const bucket = this.rateLimitBuckets.get(key);
|
||||
|
||||
if (!bucket || now - bucket.windowStart >= windowMs) {
|
||||
this.rateLimitBuckets.set(key, { count: 1, windowStart: now });
|
||||
return true;
|
||||
}
|
||||
|
||||
if (bucket.count >= configuredLimit) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bucket.count += 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
private getRateLimitKey(req: http.IncomingMessage): string {
|
||||
if (typeof req.headers.authorization === 'string') {
|
||||
const match = req.headers.authorization.match(/^Bearer\s+(.+)$/i);
|
||||
if (match) {
|
||||
return `api_key:${match[1]}`;
|
||||
}
|
||||
}
|
||||
|
||||
return `ip:${req.socket.remoteAddress || 'unknown'}`;
|
||||
}
|
||||
|
||||
private incrementMetric(metric: Map<string, number>, path: string): void {
|
||||
metric.set(path, (metric.get(path) || 0) + 1);
|
||||
}
|
||||
|
||||
private ensureRequestId(req: http.IncomingMessage, res: http.ServerResponse): string {
|
||||
const existing = typeof req.headers['x-request-id'] === 'string'
|
||||
? req.headers['x-request-id']
|
||||
: undefined;
|
||||
const requestId = existing || this.generateRequestId();
|
||||
req.headers['x-request-id'] = requestId;
|
||||
res.setHeader('X-Request-Id', requestId);
|
||||
return requestId;
|
||||
}
|
||||
|
||||
private generateRequestId(): string {
|
||||
return `req-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
|
||||
}
|
||||
|
||||
private escapeMetricLabel(value: string): string {
|
||||
return value.replaceAll('\\', '\\\\').replaceAll('"', '\\"');
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,11 +26,8 @@ export class ConfigHandler {
|
||||
const configContent = await fs.readFile(configPath, 'utf-8');
|
||||
const config = JSON.parse(configContent) as IModelGridConfig;
|
||||
const modelConfig = {
|
||||
registryUrl: config.models.registryUrl ||
|
||||
(config.models as { greenlistUrl?: string }).greenlistUrl ||
|
||||
'https://list.modelgrid.com/catalog/models.json',
|
||||
autoDeploy: config.models.autoDeploy ??
|
||||
(config.models as { autoPull?: boolean }).autoPull ?? true,
|
||||
registryUrl: config.models.registryUrl || 'https://list.modelgrid.com/catalog/models.json',
|
||||
autoDeploy: config.models.autoDeploy ?? true,
|
||||
defaultEngine: config.models.defaultEngine || 'vllm',
|
||||
autoLoad: config.models.autoLoad || [],
|
||||
};
|
||||
@@ -218,6 +215,12 @@ export class ConfigHandler {
|
||||
cors: true,
|
||||
corsOrigins: ['*'],
|
||||
},
|
||||
ui: {
|
||||
enabled: true,
|
||||
port: 8081,
|
||||
host: '0.0.0.0',
|
||||
assetSource: 'bundle',
|
||||
},
|
||||
docker: {
|
||||
networkName: 'modelgrid',
|
||||
runtime: 'docker',
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import os from 'node:os';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import type { IModelCatalogEntry } from '../interfaces/catalog.ts';
|
||||
import type {
|
||||
@@ -14,9 +13,10 @@ import type {
|
||||
import { CLUSTER, PATHS } from '../constants.ts';
|
||||
|
||||
export class ClusterManager {
|
||||
private initialized = false;
|
||||
private config: IClusterConfig = {
|
||||
enabled: false,
|
||||
nodeName: os.hostname(),
|
||||
nodeName: 'modelgrid-local',
|
||||
role: 'standalone',
|
||||
bindHost: CLUSTER.DEFAULT_BIND_HOST,
|
||||
gossipPort: CLUSTER.DEFAULT_GOSSIP_PORT,
|
||||
@@ -64,6 +64,8 @@ export class ClusterManager {
|
||||
} catch {
|
||||
// No persisted control state yet.
|
||||
}
|
||||
|
||||
this.initialized = true;
|
||||
}
|
||||
|
||||
public configure(config: IClusterConfig): void {
|
||||
@@ -385,6 +387,10 @@ export class ClusterManager {
|
||||
}
|
||||
|
||||
private schedulePersist(): void {
|
||||
if (!this.initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.persistQueued) {
|
||||
return;
|
||||
}
|
||||
@@ -397,6 +403,10 @@ export class ClusterManager {
|
||||
}
|
||||
|
||||
private scheduleControlPersist(): void {
|
||||
if (!this.initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.controlPersistQueued) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
import * as fs from 'node:fs/promises';
|
||||
import { PATHS, VERSION } from '../constants.ts';
|
||||
import type { IModelGridConfig } from '../interfaces/config.ts';
|
||||
import { logger } from '../logger.ts';
|
||||
|
||||
export class ConfigManager {
|
||||
public async loadConfig(): Promise<IModelGridConfig> {
|
||||
try {
|
||||
const configContent = await fs.readFile(PATHS.CONFIG_FILE, 'utf-8');
|
||||
return this.normalizeConfig(JSON.parse(configContent) as Partial<IModelGridConfig>);
|
||||
} catch (error) {
|
||||
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
|
||||
throw new Error(`Configuration file not found: ${PATHS.CONFIG_FILE}`);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
public async saveConfig(config: IModelGridConfig): Promise<void> {
|
||||
await fs.mkdir(PATHS.CONFIG_DIR, { recursive: true });
|
||||
await fs.writeFile(PATHS.CONFIG_FILE, JSON.stringify(config, null, 2));
|
||||
}
|
||||
|
||||
public normalizeConfig(config: Partial<IModelGridConfig>): IModelGridConfig {
|
||||
this.logIgnoredConfigKeys(config);
|
||||
|
||||
const filteredContainers = (config.containers || []).filter(
|
||||
(container) => (container as { type?: string }).type !== 'ollama',
|
||||
);
|
||||
|
||||
return {
|
||||
version: config.version || VERSION,
|
||||
api: {
|
||||
port: config.api?.port || 8080,
|
||||
host: config.api?.host || '0.0.0.0',
|
||||
apiKeys: config.api?.apiKeys || [],
|
||||
rateLimit: config.api?.rateLimit,
|
||||
cors: config.api?.cors ?? true,
|
||||
corsOrigins: config.api?.corsOrigins || ['*'],
|
||||
},
|
||||
ui: {
|
||||
enabled: config.ui?.enabled ?? true,
|
||||
port: config.ui?.port || 8081,
|
||||
host: config.ui?.host || '0.0.0.0',
|
||||
assetSource: config.ui?.assetSource === 'disk' ? 'disk' : 'bundle',
|
||||
},
|
||||
docker: {
|
||||
networkName: config.docker?.networkName || 'modelgrid',
|
||||
runtime: config.docker?.runtime || 'docker',
|
||||
socketPath: config.docker?.socketPath,
|
||||
},
|
||||
gpus: {
|
||||
autoDetect: config.gpus?.autoDetect ?? true,
|
||||
assignments: config.gpus?.assignments || {},
|
||||
},
|
||||
containers: filteredContainers,
|
||||
models: {
|
||||
registryUrl: config.models?.registryUrl || 'https://list.modelgrid.com/catalog/models.json',
|
||||
autoDeploy: config.models?.autoDeploy ?? true,
|
||||
defaultEngine: 'vllm',
|
||||
autoLoad: config.models?.autoLoad || [],
|
||||
},
|
||||
cluster: {
|
||||
enabled: config.cluster?.enabled ?? false,
|
||||
nodeName: config.cluster?.nodeName || 'modelgrid-local',
|
||||
role: config.cluster?.role || 'standalone',
|
||||
bindHost: config.cluster?.bindHost || '0.0.0.0',
|
||||
gossipPort: config.cluster?.gossipPort || 7946,
|
||||
sharedSecret: config.cluster?.sharedSecret,
|
||||
advertiseUrl: config.cluster?.advertiseUrl,
|
||||
controlPlaneUrl: config.cluster?.controlPlaneUrl,
|
||||
heartbeatIntervalMs: config.cluster?.heartbeatIntervalMs || 5000,
|
||||
seedNodes: config.cluster?.seedNodes || [],
|
||||
},
|
||||
checkInterval: config.checkInterval || 30000,
|
||||
};
|
||||
}
|
||||
|
||||
private logIgnoredConfigKeys(config: Partial<IModelGridConfig>): void {
|
||||
const unknownTopLevelKeys = Object.keys(config).filter((key) =>
|
||||
!['version', 'api', 'ui', 'docker', 'gpus', 'containers', 'models', 'cluster', 'checkInterval']
|
||||
.includes(key)
|
||||
);
|
||||
|
||||
for (const key of unknownTopLevelKeys) {
|
||||
logger.warn(`Ignoring unknown config key: ${key}`);
|
||||
}
|
||||
|
||||
const legacyModelConfig = config.models as {
|
||||
greenlistUrl?: string;
|
||||
autoPull?: boolean;
|
||||
defaultContainer?: string;
|
||||
} | undefined;
|
||||
|
||||
if (legacyModelConfig?.greenlistUrl) {
|
||||
logger.warn('Ignoring removed config key: models.greenlistUrl');
|
||||
}
|
||||
if (legacyModelConfig?.autoPull !== undefined) {
|
||||
logger.warn('Ignoring removed config key: models.autoPull');
|
||||
}
|
||||
if (legacyModelConfig?.defaultContainer) {
|
||||
logger.warn('Ignoring removed config key: models.defaultContainer');
|
||||
}
|
||||
|
||||
for (const container of config.containers || []) {
|
||||
const containerType = (container as { type?: string }).type;
|
||||
if (containerType === 'ollama') {
|
||||
logger.warn('Ignoring unsupported container type: ollama');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -11,6 +11,7 @@ import type {
|
||||
TContainerType,
|
||||
} from '../interfaces/container.ts';
|
||||
import type { IChatCompletionRequest, IChatCompletionResponse } from '../interfaces/api.ts';
|
||||
import { API_SERVER } from '../constants.ts';
|
||||
import { ContainerRuntime } from '../docker/container-runtime.ts';
|
||||
import { logger } from '../logger.ts';
|
||||
|
||||
@@ -23,6 +24,13 @@ export type TModelPullProgress = (progress: {
|
||||
percent?: number;
|
||||
}) => void;
|
||||
|
||||
export class UpstreamTimeoutError extends Error {
|
||||
constructor(message: string = 'Upstream request timed out') {
|
||||
super(message);
|
||||
this.name = 'UpstreamTimeoutError';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Abstract base class for AI model containers
|
||||
*/
|
||||
@@ -165,7 +173,7 @@ export abstract class BaseContainer {
|
||||
const url = `${endpoint}${path}`;
|
||||
|
||||
const controller = new AbortController();
|
||||
const timeout = options.timeout || 30000;
|
||||
const timeout = options.timeout || API_SERVER.REQUEST_TIMEOUT_MS;
|
||||
const timeoutId = setTimeout(() => controller.abort(), timeout);
|
||||
|
||||
try {
|
||||
@@ -180,6 +188,11 @@ export abstract class BaseContainer {
|
||||
});
|
||||
|
||||
return response;
|
||||
} catch (error) {
|
||||
if (error instanceof Error && error.name === 'AbortError') {
|
||||
throw new UpstreamTimeoutError();
|
||||
}
|
||||
throw error;
|
||||
} finally {
|
||||
clearTimeout(timeoutId);
|
||||
}
|
||||
|
||||
+33
-4
@@ -9,6 +9,7 @@ import { logger } from './logger.ts';
|
||||
import { TIMING } from './constants.ts';
|
||||
import type { ModelGrid } from './modelgrid.ts';
|
||||
import { ApiServer } from './api/server.ts';
|
||||
import { UiServer } from './ui/server.ts';
|
||||
import type { IModelGridConfig } from './interfaces/config.ts';
|
||||
|
||||
/**
|
||||
@@ -18,6 +19,7 @@ export class Daemon {
|
||||
private modelgrid: ModelGrid;
|
||||
private isRunning: boolean = false;
|
||||
private apiServer?: ApiServer;
|
||||
private uiServer?: UiServer;
|
||||
|
||||
constructor(modelgrid: ModelGrid) {
|
||||
this.modelgrid = modelgrid;
|
||||
@@ -48,6 +50,9 @@ export class Daemon {
|
||||
// Start API server
|
||||
await this.startApiServer(config);
|
||||
|
||||
// Start UI server (runs on its own port, serves the operations console)
|
||||
await this.startUiServer(config);
|
||||
|
||||
// Start containers
|
||||
await this.startContainers();
|
||||
|
||||
@@ -67,10 +72,9 @@ export class Daemon {
|
||||
await this.monitor();
|
||||
} catch (error) {
|
||||
this.isRunning = false;
|
||||
logger.error(
|
||||
`Daemon failed to start: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
process.exit(1);
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error(`Daemon failed to start: ${message}`);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,6 +90,11 @@ export class Daemon {
|
||||
|
||||
this.isRunning = false;
|
||||
|
||||
// Stop UI server
|
||||
if (this.uiServer) {
|
||||
await this.uiServer.stop();
|
||||
}
|
||||
|
||||
// Stop API server
|
||||
if (this.apiServer) {
|
||||
await this.apiServer.stop();
|
||||
@@ -114,6 +123,26 @@ export class Daemon {
|
||||
await this.apiServer.start();
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the UI server, if enabled.
|
||||
*/
|
||||
private async startUiServer(config: IModelGridConfig): Promise<void> {
|
||||
if (!config.ui.enabled) {
|
||||
logger.dim('UI server disabled in configuration');
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info('Starting UI server...');
|
||||
|
||||
this.uiServer = new UiServer(
|
||||
config.ui,
|
||||
this.modelgrid.getContainerManager(),
|
||||
this.modelgrid.getClusterManager(),
|
||||
);
|
||||
|
||||
await this.uiServer.start();
|
||||
}
|
||||
|
||||
/**
|
||||
* Start configured containers
|
||||
*/
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
import type { IHealthResponse } from '../interfaces/api.ts';
|
||||
import type { IContainerStatus } from '../interfaces/container.ts';
|
||||
import type { IGpuInfo } from '../interfaces/gpu.ts';
|
||||
|
||||
export function buildHealthSnapshot(options: {
|
||||
statuses: Map<string, IContainerStatus>;
|
||||
modelCount: number;
|
||||
gpus: IGpuInfo[];
|
||||
startTime: number;
|
||||
version: string;
|
||||
}): IHealthResponse {
|
||||
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
||||
const reasons = new Set<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>();
|
||||
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
||||
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
||||
|
||||
for (const [id, containerStatus] of options.statuses) {
|
||||
if (containerStatus.running && containerStatus.health === 'healthy') {
|
||||
containerHealth[id] = 'healthy';
|
||||
} else {
|
||||
containerHealth[id] = 'unhealthy';
|
||||
status = 'degraded';
|
||||
reasons.add('unhealthy_container');
|
||||
}
|
||||
}
|
||||
|
||||
for (const gpu of options.gpus) {
|
||||
gpuStatus[gpu.id] = 'available';
|
||||
}
|
||||
|
||||
if (options.modelCount === 0) {
|
||||
status = 'degraded';
|
||||
reasons.add('no_models_available');
|
||||
}
|
||||
|
||||
return {
|
||||
status,
|
||||
reasons: Array.from(reasons),
|
||||
version: options.version,
|
||||
uptime: Math.floor((Date.now() - options.startTime) / 1000),
|
||||
containers: options.statuses.size,
|
||||
models: options.modelCount,
|
||||
gpus: options.gpus.length,
|
||||
details: {
|
||||
containers: containerHealth,
|
||||
gpus: gpuStatus,
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -309,6 +309,8 @@ export interface IApiError {
|
||||
export interface IHealthResponse {
|
||||
/** Status */
|
||||
status: 'ok' | 'degraded' | 'error';
|
||||
/** Machine-readable reasons for degraded or error states */
|
||||
reasons?: Array<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>;
|
||||
/** Version */
|
||||
version: string;
|
||||
/** Uptime in seconds */
|
||||
|
||||
@@ -60,6 +60,28 @@ export interface IModelConfig {
|
||||
autoLoad: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Browser-based operations console (UI) configuration.
|
||||
* The UI is served on its own port, distinct from the OpenAI API port,
|
||||
* so that the data plane stays clean.
|
||||
*/
|
||||
export interface IUiConfig {
|
||||
/** Whether to start the UI server alongside the API */
|
||||
enabled: boolean;
|
||||
/** Port to bind the UI server to (default: 8081) */
|
||||
port: number;
|
||||
/** Host to bind the UI server to (default: '0.0.0.0') */
|
||||
host: string;
|
||||
/**
|
||||
* Where UI assets come from.
|
||||
* - 'bundle': from the compiled-in `ts_bundled/bundle.ts` (default, required
|
||||
* for `deno compile` single-binary builds)
|
||||
* - 'disk': read on demand from `ts_web/` for the dev loop
|
||||
* Overridden at runtime by the `UI_ASSET_SOURCE` env var.
|
||||
*/
|
||||
assetSource: 'bundle' | 'disk';
|
||||
}
|
||||
|
||||
/**
|
||||
* Main ModelGrid configuration interface
|
||||
*/
|
||||
@@ -68,6 +90,8 @@ export interface IModelGridConfig {
|
||||
version: string;
|
||||
/** API server configuration */
|
||||
api: IApiConfig;
|
||||
/** UI server configuration */
|
||||
ui: IUiConfig;
|
||||
/** Docker configuration */
|
||||
docker: IDockerConfig;
|
||||
/** GPU configuration */
|
||||
|
||||
+7
-76
@@ -24,7 +24,7 @@ import { ClusterHandler } from './cli/cluster-handler.ts';
|
||||
import { ModelHandler } from './cli/model-handler.ts';
|
||||
import { ConfigHandler } from './cli/config-handler.ts';
|
||||
import { ServiceHandler } from './cli/service-handler.ts';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import { ConfigManager } from './config/config-manager.ts';
|
||||
|
||||
/**
|
||||
* ModelGrid - Main application coordinator
|
||||
@@ -42,6 +42,7 @@ export class ModelGrid {
|
||||
private clusterCoordinator?: ClusterCoordinator;
|
||||
private modelRegistry: ModelRegistry;
|
||||
private modelLoader?: ModelLoader;
|
||||
private configManager: ConfigManager;
|
||||
|
||||
// CLI Handlers
|
||||
private gpuHandler: GpuHandler;
|
||||
@@ -60,6 +61,7 @@ export class ModelGrid {
|
||||
this.containerManager = new ContainerManager();
|
||||
this.clusterManager = new ClusterManager();
|
||||
this.modelRegistry = new ModelRegistry();
|
||||
this.configManager = new ConfigManager();
|
||||
this.systemd = new Systemd();
|
||||
this.daemon = new Daemon(this);
|
||||
|
||||
@@ -80,23 +82,8 @@ export class ModelGrid {
|
||||
* Load configuration from file
|
||||
*/
|
||||
public async loadConfig(): Promise<void> {
|
||||
try {
|
||||
const configContent = await fs.readFile(PATHS.CONFIG_FILE, 'utf-8');
|
||||
this.config = this.normalizeConfig(
|
||||
JSON.parse(configContent) as Partial<IModelGridConfig> & {
|
||||
models?: {
|
||||
greenlistUrl?: string;
|
||||
autoPull?: boolean;
|
||||
} & Partial<IModelGridConfig['models']>;
|
||||
},
|
||||
);
|
||||
logger.dim(`Configuration loaded from ${PATHS.CONFIG_FILE}`);
|
||||
} catch (error) {
|
||||
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
|
||||
throw new Error(`Configuration file not found: ${PATHS.CONFIG_FILE}`);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
this.config = await this.configManager.loadConfig();
|
||||
logger.dim(`Configuration loaded from ${PATHS.CONFIG_FILE}`);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -107,8 +94,7 @@ export class ModelGrid {
|
||||
throw new Error('No configuration to save');
|
||||
}
|
||||
|
||||
await fs.mkdir(PATHS.CONFIG_DIR, { recursive: true });
|
||||
await fs.writeFile(PATHS.CONFIG_FILE, JSON.stringify(this.config, null, 2));
|
||||
await this.configManager.saveConfig(this.config);
|
||||
logger.dim(`Configuration saved to ${PATHS.CONFIG_FILE}`);
|
||||
}
|
||||
|
||||
@@ -270,7 +256,7 @@ export class ModelGrid {
|
||||
|
||||
// Initialize containers from config
|
||||
for (const containerConfig of this.config.containers) {
|
||||
await this.containerManager.addContainer(containerConfig);
|
||||
this.containerManager.addContainer(containerConfig);
|
||||
}
|
||||
|
||||
// Initialize model registry
|
||||
@@ -294,61 +280,6 @@ export class ModelGrid {
|
||||
logger.success('ModelGrid initialized');
|
||||
}
|
||||
|
||||
private normalizeConfig(
|
||||
config: Partial<IModelGridConfig> & {
|
||||
models?: {
|
||||
greenlistUrl?: string;
|
||||
autoPull?: boolean;
|
||||
} & Partial<IModelGridConfig['models']>;
|
||||
},
|
||||
): IModelGridConfig {
|
||||
const filteredContainers = (config.containers || []).filter(
|
||||
(container) => (container as { type?: string }).type !== 'ollama',
|
||||
);
|
||||
|
||||
return {
|
||||
version: config.version || VERSION,
|
||||
api: {
|
||||
port: config.api?.port || 8080,
|
||||
host: config.api?.host || '0.0.0.0',
|
||||
apiKeys: config.api?.apiKeys || [],
|
||||
rateLimit: config.api?.rateLimit,
|
||||
cors: config.api?.cors ?? true,
|
||||
corsOrigins: config.api?.corsOrigins || ['*'],
|
||||
},
|
||||
docker: {
|
||||
networkName: config.docker?.networkName || 'modelgrid',
|
||||
runtime: config.docker?.runtime || 'docker',
|
||||
socketPath: config.docker?.socketPath,
|
||||
},
|
||||
gpus: {
|
||||
autoDetect: config.gpus?.autoDetect ?? true,
|
||||
assignments: config.gpus?.assignments || {},
|
||||
},
|
||||
containers: filteredContainers,
|
||||
models: {
|
||||
registryUrl: config.models?.registryUrl || config.models?.greenlistUrl ||
|
||||
'https://list.modelgrid.com/catalog/models.json',
|
||||
autoDeploy: config.models?.autoDeploy ?? config.models?.autoPull ?? true,
|
||||
defaultEngine: 'vllm',
|
||||
autoLoad: config.models?.autoLoad || [],
|
||||
},
|
||||
cluster: {
|
||||
enabled: config.cluster?.enabled ?? false,
|
||||
nodeName: config.cluster?.nodeName || 'modelgrid-local',
|
||||
role: config.cluster?.role || 'standalone',
|
||||
bindHost: config.cluster?.bindHost || '0.0.0.0',
|
||||
gossipPort: config.cluster?.gossipPort || 7946,
|
||||
sharedSecret: config.cluster?.sharedSecret,
|
||||
advertiseUrl: config.cluster?.advertiseUrl,
|
||||
controlPlaneUrl: config.cluster?.controlPlaneUrl,
|
||||
heartbeatIntervalMs: config.cluster?.heartbeatIntervalMs || 5000,
|
||||
seedNodes: config.cluster?.seedNodes || [],
|
||||
},
|
||||
checkInterval: config.checkInterval || 30000,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Shutdown the ModelGrid system
|
||||
*/
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
import * as fs from 'node:fs/promises';
|
||||
import type { IModelCatalog, IModelCatalogEntry } from '../interfaces/catalog.ts';
|
||||
import { MODEL_REGISTRY, TIMING } from '../constants.ts';
|
||||
import { API_SERVER, MODEL_REGISTRY, TIMING } from '../constants.ts';
|
||||
import { logger } from '../logger.ts';
|
||||
|
||||
export class ModelRegistry {
|
||||
@@ -167,7 +167,7 @@ export class ModelRegistry {
|
||||
}
|
||||
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), 30000);
|
||||
const timeout = setTimeout(() => controller.abort(), API_SERVER.REQUEST_TIMEOUT_MS);
|
||||
|
||||
try {
|
||||
const response = await fetch(source, {
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
export { UiServer } from './server.ts';
|
||||
+317
@@ -0,0 +1,317 @@
|
||||
/**
|
||||
* UI Server
|
||||
*
|
||||
* Serves the ModelGrid operations console on its own port, separate from
|
||||
* the OpenAI-compatible API. Assets come from one of two sources:
|
||||
* - 'disk': read on demand from `ts_web/` (dev loop, hot edits)
|
||||
* - 'bundle': from the generated `ts_bundled/bundle.ts` module
|
||||
* (default, required for `deno compile` single-binary builds)
|
||||
*
|
||||
* Plus a single JSON endpoint `/_ui/overview` that the SPA calls to render
|
||||
* the Overview view without cross-origin fetches into the API server.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import { dirname, extname, join, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import type { IUiConfig } from '../interfaces/config.ts';
|
||||
import type { IHealthResponse } from '../interfaces/api.ts';
|
||||
import { logger } from '../logger.ts';
|
||||
import { VERSION } from '../constants.ts';
|
||||
import type { ContainerManager } from '../containers/container-manager.ts';
|
||||
import type { ClusterManager } from '../cluster/cluster-manager.ts';
|
||||
import { GpuDetector } from '../hardware/gpu-detector.ts';
|
||||
import { buildHealthSnapshot } from '../helpers/health.ts';
|
||||
|
||||
interface IBundledFile {
|
||||
path: string;
|
||||
contentBase64: string;
|
||||
}
|
||||
|
||||
interface IAssetEntry {
|
||||
bytes: Uint8Array;
|
||||
contentType: string;
|
||||
}
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
const REPO_ROOT = resolve(__dirname, '..', '..');
|
||||
const TS_WEB_DIR = join(REPO_ROOT, 'ts_web');
|
||||
|
||||
export class UiServer {
  // Active HTTP listener; undefined whenever the server is stopped.
  private server?: http.Server;
  private config: IUiConfig;
  private containerManager: ContainerManager;
  private clusterManager: ClusterManager;
  private gpuDetector: GpuDetector;
  // Pre-decoded bundle assets keyed by absolute URL path (e.g. '/index.html');
  // populated only when the active source is 'bundle'.
  private bundleMap: Map<string, IAssetEntry> | null = null;
  // Asset source actually in use after start(); may differ from config when
  // the bundle is missing and we fall back to disk.
  private activeAssetSource: 'disk' | 'bundle' = 'bundle';
  // Epoch ms captured in start(); feeds the uptime field of /_ui/overview.
  private startTime = 0;

  constructor(
    config: IUiConfig,
    containerManager: ContainerManager,
    clusterManager: ClusterManager,
  ) {
    this.config = config;
    this.containerManager = containerManager;
    this.clusterManager = clusterManager;
    this.gpuDetector = new GpuDetector();
  }

  /**
   * Resolve the asset source, load the bundle when applicable, and begin
   * listening on the configured host/port. Resolves once the socket is
   * bound; rejects on listen errors (e.g. port already in use).
   */
  public async start(): Promise<void> {
    if (this.server) {
      logger.warn('UI server is already running');
      return;
    }

    this.activeAssetSource = this.resolveAssetSource();
    if (this.activeAssetSource === 'bundle') {
      this.bundleMap = await this.loadBundleMap();
      if (!this.bundleMap) {
        // Degrade gracefully to disk mode instead of failing startup.
        logger.warn(
          'UI bundle not found (ts_bundled/bundle.ts missing). ' +
            'Falling back to disk mode — run `deno task bundle:ui` before `deno compile`.',
        );
        this.activeAssetSource = 'disk';
      }
    }

    this.startTime = Date.now();

    this.server = http.createServer(async (req, res) => {
      try {
        await this.handleRequest(req, res);
      } catch (err) {
        // Last-resort handler: never let a request error kill the process.
        logger.error(`UI request error: ${err instanceof Error ? err.message : String(err)}`);
        if (!res.headersSent) {
          res.writeHead(500, { 'Content-Type': 'text/plain' });
          res.end('Internal server error');
        }
      }
    });

    await new Promise<void>((resolve, reject) => {
      this.server!.listen(this.config.port, this.config.host, () => {
        logger.success(
          `UI server started on ${this.config.host}:${this.config.port} ` +
            `(asset source: ${this.activeAssetSource})`,
        );
        resolve();
      });
      // NOTE(review): this handler stays attached after startup, so a later
      // runtime 'error' event calls reject() on an already-settled promise
      // (a no-op) — the error is still logged above.
      this.server!.on('error', (error) => {
        logger.error(`UI server error: ${error.message}`);
        reject(error);
      });
    });
  }

  /** Close the listener and mark the server stopped. Safe to call when already stopped. */
  public async stop(): Promise<void> {
    if (!this.server) return;
    await new Promise<void>((resolve) => {
      this.server!.close(() => resolve());
    });
    this.server = undefined;
    logger.log('UI server stopped');
  }

  /** Lightweight status snapshot for CLI/status reporting. */
  public getInfo(): { running: boolean; host: string; port: number; assetSource: string } {
    return {
      running: !!this.server,
      host: this.config.host,
      port: this.config.port,
      assetSource: this.activeAssetSource,
    };
  }

  /**
   * Route a single request: only GET/HEAD are accepted; '/_ui/overview'
   * returns the JSON snapshot, every other path is treated as an asset.
   */
  private async handleRequest(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    // The base URL only exists to satisfy the URL constructor; we use the
    // parsed pathname.
    const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
    const path = url.pathname;

    if (req.method !== 'GET' && req.method !== 'HEAD') {
      res.writeHead(405, { 'Content-Type': 'text/plain', 'Allow': 'GET, HEAD' });
      res.end('Method Not Allowed');
      return;
    }

    if (path === '/_ui/overview') {
      await this.handleOverview(res);
      return;
    }

    await this.serveAsset(path, res);
  }

  /**
   * Aggregate container health, model count, and GPU info into the JSON
   * payload the SPA's Overview view renders. Served with 'no-store' so the
   * dashboard always sees fresh data.
   */
  private async handleOverview(res: http.ServerResponse): Promise<void> {
    const statuses = await this.containerManager.getAllStatus();
    const models = await this.containerManager.getAllAvailableModels();
    const gpus = await this.gpuDetector.detectGpus();

    const health: IHealthResponse = buildHealthSnapshot({
      statuses,
      modelCount: models.size,
      gpus,
      startTime: this.startTime,
      version: VERSION,
    });

    const clusterConfig = this.clusterManager.getConfig();

    const body = {
      health,
      node: {
        // Defaults mirror a standalone node when clustering is not configured.
        name: clusterConfig?.nodeName ?? 'modelgrid-local',
        role: clusterConfig?.role ?? 'standalone',
        version: VERSION,
      },
    };

    res.writeHead(200, {
      'Content-Type': 'application/json; charset=utf-8',
      'Cache-Control': 'no-store',
    });
    res.end(JSON.stringify(body));
  }

  /**
   * Serve a static asset from the active source. '/' maps to '/index.html';
   * unknown extension-less paths fall back to the SPA shell so client-side
   * routes deep-link correctly; everything else 404s.
   */
  private async serveAsset(path: string, res: http.ServerResponse): Promise<void> {
    const normalized = path === '/' ? '/index.html' : path;

    if (this.activeAssetSource === 'bundle' && this.bundleMap) {
      const hit = this.bundleMap.get(normalized);
      if (hit) {
        this.writeAsset(res, hit);
        return;
      }
      // SPA fallback: any unknown non-asset path gets index.html.
      if (!hasKnownAssetExtension(normalized)) {
        const shell = this.bundleMap.get('/index.html');
        if (shell) {
          this.writeAsset(res, shell);
          return;
        }
      }
      res.writeHead(404, { 'Content-Type': 'text/plain' });
      res.end('Not Found');
      return;
    }

    // Disk mode: resolve against ts_web/ only after the traversal check.
    const safe = normalizePath(normalized);
    if (!safe) {
      res.writeHead(400, { 'Content-Type': 'text/plain' });
      res.end('Bad Request');
      return;
    }
    const full = join(TS_WEB_DIR, safe);
    try {
      const bytes = await fs.readFile(full);
      this.writeAsset(res, {
        bytes: new Uint8Array(bytes),
        contentType: contentTypeForPath(safe),
      });
      return;
    } catch (err) {
      if ((err as NodeJS.ErrnoException).code === 'ENOENT') {
        // Same SPA fallback as bundle mode, read from disk.
        if (!hasKnownAssetExtension(safe)) {
          try {
            const shell = await fs.readFile(join(TS_WEB_DIR, 'index.html'));
            this.writeAsset(res, {
              bytes: new Uint8Array(shell),
              contentType: 'text/html; charset=utf-8',
            });
            return;
          } catch {
            // fall through to 404
          }
        }
        res.writeHead(404, { 'Content-Type': 'text/plain' });
        res.end('Not Found');
        return;
      }
      // Unexpected FS error: bubble up to the top-level 500 handler.
      throw err;
    }
  }

  /** Write a 200 response carrying the asset's bytes and content type. */
  private writeAsset(res: http.ServerResponse, asset: IAssetEntry): void {
    res.writeHead(200, {
      'Content-Type': asset.contentType,
      'Content-Length': asset.bytes.byteLength,
      'Cache-Control': 'no-cache',
    });
    res.end(asset.bytes);
  }

  /**
   * Decide where assets come from: the UI_ASSET_SOURCE env var wins over
   * config, which defaults to 'bundle'. Unknown values warn and use 'bundle'.
   */
  private resolveAssetSource(): 'disk' | 'bundle' {
    const envOverride = typeof Deno !== 'undefined' ? Deno.env.get('UI_ASSET_SOURCE') : undefined;
    const picked = (envOverride || this.config.assetSource || 'bundle').toLowerCase();
    if (picked === 'disk' || picked === 'bundle') return picked;
    logger.warn(`Unknown UI_ASSET_SOURCE "${picked}", defaulting to bundle`);
    return 'bundle';
  }

  /**
   * Dynamically import the generated asset bundle and decode it into a
   * path → asset map. Returns null when the bundle module is missing or
   * fails to load; the caller then falls back to disk mode.
   */
  private async loadBundleMap(): Promise<Map<string, IAssetEntry> | null> {
    try {
      // The bundle module is generated by `deno task bundle:ui`.
      // @ts-ignore — generated file may not exist until the bundle task runs.
      const mod = await import('../../ts_bundled/bundle.ts');
      const files: IBundledFile[] = mod.files ?? [];
      const map = new Map<string, IAssetEntry>();
      for (const file of files) {
        // Bundle paths are repo-relative; serve them rooted at '/'.
        map.set(`/${file.path}`, {
          bytes: decodeBase64(file.contentBase64),
          contentType: contentTypeForPath(file.path),
        });
      }
      return map;
    } catch {
      return null;
    }
  }
}
|
||||
|
||||
function decodeBase64(input: string): Uint8Array {
|
||||
const binary = atob(input);
|
||||
const bytes = new Uint8Array(binary.length);
|
||||
for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
function normalizePath(path: string): string | null {
|
||||
// Strip leading slashes, reject traversal.
|
||||
const stripped = path.replace(/^\/+/, '');
|
||||
if (stripped.includes('..')) return null;
|
||||
return stripped;
|
||||
}
|
||||
|
||||
function hasKnownAssetExtension(path: string): boolean {
|
||||
return extname(path) !== '';
|
||||
}
|
||||
|
||||
function contentTypeForPath(path: string): string {
|
||||
const ext = extname(path).toLowerCase().replace(/^\./, '');
|
||||
const types: Record<string, string> = {
|
||||
html: 'text/html; charset=utf-8',
|
||||
js: 'application/javascript; charset=utf-8',
|
||||
mjs: 'application/javascript; charset=utf-8',
|
||||
css: 'text/css; charset=utf-8',
|
||||
json: 'application/json; charset=utf-8',
|
||||
map: 'application/json; charset=utf-8',
|
||||
svg: 'image/svg+xml',
|
||||
png: 'image/png',
|
||||
jpg: 'image/jpeg',
|
||||
jpeg: 'image/jpeg',
|
||||
gif: 'image/gif',
|
||||
ico: 'image/x-icon',
|
||||
webp: 'image/webp',
|
||||
woff: 'font/woff',
|
||||
woff2: 'font/woff2',
|
||||
ttf: 'font/ttf',
|
||||
otf: 'font/otf',
|
||||
txt: 'text/plain; charset=utf-8',
|
||||
};
|
||||
return types[ext] || 'application/octet-stream';
|
||||
}
|
||||
+187
@@ -0,0 +1,187 @@
|
||||
:root {
|
||||
color-scheme: dark;
|
||||
--bg: #000;
|
||||
--bg-1: #0b0b0d;
|
||||
--bg-2: #14141a;
|
||||
--fg: #e6e6ea;
|
||||
--fg-dim: #8a8a92;
|
||||
--border: #23232b;
|
||||
--accent: #4357d9;
|
||||
--ok: #2ecc71;
|
||||
--warn: #f1c40f;
|
||||
--err: #e74c3c;
|
||||
}
|
||||
|
||||
* { box-sizing: border-box; }
|
||||
|
||||
html, body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
height: 100%;
|
||||
background: var(--bg);
|
||||
color: var(--fg);
|
||||
font-family: Inter, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
body {
|
||||
display: grid;
|
||||
grid-template-columns: 220px 1fr;
|
||||
}
|
||||
|
||||
a { color: inherit; text-decoration: none; }
|
||||
|
||||
.dim { color: var(--fg-dim); }
|
||||
|
||||
.nav {
|
||||
background: var(--bg-1);
|
||||
border-right: 1px solid var(--border);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
height: 100vh;
|
||||
position: sticky;
|
||||
top: 0;
|
||||
}
|
||||
|
||||
.nav-brand {
|
||||
padding: 20px 16px 12px;
|
||||
font-size: 15px;
|
||||
font-weight: 600;
|
||||
letter-spacing: 0.02em;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.nav-items {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
padding: 8px 0;
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.nav-items a {
|
||||
padding: 8px 16px;
|
||||
color: var(--fg-dim);
|
||||
border-left: 2px solid transparent;
|
||||
transition: color 0.1s, background 0.1s, border-color 0.1s;
|
||||
}
|
||||
|
||||
.nav-items a:hover {
|
||||
color: var(--fg);
|
||||
background: var(--bg-2);
|
||||
}
|
||||
|
||||
.nav-items a.active {
|
||||
color: var(--fg);
|
||||
background: var(--bg-2);
|
||||
border-left-color: var(--accent);
|
||||
}
|
||||
|
||||
.nav-footer {
|
||||
padding: 12px 16px;
|
||||
border-top: 1px solid var(--border);
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
main {
|
||||
padding: 24px 32px;
|
||||
overflow-y: auto;
|
||||
height: 100vh;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 18px;
|
||||
font-weight: 600;
|
||||
margin: 0 0 20px;
|
||||
letter-spacing: 0.01em;
|
||||
}
|
||||
|
||||
.cards {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
||||
gap: 12px;
|
||||
margin-bottom: 24px;
|
||||
}
|
||||
|
||||
.card {
|
||||
background: var(--bg-1);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
padding: 16px;
|
||||
}
|
||||
|
||||
.card-label {
|
||||
font-size: 11px;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: var(--fg-dim);
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.card-value {
|
||||
font-size: 22px;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.card-sub {
|
||||
font-size: 12px;
|
||||
color: var(--fg-dim);
|
||||
margin-top: 4px;
|
||||
}
|
||||
|
||||
.status-dot {
|
||||
display: inline-block;
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border-radius: 50%;
|
||||
margin-right: 6px;
|
||||
vertical-align: middle;
|
||||
}
|
||||
.status-dot.ok { background: var(--ok); }
|
||||
.status-dot.warn{ background: var(--warn); }
|
||||
.status-dot.err { background: var(--err); }
|
||||
|
||||
table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
background: var(--bg-1);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
th, td {
|
||||
text-align: left;
|
||||
padding: 10px 14px;
|
||||
border-bottom: 1px solid var(--border);
|
||||
font-weight: normal;
|
||||
}
|
||||
|
||||
th {
|
||||
color: var(--fg-dim);
|
||||
font-size: 11px;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
background: var(--bg-2);
|
||||
}
|
||||
|
||||
tr:last-child td { border-bottom: none; }
|
||||
|
||||
.placeholder {
|
||||
padding: 40px;
|
||||
text-align: center;
|
||||
color: var(--fg-dim);
|
||||
background: var(--bg-1);
|
||||
border: 1px dashed var(--border);
|
||||
border-radius: 6px;
|
||||
}
|
||||
|
||||
.error {
|
||||
background: var(--bg-1);
|
||||
border: 1px solid var(--err);
|
||||
color: var(--err);
|
||||
padding: 12px 16px;
|
||||
border-radius: 6px;
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
|
||||
font-size: 12px;
|
||||
}
|
||||
+161
@@ -0,0 +1,161 @@
|
||||
// ModelGrid UI — vanilla client. Bundled into ts_bundled/bundle.ts for
// the single-binary build, or served from disk in dev mode.

// Hash-routable view names; parseHash() falls back to 'overview' for
// anything not listed here.
const VIEWS = [
  'overview',
  'cluster',
  'gpus',
  'deployments',
  'models',
  'access',
  'logs',
  'metrics',
  'settings',
];

// Cached DOM anchors: the main content area and the nav-footer node
// identity/version labels (filled in by renderOverview).
const view = document.getElementById('view');
const nodeIdent = document.getElementById('node-ident');
const nodeVersion = document.getElementById('node-version');
|
||||
|
||||
// Reduce '#/x/y' to its first path segment; unknown or empty segments
// resolve to the default 'overview' view.
function parseHash() {
  const segments = location.hash
    .replace(/^#\/?/, '')
    .split('/')
    .filter(Boolean);
  const top = segments.length > 0 ? segments[0] : 'overview';
  return VIEWS.includes(top) ? top : 'overview';
}
|
||||
|
||||
// Highlight the nav entry whose data-view matches the routed view and
// clear the highlight from all others.
function setActive(current) {
  for (const link of document.querySelectorAll('.nav-items a')) {
    link.classList.toggle('active', link.dataset.view === current);
  }
}
|
||||
|
||||
// Fetch the aggregated snapshot from the UI server's own JSON endpoint
// (same origin — no CORS involved). Throws on any non-2xx response.
async function fetchHealth() {
  const res = await fetch('/_ui/overview', { headers: { accept: 'application/json' } });
  if (!res.ok) {
    throw new Error(`HTTP ${res.status}`);
  }
  return res.json();
}
|
||||
|
||||
// Render a colored status indicator: 'ok' → green, 'degraded' → yellow,
// anything else → red.
function statusDot(status) {
  let cls = 'err';
  if (status === 'ok') {
    cls = 'ok';
  } else if (status === 'degraded') {
    cls = 'warn';
  }
  return `<span class="status-dot ${cls}"></span>`;
}
|
||||
|
||||
// Render the Overview view: fetch the health snapshot, draw the summary
// cards and the per-container table, and update the nav-footer node
// identity. On fetch failure the view shows the error inline.
async function renderOverview() {
  // Show a loading placeholder immediately; replaced on success or error.
  view.innerHTML = `<h1>Overview</h1><div id="ovstate" class="placeholder">Loading…</div>`;
  try {
    const data = await fetchHealth();
    const health = data.health;
    // Defensive defaults: missing counters render as zero.
    const containers = health.containers || 0;
    const models = health.models || 0;
    const gpus = health.gpus || 0;
    const uptime = health.uptime || 0;
    // details.containers maps container id -> 'healthy' | 'unhealthy'.
    const detailEntries = Object.entries(health.details?.containers || {});
    const runningContainers = detailEntries.filter(([, v]) => v === 'healthy').length;

    view.innerHTML = `
      <h1>Overview</h1>
      <div class="cards">
        <div class="card">
          <div class="card-label">Fleet</div>
          <div class="card-value">${statusDot(health.status)}${health.status}</div>
          <div class="card-sub">v${health.version} · up ${formatUptime(uptime)}</div>
        </div>
        <div class="card">
          <div class="card-label">Deployments</div>
          <div class="card-value">${runningContainers} / ${containers}</div>
          <div class="card-sub">${containers === 0 ? 'no deployments' : `${runningContainers} healthy`}</div>
        </div>
        <div class="card">
          <div class="card-label">GPUs</div>
          <div class="card-value">${gpus}</div>
          <div class="card-sub">${gpus === 0 ? 'no GPU detected' : 'detected'}</div>
        </div>
        <div class="card">
          <div class="card-label">Models</div>
          <div class="card-value">${models}</div>
          <div class="card-sub">served via OpenAI API</div>
        </div>
      </div>
      <h1 style="margin-top:24px">Deployments</h1>
      ${renderContainerTable(detailEntries)}
    `;
    // Keep the nav footer's node identity in sync with the server payload.
    if (data.node) {
      nodeIdent.textContent = `${data.node.name} · ${data.node.role}`;
      nodeVersion.textContent = `v${data.node.version}`;
    }
  } catch (err) {
    view.innerHTML = `<h1>Overview</h1><div class="error">Failed to load: ${escapeHtml(String(err.message || err))}</div>`;
  }
}
|
||||
|
||||
// Render the per-container health table from [id, state] entries, or a
// placeholder when no deployments exist.
// Fix: the placeholder previously interpolated a literal `<model>` into
// innerHTML, where the browser parses it as an unknown HTML element and
// swallows it — the CLI hint rendered as "modelgrid run ." Escape it as
// entities so it displays literally.
function renderContainerTable(entries) {
  if (entries.length === 0) {
    return `<div class="placeholder">No deployments configured. Add one with <code>modelgrid run &lt;model&gt;</code>.</div>`;
  }
  const rows = entries.map(([id, state]) => `
    <tr>
      <td>${escapeHtml(id)}</td>
      <td>${statusDot(state === 'healthy' ? 'ok' : 'err')}${escapeHtml(state)}</td>
    </tr>
  `).join('');
  return `<table><thead><tr><th>Container</th><th>Health</th></tr></thead><tbody>${rows}</tbody></table>`;
}
|
||||
|
||||
// Stub view: point the operator at the CLI equivalent until the view is
// implemented in the UI.
function renderPlaceholder(name) {
  const hint = cliHint(name);
  view.innerHTML = `
    <h1>${name}</h1>
    <div class="placeholder">
      This view is part of the UI concept (see <code>readme.ui.md</code>) but is not implemented yet.
      Use the CLI for now: <code>modelgrid ${hint}</code>.
    </div>
  `;
}
|
||||
|
||||
// CLI command suggestion for a given (display-cased) view name; unknown
// names get the generic '--help'.
function cliHint(name) {
  const commands = {
    Cluster: 'cluster status',
    GPUs: 'gpu list',
    Deployments: 'ps',
    Models: 'model list',
    Access: 'config apikey list',
    Logs: 'service logs',
    Metrics: 'service status',
    Settings: 'config show',
  };
  return commands[name] ?? '--help';
}
|
||||
|
||||
// Compact human-readable uptime: seconds under a minute, then the largest
// whole unit (m/h/d), truncated.
function formatUptime(s) {
  const units = [
    [86400, 'd'],
    [3600, 'h'],
    [60, 'm'],
  ];
  for (const [size, suffix] of units) {
    if (s >= size) return `${Math.floor(s / size)}${suffix}`;
  }
  return `${s}s`;
}
|
||||
|
||||
// Escape HTML-special characters so user-controlled strings can be
// interpolated into innerHTML templates safely.
// Fix: the replacement map previously mapped every character to itself
// ('&' -> '&', '<' -> '<', …), making this function a no-op and leaving
// the overview templates open to markup injection via container ids and
// error messages. Map to the proper entities instead.
function escapeHtml(s) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  return s.replace(/[&<>"']/g, (c) => entities[c]);
}
|
||||
|
||||
// Dispatch the current hash route to its renderer. parseHash() guarantees
// the result is one of VIEWS, so the lookup always hits.
function route() {
  const current = parseHash();
  setActive(current);
  const renderers = {
    overview: () => renderOverview(),
    cluster: () => renderPlaceholder('Cluster'),
    gpus: () => renderPlaceholder('GPUs'),
    deployments: () => renderPlaceholder('Deployments'),
    models: () => renderPlaceholder('Models'),
    access: () => renderPlaceholder('Access'),
    logs: () => renderPlaceholder('Logs'),
    metrics: () => renderPlaceholder('Metrics'),
    settings: () => renderPlaceholder('Settings'),
  };
  return renderers[current]();
}
|
||||
|
||||
// Bootstrap: re-render on hash navigation, default the route when the page
// is opened without a hash, then draw the initial view.
window.addEventListener('hashchange', route);
if (!location.hash) location.hash = '#/overview';
route();
|
||||
@@ -0,0 +1,32 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta name="theme-color" content="#000000">
|
||||
<title>ModelGrid</title>
|
||||
<link rel="stylesheet" href="/app.css">
|
||||
</head>
|
||||
<body>
|
||||
<aside class="nav">
|
||||
<div class="nav-brand">ModelGrid</div>
|
||||
<nav class="nav-items">
|
||||
<a href="#/overview" data-view="overview">Overview</a>
|
||||
<a href="#/cluster" data-view="cluster">Cluster</a>
|
||||
<a href="#/gpus" data-view="gpus">GPUs</a>
|
||||
<a href="#/deployments" data-view="deployments">Deployments</a>
|
||||
<a href="#/models" data-view="models">Models</a>
|
||||
<a href="#/access" data-view="access">Access</a>
|
||||
<a href="#/logs" data-view="logs">Logs</a>
|
||||
<a href="#/metrics" data-view="metrics">Metrics</a>
|
||||
<a href="#/settings" data-view="settings">Settings</a>
|
||||
</nav>
|
||||
<div class="nav-footer">
|
||||
<div id="node-ident">—</div>
|
||||
<div id="node-version" class="dim">—</div>
|
||||
</div>
|
||||
</aside>
|
||||
<main id="view"></main>
|
||||
<script src="/app.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user