feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+158 -205
View File
@@ -1,252 +1,205 @@
/**
* Model Registry
*
* Manages the greenlit model list and model availability.
* Model registry backed by list.modelgrid.com.
*/
import type { IGreenlitModel, IGreenlitModelsList } from '../interfaces/config.ts';
import type { TContainerType } from '../interfaces/container.ts';
import * as fs from 'node:fs/promises';
import type { IModelCatalog, IModelCatalogEntry } from '../interfaces/catalog.ts';
import { MODEL_REGISTRY, TIMING } from '../constants.ts';
import { logger } from '../logger.ts';
/**
* Model registry for managing greenlit models
*/
export class ModelRegistry {
private greenlistUrl: string;
private cachedGreenlist: IGreenlitModelsList | null = null;
private catalogUrl: string;
private cachedCatalog: IModelCatalog | null = null;
private cacheTime: number = 0;
/**
* Store the source URL that later fetches read from.
* When no URL is passed, the default comes from MODEL_REGISTRY.
*
* NOTE(review): a greenlist signature and a catalog signature both appear here
* and share a single closing brace — confirm which revision is intended.
*/
constructor(greenlistUrl: string = MODEL_REGISTRY.DEFAULT_GREENLIST_URL) {
this.greenlistUrl = greenlistUrl;
constructor(catalogUrl: string = MODEL_REGISTRY.DEFAULT_CATALOG_URL) {
this.catalogUrl = catalogUrl;
}
/**
* Set the source URL and invalidate the cache.
*
* The cached list is set to null and cacheTime is reset to 0, so the next
* fetch cannot be answered from the cache.
*
* NOTE(review): a greenlist variant and a catalog variant of this setter are
* both present here with a single closing brace — confirm which is intended.
*/
public setGreenlistUrl(url: string): void {
this.greenlistUrl = url;
this.cachedGreenlist = null;
public setCatalogUrl(url: string): void {
this.catalogUrl = url;
this.cachedCatalog = null;
this.cacheTime = 0;
}
/**
* Fetch the model list from the configured source, with caching.
*
* The cached list is returned when `forceRefresh` is false, a cached list
* exists, and it is younger than TIMING.GREENLIST_CACHE_DURATION_MS.
* Otherwise the source is read, checked for a `models` array, cached, and
* returned. On any failure a warning is logged and the previously cached
* list is returned if there is one, otherwise the fallback list.
*
* NOTE(review): greenlist and catalog variants of this method are both
* present in this span (two signatures, two cache checks, and an unclosed
* `fetch(this.greenlistUrl, {` call) — confirm which revision is intended.
*/
public async fetchGreenlist(forceRefresh: boolean = false): Promise<IGreenlitModelsList> {
// Return cached data if still valid
public async fetchCatalog(forceRefresh: boolean = false): Promise<IModelCatalog> {
if (
!forceRefresh &&
this.cachedGreenlist &&
this.cachedCatalog &&
Date.now() - this.cacheTime < TIMING.GREENLIST_CACHE_DURATION_MS
) {
return this.cachedGreenlist;
return this.cachedCatalog;
}
try {
logger.dim(`Fetching greenlit models from: ${this.greenlistUrl}`);
logger.dim(`Fetching model catalog from: ${this.catalogUrl}`);
const catalog = await this.readCatalogSource(this.catalogUrl);
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), 30000);
// Reject payloads that do not carry a models array
if (!Array.isArray(catalog.models)) {
throw new Error('Invalid catalog format: missing models array');
}
const response = await fetch(this.greenlistUrl, {
// Remember the result and when it was loaded
this.cachedCatalog = catalog;
this.cacheTime = Date.now();
logger.dim(`Loaded ${catalog.models.length} catalog models`);
return catalog;
} catch (error) {
logger.warn(
`Failed to fetch model catalog: ${error instanceof Error ? error.message : String(error)}`,
);
// Nothing cached yet: fall back to the built-in list
if (!this.cachedCatalog) {
logger.dim('Using fallback catalog');
return this.getFallbackCatalog();
}
// Otherwise serve the stale cached list
return this.cachedCatalog;
}
}
/**
 * Report whether the catalog has an entry for the given model.
 *
 * @param modelName - name to look up via `getModel`
 * @returns true when `getModel` resolves to an entry, false when it resolves to null
 */
public async isModelListed(modelName: string): Promise<boolean> {
  const entry = await this.getModel(modelName);
  return entry !== null;
}
/**
 * Look up a catalog entry by id or alias.
 *
 * The requested name and every candidate (the entry id plus its aliases, if
 * any) are passed through `normalizeModelName` before being compared.
 *
 * @param modelName - id or alias to search for
 * @returns the first matching entry in catalog order, or null when none matches
 */
public async getModel(modelName: string): Promise<IModelCatalogEntry | null> {
  const { models } = await this.fetchCatalog();
  const wanted = this.normalizeModelName(modelName);

  for (const entry of models) {
    const names = [entry.id, ...(entry.aliases || [])];
    if (names.some((name) => this.normalizeModelName(name) === wanted)) {
      return entry;
    }
  }

  return null;
}
/**
 * Return every entry of the current catalog.
 *
 * @returns the catalog's `models` array itself (not a copy)
 */
public async getAllModels(): Promise<IModelCatalogEntry[]> {
  const { models } = await this.fetchCatalog();
  return models;
}
/**
 * List the catalog entries that use the given engine.
 *
 * @param engine - engine identifier to match exactly
 * @returns a new array holding the matching entries in catalog order
 */
public async getModelsByEngine(engine: 'vllm'): Promise<IModelCatalogEntry[]> {
  const { models } = await this.fetchCatalog();
  const usesEngine = (entry: IModelCatalogEntry): boolean => entry.engine === engine;
  return models.filter(usesEngine);
}
/**
 * List the catalog entries whose minimum VRAM requirement does not exceed the limit.
 *
 * @param maxVramGb - upper bound compared against `requirements.minVramGb`
 * @returns a new array holding the entries with `minVramGb <= maxVramGb`
 */
public async getModelsWithinVram(maxVramGb: number): Promise<IModelCatalogEntry[]> {
  const { models } = await this.fetchCatalog();
  return models.filter(({ requirements }) => requirements.minVramGb <= maxVramGb);
}
/**
 * Return the engine recorded on a model's catalog entry.
 *
 * @param modelName - name to look up via `getModel`
 * @returns the entry's `engine`, or null when the model is not found
 */
public async getRecommendedEngine(modelName: string): Promise<'vllm' | null> {
  const entry = await this.getModel(modelName);
  if (!entry) {
    return null;
  }
  return entry.engine;
}
/**
 * Return the minimum VRAM recorded on a model's catalog entry.
 *
 * @param modelName - name to look up via `getModel`
 * @returns the entry's `requirements.minVramGb`, or null when the model is not found
 */
public async getMinVram(modelName: string): Promise<number | null> {
  const entry = await this.getModel(modelName);
  if (!entry) {
    return null;
  }
  return entry.requirements.minVramGb;
}
/**
 * Check whether a model's minimum VRAM requirement is covered by the available VRAM.
 *
 * @param modelName - name passed to `getMinVram`
 * @param availableVramGb - VRAM available, compared against the model's minimum
 * @returns false when `getMinVram` yields null; otherwise `availableVramGb >= minimum`
 */
public async modelFitsInVram(modelName: string, availableVramGb: number): Promise<boolean> {
  const required = await this.getMinVram(modelName);
  return required !== null && availableVramGb >= required;
}
/**
 * Find catalog entries that contain the pattern as a case-insensitive substring.
 *
 * An entry matches when the pattern occurs in its id, in any alias, in
 * `metadata.summary`, or in any of `metadata.tags`. Missing optional fields
 * simply do not match.
 *
 * @param pattern - text to search for
 * @returns a new array holding the matching entries in catalog order
 */
public async searchModels(pattern: string): Promise<IModelCatalogEntry[]> {
  const { models } = await this.fetchCatalog();
  const needle = pattern.toLowerCase();
  const contains = (text: string): boolean => text.toLowerCase().includes(needle);

  return models.filter((entry) => {
    if (contains(entry.id)) {
      return true;
    }
    if (entry.aliases?.some((alias) => contains(alias))) {
      return true;
    }
    const summary = entry.metadata?.summary;
    if (summary != null && contains(summary)) {
      return true;
    }
    return entry.metadata?.tags?.some((tag) => contains(tag)) ?? false;
  });
}
/**
 * List catalog entries that carry at least one of the given tags.
 *
 * Tags are compared case-insensitively; entries without `metadata.tags` never match.
 *
 * @param tags - tags to look for
 * @returns a new array holding the matching entries in catalog order
 */
public async getModelsByTags(tags: string[]): Promise<IModelCatalogEntry[]> {
  const { models } = await this.fetchCatalog();
  const wanted = tags.map((tag) => tag.toLowerCase());

  return models.filter((entry) => {
    const entryTags = entry.metadata?.tags;
    if (entryTags == null) {
      return false;
    }
    return entryTags.some((tag) => wanted.includes(tag.toLowerCase()));
  });
}
/**
 * Drop the cached catalog and reset its timestamp, so the next fetch
 * cannot be answered from the cache.
 */
public clearCache(): void {
  this.cacheTime = 0;
  this.cachedCatalog = null;
}
/**
 * Log a boxed summary of the catalog: version, generation time, model count,
 * and one line for each of the first ten models, followed by a count of the
 * remaining models when there are more than ten.
 */
public async printSummary(): Promise<void> {
  const { version, generatedAt, models } = await this.fetchCatalog();
  const previewLimit = 10;

  logger.logBoxTitle('Model Catalog', 70, 'info');
  logger.logBoxLine(`Version: ${version}`);
  logger.logBoxLine(`Generated: ${generatedAt}`);
  logger.logBoxLine(`Total Models: ${models.length}`);
  logger.logBoxLine('');

  const shown = Math.min(models.length, previewLimit);
  for (let i = 0; i < shown; i++) {
    const entry = models[i];
    logger.logBoxLine(
      `- ${entry.id} (${entry.requirements.minVramGb}GB, ${entry.engine})`,
    );
  }

  const hidden = models.length - previewLimit;
  if (hidden > 0) {
    logger.logBoxLine(`... and ${hidden} more`);
  }

  logger.logBoxEnd();
}
/**
* Read and parse a catalog from `source`.
*
* Sources starting with `file://` are converted to a URL and read with
* fs.readFile; sources starting with `/` are read with fs.readFile as a path.
* Anything else is requested with `fetch`, which is aborted through an
* AbortController after 30000 ms. The parsed JSON is cast to IModelCatalog
* without being validated in this method.
*
* NOTE(review): lines from a removed greenlist fetch body (validation,
* caching, a `catch` with fallback, a duplicate `Accept` header and an early
* `clearTimeout`) are interleaved with the catalog version below — confirm
* which revision is intended.
*/
private async readCatalogSource(source: string): Promise<IModelCatalog> {
// file:// URL: read from the local filesystem
if (source.startsWith('file://')) {
const filePath = new URL(source);
const content = await fs.readFile(filePath, 'utf-8');
return JSON.parse(content) as IModelCatalog;
}
// Leading slash: treat the source as a filesystem path
if (source.startsWith('/')) {
const content = await fs.readFile(source, 'utf-8');
return JSON.parse(content) as IModelCatalog;
}
// Everything else is fetched, with an abort after 30000 ms
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), 30000);
try {
const response = await fetch(source, {
signal: controller.signal,
headers: {
'Accept': 'application/json',
Accept: 'application/json',
'User-Agent': 'ModelGrid/1.0',
},
});
clearTimeout(timeout);
// Non-2xx responses are turned into errors
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const greenlist = await response.json() as IGreenlitModelsList;
// Validate structure
if (!greenlist.models || !Array.isArray(greenlist.models)) {
throw new Error('Invalid greenlist format: missing models array');
}
// Cache the result
this.cachedGreenlist = greenlist;
this.cacheTime = Date.now();
logger.dim(`Loaded ${greenlist.models.length} greenlit models`);
return greenlist;
} catch (error) {
logger.warn(`Failed to fetch greenlist: ${error instanceof Error ? error.message : String(error)}`);
// Return fallback if we have no cache
if (!this.cachedGreenlist) {
logger.dim('Using fallback greenlist');
return this.getFallbackGreenlist();
}
// Return stale cache
return this.cachedGreenlist;
return await response.json() as IModelCatalog;
} finally {
// Always cancel the abort timer, whether the request succeeded or threw
clearTimeout(timeout);
}
}
/**
* Build the fallback list used when the source cannot be loaded and nothing is cached.
*
* The result carries version '1.0', the current time as an ISO string, and
* the fallback models constant from MODEL_REGISTRY.
*
* NOTE(review): the `as unknown as` casts mean the constant's shape is not
* checked against the entry type — confirm the constant matches.
* NOTE(review): greenlist and catalog variants are both present here (two
* signatures, two `models` properties) — confirm which is intended.
*/
private getFallbackGreenlist(): IGreenlitModelsList {
private getFallbackCatalog(): IModelCatalog {
return {
version: '1.0',
lastUpdated: new Date().toISOString(),
models: MODEL_REGISTRY.FALLBACK_GREENLIST as unknown as IGreenlitModel[],
generatedAt: new Date().toISOString(),
models: MODEL_REGISTRY.FALLBACK_CATALOG as unknown as IModelCatalogEntry[],
};
}
/**
 * Check whether a model is on the greenlist.
 *
 * Both the requested name and each greenlist entry name are passed through
 * `normalizeModelName` before being compared.
 *
 * @param modelName - model name to look for
 * @returns true when at least one greenlist entry has the same normalized name
 */
public async isModelGreenlit(modelName: string): Promise<boolean> {
  const greenlist = await this.fetchGreenlist();
  // Normalize the query once instead of once per greenlist entry,
  // the same way getGreenlitModel does.
  const normalized = this.normalizeModelName(modelName);
  return greenlist.models.some((m) => this.normalizeModelName(m.name) === normalized);
}
/**
 * Look up a greenlist entry by name.
 *
 * Names are compared after `normalizeModelName` has been applied to both sides.
 *
 * @param modelName - model name to search for
 * @returns the first matching entry in greenlist order, or null when none matches
 */
public async getGreenlitModel(modelName: string): Promise<IGreenlitModel | null> {
  const { models } = await this.fetchGreenlist();
  const wanted = this.normalizeModelName(modelName);

  for (const entry of models) {
    if (this.normalizeModelName(entry.name) === wanted) {
      return entry;
    }
  }

  return null;
}
/**
 * Return every entry of the current greenlist.
 *
 * @returns the greenlist's `models` array itself (not a copy)
 */
public async getAllGreenlitModels(): Promise<IGreenlitModel[]> {
  const { models } = await this.fetchGreenlist();
  return models;
}
/**
 * List the greenlist entries assigned to the given container type.
 *
 * @param containerType - container type to match exactly
 * @returns a new array holding the matching entries in greenlist order
 */
public async getModelsByContainer(containerType: TContainerType): Promise<IGreenlitModel[]> {
  const { models } = await this.fetchGreenlist();
  const usesContainer = (entry: IGreenlitModel): boolean => entry.container === containerType;
  return models.filter(usesContainer);
}
/**
 * List the greenlist entries whose minimum VRAM does not exceed the limit.
 *
 * @param maxVramGb - upper bound compared against each entry's `minVram`
 * @returns a new array holding the entries with `minVram <= maxVramGb`
 */
public async getModelsWithinVram(maxVramGb: number): Promise<IGreenlitModel[]> {
  const { models } = await this.fetchGreenlist();
  return models.filter(({ minVram }) => minVram <= maxVramGb);
}
/**
 * Return the container type recorded on a model's greenlist entry.
 *
 * @param modelName - name to look up via `getGreenlitModel`
 * @returns the entry's `container`, or null when the model is not found
 */
public async getRecommendedContainer(modelName: string): Promise<TContainerType | null> {
  const entry = await this.getGreenlitModel(modelName);
  if (!entry) {
    return null;
  }
  return entry.container;
}
/**
 * Return the minimum VRAM recorded on a model's greenlist entry.
 *
 * @param modelName - name to look up via `getGreenlitModel`
 * @returns the entry's `minVram`, or null when the model is not found
 */
public async getMinVram(modelName: string): Promise<number | null> {
  const entry = await this.getGreenlitModel(modelName);
  if (!entry) {
    return null;
  }
  return entry.minVram;
}
/**
 * Check whether a model's minimum VRAM is covered by the available VRAM.
 *
 * @param modelName - name passed to `getMinVram`
 * @param availableVramGb - VRAM available, compared against the model's minimum
 * @returns true when `getMinVram` yields null; otherwise `availableVramGb >= minimum`
 */
public async modelFitsInVram(modelName: string, availableVramGb: number): Promise<boolean> {
  const required = await this.getMinVram(modelName);
  // A model that is not in the greenlist is assumed to possibly fit.
  return required === null || availableVramGb >= required;
}
/**
* Normalize model name for comparison.
*
* Lowercases the name, removes every character outside the allowed set of
* each regex below, then trims. This makes names that differ only in case
* compare equal, e.g. "llama3:8b" and "llama3:8B".
*
* NOTE(review): the first replace already strips `/` and `_`, so the wider
* character class of the second replace has no further effect when both run.
* This looks like two revisions of the same line — confirm which is intended.
* NOTE(review): lowercasing and stripping alone do not make "llama3:8b" equal
* to "meta-llama/llama-3-8b"; verify how such variants are meant to match.
*/
private normalizeModelName(name: string): string {
return name
.toLowerCase()
.replace(/[^a-z0-9:.-]/g, '')
.replace(/[^a-z0-9:/._-]/g, '')
.trim();
}
/**
 * Find greenlist entries that contain the pattern as a case-insensitive substring.
 *
 * An entry matches when the pattern occurs in its name, in its description,
 * or in any of its tags. Missing optional fields simply do not match.
 *
 * @param pattern - text to search for
 * @returns a new array holding the matching entries in greenlist order
 */
public async searchModels(pattern: string): Promise<IGreenlitModel[]> {
  const { models } = await this.fetchGreenlist();
  const needle = pattern.toLowerCase();
  const contains = (text: string): boolean => text.toLowerCase().includes(needle);

  return models.filter((entry) => {
    if (contains(entry.name)) {
      return true;
    }
    const description = entry.description;
    if (description != null && contains(description)) {
      return true;
    }
    return entry.tags?.some((tag) => contains(tag)) ?? false;
  });
}
/**
 * List greenlist entries that carry at least one of the given tags.
 *
 * Tags are compared case-insensitively; entries without tags never match.
 *
 * @param tags - tags to look for
 * @returns a new array holding the matching entries in greenlist order
 */
public async getModelsByTags(tags: string[]): Promise<IGreenlitModel[]> {
  const { models } = await this.fetchGreenlist();
  const wanted = tags.map((tag) => tag.toLowerCase());

  return models.filter((entry) => {
    const entryTags = entry.tags;
    if (entryTags == null) {
      return false;
    }
    return entryTags.some((tag) => wanted.includes(tag.toLowerCase()));
  });
}
/**
 * Drop the cached greenlist and reset its timestamp.
 */
public clearCache(): void {
  this.cacheTime = 0;
  this.cachedGreenlist = null;
}
/**
 * Log a boxed summary of the greenlist.
 *
 * Prints the version, last-updated value and model count, then one section
 * per container type (in order of first appearance) listing up to five
 * models and a count of the remaining ones.
 */
public async printSummary(): Promise<void> {
  const greenlist = await this.fetchGreenlist();
  const previewLimit = 5;

  // Bucket the models by container type, keeping first-seen order.
  const groups = new Map<string, IGreenlitModel[]>();
  for (const entry of greenlist.models) {
    const bucket = groups.get(entry.container);
    if (bucket) {
      bucket.push(entry);
    } else {
      groups.set(entry.container, [entry]);
    }
  }

  logger.logBoxTitle('Greenlit Models', 60, 'info');
  logger.logBoxLine(`Version: ${greenlist.version}`);
  logger.logBoxLine(`Last Updated: ${greenlist.lastUpdated}`);
  logger.logBoxLine(`Total Models: ${greenlist.models.length}`);
  logger.logBoxLine('');

  groups.forEach((members, container) => {
    logger.logBoxLine(`${container.toUpperCase()} (${members.length}):`);

    const shown = Math.min(members.length, previewLimit);
    for (let i = 0; i < shown; i++) {
      const entry = members[i];
      logger.logBoxLine(` - ${entry.name} (${entry.minVram}GB VRAM)`);
    }

    const hidden = members.length - previewLimit;
    if (hidden > 0) {
      logger.logBoxLine(` ... and ${hidden} more`);
    }
    logger.logBoxLine('');
  });

  logger.logBoxEnd();
}
}