feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+107 -4
View File
@@ -14,10 +14,13 @@ import { SystemInfo } from './hardware/system-info.ts';
import { DriverManager } from './drivers/driver-manager.ts';
import { DockerManager } from './docker/docker-manager.ts';
import { ContainerManager } from './containers/container-manager.ts';
import { ClusterCoordinator } from './cluster/coordinator.ts';
import { ClusterManager } from './cluster/cluster-manager.ts';
import { ModelRegistry } from './models/registry.ts';
import { ModelLoader } from './models/loader.ts';
import { GpuHandler } from './cli/gpu-handler.ts';
import { ContainerHandler } from './cli/container-handler.ts';
import { ClusterHandler } from './cli/cluster-handler.ts';
import { ModelHandler } from './cli/model-handler.ts';
import { ConfigHandler } from './cli/config-handler.ts';
import { ServiceHandler } from './cli/service-handler.ts';
@@ -35,12 +38,15 @@ export class ModelGrid {
private driverManager: DriverManager;
private dockerManager: DockerManager;
private containerManager: ContainerManager;
private clusterManager: ClusterManager;
private clusterCoordinator?: ClusterCoordinator;
private modelRegistry: ModelRegistry;
private modelLoader?: ModelLoader;
// CLI Handlers
private gpuHandler: GpuHandler;
private containerHandler: ContainerHandler;
private clusterHandler: ClusterHandler;
private modelHandler: ModelHandler;
private configHandler: ConfigHandler;
private serviceHandler: ServiceHandler;
@@ -52,6 +58,7 @@ export class ModelGrid {
this.driverManager = new DriverManager();
this.dockerManager = new DockerManager();
this.containerManager = new ContainerManager();
this.clusterManager = new ClusterManager();
this.modelRegistry = new ModelRegistry();
this.systemd = new Systemd();
this.daemon = new Daemon(this);
@@ -59,7 +66,12 @@ export class ModelGrid {
// Initialize CLI handlers
this.gpuHandler = new GpuHandler();
this.containerHandler = new ContainerHandler(this.containerManager);
this.modelHandler = new ModelHandler(this.containerManager, this.modelRegistry);
this.clusterHandler = new ClusterHandler();
this.modelHandler = new ModelHandler(
this.containerManager,
this.getClusterCoordinator(),
this.modelRegistry,
);
this.configHandler = new ConfigHandler();
this.serviceHandler = new ServiceHandler(this);
}
@@ -70,7 +82,14 @@ export class ModelGrid {
public async loadConfig(): Promise<void> {
try {
const configContent = await fs.readFile(PATHS.CONFIG_FILE, 'utf-8');
this.config = JSON.parse(configContent) as IModelGridConfig;
this.config = this.normalizeConfig(
JSON.parse(configContent) as Partial<IModelGridConfig> & {
models?: {
greenlistUrl?: string;
autoPull?: boolean;
} & Partial<IModelGridConfig['models']>;
},
);
logger.dim(`Configuration loaded from ${PATHS.CONFIG_FILE}`);
} catch (error) {
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
@@ -163,6 +182,23 @@ export class ModelGrid {
return this.containerManager;
}
/**
 * Get Cluster Manager instance
 *
 * @returns The ClusterManager held by this ModelGrid instance.
 */
public getClusterManager(): ClusterManager {
return this.clusterManager;
}
/**
 * Get Cluster Coordinator instance
 *
 * The coordinator is created on first access from the cluster manager,
 * container manager, model registry and the result of getModelLoader(),
 * then cached and returned on every later call.
 *
 * NOTE(review): the constructor passes the instance returned here to
 * ModelHandler, while initialization later assigns a new ClusterCoordinator
 * to the same field - confirm the handler is not left with a stale instance.
 *
 * @returns The cached ClusterCoordinator, created if it did not exist yet.
 */
public getClusterCoordinator(): ClusterCoordinator {
  const cached = this.clusterCoordinator;
  if (cached) {
    return cached;
  }

  const coordinator = new ClusterCoordinator(
    this.clusterManager,
    this.containerManager,
    this.modelRegistry,
    this.getModelLoader(),
  );
  this.clusterCoordinator = coordinator;
  return coordinator;
}
/**
* Get Model Registry instance
*/
@@ -203,6 +239,10 @@ export class ModelGrid {
return this.modelHandler;
}
/**
 * Get Cluster Handler
 *
 * @returns The ClusterHandler created in the constructor.
 */
public getClusterHandler(): ClusterHandler {
return this.clusterHandler;
}
/**
* Get Config Handler
*/
@@ -234,18 +274,81 @@ export class ModelGrid {
}
// Initialize model registry
this.modelRegistry.setGreenlistUrl(this.config.models.greenlistUrl);
this.modelRegistry.setCatalogUrl(this.config.models.registryUrl);
this.clusterManager.configure(this.config.cluster);
await this.clusterManager.initialize();
// Create model loader
this.modelLoader = new ModelLoader(
this.modelRegistry,
this.containerManager,
this.config.models.autoPull,
this.config.models.autoDeploy,
);
this.clusterCoordinator = new ClusterCoordinator(
this.clusterManager,
this.containerManager,
this.modelRegistry,
this.modelLoader,
);
logger.success('ModelGrid initialized');
}
/**
 * Expand a partially populated configuration into a complete IModelGridConfig.
 *
 * Every section is rebuilt with defaults for anything missing. Fallbacks
 * written with `||` also replace falsy values (0, '', false), while the
 * boolean flags use `??` so an explicit `false` is kept.
 *
 * Additional rules applied here:
 * - container entries whose `type` is 'ollama' are dropped;
 * - `models.registryUrl` falls back to the legacy `models.greenlistUrl`;
 * - `models.autoDeploy` falls back to the legacy `models.autoPull`;
 * - `models.defaultEngine` is always set to 'vllm', whatever the input holds.
 *
 * @param config Parsed configuration, possibly missing sections and possibly
 *   carrying the legacy `greenlistUrl` / `autoPull` keys under `models`.
 * @returns A configuration object with every section populated.
 */
private normalizeConfig(
  config: Partial<IModelGridConfig> & {
    models?: {
      greenlistUrl?: string;
      autoPull?: boolean;
    } & Partial<IModelGridConfig['models']>;
  },
): IModelGridConfig {
  const { api, docker, gpus, models, cluster } = config;

  // Keep every container entry except those typed 'ollama'.
  const keptContainers = (config.containers || []).filter((entry) => {
    const entryType = (entry as { type?: string }).type;
    return entryType !== 'ollama';
  });

  const apiSection: IModelGridConfig['api'] = {
    port: api?.port || 8080,
    host: api?.host || '0.0.0.0',
    apiKeys: api?.apiKeys || [],
    rateLimit: api?.rateLimit,
    cors: api?.cors ?? true,
    corsOrigins: api?.corsOrigins || ['*'],
  };

  const dockerSection: IModelGridConfig['docker'] = {
    networkName: docker?.networkName || 'modelgrid',
    runtime: docker?.runtime || 'docker',
    socketPath: docker?.socketPath,
  };

  const gpuSection: IModelGridConfig['gpus'] = {
    autoDetect: gpus?.autoDetect ?? true,
    assignments: gpus?.assignments || {},
  };

  // Legacy keys (greenlistUrl, autoPull) are only consulted when the
  // current keys (registryUrl, autoDeploy) are not usable.
  const modelSection: IModelGridConfig['models'] = {
    registryUrl: models?.registryUrl || models?.greenlistUrl ||
      'https://list.modelgrid.com/catalog/models.json',
    autoDeploy: models?.autoDeploy ?? models?.autoPull ?? true,
    defaultEngine: 'vllm',
    autoLoad: models?.autoLoad || [],
  };

  const clusterSection: IModelGridConfig['cluster'] = {
    enabled: cluster?.enabled ?? false,
    nodeName: cluster?.nodeName || 'modelgrid-local',
    role: cluster?.role || 'standalone',
    bindHost: cluster?.bindHost || '0.0.0.0',
    gossipPort: cluster?.gossipPort || 7946,
    sharedSecret: cluster?.sharedSecret,
    advertiseUrl: cluster?.advertiseUrl,
    controlPlaneUrl: cluster?.controlPlaneUrl,
    heartbeatIntervalMs: cluster?.heartbeatIntervalMs || 5000,
    seedNodes: cluster?.seedNodes || [],
  };

  return {
    version: config.version || VERSION,
    api: apiSection,
    docker: dockerSection,
    gpus: gpuSection,
    containers: keptContainers,
    models: modelSection,
    cluster: clusterSection,
    checkInterval: config.checkInterval || 30000,
  };
}
/**
* Shutdown the ModelGrid system
*/