feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
+107
-4
@@ -14,10 +14,13 @@ import { SystemInfo } from './hardware/system-info.ts';
|
||||
import { DriverManager } from './drivers/driver-manager.ts';
|
||||
import { DockerManager } from './docker/docker-manager.ts';
|
||||
import { ContainerManager } from './containers/container-manager.ts';
|
||||
import { ClusterCoordinator } from './cluster/coordinator.ts';
|
||||
import { ClusterManager } from './cluster/cluster-manager.ts';
|
||||
import { ModelRegistry } from './models/registry.ts';
|
||||
import { ModelLoader } from './models/loader.ts';
|
||||
import { GpuHandler } from './cli/gpu-handler.ts';
|
||||
import { ContainerHandler } from './cli/container-handler.ts';
|
||||
import { ClusterHandler } from './cli/cluster-handler.ts';
|
||||
import { ModelHandler } from './cli/model-handler.ts';
|
||||
import { ConfigHandler } from './cli/config-handler.ts';
|
||||
import { ServiceHandler } from './cli/service-handler.ts';
|
||||
@@ -35,12 +38,15 @@ export class ModelGrid {
|
||||
private driverManager: DriverManager;
|
||||
private dockerManager: DockerManager;
|
||||
private containerManager: ContainerManager;
|
||||
private clusterManager: ClusterManager;
|
||||
private clusterCoordinator?: ClusterCoordinator;
|
||||
private modelRegistry: ModelRegistry;
|
||||
private modelLoader?: ModelLoader;
|
||||
|
||||
// CLI Handlers
|
||||
private gpuHandler: GpuHandler;
|
||||
private containerHandler: ContainerHandler;
|
||||
private clusterHandler: ClusterHandler;
|
||||
private modelHandler: ModelHandler;
|
||||
private configHandler: ConfigHandler;
|
||||
private serviceHandler: ServiceHandler;
|
||||
@@ -52,6 +58,7 @@ export class ModelGrid {
|
||||
this.driverManager = new DriverManager();
|
||||
this.dockerManager = new DockerManager();
|
||||
this.containerManager = new ContainerManager();
|
||||
this.clusterManager = new ClusterManager();
|
||||
this.modelRegistry = new ModelRegistry();
|
||||
this.systemd = new Systemd();
|
||||
this.daemon = new Daemon(this);
|
||||
@@ -59,7 +66,12 @@ export class ModelGrid {
|
||||
// Initialize CLI handlers
|
||||
this.gpuHandler = new GpuHandler();
|
||||
this.containerHandler = new ContainerHandler(this.containerManager);
|
||||
this.modelHandler = new ModelHandler(this.containerManager, this.modelRegistry);
|
||||
this.clusterHandler = new ClusterHandler();
|
||||
this.modelHandler = new ModelHandler(
|
||||
this.containerManager,
|
||||
this.getClusterCoordinator(),
|
||||
this.modelRegistry,
|
||||
);
|
||||
this.configHandler = new ConfigHandler();
|
||||
this.serviceHandler = new ServiceHandler(this);
|
||||
}
|
||||
@@ -70,7 +82,14 @@ export class ModelGrid {
|
||||
public async loadConfig(): Promise<void> {
|
||||
try {
|
||||
const configContent = await fs.readFile(PATHS.CONFIG_FILE, 'utf-8');
|
||||
this.config = JSON.parse(configContent) as IModelGridConfig;
|
||||
this.config = this.normalizeConfig(
|
||||
JSON.parse(configContent) as Partial<IModelGridConfig> & {
|
||||
models?: {
|
||||
greenlistUrl?: string;
|
||||
autoPull?: boolean;
|
||||
} & Partial<IModelGridConfig['models']>;
|
||||
},
|
||||
);
|
||||
logger.dim(`Configuration loaded from ${PATHS.CONFIG_FILE}`);
|
||||
} catch (error) {
|
||||
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
|
||||
@@ -163,6 +182,23 @@ export class ModelGrid {
|
||||
return this.containerManager;
|
||||
}
|
||||
|
||||
/**
 * Get Cluster Manager instance
 *
 * @returns the ClusterManager held by this ModelGrid instance
 */
public getClusterManager(): ClusterManager {
  const { clusterManager } = this;
  return clusterManager;
}
|
||||
|
||||
/**
 * Get Cluster Coordinator instance, creating it on first access.
 *
 * When no coordinator is stored yet, one is built from the cluster manager,
 * the container manager, the model registry and the result of
 * getModelLoader(), and is then cached on this instance.
 *
 * NOTE(review): initialization code further down in this class assigns a
 * brand-new coordinator. References obtained from this getter before that
 * point (e.g. the one passed to ModelHandler in the constructor) appear to
 * keep pointing at the earlier instance — confirm this is intended.
 *
 * @returns the cached coordinator, or the newly created one
 */
public getClusterCoordinator(): ClusterCoordinator {
  const existing = this.clusterCoordinator;
  if (existing) {
    return existing;
  }

  const created = new ClusterCoordinator(
    this.clusterManager,
    this.containerManager,
    this.modelRegistry,
    this.getModelLoader(),
  );
  this.clusterCoordinator = created;
  return created;
}
|
||||
|
||||
/**
|
||||
* Get Model Registry instance
|
||||
*/
|
||||
@@ -203,6 +239,10 @@ export class ModelGrid {
|
||||
return this.modelHandler;
|
||||
}
|
||||
|
||||
/**
 * Get Cluster Handler
 *
 * @returns the ClusterHandler held by this ModelGrid instance
 */
public getClusterHandler(): ClusterHandler {
  const { clusterHandler } = this;
  return clusterHandler;
}
|
||||
|
||||
/**
|
||||
* Get Config Handler
|
||||
*/
|
||||
@@ -234,18 +274,81 @@ export class ModelGrid {
|
||||
}
|
||||
|
||||
// Initialize model registry
|
||||
this.modelRegistry.setGreenlistUrl(this.config.models.greenlistUrl);
|
||||
this.modelRegistry.setCatalogUrl(this.config.models.registryUrl);
|
||||
this.clusterManager.configure(this.config.cluster);
|
||||
await this.clusterManager.initialize();
|
||||
|
||||
// Create model loader
|
||||
this.modelLoader = new ModelLoader(
|
||||
this.modelRegistry,
|
||||
this.containerManager,
|
||||
this.config.models.autoPull,
|
||||
this.config.models.autoDeploy,
|
||||
);
|
||||
this.clusterCoordinator = new ClusterCoordinator(
|
||||
this.clusterManager,
|
||||
this.containerManager,
|
||||
this.modelRegistry,
|
||||
this.modelLoader,
|
||||
);
|
||||
|
||||
logger.success('ModelGrid initialized');
|
||||
}
|
||||
|
||||
/**
 * Build a complete configuration object from a possibly partial one.
 *
 * Every section of the result is populated; values absent from the input are
 * replaced by the defaults written out below. Fallbacks using `||` also
 * replace falsy values such as 0 or '' with the default, while fallbacks
 * using `??` only replace null/undefined.
 *
 * Additional handling visible here:
 * - container entries whose `type` is 'ollama' are dropped;
 * - `models.registryUrl` falls back to `models.greenlistUrl`, and
 *   `models.autoDeploy` falls back to `models.autoPull` (presumably older
 *   key names — confirm against the config schema);
 * - `models.defaultEngine` is always set to 'vllm', regardless of the input.
 *
 * @param config parsed configuration, possibly incomplete
 * @returns a fully populated configuration
 */
private normalizeConfig(
  config: Partial<IModelGridConfig> & {
    models?: {
      greenlistUrl?: string;
      autoPull?: boolean;
    } & Partial<IModelGridConfig['models']>;
  },
): IModelGridConfig {
  const { api, docker, gpus, models, cluster } = config;

  // Keep every container entry except those declaring the 'ollama' type.
  const containers = (config.containers || []).filter((entry) => {
    const declaredType = (entry as { type?: string }).type;
    return declaredType !== 'ollama';
  });

  const apiSection: IModelGridConfig['api'] = {
    port: api?.port || 8080,
    host: api?.host || '0.0.0.0',
    apiKeys: api?.apiKeys || [],
    rateLimit: api?.rateLimit,
    cors: api?.cors ?? true,
    corsOrigins: api?.corsOrigins || ['*'],
  };

  const dockerSection: IModelGridConfig['docker'] = {
    networkName: docker?.networkName || 'modelgrid',
    runtime: docker?.runtime || 'docker',
    socketPath: docker?.socketPath,
  };

  const gpuSection: IModelGridConfig['gpus'] = {
    autoDetect: gpus?.autoDetect ?? true,
    assignments: gpus?.assignments || {},
  };

  const modelSection: IModelGridConfig['models'] = {
    // registryUrl wins, then greenlistUrl, then the built-in catalog URL.
    registryUrl: models?.registryUrl || models?.greenlistUrl ||
      'https://list.modelgrid.com/catalog/models.json',
    // autoDeploy wins, then autoPull, then true.
    autoDeploy: models?.autoDeploy ?? models?.autoPull ?? true,
    defaultEngine: 'vllm',
    autoLoad: models?.autoLoad || [],
  };

  const clusterSection: IModelGridConfig['cluster'] = {
    enabled: cluster?.enabled ?? false,
    nodeName: cluster?.nodeName || 'modelgrid-local',
    role: cluster?.role || 'standalone',
    bindHost: cluster?.bindHost || '0.0.0.0',
    gossipPort: cluster?.gossipPort || 7946,
    sharedSecret: cluster?.sharedSecret,
    advertiseUrl: cluster?.advertiseUrl,
    controlPlaneUrl: cluster?.controlPlaneUrl,
    heartbeatIntervalMs: cluster?.heartbeatIntervalMs || 5000,
    seedNodes: cluster?.seedNodes || [],
  };

  return {
    version: config.version || VERSION,
    api: apiSection,
    docker: dockerSection,
    gpus: gpuSection,
    containers,
    models: modelSection,
    cluster: clusterSection,
    checkInterval: config.checkInterval || 30000,
  };
}
|
||||
|
||||
/**
|
||||
* Shutdown the ModelGrid system
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user