feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
+62
-12
@@ -25,6 +25,26 @@ export class ConfigHandler {
|
||||
const configPath = PATHS.CONFIG_FILE;
|
||||
const configContent = await fs.readFile(configPath, 'utf-8');
|
||||
const config = JSON.parse(configContent) as IModelGridConfig;
|
||||
const modelConfig = {
|
||||
registryUrl: config.models.registryUrl ||
|
||||
(config.models as { greenlistUrl?: string }).greenlistUrl ||
|
||||
'https://list.modelgrid.com/catalog/models.json',
|
||||
autoDeploy: config.models.autoDeploy ??
|
||||
(config.models as { autoPull?: boolean }).autoPull ?? true,
|
||||
defaultEngine: config.models.defaultEngine || 'vllm',
|
||||
autoLoad: config.models.autoLoad || [],
|
||||
};
|
||||
const clusterConfig = config.cluster || {
|
||||
enabled: false,
|
||||
nodeName: 'modelgrid-local',
|
||||
role: 'standalone',
|
||||
bindHost: '0.0.0.0',
|
||||
gossipPort: 7946,
|
||||
sharedSecret: undefined,
|
||||
advertiseUrl: undefined,
|
||||
controlPlaneUrl: undefined,
|
||||
heartbeatIntervalMs: 5000,
|
||||
};
|
||||
|
||||
// Overview
|
||||
logger.logBox(
|
||||
@@ -48,9 +68,7 @@ export class ConfigHandler {
|
||||
`Host: ${theme.info(config.api.host)}`,
|
||||
`Port: ${theme.highlight(String(config.api.port))}`,
|
||||
`API Keys: ${config.api.apiKeys.length} configured`,
|
||||
...(config.api.rateLimit
|
||||
? [`Rate Limit: ${config.api.rateLimit} req/min`]
|
||||
: []),
|
||||
...(config.api.rateLimit ? [`Rate Limit: ${config.api.rateLimit} req/min`] : []),
|
||||
'',
|
||||
theme.dim('Endpoint:'),
|
||||
` http://${config.api.host}:${config.api.port}/v1/chat/completions`,
|
||||
@@ -88,12 +106,33 @@ export class ConfigHandler {
|
||||
logger.logBox(
|
||||
'Models',
|
||||
[
|
||||
`Auto Pull: ${config.models.autoPull ? theme.success('Enabled') : theme.dim('Disabled')}`,
|
||||
`Default Container: ${config.models.defaultContainer}`,
|
||||
`Auto Load: ${config.models.autoLoad.length} model(s)`,
|
||||
`Auto Deploy: ${
|
||||
modelConfig.autoDeploy ? theme.success('Enabled') : theme.dim('Disabled')
|
||||
}`,
|
||||
`Default Engine: ${modelConfig.defaultEngine}`,
|
||||
`Auto Load: ${modelConfig.autoLoad.length} model(s)`,
|
||||
'',
|
||||
theme.dim('Greenlist URL:'),
|
||||
` ${config.models.greenlistUrl}`,
|
||||
theme.dim('Registry URL:'),
|
||||
` ${modelConfig.registryUrl}`,
|
||||
],
|
||||
70,
|
||||
'default',
|
||||
);
|
||||
|
||||
logger.log('');
|
||||
logger.logBox(
|
||||
'Cluster',
|
||||
[
|
||||
`Enabled: ${clusterConfig.enabled ? theme.success('Yes') : theme.dim('No')}`,
|
||||
`Node: ${clusterConfig.nodeName}`,
|
||||
`Role: ${clusterConfig.role}`,
|
||||
`Bind Host: ${clusterConfig.bindHost}:${clusterConfig.gossipPort}`,
|
||||
`Shared Secret: ${
|
||||
clusterConfig.sharedSecret ? theme.success('Configured') : theme.dim('Not set')
|
||||
}`,
|
||||
`Advertise URL: ${clusterConfig.advertiseUrl || theme.dim('Default loopback')}`,
|
||||
`Control Plane: ${clusterConfig.controlPlaneUrl || theme.dim('Not configured')}`,
|
||||
`Heartbeat: ${clusterConfig.heartbeatIntervalMs}ms`,
|
||||
],
|
||||
70,
|
||||
'default',
|
||||
@@ -110,7 +149,7 @@ export class ConfigHandler {
|
||||
name: c.name,
|
||||
type: c.type,
|
||||
image: c.image.length > 40 ? c.image.substring(0, 37) + '...' : c.image,
|
||||
port: c.port,
|
||||
port: String(c.port),
|
||||
gpus: c.gpuIds.length > 0 ? c.gpuIds.join(',') : theme.dim('None'),
|
||||
}));
|
||||
|
||||
@@ -189,11 +228,22 @@ export class ConfigHandler {
|
||||
},
|
||||
containers: [],
|
||||
models: {
|
||||
greenlistUrl: 'https://code.foss.global/modelgrid.com/model_lists/raw/branch/main/greenlit.json',
|
||||
autoPull: true,
|
||||
defaultContainer: 'ollama',
|
||||
registryUrl: 'https://list.modelgrid.com/catalog/models.json',
|
||||
autoDeploy: true,
|
||||
defaultEngine: 'vllm',
|
||||
autoLoad: [],
|
||||
},
|
||||
cluster: {
|
||||
enabled: false,
|
||||
nodeName: 'modelgrid-local',
|
||||
role: 'standalone',
|
||||
bindHost: '0.0.0.0',
|
||||
gossipPort: 7946,
|
||||
sharedSecret: '',
|
||||
advertiseUrl: 'http://127.0.0.1:8080',
|
||||
heartbeatIntervalMs: 5000,
|
||||
seedNodes: [],
|
||||
},
|
||||
checkInterval: 30000,
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user