feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
/**
 * Model catalog interfaces for list.modelgrid.com.
 */
|
||||
|
||||
/**
 * Set of per-capability flags for a catalog model.
 *
 * Every flag is optional. This declaration does not define how an omitted
 * flag is interpreted — NOTE(review): presumably "not supported"; confirm
 * against the consumers of the catalog.
 */
export interface IModelCapabilitySet {
  /** Flag for the chat capability. */
  chat?: boolean;
  /** Flag for the completions capability. */
  completions?: boolean;
  /** Flag for the embeddings capability. */
  embeddings?: boolean;
  /** Flag for the tools capability. */
  tools?: boolean;
}
|
||||
|
||||
/**
 * Optional launch settings for a model served through vLLM.
 *
 * All members are optional. NOTE(review): the member names look like they
 * mirror vLLM engine options (e.g. tensor/pipeline parallel size, max model
 * length, GPU memory utilization) — confirm the exact mapping and the
 * defaults applied for omitted members where this profile is consumed.
 */
export interface IVllmLaunchProfile {
  /** Number of replicas. */
  replicas?: number;
  /** Tensor-parallel size. */
  tensorParallelSize?: number;
  /** Pipeline-parallel size. */
  pipelineParallelSize?: number;
  /** Maximum model length. */
  maxModelLen?: number;
  /** GPU memory utilization (presumably a fraction — TODO confirm range). */
  gpuMemoryUtilization?: number;
  /** Quantization identifier, as a free-form string. */
  quantization?: string;
  /** Data type identifier, as a free-form string. */
  dtype?: string;
  /** Generation config selector; restricted to `'auto'` or `'vllm'`. */
  generationConfig?: 'auto' | 'vllm';
  /** Additional arguments, as a list of strings. */
  extraArgs?: string[];
  /** Environment variables as a name-to-value map. */
  env?: Record<string, string>;
}
|
||||
|
||||
/**
 * A single model in the catalog.
 *
 * Required members: `id`, `engine`, `source.repo`, `capabilities`, and
 * `requirements.minVramGb`. Everything else is optional.
 */
export interface IModelCatalogEntry {
  /** Identifier of the model. */
  id: string;
  /** Alternative names for the model. */
  aliases?: string[];
  /** Serving engine; `'vllm'` is the only value this type admits. */
  engine: 'vllm';
  /** Where the model comes from. */
  source: {
    /** Repository of the model (format not specified here). */
    repo: string;
    /** Revision of the repository. */
    revision?: string;
    /** Tokenizer reference. */
    tokenizer?: string;
    /** License of the model. */
    license?: string;
    /** Homepage of the model. */
    homepage?: string;
  };
  /** Capability flags of the model. */
  capabilities: IModelCapabilitySet;
  /** Hardware requirements. */
  requirements: {
    /** Minimum VRAM (presumably in gigabytes, per the name — TODO confirm). */
    minVramGb: number;
    /** Recommended VRAM (same unit assumption as `minVramGb`). */
    recommendedVramGb?: number;
    /** Minimum number of GPUs. */
    minGpuCount?: number;
  };
  /** Default launch profile for the model. */
  launchDefaults?: IVllmLaunchProfile;
  /** Descriptive, optional information about the model. */
  metadata?: {
    /** Model family. */
    family?: string;
    /** Parameter count, kept as a string. */
    parameterCount?: string;
    /** Context window size. */
    contextWindow?: number;
    /** Short summary text. */
    summary?: string;
    /** Free-form tags. */
    tags?: string[];
  };
}
|
||||
|
||||
/**
 * Top-level model catalog document: a version, a generation marker, and the
 * list of model entries.
 */
export interface IModelCatalog {
  /** Version of the catalog, as a string (format not specified here). */
  version: string;
  /** When the catalog was generated (presumably a timestamp string — TODO confirm format). */
  generatedAt: string;
  /** Entries contained in the catalog. */
  models: IModelCatalogEntry[];
}
|
||||
Reference in New Issue
Block a user