// ModelGrid core-component unit test suite (Deno / TypeScript).
import { assert, assertEquals, assertExists } from 'jsr:@std/assert@^1.0.0';

import { ClusterManager } from '../ts/cluster/cluster-manager.ts';
import { buildGpuTopologyGroups } from '../ts/cluster/placement.ts';
import { shortId } from '../ts/helpers/shortid.ts';
// =============================================================================
|
|
// UNIT TESTS - ModelGrid Core Components
|
|
// =============================================================================
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// shortId() Tests
|
|
// -----------------------------------------------------------------------------
|
|
|
|
Deno.test('shortId: generates 6-character string', () => {
|
|
const id = shortId();
|
|
assertEquals(id.length, 6);
|
|
});
|
|
|
|
Deno.test('shortId: contains only alphanumeric characters', () => {
|
|
const id = shortId();
|
|
const alphanumericRegex = /^[a-zA-Z0-9]+$/;
|
|
assert(alphanumericRegex.test(id), `ID "${id}" contains non-alphanumeric characters`);
|
|
});
|
|
|
|
Deno.test('shortId: generates unique IDs', () => {
|
|
const ids = new Set<string>();
|
|
const count = 100;
|
|
|
|
for (let i = 0; i < count; i++) {
|
|
ids.add(shortId());
|
|
}
|
|
|
|
// All IDs should be unique (statistically extremely likely for 100 IDs)
|
|
assertEquals(ids.size, count, 'Generated IDs should be unique');
|
|
});
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// Interface Type Tests
|
|
// -----------------------------------------------------------------------------
|
|
|
|
Deno.test('IModelGridConfig: valid config structure', () => {
|
|
const config = {
|
|
version: '1.0',
|
|
api: {
|
|
port: 8080,
|
|
host: '0.0.0.0',
|
|
apiKeys: ['test-key'],
|
|
},
|
|
docker: {
|
|
networkName: 'modelgrid',
|
|
runtime: 'docker' as const,
|
|
},
|
|
gpus: {
|
|
autoDetect: true,
|
|
assignments: {},
|
|
},
|
|
containers: [],
|
|
models: {
|
|
registryUrl: 'https://example.com/models.json',
|
|
autoDeploy: true,
|
|
defaultEngine: 'vllm' as const,
|
|
autoLoad: [],
|
|
},
|
|
cluster: {
|
|
enabled: false,
|
|
nodeName: 'modelgrid-local',
|
|
role: 'standalone' as const,
|
|
bindHost: '0.0.0.0',
|
|
gossipPort: 7946,
|
|
sharedSecret: '',
|
|
advertiseUrl: 'http://127.0.0.1:8080',
|
|
heartbeatIntervalMs: 5000,
|
|
seedNodes: [],
|
|
},
|
|
checkInterval: 30000,
|
|
};
|
|
|
|
assertExists(config.version);
|
|
assertExists(config.api);
|
|
assertExists(config.docker);
|
|
assertExists(config.gpus);
|
|
assertExists(config.containers);
|
|
assertExists(config.models);
|
|
assertEquals(config.api.port, 8080);
|
|
assertEquals(config.docker.runtime, 'docker');
|
|
});
|
|
|
|
Deno.test('IGpuInfo: valid GPU info structure', () => {
|
|
const gpu = {
|
|
id: 'gpu-0',
|
|
vendor: 'nvidia' as const,
|
|
model: 'RTX 4090',
|
|
vram: 24576,
|
|
driverVersion: '535.154.05',
|
|
cudaVersion: '12.2',
|
|
pciSlot: '0000:01:00.0',
|
|
};
|
|
|
|
assertExists(gpu.id);
|
|
assertExists(gpu.vendor);
|
|
assertExists(gpu.model);
|
|
assert(gpu.vram > 0, 'VRAM should be positive');
|
|
assert(['nvidia', 'amd', 'intel'].includes(gpu.vendor), 'Vendor should be valid');
|
|
});
|
|
|
|
Deno.test('IContainerConfig: valid container config structure', () => {
|
|
const container = {
|
|
id: 'vllm-1',
|
|
type: 'vllm' as const,
|
|
name: 'vLLM Deployment',
|
|
image: 'vllm/vllm-openai:latest',
|
|
gpuIds: ['gpu-0'],
|
|
port: 8000,
|
|
models: ['meta-llama/Llama-3.1-8B-Instruct'],
|
|
};
|
|
|
|
assertExists(container.id);
|
|
assertExists(container.type);
|
|
assertExists(container.name);
|
|
assertExists(container.image);
|
|
assert(container.gpuIds.length > 0, 'Should have at least one GPU');
|
|
assert(container.port > 0, 'Port should be positive');
|
|
});
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// Model Catalog Tests
|
|
// -----------------------------------------------------------------------------
|
|
|
|
Deno.test('Catalog model validation: valid model passes', () => {
|
|
const catalog = {
|
|
version: '1.0',
|
|
models: [
|
|
{ id: 'meta-llama/Llama-3.1-8B-Instruct', engine: 'vllm', requirements: { minVramGb: 18 } },
|
|
{ id: 'Qwen/Qwen2.5-7B-Instruct', engine: 'vllm', requirements: { minVramGb: 16 } },
|
|
],
|
|
};
|
|
|
|
const requestedModel = 'meta-llama/Llama-3.1-8B-Instruct';
|
|
const availableVram = 24; // GB
|
|
|
|
const model = catalog.models.find((m) => m.id === requestedModel);
|
|
assertExists(model, 'Model should be in catalog');
|
|
assert(availableVram >= model.requirements.minVramGb, 'Should have enough VRAM');
|
|
});
|
|
|
|
Deno.test('Catalog model validation: insufficient VRAM fails', () => {
|
|
const catalog = {
|
|
version: '1.0',
|
|
models: [
|
|
{ id: 'meta-llama/Llama-3.1-70B-Instruct', engine: 'vllm', requirements: { minVramGb: 48 } },
|
|
],
|
|
};
|
|
|
|
const requestedModel = 'meta-llama/Llama-3.1-70B-Instruct';
|
|
const availableVram = 24; // GB
|
|
|
|
const model = catalog.models.find((m) => m.id === requestedModel);
|
|
assertExists(model, 'Model should be in catalog');
|
|
assert(availableVram < model.requirements.minVramGb, 'Should NOT have enough VRAM');
|
|
});
|
|
|
|
Deno.test('Catalog model validation: unlisted model rejected', () => {
|
|
const catalog = {
|
|
version: '1.0',
|
|
models: [
|
|
{ id: 'meta-llama/Llama-3.1-8B-Instruct', engine: 'vllm', requirements: { minVramGb: 18 } },
|
|
],
|
|
};
|
|
|
|
const requestedModel = 'some-random-model:latest';
|
|
const model = catalog.models.find((m) => m.id === requestedModel);
|
|
assertEquals(model, undefined, 'Model should NOT be in catalog');
|
|
});
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// API Request Validation Tests
|
|
// -----------------------------------------------------------------------------
|
|
|
|
Deno.test('Chat completion request: valid request passes', () => {
|
|
const request = {
|
|
model: 'llama3:8b',
|
|
messages: [
|
|
{ role: 'user', content: 'Hello!' },
|
|
],
|
|
};
|
|
|
|
assertExists(request.model, 'Model should be specified');
|
|
assert(request.messages.length > 0, 'Should have at least one message');
|
|
assert(
|
|
request.messages.every((m) => m.role && m.content),
|
|
'All messages should have role and content',
|
|
);
|
|
});
|
|
|
|
Deno.test('Chat completion request: missing model fails', () => {
|
|
const request = {
|
|
messages: [
|
|
{ role: 'user', content: 'Hello!' },
|
|
],
|
|
};
|
|
|
|
assertEquals((request as { model?: string }).model, undefined, 'Model should be missing');
|
|
});
|
|
|
|
Deno.test('Chat completion request: empty messages fails', () => {
|
|
const request = {
|
|
model: 'llama3:8b',
|
|
messages: [],
|
|
};
|
|
|
|
assertEquals(request.messages.length, 0, 'Messages should be empty');
|
|
});
|
|
|
|
Deno.test('Embedding request: valid request passes', () => {
|
|
const request = {
|
|
model: 'llama3:8b',
|
|
input: 'Hello, world!',
|
|
};
|
|
|
|
assertExists(request.model, 'Model should be specified');
|
|
assertExists(request.input, 'Input should be specified');
|
|
});
|
|
|
|
Deno.test('Embedding request: array input passes', () => {
|
|
const request = {
|
|
model: 'llama3:8b',
|
|
input: ['Hello', 'World'],
|
|
};
|
|
|
|
assertExists(request.model, 'Model should be specified');
|
|
assert(Array.isArray(request.input), 'Input should be an array');
|
|
assert(request.input.length > 0, 'Input should not be empty');
|
|
});
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// Container Type Tests
|
|
// -----------------------------------------------------------------------------
|
|
|
|
Deno.test('Container types: vllm base configuration', () => {
|
|
const vllmConfig = {
|
|
type: 'vllm' as const,
|
|
image: 'vllm/vllm-openai:latest',
|
|
defaultPort: 8000,
|
|
apiPath: '/v1',
|
|
};
|
|
|
|
assertEquals(vllmConfig.type, 'vllm');
|
|
assertEquals(vllmConfig.defaultPort, 8000);
|
|
});
|
|
|
|
Deno.test('Container types: vllm configuration', () => {
|
|
const vllmConfig = {
|
|
type: 'vllm' as const,
|
|
image: 'vllm/vllm-openai:latest',
|
|
defaultPort: 8000,
|
|
apiPath: '/v1',
|
|
};
|
|
|
|
assertEquals(vllmConfig.type, 'vllm');
|
|
assertEquals(vllmConfig.defaultPort, 8000);
|
|
});
|
|
|
|
Deno.test('Container types: tgi configuration', () => {
|
|
const tgiConfig = {
|
|
type: 'tgi' as const,
|
|
image: 'ghcr.io/huggingface/text-generation-inference:latest',
|
|
defaultPort: 80,
|
|
apiPath: '/generate',
|
|
};
|
|
|
|
assertEquals(tgiConfig.type, 'tgi');
|
|
assertEquals(tgiConfig.defaultPort, 80);
|
|
});
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// GPU Vendor Tests
|
|
// -----------------------------------------------------------------------------
|
|
|
|
Deno.test('GPU vendors: NVIDIA detection pattern', () => {
|
|
const nvidiaPatterns = ['NVIDIA', 'GeForce', 'Quadro', 'Tesla', 'RTX', 'GTX'];
|
|
const gpuName = 'NVIDIA GeForce RTX 4090';
|
|
|
|
const isNvidia = nvidiaPatterns.some((pattern) =>
|
|
gpuName.toUpperCase().includes(pattern.toUpperCase())
|
|
);
|
|
assert(isNvidia, 'Should detect NVIDIA GPU');
|
|
});
|
|
|
|
Deno.test('GPU vendors: AMD detection pattern', () => {
|
|
const amdPatterns = ['AMD', 'Radeon', 'RX'];
|
|
const gpuName = 'AMD Radeon RX 7900 XTX';
|
|
|
|
const isAmd = amdPatterns.some((pattern) =>
|
|
gpuName.toUpperCase().includes(pattern.toUpperCase())
|
|
);
|
|
assert(isAmd, 'Should detect AMD GPU');
|
|
});
|
|
|
|
Deno.test('GPU vendors: Intel detection pattern', () => {
|
|
const intelPatterns = ['Intel', 'Arc', 'Iris', 'UHD'];
|
|
const gpuName = 'Intel Arc A770';
|
|
|
|
const isIntel = intelPatterns.some((pattern) =>
|
|
gpuName.toUpperCase().includes(pattern.toUpperCase())
|
|
);
|
|
assert(isIntel, 'Should detect Intel GPU');
|
|
});
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// VRAM Calculation Tests
|
|
// -----------------------------------------------------------------------------
|
|
|
|
Deno.test('VRAM calculation: MB to GB conversion', () => {
|
|
const vramMB = 24576; // 24 GB in MB
|
|
const vramGB = vramMB / 1024;
|
|
assertEquals(vramGB, 24);
|
|
});
|
|
|
|
Deno.test('VRAM calculation: model fits in available VRAM', () => {
|
|
const availableVramGB = 24;
|
|
const modelRequiredVramGB = 8;
|
|
const overhead = 2; // GB for system overhead
|
|
|
|
const fits = (modelRequiredVramGB + overhead) <= availableVramGB;
|
|
assert(fits, 'Model should fit in available VRAM');
|
|
});
|
|
|
|
Deno.test('VRAM calculation: multiple models VRAM sum', () => {
|
|
const models = [
|
|
{ name: 'llama3:8b', vram: 8 },
|
|
{ name: 'mistral:7b', vram: 8 },
|
|
];
|
|
|
|
const totalVram = models.reduce((sum, m) => sum + m.vram, 0);
|
|
assertEquals(totalVram, 16);
|
|
});
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// Cluster Scheduling Tests
|
|
// -----------------------------------------------------------------------------
|
|
|
|
Deno.test('Cluster manager resolves local model first', () => {
|
|
const clusterManager = new ClusterManager();
|
|
clusterManager.configure({
|
|
enabled: true,
|
|
nodeName: 'control',
|
|
role: 'control-plane',
|
|
bindHost: '0.0.0.0',
|
|
gossipPort: 7946,
|
|
advertiseUrl: 'http://control:8080',
|
|
heartbeatIntervalMs: 5000,
|
|
seedNodes: [],
|
|
});
|
|
|
|
clusterManager.updateLocalNode({
|
|
nodeName: 'control',
|
|
role: 'control-plane',
|
|
endpoint: 'http://control:8080',
|
|
healthy: true,
|
|
resources: {
|
|
gpuCount: 2,
|
|
totalVramGb: 48,
|
|
availableVramGb: 48,
|
|
maxSingleGpuVramGb: 24,
|
|
largestGpuGroupCount: 2,
|
|
largestGpuGroupVramGb: 48,
|
|
deploymentCount: 1,
|
|
topologyGroups: [
|
|
{
|
|
id: 'nvidia-1',
|
|
vendor: 'nvidia',
|
|
gpuIds: ['gpu-0', 'gpu-1'],
|
|
gpuCount: 2,
|
|
totalVramGb: 48,
|
|
maxSingleGpuVramGb: 24,
|
|
busNumbers: [1, 2],
|
|
},
|
|
],
|
|
},
|
|
deployments: [
|
|
{
|
|
modelId: 'meta-llama/Llama-3.1-8B-Instruct',
|
|
engine: 'vllm',
|
|
endpoint: 'http://control:8080',
|
|
healthy: true,
|
|
containerId: 'vllm-llama',
|
|
},
|
|
],
|
|
lastSeenAt: Date.now(),
|
|
});
|
|
|
|
clusterManager.upsertNode({
|
|
nodeName: 'worker-a',
|
|
role: 'worker',
|
|
endpoint: 'http://worker-a:8080',
|
|
healthy: true,
|
|
resources: {
|
|
gpuCount: 4,
|
|
totalVramGb: 96,
|
|
availableVramGb: 72,
|
|
maxSingleGpuVramGb: 24,
|
|
largestGpuGroupCount: 4,
|
|
largestGpuGroupVramGb: 96,
|
|
deploymentCount: 2,
|
|
topologyGroups: [
|
|
{
|
|
id: 'nvidia-1',
|
|
vendor: 'nvidia',
|
|
gpuIds: ['gpu-0', 'gpu-1', 'gpu-2', 'gpu-3'],
|
|
gpuCount: 4,
|
|
totalVramGb: 96,
|
|
maxSingleGpuVramGb: 24,
|
|
busNumbers: [1, 2, 3, 4],
|
|
},
|
|
],
|
|
},
|
|
deployments: [
|
|
{
|
|
modelId: 'meta-llama/Llama-3.1-8B-Instruct',
|
|
engine: 'vllm',
|
|
endpoint: 'http://worker-a:8080',
|
|
healthy: true,
|
|
containerId: 'vllm-llama-worker',
|
|
},
|
|
],
|
|
lastSeenAt: Date.now(),
|
|
});
|
|
|
|
const resolved = clusterManager.resolveModel('meta-llama/Llama-3.1-8B-Instruct');
|
|
assertExists(resolved);
|
|
assertEquals(resolved.nodeName, 'control');
|
|
});
|
|
|
|
Deno.test('Cluster manager stores desired deployments', () => {
|
|
const clusterManager = new ClusterManager();
|
|
const desired = clusterManager.upsertDesiredDeployment('meta-llama/Llama-3.1-8B-Instruct', 3);
|
|
|
|
assertEquals(desired.modelId, 'meta-llama/Llama-3.1-8B-Instruct');
|
|
assertEquals(desired.desiredReplicas, 3);
|
|
assertEquals(clusterManager.getDesiredDeployments().length, 1);
|
|
});
|
|
|
|
Deno.test('Cluster manager picks the node with enough VRAM', () => {
|
|
const clusterManager = new ClusterManager();
|
|
clusterManager.configure({
|
|
enabled: true,
|
|
nodeName: 'control',
|
|
role: 'control-plane',
|
|
bindHost: '0.0.0.0',
|
|
gossipPort: 7946,
|
|
advertiseUrl: 'http://control:8080',
|
|
heartbeatIntervalMs: 5000,
|
|
seedNodes: [],
|
|
});
|
|
|
|
clusterManager.updateLocalNode({
|
|
nodeName: 'control',
|
|
role: 'control-plane',
|
|
endpoint: 'http://control:8080',
|
|
healthy: true,
|
|
resources: {
|
|
gpuCount: 1,
|
|
totalVramGb: 16,
|
|
availableVramGb: 8,
|
|
maxSingleGpuVramGb: 8,
|
|
largestGpuGroupCount: 1,
|
|
largestGpuGroupVramGb: 8,
|
|
deploymentCount: 1,
|
|
topologyGroups: [
|
|
{
|
|
id: 'nvidia-1',
|
|
vendor: 'nvidia',
|
|
gpuIds: ['gpu-0'],
|
|
gpuCount: 1,
|
|
totalVramGb: 8,
|
|
maxSingleGpuVramGb: 8,
|
|
busNumbers: [1],
|
|
},
|
|
],
|
|
},
|
|
deployments: [],
|
|
lastSeenAt: Date.now(),
|
|
});
|
|
|
|
clusterManager.upsertNode({
|
|
nodeName: 'worker-a',
|
|
role: 'worker',
|
|
endpoint: 'http://worker-a:8080',
|
|
healthy: true,
|
|
resources: {
|
|
gpuCount: 2,
|
|
totalVramGb: 48,
|
|
availableVramGb: 32,
|
|
maxSingleGpuVramGb: 16,
|
|
largestGpuGroupCount: 2,
|
|
largestGpuGroupVramGb: 32,
|
|
deploymentCount: 0,
|
|
topologyGroups: [
|
|
{
|
|
id: 'nvidia-1',
|
|
vendor: 'nvidia',
|
|
gpuIds: ['gpu-0', 'gpu-1'],
|
|
gpuCount: 2,
|
|
totalVramGb: 32,
|
|
maxSingleGpuVramGb: 16,
|
|
busNumbers: [1, 2],
|
|
},
|
|
],
|
|
},
|
|
deployments: [],
|
|
lastSeenAt: Date.now(),
|
|
});
|
|
|
|
const selected = clusterManager.pickNodeForModel({
|
|
id: 'meta-llama/Llama-3.1-8B-Instruct',
|
|
engine: 'vllm',
|
|
source: {
|
|
repo: 'meta-llama/Llama-3.1-8B-Instruct',
|
|
},
|
|
capabilities: {
|
|
chat: true,
|
|
},
|
|
requirements: {
|
|
minVramGb: 18,
|
|
minGpuCount: 1,
|
|
},
|
|
});
|
|
|
|
assertExists(selected);
|
|
assertEquals(selected.nodeName, 'worker-a');
|
|
});
|
|
|
|
Deno.test('Cluster manager excludes cordoned nodes from placement', () => {
|
|
const clusterManager = new ClusterManager();
|
|
clusterManager.configure({
|
|
enabled: true,
|
|
nodeName: 'control',
|
|
role: 'control-plane',
|
|
bindHost: '0.0.0.0',
|
|
gossipPort: 7946,
|
|
advertiseUrl: 'http://control:8080',
|
|
heartbeatIntervalMs: 5000,
|
|
seedNodes: [],
|
|
});
|
|
|
|
clusterManager.updateLocalNode({
|
|
nodeName: 'control',
|
|
role: 'control-plane',
|
|
endpoint: 'http://control:8080',
|
|
healthy: true,
|
|
resources: {
|
|
gpuCount: 2,
|
|
totalVramGb: 48,
|
|
availableVramGb: 32,
|
|
maxSingleGpuVramGb: 24,
|
|
largestGpuGroupCount: 2,
|
|
largestGpuGroupVramGb: 48,
|
|
deploymentCount: 0,
|
|
topologyGroups: [
|
|
{
|
|
id: 'nvidia-1',
|
|
vendor: 'nvidia',
|
|
gpuIds: ['gpu-0', 'gpu-1'],
|
|
gpuCount: 2,
|
|
totalVramGb: 48,
|
|
maxSingleGpuVramGb: 24,
|
|
busNumbers: [1, 2],
|
|
},
|
|
],
|
|
},
|
|
deployments: [],
|
|
lastSeenAt: Date.now(),
|
|
});
|
|
|
|
clusterManager.upsertNode({
|
|
nodeName: 'worker-a',
|
|
role: 'worker',
|
|
endpoint: 'http://worker-a:8080',
|
|
healthy: true,
|
|
resources: {
|
|
gpuCount: 2,
|
|
totalVramGb: 48,
|
|
availableVramGb: 48,
|
|
maxSingleGpuVramGb: 24,
|
|
largestGpuGroupCount: 2,
|
|
largestGpuGroupVramGb: 48,
|
|
deploymentCount: 0,
|
|
topologyGroups: [
|
|
{
|
|
id: 'nvidia-1',
|
|
vendor: 'nvidia',
|
|
gpuIds: ['gpu-0', 'gpu-1'],
|
|
gpuCount: 2,
|
|
totalVramGb: 48,
|
|
maxSingleGpuVramGb: 24,
|
|
busNumbers: [1, 2],
|
|
},
|
|
],
|
|
},
|
|
deployments: [],
|
|
lastSeenAt: Date.now(),
|
|
});
|
|
|
|
clusterManager.setNodeSchedulerState('worker-a', 'cordoned');
|
|
|
|
const selected = clusterManager.pickNodeForModel({
|
|
id: 'meta-llama/Llama-3.1-8B-Instruct',
|
|
engine: 'vllm',
|
|
source: { repo: 'meta-llama/Llama-3.1-8B-Instruct' },
|
|
capabilities: { chat: true },
|
|
requirements: {
|
|
minVramGb: 18,
|
|
minGpuCount: 1,
|
|
},
|
|
});
|
|
|
|
assertExists(selected);
|
|
assertEquals(selected.nodeName, 'control');
|
|
});
|
|
|
|
Deno.test('Topology grouping keeps distant PCI buses separate', () => {
|
|
const groups = buildGpuTopologyGroups([
|
|
{ id: 'gpu-0', vendor: 'nvidia', model: 'A', vram: 24576, pciSlot: '0000:01:00.0', index: 0 },
|
|
{ id: 'gpu-1', vendor: 'nvidia', model: 'A', vram: 24576, pciSlot: '0000:02:00.0', index: 1 },
|
|
{ id: 'gpu-2', vendor: 'nvidia', model: 'A', vram: 24576, pciSlot: '0000:41:00.0', index: 2 },
|
|
{ id: 'gpu-3', vendor: 'nvidia', model: 'A', vram: 24576, pciSlot: '0000:42:00.0', index: 3 },
|
|
]);
|
|
|
|
assertEquals(groups.length, 2);
|
|
assertEquals(groups[0].gpuCount, 2);
|
|
assertEquals(groups[1].gpuCount, 2);
|
|
});
|
|
|
|
Deno.test('Cluster manager rejects node without suitable topology group', () => {
|
|
const clusterManager = new ClusterManager();
|
|
clusterManager.configure({
|
|
enabled: true,
|
|
nodeName: 'control',
|
|
role: 'control-plane',
|
|
bindHost: '0.0.0.0',
|
|
gossipPort: 7946,
|
|
advertiseUrl: 'http://control:8080',
|
|
heartbeatIntervalMs: 5000,
|
|
seedNodes: [],
|
|
});
|
|
|
|
clusterManager.updateLocalNode({
|
|
nodeName: 'control',
|
|
role: 'control-plane',
|
|
endpoint: 'http://control:8080',
|
|
healthy: true,
|
|
resources: {
|
|
gpuCount: 4,
|
|
totalVramGb: 96,
|
|
availableVramGb: 96,
|
|
maxSingleGpuVramGb: 24,
|
|
largestGpuGroupCount: 2,
|
|
largestGpuGroupVramGb: 48,
|
|
deploymentCount: 0,
|
|
topologyGroups: [
|
|
{
|
|
id: 'nvidia-1',
|
|
vendor: 'nvidia',
|
|
gpuIds: ['gpu-0', 'gpu-1'],
|
|
gpuCount: 2,
|
|
totalVramGb: 48,
|
|
maxSingleGpuVramGb: 24,
|
|
busNumbers: [1, 2],
|
|
},
|
|
{
|
|
id: 'nvidia-2',
|
|
vendor: 'nvidia',
|
|
gpuIds: ['gpu-2', 'gpu-3'],
|
|
gpuCount: 2,
|
|
totalVramGb: 48,
|
|
maxSingleGpuVramGb: 24,
|
|
busNumbers: [65, 66],
|
|
},
|
|
],
|
|
},
|
|
deployments: [],
|
|
lastSeenAt: Date.now(),
|
|
});
|
|
|
|
const selected = clusterManager.pickNodeForModel({
|
|
id: 'meta-llama/Llama-3.1-70B-Instruct',
|
|
engine: 'vllm',
|
|
source: { repo: 'meta-llama/Llama-3.1-70B-Instruct' },
|
|
capabilities: { chat: true },
|
|
requirements: {
|
|
minVramGb: 72,
|
|
minGpuCount: 4,
|
|
},
|
|
launchDefaults: {
|
|
tensorParallelSize: 4,
|
|
},
|
|
});
|
|
|
|
assertEquals(selected, null);
|
|
});
|