import { assert, assertEquals, assertExists } from 'jsr:@std/assert@^1.0.0';
import { shortId } from '../ts/helpers/shortid.ts';
import { ClusterManager } from '../ts/cluster/cluster-manager.ts';
import { buildGpuTopologyGroups } from '../ts/cluster/placement.ts';

// =============================================================================
// UNIT TESTS - ModelGrid Core Components
// =============================================================================

// -----------------------------------------------------------------------------
// Shared test helpers
// -----------------------------------------------------------------------------

/**
 * Creates a ClusterManager and configures it as the 'control' control-plane
 * node. The settings are the exact literal that the cluster scheduling tests
 * below pass to configure().
 */
function createControlPlaneManager(): ClusterManager {
  const manager = new ClusterManager();
  manager.configure({
    enabled: true,
    nodeName: 'control',
    role: 'control-plane',
    bindHost: '0.0.0.0',
    gossipPort: 7946,
    advertiseUrl: 'http://control:8080',
    heartbeatIntervalMs: 5000,
    seedNodes: [],
  });
  return manager;
}

/**
 * Case-insensitive substring match: true when `gpuName` contains at least one
 * entry of `patterns`.
 */
function matchesAnyPattern(gpuName: string, patterns: string[]): boolean {
  const upperName = gpuName.toUpperCase();
  return patterns.some((pattern) => upperName.includes(pattern.toUpperCase()));
}

// -----------------------------------------------------------------------------
// shortId() Tests
// -----------------------------------------------------------------------------

// A generated ID is expected to be exactly six characters long.
Deno.test('shortId: generates 6-character string', () => {
  assertEquals(shortId().length, 6);
});

// A generated ID is expected to consist of ASCII letters and digits only.
Deno.test('shortId: contains only alphanumeric characters', () => {
  const generated = shortId();
  assert(
    /^[a-zA-Z0-9]+$/.test(generated),
    `ID "${generated}" contains non-alphanumeric characters`,
  );
});

// 100 consecutive IDs are collected into a Set; a collision would shrink it.
Deno.test('shortId: generates unique IDs', () => {
  const sampleSize = 100;
  const seen = new Set(Array.from({ length: sampleSize }, () => shortId()));

  // All IDs should be unique (statistically extremely likely for 100 IDs)
  assertEquals(seen.size, sampleSize, 'Generated IDs should be unique');
});

// -----------------------------------------------------------------------------
// Interface Type Tests
// -----------------------------------------------------------------------------

// Sanity-checks the top-level sections of a hand-written config literal.
Deno.test('IModelGridConfig: valid config structure', () => {
  const config = {
    version: '1.0',
    api: {
      port: 8080,
      host: '0.0.0.0',
      apiKeys: ['test-key'],
    },
    docker: {
      networkName: 'modelgrid',
      runtime: 'docker' as const,
    },
    gpus: {
      autoDetect: true,
      assignments: {},
    },
    containers: [],
    models: {
      registryUrl: 'https://example.com/models.json',
      autoDeploy: true,
      defaultEngine: 'vllm' as const,
      autoLoad: [],
    },
    cluster: {
      enabled: false,
      nodeName: 'modelgrid-local',
      role: 'standalone' as const,
      bindHost: '0.0.0.0',
      gossipPort: 7946,
      sharedSecret: '',
      advertiseUrl: 'http://127.0.0.1:8080',
      heartbeatIntervalMs: 5000,
      seedNodes: [],
    },
    checkInterval: 30000,
  };

  const { version, api, docker, gpus, containers, models } = config;
  assertExists(version);
  assertExists(api);
  assertExists(docker);
  assertExists(gpus);
  assertExists(containers);
  assertExists(models);
  assertEquals(api.port, 8080);
  assertEquals(docker.runtime, 'docker');
});

// Sanity-checks the fields of a hand-written GPU description literal.
Deno.test('IGpuInfo: valid GPU info structure', () => {
  const gpu = {
    id: 'gpu-0',
    vendor: 'nvidia' as const,
    model: 'RTX 4090',
    vram: 24576,
    driverVersion: '535.154.05',
    cudaVersion: '12.2',
    pciSlot: '0000:01:00.0',
  };
  const knownVendors = ['nvidia', 'amd', 'intel'];

  assertExists(gpu.id);
  assertExists(gpu.vendor);
  assertExists(gpu.model);
  assert(gpu.vram > 0, 'VRAM should be positive');
  assert(knownVendors.includes(gpu.vendor), 'Vendor should be valid');
});

// Sanity-checks the fields of a hand-written container config literal.
Deno.test('IContainerConfig: valid container config structure', () => {
  const container = {
    id: 'vllm-1',
    type: 'vllm' as const,
    name: 'vLLM Deployment',
    image: 'vllm/vllm-openai:latest',
    gpuIds: ['gpu-0'],
    port: 8000,
    models: ['meta-llama/Llama-3.1-8B-Instruct'],
  };

  assertExists(container.id);
  assertExists(container.type);
  assertExists(container.name);
  assertExists(container.image);
  assert(container.gpuIds.length > 0, 'Should have at least one GPU');
  assert(container.port > 0, 'Port should be positive');
});

// -----------------------------------------------------------------------------
// Model Catalog Tests
// -----------------------------------------------------------------------------

// The requested model is listed and its 18 GB requirement fits into 24 GB.
Deno.test('Catalog model validation: valid model passes', () => {
  const catalog = {
    version: '1.0',
    models: [
      { id: 'meta-llama/Llama-3.1-8B-Instruct', engine: 'vllm', requirements: { minVramGb: 18 } },
      { id: 'Qwen/Qwen2.5-7B-Instruct', engine: 'vllm', requirements: { minVramGb: 16 } },
    ],
  };
  const wantedId = 'meta-llama/Llama-3.1-8B-Instruct';
  const vramBudgetGb = 24; // GB

  const entry = catalog.models.find(({ id }) => id === wantedId);

  assertExists(entry, 'Model should be in catalog');
  assert(vramBudgetGb >= entry.requirements.minVramGb, 'Should have enough VRAM');
});

// The requested model is listed but needs 48 GB while only 24 GB is available.
Deno.test('Catalog model validation: insufficient VRAM fails', () => {
  const catalog = {
    version: '1.0',
    models: [
      { id: 'meta-llama/Llama-3.1-70B-Instruct', engine: 'vllm', requirements: { minVramGb: 48 } },
    ],
  };
  const wantedId = 'meta-llama/Llama-3.1-70B-Instruct';
  const vramBudgetGb = 24; // GB

  const entry = catalog.models.find(({ id }) => id === wantedId);

  assertExists(entry, 'Model should be in catalog');
  assert(vramBudgetGb < entry.requirements.minVramGb, 'Should NOT have enough VRAM');
});

// A model ID that is absent from the catalog yields no match.
Deno.test('Catalog model validation: unlisted model rejected', () => {
  const catalog = {
    version: '1.0',
    models: [
      { id: 'meta-llama/Llama-3.1-8B-Instruct', engine: 'vllm', requirements: { minVramGb: 18 } },
    ],
  };
  const wantedId = 'some-random-model:latest';

  const entry = catalog.models.find(({ id }) => id === wantedId);

  assertEquals(entry, undefined, 'Model should NOT be in catalog');
});

// -----------------------------------------------------------------------------
// API Request Validation Tests
// -----------------------------------------------------------------------------

// A request with a model and one complete message satisfies all three checks.
Deno.test('Chat completion request: valid request passes', () => {
  const request = {
    model: 'llama3:8b',
    messages: [
      { role: 'user', content: 'Hello!' },
    ],
  };

  assertExists(request.model, 'Model should be specified');
  assert(request.messages.length > 0, 'Should have at least one message');
  assert(
    request.messages.every(({ role, content }) => role && content),
    'All messages should have role and content',
  );
});

// The literal has no `model` key, so reading it yields undefined.
Deno.test('Chat completion request: missing model fails', () => {
  const request = {
    messages: [
      { role: 'user', content: 'Hello!' },
    ],
  };

  const maybeModel = (request as { model?: string }).model;
  assertEquals(maybeModel, undefined, 'Model should be missing');
});

// The literal carries an empty messages array.
Deno.test('Chat completion request: empty messages fails', () => {
  const request = {
    model: 'llama3:8b',
    messages: [],
  };

  assertEquals(request.messages.length, 0, 'Messages should be empty');
});

// Embedding request with a single string as input.
Deno.test('Embedding request: valid request passes', () => {
  const request = {
    model: 'llama3:8b',
    input: 'Hello, world!',
  };

  assertExists(request.model, 'Model should be specified');
  assertExists(request.input, 'Input should be specified');
});

// Embedding request with a non-empty array of strings as input.
Deno.test('Embedding request: array input passes', () => {
  const request = {
    model: 'llama3:8b',
    input: ['Hello', 'World'],
  };
  const { model, input } = request;

  assertExists(model, 'Model should be specified');
  assert(Array.isArray(input), 'Input should be an array');
  assert(input.length > 0, 'Input should not be empty');
});

// -----------------------------------------------------------------------------
// Container Type Tests
// -----------------------------------------------------------------------------

// NOTE(review): the body of this test is identical to the
// 'Container types: vllm configuration' test that follows it.
Deno.test('Container types: vllm base configuration', () => {
  const vllmConfig = {
    type: 'vllm' as const,
    image: 'vllm/vllm-openai:latest',
    defaultPort: 8000,
    apiPath: '/v1',
  };
  const { type, defaultPort } = vllmConfig;

  assertEquals(type, 'vllm');
  assertEquals(defaultPort, 8000);
});

// Checks the type tag and default port of the vllm container literal.
Deno.test('Container types: vllm configuration', () => {
  const vllmConfig = {
    type: 'vllm' as const,
    image: 'vllm/vllm-openai:latest',
    defaultPort: 8000,
    apiPath: '/v1',
  };
  const { type, defaultPort } = vllmConfig;

  assertEquals(type, 'vllm');
  assertEquals(defaultPort, 8000);
});

// Checks the type tag and default port of the tgi container literal.
Deno.test('Container types: tgi configuration', () => {
  const tgiConfig = {
    type: 'tgi' as const,
    image: 'ghcr.io/huggingface/text-generation-inference:latest',
    defaultPort: 80,
    apiPath: '/generate',
  };
  const { type, defaultPort } = tgiConfig;

  assertEquals(type, 'tgi');
  assertEquals(defaultPort, 80);
});

// -----------------------------------------------------------------------------
// GPU Vendor Tests
// -----------------------------------------------------------------------------

// An NVIDIA product name matches at least one NVIDIA keyword.
Deno.test('GPU vendors: NVIDIA detection pattern', () => {
  const nvidiaPatterns = ['NVIDIA', 'GeForce', 'Quadro', 'Tesla', 'RTX', 'GTX'];
  const gpuName = 'NVIDIA GeForce RTX 4090';

  assert(matchesAnyPattern(gpuName, nvidiaPatterns), 'Should detect NVIDIA GPU');
});

// An AMD product name matches at least one AMD keyword.
Deno.test('GPU vendors: AMD detection pattern', () => {
  const amdPatterns = ['AMD', 'Radeon', 'RX'];
  const gpuName = 'AMD Radeon RX 7900 XTX';

  assert(matchesAnyPattern(gpuName, amdPatterns), 'Should detect AMD GPU');
});

// An Intel product name matches at least one Intel keyword.
Deno.test('GPU vendors: Intel detection pattern', () => {
  const intelPatterns = ['Intel', 'Arc', 'Iris', 'UHD'];
  const gpuName = 'Intel Arc A770';

  assert(matchesAnyPattern(gpuName, intelPatterns), 'Should detect Intel GPU');
});

// -----------------------------------------------------------------------------
// VRAM Calculation Tests
// -----------------------------------------------------------------------------

// 24576 MB divided by 1024 is exactly 24 GB.
Deno.test('VRAM calculation: MB to GB conversion', () => {
  const megabytes = 24576; // 24 GB in MB
  assertEquals(megabytes / 1024, 24);
});

// 8 GB model plus 2 GB overhead stays within a 24 GB budget.
Deno.test('VRAM calculation: model fits in available VRAM', () => {
  const availableVramGB = 24;
  const modelRequiredVramGB = 8;
  const overhead = 2; // GB for system overhead

  const demandGB = modelRequiredVramGB + overhead;
  assert(demandGB <= availableVramGB, 'Model should fit in available VRAM');
});

// Per-model VRAM figures add up to the expected total.
Deno.test('VRAM calculation: multiple models VRAM sum', () => {
  const models = [
    { name: 'llama3:8b', vram: 8 },
    { name: 'mistral:7b', vram: 8 },
  ];

  let totalVram = 0;
  for (const { vram } of models) {
    totalVram += vram;
  }

  assertEquals(totalVram, 16);
});

// -----------------------------------------------------------------------------
// Cluster Scheduling Tests
// -----------------------------------------------------------------------------

// Both the local node and worker-a report a healthy deployment of the same
// model; the test expects resolveModel() to return the local 'control' node.
Deno.test('Cluster manager resolves local model first', () => {
  const manager = createControlPlaneManager();
  const modelId = 'meta-llama/Llama-3.1-8B-Instruct';

  manager.updateLocalNode({
    nodeName: 'control',
    role: 'control-plane',
    endpoint: 'http://control:8080',
    healthy: true,
    resources: {
      gpuCount: 2,
      totalVramGb: 48,
      availableVramGb: 48,
      maxSingleGpuVramGb: 24,
      largestGpuGroupCount: 2,
      largestGpuGroupVramGb: 48,
      deploymentCount: 1,
      topologyGroups: [
        {
          id: 'nvidia-1',
          vendor: 'nvidia',
          gpuIds: ['gpu-0', 'gpu-1'],
          gpuCount: 2,
          totalVramGb: 48,
          maxSingleGpuVramGb: 24,
          busNumbers: [1, 2],
        },
      ],
    },
    deployments: [
      {
        modelId,
        engine: 'vllm',
        endpoint: 'http://control:8080',
        healthy: true,
        containerId: 'vllm-llama',
      },
    ],
    lastSeenAt: Date.now(),
  });

  manager.upsertNode({
    nodeName: 'worker-a',
    role: 'worker',
    endpoint: 'http://worker-a:8080',
    healthy: true,
    resources: {
      gpuCount: 4,
      totalVramGb: 96,
      availableVramGb: 72,
      maxSingleGpuVramGb: 24,
      largestGpuGroupCount: 4,
      largestGpuGroupVramGb: 96,
      deploymentCount: 2,
      topologyGroups: [
        {
          id: 'nvidia-1',
          vendor: 'nvidia',
          gpuIds: ['gpu-0', 'gpu-1', 'gpu-2', 'gpu-3'],
          gpuCount: 4,
          totalVramGb: 96,
          maxSingleGpuVramGb: 24,
          busNumbers: [1, 2, 3, 4],
        },
      ],
    },
    deployments: [
      {
        modelId,
        engine: 'vllm',
        endpoint: 'http://worker-a:8080',
        healthy: true,
        containerId: 'vllm-llama-worker',
      },
    ],
    lastSeenAt: Date.now(),
  });

  const owner = manager.resolveModel(modelId);

  assertExists(owner);
  assertEquals(owner.nodeName, 'control');
});

// A desired deployment upserted on an unconfigured manager is returned with
// the given model ID and replica count, and shows up in the stored list.
Deno.test('Cluster manager stores desired deployments', () => {
  const manager = new ClusterManager();

  const stored = manager.upsertDesiredDeployment('meta-llama/Llama-3.1-8B-Instruct', 3);

  assertEquals(stored.modelId, 'meta-llama/Llama-3.1-8B-Instruct');
  assertEquals(stored.desiredReplicas, 3);
  assertEquals(manager.getDesiredDeployments().length, 1);
});

// The model asks for 18 GB: the control node advertises 8 GB available,
// worker-a advertises 32 GB, and the test expects worker-a to be chosen.
Deno.test('Cluster manager picks the node with enough VRAM', () => {
  const manager = createControlPlaneManager();

  manager.updateLocalNode({
    nodeName: 'control',
    role: 'control-plane',
    endpoint: 'http://control:8080',
    healthy: true,
    resources: {
      gpuCount: 1,
      totalVramGb: 16,
      availableVramGb: 8,
      maxSingleGpuVramGb: 8,
      largestGpuGroupCount: 1,
      largestGpuGroupVramGb: 8,
      deploymentCount: 1,
      topologyGroups: [
        {
          id: 'nvidia-1',
          vendor: 'nvidia',
          gpuIds: ['gpu-0'],
          gpuCount: 1,
          totalVramGb: 8,
          maxSingleGpuVramGb: 8,
          busNumbers: [1],
        },
      ],
    },
    deployments: [],
    lastSeenAt: Date.now(),
  });

  manager.upsertNode({
    nodeName: 'worker-a',
    role: 'worker',
    endpoint: 'http://worker-a:8080',
    healthy: true,
    resources: {
      gpuCount: 2,
      totalVramGb: 48,
      availableVramGb: 32,
      maxSingleGpuVramGb: 16,
      largestGpuGroupCount: 2,
      largestGpuGroupVramGb: 32,
      deploymentCount: 0,
      topologyGroups: [
        {
          id: 'nvidia-1',
          vendor: 'nvidia',
          gpuIds: ['gpu-0', 'gpu-1'],
          gpuCount: 2,
          totalVramGb: 32,
          maxSingleGpuVramGb: 16,
          busNumbers: [1, 2],
        },
      ],
    },
    deployments: [],
    lastSeenAt: Date.now(),
  });

  const chosen = manager.pickNodeForModel({
    id: 'meta-llama/Llama-3.1-8B-Instruct',
    engine: 'vllm',
    source: {
      repo: 'meta-llama/Llama-3.1-8B-Instruct',
    },
    capabilities: {
      chat: true,
    },
    requirements: {
      minVramGb: 18,
      minGpuCount: 1,
    },
  });

  assertExists(chosen);
  assertEquals(chosen.nodeName, 'worker-a');
});

// worker-a is marked 'cordoned' before placement; the test expects the
// 'control' node to be chosen instead.
Deno.test('Cluster manager excludes cordoned nodes from placement', () => {
  const manager = createControlPlaneManager();

  manager.updateLocalNode({
    nodeName: 'control',
    role: 'control-plane',
    endpoint: 'http://control:8080',
    healthy: true,
    resources: {
      gpuCount: 2,
      totalVramGb: 48,
      availableVramGb: 32,
      maxSingleGpuVramGb: 24,
      largestGpuGroupCount: 2,
      largestGpuGroupVramGb: 48,
      deploymentCount: 0,
      topologyGroups: [
        {
          id: 'nvidia-1',
          vendor: 'nvidia',
          gpuIds: ['gpu-0', 'gpu-1'],
          gpuCount: 2,
          totalVramGb: 48,
          maxSingleGpuVramGb: 24,
          busNumbers: [1, 2],
        },
      ],
    },
    deployments: [],
    lastSeenAt: Date.now(),
  });

  manager.upsertNode({
    nodeName: 'worker-a',
    role: 'worker',
    endpoint: 'http://worker-a:8080',
    healthy: true,
    resources: {
      gpuCount: 2,
      totalVramGb: 48,
      availableVramGb: 48,
      maxSingleGpuVramGb: 24,
      largestGpuGroupCount: 2,
      largestGpuGroupVramGb: 48,
      deploymentCount: 0,
      topologyGroups: [
        {
          id: 'nvidia-1',
          vendor: 'nvidia',
          gpuIds: ['gpu-0', 'gpu-1'],
          gpuCount: 2,
          totalVramGb: 48,
          maxSingleGpuVramGb: 24,
          busNumbers: [1, 2],
        },
      ],
    },
    deployments: [],
    lastSeenAt: Date.now(),
  });

  manager.setNodeSchedulerState('worker-a', 'cordoned');

  const chosen = manager.pickNodeForModel({
    id: 'meta-llama/Llama-3.1-8B-Instruct',
    engine: 'vllm',
    source: { repo: 'meta-llama/Llama-3.1-8B-Instruct' },
    capabilities: { chat: true },
    requirements: {
      minVramGb: 18,
      minGpuCount: 1,
    },
  });

  assertExists(chosen);
  assertEquals(chosen.nodeName, 'control');
});

// Four GPUs on PCI buses 01, 02, 41 and 42 are expected to form two groups
// of two GPUs each.
Deno.test('Topology grouping keeps distant PCI buses separate', () => {
  const groups = buildGpuTopologyGroups([
    { id: 'gpu-0', vendor: 'nvidia', model: 'A', vram: 24576, pciSlot: '0000:01:00.0', index: 0 },
    { id: 'gpu-1', vendor: 'nvidia', model: 'A', vram: 24576, pciSlot: '0000:02:00.0', index: 1 },
    { id: 'gpu-2', vendor: 'nvidia', model: 'A', vram: 24576, pciSlot: '0000:41:00.0', index: 2 },
    { id: 'gpu-3', vendor: 'nvidia', model: 'A', vram: 24576, pciSlot: '0000:42:00.0', index: 3 },
  ]);

  assertEquals(groups.length, 2);
  assertEquals(groups[0].gpuCount, 2);
  assertEquals(groups[1].gpuCount, 2);
});

// The only node has four GPUs split into two 2-GPU topology groups, while the
// model asks for minGpuCount 4 / tensorParallelSize 4; the test expects null.
Deno.test('Cluster manager rejects node without suitable topology group', () => {
  const manager = createControlPlaneManager();

  manager.updateLocalNode({
    nodeName: 'control',
    role: 'control-plane',
    endpoint: 'http://control:8080',
    healthy: true,
    resources: {
      gpuCount: 4,
      totalVramGb: 96,
      availableVramGb: 96,
      maxSingleGpuVramGb: 24,
      largestGpuGroupCount: 2,
      largestGpuGroupVramGb: 48,
      deploymentCount: 0,
      topologyGroups: [
        {
          id: 'nvidia-1',
          vendor: 'nvidia',
          gpuIds: ['gpu-0', 'gpu-1'],
          gpuCount: 2,
          totalVramGb: 48,
          maxSingleGpuVramGb: 24,
          busNumbers: [1, 2],
        },
        {
          id: 'nvidia-2',
          vendor: 'nvidia',
          gpuIds: ['gpu-2', 'gpu-3'],
          gpuCount: 2,
          totalVramGb: 48,
          maxSingleGpuVramGb: 24,
          busNumbers: [65, 66],
        },
      ],
    },
    deployments: [],
    lastSeenAt: Date.now(),
  });

  const chosen = manager.pickNodeForModel({
    id: 'meta-llama/Llama-3.1-70B-Instruct',
    engine: 'vllm',
    source: { repo: 'meta-llama/Llama-3.1-70B-Instruct' },
    capabilities: { chat: true },
    requirements: {
      minVramGb: 72,
      minGpuCount: 4,
    },
    launchDefaults: {
      tensorParallelSize: 4,
    },
  });

  assertEquals(chosen, null);
});