import type { IModelCatalogEntry } from '../interfaces/catalog.ts';
import type { IGpuInfo, TGpuVendor } from '../interfaces/gpu.ts';
import type { IClusterGpuTopologyGroup } from '../interfaces/cluster.ts';

/**
 * Extracts the PCI bus number (decimal) from a GPU's PCI address,
 * e.g. "0000:0a:00.0" -> 10. The domain prefix ("dddd:") is optional.
 *
 * Falls back to the GPU's enumeration index when neither `pciBusId` nor
 * `pciSlot` yields a parsable address, so callers always get a number.
 */
function parsePciBusNumber(gpu: IGpuInfo): number {
  // Either field may be missing or empty; default to '' so `.match`
  // never throws and the index fallback below is actually reachable.
  const source = gpu.pciBusId || gpu.pciSlot || '';
  const match = source.match(/(?:[0-9a-f]{4}:)?([0-9a-f]{2}):/i);
  if (!match) {
    return gpu.index;
  }
  return parseInt(match[1], 16);
}

/**
 * Partitions GPUs into topology groups: runs of same-vendor devices on
 * adjacent PCI buses (bus-number gap <= 1).
 *
 * NOTE(review): bus adjacency is used here as a locality heuristic
 * (presumably a shared PCIe switch / NUMA node) — confirm this matches
 * the cluster's real interconnect topology.
 *
 * @param gpus GPUs to group; the input array is not mutated.
 * @returns Groups in vendor/bus order, each with aggregate VRAM stats.
 */
export function buildGpuTopologyGroups(gpus: IGpuInfo[]): IClusterGpuTopologyGroup[] {
  // Sort a copy by vendor, then bus number, so adjacency can be
  // detected in a single forward pass.
  const sorted = [...gpus].sort((left, right) => {
    if (left.vendor !== right.vendor) {
      return left.vendor.localeCompare(right.vendor);
    }
    return parsePciBusNumber(left) - parsePciBusNumber(right);
  });

  const groups: IClusterGpuTopologyGroup[] = [];
  for (const gpu of sorted) {
    const busNumber = parsePciBusNumber(gpu);
    // The /1024 conversion implies `gpu.vram` is reported in MiB and the
    // group totals are whole GiB — TODO(review): confirm the unit.
    const vramGb = Math.round(gpu.vram / 1024);

    const previousGroup = groups[groups.length - 1];
    const previousBus = previousGroup?.busNumbers[previousGroup.busNumbers.length - 1];
    const belongsToPreviousGroup =
      previousGroup !== undefined &&
      previousGroup.vendor === gpu.vendor &&
      previousBus !== undefined &&
      busNumber - previousBus <= 1;

    if (belongsToPreviousGroup) {
      previousGroup.gpuIds.push(gpu.id);
      previousGroup.busNumbers.push(busNumber);
      // Keep gpuCount in sync as we append — no second fix-up pass needed.
      previousGroup.gpuCount = previousGroup.gpuIds.length;
      previousGroup.totalVramGb += vramGb;
      previousGroup.maxSingleGpuVramGb = Math.max(previousGroup.maxSingleGpuVramGb, vramGb);
      continue;
    }

    groups.push({
      // Ids number groups globally across vendors (nvidia-1, amd-2, ...),
      // matching the original scheme; deterministic for a given input set.
      id: `${gpu.vendor}-${groups.length + 1}`,
      vendor: gpu.vendor,
      gpuIds: [gpu.id],
      gpuCount: 1,
      totalVramGb: vramGb,
      maxSingleGpuVramGb: vramGb,
      busNumbers: [busNumber],
    });
  }

  return groups;
}

/**
 * Alias of {@link buildGpuTopologyGroups}, kept for callers that read
 * topology for reporting rather than placement.
 */
export function summarizeGpuTopologyGroups(gpus: IGpuInfo[]): IClusterGpuTopologyGroup[] {
  return buildGpuTopologyGroups(gpus);
}

/**
 * Selects the topology group and GPU subset on which to launch `model`.
 *
 * Eligible groups must have at least `minGpuCount` GPUs and enough total
 * VRAM. Among them we prefer the group whose size is closest to the
 * preferred tensor-parallel degree, then the tightest VRAM fit, then a
 * stable id tiebreak.
 *
 * Fixes over the previous revision:
 * - `launchDefaults.tensorParallelSize` is clamped up to `minGpuCount`,
 *   so a placement never uses fewer GPUs than the model requires.
 * - When only a subset of the selected group is used, the subset is
 *   grown until its own VRAM meets `minVramGb` (eligibility only checked
 *   the whole group's total; the full group always satisfies it, so the
 *   loop terminates).
 *
 * @returns The placement, or `null` when no group satisfies the model.
 */
export function selectPlacementForModel(
  model: IModelCatalogEntry,
  gpus: IGpuInfo[],
): { gpuIds: string[]; tensorParallelSize: number; topologyGroupId: string } | null {
  // `|| 1` deliberately treats 0 as "unspecified": a model needs >= 1 GPU.
  const minGpuCount = model.requirements.minGpuCount || 1;
  // Clamp the launch default so it can never undercut the hard minimum.
  const preferredTensorParallel = Math.max(
    model.launchDefaults?.tensorParallelSize || minGpuCount,
    minGpuCount,
  );

  const topologyGroups = buildGpuTopologyGroups(gpus);
  const eligibleGroups = topologyGroups.filter(
    (group) =>
      group.gpuCount >= minGpuCount &&
      group.totalVramGb >= model.requirements.minVramGb,
  );
  if (eligibleGroups.length === 0) {
    return null;
  }

  eligibleGroups.sort((left, right) => {
    const leftCountDelta = Math.abs(left.gpuCount - preferredTensorParallel);
    const rightCountDelta = Math.abs(right.gpuCount - preferredTensorParallel);
    if (leftCountDelta !== rightCountDelta) {
      return leftCountDelta - rightCountDelta;
    }
    // Tightest fit: smallest VRAM surplus over the requirement wins.
    const leftVramDelta = left.totalVramGb - model.requirements.minVramGb;
    const rightVramDelta = right.totalVramGb - model.requirements.minVramGb;
    if (leftVramDelta !== rightVramDelta) {
      return leftVramDelta - rightVramDelta;
    }
    return left.id.localeCompare(right.id);
  });

  const selectedGroup = eligibleGroups[0];

  // Per-GPU VRAM in GiB, rounded the same way buildGpuTopologyGroups does,
  // so subset sums are consistent with group totals.
  const vramGbById = new Map(gpus.map((gpu) => [gpu.id, Math.round(gpu.vram / 1024)]));
  const subsetVramGb = (count: number): number =>
    selectedGroup.gpuIds
      .slice(0, count)
      .reduce((sum, id) => sum + (vramGbById.get(id) ?? 0), 0);

  let tensorParallelSize = Math.min(preferredTensorParallel, selectedGroup.gpuCount);
  while (
    tensorParallelSize < selectedGroup.gpuCount &&
    subsetVramGb(tensorParallelSize) < model.requirements.minVramGb
  ) {
    tensorParallelSize += 1;
  }

  return {
    gpuIds: selectedGroup.gpuIds.slice(0, tensorParallelSize),
    tensorParallelSize,
    topologyGroupId: selectedGroup.id,
  };
}

/**
 * Returns the GPUs whose ids are not in `usedGpuIds` (input not mutated).
 */
export function filterOutUsedGpus(gpus: IGpuInfo[], usedGpuIds: string[]): IGpuInfo[] {
  const usedSet = new Set(usedGpuIds);
  return gpus.filter((gpu) => !usedSet.has(gpu.id));
}