// modelgrid/ts/cluster/placement.ts — TypeScript, 115 lines, 3.6 KiB
// (file-browser header from the original extraction, preserved as a comment)
import type { IModelCatalogEntry } from '../interfaces/catalog.ts';
import type { IGpuInfo, TGpuVendor } from '../interfaces/gpu.ts';
import type { IClusterGpuTopologyGroup } from '../interfaces/cluster.ts';
/**
 * Extracts the PCI bus number from a GPU's bus identifier.
 *
 * Accepts the full "domain:bus:device.function" form ("0000:3f:00.0") or
 * the shorter "bus:device.function" form ("3f:00.0"), matched
 * case-insensitively; the bus component is parsed as hexadecimal.
 *
 * @param gpu - GPU whose `pciBusId` (preferred) or `pciSlot` is parsed.
 * @returns The hex-parsed bus number, or `gpu.index` as a stable fallback
 *   when neither field yields a parseable bus component.
 */
function parsePciBusNumber(gpu: IGpuInfo): number {
  // Fall back to '' so a GPU with neither identifier set cannot crash the
  // .match() call below with a TypeError; it resolves to gpu.index instead.
  const source = gpu.pciBusId || gpu.pciSlot || '';
  const match = source.match(/(?:[0-9a-f]{4}:)?([0-9a-f]{2}):/i);
  if (!match) {
    return gpu.index;
  }
  return parseInt(match[1], 16);
}
/**
 * Partitions GPUs into topology groups of PCI-adjacent devices.
 *
 * GPUs are sorted by vendor, then by PCI bus number. A GPU joins the most
 * recently created group when it shares that group's vendor and its bus
 * number is at most one greater than the group's last recorded bus number;
 * otherwise it starts a new group. VRAM figures are per-GPU values rounded
 * to whole GiB (assumes `gpu.vram` is in MiB — TODO confirm via IGpuInfo).
 *
 * @param gpus - GPUs to group; the input array is not mutated.
 * @returns Groups in vendor/bus order, with `gpuCount` reflecting the
 *   final membership of each group.
 */
export function buildGpuTopologyGroups(gpus: IGpuInfo[]): IClusterGpuTopologyGroup[] {
  const ordered = [...gpus].sort((a, b) =>
    a.vendor !== b.vendor
      ? a.vendor.localeCompare(b.vendor)
      : parsePciBusNumber(a) - parsePciBusNumber(b)
  );

  const result: IClusterGpuTopologyGroup[] = [];
  for (const gpu of ordered) {
    const bus = parsePciBusNumber(gpu);
    const vramGb = Math.round(gpu.vram / 1024);
    const tail = result[result.length - 1];
    const lastBus = tail?.busNumbers[tail.busNumbers.length - 1];
    const adjacent =
      tail !== undefined &&
      tail.vendor === gpu.vendor &&
      lastBus !== undefined &&
      bus - lastBus <= 1;

    if (adjacent) {
      // Extend the current run of adjacent same-vendor devices.
      tail.gpuIds.push(gpu.id);
      tail.busNumbers.push(bus);
      tail.totalVramGb += vramGb;
      tail.maxSingleGpuVramGb = Math.max(tail.maxSingleGpuVramGb, vramGb);
    } else {
      result.push({
        // Group ids number globally across vendors ("nvidia-1", "amd-2", ...).
        id: `${gpu.vendor}-${result.length + 1}`,
        vendor: gpu.vendor,
        gpuIds: [gpu.id],
        gpuCount: 1,
        totalVramGb: vramGb,
        maxSingleGpuVramGb: vramGb,
        busNumbers: [bus],
      });
    }
  }

  // Recompute counts once, so merged groups report their final size.
  for (const group of result) {
    group.gpuCount = group.gpuIds.length;
  }
  return result;
}
/**
 * Summarizes GPU topology for callers that only need the grouped view.
 * Currently a thin pass-through to {@link buildGpuTopologyGroups}.
 */
export function summarizeGpuTopologyGroups(gpus: IGpuInfo[]): IClusterGpuTopologyGroup[] {
  const groups = buildGpuTopologyGroups(gpus);
  return groups;
}
/**
 * Picks the best GPU topology group for a model and returns a concrete
 * placement (GPU ids + tensor-parallel size), or `null` when no group can
 * satisfy the model's GPU-count and VRAM requirements.
 *
 * Selection order among eligible groups: closest GPU count to the
 * preferred tensor-parallel size, then smallest VRAM headroom
 * (bin-packing), then group id for determinism.
 *
 * @param model - Catalog entry with `requirements` and optional `launchDefaults`.
 * @param gpus - Candidate GPUs (typically pre-filtered of in-use devices).
 */
export function selectPlacementForModel(
  model: IModelCatalogEntry,
  gpus: IGpuInfo[],
): { gpuIds: string[]; tensorParallelSize: number; topologyGroupId: string } | null {
  const minGpuCount = model.requirements.minGpuCount || 1;
  const preferredTensorParallel = model.launchDefaults?.tensorParallelSize || minGpuCount;
  const topologyGroups = buildGpuTopologyGroups(gpus);

  const eligibleGroups = topologyGroups.filter((group) =>
    group.gpuCount >= minGpuCount && group.totalVramGb >= model.requirements.minVramGb
  );
  if (eligibleGroups.length === 0) {
    return null;
  }

  eligibleGroups.sort((left, right) => {
    const leftCountDelta = Math.abs(left.gpuCount - preferredTensorParallel);
    const rightCountDelta = Math.abs(right.gpuCount - preferredTensorParallel);
    if (leftCountDelta !== rightCountDelta) {
      return leftCountDelta - rightCountDelta;
    }
    const leftVramDelta = left.totalVramGb - model.requirements.minVramGb;
    const rightVramDelta = right.totalVramGb - model.requirements.minVramGb;
    if (leftVramDelta !== rightVramDelta) {
      return leftVramDelta - rightVramDelta;
    }
    return left.id.localeCompare(right.id);
  });
  const selectedGroup = eligibleGroups[0];

  // Per-GPU VRAM in GiB (same rounding as the group totals), so we can
  // validate the actual slice of GPUs we hand out.
  const vramGbById = new Map(gpus.map((gpu) => [gpu.id, Math.round(gpu.vram / 1024)]));
  const sliceVramGb = (count: number): number =>
    selectedGroup.gpuIds
      .slice(0, count)
      .reduce((sum, id) => sum + (vramGbById.get(id) ?? 0), 0);

  // Start from the preferred TP size, clamped to the group size and floored
  // at the model's minimum GPU count (launchDefaults may be below it).
  let tensorParallelSize = Math.max(
    minGpuCount,
    Math.min(preferredTensorParallel, selectedGroup.gpuCount),
  );
  // BUG FIX: eligibility checked the WHOLE group's VRAM, but only the first
  // `tensorParallelSize` GPUs are placed — grow the slice until it alone
  // meets the requirement. Terminates because the full group's total passed
  // the eligibility filter above.
  while (
    tensorParallelSize < selectedGroup.gpuCount &&
    sliceVramGb(tensorParallelSize) < model.requirements.minVramGb
  ) {
    tensorParallelSize += 1;
  }

  return {
    gpuIds: selectedGroup.gpuIds.slice(0, tensorParallelSize),
    tensorParallelSize,
    topologyGroupId: selectedGroup.id,
  };
}
/**
 * Returns the GPUs whose ids do not appear in `usedGpuIds`.
 * Neither input array is mutated.
 */
export function filterOutUsedGpus(gpus: IGpuInfo[], usedGpuIds: string[]): IGpuInfo[] {
  const taken = new Set<string>(usedGpuIds);
  const available: IGpuInfo[] = [];
  for (const gpu of gpus) {
    if (!taken.has(gpu.id)) {
      available.push(gpu);
    }
  }
  return available;
}