115 lines
3.6 KiB
TypeScript
115 lines
3.6 KiB
TypeScript
|
|
import type { IModelCatalogEntry } from '../interfaces/catalog.ts';
|
||
|
|
import type { IGpuInfo, TGpuVendor } from '../interfaces/gpu.ts';
|
||
|
|
import type { IClusterGpuTopologyGroup } from '../interfaces/cluster.ts';
|
||
|
|
|
||
|
|
/**
 * Derives a sortable PCI bus number for a GPU.
 *
 * Reads `gpu.pciBusId`, or `gpu.pciSlot` when the bus id is empty or missing, and
 * looks for a two-hex-digit bus segment followed by `:` (optionally preceded by a
 * four-hex-digit domain and `:`). The pattern is unanchored, so longer domain
 * prefixes are also accepted.
 *
 * @param gpu GPU whose PCI identifiers are inspected.
 * @returns The bus segment parsed as hexadecimal, or `gpu.index` when neither
 *   identifier is present or no bus segment can be found.
 */
function parsePciBusNumber(gpu: IGpuInfo): number {
  // Either identifier may be absent on hosts that do not expose PCI data; an empty
  // string never matches, so such GPUs take the same index fallback as GPUs with
  // an unparseable identifier instead of throwing on `undefined.match`.
  const source = gpu.pciBusId || gpu.pciSlot || '';
  const match = source.match(/(?:[0-9a-f]{4}:)?([0-9a-f]{2}):/i);

  if (!match) {
    return gpu.index;
  }

  return parseInt(match[1], 16);
}
|
||
|
|
|
||
|
|
/**
 * Partitions GPUs into topology groups of same-vendor devices on neighbouring PCI buses.
 *
 * GPUs are ordered by vendor (locale comparison) and then by the bus number returned
 * from `parsePciBusNumber`. Walking that order, a GPU is appended to the most recent
 * group when the vendor matches and its bus number is at most one above the last bus
 * recorded in that group; otherwise it starts a new group.
 *
 * Per-GPU VRAM is `gpu.vram / 1024`, rounded to an integer, before being summed
 * (presumably MiB to GiB; confirm against `IGpuInfo`).
 *
 * @param gpus GPUs to group; the input array is not reordered.
 * @returns Groups in creation order, with ids of the form `<vendor>-<n>` where `n`
 *   is the 1-based position of the group in the returned array.
 */
export function buildGpuTopologyGroups(gpus: IGpuInfo[]): IClusterGpuTopologyGroup[] {
  // Resolve each bus number once up front, then order by vendor and bus.
  const ordered = Array.from(gpus, (gpu) => ({ gpu, bus: parsePciBusNumber(gpu) }));
  ordered.sort((a, b) =>
    a.gpu.vendor === b.gpu.vendor
      ? a.bus - b.bus
      : a.gpu.vendor.localeCompare(b.gpu.vendor),
  );

  const result: IClusterGpuTopologyGroup[] = [];

  for (const { gpu, bus } of ordered) {
    const vramGb = Math.round(gpu.vram / 1024);
    const tail = result[result.length - 1];
    const tailBus = tail?.busNumbers[tail.busNumbers.length - 1];

    if (
      tail !== undefined &&
      tail.vendor === gpu.vendor &&
      tailBus !== undefined &&
      bus - tailBus <= 1
    ) {
      // Same vendor on the same or the next bus: extend the current group.
      tail.gpuIds.push(gpu.id);
      tail.busNumbers.push(bus);
      tail.totalVramGb += vramGb;
      tail.maxSingleGpuVramGb = Math.max(tail.maxSingleGpuVramGb, vramGb);
      tail.gpuCount = tail.gpuIds.length;
    } else {
      result.push({
        id: `${gpu.vendor}-${result.length + 1}`,
        vendor: gpu.vendor,
        gpuIds: [gpu.id],
        gpuCount: 1,
        totalVramGb: vramGb,
        maxSingleGpuVramGb: vramGb,
        busNumbers: [bus],
      });
    }
  }

  return result;
}
|
||
|
|
|
||
|
|
/**
 * Returns the topology groups for the given GPUs.
 *
 * Delegates directly to `buildGpuTopologyGroups` and returns its result unchanged.
 *
 * @param gpus GPUs to summarize.
 * @returns The groups produced by `buildGpuTopologyGroups`.
 */
export function summarizeGpuTopologyGroups(gpus: IGpuInfo[]): IClusterGpuTopologyGroup[] {
  const groups = buildGpuTopologyGroups(gpus);
  return groups;
}
|
||
|
|
|
||
|
|
/**
 * Chooses the GPUs a model should be launched on.
 *
 * A topology group qualifies when it has at least `requirements.minGpuCount` GPUs
 * (treated as 1 when unset or 0) and its summed VRAM reaches `requirements.minVramGb`.
 * Qualifying groups are ranked by:
 *   1. how close their GPU count is to the target GPU count,
 *   2. the smallest VRAM surplus over the requirement,
 *   3. group id, as a deterministic tie-breaker.
 *
 * The target GPU count is `launchDefaults.tensorParallelSize` when set, otherwise
 * `minGpuCount`, and is never allowed to drop below `minGpuCount`.
 *
 * From the winning group the leading GPUs (in group order) are taken. The selection
 * starts at the target count, capped at the group size, and is extended within the
 * group until the selected GPUs together provide `minVramGb`, so the returned
 * placement itself satisfies the requirement that made the group eligible.
 *
 * @param model Catalog entry supplying requirements and launch defaults.
 * @param gpus Candidate GPUs.
 * @returns The selected GPU ids, the tensor parallel size (equal to the number of
 *   selected GPUs) and the id of the chosen group, or `null` when no group qualifies.
 */
export function selectPlacementForModel(
  model: IModelCatalogEntry,
  gpus: IGpuInfo[],
): { gpuIds: string[]; tensorParallelSize: number; topologyGroupId: string } | null {
  const minGpuCount = model.requirements.minGpuCount || 1;
  const minVramGb = model.requirements.minVramGb;
  // A launch default below the hard minimum must not shrink the placement.
  const preferredTensorParallel = Math.max(
    model.launchDefaults?.tensorParallelSize || minGpuCount,
    minGpuCount,
  );

  const eligibleGroups = buildGpuTopologyGroups(gpus).filter(
    (group) => group.gpuCount >= minGpuCount && group.totalVramGb >= minVramGb,
  );

  if (eligibleGroups.length === 0) {
    return null;
  }

  eligibleGroups.sort((left, right) => {
    const leftCountDelta = Math.abs(left.gpuCount - preferredTensorParallel);
    const rightCountDelta = Math.abs(right.gpuCount - preferredTensorParallel);
    if (leftCountDelta !== rightCountDelta) {
      return leftCountDelta - rightCountDelta;
    }

    const leftVramDelta = left.totalVramGb - minVramGb;
    const rightVramDelta = right.totalVramGb - minVramGb;
    if (leftVramDelta !== rightVramDelta) {
      return leftVramDelta - rightVramDelta;
    }

    return left.id.localeCompare(right.id);
  });

  const selectedGroup = eligibleGroups[0];

  // Per-GPU VRAM, rounded the same way the group totals are accumulated, so that
  // selecting the whole group always reproduces `totalVramGb`.
  const vramGbById = new Map<string, number>();
  for (const gpu of gpus) {
    vramGbById.set(gpu.id, Math.round(gpu.vram / 1024));
  }

  // Take the target number of GPUs, then keep adding GPUs from the same group while
  // the selection is still short of the VRAM requirement. The group total already
  // meets the requirement, so this terminates with a satisfying selection.
  const baseSize = Math.min(preferredTensorParallel, selectedGroup.gpuCount);
  let tensorParallelSize = 0;
  let selectedVramGb = 0;
  while (
    tensorParallelSize < selectedGroup.gpuCount &&
    (tensorParallelSize < baseSize || selectedVramGb < minVramGb)
  ) {
    selectedVramGb += vramGbById.get(selectedGroup.gpuIds[tensorParallelSize]) ?? 0;
    tensorParallelSize += 1;
  }

  return {
    gpuIds: selectedGroup.gpuIds.slice(0, tensorParallelSize),
    tensorParallelSize,
    topologyGroupId: selectedGroup.id,
  };
}
|
||
|
|
|
||
|
|
/**
 * Returns the GPUs whose id is not listed in `usedGpuIds`.
 *
 * @param gpus Candidate GPUs; the array is not modified.
 * @param usedGpuIds Ids of GPUs that are already taken.
 * @returns A new array holding the remaining GPUs in their original order.
 */
export function filterOutUsedGpus(gpus: IGpuInfo[], usedGpuIds: string[]): IGpuInfo[] {
  const taken = new Set(usedGpuIds);
  const available: IGpuInfo[] = [];

  gpus.forEach((candidate) => {
    if (!taken.has(candidate.id)) {
      available.push(candidate);
    }
  });

  return available;
}
|