Files
modelgrid/ts/api/handlers/cluster.ts
T

317 lines
9.5 KiB
TypeScript

import { timingSafeEqual } from 'node:crypto';
import * as http from 'node:http';
import type { IApiError } from '../../interfaces/api.ts';
import type { IClusterNodeHeartbeat } from '../../interfaces/cluster.ts';
import { ClusterCoordinator } from '../../cluster/coordinator.ts';
import { CLUSTER } from '../../constants.ts';
/**
 * HTTP handler for the internal `/_cluster/*` endpoints.
 *
 * Each request is authenticated against the coordinator's shared secret (when
 * one is configured) and then dispatched by method and path to the
 * `ClusterCoordinator`. JSON request bodies are treated as untrusted input:
 * they are size-limited and their fields are type-checked before being handed
 * to the coordinator.
 */
export class ClusterHandler {
  /**
   * Largest request body that `parseBody` will buffer. Anything larger is
   * reported as an unparseable body so a client cannot make the process buffer
   * an arbitrarily large payload.
   */
  private static readonly MAX_BODY_BYTES = 1024 * 1024;

  /** Scheduler state applied by each node-management POST endpoint. */
  private static readonly NODE_STATE_BY_PATH: ReadonlyMap<
    string,
    'cordoned' | 'draining' | 'active'
  > = new Map<string, 'cordoned' | 'draining' | 'active'>([
    ['/_cluster/nodes/cordon', 'cordoned'],
    ['/_cluster/nodes/uncordon', 'active'],
    ['/_cluster/nodes/drain', 'draining'],
    ['/_cluster/nodes/activate', 'active'],
  ]);

  private readonly clusterCoordinator: ClusterCoordinator;

  constructor(clusterCoordinator: ClusterCoordinator) {
    this.clusterCoordinator = clusterCoordinator;
  }

  /**
   * Handles one cluster API request and writes the JSON response.
   *
   * @param req Incoming request; its method, headers and (for POST) body are read.
   * @param res Response the JSON result or error is written to.
   * @param path Request path used for routing.
   * @param url Parsed request URL; only its query parameters are read.
   * @returns A promise that settles once the response has been written.
   */
  public async handle(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    path: string,
    url: URL,
  ): Promise<void> {
    if (!this.authenticate(req)) {
      return this.sendError(res, 401, 'Invalid cluster secret', 'authentication_error');
    }

    if (req.method === 'GET') {
      switch (path) {
        case '/_cluster/status':
          return this.sendJson(res, 200, this.clusterCoordinator.getStatus());
        case '/_cluster/nodes':
          return this.sendJson(res, 200, this.clusterCoordinator.getStatus().nodes);
        case '/_cluster/desired':
          return this.sendJson(res, 200, this.clusterCoordinator.getDesiredDeployments());
        case '/_cluster/models/resolve':
          return this.handleResolveModel(res, url);
      }
    }

    if (req.method === 'POST') {
      switch (path) {
        case '/_cluster/nodes/register':
        case '/_cluster/nodes/heartbeat':
          return this.handleHeartbeat(req, res);
        case '/_cluster/models/ensure':
          return this.handleEnsureModel(req, res);
        case '/_cluster/models/desired':
          return this.handleSetDesiredReplicas(req, res);
        case '/_cluster/models/desired/remove':
          return this.handleRemoveDesired(req, res);
        case '/_cluster/deployments':
          return this.handleDeployment(req, res);
      }

      const targetState = ClusterHandler.NODE_STATE_BY_PATH.get(path);
      if (targetState !== undefined) {
        return this.handleNodeSchedulerState(req, res, targetState);
      }
    }

    return this.sendError(res, 404, `Unknown cluster endpoint: ${path}`, 'invalid_request_error');
  }

  /** GET /_cluster/models/resolve — looks up the model named by the `model` query parameter. */
  private async handleResolveModel(res: http.ServerResponse, url: URL): Promise<void> {
    const model = url.searchParams.get('model');
    if (!model) {
      return this.sendError(res, 400, 'Missing model query parameter', 'invalid_request_error');
    }

    const resolved = await this.clusterCoordinator.resolveModel(model);
    if (!resolved) {
      return this.sendError(res, 404, `Model "${model}" not found in cluster`, 'model_not_found');
    }
    return this.sendJson(res, 200, resolved);
  }

  /** POST /_cluster/nodes/register and /_cluster/nodes/heartbeat — forwards a node heartbeat. */
  private async handleHeartbeat(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    const payload = await this.parseBody(req);
    if (this.toJsonObject(payload) === null) {
      return this.sendError(
        res,
        400,
        'Invalid cluster heartbeat payload',
        'invalid_request_error',
      );
    }

    // Only the top-level shape (a JSON object) is verified here; the heartbeat
    // fields themselves are declared outside this file and are not validated.
    this.clusterCoordinator.acceptHeartbeat(payload as IClusterNodeHeartbeat);
    return this.sendJson(res, 200, { ok: true });
  }

  /** POST /_cluster/models/ensure — asks the coordinator to ensure a model (control plane only). */
  private async handleEnsureModel(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    if (!this.requireControlPlane(res)) {
      return;
    }

    const model = this.readName(await this.readJsonObject(req), 'model');
    if (model === null) {
      return this.sendError(res, 400, 'Missing model in request body', 'invalid_request_error');
    }

    const ensured = await this.clusterCoordinator.ensureModel(model);
    if (!ensured) {
      return this.sendError(res, 503, `Unable to schedule model "${model}"`, 'server_error');
    }
    return this.sendJson(res, 200, ensured);
  }

  /** POST /_cluster/models/desired — sets the desired replica count (control plane only). */
  private async handleSetDesiredReplicas(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    if (!this.requireControlPlane(res)) {
      return;
    }

    const body = await this.readJsonObject(req);
    const model = this.readName(body, 'model');
    const desiredReplicas = body?.desiredReplicas;
    if (model === null || desiredReplicas === undefined) {
      return this.sendError(
        res,
        400,
        'Missing model or desiredReplicas in request body',
        'invalid_request_error',
      );
    }
    if (!this.isNonNegativeInteger(desiredReplicas)) {
      return this.sendError(
        res,
        400,
        'desiredReplicas must be a non-negative integer',
        'invalid_request_error',
      );
    }

    const desiredDeployment = await this.clusterCoordinator.setDesiredReplicas(
      model,
      desiredReplicas,
    );
    if (!desiredDeployment) {
      return this.sendError(res, 404, `Model "${model}" not found`, 'model_not_found');
    }
    return this.sendJson(res, 200, desiredDeployment);
  }

  /** POST /_cluster/models/desired/remove — clears a desired deployment (control plane only). */
  private async handleRemoveDesired(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    if (!this.requireControlPlane(res)) {
      return;
    }

    const model = this.readName(await this.readJsonObject(req), 'model');
    if (model === null) {
      return this.sendError(res, 400, 'Missing model in request body', 'invalid_request_error');
    }

    const removed = await this.clusterCoordinator.clearDesiredDeployment(model);
    return this.sendJson(res, 200, { removed });
  }

  /**
   * POST /_cluster/deployments — deploys a model, or one specific replica of it, via the
   * coordinator's `deploy*Locally` methods.
   *
   * NOTE(review): unlike the other mutating routes this one has no control-plane check,
   * presumably because the deployment runs on the receiving node — confirm.
   */
  private async handleDeployment(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    const body = await this.readJsonObject(req);
    const model = this.readName(body, 'model');
    if (model === null) {
      return this.sendError(res, 400, 'Missing model in request body', 'invalid_request_error');
    }

    const replicaOrdinal = body?.replicaOrdinal;
    if (replicaOrdinal !== undefined && !this.isNonNegativeInteger(replicaOrdinal)) {
      return this.sendError(
        res,
        400,
        'replicaOrdinal must be a non-negative integer',
        'invalid_request_error',
      );
    }

    const deployed = replicaOrdinal !== undefined
      ? await this.clusterCoordinator.deployReplicaLocally(model, replicaOrdinal)
      : await this.clusterCoordinator.deployModelLocally(model);
    if (!deployed) {
      return this.sendError(res, 503, `Unable to deploy model "${model}"`, 'server_error');
    }
    return this.sendJson(res, 200, deployed);
  }

  /**
   * POST /_cluster/nodes/{cordon,uncordon,drain,activate} — sets the scheduler state of the
   * node named in the body (control plane only).
   */
  private async handleNodeSchedulerState(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    targetState: 'cordoned' | 'draining' | 'active',
  ): Promise<void> {
    if (!this.requireControlPlane(res)) {
      return;
    }

    const nodeName = this.readName(await this.readJsonObject(req), 'nodeName');
    if (nodeName === null) {
      return this.sendError(
        res,
        400,
        'Missing nodeName in request body',
        'invalid_request_error',
      );
    }

    const schedulerState = this.clusterCoordinator.setNodeSchedulerState(nodeName, targetState);
    return this.sendJson(res, 200, { nodeName, schedulerState });
  }

  /**
   * Writes a 409 response and returns `false` when the coordinator reports that it cannot
   * manage cluster state; returns `true` (writing nothing) otherwise.
   */
  private requireControlPlane(res: http.ServerResponse): boolean {
    if (this.clusterCoordinator.canManageClusterState()) {
      return true;
    }
    this.sendError(res, 409, 'This node is not the control plane', 'invalid_request_error');
    return false;
  }

  /**
   * Checks the cluster auth header against the coordinator's shared secret.
   * When the coordinator reports no shared secret, every request is accepted.
   */
  private authenticate(req: http.IncomingMessage): boolean {
    const sharedSecret = this.clusterCoordinator.getSharedSecret();
    if (!sharedSecret) {
      return true;
    }

    const presented = req.headers[CLUSTER.AUTH_HEADER_NAME];
    if (typeof presented !== 'string') {
      // Missing header, or a header that was sent more than once.
      return false;
    }

    // Constant-time comparison so response timing does not reveal how much of
    // the secret a guess got right. timingSafeEqual requires equal lengths.
    const encoder = new TextEncoder();
    const expectedBytes = encoder.encode(sharedSecret);
    const presentedBytes = encoder.encode(presented);
    return expectedBytes.byteLength === presentedBytes.byteLength &&
      timingSafeEqual(presentedBytes, expectedBytes);
  }

  /** Reads the request body and returns it only if it is a JSON object (not an array). */
  private async readJsonObject(
    req: http.IncomingMessage,
  ): Promise<Record<string, unknown> | null> {
    return this.toJsonObject(await this.parseBody(req));
  }

  /** Returns `value` as a string-keyed record when it is a non-null, non-array object. */
  private toJsonObject(value: unknown): Record<string, unknown> | null {
    if (typeof value !== 'object' || value === null || Array.isArray(value)) {
      return null;
    }
    return value as Record<string, unknown>;
  }

  /** Returns `body[key]` when it is a non-empty string, otherwise `null`. */
  private readName(body: Record<string, unknown> | null, key: string): string | null {
    const value = body?.[key];
    return typeof value === 'string' && value !== '' ? value : null;
  }

  /** Type guard for counts and ordinals received from JSON. */
  private isNonNegativeInteger(value: unknown): value is number {
    return typeof value === 'number' && Number.isInteger(value) && value >= 0;
  }

  /**
   * Buffers the request body and parses it as JSON.
   *
   * @returns The parsed value, or `null` when the body is empty, is not valid JSON,
   *   exceeds `MAX_BODY_BYTES`, or the request stream emits an error.
   */
  private async parseBody(req: http.IncomingMessage): Promise<unknown | null> {
    return new Promise((resolve) => {
      // A streaming decoder keeps multi-byte UTF-8 sequences intact when they are
      // split across two chunks; decoding each chunk on its own would corrupt them.
      const decoder = new TextDecoder('utf-8');
      let text = '';
      let received = 0;
      let overflowed = false;

      req.on('data', (chunk: Uint8Array | string) => {
        if (overflowed) {
          // Keep consuming so the stream can finish, but buffer nothing more.
          return;
        }
        // String chunks are counted in UTF-16 code units, which is close enough for a cap.
        received += typeof chunk === 'string' ? chunk.length : chunk.byteLength;
        if (received > ClusterHandler.MAX_BODY_BYTES) {
          overflowed = true;
          text = '';
          resolve(null);
          return;
        }
        text += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
      });

      req.on('end', () => {
        if (overflowed) {
          return;
        }
        text += decoder.decode(); // flush any bytes still held by the decoder
        if (!text) {
          resolve(null);
          return;
        }
        try {
          resolve(JSON.parse(text));
        } catch {
          resolve(null);
        }
      });

      req.on('error', () => resolve(null));
    });
  }

  /** Writes `body` as a JSON response with the given status code. */
  private sendJson(res: http.ServerResponse, statusCode: number, body: unknown): void {
    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(body));
  }

  /** Writes an error response in the `IApiError` envelope (`{ error: { message, type } }`). */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
  ): void {
    const error: IApiError = {
      error: {
        message,
        type,
      },
    };
    this.sendJson(res, statusCode, error);
  }
}