feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
@@ -0,0 +1,316 @@
|
||||
import { timingSafeEqual } from 'node:crypto';
import * as http from 'node:http';
import type { IApiError } from '../../interfaces/api.ts';
import type { IClusterNodeHeartbeat } from '../../interfaces/cluster.ts';
import { ClusterCoordinator } from '../../cluster/coordinator.ts';
import { CLUSTER } from '../../constants.ts';
|
||||
|
||||
/**
 * HTTP handler for the internal `/_cluster/*` endpoints.
 *
 * Every request is first checked against the coordinator's shared secret and
 * is then dispatched on `(method, path)`. All responses are JSON; failures use
 * the `IApiError` envelope (`{ error: { message, type } }`).
 *
 * Request bodies are untrusted: they are size-capped, decoded as UTF-8 and
 * field-validated before anything is forwarded to the coordinator.
 */
export class ClusterHandler {
  /**
   * Upper bound (in bytes) for a buffered request body. Larger bodies are
   * drained and reported as unparseable, so the caller answers with HTTP 400.
   */
  private static readonly MAX_BODY_BYTES = 1024 * 1024;

  private clusterCoordinator: ClusterCoordinator;

  constructor(clusterCoordinator: ClusterCoordinator) {
    this.clusterCoordinator = clusterCoordinator;
  }

  /**
   * Authenticates and routes a single cluster request.
   *
   * @param req  Incoming request; `method` and headers are read, and the body
   *             is consumed for POST routes.
   * @param res  Response that receives exactly one JSON reply.
   * @param path Request path, compared verbatim against the known endpoints.
   * @param url  Parsed request URL; only its query string is used
   *             (`?model=` on `/_cluster/models/resolve`).
   * @returns Resolves once the response has been written. Unknown
   *          method/path combinations are answered with HTTP 404.
   */
  public async handle(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    path: string,
    url: URL,
  ): Promise<void> {
    if (!this.authenticate(req)) {
      return this.sendError(res, 401, 'Invalid cluster secret', 'authentication_error');
    }

    if (req.method === 'GET') {
      switch (path) {
        case '/_cluster/status':
          return this.sendJson(res, 200, this.clusterCoordinator.getStatus());
        case '/_cluster/nodes':
          return this.sendJson(res, 200, this.clusterCoordinator.getStatus().nodes);
        case '/_cluster/desired':
          return this.sendJson(res, 200, this.clusterCoordinator.getDesiredDeployments());
        case '/_cluster/models/resolve':
          return this.handleResolveModel(res, url);
      }
    }

    if (req.method === 'POST') {
      switch (path) {
        case '/_cluster/nodes/register':
        case '/_cluster/nodes/heartbeat':
          return this.handleHeartbeat(req, res);
        case '/_cluster/models/ensure':
          return this.handleEnsureModel(req, res);
        case '/_cluster/models/desired':
          return this.handleSetDesiredReplicas(req, res);
        case '/_cluster/models/desired/remove':
          return this.handleRemoveDesired(req, res);
        case '/_cluster/deployments':
          return this.handleDeploy(req, res);
        case '/_cluster/nodes/cordon':
          return this.handleSchedulerState(req, res, 'cordoned');
        case '/_cluster/nodes/drain':
          return this.handleSchedulerState(req, res, 'draining');
        // Both endpoints put the node back into the 'active' state.
        case '/_cluster/nodes/uncordon':
        case '/_cluster/nodes/activate':
          return this.handleSchedulerState(req, res, 'active');
      }
    }

    return this.sendError(res, 404, `Unknown cluster endpoint: ${path}`, 'invalid_request_error');
  }

  /** GET /_cluster/models/resolve — looks up the `model` query parameter. */
  private async handleResolveModel(res: http.ServerResponse, url: URL): Promise<void> {
    const model = url.searchParams.get('model');
    if (!model) {
      return this.sendError(res, 400, 'Missing model query parameter', 'invalid_request_error');
    }

    const resolved = await this.clusterCoordinator.resolveModel(model);
    if (!resolved) {
      return this.sendError(res, 404, `Model "${model}" not found in cluster`, 'model_not_found');
    }

    return this.sendJson(res, 200, resolved);
  }

  /** POST register/heartbeat — forwards a JSON object payload to the coordinator. */
  private async handleHeartbeat(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    const payload = await this.parseBody(req);

    // Only a JSON object can be a heartbeat; arrays and primitives are rejected
    // here instead of being handed to the coordinator under a false type.
    if (typeof payload !== 'object' || payload === null || Array.isArray(payload)) {
      return this.sendError(
        res,
        400,
        'Invalid cluster heartbeat payload',
        'invalid_request_error',
      );
    }

    // NOTE(review): only the top-level shape is verified here; field-level
    // validation of the heartbeat is left to the coordinator.
    this.clusterCoordinator.acceptHeartbeat(payload as IClusterNodeHeartbeat);
    return this.sendJson(res, 200, { ok: true });
  }

  /** POST /_cluster/models/ensure — control plane only. */
  private async handleEnsureModel(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    if (!this.requireControlPlane(res)) {
      return;
    }

    const model = this.asNonEmptyString((await this.readJsonObject(req))?.model);
    if (model === null) {
      return this.sendError(res, 400, 'Missing model in request body', 'invalid_request_error');
    }

    const ensured = await this.clusterCoordinator.ensureModel(model);
    if (!ensured) {
      return this.sendError(res, 503, `Unable to schedule model "${model}"`, 'server_error');
    }

    return this.sendJson(res, 200, ensured);
  }

  /** POST /_cluster/models/desired — control plane only. */
  private async handleSetDesiredReplicas(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    if (!this.requireControlPlane(res)) {
      return;
    }

    const body = await this.readJsonObject(req);
    const model = this.asNonEmptyString(body?.model);
    const desiredReplicas = body?.desiredReplicas;

    if (model === null || desiredReplicas === undefined) {
      return this.sendError(
        res,
        400,
        'Missing model or desiredReplicas in request body',
        'invalid_request_error',
      );
    }

    // `null`, strings, fractions and negatives are present-but-invalid values.
    if (!this.isNonNegativeInteger(desiredReplicas)) {
      return this.sendError(
        res,
        400,
        'desiredReplicas must be a non-negative integer',
        'invalid_request_error',
      );
    }

    const desiredDeployment = await this.clusterCoordinator.setDesiredReplicas(
      model,
      desiredReplicas,
    );
    if (!desiredDeployment) {
      return this.sendError(res, 404, `Model "${model}" not found`, 'model_not_found');
    }

    return this.sendJson(res, 200, desiredDeployment);
  }

  /** POST /_cluster/models/desired/remove — control plane only. */
  private async handleRemoveDesired(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    if (!this.requireControlPlane(res)) {
      return;
    }

    const model = this.asNonEmptyString((await this.readJsonObject(req))?.model);
    if (model === null) {
      return this.sendError(res, 400, 'Missing model in request body', 'invalid_request_error');
    }

    const removed = await this.clusterCoordinator.clearDesiredDeployment(model);
    return this.sendJson(res, 200, { removed });
  }

  /**
   * POST /_cluster/deployments — deploys on this node. With `replicaOrdinal`
   * a specific replica is deployed, otherwise the model itself. This route
   * has no control-plane guard.
   */
  private async handleDeploy(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    const body = await this.readJsonObject(req);
    const model = this.asNonEmptyString(body?.model);
    if (model === null) {
      return this.sendError(res, 400, 'Missing model in request body', 'invalid_request_error');
    }

    const replicaOrdinal = body?.replicaOrdinal;
    if (replicaOrdinal !== undefined && !this.isNonNegativeInteger(replicaOrdinal)) {
      return this.sendError(
        res,
        400,
        'replicaOrdinal must be a non-negative integer',
        'invalid_request_error',
      );
    }

    const deployed = replicaOrdinal !== undefined
      ? await this.clusterCoordinator.deployReplicaLocally(model, replicaOrdinal)
      : await this.clusterCoordinator.deployModelLocally(model);
    if (!deployed) {
      return this.sendError(res, 503, `Unable to deploy model "${model}"`, 'server_error');
    }

    return this.sendJson(res, 200, deployed);
  }

  /**
   * Shared implementation of the cordon / uncordon / drain / activate routes
   * (control plane only): sets the scheduler state of the node named in the
   * body and echoes the coordinator's result.
   */
  private async handleSchedulerState(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    schedulerState: Parameters<ClusterCoordinator['setNodeSchedulerState']>[1],
  ): Promise<void> {
    if (!this.requireControlPlane(res)) {
      return;
    }

    const nodeName = this.asNonEmptyString((await this.readJsonObject(req))?.nodeName);
    if (nodeName === null) {
      return this.sendError(
        res,
        400,
        'Missing nodeName in request body',
        'invalid_request_error',
      );
    }

    const appliedState = this.clusterCoordinator.setNodeSchedulerState(nodeName, schedulerState);
    return this.sendJson(res, 200, { nodeName, schedulerState: appliedState });
  }

  /**
   * Guard for state-changing routes: answers HTTP 409 and returns `false`
   * when this node may not manage cluster state.
   */
  private requireControlPlane(res: http.ServerResponse): boolean {
    if (this.clusterCoordinator.canManageClusterState()) {
      return true;
    }

    this.sendError(res, 409, 'This node is not the control plane', 'invalid_request_error');
    return false;
  }

  /**
   * Checks the cluster shared secret.
   *
   * When the coordinator has no shared secret configured, every request is
   * accepted. Otherwise the header named by `CLUSTER.AUTH_HEADER_NAME` must
   * carry exactly the secret.
   */
  private authenticate(req: http.IncomingMessage): boolean {
    const sharedSecret = this.clusterCoordinator.getSharedSecret();
    if (!sharedSecret) {
      return true;
    }

    const presented = req.headers[CLUSTER.AUTH_HEADER_NAME];
    if (typeof presented !== 'string') {
      return false;
    }

    const encoder = new TextEncoder();
    const expectedBytes = encoder.encode(sharedSecret);
    const presentedBytes = encoder.encode(presented);

    // timingSafeEqual throws on unequal lengths, so compare lengths first; the
    // byte comparison itself is constant-time to avoid leaking the secret
    // through response timing.
    return expectedBytes.length === presentedBytes.length &&
      timingSafeEqual(expectedBytes, presentedBytes);
  }

  /**
   * Reads the request body and parses it as JSON.
   *
   * @returns The parsed value, or `null` when the body is empty, is not valid
   *          JSON, exceeds `MAX_BODY_BYTES`, or the stream errors.
   */
  private parseBody(req: http.IncomingMessage): Promise<unknown | null> {
    return new Promise((resolve) => {
      // A streaming decoder keeps multi-byte UTF-8 sequences intact even when
      // they are split across two network chunks.
      const decoder = new TextDecoder('utf-8');
      let text = '';
      let receivedBytes = 0;
      let tooLarge = false;

      req.on('data', (chunk: Uint8Array | string) => {
        if (tooLarge) {
          return; // keep draining so a response can still be sent
        }

        receivedBytes += chunk.length;
        if (receivedBytes > ClusterHandler.MAX_BODY_BYTES) {
          tooLarge = true;
          text = '';
          return;
        }

        text += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
      });

      req.on('end', () => {
        if (tooLarge) {
          resolve(null);
          return;
        }

        text += decoder.decode(); // flush any buffered trailing bytes
        if (!text) {
          resolve(null);
          return;
        }

        try {
          resolve(JSON.parse(text));
        } catch {
          resolve(null);
        }
      });

      req.on('error', () => resolve(null));
    });
  }

  /** Parses the body and returns it only if it is a JSON object (not an array or primitive). */
  private async readJsonObject(
    req: http.IncomingMessage,
  ): Promise<Record<string, unknown> | null> {
    const parsed = await this.parseBody(req);
    return this.isJsonObject(parsed) ? parsed : null;
  }

  private isJsonObject(value: unknown): value is Record<string, unknown> {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
  }

  /** Returns `value` when it is a non-empty string, otherwise `null`. */
  private asNonEmptyString(value: unknown): string | null {
    return typeof value === 'string' && value.length > 0 ? value : null;
  }

  private isNonNegativeInteger(value: unknown): value is number {
    return typeof value === 'number' && Number.isInteger(value) && value >= 0;
  }

  /** Writes `body` as a JSON response with the given status code. */
  private sendJson(res: http.ServerResponse, statusCode: number, body: unknown): void {
    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(body));
  }

  /** Writes an `IApiError` envelope with the given status code. */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
  ): void {
    const error: IApiError = {
      error: {
        message,
        type,
      },
    };

    this.sendJson(res, statusCode, error);
  }
}
|
||||
Reference in New Issue
Block a user