feat(cloudly): accept Spark node heartbeats

This commit is contained in:
2026-05-24 12:47:15 +00:00
parent ebb4f36c67
commit 6565c44c29
7 changed files with 189 additions and 17 deletions
+10
View File
@@ -2,6 +2,16 @@
## Pending
### Features
- accept Spark node heartbeats
- Adds a Spark heartbeat HTTP endpoint for cluster nodes
- Stores Spark metrics and runtime info on cluster node records
- Extends jump onboarding with per-node Spark telemetry credentials
### Maintenance
- refresh release tooling dependencies
## 2026-05-24 - 5.8.2
+3 -3
View File
@@ -23,15 +23,15 @@
"docs": "tsdoc aidoc"
},
"devDependencies": {
"@git.zone/tsbuild": "^4.4.0",
"@git.zone/tsbuild": "^4.4.1",
"@git.zone/tsbundle": "^2.10.4",
"@git.zone/tsdoc": "^2.0.5",
"@git.zone/tsdocker": "^2.2.6",
"@git.zone/tsdocker": "^2.3.0",
"@git.zone/tspublish": "^1.11.7",
"@git.zone/tstest": "^3.6.6",
"@git.zone/tswatch": "^3.3.5",
"@push.rocks/smartnetwork": "^4.7.1",
"@types/node": "^25.6.2"
"@types/node": "^25.8.0"
},
"dependencies": {
"@api.global/typedrequest": "3.3.1",
+25 -13
View File
@@ -151,8 +151,8 @@ importers:
version: 9.5.1
devDependencies:
'@git.zone/tsbuild':
specifier: ^4.4.0
version: 4.4.0
specifier: ^4.4.1
version: 4.4.1
'@git.zone/tsbundle':
specifier: ^2.10.4
version: 2.10.4
@@ -160,8 +160,8 @@ importers:
specifier: ^2.0.5
version: 2.0.5(ws@8.20.0)(zod@4.4.3)
'@git.zone/tsdocker':
specifier: ^2.2.6
version: 2.2.6
specifier: ^2.3.0
version: 2.3.0
'@git.zone/tspublish':
specifier: ^1.11.7
version: 1.11.7
@@ -175,8 +175,8 @@ importers:
specifier: ^4.7.1
version: 4.7.1
'@types/node':
specifier: ^25.6.2
version: 25.6.2
specifier: ^25.8.0
version: 25.8.0
packages:
@@ -831,8 +831,8 @@ packages:
'@gerrit0/mini-shiki@3.23.0':
resolution: {integrity: sha512-bEMORlG0cqdjVyCEuU0cDQbORWX+kYCeo0kV1lbxF5bt4r7SID2l9bqsxJEM0zndaxpOUT7riCyIVEuqq/Ynxg==}
'@git.zone/tsbuild@4.4.0':
resolution: {integrity: sha512-98igHfppi6blFYDyzNukNkj4FUO5ZlyXEaSyJh8vCkkZM8SyAgfZj+NUWA1D1iaPXE58UvK1Pt/o8p8iI9UHHw==}
'@git.zone/tsbuild@4.4.1':
resolution: {integrity: sha512-usxx8BBQsAypxjFOfd1GEV9pL9EUshRKktXtRWHMDByb6ps83+PdUIb3D7O+nkkBp4C9PXo3cfbsR4Asvo33CA==}
hasBin: true
'@git.zone/tsbundle@2.10.1':
@@ -847,8 +847,8 @@ packages:
resolution: {integrity: sha512-s0Jbq9q1lvPppaIsLRr0VJR5lJn9bBzSr4POssXHKFJlVXRU5UeefR7sRERXNYz45FUCXLn+PLAB786PKEAKXg==}
hasBin: true
'@git.zone/tsdocker@2.2.6':
resolution: {integrity: sha512-vF0QT5od+t7UyWT8dQt6grybAiVx8EhpH6OZoMsleOrAyLMHEcfAKaPfTELXFnF5A+GPhAree+3KpRGyp5cGCg==}
'@git.zone/tsdocker@2.3.0':
resolution: {integrity: sha512-im2hD3Fu7vSb6qM+WMg2tbvLbFfEpX8qVmjy491R5iELky4Pw9cqRMkwzmxW92etn8v+f53ODUQDOoc9DufX2A==}
hasBin: true
'@git.zone/tspublish@1.11.6':
@@ -2509,6 +2509,9 @@ packages:
'@types/node@25.6.2':
resolution: {integrity: sha512-sokuT28dxf9JT5Kady1fsXOvI4HVpjZa95NKT5y9PNTIrs2AsobR4GFAA90ZG8M+nxVRLysCXsVj6eGC7Vbrlw==}
'@types/node@25.8.0':
resolution: {integrity: sha512-TCFSk8IZh+iLX1xtksoBVtdmgL+1IX0fC9BeU4QqFSuNdN/K+HUlhqOzEmSYYpZUVsLYcPqc9KX+60iDuninSQ==}
'@types/randomatic@3.1.5':
resolution: {integrity: sha512-VCwCTw6qh1pRRw+5rNTAwqPmf6A+hdrkdM7dBpZVmhl7g+em3ONXlYK/bWPVKqVGMWgP0d1bog8Vc/X6zRwRRQ==}
@@ -4589,6 +4592,9 @@ packages:
undici-types@7.19.2:
resolution: {integrity: sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==}
undici-types@7.24.6:
resolution: {integrity: sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg==}
unified@11.0.5:
resolution: {integrity: sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==}
@@ -5729,9 +5735,9 @@ snapshots:
'@shikijs/types': 3.23.0
'@shikijs/vscode-textmate': 10.0.2
'@git.zone/tsbuild@4.4.0':
'@git.zone/tsbuild@4.4.1':
dependencies:
'@git.zone/tspublish': 1.11.6
'@git.zone/tspublish': 1.11.7
'@push.rocks/early': 4.0.4
'@push.rocks/smartcli': 4.0.21
'@push.rocks/smartdelay': 3.1.0
@@ -5841,7 +5847,7 @@ snapshots:
- ws
- zod
'@git.zone/tsdocker@2.2.6':
'@git.zone/tsdocker@2.3.0':
dependencies:
'@push.rocks/lik': 6.4.1
'@push.rocks/projectinfo': 5.1.0
@@ -8398,6 +8404,10 @@ snapshots:
dependencies:
undici-types: 7.19.2
'@types/node@25.8.0':
dependencies:
undici-types: 7.24.6
'@types/randomatic@3.1.5': {}
'@types/relateurl@0.2.33': {}
@@ -10883,6 +10893,8 @@ snapshots:
undici-types@7.19.2: {}
undici-types@7.24.6: {}
unified@11.0.5:
dependencies:
'@types/unist': 3.0.3
+5
View File
@@ -120,6 +120,11 @@ export class CloudlyServer {
'POST',
async (ctx) => this.cloudlyRef.baseOsManager.handleHeartbeatHttpRequest(ctx),
);
this.typedServer.addRoute(
'/spark/v1/nodes/heartbeat',
'POST',
async (ctx) => this.cloudlyRef.nodeManager.handleSparkHeartbeatHttpRequest(ctx),
);
this.typedServer.addRoute(
'/baseos/v1/images/:buildId/download',
'GET',
+7 -1
View File
@@ -15,6 +15,7 @@ interface IClaimJumpCodeResponse {
accepted: boolean;
message?: string;
nodeId?: string;
sparkNodeToken?: string;
cloudlyUrl?: string;
coreflowJumpCode?: string;
}
@@ -148,8 +149,10 @@ export class CloudlyJumpManager {
const nodeId = plugins.smartunique.shortId(8);
const now = Date.now();
const sparkNodeToken = await this.cloudlyRef.authManager.createNewSecureToken();
const node = new this.cloudlyRef.nodeManager.CClusterNode();
node.id = nodeId;
node.sparkNodeTokenHash = this.hashSecret(sparkNodeToken);
node.data = {
clusterId: cluster.id,
nodeType: jumpCodeDoc.data.nodeType,
@@ -178,6 +181,7 @@ export class CloudlyJumpManager {
return {
accepted: true,
nodeId: node.id,
sparkNodeToken,
cloudlyUrl: cluster.data.cloudlyUrl || `${this.getPublicCloudlyUrl()}/`,
coreflowJumpCode,
};
@@ -292,8 +296,10 @@ CLAIM_RESPONSE="$(curl -fsSL -X POST "\${CLAIM_URL}" -H 'content-type: applicati
export CLAIM_RESPONSE
CLOUDLY_URL="$(node -e 'const data = JSON.parse(process.env.CLAIM_RESPONSE); if (!data.accepted) { throw new Error(data.message || "Cloudly rejected jump code"); } process.stdout.write(data.cloudlyUrl);')"
COREFLOW_JUMPCODE="$(node -e 'const data = JSON.parse(process.env.CLAIM_RESPONSE); if (!data.coreflowJumpCode) { throw new Error("Cloudly did not return a Coreflow jump code"); } process.stdout.write(data.coreflowJumpCode);')"
SPARK_NODE_ID="$(node -e 'const data = JSON.parse(process.env.CLAIM_RESPONSE); if (!data.nodeId) { throw new Error("Cloudly did not return a Spark node id"); } process.stdout.write(data.nodeId);')"
SPARK_NODE_TOKEN="$(node -e 'const data = JSON.parse(process.env.CLAIM_RESPONSE); if (!data.sparkNodeToken) { throw new Error("Cloudly did not return a Spark node token"); } process.stdout.write(data.sparkNodeToken);')"
spark installdaemon --mode=coreflow-node --cloudlyUrl="\${CLOUDLY_URL}" --jumpcode="\${COREFLOW_JUMPCODE}"
spark installdaemon --mode=coreflow-node --cloudlyUrl="\${CLOUDLY_URL}" --jumpcode="\${COREFLOW_JUMPCODE}" --nodeId="\${SPARK_NODE_ID}" --nodeToken="\${SPARK_NODE_TOKEN}"
echo "Cloudly jump completed. This system is now connected."
`;
+19
View File
@@ -39,6 +39,9 @@ export class ClusterNode extends plugins.smartdata.SmartDataDbDoc<
@plugins.smartdata.svDb()
public data!: plugins.servezoneInterfaces.data.IClusterNode['data'];
@plugins.smartdata.svDb()
public sparkNodeTokenHash?: string;
constructor() {
super();
}
@@ -54,6 +57,22 @@ export class ClusterNode extends plugins.smartdata.SmartDataDbDoc<
await this.save();
}
public async updateSparkHeartbeat(
metricsArg: plugins.servezoneInterfaces.data.IClusterNodeMetrics,
runtimeInfoArg: Record<string, unknown>,
) {
this.data.metrics = metricsArg;
(this.data as plugins.servezoneInterfaces.data.IClusterNode['data'] & {
sparkRuntimeInfo?: Record<string, unknown>;
}).sparkRuntimeInfo = runtimeInfoArg;
this.data.status = 'online';
this.data.lastHealthCheck = Date.now();
if (typeof runtimeInfoArg.swarmNodeId === 'string' && runtimeInfoArg.swarmNodeId) {
this.data.swarmNodeId = runtimeInfoArg.swarmNodeId;
}
await this.save();
}
public async updateStatus(status: plugins.servezoneInterfaces.data.IClusterNode['data']['status']) {
this.data.status = status;
await this.save();
+120
View File
@@ -4,6 +4,18 @@ import { Cluster } from '../manager.cluster/classes.cluster.js';
import { ClusterNode } from './classes.clusternode.js';
import { CurlFresh } from './classes.curlfresh.js';
interface ISparkHeartbeatRequest {
nodeId?: string;
nodeToken?: string;
metrics?: plugins.servezoneInterfaces.data.IClusterNodeMetrics;
runtimeInfo?: Record<string, unknown>;
}
interface ISparkHeartbeatResponse {
accepted: boolean;
message?: string;
}
export class CloudlyNodeManager {
public cloudlyRef: Cloudly;
public typedRouter = new plugins.typedrequest.TypedRouter();
@@ -51,6 +63,69 @@ export class CloudlyNodeManager {
public async stop() {}
public async handleSparkHeartbeatHttpRequest(
ctxArg: plugins.typedserver.IRequestContext,
): Promise<Response> {
try {
const requestData = await this.readJsonBody<ISparkHeartbeatRequest>(ctxArg);
const response = await this.acceptSparkHeartbeat(requestData);
return this.createJsonResponse(200, response);
} catch (error) {
return this.createJsonResponse(400, {
accepted: false,
message: `Spark heartbeat failed: ${(error as Error).message}`,
} satisfies ISparkHeartbeatResponse);
}
}
public async acceptSparkHeartbeat(
requestDataArg: ISparkHeartbeatRequest,
): Promise<ISparkHeartbeatResponse> {
if (!requestDataArg.nodeId) {
return {
accepted: false,
message: 'Spark node id is missing',
};
}
if (!requestDataArg.nodeToken) {
return {
accepted: false,
message: 'Spark node token is missing',
};
}
if (!this.isSparkMetrics(requestDataArg.metrics)) {
return {
accepted: false,
message: 'Spark metrics are missing or invalid',
};
}
if (!this.isSparkRuntimeInfo(requestDataArg.runtimeInfo, requestDataArg.nodeId)) {
return {
accepted: false,
message: 'Spark runtime info is missing or invalid',
};
}
const node = await this.CClusterNode.getInstance({ id: requestDataArg.nodeId });
if (!node) {
return {
accepted: false,
message: 'Spark node is unknown',
};
}
if (node.sparkNodeTokenHash !== this.hashSecret(requestDataArg.nodeToken)) {
return {
accepted: false,
message: 'Spark node token is invalid',
};
}
await node.updateSparkHeartbeat(requestDataArg.metrics, requestDataArg.runtimeInfo);
return {
accepted: true,
};
}
/**
* creates the node infrastructure on hetzner
* ensures that there are exactly the resources that are needed
@@ -133,4 +208,49 @@ export class CloudlyNodeManager {
});
return results;
}
private isSparkMetrics(valueArg: unknown): valueArg is plugins.servezoneInterfaces.data.IClusterNodeMetrics {
if (!valueArg || typeof valueArg !== 'object') {
return false;
}
const metrics = valueArg as Partial<plugins.servezoneInterfaces.data.IClusterNodeMetrics>;
return typeof metrics.cpuUsagePercent === 'number'
&& typeof metrics.memoryUsedMB === 'number'
&& typeof metrics.memoryAvailableMB === 'number'
&& typeof metrics.diskUsedGB === 'number'
&& typeof metrics.diskAvailableGB === 'number'
&& typeof metrics.containerCount === 'number'
&& typeof metrics.timestamp === 'number';
}
private isSparkRuntimeInfo(valueArg: unknown, nodeIdArg: string): valueArg is Record<string, unknown> {
if (!valueArg || typeof valueArg !== 'object') {
return false;
}
const runtimeInfo = valueArg as Record<string, unknown>;
return runtimeInfo.runtime === 'spark'
&& runtimeInfo.nodeId === nodeIdArg
&& typeof runtimeInfo.checkedAt === 'number';
}
private hashSecret(secretArg: string) {
return plugins.crypto.createHash('sha256').update(secretArg).digest('hex');
}
private async readJsonBody<T>(ctxArg: plugins.typedserver.IRequestContext): Promise<T> {
const bodyString = (await ctxArg.text()).trim();
return bodyString ? JSON.parse(bodyString) as T : {} as T;
}
private createJsonResponse(
statusCodeArg: number,
bodyArg: object,
): Response {
return new Response(JSON.stringify(bodyArg), {
status: statusCodeArg,
headers: {
'Content-Type': 'application/json',
},
});
}
}