feat(cloudly): accept Spark node heartbeats
This commit is contained in:
@@ -2,6 +2,16 @@
|
||||
|
||||
## Pending
|
||||
|
||||
### Features
|
||||
|
||||
- accept Spark node heartbeats
|
||||
- Adds a Spark heartbeat HTTP endpoint for cluster nodes
|
||||
- Stores Spark metrics and runtime info on cluster node records
|
||||
- Extends jump onboarding with per-node Spark telemetry credentials
|
||||
|
||||
### Maintenance
|
||||
|
||||
- refresh release tooling dependencies
|
||||
|
||||
## 2026-05-24 - 5.8.2
|
||||
|
||||
|
||||
+3
-3
@@ -23,15 +23,15 @@
|
||||
"docs": "tsdoc aidoc"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@git.zone/tsbuild": "^4.4.0",
|
||||
"@git.zone/tsbuild": "^4.4.1",
|
||||
"@git.zone/tsbundle": "^2.10.4",
|
||||
"@git.zone/tsdoc": "^2.0.5",
|
||||
"@git.zone/tsdocker": "^2.2.6",
|
||||
"@git.zone/tsdocker": "^2.3.0",
|
||||
"@git.zone/tspublish": "^1.11.7",
|
||||
"@git.zone/tstest": "^3.6.6",
|
||||
"@git.zone/tswatch": "^3.3.5",
|
||||
"@push.rocks/smartnetwork": "^4.7.1",
|
||||
"@types/node": "^25.6.2"
|
||||
"@types/node": "^25.8.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@api.global/typedrequest": "3.3.1",
|
||||
|
||||
Generated
+25
-13
@@ -151,8 +151,8 @@ importers:
|
||||
version: 9.5.1
|
||||
devDependencies:
|
||||
'@git.zone/tsbuild':
|
||||
specifier: ^4.4.0
|
||||
version: 4.4.0
|
||||
specifier: ^4.4.1
|
||||
version: 4.4.1
|
||||
'@git.zone/tsbundle':
|
||||
specifier: ^2.10.4
|
||||
version: 2.10.4
|
||||
@@ -160,8 +160,8 @@ importers:
|
||||
specifier: ^2.0.5
|
||||
version: 2.0.5(ws@8.20.0)(zod@4.4.3)
|
||||
'@git.zone/tsdocker':
|
||||
specifier: ^2.2.6
|
||||
version: 2.2.6
|
||||
specifier: ^2.3.0
|
||||
version: 2.3.0
|
||||
'@git.zone/tspublish':
|
||||
specifier: ^1.11.7
|
||||
version: 1.11.7
|
||||
@@ -175,8 +175,8 @@ importers:
|
||||
specifier: ^4.7.1
|
||||
version: 4.7.1
|
||||
'@types/node':
|
||||
specifier: ^25.6.2
|
||||
version: 25.6.2
|
||||
specifier: ^25.8.0
|
||||
version: 25.8.0
|
||||
|
||||
packages:
|
||||
|
||||
@@ -831,8 +831,8 @@ packages:
|
||||
'@gerrit0/mini-shiki@3.23.0':
|
||||
resolution: {integrity: sha512-bEMORlG0cqdjVyCEuU0cDQbORWX+kYCeo0kV1lbxF5bt4r7SID2l9bqsxJEM0zndaxpOUT7riCyIVEuqq/Ynxg==}
|
||||
|
||||
'@git.zone/tsbuild@4.4.0':
|
||||
resolution: {integrity: sha512-98igHfppi6blFYDyzNukNkj4FUO5ZlyXEaSyJh8vCkkZM8SyAgfZj+NUWA1D1iaPXE58UvK1Pt/o8p8iI9UHHw==}
|
||||
'@git.zone/tsbuild@4.4.1':
|
||||
resolution: {integrity: sha512-usxx8BBQsAypxjFOfd1GEV9pL9EUshRKktXtRWHMDByb6ps83+PdUIb3D7O+nkkBp4C9PXo3cfbsR4Asvo33CA==}
|
||||
hasBin: true
|
||||
|
||||
'@git.zone/tsbundle@2.10.1':
|
||||
@@ -847,8 +847,8 @@ packages:
|
||||
resolution: {integrity: sha512-s0Jbq9q1lvPppaIsLRr0VJR5lJn9bBzSr4POssXHKFJlVXRU5UeefR7sRERXNYz45FUCXLn+PLAB786PKEAKXg==}
|
||||
hasBin: true
|
||||
|
||||
'@git.zone/tsdocker@2.2.6':
|
||||
resolution: {integrity: sha512-vF0QT5od+t7UyWT8dQt6grybAiVx8EhpH6OZoMsleOrAyLMHEcfAKaPfTELXFnF5A+GPhAree+3KpRGyp5cGCg==}
|
||||
'@git.zone/tsdocker@2.3.0':
|
||||
resolution: {integrity: sha512-im2hD3Fu7vSb6qM+WMg2tbvLbFfEpX8qVmjy491R5iELky4Pw9cqRMkwzmxW92etn8v+f53ODUQDOoc9DufX2A==}
|
||||
hasBin: true
|
||||
|
||||
'@git.zone/tspublish@1.11.6':
|
||||
@@ -2509,6 +2509,9 @@ packages:
|
||||
'@types/node@25.6.2':
|
||||
resolution: {integrity: sha512-sokuT28dxf9JT5Kady1fsXOvI4HVpjZa95NKT5y9PNTIrs2AsobR4GFAA90ZG8M+nxVRLysCXsVj6eGC7Vbrlw==}
|
||||
|
||||
'@types/node@25.8.0':
|
||||
resolution: {integrity: sha512-TCFSk8IZh+iLX1xtksoBVtdmgL+1IX0fC9BeU4QqFSuNdN/K+HUlhqOzEmSYYpZUVsLYcPqc9KX+60iDuninSQ==}
|
||||
|
||||
'@types/randomatic@3.1.5':
|
||||
resolution: {integrity: sha512-VCwCTw6qh1pRRw+5rNTAwqPmf6A+hdrkdM7dBpZVmhl7g+em3ONXlYK/bWPVKqVGMWgP0d1bog8Vc/X6zRwRRQ==}
|
||||
|
||||
@@ -4589,6 +4592,9 @@ packages:
|
||||
undici-types@7.19.2:
|
||||
resolution: {integrity: sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==}
|
||||
|
||||
undici-types@7.24.6:
|
||||
resolution: {integrity: sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg==}
|
||||
|
||||
unified@11.0.5:
|
||||
resolution: {integrity: sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==}
|
||||
|
||||
@@ -5729,9 +5735,9 @@ snapshots:
|
||||
'@shikijs/types': 3.23.0
|
||||
'@shikijs/vscode-textmate': 10.0.2
|
||||
|
||||
'@git.zone/tsbuild@4.4.0':
|
||||
'@git.zone/tsbuild@4.4.1':
|
||||
dependencies:
|
||||
'@git.zone/tspublish': 1.11.6
|
||||
'@git.zone/tspublish': 1.11.7
|
||||
'@push.rocks/early': 4.0.4
|
||||
'@push.rocks/smartcli': 4.0.21
|
||||
'@push.rocks/smartdelay': 3.1.0
|
||||
@@ -5841,7 +5847,7 @@ snapshots:
|
||||
- ws
|
||||
- zod
|
||||
|
||||
'@git.zone/tsdocker@2.2.6':
|
||||
'@git.zone/tsdocker@2.3.0':
|
||||
dependencies:
|
||||
'@push.rocks/lik': 6.4.1
|
||||
'@push.rocks/projectinfo': 5.1.0
|
||||
@@ -8398,6 +8404,10 @@ snapshots:
|
||||
dependencies:
|
||||
undici-types: 7.19.2
|
||||
|
||||
'@types/node@25.8.0':
|
||||
dependencies:
|
||||
undici-types: 7.24.6
|
||||
|
||||
'@types/randomatic@3.1.5': {}
|
||||
|
||||
'@types/relateurl@0.2.33': {}
|
||||
@@ -10883,6 +10893,8 @@ snapshots:
|
||||
|
||||
undici-types@7.19.2: {}
|
||||
|
||||
undici-types@7.24.6: {}
|
||||
|
||||
unified@11.0.5:
|
||||
dependencies:
|
||||
'@types/unist': 3.0.3
|
||||
|
||||
@@ -120,6 +120,11 @@ export class CloudlyServer {
|
||||
'POST',
|
||||
async (ctx) => this.cloudlyRef.baseOsManager.handleHeartbeatHttpRequest(ctx),
|
||||
);
|
||||
this.typedServer.addRoute(
|
||||
'/spark/v1/nodes/heartbeat',
|
||||
'POST',
|
||||
async (ctx) => this.cloudlyRef.nodeManager.handleSparkHeartbeatHttpRequest(ctx),
|
||||
);
|
||||
this.typedServer.addRoute(
|
||||
'/baseos/v1/images/:buildId/download',
|
||||
'GET',
|
||||
|
||||
@@ -15,6 +15,7 @@ interface IClaimJumpCodeResponse {
|
||||
accepted: boolean;
|
||||
message?: string;
|
||||
nodeId?: string;
|
||||
sparkNodeToken?: string;
|
||||
cloudlyUrl?: string;
|
||||
coreflowJumpCode?: string;
|
||||
}
|
||||
@@ -148,8 +149,10 @@ export class CloudlyJumpManager {
|
||||
|
||||
const nodeId = plugins.smartunique.shortId(8);
|
||||
const now = Date.now();
|
||||
const sparkNodeToken = await this.cloudlyRef.authManager.createNewSecureToken();
|
||||
const node = new this.cloudlyRef.nodeManager.CClusterNode();
|
||||
node.id = nodeId;
|
||||
node.sparkNodeTokenHash = this.hashSecret(sparkNodeToken);
|
||||
node.data = {
|
||||
clusterId: cluster.id,
|
||||
nodeType: jumpCodeDoc.data.nodeType,
|
||||
@@ -178,6 +181,7 @@ export class CloudlyJumpManager {
|
||||
return {
|
||||
accepted: true,
|
||||
nodeId: node.id,
|
||||
sparkNodeToken,
|
||||
cloudlyUrl: cluster.data.cloudlyUrl || `${this.getPublicCloudlyUrl()}/`,
|
||||
coreflowJumpCode,
|
||||
};
|
||||
@@ -292,8 +296,10 @@ CLAIM_RESPONSE="$(curl -fsSL -X POST "\${CLAIM_URL}" -H 'content-type: applicati
|
||||
export CLAIM_RESPONSE
|
||||
CLOUDLY_URL="$(node -e 'const data = JSON.parse(process.env.CLAIM_RESPONSE); if (!data.accepted) { throw new Error(data.message || "Cloudly rejected jump code"); } process.stdout.write(data.cloudlyUrl);')"
|
||||
COREFLOW_JUMPCODE="$(node -e 'const data = JSON.parse(process.env.CLAIM_RESPONSE); if (!data.coreflowJumpCode) { throw new Error("Cloudly did not return a Coreflow jump code"); } process.stdout.write(data.coreflowJumpCode);')"
|
||||
SPARK_NODE_ID="$(node -e 'const data = JSON.parse(process.env.CLAIM_RESPONSE); if (!data.nodeId) { throw new Error("Cloudly did not return a Spark node id"); } process.stdout.write(data.nodeId);')"
|
||||
SPARK_NODE_TOKEN="$(node -e 'const data = JSON.parse(process.env.CLAIM_RESPONSE); if (!data.sparkNodeToken) { throw new Error("Cloudly did not return a Spark node token"); } process.stdout.write(data.sparkNodeToken);')"
|
||||
|
||||
spark installdaemon --mode=coreflow-node --cloudlyUrl="\${CLOUDLY_URL}" --jumpcode="\${COREFLOW_JUMPCODE}"
|
||||
spark installdaemon --mode=coreflow-node --cloudlyUrl="\${CLOUDLY_URL}" --jumpcode="\${COREFLOW_JUMPCODE}" --nodeId="\${SPARK_NODE_ID}" --nodeToken="\${SPARK_NODE_TOKEN}"
|
||||
|
||||
echo "Cloudly jump completed. This system is now connected."
|
||||
`;
|
||||
|
||||
@@ -39,6 +39,9 @@ export class ClusterNode extends plugins.smartdata.SmartDataDbDoc<
|
||||
@plugins.smartdata.svDb()
|
||||
public data!: plugins.servezoneInterfaces.data.IClusterNode['data'];
|
||||
|
||||
@plugins.smartdata.svDb()
|
||||
public sparkNodeTokenHash?: string;
|
||||
|
||||
constructor() {
|
||||
super();
|
||||
}
|
||||
@@ -54,6 +57,22 @@ export class ClusterNode extends plugins.smartdata.SmartDataDbDoc<
|
||||
await this.save();
|
||||
}
|
||||
|
||||
public async updateSparkHeartbeat(
|
||||
metricsArg: plugins.servezoneInterfaces.data.IClusterNodeMetrics,
|
||||
runtimeInfoArg: Record<string, unknown>,
|
||||
) {
|
||||
this.data.metrics = metricsArg;
|
||||
(this.data as plugins.servezoneInterfaces.data.IClusterNode['data'] & {
|
||||
sparkRuntimeInfo?: Record<string, unknown>;
|
||||
}).sparkRuntimeInfo = runtimeInfoArg;
|
||||
this.data.status = 'online';
|
||||
this.data.lastHealthCheck = Date.now();
|
||||
if (typeof runtimeInfoArg.swarmNodeId === 'string' && runtimeInfoArg.swarmNodeId) {
|
||||
this.data.swarmNodeId = runtimeInfoArg.swarmNodeId;
|
||||
}
|
||||
await this.save();
|
||||
}
|
||||
|
||||
public async updateStatus(status: plugins.servezoneInterfaces.data.IClusterNode['data']['status']) {
|
||||
this.data.status = status;
|
||||
await this.save();
|
||||
|
||||
@@ -4,6 +4,18 @@ import { Cluster } from '../manager.cluster/classes.cluster.js';
|
||||
import { ClusterNode } from './classes.clusternode.js';
|
||||
import { CurlFresh } from './classes.curlfresh.js';
|
||||
|
||||
interface ISparkHeartbeatRequest {
|
||||
nodeId?: string;
|
||||
nodeToken?: string;
|
||||
metrics?: plugins.servezoneInterfaces.data.IClusterNodeMetrics;
|
||||
runtimeInfo?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
interface ISparkHeartbeatResponse {
|
||||
accepted: boolean;
|
||||
message?: string;
|
||||
}
|
||||
|
||||
export class CloudlyNodeManager {
|
||||
public cloudlyRef: Cloudly;
|
||||
public typedRouter = new plugins.typedrequest.TypedRouter();
|
||||
@@ -51,6 +63,69 @@ export class CloudlyNodeManager {
|
||||
|
||||
public async stop() {}
|
||||
|
||||
public async handleSparkHeartbeatHttpRequest(
|
||||
ctxArg: plugins.typedserver.IRequestContext,
|
||||
): Promise<Response> {
|
||||
try {
|
||||
const requestData = await this.readJsonBody<ISparkHeartbeatRequest>(ctxArg);
|
||||
const response = await this.acceptSparkHeartbeat(requestData);
|
||||
return this.createJsonResponse(200, response);
|
||||
} catch (error) {
|
||||
return this.createJsonResponse(400, {
|
||||
accepted: false,
|
||||
message: `Spark heartbeat failed: ${(error as Error).message}`,
|
||||
} satisfies ISparkHeartbeatResponse);
|
||||
}
|
||||
}
|
||||
|
||||
public async acceptSparkHeartbeat(
|
||||
requestDataArg: ISparkHeartbeatRequest,
|
||||
): Promise<ISparkHeartbeatResponse> {
|
||||
if (!requestDataArg.nodeId) {
|
||||
return {
|
||||
accepted: false,
|
||||
message: 'Spark node id is missing',
|
||||
};
|
||||
}
|
||||
if (!requestDataArg.nodeToken) {
|
||||
return {
|
||||
accepted: false,
|
||||
message: 'Spark node token is missing',
|
||||
};
|
||||
}
|
||||
if (!this.isSparkMetrics(requestDataArg.metrics)) {
|
||||
return {
|
||||
accepted: false,
|
||||
message: 'Spark metrics are missing or invalid',
|
||||
};
|
||||
}
|
||||
if (!this.isSparkRuntimeInfo(requestDataArg.runtimeInfo, requestDataArg.nodeId)) {
|
||||
return {
|
||||
accepted: false,
|
||||
message: 'Spark runtime info is missing or invalid',
|
||||
};
|
||||
}
|
||||
|
||||
const node = await this.CClusterNode.getInstance({ id: requestDataArg.nodeId });
|
||||
if (!node) {
|
||||
return {
|
||||
accepted: false,
|
||||
message: 'Spark node is unknown',
|
||||
};
|
||||
}
|
||||
if (node.sparkNodeTokenHash !== this.hashSecret(requestDataArg.nodeToken)) {
|
||||
return {
|
||||
accepted: false,
|
||||
message: 'Spark node token is invalid',
|
||||
};
|
||||
}
|
||||
|
||||
await node.updateSparkHeartbeat(requestDataArg.metrics, requestDataArg.runtimeInfo);
|
||||
return {
|
||||
accepted: true,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* creates the node infrastructure on hetzner
|
||||
* ensures that there are exactly the resources that are needed
|
||||
@@ -133,4 +208,49 @@ export class CloudlyNodeManager {
|
||||
});
|
||||
return results;
|
||||
}
|
||||
|
||||
private isSparkMetrics(valueArg: unknown): valueArg is plugins.servezoneInterfaces.data.IClusterNodeMetrics {
|
||||
if (!valueArg || typeof valueArg !== 'object') {
|
||||
return false;
|
||||
}
|
||||
const metrics = valueArg as Partial<plugins.servezoneInterfaces.data.IClusterNodeMetrics>;
|
||||
return typeof metrics.cpuUsagePercent === 'number'
|
||||
&& typeof metrics.memoryUsedMB === 'number'
|
||||
&& typeof metrics.memoryAvailableMB === 'number'
|
||||
&& typeof metrics.diskUsedGB === 'number'
|
||||
&& typeof metrics.diskAvailableGB === 'number'
|
||||
&& typeof metrics.containerCount === 'number'
|
||||
&& typeof metrics.timestamp === 'number';
|
||||
}
|
||||
|
||||
private isSparkRuntimeInfo(valueArg: unknown, nodeIdArg: string): valueArg is Record<string, unknown> {
|
||||
if (!valueArg || typeof valueArg !== 'object') {
|
||||
return false;
|
||||
}
|
||||
const runtimeInfo = valueArg as Record<string, unknown>;
|
||||
return runtimeInfo.runtime === 'spark'
|
||||
&& runtimeInfo.nodeId === nodeIdArg
|
||||
&& typeof runtimeInfo.checkedAt === 'number';
|
||||
}
|
||||
|
||||
private hashSecret(secretArg: string) {
|
||||
return plugins.crypto.createHash('sha256').update(secretArg).digest('hex');
|
||||
}
|
||||
|
||||
private async readJsonBody<T>(ctxArg: plugins.typedserver.IRequestContext): Promise<T> {
|
||||
const bodyString = (await ctxArg.text()).trim();
|
||||
return bodyString ? JSON.parse(bodyString) as T : {} as T;
|
||||
}
|
||||
|
||||
private createJsonResponse(
|
||||
statusCodeArg: number,
|
||||
bodyArg: object,
|
||||
): Response {
|
||||
return new Response(JSON.stringify(bodyArg), {
|
||||
status: statusCodeArg,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user