feat(remoteingress): add heartbeat PING/PONG and liveness timeouts; implement fast-reconnect/backoff reset and JS crash-recovery auto-restart
This commit is contained in:
@@ -40,9 +40,16 @@ export interface IEdgeConfig {
|
||||
secret: string;
|
||||
}
|
||||
|
||||
// Maximum number of automatic restart attempts after a crash of the Rust binary
// before crash recovery gives up and emits 'crashRecoveryFailed'.
const MAX_RESTART_ATTEMPTS = 10;
|
||||
// Upper bound, in milliseconds, for the delay between restart attempts.
// The delay starts at 1000 ms and is doubled after each attempt until it hits this cap.
const MAX_RESTART_BACKOFF_MS = 30_000;
|
||||
|
||||
export class RemoteIngressEdge extends EventEmitter {
|
||||
private bridge: InstanceType<typeof plugins.smartrust.RustBridge<TEdgeCommands>>;
|
||||
private started = false;
|
||||
private stopping = false;
|
||||
private savedConfig: IEdgeConfig | null = null;
|
||||
private restartBackoffMs = 1000;
|
||||
private restartAttempts = 0;
|
||||
private statusInterval: ReturnType<typeof setInterval> | undefined;
|
||||
|
||||
constructor() {
|
||||
@@ -109,11 +116,17 @@ export class RemoteIngressEdge extends EventEmitter {
|
||||
edgeConfig = config;
|
||||
}
|
||||
|
||||
this.savedConfig = edgeConfig;
|
||||
this.stopping = false;
|
||||
|
||||
const spawned = await this.bridge.spawn();
|
||||
if (!spawned) {
|
||||
throw new Error('Failed to spawn remoteingress-bin');
|
||||
}
|
||||
|
||||
// Register crash recovery handler
|
||||
this.bridge.on('exit', this.handleCrashRecovery);
|
||||
|
||||
await this.bridge.sendCommand('startEdge', {
|
||||
hubHost: edgeConfig.hubHost,
|
||||
hubPort: edgeConfig.hubPort ?? 8443,
|
||||
@@ -122,6 +135,8 @@ export class RemoteIngressEdge extends EventEmitter {
|
||||
});
|
||||
|
||||
this.started = true;
|
||||
this.restartAttempts = 0;
|
||||
this.restartBackoffMs = 1000;
|
||||
|
||||
// Start periodic status logging
|
||||
this.statusInterval = setInterval(async () => {
|
||||
@@ -142,6 +157,7 @@ export class RemoteIngressEdge extends EventEmitter {
|
||||
* Stop the edge and kill the Rust process.
|
||||
*/
|
||||
public async stop(): Promise<void> {
|
||||
this.stopping = true;
|
||||
if (this.statusInterval) {
|
||||
clearInterval(this.statusInterval);
|
||||
this.statusInterval = undefined;
|
||||
@@ -152,6 +168,7 @@ export class RemoteIngressEdge extends EventEmitter {
|
||||
} catch {
|
||||
// Process may already be dead
|
||||
}
|
||||
this.bridge.removeListener('exit', this.handleCrashRecovery);
|
||||
this.bridge.kill();
|
||||
this.started = false;
|
||||
}
|
||||
@@ -170,4 +187,55 @@ export class RemoteIngressEdge extends EventEmitter {
|
||||
public get running(): boolean {
|
||||
// Reports the bridge's own `running` flag; this is separate from the
// edge-level `started` flag that this class maintains itself.
return this.bridge.running;
|
||||
}
|
||||
|
||||
/**
 * Handle an unexpected exit of the Rust binary: respawn it and re-issue
 * `startEdge` with the saved config, retrying with exponential backoff.
 *
 * Registered as the bridge's 'exit' listener. The call is a no-op when the
 * edge is being stopped, was not started, or has no saved config.
 *
 * Each attempt waits `restartBackoffMs` (doubled afterwards, capped at
 * MAX_RESTART_BACKOFF_MS). Attempts continue until one succeeds, `stop()`
 * is called, or MAX_RESTART_ATTEMPTS is reached.
 *
 * Emits 'crashRecovered' after a successful restart and
 * 'crashRecoveryFailed' once all attempts are exhausted.
 *
 * @param code - exit code delivered with the 'exit' event (only logged)
 * @param signal - signal delivered with the 'exit' event (only logged)
 */
private handleCrashRecovery = async (code: number | null, signal: string | null): Promise<void> => {
  const config = this.savedConfig;
  if (this.stopping || !this.started || !config) {
    return;
  }

  console.error(
    `[RemoteIngressEdge] Rust binary crashed (code=${code}, signal=${signal}), ` +
      `attempt ${this.restartAttempts + 1}/${MAX_RESTART_ATTEMPTS}`
  );

  // Cleared synchronously (before the first await) so that any further 'exit'
  // event arriving while recovery is in progress hits the guard above.
  this.started = false;

  while (this.restartAttempts < MAX_RESTART_ATTEMPTS) {
    const delayMs = this.restartBackoffMs;
    await new Promise<void>((resolve) => setTimeout(resolve, delayMs));

    // stop() may have been called during the backoff wait; do not resurrect
    // a process the caller explicitly asked to shut down.
    if (this.stopping) {
      return;
    }

    this.restartBackoffMs = Math.min(delayMs * 2, MAX_RESTART_BACKOFF_MS);
    this.restartAttempts++;

    let spawned = false;
    try {
      spawned = Boolean(await this.bridge.spawn());
      if (!spawned) {
        console.error('[RemoteIngressEdge] Failed to respawn binary');
        continue;
      }

      // NOTE(review): assumes kill() is safe on an already-dead process,
      // as stop() relies on the same thing — confirm against the bridge.
      if (this.stopping) {
        this.bridge.kill();
        return;
      }

      // Remove before adding so the handler is registered exactly once,
      // regardless of whether the bridge keeps listeners across spawns.
      this.bridge.removeListener('exit', this.handleCrashRecovery);
      this.bridge.on('exit', this.handleCrashRecovery);

      await this.bridge.sendCommand('startEdge', {
        hubHost: config.hubHost,
        hubPort: config.hubPort ?? 8443,
        edgeId: config.edgeId,
        secret: config.secret,
      });

      if (this.stopping) {
        this.bridge.kill();
        return;
      }

      this.started = true;
      this.restartAttempts = 0;
      this.restartBackoffMs = 1000;
      console.log('[RemoteIngressEdge] Successfully recovered from crash');
      this.emit('crashRecovered');
      return;
    } catch (err) {
      console.error(`[RemoteIngressEdge] Crash recovery failed: ${err}`);
      if (spawned) {
        // The process came up but could not be configured; terminate it so
        // the next attempt does not spawn alongside a half-started binary.
        // The resulting 'exit' event is ignored because `started` is false.
        this.bridge.kill();
      }
    }
  }

  console.error('[RemoteIngressEdge] Max restart attempts reached, giving up');
  this.emit('crashRecoveryFailed');
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user