529 lines
16 KiB
TypeScript
529 lines
16 KiB
TypeScript
import * as plugins from './taskbuffer.plugins.js';
|
|
import { logger } from './taskbuffer.logging.js';
|
|
import type {
|
|
TServiceState,
|
|
TServiceCriticality,
|
|
IServiceEvent,
|
|
IServiceStatus,
|
|
IRetryConfig,
|
|
IHealthCheckConfig,
|
|
IServiceOptions,
|
|
} from './taskbuffer.interfaces.js';
|
|
|
|
/**
 * Service represents a long-running component with start/stop lifecycle,
 * health checking, and retry capabilities.
 *
 * Use via builder pattern:
 *   new Service('MyService')
 *     .critical()
 *     .dependsOn('Database')
 *     .withStart(async () => { ... })
 *     .withStop(async (instance) => { ... })
 *     .withHealthCheck(async (instance) => { ... })
 *
 * Or extend for complex services:
 *   class MyService extends Service {
 *     protected async serviceStart() { ... }
 *     protected async serviceStop() { ... }
 *   }
 *
 * Lifecycle events are published on {@link Service.eventSubject}. Note that
 * the transitions to 'starting' and 'stopping' only change the state; no
 * event is emitted for them.
 */
export class Service<T = any> {
  public readonly name: string;
  public readonly eventSubject = new plugins.smartrx.rxjs.Subject<IServiceEvent>();

  // ── Internal state ─────────────────────────────────
  private _state: TServiceState = 'stopped';
  private _criticality: TServiceCriticality = 'optional';
  private _dependencies: string[] = [];
  private _retryConfig: IRetryConfig | undefined;
  private _healthCheckConfig: IHealthCheckConfig | undefined;
  private _startupTimeoutMs: number | undefined;

  // Builder-provided functions
  private _startFn: (() => Promise<T>) | undefined;
  private _stopFn: ((instance: T) => Promise<void>) | undefined;
  private _healthCheckFn: ((instance: T) => Promise<boolean>) | undefined;

  // Instance: the resolved start result
  private _instance: T | undefined;

  // Labels
  public labels: Record<string, string> = {};

  // Runtime tracking
  private _startedAt: number | undefined;
  private _stoppedAt: number | undefined;
  private _errorCount = 0;
  private _lastError: string | undefined;
  private _retryCount = 0;

  // Health check tracking
  private _healthCheckTimer: ReturnType<typeof setTimeout> | undefined;
  private _lastHealthCheck: number | undefined;
  private _healthCheckOk: boolean | undefined;
  private _consecutiveHealthFailures = 0;

  // Auto-restart tracking
  private _autoRestartCount = 0;
  private _autoRestartTimer: ReturnType<typeof setTimeout> | undefined;

  /**
   * @param nameOrOptions Either the service name, or an options object whose
   *   fields pre-populate everything the builder methods can set.
   */
  constructor(nameOrOptions: string | IServiceOptions<T>) {
    if (typeof nameOrOptions === 'string') {
      this.name = nameOrOptions;
    } else {
      this.name = nameOrOptions.name;
      this._startFn = nameOrOptions.start;
      this._stopFn = nameOrOptions.stop;
      this._healthCheckFn = nameOrOptions.healthCheck;
      this._criticality = nameOrOptions.criticality ?? 'optional';
      // Copy: dependsOn() pushes into this array and must not mutate the
      // array owned by the caller's options object.
      this._dependencies = [...(nameOrOptions.dependencies ?? [])];
      this._retryConfig = nameOrOptions.retry;
      this._healthCheckConfig = nameOrOptions.healthCheckConfig;
      this._startupTimeoutMs = nameOrOptions.startupTimeoutMs;
      if (nameOrOptions.labels) {
        this.labels = { ...nameOrOptions.labels };
      }
    }
  }

  // ── Builder methods ──────────────────────────────────

  /** Marks the service as 'critical'. Returns `this` for chaining. */
  public critical(): this {
    this._criticality = 'critical';
    return this;
  }

  /** Marks the service as 'optional' (the default). Returns `this` for chaining. */
  public optional(): this {
    this._criticality = 'optional';
    return this;
  }

  /** Appends the given service names to this service's dependency list. */
  public dependsOn(...serviceNames: string[]): this {
    this._dependencies.push(...serviceNames);
    return this;
  }

  /** Sets the function that the default `serviceStart()` invokes. */
  public withStart(fn: () => Promise<T>): this {
    this._startFn = fn;
    return this;
  }

  /** Sets the function that the default `serviceStop()` invokes with the current instance. */
  public withStop(fn: (instance: T) => Promise<void>): this {
    this._stopFn = fn;
    return this;
  }

  /**
   * Sets the health check function and, when given, replaces the health
   * check configuration. An omitted `config` leaves the existing one intact.
   */
  public withHealthCheck(fn: (instance: T) => Promise<boolean>, config?: IHealthCheckConfig): this {
    this._healthCheckFn = fn;
    if (config) {
      this._healthCheckConfig = config;
    }
    return this;
  }

  /** Stores the retry configuration (exposed through the `retryConfig` getter). */
  public withRetry(config: IRetryConfig): this {
    this._retryConfig = config;
    return this;
  }

  /** Sets the startup timeout in milliseconds; a falsy value disables the timeout in `start()`. */
  public withStartupTimeout(ms: number): this {
    this._startupTimeoutMs = ms;
    return this;
  }

  /** Merges the given labels into the existing ones (existing keys are overwritten). */
  public withLabels(labelsArg: Record<string, string>): this {
    Object.assign(this.labels, labelsArg);
    return this;
  }

  // ── Label helpers ──────────────────────────────────

  /** Sets a single label. */
  public setLabel(key: string, value: string): void {
    this.labels[key] = value;
  }

  /** Returns the label value, or `undefined` when the label was never set. */
  public getLabel(key: string): string | undefined {
    // Own-property check so keys like 'toString' or 'constructor' do not
    // resolve to members inherited from Object.prototype.
    return this.hasOwnLabel(key) ? this.labels[key] : undefined;
  }

  /** Deletes a label. Returns `true` if it existed, `false` otherwise. */
  public removeLabel(key: string): boolean {
    if (this.hasOwnLabel(key)) {
      delete this.labels[key];
      return true;
    }
    return false;
  }

  /**
   * Without `value`: whether the label exists. With `value`: whether the
   * label exists and equals `value`.
   */
  public hasLabel(key: string, value?: string): boolean {
    if (!this.hasOwnLabel(key)) {
      return false;
    }
    return value === undefined || this.labels[key] === value;
  }

  // ── Overridable hooks (for subclassing) ──────────────

  /**
   * Produces the service instance. The default delegates to the function
   * given via `withStart()` / options.
   * @throws Error when no start function was provided and the hook is not overridden.
   */
  protected async serviceStart(): Promise<T> {
    if (this._startFn) {
      return this._startFn();
    }
    throw new Error(`Service '${this.name}': no start function provided. Use withStart() or override serviceStart().`);
  }

  /** Releases the service instance. The default delegates to the stop function, if any. */
  protected async serviceStop(): Promise<void> {
    if (this._stopFn) {
      return this._stopFn(this._instance as T);
    }
    // Default: no-op stop is fine (some services don't need explicit cleanup)
  }

  /** Probes the service. The default delegates to the health check function, if any. */
  protected async serviceHealthCheck(): Promise<boolean> {
    if (this._healthCheckFn) {
      return this._healthCheckFn(this._instance as T);
    }
    // No health check configured — assume healthy if running
    return this._state === 'running';
  }

  // ── Lifecycle (called by ServiceManager) ─────────────

  /**
   * Starts the service and resolves with its instance. Returns the existing
   * instance immediately when the service is already 'running'.
   *
   * On success the state becomes 'running', a 'started' event is emitted and
   * the periodic health check is armed. On failure (including a startup
   * timeout) the state becomes 'failed', a 'failed' event is emitted and the
   * error is rethrown.
   */
  public async start(): Promise<T> {
    if (this._state === 'running') {
      return this._instance as T;
    }

    this.setState('starting');

    try {
      const timeoutMs = this._startupTimeoutMs;
      // raceWithTimeout clears its timer once the race settles, so a fast
      // start no longer leaves a live timer behind for `timeoutMs`.
      const result = timeoutMs
        ? await Service.raceWithTimeout(
            this.serviceStart(),
            timeoutMs,
            `Service '${this.name}': startup timed out after ${timeoutMs}ms`,
          )
        : await this.serviceStart();

      this._instance = result;
      this._startedAt = Date.now();
      this._stoppedAt = undefined;
      this._consecutiveHealthFailures = 0;
      this._healthCheckOk = true;
      this._autoRestartCount = 0;
      this.setState('running');
      this.emitEvent('started');
      this.startHealthCheckTimer();
      return result;
    } catch (err) {
      this._errorCount++;
      this._lastError = err instanceof Error ? err.message : String(err);
      this.setState('failed');
      this.emitEvent('failed', { error: this._lastError });
      throw err;
    }
  }

  /**
   * Stops the service. No-op when already 'stopped' or 'stopping'. Errors
   * thrown by the stop hook are logged as warnings and do not prevent the
   * transition to 'stopped'.
   */
  public async stop(): Promise<void> {
    if (this._state === 'stopped' || this._state === 'stopping') {
      return;
    }

    this.stopHealthCheckTimer();
    this.clearAutoRestartTimer();
    this.setState('stopping');

    try {
      await this.serviceStop();
    } catch (err) {
      logger.log('warn', `Service '${this.name}' error during stop: ${err instanceof Error ? err.message : String(err)}`);
    }

    this._instance = undefined;
    this._stoppedAt = Date.now();
    this.setState('stopped');
    this.emitEvent('stopped');
  }

  /**
   * Runs one health probe, bounded by `healthCheckConfig.timeoutMs`
   * (default 5000 ms).
   *
   * @returns `undefined` when no health check is configured or overridden;
   *   otherwise the probe result. A throwing or timed-out probe counts as
   *   `false`.
   */
  public async checkHealth(): Promise<boolean | undefined> {
    if (!this._healthCheckFn && !this.hasOverriddenHealthCheck()) {
      return undefined;
    }

    try {
      const timeoutMs = this._healthCheckConfig?.timeoutMs ?? 5000;

      // The timeout timer is cleared as soon as the probe settles.
      const result = await Service.raceWithTimeout(
        this.serviceHealthCheck(),
        timeoutMs,
        'Health check timed out',
      );

      this._lastHealthCheck = Date.now();
      this._healthCheckOk = result;

      if (result) {
        this._consecutiveHealthFailures = 0;
        if (this._state === 'degraded') {
          this.setState('running');
          this.emitEvent('recovered');
        }
      } else {
        this._consecutiveHealthFailures++;
        this.handleHealthFailure();
      }

      this.emitEvent('healthCheck');
      return result;
    } catch {
      this._lastHealthCheck = Date.now();
      this._healthCheckOk = false;
      this._consecutiveHealthFailures++;
      this.handleHealthFailure();
      this.emitEvent('healthCheck');
      return false;
    }
  }

  // ── Wait / readiness ──────────────────────────────────

  /**
   * Resolves once the service is in (one of) the target state(s).
   *
   * Resolves immediately when the current state already matches; otherwise
   * it waits for an event on `eventSubject` whose `state` matches. States
   * that are entered without emitting an event ('starting', 'stopping' in
   * this class) are therefore not observed while waiting.
   *
   * @param targetState A state or list of states to wait for.
   * @param timeoutMs Optional limit; when omitted the wait is unbounded.
   * @throws Error when `timeoutMs` elapses first.
   */
  public async waitForState(
    targetState: TServiceState | TServiceState[],
    timeoutMs?: number,
  ): Promise<void> {
    const states = Array.isArray(targetState) ? targetState : [targetState];

    // Already in target state
    if (states.includes(this._state)) {
      return;
    }

    return new Promise<void>((resolve, reject) => {
      let timer: ReturnType<typeof setTimeout> | undefined;
      let settled = false;

      // Single exit point: unsubscribes and clears the timer exactly once.
      const settle = (fn: () => void) => {
        if (settled) return;
        settled = true;
        subscription.unsubscribe();
        if (timer) clearTimeout(timer);
        fn();
      };

      const subscription = this.eventSubject.subscribe((event) => {
        if (states.includes(event.state)) {
          settle(resolve);
        }
      });

      // Re-check after subscribing to close the race window
      if (states.includes(this._state)) {
        settle(resolve);
        return;
      }

      if (timeoutMs !== undefined) {
        timer = setTimeout(() => {
          settle(() =>
            reject(
              new Error(
                `Service '${this.name}': timed out waiting for state [${states.join(', ')}] after ${timeoutMs}ms (current: ${this._state})`,
              ),
            ),
          );
        }, timeoutMs);
      }
    });
  }

  /** Shorthand for `waitForState('running', timeoutMs)`. */
  public async waitForRunning(timeoutMs?: number): Promise<void> {
    return this.waitForState('running', timeoutMs);
  }

  /** Shorthand for `waitForState('stopped', timeoutMs)`. */
  public async waitForStopped(timeoutMs?: number): Promise<void> {
    return this.waitForState('stopped', timeoutMs);
  }

  // ── State ────────────────────────────────────────────

  public get state(): TServiceState {
    return this._state;
  }

  public get criticality(): TServiceCriticality {
    return this._criticality;
  }

  /** A copy of the dependency list; mutating it does not affect the service. */
  public get dependencies(): string[] {
    return [...this._dependencies];
  }

  public get retryConfig(): IRetryConfig | undefined {
    return this._retryConfig;
  }

  public get startupTimeoutMs(): number | undefined {
    return this._startupTimeoutMs;
  }

  /** The value resolved by the last successful start; `undefined` after `stop()`. */
  public get instance(): T | undefined {
    return this._instance;
  }

  /** Number of failed `start()` attempts so far. */
  public get errorCount(): number {
    return this._errorCount;
  }

  public get retryCount(): number {
    return this._retryCount;
  }

  // This class never modifies the retry counter itself; it is only written
  // through this setter.
  public set retryCount(value: number) {
    this._retryCount = value;
  }

  /**
   * Returns a snapshot of the service's status. `uptime` is only reported
   * while the state is exactly 'running'; `dependencies` and `labels` are
   * copies.
   */
  public getStatus(): IServiceStatus {
    return {
      name: this.name,
      state: this._state,
      criticality: this._criticality,
      startedAt: this._startedAt,
      stoppedAt: this._stoppedAt,
      lastHealthCheck: this._lastHealthCheck,
      healthCheckOk: this._healthCheckOk,
      uptime: this._startedAt && this._state === 'running'
        ? Date.now() - this._startedAt
        : undefined,
      errorCount: this._errorCount,
      lastError: this._lastError,
      retryCount: this._retryCount,
      dependencies: [...this._dependencies],
      labels: { ...this.labels },
      hasInstance: this._instance !== undefined,
    };
  }

  // ── Internal helpers ─────────────────────────────────

  /** Lets a timer stop keeping the process alive, where the runtime supports `unref()`. */
  private static unrefTimer(timer: unknown): void {
    if (typeof timer === 'object' && timer !== null && 'unref' in timer) {
      (timer as { unref(): void }).unref();
    }
  }

  /**
   * Awaits `work`, rejecting with `new Error(message)` if it has not settled
   * within `timeoutMs`. The timer is always cleared once the race settles.
   */
  private static async raceWithTimeout<R>(work: Promise<R>, timeoutMs: number, message: string): Promise<R> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error(message)), timeoutMs);
    });
    try {
      return await Promise.race([work, timeout]);
    } finally {
      clearTimeout(timer);
    }
  }

  /** True only for labels stored on the labels object itself (not inherited members). */
  private hasOwnLabel(key: string): boolean {
    return Object.prototype.hasOwnProperty.call(this.labels, key);
  }

  private setState(state: TServiceState): void {
    this._state = state;
  }

  /** Publishes an event carrying the current state; `extra` fields override the defaults. */
  private emitEvent(type: IServiceEvent['type'], extra?: Partial<IServiceEvent>): void {
    this.eventSubject.next({
      type,
      serviceName: this.name,
      state: this._state,
      timestamp: Date.now(),
      ...extra,
    });
  }

  /**
   * Applies the degraded/failed thresholds (defaults 3 and 5 consecutive
   * failures) after a failed probe, and schedules an auto-restart when the
   * configuration asks for one.
   */
  private handleHealthFailure(): void {
    // Health-driven transitions only make sense for a live service. A probe
    // that finishes after stop(), or one run manually on a stopped/failed
    // service, must not flip the state or queue another restart.
    if (this._state !== 'running' && this._state !== 'degraded') {
      return;
    }

    const config = this._healthCheckConfig;
    const failuresBeforeDegraded = config?.failuresBeforeDegraded ?? 3;
    const failuresBeforeFailed = config?.failuresBeforeFailed ?? 5;

    if (this._state === 'running' && this._consecutiveHealthFailures >= failuresBeforeDegraded) {
      this.setState('degraded');
      this.emitEvent('degraded');
    }

    if (this._consecutiveHealthFailures >= failuresBeforeFailed) {
      this.setState('failed');
      this._lastError = `Health check failed ${this._consecutiveHealthFailures} consecutive times`;
      this.emitEvent('failed', { error: this._lastError });
      this.stopHealthCheckTimer();

      // Auto-restart if configured
      if (config?.autoRestart) {
        this.scheduleAutoRestart();
      }
    }
  }

  /**
   * Schedules one auto-restart attempt with exponential backoff
   * (`autoRestartDelayMs * autoRestartBackoffFactor ^ attempts`, capped at
   * 60 s). Gives up with a warning once `maxAutoRestarts` (default 3) is
   * reached; a value of 0 or less disables the limit.
   */
  private scheduleAutoRestart(): void {
    const config = this._healthCheckConfig;
    const maxRestarts = config?.maxAutoRestarts ?? 3;
    if (maxRestarts > 0 && this._autoRestartCount >= maxRestarts) {
      logger.log('warn', `Service '${this.name}': max auto-restarts (${maxRestarts}) exceeded`);
      return;
    }

    const baseDelay = config?.autoRestartDelayMs ?? 5000;
    const factor = config?.autoRestartBackoffFactor ?? 2;
    const delay = Math.min(baseDelay * Math.pow(factor, this._autoRestartCount), 60000);

    // Never keep two restart attempts pending at once.
    this.clearAutoRestartTimer();

    this._autoRestartCount++;
    this.emitEvent('autoRestarting', { attempt: this._autoRestartCount });

    this._autoRestartTimer = setTimeout(() => {
      this._autoRestartTimer = undefined;
      void this.performAutoRestart();
    }, delay);
    Service.unrefTimer(this._autoRestartTimer);
  }

  /**
   * Body of a scheduled auto-restart: releases the failed instance, then
   * starts fresh. On a failed start another attempt is scheduled.
   */
  private async performAutoRestart(): Promise<void> {
    // The service may have been started manually since scheduling; in that
    // case restarting would replace a live instance.
    if (this._state !== 'failed') {
      return;
    }

    // Stop first to clean up (best effort, mirroring stop()), so the failed
    // instance's resources are released instead of being dropped.
    try {
      await this.serviceStop();
    } catch (err) {
      logger.log('warn', `Service '${this.name}' error during stop: ${err instanceof Error ? err.message : String(err)}`);
    }

    // The state may have changed while the stop hook was running.
    if (this._state !== 'failed') {
      return;
    }

    this._instance = undefined;
    this._stoppedAt = Date.now();
    this.setState('stopped');

    try {
      await this.start();
      // Success — reset counter
      this._autoRestartCount = 0;
    } catch (err) {
      logger.log('warn', `Service '${this.name}': auto-restart attempt ${this._autoRestartCount} failed: ${err instanceof Error ? err.message : String(err)}`);
      // Schedule another attempt
      this.scheduleAutoRestart();
    }
  }

  private clearAutoRestartTimer(): void {
    if (this._autoRestartTimer) {
      clearTimeout(this._autoRestartTimer);
      this._autoRestartTimer = undefined;
    }
  }

  /**
   * Arms the periodic health check (every `intervalMs`, default 30000 ms).
   * Does nothing when no health check is configured or overridden. The loop
   * ends by itself once the state is neither 'running' nor 'degraded'.
   */
  private startHealthCheckTimer(): void {
    if (!this._healthCheckFn && !this.hasOverriddenHealthCheck()) {
      return;
    }
    const intervalMs = this._healthCheckConfig?.intervalMs ?? 30000;

    this.stopHealthCheckTimer();

    const scheduleNext = (): void => {
      this._healthCheckTimer = setTimeout(tick, intervalMs);
      Service.unrefTimer(this._healthCheckTimer);
    };

    const tick = (): void => {
      if (this._state !== 'running' && this._state !== 'degraded') {
        return;
      }
      // checkHealth() handles its own failures; the catch only guards
      // against an unexpected rejection becoming unhandled.
      this.checkHealth().catch(() => {});
      scheduleNext();
    };

    scheduleNext();
  }

  private stopHealthCheckTimer(): void {
    if (this._healthCheckTimer) {
      clearTimeout(this._healthCheckTimer);
      this._healthCheckTimer = undefined;
    }
  }

  /** Whether a subclass replaced `serviceHealthCheck()`. */
  private hasOverriddenHealthCheck(): boolean {
    return this.serviceHealthCheck !== Service.prototype.serviceHealthCheck;
  }
}
|