Files
elasticsearch/ts/core/connection/health-check.ts

305 lines
7.1 KiB
TypeScript

import type { Client as ElasticClient } from '@elastic/elasticsearch';
/**
 * Coarse health verdict produced by a health check.
 */
export enum HealthStatus {
  /** The cluster is reachable and considered fully healthy. */
  HEALTHY = 'healthy',
  /** The cluster is reachable but not in an ideal state. */
  DEGRADED = 'degraded',
  /** The cluster is considered unhealthy. */
  UNHEALTHY = 'unhealthy',
  /** No verdict is available (the state before any check has completed). */
  UNKNOWN = 'unknown',
}
/**
 * Cluster-level status string as reported by Elasticsearch.
 *
 * The health checker in this file maps `green` to healthy, `yellow` to
 * degraded and `red` to unhealthy.
 */
export enum ClusterHealth {
  GREEN = 'green',
  YELLOW = 'yellow',
  RED = 'red',
}
/**
 * Outcome of a single health check.
 */
export interface HealthCheckResult {
  /** Overall verdict derived from the ping and (optionally) the cluster health. */
  status: HealthStatus;

  /** Cluster health reported by Elasticsearch, when it was requested and obtained. */
  clusterHealth?: ClusterHealth;

  /** `true` when the cluster answered the ping, `false` otherwise. */
  available: boolean;

  /** Time the ping took, in milliseconds; absent when the ping itself failed. */
  responseTimeMs?: number;

  /** Number of nodes reported by the cluster health call, when available. */
  activeNodes?: number;

  /** The error that caused a failed or degraded check, if any. */
  error?: Error;

  /** Moment at which this result was produced. */
  timestamp: Date;

  /** Free-form additional details. */
  details?: Record<string, unknown>;
}
/**
 * Tunables for the health checker.
 */
export interface HealthCheckConfig {
  /** Delay between two periodic health checks, in milliseconds. */
  interval: number;

  /** Maximum time to wait for a health check request, in milliseconds. */
  timeout: number;

  /**
   * Number of consecutive failed checks required before the status is
   * reported as unhealthy; fewer failures are reported as degraded.
   */
  unhealthyThreshold: number;

  /**
   * Number of consecutive successful checks required before the status is
   * reported as healthy when no cluster health value is available.
   */
  healthyThreshold: number;

  /** When `true`, each check also queries the cluster health API. */
  checkClusterHealth: boolean;
}
/**
 * Settings used for every option the caller does not override:
 * a check every 30 seconds, a 5 second request timeout, three consecutive
 * failures to become unhealthy, two consecutive successes to become healthy,
 * and cluster health querying switched on.
 */
export const DEFAULT_HEALTH_CHECK_CONFIG: HealthCheckConfig = {
  interval: 30 * 1000,
  timeout: 5 * 1000,
  unhealthyThreshold: 3,
  healthyThreshold: 2,
  checkClusterHealth: true,
};
/**
 * Health checker for an Elasticsearch cluster.
 *
 * A check pings the cluster and, when `checkClusterHealth` is enabled, also
 * queries the cluster health API. The outcome is condensed into a
 * `HealthStatus` and remembered, so `getStatus()`, `getLastCheckResult()`,
 * `isHealthy()` and `isAvailable()` answer from the most recent check without
 * issuing a request.
 */
export class HealthChecker {
  private readonly config: HealthCheckConfig;
  private consecutiveFailures = 0;
  private consecutiveSuccesses = 0;
  private currentStatus: HealthStatus = HealthStatus.UNKNOWN;
  private lastCheckResult?: HealthCheckResult;
  private checkInterval?: ReturnType<typeof setInterval>;
  private isChecking = false;

  /**
   * @param client - Elasticsearch client used for the ping and cluster health calls.
   * @param config - Overrides merged on top of `DEFAULT_HEALTH_CHECK_CONFIG`.
   */
  constructor(
    private client: ElasticClient,
    config: Partial<HealthCheckConfig> = {}
  ) {
    this.config = { ...DEFAULT_HEALTH_CHECK_CONFIG, ...config };
  }

  /**
   * Perform a single health check.
   *
   * Never rejects: every failure is reported through the returned result.
   *
   * - Ping fails or times out: `available` is `false`; the status is
   *   DEGRADED until `unhealthyThreshold` consecutive failures, then UNHEALTHY.
   * - Ping succeeds but the cluster health call fails or times out: DEGRADED
   *   with `available: true`; the check still counts as a failure.
   * - Both succeed: green → HEALTHY, yellow → DEGRADED, red → UNHEALTHY.
   * - Ping succeeds and no recognised cluster health is available: HEALTHY
   *   once `healthyThreshold` consecutive successes are reached, DEGRADED before.
   *
   * @returns The result of this check, also stored as the last check result.
   */
  async check(): Promise<HealthCheckResult> {
    const startTime = Date.now();
    try {
      const pingResponse: unknown = await this.withTimeout(
        this.client.ping(),
        this.config.timeout
      );
      const responseTime = Date.now() - startTime;
      if (!HealthChecker.isPingSuccessful(pingResponse)) {
        throw new Error('Cluster ping failed');
      }

      let clusterHealth: ClusterHealth | undefined;
      let activeNodes: number | undefined;
      if (this.config.checkClusterHealth) {
        try {
          // `timeout` is the server-side parameter; withTimeout() adds the same
          // client-side deadline the ping has, so a stalled request cannot
          // block the check longer than configured.
          const healthResponse = await this.withTimeout(
            this.client.cluster.health({
              timeout: `${this.config.timeout}ms`,
            }),
            this.config.timeout
          );
          clusterHealth = healthResponse.status as ClusterHealth;
          activeNodes = healthResponse.number_of_nodes;
        } catch (error) {
          // The ping worked but cluster health did not: reachable, yet degraded.
          this.consecutiveSuccesses = 0;
          this.consecutiveFailures++;
          // record() also updates currentStatus, keeping getStatus() in line
          // with the returned result so change detection does not re-fire.
          return this.record({
            status: HealthStatus.DEGRADED,
            available: true,
            responseTimeMs: responseTime,
            error: HealthChecker.toError(error),
            timestamp: new Date(),
          });
        }
      }

      this.consecutiveFailures = 0;
      this.consecutiveSuccesses++;
      return this.record({
        status: this.statusForClusterHealth(clusterHealth),
        clusterHealth,
        available: true,
        responseTimeMs: responseTime,
        activeNodes,
        timestamp: new Date(),
      });
    } catch (error) {
      this.consecutiveSuccesses = 0;
      this.consecutiveFailures++;
      const status =
        this.consecutiveFailures >= this.config.unhealthyThreshold
          ? HealthStatus.UNHEALTHY
          : HealthStatus.DEGRADED;
      return this.record({
        status,
        available: false,
        error: HealthChecker.toError(error),
        timestamp: new Date(),
      });
    }
  }

  /**
   * Start periodic health checks.
   *
   * Runs one check immediately, then one every `config.interval`
   * milliseconds. Does nothing when periodic checks are already running. A
   * tick is skipped while the previous periodic check is still in flight.
   *
   * @param onHealthChange - Called with the check result whenever its status
   *   differs from the status held before that check. Exceptions thrown by
   *   the callback are swallowed.
   */
  startPeriodicChecks(onHealthChange?: (result: HealthCheckResult) => void): void {
    if (this.checkInterval) {
      return; // Already running
    }

    const performCheck = async (): Promise<void> => {
      if (this.isChecking) return;
      this.isChecking = true;
      try {
        const previousStatus = this.currentStatus;
        const result = await this.check();
        if (onHealthChange && result.status !== previousStatus) {
          onHealthChange(result);
        }
      } catch {
        // check() never rejects, so anything caught here was thrown by
        // onHealthChange. It is swallowed deliberately so a faulty listener
        // cannot stop the checks or cause an unhandled rejection.
      } finally {
        this.isChecking = false;
      }
    };

    // Initial check; performCheck() handles its own errors, hence `void`.
    void performCheck();
    this.checkInterval = setInterval(() => {
      void performCheck();
    }, this.config.interval);
  }

  /**
   * Stop periodic health checks. Safe to call when none are running.
   */
  stopPeriodicChecks(): void {
    if (this.checkInterval) {
      clearInterval(this.checkInterval);
      this.checkInterval = undefined;
    }
  }

  /**
   * Get the status established by the most recent check
   * (`UNKNOWN` before the first check and after `reset()`).
   */
  getStatus(): HealthStatus {
    return this.currentStatus;
  }

  /**
   * Get the result of the most recent check, or `undefined` if there is none.
   */
  getLastCheckResult(): HealthCheckResult | undefined {
    return this.lastCheckResult;
  }

  /**
   * Whether the current status is exactly `HEALTHY`.
   */
  isHealthy(): boolean {
    return this.currentStatus === HealthStatus.HEALTHY;
  }

  /**
   * Whether the most recent check reached the cluster
   * (`false` when no check has been recorded).
   */
  isAvailable(): boolean {
    return this.lastCheckResult?.available ?? false;
  }

  /**
   * Forget all accumulated state: counters, current status and last result.
   * Does not stop periodic checks.
   */
  reset(): void {
    this.consecutiveFailures = 0;
    this.consecutiveSuccesses = 0;
    this.currentStatus = HealthStatus.UNKNOWN;
    this.lastCheckResult = undefined;
  }

  /**
   * Cleanup resources (stops periodic checks).
   */
  destroy(): void {
    this.stopPeriodicChecks();
  }

  /**
   * Store `result` as the latest outcome and align the current status with it.
   */
  private record(result: HealthCheckResult): HealthCheckResult {
    this.currentStatus = result.status;
    this.lastCheckResult = result;
    return result;
  }

  /**
   * Map a cluster health value to a status. Without a recognised value the
   * verdict depends on the streak of consecutive successful checks.
   */
  private statusForClusterHealth(clusterHealth: ClusterHealth | undefined): HealthStatus {
    switch (clusterHealth) {
      case ClusterHealth.GREEN:
        return HealthStatus.HEALTHY;
      case ClusterHealth.YELLOW:
        return HealthStatus.DEGRADED;
      case ClusterHealth.RED:
        return HealthStatus.UNHEALTHY;
      default:
        return this.consecutiveSuccesses >= this.config.healthyThreshold
          ? HealthStatus.HEALTHY
          : HealthStatus.DEGRADED;
    }
  }

  /**
   * Resolve or reject with `operation`, or reject with a timeout error after
   * `ms` milliseconds, whichever happens first. The timer is always cleared
   * so a finished check leaves no pending timer behind.
   */
  private async withTimeout<T>(operation: Promise<T>, ms: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const expiry = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error(`Health check timeout after ${ms}ms`)), ms);
    });
    try {
      return await Promise.race([operation, expiry]);
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * A ping counts as successful when it yields `true` or an object whose
   * `statusCode` is 200; anything else (including `undefined`) is a failure.
   */
  private static isPingSuccessful(response: unknown): boolean {
    if (response === true) {
      return true;
    }
    return (
      typeof response === 'object' &&
      response !== null &&
      (response as { statusCode?: unknown }).statusCode === 200
    );
  }

  /**
   * Return `value` unchanged when it is an `Error`, otherwise wrap its string
   * form so `HealthCheckResult.error` always holds a real `Error`.
   */
  private static toError(value: unknown): Error {
    return value instanceof Error ? value : new Error(String(value));
  }
}