// BREAKING CHANGE(core): Refactor to v3: introduce modular core/domain architecture, plugin system, observability and strict TypeScript configuration; remove legacy classes
// ts/core/connection/health-check.ts — new file (304 lines)

import type { Client as ElasticClient } from '@elastic/elasticsearch';

/**
 * Overall health verdict produced by {@link HealthChecker}.
 *
 * The string values are part of the public contract (they may be logged or
 * serialized), so they must not be changed.
 */
export enum HealthStatus {
  /** Cluster answered the ping and is considered fully operational. */
  HEALTHY = 'healthy',
  /** Cluster is reachable but impaired, or a failure streak is below the unhealthy threshold. */
  DEGRADED = 'degraded',
  /** Cluster reports `red`, or enough consecutive checks have failed. */
  UNHEALTHY = 'unhealthy',
  /** No check has completed yet (initial state, and the state after `reset()`). */
  UNKNOWN = 'unknown',
}

/**
 * Cluster health colour as reported by the Elasticsearch cluster health API.
 *
 * {@link HealthChecker} maps `green` to healthy, `yellow` to degraded and
 * `red` to unhealthy.
 */
export enum ClusterHealth {
  GREEN = 'green',
  YELLOW = 'yellow',
  RED = 'red',
}

/**
 * Outcome of a single health check.
 *
 * Only `status`, `available` and `timestamp` are always present; the other
 * fields are filled in depending on how far the check got.
 */
export interface HealthCheckResult {
  /** Overall health status derived for this check. */
  status: HealthStatus;

  /** Cluster health colour from Elasticsearch; absent when it was not queried or the query failed. */
  clusterHealth?: ClusterHealth;

  /** Whether the cluster answered the ping. */
  available: boolean;

  /** Time the ping took, in milliseconds; absent when the ping failed. */
  responseTimeMs?: number;

  /** Node count reported by the cluster health response, when it was queried successfully. */
  activeNodes?: number;

  /** The error that caused the check (or its cluster health step) to fail. */
  error?: Error;

  /** When this result was created. */
  timestamp: Date;

  /** Additional free-form details. */
  details?: Record<string, unknown>;
}

/**
 * Tunables for {@link HealthChecker}.
 */
export interface HealthCheckConfig {
  /** Interval between periodic health checks, in milliseconds. */
  interval: number;

  /**
   * Timeout for health check requests, in milliseconds. Bounds the ping and is
   * also passed to the cluster health request as `<timeout>ms`.
   */
  timeout: number;

  /**
   * Number of consecutive failures at which a failed check is reported as
   * unhealthy; below this a failed check is reported as degraded.
   */
  unhealthyThreshold: number;

  /**
   * Number of consecutive successes required before a successful check is
   * reported as healthy when no cluster health colour is available; below
   * this it is reported as degraded.
   */
  healthyThreshold: number;

  /** Whether to query cluster health after a successful ping. */
  checkClusterHealth: boolean;
}

/**
 * Default health check configuration.
 *
 * Used as the base that caller-supplied overrides are merged onto in the
 * `HealthChecker` constructor.
 */
export const DEFAULT_HEALTH_CHECK_CONFIG: HealthCheckConfig = {
  interval: 30000, // 30 seconds
  timeout: 5000, // 5 seconds
  unhealthyThreshold: 3,
  healthyThreshold: 2,
  checkClusterHealth: true,
};

/**
 * Health checker for an Elasticsearch cluster.
 *
 * A check pings the cluster (bounded by `config.timeout`) and, when
 * `config.checkClusterHealth` is set, also queries cluster health. The result
 * is folded into a {@link HealthStatus}, and consecutive success/failure
 * counters are kept so that a single blip does not immediately flip the
 * status to unhealthy.
 *
 * Checks can be run on demand via {@link HealthChecker.check} or periodically
 * via {@link HealthChecker.startPeriodicChecks}.
 */
export class HealthChecker {
  private readonly config: HealthCheckConfig;
  private consecutiveFailures = 0;
  private consecutiveSuccesses = 0;
  private currentStatus: HealthStatus = HealthStatus.UNKNOWN;
  private lastCheckResult?: HealthCheckResult;
  private checkInterval?: ReturnType<typeof setInterval>;
  /** Guards against overlapping periodic checks when one outlasts the interval. */
  private isChecking = false;

  /**
   * @param client - Elasticsearch client used for `ping()` and `cluster.health()`.
   * @param config - Overrides merged on top of `DEFAULT_HEALTH_CHECK_CONFIG`.
   */
  constructor(
    private client: ElasticClient,
    config: Partial<HealthCheckConfig> = {}
  ) {
    this.config = { ...DEFAULT_HEALTH_CHECK_CONFIG, ...config };
  }

  /**
   * Perform a single health check.
   *
   * Never rejects: every failure is reported through the returned result
   * (`available: false` and/or `error`). The result is also stored, so
   * `getStatus()` and `getLastCheckResult()` always agree with what was
   * returned here.
   *
   * @returns The outcome of this check.
   */
  async check(): Promise<HealthCheckResult> {
    const startTime = Date.now();

    try {
      // Ping the cluster, giving up after the configured timeout.
      const pingResponse: unknown = await this.withTimeout(
        this.client.ping(),
        this.config.timeout
      );
      const responseTime = Date.now() - startTime;

      if (!HealthChecker.isPingSuccess(pingResponse)) {
        throw new Error('Cluster ping failed');
      }

      let clusterHealth: ClusterHealth | undefined;
      let activeNodes: number | undefined;

      if (this.config.checkClusterHealth) {
        try {
          const healthResponse = await this.client.cluster.health({
            timeout: `${this.config.timeout}ms`,
          });

          clusterHealth = healthResponse.status as ClusterHealth;
          activeNodes = healthResponse.number_of_nodes;
        } catch (error) {
          // Ping succeeded but cluster health could not be fetched: the
          // cluster is reachable, so report it as available but degraded.
          this.consecutiveSuccesses = 0;
          this.consecutiveFailures++;

          return this.record({
            status: HealthStatus.DEGRADED,
            available: true,
            responseTimeMs: responseTime,
            error: HealthChecker.toError(error),
            timestamp: new Date(),
          });
        }
      }

      // Success: the failure streak ends here.
      this.consecutiveFailures = 0;
      this.consecutiveSuccesses++;

      return this.record({
        status: this.statusFor(clusterHealth),
        clusterHealth,
        available: true,
        responseTimeMs: responseTime,
        activeNodes,
        timestamp: new Date(),
      });
    } catch (error) {
      this.consecutiveSuccesses = 0;
      this.consecutiveFailures++;

      // Stay "degraded" until the failure streak reaches the threshold.
      const status =
        this.consecutiveFailures >= this.config.unhealthyThreshold
          ? HealthStatus.UNHEALTHY
          : HealthStatus.DEGRADED;

      return this.record({
        status,
        available: false,
        error: HealthChecker.toError(error),
        timestamp: new Date(),
      });
    }
  }

  /**
   * Start periodic health checks.
   *
   * Runs one check immediately and then one every `config.interval`
   * milliseconds. Calling this while checks are already running is a no-op.
   *
   * @param onHealthChange - Invoked with the new result whenever the status
   *   differs from the status before that check.
   */
  startPeriodicChecks(onHealthChange?: (result: HealthCheckResult) => void): void {
    if (this.checkInterval) {
      return; // Already running
    }

    const performCheck = async (): Promise<void> => {
      if (this.isChecking) return;

      this.isChecking = true;
      try {
        const previousStatus = this.currentStatus;
        const result = await this.check();

        if (onHealthChange && result.status !== previousStatus) {
          onHealthChange(result);
        }
      } catch {
        // check() never rejects, so anything caught here was thrown by
        // onHealthChange. Ignored on purpose (best effort): a faulty listener
        // must not surface as an unhandled rejection from the timer.
      } finally {
        this.isChecking = false;
      }
    };

    // Perform the initial check right away; failures are handled inside.
    void performCheck();

    // Schedule the periodic checks.
    this.checkInterval = setInterval(() => {
      void performCheck();
    }, this.config.interval);
  }

  /**
   * Stop periodic health checks. Safe to call when none are running.
   */
  stopPeriodicChecks(): void {
    if (this.checkInterval) {
      clearInterval(this.checkInterval);
      this.checkInterval = undefined;
    }
  }

  /**
   * Get the status determined by the most recent check
   * (`HealthStatus.UNKNOWN` before the first check or after `reset()`).
   */
  getStatus(): HealthStatus {
    return this.currentStatus;
  }

  /**
   * Get the last health check result, or `undefined` if no check has run.
   */
  getLastCheckResult(): HealthCheckResult | undefined {
    return this.lastCheckResult;
  }

  /**
   * Check whether the current status is `HealthStatus.HEALTHY`.
   */
  isHealthy(): boolean {
    return this.currentStatus === HealthStatus.HEALTHY;
  }

  /**
   * Check whether the cluster answered the most recent ping
   * (`false` when no check has run yet).
   */
  isAvailable(): boolean {
    return this.lastCheckResult?.available ?? false;
  }

  /**
   * Reset counters, status and the stored result to their initial values.
   * Does not stop periodic checks.
   */
  reset(): void {
    this.consecutiveFailures = 0;
    this.consecutiveSuccesses = 0;
    this.currentStatus = HealthStatus.UNKNOWN;
    this.lastCheckResult = undefined;
  }

  /**
   * Cleanup resources (stops periodic checks).
   */
  destroy(): void {
    this.stopPeriodicChecks();
  }

  /**
   * Store a result as the latest one and keep `currentStatus` in sync with it.
   */
  private record(result: HealthCheckResult): HealthCheckResult {
    this.currentStatus = result.status;
    this.lastCheckResult = result;
    return result;
  }

  /**
   * Map a cluster health colour to an overall status. Without a recognised
   * colour, fall back to the consecutive-success threshold.
   */
  private statusFor(clusterHealth: ClusterHealth | undefined): HealthStatus {
    switch (clusterHealth) {
      case ClusterHealth.GREEN:
        return HealthStatus.HEALTHY;
      case ClusterHealth.YELLOW:
        return HealthStatus.DEGRADED;
      case ClusterHealth.RED:
        return HealthStatus.UNHEALTHY;
      default:
        // No cluster health, but ping succeeded.
        return this.consecutiveSuccesses >= this.config.healthyThreshold
          ? HealthStatus.HEALTHY
          : HealthStatus.DEGRADED;
    }
  }

  /**
   * Race an operation against a timeout, always clearing the timer afterwards
   * so a settled check leaves no pending timer behind.
   */
  private async withTimeout<T>(operation: PromiseLike<T>, ms: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const deadline = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error(`Health check timeout after ${ms}ms`)), ms);
    });

    try {
      return await Promise.race([operation, deadline]);
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * A ping counts as successful when it resolves to `true` or to a response
   * object whose `statusCode` is 200.
   */
  private static isPingSuccess(response: unknown): boolean {
    if (response === true) return true;
    return (
      typeof response === 'object' &&
      response !== null &&
      (response as { statusCode?: unknown }).statusCode === 200
    );
  }

  /**
   * Normalise a caught value to an `Error` so `HealthCheckResult.error` is
   * always a real `Error` instance.
   */
  private static toError(value: unknown): Error {
    return value instanceof Error ? value : new Error(String(value));
  }
}

Reference in New Issue
Block a user