Files
smartstorage/rust/src/cluster/membership.rs
T

244 lines
8.3 KiB
Rust

use anyhow::Result;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;
use super::drive_manager::{DriveManager, DriveStatus};
use super::protocol::{
ClusterRequest, ClusterResponse, DriveStateInfo, HeartbeatMessage, JoinRequestMessage,
NodeInfo,
};
use super::quic_transport::QuicTransport;
use super::state::ClusterState;
/// Manages cluster membership: heartbeating, joining, failure detection.
pub struct MembershipManager {
state: Arc<ClusterState>,
transport: Arc<QuicTransport>,
heartbeat_interval: Duration,
heartbeat_timeout: Duration,
local_node_info: NodeInfo,
drive_manager: Option<Arc<Mutex<DriveManager>>>,
}
impl MembershipManager {
pub fn new(
state: Arc<ClusterState>,
transport: Arc<QuicTransport>,
heartbeat_interval_ms: u64,
heartbeat_timeout_ms: u64,
local_node_info: NodeInfo,
) -> Self {
Self {
state,
transport,
heartbeat_interval: Duration::from_millis(heartbeat_interval_ms),
heartbeat_timeout: Duration::from_millis(heartbeat_timeout_ms),
local_node_info,
drive_manager: None,
}
}
/// Set the drive manager for health reporting in heartbeats.
pub fn with_drive_manager(mut self, dm: Arc<Mutex<DriveManager>>) -> Self {
self.drive_manager = Some(dm);
self
}
/// Join the cluster by contacting seed nodes.
/// Sends a JoinRequest to each seed node until one accepts.
pub async fn join_cluster(&self, seed_nodes: &[String], allow_bootstrap_on_failure: bool) -> Result<()> {
if seed_nodes.is_empty() {
tracing::info!("No seed nodes configured, starting as initial cluster node");
self.state.add_node(self.local_node_info.clone()).await;
return Ok(());
}
for seed in seed_nodes {
let addr: SocketAddr = match seed.parse() {
Ok(a) => a,
Err(e) => {
tracing::warn!("Invalid seed node address '{}': {}", seed, e);
continue;
}
};
tracing::info!("Attempting to join cluster via seed node {}", seed);
match self.try_join(addr).await {
Ok(()) => {
tracing::info!("Successfully joined cluster via {}", seed);
return Ok(());
}
Err(e) => {
tracing::warn!("Failed to join via {}: {}", seed, e);
}
}
}
if allow_bootstrap_on_failure {
tracing::warn!("Could not reach any seed nodes, bootstrapping a new cluster because no persisted topology exists");
self.state.add_node(self.local_node_info.clone()).await;
return Ok(());
}
anyhow::bail!("Could not reach any configured seed nodes; refusing unsafe cluster bootstrap")
}
async fn try_join(&self, addr: SocketAddr) -> Result<()> {
let conn = self
.transport
.get_connection("seed", addr)
.await?;
let request = ClusterRequest::JoinRequest(JoinRequestMessage {
node_info: self.local_node_info.clone(),
});
let response = self.transport.send_request(&conn, &request).await?;
match response {
ClusterResponse::JoinResponse(join_resp) => {
if join_resp.accepted {
if let Some(topology) = &join_resp.topology {
let topology_contains_self = topology
.nodes
.iter()
.any(|node| node.node_id == self.local_node_info.node_id);
self.state.apply_topology(topology).await;
if !topology_contains_self {
self.state.add_node(self.local_node_info.clone()).await;
}
tracing::info!(
"Applied cluster topology (version {}, {} nodes, {} erasure sets)",
topology.version,
topology.nodes.len(),
topology.erasure_sets.len(),
);
}
Ok(())
} else {
anyhow::bail!(
"Join rejected: {}",
join_resp.error.unwrap_or_default()
)
}
}
ClusterResponse::Error(e) => {
anyhow::bail!("Join error: {} - {}", e.code, e.message)
}
_ => anyhow::bail!("Unexpected response to join request"),
}
}
/// Run the heartbeat loop. Sends heartbeats to all peers periodically.
pub async fn heartbeat_loop(self: Arc<Self>, mut shutdown: tokio::sync::watch::Receiver<bool>) {
let mut interval = tokio::time::interval(self.heartbeat_interval);
loop {
tokio::select! {
_ = interval.tick() => {
self.send_heartbeats().await;
}
_ = shutdown.changed() => break,
}
}
}
async fn send_heartbeats(&self) {
let peers = self
.state
.all_nodes()
.await
.into_iter()
.filter(|node| node.info.node_id != self.local_node_info.node_id)
.collect::<Vec<_>>();
let topology_version = self.state.version().await;
let mut responded = Vec::new();
// Collect drive health states
let drive_states = self.collect_drive_states().await;
for peer in &peers {
let addr: SocketAddr = match peer.info.quic_addr.parse() {
Ok(a) => a,
Err(_) => continue,
};
let heartbeat = ClusterRequest::Heartbeat(HeartbeatMessage {
node_id: self.local_node_info.node_id.clone(),
timestamp: chrono::Utc::now().to_rfc3339(),
drive_states: drive_states.clone(),
topology_version,
});
match tokio::time::timeout(
self.heartbeat_timeout,
self.send_heartbeat_to_peer(&peer.info.node_id, addr, &heartbeat),
)
.await
{
Ok(Ok(())) => {
responded.push(peer.info.node_id.clone());
}
Ok(Err(e)) => {
tracing::debug!(
peer = %peer.info.node_id,
error = %e,
"Heartbeat failed"
);
}
Err(_) => {
tracing::debug!(peer = %peer.info.node_id, "Heartbeat timed out");
}
}
}
// Update state based on responses
let status_changes = self.state.tick_heartbeats(&responded).await;
for (node_id, status) in &status_changes {
tracing::info!(node = %node_id, status = ?status, "Node status changed");
}
}
async fn send_heartbeat_to_peer(
&self,
node_id: &str,
addr: SocketAddr,
heartbeat: &ClusterRequest,
) -> Result<()> {
let conn = self.transport.get_connection(node_id, addr).await?;
let _response = self.transport.send_request(&conn, heartbeat).await?;
Ok(())
}
/// Collect drive health states from the DriveManager, if available.
async fn collect_drive_states(&self) -> Vec<DriveStateInfo> {
let dm = match &self.drive_manager {
Some(dm) => dm,
None => return Vec::new(),
};
let mut manager = dm.lock().await;
let results = manager.check_all_drives().await;
results
.into_iter()
.map(|(idx, status)| {
let status_str = match status {
DriveStatus::Online => "online",
DriveStatus::Degraded => "degraded",
DriveStatus::Offline => "offline",
DriveStatus::Healing => "healing",
};
DriveStateInfo {
drive_index: idx as u32,
status: status_str.to_string(),
}
})
.collect()
}
}