Files
smartstorage/rust/src/cluster/protocol.rs

385 lines
10 KiB
Rust

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use super::metadata::ObjectManifest;
/// All inter-node cluster messages, serialized with bincode over QUIC streams.
///
/// Each message type gets its own bidirectional QUIC stream.
/// For shard data transfers, the header is sent first (bincode),
/// then raw shard bytes follow on the same stream.
///
/// Requests are framed for the wire by `encode_request` and parsed back by
/// `decode_request`, both of which use a 4-byte little-endian length prefix.
///
/// NOTE(review): bincode presumably identifies enum variants by declaration
/// index, so reordering or removing variants would change the wire format;
/// confirm before touching the variant order.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ClusterRequest {
// ============================
// Shard operations
// ============================
/// Write a shard to a specific drive on the target node.
/// Shard data follows after this header on the same stream.
ShardWrite(ShardWriteRequest),
/// Read a shard from the target node.
ShardRead(ShardReadRequest),
/// Delete a shard from the target node.
ShardDelete(ShardDeleteRequest),
/// Check if a shard exists and get its metadata.
ShardHead(ShardHeadRequest),
// ============================
// Manifest operations
// ============================
/// Store an object manifest on the target node.
ManifestWrite(ManifestWriteRequest),
/// Retrieve an object manifest from the target node.
ManifestRead(ManifestReadRequest),
/// Delete an object manifest from the target node.
ManifestDelete(ManifestDeleteRequest),
/// List all manifests for a bucket on the target node.
ManifestList(ManifestListRequest),
// ============================
// Cluster management
// ============================
/// Periodic heartbeat.
Heartbeat(HeartbeatMessage),
/// Request to join the cluster.
JoinRequest(JoinRequestMessage),
/// Synchronize cluster topology.
TopologySync(TopologySyncMessage),
// ============================
// Healing
// ============================
/// Request a shard to be reconstructed and placed on a target drive.
HealRequest(HealRequestMessage),
}
/// Responses to cluster requests.
///
/// Responses are framed for the wire by `encode_response` and parsed back by
/// `decode_response`, using the same 4-byte length prefix as requests.
///
/// The variant names mirror those of `ClusterRequest`; presumably each request
/// is answered either by its namesake variant or by `Error` — confirm against
/// the request handler.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ClusterResponse {
// Shard ops
ShardWriteAck(ShardWriteAck),
ShardReadResponse(ShardReadResponse),
ShardDeleteAck(ShardDeleteAck),
ShardHeadResponse(ShardHeadResponse),
// Manifest ops
ManifestWriteAck(ManifestWriteAck),
ManifestReadResponse(ManifestReadResponse),
ManifestDeleteAck(ManifestDeleteAck),
ManifestListResponse(ManifestListResponse),
// Cluster mgmt
HeartbeatAck(HeartbeatAckMessage),
JoinResponse(JoinResponseMessage),
TopologySyncAck(TopologySyncAckMessage),
// Healing
HealResponse(HealResponseMessage),
// Error
/// Generic failure reply carrying a code and a message.
Error(ErrorResponse),
}
// ============================
// Shard operation messages
// ============================
/// Header carried by `ClusterRequest::ShardWrite`.
///
/// Per the `ShardWrite` variant documentation, the shard data follows this
/// header on the same stream.
///
/// `bucket`, `key`, `chunk_index` and `shard_index` together name the shard
/// (presumably: object, chunk within the object, shard within the chunk —
/// confirm against the storage layer).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardWriteRequest {
/// Request identifier; `ShardWriteAck` has a field of the same name,
/// presumably so the reply can be correlated — TODO confirm.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
/// Presumably the number of raw shard bytes that follow the header — TODO confirm.
pub shard_data_length: u64,
pub checksum: u32, // crc32c of shard data
/// Free-form string key/value pairs; their meaning is not visible in this file.
pub object_metadata: HashMap<String, String>,
}
/// Payload of `ClusterResponse::ShardWriteAck`, the reply to a shard write.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardWriteAck {
/// Presumably echoes the `request_id` of the originating request — TODO confirm.
pub request_id: String,
pub success: bool,
/// Optional error description; presumably set only when `success` is false — TODO confirm.
pub error: Option<String>,
}
/// Payload of `ClusterRequest::ShardRead`: names the shard to read from the
/// target node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardReadRequest {
/// Request identifier; `ShardReadResponse` has a field of the same name.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
}
/// Header carried by `ClusterResponse::ShardReadResponse`.
///
/// The shard bytes are not part of this struct; they follow the header on the
/// stream.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardReadResponse {
pub request_id: String,
/// Whether the shard was found; presumably the length and checksum are only
/// meaningful when this is true — TODO confirm.
pub found: bool,
/// Presumably the number of shard bytes that follow the header — TODO confirm.
pub shard_data_length: u64,
/// Presumably crc32c of the shard data, as in `ShardWriteRequest` — TODO confirm.
pub checksum: u32,
// Shard data follows on the stream after this header
}
/// Payload of `ClusterRequest::ShardDelete`: names the shard to delete from
/// the target node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardDeleteRequest {
/// Request identifier; `ShardDeleteAck` has a field of the same name.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
}
/// Payload of `ClusterResponse::ShardDeleteAck`, the reply to a shard delete.
///
/// Unlike `ShardWriteAck`, this ack has no `error` field: only a success flag
/// is reported.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardDeleteAck {
pub request_id: String,
pub success: bool,
}
/// Payload of `ClusterRequest::ShardHead`: names the shard whose existence and
/// metadata are being queried.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardHeadRequest {
/// Request identifier; `ShardHeadResponse` has a field of the same name.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
}
/// Payload of `ClusterResponse::ShardHeadResponse`: existence flag plus shard
/// metadata, with no shard bytes attached.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardHeadResponse {
pub request_id: String,
/// Whether the shard was found; presumably `data_size` and `checksum` are
/// only meaningful when this is true — TODO confirm.
pub found: bool,
/// Presumably the stored shard size in bytes — TODO confirm.
pub data_size: u64,
/// Presumably crc32c of the shard data, as in `ShardWriteRequest` — TODO confirm.
pub checksum: u32,
}
// ============================
// Manifest operation messages
// ============================
/// Payload of `ClusterRequest::ManifestWrite`: the object manifest to store on
/// the target node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestWriteRequest {
/// Request identifier; `ManifestWriteAck` has a field of the same name.
pub request_id: String,
/// The manifest itself; `ObjectManifest` is defined in `super::metadata`.
pub manifest: ObjectManifest,
}
/// Payload of `ClusterResponse::ManifestWriteAck`, the reply to a manifest
/// write.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestWriteAck {
pub request_id: String,
pub success: bool,
/// Optional error description; presumably set only when `success` is false — TODO confirm.
pub error: Option<String>,
}
/// Payload of `ClusterRequest::ManifestRead`: names the manifest to retrieve
/// by bucket and key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestReadRequest {
/// Request identifier; `ManifestReadResponse` has a field of the same name.
pub request_id: String,
pub bucket: String,
pub key: String,
}
/// Payload of `ClusterResponse::ManifestReadResponse`.
///
/// NOTE(review): `found` and `manifest` overlap; presumably `manifest` is
/// `Some` exactly when `found` is true — confirm against the handler.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestReadResponse {
pub request_id: String,
pub found: bool,
pub manifest: Option<ObjectManifest>,
}
/// Payload of `ClusterRequest::ManifestDelete`: names the manifest to delete
/// by bucket and key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestDeleteRequest {
/// Request identifier; `ManifestDeleteAck` has a field of the same name.
pub request_id: String,
pub bucket: String,
pub key: String,
}
/// Payload of `ClusterResponse::ManifestDeleteAck`, the reply to a manifest
/// delete.
///
/// Unlike `ManifestWriteAck`, this ack has no `error` field: only a success
/// flag is reported.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestDeleteAck {
pub request_id: String,
pub success: bool,
}
/// Payload of `ClusterRequest::ManifestList`: asks for the manifests of one
/// bucket on the target node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestListRequest {
/// Request identifier; `ManifestListResponse` has a field of the same name.
pub request_id: String,
pub bucket: String,
/// Optional filter; presumably restricts the listing to keys starting with
/// this prefix, with `None` meaning no filter — TODO confirm.
pub prefix: Option<String>,
}
/// Payload of `ClusterResponse::ManifestListResponse`: the manifests returned
/// for a list request.
///
/// The whole result is carried in a single message; no pagination fields are
/// defined on this struct.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestListResponse {
pub request_id: String,
pub manifests: Vec<ObjectManifest>,
}
// ============================
// Cluster management messages
// ============================
/// Status of a single drive, as reported in `HeartbeatMessage::drive_states`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DriveStateInfo {
/// Index of the drive; presumably local to the reporting node — TODO confirm.
pub drive_index: u32,
pub status: String, // "online", "degraded", "offline", "healing"
}
/// Payload of `ClusterRequest::Heartbeat`, the periodic heartbeat.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeartbeatMessage {
/// Presumably the identifier of the sending node — TODO confirm.
pub node_id: String,
/// Timestamp carried as a string; its format is not visible in this file.
pub timestamp: String,
pub drive_states: Vec<DriveStateInfo>,
/// Presumably the sender's current `ClusterTopology::version` — TODO confirm.
pub topology_version: u64,
}
/// Payload of `ClusterResponse::HeartbeatAck`, the reply to a heartbeat.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeartbeatAckMessage {
/// Presumably the identifier of the replying node — TODO confirm.
pub node_id: String,
/// Timestamp carried as a string; its format is not visible in this file.
pub timestamp: String,
/// Presumably the replier's current `ClusterTopology::version` — TODO confirm.
pub topology_version: u64,
}
/// Description of one cluster node.
///
/// Used in `JoinRequestMessage::node_info` and `ClusterTopology::nodes`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeInfo {
pub node_id: String,
/// Address of the node's QUIC endpoint, as a string; the exact format is not
/// visible in this file.
pub quic_addr: String,
/// Address of the node's S3 endpoint, as a string; the exact format is not
/// visible in this file.
pub s3_addr: String,
pub drive_count: u32,
/// Node status as a free-form string; the allowed values are not listed in
/// this file.
pub status: String,
/// Version string; whether this is the software or protocol version is not
/// visible here — TODO confirm.
pub version: String,
}
/// Payload of `ClusterRequest::JoinRequest`, a request to join the cluster.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JoinRequestMessage {
/// Presumably describes the node asking to join — TODO confirm.
pub node_info: NodeInfo,
}
/// Snapshot of the cluster layout: member nodes, erasure sets and the shard
/// counts.
///
/// Carried by `TopologySyncMessage` and, optionally, `JoinResponseMessage`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusterTopology {
/// Topology version number; presumably increases on every change so peers
/// can tell which snapshot is newer — TODO confirm.
pub version: u64,
pub cluster_id: String,
pub nodes: Vec<NodeInfo>,
pub erasure_sets: Vec<ErasureSetInfo>,
/// Presumably the number of data shards per erasure-coded chunk — TODO confirm.
pub data_shards: usize,
/// Presumably the number of parity shards per erasure-coded chunk — TODO confirm.
pub parity_shards: usize,
}
/// One erasure set within a `ClusterTopology`: an identifier plus the drives
/// that belong to the set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErasureSetInfo {
pub set_id: u32,
/// Member drives; whether their order is significant (e.g. maps to shard
/// index) is not visible in this file — TODO confirm.
pub drives: Vec<DriveLocationInfo>,
}
/// Location of a drive in the cluster: a node identifier plus a drive index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DriveLocationInfo {
pub node_id: String,
/// Presumably the index of the drive on the node named by `node_id` — TODO confirm.
pub drive_index: u32,
}
/// Payload of `ClusterResponse::JoinResponse`, the reply to a join request.
///
/// NOTE(review): presumably `topology` is set when `accepted` is true and
/// `error` when it is false — confirm against the join handler.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JoinResponseMessage {
pub accepted: bool,
pub topology: Option<ClusterTopology>,
pub error: Option<String>,
}
/// Payload of `ClusterRequest::TopologySync`: carries a full
/// `ClusterTopology` to the receiving node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopologySyncMessage {
pub topology: ClusterTopology,
}
/// Payload of `ClusterResponse::TopologySyncAck`, the reply to a topology
/// sync.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopologySyncAckMessage {
/// Whether the receiver accepted the topology it was sent.
pub accepted: bool,
/// Presumably the topology version the replying node holds after handling
/// the sync — TODO confirm.
pub current_version: u64,
}
// ============================
// Healing messages
// ============================
/// Payload of `ClusterRequest::HealRequest`: asks for a shard to be
/// reconstructed and placed on a target drive.
///
/// `bucket`, `key`, `chunk_index` and `shard_index` name the shard to heal;
/// `target_node_id` and `target_drive_index` name the destination drive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealRequestMessage {
/// Request identifier; `HealResponseMessage` has a field of the same name.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
pub target_node_id: String,
pub target_drive_index: u32,
}
/// Payload of `ClusterResponse::HealResponse`, the reply to a heal request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealResponseMessage {
pub request_id: String,
pub success: bool,
/// Optional error description; presumably set only when `success` is false — TODO confirm.
pub error: Option<String>,
}
// ============================
// Error response
// ============================
/// Payload of `ClusterResponse::Error`, the generic failure reply.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorResponse {
/// Presumably the `request_id` of the request that failed — TODO confirm.
pub request_id: String,
/// Error code as a free-form string; the set of codes is not defined in
/// this file.
pub code: String,
/// Error description text.
pub message: String,
}
// ============================
// Wire format helpers
// ============================
/// Serialize a request to bincode bytes with a 4-byte length prefix.
///
/// The returned frame is laid out as
/// `[payload length: u32, little-endian][bincode payload]`.
///
/// # Errors
///
/// Returns an error if bincode serialization fails, or if the serialized
/// payload is longer than `u32::MAX` bytes and therefore cannot be described
/// by the 4-byte length prefix.
pub fn encode_request(req: &ClusterRequest) -> anyhow::Result<Vec<u8>> {
    let payload = bincode::serialize(req)?;
    // Reject oversized payloads explicitly: a bare `as u32` cast would
    // silently truncate the length and emit a frame whose prefix disagrees
    // with its body.
    if payload.len() as u64 > u64::from(u32::MAX) {
        anyhow::bail!(
            "Serialized request is {} bytes, which exceeds the u32 length prefix",
            payload.len()
        );
    }
    let payload_len = payload.len() as u32; // lossless after the check above
    let mut buf = Vec::with_capacity(4 + payload.len());
    buf.extend_from_slice(&payload_len.to_le_bytes());
    buf.extend_from_slice(&payload);
    Ok(buf)
}
/// Serialize a response to bincode bytes with a 4-byte length prefix.
///
/// The returned frame is laid out as
/// `[payload length: u32, little-endian][bincode payload]`.
///
/// # Errors
///
/// Returns an error if bincode serialization fails, or if the serialized
/// payload is longer than `u32::MAX` bytes and therefore cannot be described
/// by the 4-byte length prefix.
pub fn encode_response(resp: &ClusterResponse) -> anyhow::Result<Vec<u8>> {
    let payload = bincode::serialize(resp)?;
    // Reject oversized payloads explicitly: a bare `as u32` cast would
    // silently truncate the length and emit a frame whose prefix disagrees
    // with its body.
    if payload.len() as u64 > u64::from(u32::MAX) {
        anyhow::bail!(
            "Serialized response is {} bytes, which exceeds the u32 length prefix",
            payload.len()
        );
    }
    let payload_len = payload.len() as u32; // lossless after the check above
    let mut buf = Vec::with_capacity(4 + payload.len());
    buf.extend_from_slice(&payload_len.to_le_bytes());
    buf.extend_from_slice(&payload);
    Ok(buf)
}
/// Read a length-prefixed bincode message from raw bytes.
/// Returns (decoded message, bytes consumed).
///
/// `data` must start with a 4-byte little-endian length prefix followed by at
/// least that many payload bytes. Any bytes after the frame are ignored and
/// are not counted in the returned length.
///
/// # Errors
///
/// Returns an error if `data` is shorter than the 4-byte prefix, shorter than
/// the body length the prefix announces, or if bincode cannot deserialize the
/// body into a `ClusterRequest`.
pub fn decode_request(data: &[u8]) -> anyhow::Result<(ClusterRequest, usize)> {
    if data.len() < 4 {
        anyhow::bail!("Not enough data for length prefix");
    }
    let (prefix, body) = data.split_at(4);
    let len = u32::from_le_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
    // Compare against the bytes that remain instead of computing `4 + len`
    // up front: the prefix is untrusted input, and on 32-bit targets that sum
    // could overflow `usize` before the bounds check ran.
    if body.len() < len {
        anyhow::bail!("Not enough data for message body");
    }
    let msg: ClusterRequest = bincode::deserialize(&body[..len])?;
    // `len <= data.len() - 4` at this point, so the sum cannot overflow.
    Ok((msg, 4 + len))
}
/// Read a length-prefixed bincode response from raw bytes.
/// Returns (decoded message, bytes consumed).
///
/// `data` must start with a 4-byte little-endian length prefix followed by at
/// least that many payload bytes. Any bytes after the frame are ignored and
/// are not counted in the returned length.
///
/// # Errors
///
/// Returns an error if `data` is shorter than the 4-byte prefix, shorter than
/// the body length the prefix announces, or if bincode cannot deserialize the
/// body into a `ClusterResponse`.
pub fn decode_response(data: &[u8]) -> anyhow::Result<(ClusterResponse, usize)> {
    if data.len() < 4 {
        anyhow::bail!("Not enough data for length prefix");
    }
    let (prefix, body) = data.split_at(4);
    let len = u32::from_le_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
    // Compare against the bytes that remain instead of computing `4 + len`
    // up front: the prefix is untrusted input, and on 32-bit targets that sum
    // could overflow `usize` before the bounds check ran.
    if body.len() < len {
        anyhow::bail!("Not enough data for message body");
    }
    let msg: ClusterResponse = bincode::deserialize(&body[..len])?;
    // `len <= data.len() - 4` at this point, so the sum cannot overflow.
    Ok((msg, 4 + len))
}