feat(cluster): add clustered storage backend with QUIC transport, erasure coding, and shard management

This commit is contained in:
2026-03-21 21:50:42 +00:00
parent 4fcd05d3c6
commit d12d321079
25 changed files with 7472 additions and 3467 deletions
+173 -28
View File
@@ -23,25 +23,34 @@ use crate::auth::{self, AuthenticatedIdentity};
use crate::config::SmartStorageConfig;
use crate::policy::{self, PolicyDecision, PolicyStore};
use crate::error::StorageError;
use crate::storage::FileStore;
use crate::cluster::coordinator::DistributedStore;
use crate::cluster::config::ErasureConfig;
use crate::cluster::membership::MembershipManager;
use crate::cluster::placement;
use crate::cluster::protocol::NodeInfo;
use crate::cluster::quic_transport::QuicTransport;
use crate::cluster::shard_store::ShardStore;
use crate::cluster::state::ClusterState;
use crate::storage::{FileStore, StorageBackend};
use crate::xml_response;
pub struct StorageServer {
store: Arc<FileStore>,
store: Arc<StorageBackend>,
shutdown_tx: watch::Sender<bool>,
server_handle: tokio::task::JoinHandle<()>,
}
impl StorageServer {
pub async fn start(config: SmartStorageConfig) -> Result<Self> {
let store = Arc::new(FileStore::new(config.storage.directory.clone().into()));
// Initialize or reset storage
if config.storage.clean_slate {
store.reset().await?;
let store: Arc<StorageBackend> = if let Some(ref cluster_config) = config.cluster {
if cluster_config.enabled {
Self::start_clustered(&config, cluster_config).await?
} else {
Self::start_standalone(&config).await?
}
} else {
store.initialize().await?;
}
Self::start_standalone(&config).await?
};
// Initialize policy store
let policy_store = Arc::new(PolicyStore::new(store.policies_dir()));
@@ -119,9 +128,145 @@ impl StorageServer {
let _ = self.server_handle.await;
}
pub fn store(&self) -> &FileStore {
pub fn store(&self) -> &StorageBackend {
&self.store
}
/// Builds the single-node backend: a plain `FileStore` rooted at the
/// configured storage directory, wrapped in `StorageBackend::Standalone`.
///
/// With `clean_slate` set the store is `reset()` (presumably wiping
/// prior on-disk state — confirm against `FileStore::reset`); otherwise
/// it is `initialize()`d in place.
async fn start_standalone(config: &SmartStorageConfig) -> Result<Arc<StorageBackend>> {
    let root = config.storage.directory.clone();
    let store = Arc::new(StorageBackend::Standalone(FileStore::new(root.into())));

    // Fresh start vs. reuse of existing on-disk layout.
    if config.storage.clean_slate {
        store.reset().await?;
    } else {
        store.initialize().await?;
    }

    Ok(store)
}
/// Brings up the clustered backend for this node.
///
/// Setup order: resolve node identity and drive layout, create on-disk
/// directories, start the QUIC transport, seed in-memory cluster state
/// with erasure-set placement, register/join via the membership manager,
/// spawn the QUIC accept loop and heartbeat loop, then wrap everything
/// in a `DistributedStore` returned as `StorageBackend::Clustered`.
async fn start_clustered(
    config: &SmartStorageConfig,
    cluster_config: &crate::cluster::config::ClusterConfig,
) -> Result<Arc<StorageBackend>> {
    let erasure_config = cluster_config.erasure.clone();
    // Node identity: configured id, or a fresh random UUID.
    // NOTE(review): without a configured node_id, a new id is minted on
    // every restart — peers would see each restart as a brand-new node.
    // Confirm this is intended.
    let node_id = cluster_config
        .node_id
        .clone()
        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
    // Determine drive paths
    let drive_paths: Vec<std::path::PathBuf> = if cluster_config.drives.paths.is_empty() {
        // Default: use storage directory as a single drive
        vec![std::path::PathBuf::from(&config.storage.directory)]
    } else {
        cluster_config
            .drives
            .paths
            .iter()
            .map(std::path::PathBuf::from)
            .collect()
    };
    // Invariant: `drive_paths` is non-empty — both branches above yield
    // at least one entry (relied on by `drive_paths[0]` below).
    // Ensure directories exist
    let manifest_dir = std::path::PathBuf::from(&config.storage.directory).join(".manifests");
    let buckets_dir = std::path::PathBuf::from(&config.storage.directory).join(".buckets");
    tokio::fs::create_dir_all(&manifest_dir).await?;
    tokio::fs::create_dir_all(&buckets_dir).await?;
    for path in &drive_paths {
        // Marker/metadata subdirectory on each drive.
        tokio::fs::create_dir_all(path.join(".smartstorage")).await?;
    }
    // Initialize QUIC transport
    let quic_addr: SocketAddr =
        format!("{}:{}", config.server.address, cluster_config.quic_port).parse()?;
    let transport = Arc::new(QuicTransport::new(quic_addr, node_id.clone()).await?);
    // Initialize cluster state (second UUID is presumably a cluster/epoch
    // id — confirm against ClusterState::new).
    let cluster_state = Arc::new(ClusterState::new(
        node_id.clone(),
        uuid::Uuid::new_v4().to_string(),
        erasure_config.data_shards,
        erasure_config.parity_shards,
    ));
    // Form erasure sets from local drives (single-node for now)
    let nodes = vec![(node_id.clone(), drive_paths.len() as u32)];
    let erasure_sets =
        placement::form_erasure_sets(&nodes, erasure_config.total_shards());
    if erasure_sets.is_empty() {
        // Startup continues with empty placement; reads/writes through the
        // distributed store may fail until more drives are available.
        tracing::warn!(
            "Not enough drives ({}) for erasure set size ({}). \
            Need at least {} drives.",
            drive_paths.len(),
            erasure_config.total_shards(),
            erasure_config.total_shards(),
        );
    }
    cluster_state.set_erasure_sets(erasure_sets).await;
    // Register self as a node
    let local_node_info = NodeInfo {
        node_id: node_id.clone(),
        quic_addr: quic_addr.to_string(),
        s3_addr: format!("{}:{}", config.server.address, config.server.port),
        drive_count: drive_paths.len() as u32,
        status: "online".to_string(),
        version: env!("CARGO_PKG_VERSION").to_string(),
    };
    cluster_state.add_node(local_node_info.clone()).await;
    // Join cluster if seed nodes are configured
    let membership = Arc::new(MembershipManager::new(
        cluster_state.clone(),
        transport.clone(),
        cluster_config.heartbeat_interval_ms,
        local_node_info,
    ));
    membership
        .join_cluster(&cluster_config.seed_nodes)
        .await?;
    // Start QUIC accept loop for incoming connections
    // NOTE(review): only drive_paths[0] backs the shard store serving
    // incoming requests — confirm whether the other drives should also
    // be reachable via the accept path.
    let shard_store_for_accept = Arc::new(ShardStore::new(drive_paths[0].clone()));
    let (quic_shutdown_tx, quic_shutdown_rx) = watch::channel(false);
    let transport_clone = transport.clone();
    tokio::spawn(async move {
        transport_clone
            .accept_loop(shard_store_for_accept, quic_shutdown_rx)
            .await;
    });
    // Start heartbeat loop
    let membership_clone = membership.clone();
    let (hb_shutdown_tx, hb_shutdown_rx) = watch::channel(false);
    tokio::spawn(async move {
        membership_clone.heartbeat_loop(hb_shutdown_rx).await;
    });
    // NOTE(review): `quic_shutdown_tx` and `hb_shutdown_tx` are dropped
    // when this function returns — a dropped tokio watch sender makes
    // `Receiver::changed()` return Err, which loops commonly treat as
    // shutdown. Verify that `accept_loop`/`heartbeat_loop` tolerate a
    // closed channel, and note there is no retained handle to stop these
    // tasks deliberately later.
    // Create distributed store
    let distributed_store = DistributedStore::new(
        cluster_state,
        transport,
        erasure_config,
        drive_paths,
        manifest_dir,
        buckets_dir,
    )?;
    let store = Arc::new(StorageBackend::Clustered(distributed_store));
    if !config.server.silent {
        tracing::info!(
            "Cluster mode enabled (node_id={}, quic_port={})",
            node_id,
            cluster_config.quic_port
        );
    }
    Ok(store)
}
}
impl SmartStorageConfig {
@@ -204,7 +349,7 @@ fn storage_error_response(err: &StorageError, request_id: &str) -> Response<BoxB
async fn handle_request(
req: Request<Incoming>,
store: Arc<FileStore>,
store: Arc<StorageBackend>,
config: SmartStorageConfig,
policy_store: Arc<PolicyStore>,
) -> Result<Response<BoxBody>, std::convert::Infallible> {
@@ -325,7 +470,7 @@ async fn authorize_request(
async fn route_request(
req: Request<Incoming>,
store: Arc<FileStore>,
store: Arc<StorageBackend>,
_config: &SmartStorageConfig,
request_id: &str,
policy_store: &Arc<PolicyStore>,
@@ -430,7 +575,7 @@ async fn route_request(
// ============================
async fn handle_list_buckets(
store: Arc<FileStore>,
store: Arc<StorageBackend>,
request_id: &str,
) -> Result<Response<BoxBody>> {
let buckets = store.list_buckets().await?;
@@ -439,7 +584,7 @@ async fn handle_list_buckets(
}
async fn handle_create_bucket(
store: Arc<FileStore>,
store: Arc<StorageBackend>,
bucket: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
@@ -448,7 +593,7 @@ async fn handle_create_bucket(
}
async fn handle_delete_bucket(
store: Arc<FileStore>,
store: Arc<StorageBackend>,
bucket: &str,
request_id: &str,
policy_store: &Arc<PolicyStore>,
@@ -460,7 +605,7 @@ async fn handle_delete_bucket(
}
async fn handle_head_bucket(
store: Arc<FileStore>,
store: Arc<StorageBackend>,
bucket: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
@@ -472,7 +617,7 @@ async fn handle_head_bucket(
}
async fn handle_list_objects(
store: Arc<FileStore>,
store: Arc<StorageBackend>,
bucket: &str,
query: &HashMap<String, String>,
request_id: &str,
@@ -501,7 +646,7 @@ async fn handle_list_objects(
async fn handle_put_object(
req: Request<Incoming>,
store: Arc<FileStore>,
store: Arc<StorageBackend>,
bucket: &str,
key: &str,
request_id: &str,
@@ -523,7 +668,7 @@ async fn handle_put_object(
async fn handle_get_object(
req: Request<Incoming>,
store: Arc<FileStore>,
store: Arc<StorageBackend>,
bucket: &str,
key: &str,
request_id: &str,
@@ -576,7 +721,7 @@ async fn handle_get_object(
}
async fn handle_head_object(
store: Arc<FileStore>,
store: Arc<StorageBackend>,
bucket: &str,
key: &str,
request_id: &str,
@@ -608,7 +753,7 @@ async fn handle_head_object(
}
async fn handle_delete_object(
store: Arc<FileStore>,
store: Arc<StorageBackend>,
bucket: &str,
key: &str,
request_id: &str,
@@ -619,7 +764,7 @@ async fn handle_delete_object(
async fn handle_copy_object(
req: Request<Incoming>,
store: Arc<FileStore>,
store: Arc<StorageBackend>,
dest_bucket: &str,
dest_key: &str,
request_id: &str,
@@ -688,7 +833,7 @@ async fn handle_get_bucket_policy(
async fn handle_put_bucket_policy(
req: Request<Incoming>,
store: &Arc<FileStore>,
store: &Arc<StorageBackend>,
policy_store: &Arc<PolicyStore>,
bucket: &str,
request_id: &str,
@@ -732,7 +877,7 @@ async fn handle_delete_bucket_policy(
async fn handle_initiate_multipart(
req: Request<Incoming>,
store: Arc<FileStore>,
store: Arc<StorageBackend>,
bucket: &str,
key: &str,
request_id: &str,
@@ -745,7 +890,7 @@ async fn handle_initiate_multipart(
async fn handle_upload_part(
req: Request<Incoming>,
store: Arc<FileStore>,
store: Arc<StorageBackend>,
query: &HashMap<String, String>,
request_id: &str,
) -> Result<Response<BoxBody>> {
@@ -774,7 +919,7 @@ async fn handle_upload_part(
async fn handle_complete_multipart(
req: Request<Incoming>,
store: Arc<FileStore>,
store: Arc<StorageBackend>,
bucket: &str,
key: &str,
upload_id: &str,
@@ -794,7 +939,7 @@ async fn handle_complete_multipart(
}
async fn handle_abort_multipart(
store: Arc<FileStore>,
store: Arc<StorageBackend>,
upload_id: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
@@ -803,7 +948,7 @@ async fn handle_abort_multipart(
}
async fn handle_list_multipart_uploads(
store: Arc<FileStore>,
store: Arc<StorageBackend>,
bucket: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {