feat(cluster): add clustered storage backend with QUIC transport, erasure coding, and shard management
This commit is contained in:
+173
-28
@@ -23,25 +23,34 @@ use crate::auth::{self, AuthenticatedIdentity};
|
||||
use crate::config::SmartStorageConfig;
|
||||
use crate::policy::{self, PolicyDecision, PolicyStore};
|
||||
use crate::error::StorageError;
|
||||
use crate::storage::FileStore;
|
||||
use crate::cluster::coordinator::DistributedStore;
|
||||
use crate::cluster::config::ErasureConfig;
|
||||
use crate::cluster::membership::MembershipManager;
|
||||
use crate::cluster::placement;
|
||||
use crate::cluster::protocol::NodeInfo;
|
||||
use crate::cluster::quic_transport::QuicTransport;
|
||||
use crate::cluster::shard_store::ShardStore;
|
||||
use crate::cluster::state::ClusterState;
|
||||
use crate::storage::{FileStore, StorageBackend};
|
||||
use crate::xml_response;
|
||||
|
||||
pub struct StorageServer {
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
shutdown_tx: watch::Sender<bool>,
|
||||
server_handle: tokio::task::JoinHandle<()>,
|
||||
}
|
||||
|
||||
impl StorageServer {
|
||||
pub async fn start(config: SmartStorageConfig) -> Result<Self> {
|
||||
let store = Arc::new(FileStore::new(config.storage.directory.clone().into()));
|
||||
|
||||
// Initialize or reset storage
|
||||
if config.storage.clean_slate {
|
||||
store.reset().await?;
|
||||
let store: Arc<StorageBackend> = if let Some(ref cluster_config) = config.cluster {
|
||||
if cluster_config.enabled {
|
||||
Self::start_clustered(&config, cluster_config).await?
|
||||
} else {
|
||||
Self::start_standalone(&config).await?
|
||||
}
|
||||
} else {
|
||||
store.initialize().await?;
|
||||
}
|
||||
Self::start_standalone(&config).await?
|
||||
};
|
||||
|
||||
// Initialize policy store
|
||||
let policy_store = Arc::new(PolicyStore::new(store.policies_dir()));
|
||||
@@ -119,9 +128,145 @@ impl StorageServer {
|
||||
let _ = self.server_handle.await;
|
||||
}
|
||||
|
||||
pub fn store(&self) -> &FileStore {
|
||||
pub fn store(&self) -> &StorageBackend {
|
||||
&self.store
|
||||
}
|
||||
|
||||
async fn start_standalone(config: &SmartStorageConfig) -> Result<Arc<StorageBackend>> {
|
||||
let store = Arc::new(StorageBackend::Standalone(
|
||||
FileStore::new(config.storage.directory.clone().into()),
|
||||
));
|
||||
if config.storage.clean_slate {
|
||||
store.reset().await?;
|
||||
} else {
|
||||
store.initialize().await?;
|
||||
}
|
||||
Ok(store)
|
||||
}
|
||||
|
||||
async fn start_clustered(
|
||||
config: &SmartStorageConfig,
|
||||
cluster_config: &crate::cluster::config::ClusterConfig,
|
||||
) -> Result<Arc<StorageBackend>> {
|
||||
let erasure_config = cluster_config.erasure.clone();
|
||||
let node_id = cluster_config
|
||||
.node_id
|
||||
.clone()
|
||||
.unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
|
||||
|
||||
// Determine drive paths
|
||||
let drive_paths: Vec<std::path::PathBuf> = if cluster_config.drives.paths.is_empty() {
|
||||
// Default: use storage directory as a single drive
|
||||
vec![std::path::PathBuf::from(&config.storage.directory)]
|
||||
} else {
|
||||
cluster_config
|
||||
.drives
|
||||
.paths
|
||||
.iter()
|
||||
.map(std::path::PathBuf::from)
|
||||
.collect()
|
||||
};
|
||||
|
||||
// Ensure directories exist
|
||||
let manifest_dir = std::path::PathBuf::from(&config.storage.directory).join(".manifests");
|
||||
let buckets_dir = std::path::PathBuf::from(&config.storage.directory).join(".buckets");
|
||||
tokio::fs::create_dir_all(&manifest_dir).await?;
|
||||
tokio::fs::create_dir_all(&buckets_dir).await?;
|
||||
for path in &drive_paths {
|
||||
tokio::fs::create_dir_all(path.join(".smartstorage")).await?;
|
||||
}
|
||||
|
||||
// Initialize QUIC transport
|
||||
let quic_addr: SocketAddr =
|
||||
format!("{}:{}", config.server.address, cluster_config.quic_port).parse()?;
|
||||
let transport = Arc::new(QuicTransport::new(quic_addr, node_id.clone()).await?);
|
||||
|
||||
// Initialize cluster state
|
||||
let cluster_state = Arc::new(ClusterState::new(
|
||||
node_id.clone(),
|
||||
uuid::Uuid::new_v4().to_string(),
|
||||
erasure_config.data_shards,
|
||||
erasure_config.parity_shards,
|
||||
));
|
||||
|
||||
// Form erasure sets from local drives (single-node for now)
|
||||
let nodes = vec![(node_id.clone(), drive_paths.len() as u32)];
|
||||
let erasure_sets =
|
||||
placement::form_erasure_sets(&nodes, erasure_config.total_shards());
|
||||
|
||||
if erasure_sets.is_empty() {
|
||||
tracing::warn!(
|
||||
"Not enough drives ({}) for erasure set size ({}). \
|
||||
Need at least {} drives.",
|
||||
drive_paths.len(),
|
||||
erasure_config.total_shards(),
|
||||
erasure_config.total_shards(),
|
||||
);
|
||||
}
|
||||
|
||||
cluster_state.set_erasure_sets(erasure_sets).await;
|
||||
|
||||
// Register self as a node
|
||||
let local_node_info = NodeInfo {
|
||||
node_id: node_id.clone(),
|
||||
quic_addr: quic_addr.to_string(),
|
||||
s3_addr: format!("{}:{}", config.server.address, config.server.port),
|
||||
drive_count: drive_paths.len() as u32,
|
||||
status: "online".to_string(),
|
||||
version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
};
|
||||
cluster_state.add_node(local_node_info.clone()).await;
|
||||
|
||||
// Join cluster if seed nodes are configured
|
||||
let membership = Arc::new(MembershipManager::new(
|
||||
cluster_state.clone(),
|
||||
transport.clone(),
|
||||
cluster_config.heartbeat_interval_ms,
|
||||
local_node_info,
|
||||
));
|
||||
membership
|
||||
.join_cluster(&cluster_config.seed_nodes)
|
||||
.await?;
|
||||
|
||||
// Start QUIC accept loop for incoming connections
|
||||
let shard_store_for_accept = Arc::new(ShardStore::new(drive_paths[0].clone()));
|
||||
let (quic_shutdown_tx, quic_shutdown_rx) = watch::channel(false);
|
||||
let transport_clone = transport.clone();
|
||||
tokio::spawn(async move {
|
||||
transport_clone
|
||||
.accept_loop(shard_store_for_accept, quic_shutdown_rx)
|
||||
.await;
|
||||
});
|
||||
|
||||
// Start heartbeat loop
|
||||
let membership_clone = membership.clone();
|
||||
let (hb_shutdown_tx, hb_shutdown_rx) = watch::channel(false);
|
||||
tokio::spawn(async move {
|
||||
membership_clone.heartbeat_loop(hb_shutdown_rx).await;
|
||||
});
|
||||
|
||||
// Create distributed store
|
||||
let distributed_store = DistributedStore::new(
|
||||
cluster_state,
|
||||
transport,
|
||||
erasure_config,
|
||||
drive_paths,
|
||||
manifest_dir,
|
||||
buckets_dir,
|
||||
)?;
|
||||
|
||||
let store = Arc::new(StorageBackend::Clustered(distributed_store));
|
||||
|
||||
if !config.server.silent {
|
||||
tracing::info!(
|
||||
"Cluster mode enabled (node_id={}, quic_port={})",
|
||||
node_id,
|
||||
cluster_config.quic_port
|
||||
);
|
||||
}
|
||||
|
||||
Ok(store)
|
||||
}
|
||||
}
|
||||
|
||||
impl SmartStorageConfig {
|
||||
@@ -204,7 +349,7 @@ fn storage_error_response(err: &StorageError, request_id: &str) -> Response<BoxB
|
||||
|
||||
async fn handle_request(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
config: SmartStorageConfig,
|
||||
policy_store: Arc<PolicyStore>,
|
||||
) -> Result<Response<BoxBody>, std::convert::Infallible> {
|
||||
@@ -325,7 +470,7 @@ async fn authorize_request(
|
||||
|
||||
async fn route_request(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
_config: &SmartStorageConfig,
|
||||
request_id: &str,
|
||||
policy_store: &Arc<PolicyStore>,
|
||||
@@ -430,7 +575,7 @@ async fn route_request(
|
||||
// ============================
|
||||
|
||||
async fn handle_list_buckets(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
let buckets = store.list_buckets().await?;
|
||||
@@ -439,7 +584,7 @@ async fn handle_list_buckets(
|
||||
}
|
||||
|
||||
async fn handle_create_bucket(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
@@ -448,7 +593,7 @@ async fn handle_create_bucket(
|
||||
}
|
||||
|
||||
async fn handle_delete_bucket(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
request_id: &str,
|
||||
policy_store: &Arc<PolicyStore>,
|
||||
@@ -460,7 +605,7 @@ async fn handle_delete_bucket(
|
||||
}
|
||||
|
||||
async fn handle_head_bucket(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
@@ -472,7 +617,7 @@ async fn handle_head_bucket(
|
||||
}
|
||||
|
||||
async fn handle_list_objects(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
query: &HashMap<String, String>,
|
||||
request_id: &str,
|
||||
@@ -501,7 +646,7 @@ async fn handle_list_objects(
|
||||
|
||||
async fn handle_put_object(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
request_id: &str,
|
||||
@@ -523,7 +668,7 @@ async fn handle_put_object(
|
||||
|
||||
async fn handle_get_object(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
request_id: &str,
|
||||
@@ -576,7 +721,7 @@ async fn handle_get_object(
|
||||
}
|
||||
|
||||
async fn handle_head_object(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
request_id: &str,
|
||||
@@ -608,7 +753,7 @@ async fn handle_head_object(
|
||||
}
|
||||
|
||||
async fn handle_delete_object(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
request_id: &str,
|
||||
@@ -619,7 +764,7 @@ async fn handle_delete_object(
|
||||
|
||||
async fn handle_copy_object(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
dest_bucket: &str,
|
||||
dest_key: &str,
|
||||
request_id: &str,
|
||||
@@ -688,7 +833,7 @@ async fn handle_get_bucket_policy(
|
||||
|
||||
async fn handle_put_bucket_policy(
|
||||
req: Request<Incoming>,
|
||||
store: &Arc<FileStore>,
|
||||
store: &Arc<StorageBackend>,
|
||||
policy_store: &Arc<PolicyStore>,
|
||||
bucket: &str,
|
||||
request_id: &str,
|
||||
@@ -732,7 +877,7 @@ async fn handle_delete_bucket_policy(
|
||||
|
||||
async fn handle_initiate_multipart(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
request_id: &str,
|
||||
@@ -745,7 +890,7 @@ async fn handle_initiate_multipart(
|
||||
|
||||
async fn handle_upload_part(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
query: &HashMap<String, String>,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
@@ -774,7 +919,7 @@ async fn handle_upload_part(
|
||||
|
||||
async fn handle_complete_multipart(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
upload_id: &str,
|
||||
@@ -794,7 +939,7 @@ async fn handle_complete_multipart(
|
||||
}
|
||||
|
||||
async fn handle_abort_multipart(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
upload_id: &str,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
@@ -803,7 +948,7 @@ async fn handle_abort_multipart(
|
||||
}
|
||||
|
||||
async fn handle_list_multipart_uploads(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
|
||||
Reference in New Issue
Block a user