feat: enhance storage stats and cluster health reporting

- Introduced new data structures for bucket and storage statistics, including BucketSummary, StorageStats, and ClusterHealth.
- Implemented runtime statistics tracking for buckets, including object count and total size.
- Added methods to retrieve storage stats and bucket summaries in the FileStore.
- Enhanced the SmartStorage interface to expose storage stats and cluster health.
- Implemented tests for runtime stats, cluster health, and credential management.
- Added support for runtime-managed credentials with atomic replacement.
- Improved filesystem usage reporting for storage locations.
This commit is contained in:
2026-04-19 11:57:28 +00:00
parent c683b02e8c
commit 0e9862efca
16 changed files with 1803 additions and 85 deletions
+33 -5
View File
@@ -37,12 +37,14 @@ use crate::xml_response;
/// A started storage server together with the shared state it was launched with.
///
/// Constructed by `StorageServer::start`, which spawns the server task and
/// returns this value holding the handles needed to interact with it.
pub struct StorageServer {
/// Storage backend; a clone of this `Arc` is handed to the spawned server task
/// and, from there, to every request handler.
store: Arc<StorageBackend>,
/// Runtime credential store built from `config.auth` at start-up. Request
/// handlers receive a clone and consult it when authenticating requests.
auth_runtime: Arc<auth::RuntimeCredentialStore>,
/// Sender half of a boolean watch channel.
// NOTE(review): presumably flipped to `true` to request shutdown of the
// server task — the receiving side is not visible here, confirm.
shutdown_tx: watch::Sender<bool>,
/// Join handle of the task spawned in `start` that runs the server.
server_handle: tokio::task::JoinHandle<()>,
}
impl StorageServer {
pub async fn start(config: SmartStorageConfig) -> Result<Self> {
let auth_runtime = Arc::new(auth::RuntimeCredentialStore::new(&config.auth));
let store: Arc<StorageBackend> = if let Some(ref cluster_config) = config.cluster {
if cluster_config.enabled {
Self::start_clustered(&config, cluster_config).await?
@@ -65,6 +67,7 @@ impl StorageServer {
let server_store = store.clone();
let server_config = config.clone();
let server_auth_runtime = auth_runtime.clone();
let server_policy_store = policy_store.clone();
let server_handle = tokio::spawn(async move {
@@ -78,15 +81,17 @@ impl StorageServer {
let io = TokioIo::new(stream);
let store = server_store.clone();
let cfg = server_config.clone();
let auth_runtime = server_auth_runtime.clone();
let ps = server_policy_store.clone();
tokio::spawn(async move {
let svc = service_fn(move |req: Request<Incoming>| {
let store = store.clone();
let cfg = cfg.clone();
let auth_runtime = auth_runtime.clone();
let ps = ps.clone();
async move {
handle_request(req, store, cfg, ps).await
handle_request(req, store, cfg, auth_runtime, ps).await
}
});
@@ -119,6 +124,7 @@ impl StorageServer {
Ok(Self {
store,
auth_runtime,
shutdown_tx,
server_handle,
})
@@ -133,6 +139,17 @@ impl StorageServer {
&self.store
}
/// Returns the credentials currently held by the runtime credential store.
///
/// Thin delegation to `RuntimeCredentialStore::list_credentials`; the result
/// is an owned `Vec`, so callers get their own copy of the credential list.
pub async fn list_credentials(&self) -> Vec<crate::config::Credential> {
self.auth_runtime.list_credentials().await
}
/// Replaces the credential set used by the running server with `credentials`.
///
/// Thin delegation to `RuntimeCredentialStore::replace_credentials`.
///
/// # Errors
///
/// Returns whatever `StorageError` the credential store reports.
// NOTE(review): validation rules and the atomicity of the swap are implemented
// in `RuntimeCredentialStore`, which is not visible here — confirm there
// before relying on either.
pub async fn replace_credentials(
&self,
credentials: Vec<crate::config::Credential>,
) -> Result<(), StorageError> {
self.auth_runtime.replace_credentials(credentials).await
}
async fn start_standalone(config: &SmartStorageConfig) -> Result<Arc<StorageBackend>> {
let store = Arc::new(StorageBackend::Standalone(
FileStore::new(config.storage.directory.clone().into()),
@@ -220,7 +237,7 @@ impl StorageServer {
// Initialize drive manager for health monitoring
let drive_manager = Arc::new(tokio::sync::Mutex::new(
DriveManager::new(&cluster_config.drives).await?,
DriveManager::from_paths(&drive_paths).await?,
));
// Join cluster if seed nodes are configured
@@ -231,7 +248,7 @@ impl StorageServer {
cluster_config.heartbeat_interval_ms,
local_node_info,
)
.with_drive_manager(drive_manager),
.with_drive_manager(drive_manager.clone()),
);
membership
.join_cluster(&cluster_config.seed_nodes)
@@ -261,12 +278,16 @@ impl StorageServer {
});
// Start healing service
let healing_runtime = Arc::new(tokio::sync::RwLock::new(
crate::cluster::healing::HealingRuntimeState::default(),
));
let healing_service = HealingService::new(
cluster_state.clone(),
&erasure_config,
local_shard_stores.clone(),
manifest_dir.clone(),
24, // scan every 24 hours
healing_runtime.clone(),
)?;
let (_heal_shutdown_tx, heal_shutdown_rx) = watch::channel(false);
tokio::spawn(async move {
@@ -278,11 +299,16 @@ impl StorageServer {
cluster_state,
transport,
erasure_config,
std::path::PathBuf::from(&config.storage.directory),
drive_paths,
drive_manager,
healing_runtime,
manifest_dir,
buckets_dir,
)?;
distributed_store.initialize_runtime_stats().await;
let store = Arc::new(StorageBackend::Clustered(distributed_store));
if !config.server.silent {
@@ -379,6 +405,7 @@ async fn handle_request(
req: Request<Incoming>,
store: Arc<StorageBackend>,
config: SmartStorageConfig,
auth_runtime: Arc<auth::RuntimeCredentialStore>,
policy_store: Arc<PolicyStore>,
) -> Result<Response<BoxBody>, std::convert::Infallible> {
let request_id = Uuid::new_v4().to_string();
@@ -396,7 +423,7 @@ async fn handle_request(
let request_ctx = action::resolve_action(&req);
// Step 2: Auth + policy pipeline
if config.auth.enabled {
if auth_runtime.enabled() {
// Attempt authentication
let identity = {
let has_auth_header = req
@@ -407,7 +434,8 @@ async fn handle_request(
.unwrap_or(false);
if has_auth_header {
match auth::verify_request(&req, &config) {
let credentials = auth_runtime.snapshot_credentials().await;
match auth::verify_request(&req, &credentials) {
Ok(id) => Some(id),
Err(e) => {
tracing::warn!("Auth failed: {}", e.message);