feat(cluster): add clustered storage backend with QUIC transport, erasure coding, and shard management
This commit is contained in:
140
rust/src/cluster/placement.rs
Normal file
140
rust/src/cluster/placement.rs
Normal file
@@ -0,0 +1,140 @@
|
||||
use xxhash_rust::xxh64::xxh64;
|
||||
|
||||
/// Determines which erasure set an object belongs to, based on consistent hashing.
|
||||
///
|
||||
/// Uses xxhash64 of "{bucket}/{key}" to deterministically map objects to erasure sets.
|
||||
/// This is stateless — any node can independently compute the placement.
|
||||
pub fn erasure_set_for_object(bucket: &str, key: &str, num_erasure_sets: u32) -> u32 {
|
||||
if num_erasure_sets == 0 {
|
||||
return 0;
|
||||
}
|
||||
let hash_input = format!("{}/{}", bucket, key);
|
||||
let hash = xxh64(hash_input.as_bytes(), 0);
|
||||
(hash % num_erasure_sets as u64) as u32
|
||||
}
|
||||
|
||||
/// Identifies a single drive within the cluster topology.
///
/// Two locations are equal when both the node and the drive index match, which
/// lets locations be compared directly and used as set or map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DriveLocation {
    /// Identifier of the node that hosts the drive.
    pub node_id: String,
    /// Index of the drive on that node.
    pub drive_index: u32,
}
|
||||
|
||||
/// An erasure set: a fixed group of drives that together store one complete
|
||||
/// set of shards for any object placed on them.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ErasureSet {
|
||||
pub set_id: u32,
|
||||
/// Ordered drives: index = shard_index
|
||||
pub drives: Vec<DriveLocation>,
|
||||
}
|
||||
|
||||
/// Form erasure sets from the available drives across all nodes.
|
||||
///
|
||||
/// Interleaves drives from different nodes for fault isolation:
|
||||
/// e.g., with 3 nodes x 4 drives and total_shards=6:
|
||||
/// Set 0: N0-D0, N1-D0, N2-D0, N0-D1, N1-D1, N2-D1
|
||||
/// Set 1: N0-D2, N1-D2, N2-D2, N0-D3, N1-D3, N2-D3
|
||||
pub fn form_erasure_sets(
|
||||
nodes: &[(String, u32)], // (node_id, drive_count)
|
||||
total_shards: usize,
|
||||
) -> Vec<ErasureSet> {
|
||||
// Collect all drives as (node_id, drive_index), interleaved by node
|
||||
let max_drives = nodes.iter().map(|(_, count)| *count).max().unwrap_or(0) as usize;
|
||||
let mut all_drives: Vec<DriveLocation> = Vec::new();
|
||||
|
||||
for drive_idx in 0..max_drives {
|
||||
for (node_id, drive_count) in nodes {
|
||||
if (drive_idx as u32) < *drive_count {
|
||||
all_drives.push(DriveLocation {
|
||||
node_id: node_id.clone(),
|
||||
drive_index: drive_idx as u32,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Form sets of total_shards drives each
|
||||
let num_sets = all_drives.len() / total_shards;
|
||||
let mut sets = Vec::with_capacity(num_sets);
|
||||
|
||||
for set_idx in 0..num_sets {
|
||||
let start = set_idx * total_shards;
|
||||
let end = start + total_shards;
|
||||
let drives = all_drives[start..end].to_vec();
|
||||
|
||||
sets.push(ErasureSet {
|
||||
set_id: set_idx as u32,
|
||||
drives,
|
||||
});
|
||||
}
|
||||
|
||||
sets
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Node ids of a set's drives, in shard order.
    fn node_ids(set: &ErasureSet) -> Vec<&str> {
        set.drives.iter().map(|d| d.node_id.as_str()).collect()
    }

    /// Drive indices of a set's drives, in shard order.
    fn drive_indices(set: &ErasureSet) -> Vec<u32> {
        set.drives.iter().map(|d| d.drive_index).collect()
    }

    /// The same bucket/key pair must always resolve to the same set.
    #[test]
    fn test_erasure_set_assignment_deterministic() {
        let first = erasure_set_for_object("mybucket", "mykey", 4);
        let second = erasure_set_for_object("mybucket", "mykey", 4);
        assert_eq!(first, second);
    }

    /// 1000 distinct keys should spread over all 4 sets, not pile into one.
    #[test]
    fn test_erasure_set_distribution() {
        let num_sets = 4u32;
        let mut counts = vec![0u32; num_sets as usize];

        let placements = (0..1000)
            .map(|i| erasure_set_for_object("bucket", &format!("key-{}", i), num_sets));
        for set in placements {
            assert!(set < num_sets);
            counts[set as usize] += 1;
        }

        // Every set must receive a meaningful share of the objects.
        counts
            .iter()
            .for_each(|count| assert!(*count > 100, "Expected >100, got {}", count));
    }

    /// 3 nodes with 4 drives each and 6 shards per set gives exactly 2 sets.
    #[test]
    fn test_form_erasure_sets_3x4() {
        let nodes: Vec<(String, u32)> = ["node1", "node2", "node3"]
            .iter()
            .map(|id| (id.to_string(), 4))
            .collect();

        let sets = form_erasure_sets(&nodes, 6);
        assert_eq!(sets.len(), 2);

        // Both sets cycle through the nodes in the same order.
        let interleaved = vec!["node1", "node2", "node3", "node1", "node2", "node3"];
        assert_eq!(node_ids(&sets[0]), interleaved);
        assert_eq!(node_ids(&sets[1]), interleaved);

        // The sets use disjoint drive indices.
        assert_eq!(drive_indices(&sets[0]), vec![0, 0, 0, 1, 1, 1]);
        assert_eq!(drive_indices(&sets[1]), vec![2, 2, 2, 3, 3, 3]);
    }

    /// 2 nodes with 3 drives each and 4 shards per set gives 1 set; 2 drives are left over.
    #[test]
    fn test_form_erasure_sets_remainder() {
        let nodes = vec![("a".to_string(), 3), ("b".to_string(), 3)];

        let sets = form_erasure_sets(&nodes, 4);

        assert_eq!(sets.len(), 1);
        assert_eq!(sets[0].drives.len(), 4);
    }
}
|
||||
Reference in New Issue
Block a user