Files
smartstorage/rust/src/cluster/placement.rs

141 lines
4.8 KiB
Rust

use xxhash_rust::xxh64::xxh64;
/// Determines which erasure set an object belongs to, based on consistent hashing.
///
/// Uses xxhash64 of "{bucket}/{key}" to deterministically map objects to erasure sets.
/// This is stateless — any node can independently compute the placement.
pub fn erasure_set_for_object(bucket: &str, key: &str, num_erasure_sets: u32) -> u32 {
if num_erasure_sets == 0 {
return 0;
}
let hash_input = format!("{}/{}", bucket, key);
let hash = xxh64(hash_input.as_bytes(), 0);
(hash % num_erasure_sets as u64) as u32
}
/// Represents a drive location within the cluster topology.
#[derive(Debug, Clone)]
pub struct DriveLocation {
pub node_id: String,
pub drive_index: u32,
}
/// An erasure set: a fixed group of drives that together store one complete
/// set of shards for any object placed on them.
#[derive(Debug, Clone)]
pub struct ErasureSet {
pub set_id: u32,
/// Ordered drives: index = shard_index
pub drives: Vec<DriveLocation>,
}
/// Form erasure sets from the available drives across all nodes.
///
/// Interleaves drives from different nodes for fault isolation:
/// e.g., with 3 nodes x 4 drives and total_shards=6:
/// Set 0: N0-D0, N1-D0, N2-D0, N0-D1, N1-D1, N2-D1
/// Set 1: N0-D2, N1-D2, N2-D2, N0-D3, N1-D3, N2-D3
pub fn form_erasure_sets(
nodes: &[(String, u32)], // (node_id, drive_count)
total_shards: usize,
) -> Vec<ErasureSet> {
// Collect all drives as (node_id, drive_index), interleaved by node
let max_drives = nodes.iter().map(|(_, count)| *count).max().unwrap_or(0) as usize;
let mut all_drives: Vec<DriveLocation> = Vec::new();
for drive_idx in 0..max_drives {
for (node_id, drive_count) in nodes {
if (drive_idx as u32) < *drive_count {
all_drives.push(DriveLocation {
node_id: node_id.clone(),
drive_index: drive_idx as u32,
});
}
}
}
// Form sets of total_shards drives each
let num_sets = all_drives.len() / total_shards;
let mut sets = Vec::with_capacity(num_sets);
for set_idx in 0..num_sets {
let start = set_idx * total_shards;
let end = start + total_shards;
let drives = all_drives[start..end].to_vec();
sets.push(ErasureSet {
set_id: set_idx as u32,
drives,
});
}
sets
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_erasure_set_assignment_deterministic() {
let set_a = erasure_set_for_object("mybucket", "mykey", 4);
let set_b = erasure_set_for_object("mybucket", "mykey", 4);
assert_eq!(set_a, set_b);
}
#[test]
fn test_erasure_set_distribution() {
// Check that objects are distributed across sets
let num_sets = 4u32;
let mut counts = vec![0u32; num_sets as usize];
for i in 0..1000 {
let key = format!("key-{}", i);
let set = erasure_set_for_object("bucket", &key, num_sets);
assert!(set < num_sets);
counts[set as usize] += 1;
}
// Each set should have some objects (not all in one set)
for count in &counts {
assert!(*count > 100, "Expected >100, got {}", count);
}
}
#[test]
fn test_form_erasure_sets_3x4() {
// 3 nodes, 4 drives each, 6 shards per set => 2 sets
let nodes = vec![
("node1".to_string(), 4),
("node2".to_string(), 4),
("node3".to_string(), 4),
];
let sets = form_erasure_sets(&nodes, 6);
assert_eq!(sets.len(), 2);
// Set 0 should interleave across nodes
let set0_nodes: Vec<&str> = sets[0].drives.iter().map(|d| d.node_id.as_str()).collect();
assert_eq!(set0_nodes, vec!["node1", "node2", "node3", "node1", "node2", "node3"]);
// Set 1 should also interleave
let set1_nodes: Vec<&str> = sets[1].drives.iter().map(|d| d.node_id.as_str()).collect();
assert_eq!(set1_nodes, vec!["node1", "node2", "node3", "node1", "node2", "node3"]);
// Drive indices should be different between sets
let set0_drives: Vec<u32> = sets[0].drives.iter().map(|d| d.drive_index).collect();
let set1_drives: Vec<u32> = sets[1].drives.iter().map(|d| d.drive_index).collect();
assert_eq!(set0_drives, vec![0, 0, 0, 1, 1, 1]);
assert_eq!(set1_drives, vec![2, 2, 2, 3, 3, 3]);
}
#[test]
fn test_form_erasure_sets_remainder() {
// 2 nodes, 3 drives each, 4 shards => 1 set (2 drives left over)
let nodes = vec![
("a".to_string(), 3),
("b".to_string(), 3),
];
let sets = form_erasure_sets(&nodes, 4);
assert_eq!(sets.len(), 1);
assert_eq!(sets[0].drives.len(), 4);
}
}