use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use bson::{Bson, Document}; use tracing::{debug, trace}; use rustdb_query::get_nested_value; use crate::error::IndexError; /// Options for creating an index. #[derive(Debug, Clone, Default)] pub struct IndexOptions { /// Custom name for the index. Auto-generated if None. pub name: Option, /// Whether the index enforces unique values. pub unique: bool, /// Whether the index skips documents missing the indexed field. pub sparse: bool, /// TTL in seconds (for date fields). None means no expiry. pub expire_after_seconds: Option, } /// Metadata about an existing index. #[derive(Debug, Clone)] pub struct IndexInfo { /// Index version (always 2). pub v: i32, /// The key specification document (e.g. {"name": 1}). pub key: Document, /// The index name. pub name: String, /// Whether the index enforces uniqueness. pub unique: bool, /// Whether the index is sparse. pub sparse: bool, /// TTL expiry in seconds, if set. pub expire_after_seconds: Option, } /// Internal data for a single index. struct IndexData { /// The key specification (field -> direction). key: Document, /// The index name. name: String, /// Whether uniqueness is enforced. unique: bool, /// Whether the index is sparse. sparse: bool, /// TTL in seconds. expire_after_seconds: Option, /// B-tree for range queries: serialized key bytes -> set of document _id hex strings. btree: BTreeMap, BTreeSet>, /// Hash map for equality lookups: serialized key bytes -> set of document _id hex strings. 
hash: HashMap, HashSet>, } impl IndexData { fn new(key: Document, name: String, unique: bool, sparse: bool, expire_after_seconds: Option) -> Self { Self { key, name, unique, sparse, expire_after_seconds, btree: BTreeMap::new(), hash: HashMap::new(), } } fn to_info(&self) -> IndexInfo { IndexInfo { v: 2, key: self.key.clone(), name: self.name.clone(), unique: self.unique, sparse: self.sparse, expire_after_seconds: self.expire_after_seconds, } } } /// Manages indexes for a single collection. pub struct IndexEngine { /// All indexes keyed by name. indexes: HashMap, } impl IndexEngine { /// Create a new IndexEngine with the default `_id_` index. pub fn new() -> Self { let mut indexes = HashMap::new(); let id_key = bson::doc! { "_id": 1 }; let id_index = IndexData::new(id_key, "_id_".to_string(), true, false, None); indexes.insert("_id_".to_string(), id_index); Self { indexes } } /// Create a new index. Returns the index name. pub fn create_index(&mut self, key: Document, options: IndexOptions) -> Result { if key.is_empty() { return Err(IndexError::InvalidIndex("Index key must have at least one field".to_string())); } let name = options.name.unwrap_or_else(|| Self::generate_index_name(&key)); if self.indexes.contains_key(&name) { debug!(index_name = %name, "Index already exists, returning existing"); return Ok(name); } debug!(index_name = %name, unique = options.unique, sparse = options.sparse, "Creating index"); let index_data = IndexData::new( key, name.clone(), options.unique, options.sparse, options.expire_after_seconds, ); self.indexes.insert(name.clone(), index_data); Ok(name) } /// Drop an index by name. Returns true if the index existed. /// Cannot drop the `_id_` index. pub fn drop_index(&mut self, name: &str) -> Result { if name == "_id_" { return Err(IndexError::ProtectedIndex("_id_".to_string())); } let existed = self.indexes.remove(name).is_some(); if existed { debug!(index_name = %name, "Dropped index"); } Ok(existed) } /// Drop all indexes except `_id_`. 
pub fn drop_all_indexes(&mut self) {
        self.indexes.retain(|name, _| name == "_id_");
        debug!("Dropped all non-_id indexes");
    }

    /// List all indexes.
    ///
    /// The order of the returned entries is unspecified (hash-map iteration order).
    pub fn list_indexes(&self) -> Vec<IndexInfo> {
        self.indexes.values().map(IndexData::to_info).collect()
    }

    /// Check whether an index with the given name exists.
    pub fn index_exists(&self, name: &str) -> bool {
        self.indexes.contains_key(name)
    }

    /// Check unique constraints for a document without modifying the index.
    /// Returns Ok(()) if no conflict, Err(DuplicateKey) if a unique constraint
    /// would be violated. This is a read-only check (immutable &self).
    pub fn check_unique_constraints(&self, doc: &Document) -> Result<(), IndexError> {
        self.check_unique_excluding(doc, None)
    }

    /// Check unique constraints for an update, excluding the document being updated.
    /// Returns Ok(()) if no conflict. This is a read-only check (immutable &self).
    ///
    /// The document being updated is identified by the `_id` of `old_doc`;
    /// entries owned by that id are not treated as conflicts.
    pub fn check_unique_constraints_for_update(
        &self,
        old_doc: &Document,
        new_doc: &Document,
    ) -> Result<(), IndexError> {
        let doc_id = Self::extract_id(old_doc);
        self.check_unique_excluding(new_doc, Some(doc_id.as_str()))
    }

    /// Shared implementation of the unique-constraint checks.
    ///
    /// For every unique index, looks up the key `doc` would occupy and reports
    /// `DuplicateKey` if any id other than `own_id` already holds it. With
    /// `own_id == None` every existing holder counts as a conflict.
    fn check_unique_excluding(
        &self,
        doc: &Document,
        own_id: Option<&str>,
    ) -> Result<(), IndexError> {
        for idx in self.indexes.values().filter(|idx| idx.unique) {
            // `None` means a sparse index that skips this document entirely.
            let kb = match Self::extract_key_bytes(doc, &idx.key, idx.sparse) {
                Some(kb) => kb,
                None => continue,
            };

            let taken = idx.hash.get(&kb).map_or(false, |ids| {
                ids.iter().any(|id| Some(id.as_str()) != own_id)
            });
            if taken {
                return Err(IndexError::DuplicateKey {
                    index: idx.name.clone(),
                    key: format!("{:?}", kb),
                });
            }
        }
        Ok(())
    }

    /// Notify the engine that a document has been inserted.
    /// Checks unique constraints and updates all index structures.
pub fn on_insert(&mut self, doc: &Document) -> Result<(), IndexError> { let doc_id = Self::extract_id(doc); // First pass: check unique constraints for idx in self.indexes.values() { if idx.unique { let key_bytes = Self::extract_key_bytes(doc, &idx.key, idx.sparse); if let Some(ref kb) = key_bytes { if let Some(existing_ids) = idx.hash.get(kb) { if !existing_ids.is_empty() { return Err(IndexError::DuplicateKey { index: idx.name.clone(), key: format!("{:?}", kb), }); } } } } } // Second pass: insert into all indexes for idx in self.indexes.values_mut() { let key_bytes = Self::extract_key_bytes(doc, &idx.key, idx.sparse); if let Some(kb) = key_bytes { idx.btree.entry(kb.clone()).or_default().insert(doc_id.clone()); idx.hash.entry(kb).or_default().insert(doc_id.clone()); } } trace!(doc_id = %doc_id, "Indexed document on insert"); Ok(()) } /// Notify the engine that a document has been updated. pub fn on_update(&mut self, old_doc: &Document, new_doc: &Document) -> Result<(), IndexError> { let doc_id = Self::extract_id(old_doc); // Check unique constraints for the new document (excluding the document itself) for idx in self.indexes.values() { if idx.unique { let new_key_bytes = Self::extract_key_bytes(new_doc, &idx.key, idx.sparse); if let Some(ref kb) = new_key_bytes { if let Some(existing_ids) = idx.hash.get(kb) { // If there are existing entries that aren't this document, it's a conflict let other_ids: HashSet<_> = existing_ids.iter() .filter(|id| **id != doc_id) .collect(); if !other_ids.is_empty() { return Err(IndexError::DuplicateKey { index: idx.name.clone(), key: format!("{:?}", kb), }); } } } } } // Remove old entries and insert new ones for idx in self.indexes.values_mut() { let old_key_bytes = Self::extract_key_bytes(old_doc, &idx.key, idx.sparse); if let Some(ref kb) = old_key_bytes { if let Some(set) = idx.btree.get_mut(kb) { set.remove(&doc_id); if set.is_empty() { idx.btree.remove(kb); } } if let Some(set) = idx.hash.get_mut(kb) { set.remove(&doc_id); if 
set.is_empty() { idx.hash.remove(kb); } } } let new_key_bytes = Self::extract_key_bytes(new_doc, &idx.key, idx.sparse); if let Some(kb) = new_key_bytes { idx.btree.entry(kb.clone()).or_default().insert(doc_id.clone()); idx.hash.entry(kb).or_default().insert(doc_id.clone()); } } trace!(doc_id = %doc_id, "Re-indexed document on update"); Ok(()) } /// Notify the engine that a document has been deleted. pub fn on_delete(&mut self, doc: &Document) { let doc_id = Self::extract_id(doc); for idx in self.indexes.values_mut() { let key_bytes = Self::extract_key_bytes(doc, &idx.key, idx.sparse); if let Some(ref kb) = key_bytes { if let Some(set) = idx.btree.get_mut(kb) { set.remove(&doc_id); if set.is_empty() { idx.btree.remove(kb); } } if let Some(set) = idx.hash.get_mut(kb) { set.remove(&doc_id); if set.is_empty() { idx.hash.remove(kb); } } } } trace!(doc_id = %doc_id, "Removed document from indexes"); } /// Attempt to find candidate document IDs using indexes for the given filter. /// Returns `None` if no suitable index is found (meaning a COLLSCAN is needed). /// Returns `Some(set)` with candidate IDs that should be checked against the full filter. pub fn find_candidate_ids(&self, filter: &Document) -> Option> { if filter.is_empty() { return None; } // Try each index to see which can serve this query let mut best_candidates: Option> = None; let mut best_score: f64 = 0.0; for idx in self.indexes.values() { if let Some((candidates, score)) = self.try_index_lookup(idx, filter) { if score > best_score { best_score = score; best_candidates = Some(candidates); } } } best_candidates } /// Rebuild all indexes from a full set of documents. 
pub fn rebuild_from_documents(&mut self, docs: &[Document]) { // Clear all index data for idx in self.indexes.values_mut() { idx.btree.clear(); idx.hash.clear(); } // Re-index all documents for doc in docs { let doc_id = Self::extract_id(doc); for idx in self.indexes.values_mut() { let key_bytes = Self::extract_key_bytes(doc, &idx.key, idx.sparse); if let Some(kb) = key_bytes { idx.btree.entry(kb.clone()).or_default().insert(doc_id.clone()); idx.hash.entry(kb).or_default().insert(doc_id.clone()); } } } debug!(num_docs = docs.len(), num_indexes = self.indexes.len(), "Rebuilt all indexes"); } // ---- Internal helpers ---- /// Try to use an index for the given filter. Returns candidate IDs and a score. fn try_index_lookup(&self, idx: &IndexData, filter: &Document) -> Option<(HashSet, f64)> { let index_fields: Vec = idx.key.keys().map(|k| k.to_string()).collect(); // Check if the filter uses fields covered by this index let mut matched_any = false; let mut result_set: Option> = None; let mut total_score: f64 = 0.0; for field in &index_fields { if let Some(condition) = filter.get(field) { matched_any = true; let (candidates, score) = self.lookup_field(idx, field, condition); total_score += score; // Add unique bonus if idx.unique { total_score += 0.5; } result_set = Some(match result_set { Some(existing) => existing.intersection(&candidates).cloned().collect(), None => candidates, }); } } if !matched_any { return None; } result_set.map(|rs| (rs, total_score)) } /// Look up candidates for a single field condition in an index. 
fn lookup_field(&self, idx: &IndexData, field: &str, condition: &Bson) -> (HashSet, f64) { match condition { // Equality match Bson::Document(cond_doc) if Self::has_operators(cond_doc) => { self.lookup_operator(idx, field, cond_doc) } // Direct equality _ => { let key_bytes = Self::bson_to_key_bytes(condition); let candidates = idx.hash .get(&key_bytes) .cloned() .unwrap_or_default(); (candidates, 2.0) // equality score } } } /// Handle operator-based lookups ($eq, $in, $gt, $lt, etc.). fn lookup_operator(&self, idx: &IndexData, field: &str, operators: &Document) -> (HashSet, f64) { let mut candidates = HashSet::new(); let mut score: f64 = 0.0; let mut has_range = false; for (op, value) in operators { match op.as_str() { "$eq" => { let key_bytes = Self::bson_to_key_bytes(value); if let Some(ids) = idx.hash.get(&key_bytes) { candidates = if candidates.is_empty() { ids.clone() } else { candidates.intersection(ids).cloned().collect() }; } score += 2.0; } "$in" => { if let Bson::Array(arr) = value { let mut in_candidates = HashSet::new(); for v in arr { let key_bytes = Self::bson_to_key_bytes(v); if let Some(ids) = idx.hash.get(&key_bytes) { in_candidates.extend(ids.iter().cloned()); } } candidates = if candidates.is_empty() { in_candidates } else { candidates.intersection(&in_candidates).cloned().collect() }; score += 1.5; } } "$gt" | "$gte" | "$lt" | "$lte" => { let range_candidates = self.range_scan(idx, field, op.as_str(), value); candidates = if candidates.is_empty() && !has_range { range_candidates } else { candidates.intersection(&range_candidates).cloned().collect() }; has_range = true; score += 1.0; } _ => { // Operators like $ne, $nin, $exists, $regex are not efficiently indexable // Return all indexed IDs for this index } } } // If we only had non-indexable operators, return empty with 0 score if score == 0.0 { return (HashSet::new(), 0.0); } (candidates, score) } /// Perform a range scan on the B-tree index. 
///
    /// Index keys are serialized BSON, whose byte order does not follow value
    /// order (integers are little-endian, strings are length-prefixed). A byte
    /// range over the B-tree would therefore drop matching documents, so each
    /// stored key is decoded and compared with `bound` by value instead.
    ///
    /// The result is a superset: keys that cannot be decoded or cannot be
    /// compared with `bound` (compound keys, arrays, nulls, mismatched types)
    /// are kept and left to the full filter check. An unrecognised `op` yields
    /// an empty set.
    fn range_scan(&self, idx: &IndexData, _field: &str, op: &str, bound: &Bson) -> HashSet<String> {
        use std::cmp::Ordering;

        if !matches!(op, "$gt" | "$gte" | "$lt" | "$lte") {
            return HashSet::new();
        }

        // `ord` is the ordering of the stored value relative to the bound.
        let in_range = |ord: Ordering| match op {
            "$gt" => ord == Ordering::Greater,
            "$gte" => ord != Ordering::Less,
            "$lt" => ord == Ordering::Less,
            "$lte" => ord != Ordering::Greater,
            _ => false,
        };

        let mut result = HashSet::new();
        for (key_bytes, ids) in &idx.btree {
            // Single-field keys are the document `{"k": value}` (see
            // `bson_to_key_bytes`); recover `value` from it.
            let mut reader: &[u8] = key_bytes.as_slice();
            let stored = Document::from_reader(&mut reader)
                .ok()
                .and_then(|mut wrapper| wrapper.remove("k"));

            let keep = match stored
                .as_ref()
                .and_then(|value| Self::compare_bson(value, bound))
            {
                Some(ord) => in_range(ord),
                // Unknown relation: keep it rather than risk losing a match.
                None => true,
            };
            if keep {
                result.extend(ids.iter().cloned());
            }
        }
        result
    }

    /// Compare two BSON values by value, when a meaningful order exists.
    ///
    /// Supports numbers (Int32/Int64/Double, across types), strings (byte-wise),
    /// booleans, datetimes and object ids. Returns `None` for any other pairing
    /// and for NaN.
    fn compare_bson(a: &Bson, b: &Bson) -> Option<std::cmp::Ordering> {
        fn as_int(v: &Bson) -> Option<i64> {
            match v {
                Bson::Int32(n) => Some(i64::from(*n)),
                Bson::Int64(n) => Some(*n),
                _ => None,
            }
        }

        fn as_float(v: &Bson) -> Option<f64> {
            match v {
                Bson::Int32(n) => Some(f64::from(*n)),
                // Beyond 2^53 an i64 is not exactly representable as f64;
                // report "not comparable" instead of a possibly wrong order.
                Bson::Int64(n) if n.unsigned_abs() <= (1u64 << 53) => Some(*n as f64),
                Bson::Double(d) => Some(*d),
                _ => None,
            }
        }

        if let (Some(x), Some(y)) = (as_int(a), as_int(b)) {
            return Some(x.cmp(&y));
        }
        if let (Some(x), Some(y)) = (as_float(a), as_float(b)) {
            return x.partial_cmp(&y);
        }

        match (a, b) {
            (Bson::String(x), Bson::String(y)) => Some(x.cmp(y)),
            (Bson::Boolean(x), Bson::Boolean(y)) => Some(x.cmp(y)),
            (Bson::DateTime(x), Bson::DateTime(y)) => {
                Some(x.timestamp_millis().cmp(&y.timestamp_millis()))
            }
            (Bson::ObjectId(x), Bson::ObjectId(y)) => Some(x.bytes().cmp(&y.bytes())),
            _ => None,
        }
    }

    /// Generate an index name from the key spec (e.g. {"name": 1, "age": -1} -> "name_1_age_-1").
    ///
    /// Integer and whole-number double directions are rendered as integers,
    /// string directions verbatim; any other direction value falls back to "1".
    fn generate_index_name(key: &Document) -> String {
        key.iter()
            .map(|(field, dir)| {
                let dir_val = match dir {
                    Bson::Int32(n) => n.to_string(),
                    Bson::Int64(n) => n.to_string(),
                    // Without this arm {"a": -1.0} would be named "a_1" and
                    // collide with the ascending index on the same field.
                    Bson::Double(d) if d.fract() == 0.0 => (*d as i64).to_string(),
                    Bson::String(s) => s.clone(),
                    _ => "1".to_string(),
                };
                format!("{}_{}", field, dir_val)
            })
            .collect::<Vec<_>>()
            .join("_")
    }

    /// Extract the `_id` field from a document as a hex string.
    ///
    /// ObjectIds are rendered as hex, strings are used verbatim, any other
    /// value uses its `Display` form, and a missing `_id` yields an empty string.
    fn extract_id(doc: &Document) -> String {
        match doc.get("_id") {
            Some(Bson::ObjectId(oid)) => oid.to_hex(),
            Some(Bson::String(s)) => s.clone(),
            Some(other) => other.to_string(),
            None => String::new(),
        }
    }

    /// Extract the index key bytes from a document for a given key specification.
    /// Returns `None` if the document should be skipped (sparse index with missing fields).
fn extract_key_bytes(doc: &Document, key_spec: &Document, sparse: bool) -> Option> { let fields: Vec<(&str, &Bson)> = key_spec.iter().map(|(k, v)| (k.as_str(), v)).collect(); if fields.len() == 1 { // Single-field index let field = fields[0].0; let value = Self::resolve_field_value(doc, field); if sparse && value.is_none() { return None; } let val = value.unwrap_or(Bson::Null); Some(Self::bson_to_key_bytes(&val)) } else { // Compound index: concatenate field values let mut all_null = true; let mut compound_bytes = Vec::new(); for (field, _dir) in &fields { let value = Self::resolve_field_value(doc, field); if value.is_some() { all_null = false; } let val = value.unwrap_or(Bson::Null); let field_bytes = Self::bson_to_key_bytes(&val); // Length-prefix each field for unambiguous concatenation compound_bytes.extend_from_slice(&(field_bytes.len() as u32).to_be_bytes()); compound_bytes.extend_from_slice(&field_bytes); } if sparse && all_null { return None; } Some(compound_bytes) } } /// Resolve a field value from a document, supporting dot notation. fn resolve_field_value(doc: &Document, field: &str) -> Option { if field.contains('.') { get_nested_value(doc, field) } else { doc.get(field).cloned() } } /// Serialize a BSON value to bytes for use as an index key. fn bson_to_key_bytes(value: &Bson) -> Vec { // Use BSON raw serialization for consistent byte representation. // We wrap in a document since raw BSON requires a top-level document. let wrapper = bson::doc! { "k": value.clone() }; let raw = bson::to_vec(&wrapper).unwrap_or_default(); raw } fn has_operators(doc: &Document) -> bool { doc.keys().any(|k| k.starts_with('$')) } } impl Default for IndexEngine { fn default() -> Self { Self::new() } } #[cfg(test)] mod tests { use super::*; use bson::oid::ObjectId; fn make_doc(name: &str, age: i32) -> Document { bson::doc! 
{ "_id": ObjectId::new(), "name": name, "age": age, } } #[test] fn test_default_id_index() { let engine = IndexEngine::new(); assert!(engine.index_exists("_id_")); assert_eq!(engine.list_indexes().len(), 1); } #[test] fn test_create_and_drop_index() { let mut engine = IndexEngine::new(); let name = engine.create_index( bson::doc! { "name": 1 }, IndexOptions::default(), ).unwrap(); assert_eq!(name, "name_1"); assert!(engine.index_exists("name_1")); assert!(engine.drop_index("name_1").unwrap()); assert!(!engine.index_exists("name_1")); } #[test] fn test_cannot_drop_id_index() { let mut engine = IndexEngine::new(); let result = engine.drop_index("_id_"); assert!(result.is_err()); } #[test] fn test_unique_constraint() { let mut engine = IndexEngine::new(); engine.create_index( bson::doc! { "email": 1 }, IndexOptions { unique: true, ..Default::default() }, ).unwrap(); let doc1 = bson::doc! { "_id": ObjectId::new(), "email": "a@b.com" }; let doc2 = bson::doc! { "_id": ObjectId::new(), "email": "a@b.com" }; engine.on_insert(&doc1).unwrap(); let result = engine.on_insert(&doc2); assert!(result.is_err()); } #[test] fn test_find_candidates_equality() { let mut engine = IndexEngine::new(); engine.create_index( bson::doc! { "name": 1 }, IndexOptions::default(), ).unwrap(); let doc1 = make_doc("Alice", 30); let doc2 = make_doc("Bob", 25); let doc3 = make_doc("Alice", 35); engine.on_insert(&doc1).unwrap(); engine.on_insert(&doc2).unwrap(); engine.on_insert(&doc3).unwrap(); let filter = bson::doc! { "name": "Alice" }; let candidates = engine.find_candidate_ids(&filter); assert!(candidates.is_some()); assert_eq!(candidates.unwrap().len(), 2); } #[test] fn test_on_delete() { let mut engine = IndexEngine::new(); engine.create_index( bson::doc! { "name": 1 }, IndexOptions::default(), ).unwrap(); let doc = make_doc("Alice", 30); engine.on_insert(&doc).unwrap(); let filter = bson::doc! 
{ "name": "Alice" }; assert!(engine.find_candidate_ids(&filter).is_some()); engine.on_delete(&doc); let candidates = engine.find_candidate_ids(&filter); assert!(candidates.is_some()); assert!(candidates.unwrap().is_empty()); } #[test] fn test_rebuild_from_documents() { let mut engine = IndexEngine::new(); engine.create_index( bson::doc! { "name": 1 }, IndexOptions::default(), ).unwrap(); let docs = vec![ make_doc("Alice", 30), make_doc("Bob", 25), ]; engine.rebuild_from_documents(&docs); let filter = bson::doc! { "name": "Alice" }; let candidates = engine.find_candidate_ids(&filter); assert!(candidates.is_some()); assert_eq!(candidates.unwrap().len(), 1); } #[test] fn test_drop_all_indexes() { let mut engine = IndexEngine::new(); engine.create_index(bson::doc! { "a": 1 }, IndexOptions::default()).unwrap(); engine.create_index(bson::doc! { "b": 1 }, IndexOptions::default()).unwrap(); assert_eq!(engine.list_indexes().len(), 3); engine.drop_all_indexes(); assert_eq!(engine.list_indexes().len(), 1); assert!(engine.index_exists("_id_")); } }