BREAKING CHANGE(core): replace the TypeScript database engine with a Rust-backed embedded server and bridge
This commit is contained in:
15
rust/crates/rustdb-index/Cargo.toml
Normal file
15
rust/crates/rustdb-index/Cargo.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
[package]
|
||||
name = "rustdb-index"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
authors.workspace = true
|
||||
description = "MongoDB-compatible B-tree and hash index engine with query planner for RustDb"
|
||||
|
||||
[dependencies]
|
||||
bson = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
rustdb-query = { workspace = true }
|
||||
691
rust/crates/rustdb-index/src/engine.rs
Normal file
691
rust/crates/rustdb-index/src/engine.rs
Normal file
@@ -0,0 +1,691 @@
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||
|
||||
use bson::{Bson, Document};
|
||||
use tracing::{debug, trace};
|
||||
|
||||
use rustdb_query::get_nested_value;
|
||||
|
||||
use crate::error::IndexError;
|
||||
|
||||
/// Options for creating an index.
///
/// All fields default to "off" via `Default`, matching MongoDB's
/// `createIndex` defaults.
#[derive(Debug, Clone, Default)]
pub struct IndexOptions {
    /// Custom name for the index. Auto-generated from the key spec if None
    /// (e.g. `{"name": 1}` -> `"name_1"`).
    pub name: Option<String>,
    /// Whether the index enforces unique values.
    pub unique: bool,
    /// Whether the index skips documents missing the indexed field.
    pub sparse: bool,
    /// TTL in seconds (for date fields). None means no expiry.
    pub expire_after_seconds: Option<u64>,
}
|
||||
|
||||
/// Metadata about an existing index, as exposed by `IndexEngine::list_indexes`.
#[derive(Debug, Clone)]
pub struct IndexInfo {
    /// Index version (always 2, see `IndexData::to_info`).
    pub v: i32,
    /// The key specification document (e.g. {"name": 1}).
    pub key: Document,
    /// The index name.
    pub name: String,
    /// Whether the index enforces uniqueness.
    pub unique: bool,
    /// Whether the index is sparse.
    pub sparse: bool,
    /// TTL expiry in seconds, if set.
    pub expire_after_seconds: Option<u64>,
}
|
||||
|
||||
/// Internal data for a single index.
///
/// The same (key bytes -> doc ids) entries are maintained in two parallel
/// structures: `btree` for ordered range scans and `hash` for O(1) point
/// lookups. They are always kept in sync by the mutation paths
/// (`on_insert` / `on_update` / `on_delete` / `rebuild_from_documents`).
struct IndexData {
    /// The key specification (field -> direction).
    key: Document,
    /// The index name.
    name: String,
    /// Whether uniqueness is enforced.
    unique: bool,
    /// Whether the index is sparse.
    sparse: bool,
    /// TTL in seconds.
    expire_after_seconds: Option<u64>,
    /// B-tree for range queries: serialized key bytes -> set of document _id hex strings.
    btree: BTreeMap<Vec<u8>, BTreeSet<String>>,
    /// Hash map for equality lookups: serialized key bytes -> set of document _id hex strings.
    hash: HashMap<Vec<u8>, HashSet<String>>,
}
|
||||
|
||||
impl IndexData {
|
||||
fn new(key: Document, name: String, unique: bool, sparse: bool, expire_after_seconds: Option<u64>) -> Self {
|
||||
Self {
|
||||
key,
|
||||
name,
|
||||
unique,
|
||||
sparse,
|
||||
expire_after_seconds,
|
||||
btree: BTreeMap::new(),
|
||||
hash: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_info(&self) -> IndexInfo {
|
||||
IndexInfo {
|
||||
v: 2,
|
||||
key: self.key.clone(),
|
||||
name: self.name.clone(),
|
||||
unique: self.unique,
|
||||
sparse: self.sparse,
|
||||
expire_after_seconds: self.expire_after_seconds,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages indexes for a single collection.
|
||||
pub struct IndexEngine {
|
||||
/// All indexes keyed by name.
|
||||
indexes: HashMap<String, IndexData>,
|
||||
}
|
||||
|
||||
impl IndexEngine {
|
||||
/// Create a new IndexEngine with the default `_id_` index.
|
||||
pub fn new() -> Self {
|
||||
let mut indexes = HashMap::new();
|
||||
let id_key = bson::doc! { "_id": 1 };
|
||||
let id_index = IndexData::new(id_key, "_id_".to_string(), true, false, None);
|
||||
indexes.insert("_id_".to_string(), id_index);
|
||||
Self { indexes }
|
||||
}
|
||||
|
||||
/// Create a new index. Returns the index name.
|
||||
pub fn create_index(&mut self, key: Document, options: IndexOptions) -> Result<String, IndexError> {
|
||||
if key.is_empty() {
|
||||
return Err(IndexError::InvalidIndex("Index key must have at least one field".to_string()));
|
||||
}
|
||||
|
||||
let name = options.name.unwrap_or_else(|| Self::generate_index_name(&key));
|
||||
|
||||
if self.indexes.contains_key(&name) {
|
||||
debug!(index_name = %name, "Index already exists, returning existing");
|
||||
return Ok(name);
|
||||
}
|
||||
|
||||
debug!(index_name = %name, unique = options.unique, sparse = options.sparse, "Creating index");
|
||||
|
||||
let index_data = IndexData::new(
|
||||
key,
|
||||
name.clone(),
|
||||
options.unique,
|
||||
options.sparse,
|
||||
options.expire_after_seconds,
|
||||
);
|
||||
self.indexes.insert(name.clone(), index_data);
|
||||
|
||||
Ok(name)
|
||||
}
|
||||
|
||||
/// Drop an index by name. Returns true if the index existed.
|
||||
/// Cannot drop the `_id_` index.
|
||||
pub fn drop_index(&mut self, name: &str) -> Result<bool, IndexError> {
|
||||
if name == "_id_" {
|
||||
return Err(IndexError::ProtectedIndex("_id_".to_string()));
|
||||
}
|
||||
|
||||
let existed = self.indexes.remove(name).is_some();
|
||||
if existed {
|
||||
debug!(index_name = %name, "Dropped index");
|
||||
}
|
||||
Ok(existed)
|
||||
}
|
||||
|
||||
/// Drop all indexes except `_id_`.
|
||||
pub fn drop_all_indexes(&mut self) {
|
||||
self.indexes.retain(|name, _| name == "_id_");
|
||||
debug!("Dropped all non-_id indexes");
|
||||
}
|
||||
|
||||
/// List all indexes.
|
||||
pub fn list_indexes(&self) -> Vec<IndexInfo> {
|
||||
self.indexes.values().map(|idx| idx.to_info()).collect()
|
||||
}
|
||||
|
||||
/// Check whether an index with the given name exists.
|
||||
pub fn index_exists(&self, name: &str) -> bool {
|
||||
self.indexes.contains_key(name)
|
||||
}
|
||||
|
||||
/// Notify the engine that a document has been inserted.
|
||||
/// Checks unique constraints and updates all index structures.
|
||||
pub fn on_insert(&mut self, doc: &Document) -> Result<(), IndexError> {
|
||||
let doc_id = Self::extract_id(doc);
|
||||
|
||||
// First pass: check unique constraints
|
||||
for idx in self.indexes.values() {
|
||||
if idx.unique {
|
||||
let key_bytes = Self::extract_key_bytes(doc, &idx.key, idx.sparse);
|
||||
if let Some(ref kb) = key_bytes {
|
||||
if let Some(existing_ids) = idx.hash.get(kb) {
|
||||
if !existing_ids.is_empty() {
|
||||
return Err(IndexError::DuplicateKey {
|
||||
index: idx.name.clone(),
|
||||
key: format!("{:?}", kb),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Second pass: insert into all indexes
|
||||
for idx in self.indexes.values_mut() {
|
||||
let key_bytes = Self::extract_key_bytes(doc, &idx.key, idx.sparse);
|
||||
if let Some(kb) = key_bytes {
|
||||
idx.btree.entry(kb.clone()).or_default().insert(doc_id.clone());
|
||||
idx.hash.entry(kb).or_default().insert(doc_id.clone());
|
||||
}
|
||||
}
|
||||
|
||||
trace!(doc_id = %doc_id, "Indexed document on insert");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Notify the engine that a document has been updated.
|
||||
pub fn on_update(&mut self, old_doc: &Document, new_doc: &Document) -> Result<(), IndexError> {
|
||||
let doc_id = Self::extract_id(old_doc);
|
||||
|
||||
// Check unique constraints for the new document (excluding the document itself)
|
||||
for idx in self.indexes.values() {
|
||||
if idx.unique {
|
||||
let new_key_bytes = Self::extract_key_bytes(new_doc, &idx.key, idx.sparse);
|
||||
if let Some(ref kb) = new_key_bytes {
|
||||
if let Some(existing_ids) = idx.hash.get(kb) {
|
||||
// If there are existing entries that aren't this document, it's a conflict
|
||||
let other_ids: HashSet<_> = existing_ids.iter()
|
||||
.filter(|id| **id != doc_id)
|
||||
.collect();
|
||||
if !other_ids.is_empty() {
|
||||
return Err(IndexError::DuplicateKey {
|
||||
index: idx.name.clone(),
|
||||
key: format!("{:?}", kb),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove old entries and insert new ones
|
||||
for idx in self.indexes.values_mut() {
|
||||
let old_key_bytes = Self::extract_key_bytes(old_doc, &idx.key, idx.sparse);
|
||||
if let Some(ref kb) = old_key_bytes {
|
||||
if let Some(set) = idx.btree.get_mut(kb) {
|
||||
set.remove(&doc_id);
|
||||
if set.is_empty() {
|
||||
idx.btree.remove(kb);
|
||||
}
|
||||
}
|
||||
if let Some(set) = idx.hash.get_mut(kb) {
|
||||
set.remove(&doc_id);
|
||||
if set.is_empty() {
|
||||
idx.hash.remove(kb);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let new_key_bytes = Self::extract_key_bytes(new_doc, &idx.key, idx.sparse);
|
||||
if let Some(kb) = new_key_bytes {
|
||||
idx.btree.entry(kb.clone()).or_default().insert(doc_id.clone());
|
||||
idx.hash.entry(kb).or_default().insert(doc_id.clone());
|
||||
}
|
||||
}
|
||||
|
||||
trace!(doc_id = %doc_id, "Re-indexed document on update");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Notify the engine that a document has been deleted.
|
||||
pub fn on_delete(&mut self, doc: &Document) {
|
||||
let doc_id = Self::extract_id(doc);
|
||||
|
||||
for idx in self.indexes.values_mut() {
|
||||
let key_bytes = Self::extract_key_bytes(doc, &idx.key, idx.sparse);
|
||||
if let Some(ref kb) = key_bytes {
|
||||
if let Some(set) = idx.btree.get_mut(kb) {
|
||||
set.remove(&doc_id);
|
||||
if set.is_empty() {
|
||||
idx.btree.remove(kb);
|
||||
}
|
||||
}
|
||||
if let Some(set) = idx.hash.get_mut(kb) {
|
||||
set.remove(&doc_id);
|
||||
if set.is_empty() {
|
||||
idx.hash.remove(kb);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
trace!(doc_id = %doc_id, "Removed document from indexes");
|
||||
}
|
||||
|
||||
/// Attempt to find candidate document IDs using indexes for the given filter.
|
||||
/// Returns `None` if no suitable index is found (meaning a COLLSCAN is needed).
|
||||
/// Returns `Some(set)` with candidate IDs that should be checked against the full filter.
|
||||
pub fn find_candidate_ids(&self, filter: &Document) -> Option<HashSet<String>> {
|
||||
if filter.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Try each index to see which can serve this query
|
||||
let mut best_candidates: Option<HashSet<String>> = None;
|
||||
let mut best_score: f64 = 0.0;
|
||||
|
||||
for idx in self.indexes.values() {
|
||||
if let Some((candidates, score)) = self.try_index_lookup(idx, filter) {
|
||||
if score > best_score {
|
||||
best_score = score;
|
||||
best_candidates = Some(candidates);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
best_candidates
|
||||
}
|
||||
|
||||
/// Rebuild all indexes from a full set of documents.
|
||||
pub fn rebuild_from_documents(&mut self, docs: &[Document]) {
|
||||
// Clear all index data
|
||||
for idx in self.indexes.values_mut() {
|
||||
idx.btree.clear();
|
||||
idx.hash.clear();
|
||||
}
|
||||
|
||||
// Re-index all documents
|
||||
for doc in docs {
|
||||
let doc_id = Self::extract_id(doc);
|
||||
for idx in self.indexes.values_mut() {
|
||||
let key_bytes = Self::extract_key_bytes(doc, &idx.key, idx.sparse);
|
||||
if let Some(kb) = key_bytes {
|
||||
idx.btree.entry(kb.clone()).or_default().insert(doc_id.clone());
|
||||
idx.hash.entry(kb).or_default().insert(doc_id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
debug!(num_docs = docs.len(), num_indexes = self.indexes.len(), "Rebuilt all indexes");
|
||||
}
|
||||
|
||||
// ---- Internal helpers ----
|
||||
|
||||
/// Try to use an index for the given filter. Returns candidate IDs and a score.
|
||||
fn try_index_lookup(&self, idx: &IndexData, filter: &Document) -> Option<(HashSet<String>, f64)> {
|
||||
let index_fields: Vec<String> = idx.key.keys().map(|k| k.to_string()).collect();
|
||||
|
||||
// Check if the filter uses fields covered by this index
|
||||
let mut matched_any = false;
|
||||
let mut result_set: Option<HashSet<String>> = None;
|
||||
let mut total_score: f64 = 0.0;
|
||||
|
||||
for field in &index_fields {
|
||||
if let Some(condition) = filter.get(field) {
|
||||
matched_any = true;
|
||||
|
||||
let (candidates, score) = self.lookup_field(idx, field, condition);
|
||||
total_score += score;
|
||||
|
||||
// Add unique bonus
|
||||
if idx.unique {
|
||||
total_score += 0.5;
|
||||
}
|
||||
|
||||
result_set = Some(match result_set {
|
||||
Some(existing) => existing.intersection(&candidates).cloned().collect(),
|
||||
None => candidates,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if !matched_any {
|
||||
return None;
|
||||
}
|
||||
|
||||
result_set.map(|rs| (rs, total_score))
|
||||
}
|
||||
|
||||
/// Look up candidates for a single field condition in an index.
|
||||
fn lookup_field(&self, idx: &IndexData, field: &str, condition: &Bson) -> (HashSet<String>, f64) {
|
||||
match condition {
|
||||
// Equality match
|
||||
Bson::Document(cond_doc) if Self::has_operators(cond_doc) => {
|
||||
self.lookup_operator(idx, field, cond_doc)
|
||||
}
|
||||
// Direct equality
|
||||
_ => {
|
||||
let key_bytes = Self::bson_to_key_bytes(condition);
|
||||
let candidates = idx.hash
|
||||
.get(&key_bytes)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
(candidates, 2.0) // equality score
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle operator-based lookups ($eq, $in, $gt, $lt, etc.).
|
||||
fn lookup_operator(&self, idx: &IndexData, field: &str, operators: &Document) -> (HashSet<String>, f64) {
|
||||
let mut candidates = HashSet::new();
|
||||
let mut score: f64 = 0.0;
|
||||
let mut has_range = false;
|
||||
|
||||
for (op, value) in operators {
|
||||
match op.as_str() {
|
||||
"$eq" => {
|
||||
let key_bytes = Self::bson_to_key_bytes(value);
|
||||
if let Some(ids) = idx.hash.get(&key_bytes) {
|
||||
candidates = if candidates.is_empty() {
|
||||
ids.clone()
|
||||
} else {
|
||||
candidates.intersection(ids).cloned().collect()
|
||||
};
|
||||
}
|
||||
score += 2.0;
|
||||
}
|
||||
"$in" => {
|
||||
if let Bson::Array(arr) = value {
|
||||
let mut in_candidates = HashSet::new();
|
||||
for v in arr {
|
||||
let key_bytes = Self::bson_to_key_bytes(v);
|
||||
if let Some(ids) = idx.hash.get(&key_bytes) {
|
||||
in_candidates.extend(ids.iter().cloned());
|
||||
}
|
||||
}
|
||||
candidates = if candidates.is_empty() {
|
||||
in_candidates
|
||||
} else {
|
||||
candidates.intersection(&in_candidates).cloned().collect()
|
||||
};
|
||||
score += 1.5;
|
||||
}
|
||||
}
|
||||
"$gt" | "$gte" | "$lt" | "$lte" => {
|
||||
let range_candidates = self.range_scan(idx, field, op.as_str(), value);
|
||||
candidates = if candidates.is_empty() && !has_range {
|
||||
range_candidates
|
||||
} else {
|
||||
candidates.intersection(&range_candidates).cloned().collect()
|
||||
};
|
||||
has_range = true;
|
||||
score += 1.0;
|
||||
}
|
||||
_ => {
|
||||
// Operators like $ne, $nin, $exists, $regex are not efficiently indexable
|
||||
// Return all indexed IDs for this index
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we only had non-indexable operators, return empty with 0 score
|
||||
if score == 0.0 {
|
||||
return (HashSet::new(), 0.0);
|
||||
}
|
||||
|
||||
(candidates, score)
|
||||
}
|
||||
|
||||
    /// Perform a range scan on the B-tree index for one bound operator.
    ///
    /// NOTE(review): b-tree keys are raw serialized BSON bytes (see
    /// `bson_to_key_bytes`), and this scan compares them lexicographically.
    /// Lexicographic byte order does not generally match BSON value order
    /// (integers serialize little-endian, and a length/type prefix precedes
    /// the value), so range results can be wrong once values cross byte
    /// boundaries or mix types. Downstream re-checking of the full filter
    /// masks false positives but NOT false negatives — confirm, and
    /// consider an order-preserving key encoding.
    fn range_scan(&self, idx: &IndexData, _field: &str, op: &str, bound: &Bson) -> HashSet<String> {
        let bound_bytes = Self::bson_to_key_bytes(bound);
        let mut result = HashSet::new();

        match op {
            "$gt" => {
                use std::ops::Bound;
                // Exclusive lower bound: BTreeMap::range has no `..` sugar
                // for "strictly greater", hence the explicit Bound pair.
                for (_key, ids) in idx.btree.range((Bound::Excluded(bound_bytes), Bound::Unbounded)) {
                    result.extend(ids.iter().cloned());
                }
            }
            "$gte" => {
                for (_key, ids) in idx.btree.range(bound_bytes..) {
                    result.extend(ids.iter().cloned());
                }
            }
            "$lt" => {
                for (_key, ids) in idx.btree.range(..bound_bytes) {
                    result.extend(ids.iter().cloned());
                }
            }
            "$lte" => {
                for (_key, ids) in idx.btree.range(..=bound_bytes) {
                    result.extend(ids.iter().cloned());
                }
            }
            // Unknown operator: no candidates (callers only pass the four above).
            _ => {}
        }

        result
    }
|
||||
|
||||
/// Generate an index name from the key spec (e.g. {"name": 1, "age": -1} -> "name_1_age_-1").
|
||||
fn generate_index_name(key: &Document) -> String {
|
||||
key.iter()
|
||||
.map(|(field, dir)| {
|
||||
let dir_val = match dir {
|
||||
Bson::Int32(n) => n.to_string(),
|
||||
Bson::Int64(n) => n.to_string(),
|
||||
Bson::String(s) => s.clone(),
|
||||
_ => "1".to_string(),
|
||||
};
|
||||
format!("{}_{}", field, dir_val)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("_")
|
||||
}
|
||||
|
||||
    /// Extract the `_id` field from a document as a string key.
    ///
    /// ObjectIds become their hex form, strings are used verbatim, and any
    /// other BSON value falls back to its Display rendering.
    ///
    /// NOTE(review): a document without `_id` yields the empty string, so
    /// all such documents would collide under a single index entry —
    /// callers appear to always supply `_id`, but confirm.
    fn extract_id(doc: &Document) -> String {
        match doc.get("_id") {
            Some(Bson::ObjectId(oid)) => oid.to_hex(),
            Some(Bson::String(s)) => s.clone(),
            Some(other) => format!("{}", other),
            None => String::new(),
        }
    }
|
||||
|
||||
/// Extract the index key bytes from a document for a given key specification.
|
||||
/// Returns `None` if the document should be skipped (sparse index with missing fields).
|
||||
fn extract_key_bytes(doc: &Document, key_spec: &Document, sparse: bool) -> Option<Vec<u8>> {
|
||||
let fields: Vec<(&str, &Bson)> = key_spec.iter().map(|(k, v)| (k.as_str(), v)).collect();
|
||||
|
||||
if fields.len() == 1 {
|
||||
// Single-field index
|
||||
let field = fields[0].0;
|
||||
let value = Self::resolve_field_value(doc, field);
|
||||
if sparse && value.is_none() {
|
||||
return None;
|
||||
}
|
||||
let val = value.unwrap_or(Bson::Null);
|
||||
Some(Self::bson_to_key_bytes(&val))
|
||||
} else {
|
||||
// Compound index: concatenate field values
|
||||
let mut all_null = true;
|
||||
let mut compound_bytes = Vec::new();
|
||||
for (field, _dir) in &fields {
|
||||
let value = Self::resolve_field_value(doc, field);
|
||||
if value.is_some() {
|
||||
all_null = false;
|
||||
}
|
||||
let val = value.unwrap_or(Bson::Null);
|
||||
let field_bytes = Self::bson_to_key_bytes(&val);
|
||||
// Length-prefix each field for unambiguous concatenation
|
||||
compound_bytes.extend_from_slice(&(field_bytes.len() as u32).to_be_bytes());
|
||||
compound_bytes.extend_from_slice(&field_bytes);
|
||||
}
|
||||
|
||||
if sparse && all_null {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(compound_bytes)
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve a field value from a document, supporting dot notation.
|
||||
fn resolve_field_value(doc: &Document, field: &str) -> Option<Bson> {
|
||||
if field.contains('.') {
|
||||
get_nested_value(doc, field)
|
||||
} else {
|
||||
doc.get(field).cloned()
|
||||
}
|
||||
}
|
||||
|
||||
/// Serialize a BSON value to bytes for use as an index key.
|
||||
fn bson_to_key_bytes(value: &Bson) -> Vec<u8> {
|
||||
// Use BSON raw serialization for consistent byte representation.
|
||||
// We wrap in a document since raw BSON requires a top-level document.
|
||||
let wrapper = bson::doc! { "k": value.clone() };
|
||||
let raw = bson::to_vec(&wrapper).unwrap_or_default();
|
||||
raw
|
||||
}
|
||||
|
||||
fn has_operators(doc: &Document) -> bool {
|
||||
doc.keys().any(|k| k.starts_with('$'))
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for IndexEngine {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use bson::oid::ObjectId;

    // Helper: document with a fresh ObjectId and the given name/age fields.
    fn make_doc(name: &str, age: i32) -> Document {
        bson::doc! {
            "_id": ObjectId::new(),
            "name": name,
            "age": age,
        }
    }

    // A new engine always carries exactly the built-in `_id_` index.
    #[test]
    fn test_default_id_index() {
        let engine = IndexEngine::new();
        assert!(engine.index_exists("_id_"));
        assert_eq!(engine.list_indexes().len(), 1);
    }

    // Auto-generated name follows the "field_direction" convention and the
    // index is removable afterwards.
    #[test]
    fn test_create_and_drop_index() {
        let mut engine = IndexEngine::new();
        let name = engine.create_index(
            bson::doc! { "name": 1 },
            IndexOptions::default(),
        ).unwrap();
        assert_eq!(name, "name_1");
        assert!(engine.index_exists("name_1"));

        assert!(engine.drop_index("name_1").unwrap());
        assert!(!engine.index_exists("name_1"));
    }

    // The `_id_` index is protected from dropping.
    #[test]
    fn test_cannot_drop_id_index() {
        let mut engine = IndexEngine::new();
        let result = engine.drop_index("_id_");
        assert!(result.is_err());
    }

    // Inserting a second document with the same unique key must fail.
    #[test]
    fn test_unique_constraint() {
        let mut engine = IndexEngine::new();
        engine.create_index(
            bson::doc! { "email": 1 },
            IndexOptions { unique: true, ..Default::default() },
        ).unwrap();

        let doc1 = bson::doc! { "_id": ObjectId::new(), "email": "a@b.com" };
        let doc2 = bson::doc! { "_id": ObjectId::new(), "email": "a@b.com" };

        engine.on_insert(&doc1).unwrap();
        let result = engine.on_insert(&doc2);
        assert!(result.is_err());
    }

    // Equality lookup returns exactly the documents sharing the key value.
    #[test]
    fn test_find_candidates_equality() {
        let mut engine = IndexEngine::new();
        engine.create_index(
            bson::doc! { "name": 1 },
            IndexOptions::default(),
        ).unwrap();

        let doc1 = make_doc("Alice", 30);
        let doc2 = make_doc("Bob", 25);
        let doc3 = make_doc("Alice", 35);

        engine.on_insert(&doc1).unwrap();
        engine.on_insert(&doc2).unwrap();
        engine.on_insert(&doc3).unwrap();

        let filter = bson::doc! { "name": "Alice" };
        let candidates = engine.find_candidate_ids(&filter);
        assert!(candidates.is_some());
        assert_eq!(candidates.unwrap().len(), 2);
    }

    // After deletion the index still answers the query (Some) but with an
    // empty candidate set.
    #[test]
    fn test_on_delete() {
        let mut engine = IndexEngine::new();
        engine.create_index(
            bson::doc! { "name": 1 },
            IndexOptions::default(),
        ).unwrap();

        let doc = make_doc("Alice", 30);
        engine.on_insert(&doc).unwrap();

        let filter = bson::doc! { "name": "Alice" };
        assert!(engine.find_candidate_ids(&filter).is_some());

        engine.on_delete(&doc);
        let candidates = engine.find_candidate_ids(&filter);
        assert!(candidates.is_some());
        assert!(candidates.unwrap().is_empty());
    }

    // rebuild_from_documents repopulates indexes from scratch.
    #[test]
    fn test_rebuild_from_documents() {
        let mut engine = IndexEngine::new();
        engine.create_index(
            bson::doc! { "name": 1 },
            IndexOptions::default(),
        ).unwrap();

        let docs = vec![
            make_doc("Alice", 30),
            make_doc("Bob", 25),
        ];

        engine.rebuild_from_documents(&docs);

        let filter = bson::doc! { "name": "Alice" };
        let candidates = engine.find_candidate_ids(&filter);
        assert!(candidates.is_some());
        assert_eq!(candidates.unwrap().len(), 1);
    }

    // drop_all_indexes removes everything except `_id_`.
    #[test]
    fn test_drop_all_indexes() {
        let mut engine = IndexEngine::new();
        engine.create_index(bson::doc! { "a": 1 }, IndexOptions::default()).unwrap();
        engine.create_index(bson::doc! { "b": 1 }, IndexOptions::default()).unwrap();
        // `_id_` plus the two just created.
        assert_eq!(engine.list_indexes().len(), 3);

        engine.drop_all_indexes();
        assert_eq!(engine.list_indexes().len(), 1);
        assert!(engine.index_exists("_id_"));
    }
}
|
||||
15
rust/crates/rustdb-index/src/error.rs
Normal file
15
rust/crates/rustdb-index/src/error.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
/// Errors from index operations.
#[derive(Debug, thiserror::Error)]
pub enum IndexError {
    /// A unique index rejected an insert/update because another document
    /// already holds the key value.
    #[error("Duplicate key error: index '{index}' has duplicate value for key {key}")]
    DuplicateKey { index: String, key: String },

    /// The named index does not exist.
    #[error("Index not found: {0}")]
    IndexNotFound(String),

    /// The index specification was malformed (e.g. an empty key document).
    #[error("Invalid index specification: {0}")]
    InvalidIndex(String),

    /// Attempted to drop a protected index such as `_id_`.
    #[error("Cannot drop protected index: {0}")]
    ProtectedIndex(String),
}
|
||||
7
rust/crates/rustdb-index/src/lib.rs
Normal file
7
rust/crates/rustdb-index/src/lib.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
mod engine;
|
||||
mod planner;
|
||||
pub mod error;
|
||||
|
||||
pub use engine::{IndexEngine, IndexInfo, IndexOptions};
|
||||
pub use planner::{QueryPlan, QueryPlanner};
|
||||
pub use error::IndexError;
|
||||
239
rust/crates/rustdb-index/src/planner.rs
Normal file
239
rust/crates/rustdb-index/src/planner.rs
Normal file
@@ -0,0 +1,239 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use bson::{Bson, Document};
|
||||
use tracing::debug;
|
||||
|
||||
use crate::engine::IndexEngine;
|
||||
|
||||
/// The execution plan for a query, as chosen by [`QueryPlanner::plan`].
#[derive(Debug, Clone)]
pub enum QueryPlan {
    /// Full collection scan - no suitable index found.
    CollScan,
    /// Index scan with exact/equality matches.
    IxScan {
        /// Name of the index used.
        index_name: String,
        /// Candidate document IDs from the index (a superset: the caller
        /// must still apply the full filter).
        candidate_ids: HashSet<String>,
    },
    /// Index scan with range-based matches ($gt/$gte/$lt/$lte present).
    IxScanRange {
        /// Name of the index used.
        index_name: String,
        /// Candidate document IDs from the range scan (a superset: the
        /// caller must still apply the full filter).
        candidate_ids: HashSet<String>,
    },
}
|
||||
|
||||
/// Plans query execution by selecting the best available index.
/// Stateless: all methods are associated functions.
pub struct QueryPlanner;
|
||||
|
||||
impl QueryPlanner {
|
||||
/// Analyze a filter and the available indexes to produce a query plan.
|
||||
pub fn plan(filter: &Document, engine: &IndexEngine) -> QueryPlan {
|
||||
if filter.is_empty() {
|
||||
debug!("Empty filter -> CollScan");
|
||||
return QueryPlan::CollScan;
|
||||
}
|
||||
|
||||
let indexes = engine.list_indexes();
|
||||
let mut best_plan: Option<QueryPlan> = None;
|
||||
let mut best_score: f64 = 0.0;
|
||||
|
||||
for idx_info in &indexes {
|
||||
let index_fields: Vec<String> = idx_info.key.keys().map(|k| k.to_string()).collect();
|
||||
|
||||
let mut matched = false;
|
||||
let mut score: f64 = 0.0;
|
||||
let mut is_range = false;
|
||||
|
||||
for field in &index_fields {
|
||||
if let Some(condition) = filter.get(field) {
|
||||
matched = true;
|
||||
let field_score = Self::score_condition(condition);
|
||||
score += field_score;
|
||||
|
||||
if Self::is_range_condition(condition) {
|
||||
is_range = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !matched {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Unique index bonus
|
||||
if idx_info.unique {
|
||||
score += 0.5;
|
||||
}
|
||||
|
||||
if score > best_score {
|
||||
best_score = score;
|
||||
|
||||
// Try to get candidates from the engine
|
||||
// We build a sub-filter with only the fields this index covers
|
||||
let mut sub_filter = Document::new();
|
||||
for field in &index_fields {
|
||||
if let Some(val) = filter.get(field) {
|
||||
sub_filter.insert(field.clone(), val.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(candidates) = engine.find_candidate_ids(&sub_filter) {
|
||||
if is_range {
|
||||
best_plan = Some(QueryPlan::IxScanRange {
|
||||
index_name: idx_info.name.clone(),
|
||||
candidate_ids: candidates,
|
||||
});
|
||||
} else {
|
||||
best_plan = Some(QueryPlan::IxScan {
|
||||
index_name: idx_info.name.clone(),
|
||||
candidate_ids: candidates,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match best_plan {
|
||||
Some(plan) => {
|
||||
debug!(score = best_score, "Selected index plan");
|
||||
plan
|
||||
}
|
||||
None => {
|
||||
debug!("No suitable index found -> CollScan");
|
||||
QueryPlan::CollScan
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Score a filter condition for index selectivity.
|
||||
/// Higher scores indicate more selective (better) index usage.
|
||||
fn score_condition(condition: &Bson) -> f64 {
|
||||
match condition {
|
||||
Bson::Document(doc) if Self::has_operators(doc) => {
|
||||
let mut score: f64 = 0.0;
|
||||
for (op, _) in doc {
|
||||
score += match op.as_str() {
|
||||
"$eq" => 2.0,
|
||||
"$in" => 1.5,
|
||||
"$gt" | "$gte" | "$lt" | "$lte" => 1.0,
|
||||
_ => 0.0,
|
||||
};
|
||||
}
|
||||
score
|
||||
}
|
||||
// Direct equality
|
||||
_ => 2.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a condition involves range operators.
|
||||
fn is_range_condition(condition: &Bson) -> bool {
|
||||
match condition {
|
||||
Bson::Document(doc) => {
|
||||
doc.keys().any(|k| matches!(k.as_str(), "$gt" | "$gte" | "$lt" | "$lte"))
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn has_operators(doc: &Document) -> bool {
|
||||
doc.keys().any(|k| k.starts_with('$'))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::engine::IndexOptions;
    use bson::oid::ObjectId;

    // No filter means no index can help: always a collection scan.
    #[test]
    fn test_empty_filter_collscan() {
        let engine = IndexEngine::new();
        let plan = QueryPlanner::plan(&bson::doc! {}, &engine);
        assert!(matches!(plan, QueryPlan::CollScan));
    }

    // Point lookup on `_id` uses the built-in `_id_` index.
    #[test]
    fn test_id_equality_ixscan() {
        let mut engine = IndexEngine::new();
        let oid = ObjectId::new();
        let doc = bson::doc! { "_id": oid.clone(), "name": "Alice" };
        engine.on_insert(&doc).unwrap();

        let filter = bson::doc! { "_id": oid };
        let plan = QueryPlanner::plan(&filter, &engine);
        assert!(matches!(plan, QueryPlan::IxScan { .. }));
    }

    // Equality on a user-created index produces an IxScan.
    #[test]
    fn test_indexed_field_ixscan() {
        let mut engine = IndexEngine::new();
        engine.create_index(
            bson::doc! { "status": 1 },
            IndexOptions::default(),
        ).unwrap();

        let doc = bson::doc! { "_id": ObjectId::new(), "status": "active" };
        engine.on_insert(&doc).unwrap();

        let filter = bson::doc! { "status": "active" };
        let plan = QueryPlanner::plan(&filter, &engine);
        assert!(matches!(plan, QueryPlan::IxScan { .. }));
    }

    // A filter on a field without any index falls back to CollScan.
    #[test]
    fn test_unindexed_field_collscan() {
        let engine = IndexEngine::new();
        let filter = bson::doc! { "unindexed_field": "value" };
        let plan = QueryPlanner::plan(&filter, &engine);
        assert!(matches!(plan, QueryPlan::CollScan));
    }

    // Range operators on an indexed field select the range-scan plan variant.
    #[test]
    fn test_range_query_ixscan_range() {
        let mut engine = IndexEngine::new();
        engine.create_index(
            bson::doc! { "age": 1 },
            IndexOptions::default(),
        ).unwrap();

        let doc = bson::doc! { "_id": ObjectId::new(), "age": 30 };
        engine.on_insert(&doc).unwrap();

        let filter = bson::doc! { "age": { "$gte": 25, "$lt": 35 } };
        let plan = QueryPlanner::plan(&filter, &engine);
        assert!(matches!(plan, QueryPlan::IxScanRange { .. }));
    }

    // With a unique single-field index and a non-unique compound index both
    // covering "email", the unique one's score bonus should win.
    #[test]
    fn test_unique_index_preferred() {
        let mut engine = IndexEngine::new();
        engine.create_index(
            bson::doc! { "email": 1 },
            IndexOptions { unique: true, ..Default::default() },
        ).unwrap();
        engine.create_index(
            bson::doc! { "email": 1, "name": 1 },
            IndexOptions { name: Some("email_name".to_string()), ..Default::default() },
        ).unwrap();

        let doc = bson::doc! { "_id": ObjectId::new(), "email": "a@b.com", "name": "Alice" };
        engine.on_insert(&doc).unwrap();

        let filter = bson::doc! { "email": "a@b.com" };
        let plan = QueryPlanner::plan(&filter, &engine);

        // The unique index on email should be preferred (higher score)
        match plan {
            QueryPlan::IxScan { index_name, .. } => {
                assert_eq!(index_name, "email_1");
            }
            _ => panic!("Expected IxScan"),
        }
    }
}
|
||||
Reference in New Issue
Block a user