Files
smartdb/rust/crates/rustdb-query/src/matcher.rs

575 lines
18 KiB
Rust

use bson::{Bson, Document};
use regex::Regex;
use crate::field_path::get_nested_value;
/// Query matching engine.
///
/// Evaluates filter documents against BSON documents. This is a stateless unit
/// struct: everything is exposed as associated functions, so no instance is
/// needed to run a match.
pub struct QueryMatcher;
impl QueryMatcher {
/// Test whether a single document matches a filter.
///
/// Every top-level entry of `filter` must be satisfied for the document to
/// match, so an empty filter matches any document.
pub fn matches(doc: &Document, filter: &Document) -> bool {
Self::matches_filter(doc, filter)
}
/// Collect clones of every document in `docs` that satisfies `filter`.
///
/// Input order is preserved. An empty filter short-circuits to a plain copy
/// of the whole slice without evaluating anything.
pub fn filter(docs: &[Document], filter: &Document) -> Vec<Document> {
    if filter.is_empty() {
        return docs.to_vec();
    }
    let mut matched = Vec::new();
    for candidate in docs {
        if Self::matches_filter(candidate, filter) {
            matched.push(candidate.clone());
        }
    }
    matched
}
/// Return a clone of the first document in `docs` that satisfies `filter`,
/// or `None` when no document matches.
pub fn find_one(docs: &[Document], filter: &Document) -> Option<Document> {
    for candidate in docs {
        if Self::matches_filter(candidate, filter) {
            return Some(candidate.clone());
        }
    }
    None
}
/// Core matching routine: `doc` matches when every `(key, condition)` entry
/// of `filter` holds. Evaluation stops at the first failing entry, and an
/// empty filter is trivially satisfied.
fn matches_filter(doc: &Document, filter: &Document) -> bool {
    filter
        .iter()
        .all(|(key, condition)| Self::matches_condition(doc, key, condition))
}
/// Evaluate one top-level filter entry against `doc`.
///
/// Logical keys (`$and`, `$or`, `$nor`, `$not`) are dispatched to their
/// handlers. Any other key is treated as a field path: a sub-document that
/// contains `$`-prefixed keys is evaluated as a set of operators, and every
/// other value is compared by implicit equality.
fn matches_condition(doc: &Document, key: &str, condition: &Bson) -> bool {
    match (key, condition) {
        ("$and", _) => Self::match_logical_and(doc, condition),
        ("$or", _) => Self::match_logical_or(doc, condition),
        ("$nor", _) => Self::match_logical_nor(doc, condition),
        ("$not", _) => Self::match_logical_not(doc, condition),
        // `$expr` is accepted but not evaluated yet: it never rejects a document.
        ("$expr", _) => true,
        // Field condition expressed through operators, e.g. `{ age: { $gt: 3 } }`.
        (field, Bson::Document(operators)) if Self::has_operators(operators) => {
            Self::match_field_operators(doc, field, operators)
        }
        // Field condition expressed as a literal: implicit equality.
        (field, literal) => Self::match_equality(doc, field, literal),
    }
}
/// Report whether `doc` contains at least one key starting with `$`, which
/// is how an operator document is told apart from a literal sub-document.
fn has_operators(doc: &Document) -> bool {
    for key in doc.keys() {
        if key.starts_with('$') {
            return true;
        }
    }
    false
}
/// Public accessor for has_operators (used by update engine).
///
/// Returns `true` when at least one key of `doc` starts with `$`.
pub fn has_operators_pub(doc: &Document) -> bool {
Self::has_operators(doc)
}
/// Public accessor for bson_compare (used by update engine).
///
/// Returns `None` when the two values have no ordering defined by this
/// matcher (for example values of unrelated, non-numeric types).
pub fn bson_compare_pub(a: &Bson, b: &Bson) -> Option<std::cmp::Ordering> {
Self::bson_compare(a, b)
}
/// Implicit equality for `{ field: value }` filters.
///
/// A field that cannot be resolved only matches an expected `Null`.
fn match_equality(doc: &Document, field: &str, expected: &Bson) -> bool {
    if let Some(found) = Self::resolve_field(doc, field) {
        Self::bson_equals(&found, expected)
    } else {
        matches!(expected, Bson::Null)
    }
}
/// Evaluate an operator document such as `{ "$gt": 1, "$lt": 9 }` against
/// the value found at `field`.
///
/// All operators must hold (logical AND); evaluation stops at the first one
/// that fails. The field is resolved once and shared by every operator.
fn match_field_operators(doc: &Document, field: &str, operators: &Document) -> bool {
    let actual = Self::resolve_field(doc, field);
    operators
        .iter()
        .all(|(name, operand)| match name.as_str() {
            "$eq" => Self::op_eq(&actual, operand),
            "$ne" => Self::op_ne(&actual, operand),
            "$gt" => Self::op_cmp(&actual, operand, CmpOp::Gt),
            "$gte" => Self::op_cmp(&actual, operand, CmpOp::Gte),
            "$lt" => Self::op_cmp(&actual, operand, CmpOp::Lt),
            "$lte" => Self::op_cmp(&actual, operand, CmpOp::Lte),
            "$in" => Self::op_in(&actual, operand),
            "$nin" => Self::op_nin(&actual, operand),
            "$exists" => Self::op_exists(&actual, operand),
            "$type" => Self::op_type(&actual, operand),
            // `$options` is a sibling key of `$regex`, so it is looked up here.
            "$regex" => Self::op_regex(&actual, operand, operators.get("$options")),
            "$not" => Self::op_not(doc, field, operand),
            "$elemMatch" => Self::op_elem_match(&actual, operand),
            "$size" => Self::op_size(&actual, operand),
            "$all" => Self::op_all(&actual, operand),
            "$mod" => Self::op_mod(&actual, operand),
            // Consumed by `$regex` above; never a constraint on its own.
            "$options" => true,
            // Unknown operators are skipped rather than rejecting the document.
            _ => true,
        })
}
/// Look up `field` in `doc` and return an owned copy of its value.
///
/// Names containing a `.` are delegated to `get_nested_value`; plain names
/// are read directly from the top level of the document.
fn resolve_field(doc: &Document, field: &str) -> Option<Bson> {
    if !field.contains('.') {
        return doc.get(field).cloned();
    }
    get_nested_value(doc, field)
}
/// Equality used by the matcher.
///
/// * Int32 / Int64 / Double compare by numeric value across types.
/// * An array on the left matches a non-array on the right when any of its
///   elements matches.
/// * Everything else uses `Bson`'s own `==`.
fn bson_equals(a: &Bson, b: &Bson) -> bool {
    match (a, b) {
        (Bson::Int32(narrow), Bson::Int64(wide)) | (Bson::Int64(wide), Bson::Int32(narrow)) => {
            i64::from(*narrow) == *wide
        }
        (Bson::Int32(int), Bson::Double(float)) | (Bson::Double(float), Bson::Int32(int)) => {
            f64::from(*int) == *float
        }
        (Bson::Int64(int), Bson::Double(float)) | (Bson::Double(float), Bson::Int64(int)) => {
            (*int as f64) == *float
        }
        // Array field vs. scalar operand: any element may satisfy the equality.
        (Bson::Array(items), other) if !matches!(other, Bson::Array(_)) => {
            items.iter().any(|item| Self::bson_equals(item, other))
        }
        _ => a == b,
    }
}
/// Order two BSON values.
///
/// Numeric types are comparable with each other. Strings, date-times,
/// booleans, object ids and nulls are comparable only with their own type.
/// Any other pairing, and any comparison involving a NaN double, yields `None`.
fn bson_compare(a: &Bson, b: &Bson) -> Option<std::cmp::Ordering> {
    let ordering = match (a, b) {
        // Pairs that involve a double are only partially ordered.
        (Bson::Double(lhs), Bson::Double(rhs)) => return lhs.partial_cmp(rhs),
        (Bson::Int32(lhs), Bson::Double(rhs)) => return f64::from(*lhs).partial_cmp(rhs),
        (Bson::Double(lhs), Bson::Int32(rhs)) => return lhs.partial_cmp(&f64::from(*rhs)),
        (Bson::Int64(lhs), Bson::Double(rhs)) => return (*lhs as f64).partial_cmp(rhs),
        (Bson::Double(lhs), Bson::Int64(rhs)) => return lhs.partial_cmp(&(*rhs as f64)),
        // Integer pairs, widened to 64 bits when the widths differ.
        (Bson::Int32(lhs), Bson::Int32(rhs)) => lhs.cmp(rhs),
        (Bson::Int64(lhs), Bson::Int64(rhs)) => lhs.cmp(rhs),
        (Bson::Int32(lhs), Bson::Int64(rhs)) => i64::from(*lhs).cmp(rhs),
        (Bson::Int64(lhs), Bson::Int32(rhs)) => lhs.cmp(&i64::from(*rhs)),
        // Same-type, totally ordered values.
        (Bson::String(lhs), Bson::String(rhs)) => lhs.cmp(rhs),
        (Bson::DateTime(lhs), Bson::DateTime(rhs)) => lhs.cmp(rhs),
        (Bson::Boolean(lhs), Bson::Boolean(rhs)) => lhs.cmp(rhs),
        (Bson::ObjectId(lhs), Bson::ObjectId(rhs)) => lhs.cmp(rhs),
        (Bson::Null, Bson::Null) => std::cmp::Ordering::Equal,
        _ => return None,
    };
    Some(ordering)
}
// --- Operator implementations ---
/// `$eq`: the resolved value must equal `expected` under `bson_equals`.
/// A missing field is equal only to `Null`.
fn op_eq(actual: &Option<Bson>, expected: &Bson) -> bool {
    actual
        .as_ref()
        .map_or(matches!(expected, Bson::Null), |found| {
            Self::bson_equals(found, expected)
        })
}
/// `$ne`: exact negation of `$eq`, so a missing field is "not equal" to
/// every operand except `Null`.
fn op_ne(actual: &Option<Bson>, expected: &Bson) -> bool {
!Self::op_eq(actual, expected)
}
/// Shared implementation of `$gt`, `$gte`, `$lt` and `$lte`.
///
/// A missing field never matches. For an array field the comparison holds if
/// any element satisfies it. Values that `bson_compare` cannot order never
/// match.
fn op_cmp(actual: &Option<Bson>, expected: &Bson, op: CmpOp) -> bool {
    let satisfies = |candidate: &Bson| {
        Self::bson_compare(candidate, expected).map_or(false, |ord| op.check(ord))
    };
    match actual {
        None => false,
        Some(Bson::Array(items)) => items.iter().any(|item| satisfies(item)),
        Some(scalar) => satisfies(scalar),
    }
}
/// `$in`: the field value must equal at least one entry of the operand list.
///
/// * A non-array operand never matches.
/// * A missing field matches only when the list contains `Null`.
/// * For an array field, either one of its elements or the array as a whole
///   may equal a list entry.
fn op_in(actual: &Option<Bson>, values: &Bson) -> bool {
    let candidates = if let Bson::Array(list) = values {
        list
    } else {
        return false;
    };
    let listed = |needle: &Bson| {
        candidates
            .iter()
            .any(|candidate| Self::bson_equals(needle, candidate))
    };
    match actual {
        None => candidates
            .iter()
            .any(|candidate| matches!(candidate, Bson::Null)),
        Some(found) => {
            // Element-wise check first (arrays only), then the value as a whole.
            let element_hit = match found {
                Bson::Array(elements) => elements.iter().any(|element| listed(element)),
                _ => false,
            };
            element_hit || listed(found)
        }
    }
}
/// `$nin`: exact negation of `$in`. Because `$in` rejects a non-array
/// operand, `$nin` with a non-array operand matches every document.
fn op_nin(actual: &Option<Bson>, values: &Bson) -> bool {
!Self::op_in(actual, values)
}
/// `$exists`: compares the presence of the field with the requested state.
///
/// The operand means "must be absent" only when it is `false`, Int32 `0` or
/// Int64 `0`; every other operand means "must be present".
fn op_exists(actual: &Option<Bson>, expected: &Bson) -> bool {
    let must_be_absent = matches!(
        expected,
        Bson::Boolean(false) | Bson::Int32(0) | Bson::Int64(0)
    );
    actual.is_some() != must_be_absent
}
fn op_type(actual: &Option<Bson>, expected: &Bson) -> bool {
let val = match actual {
Some(v) => v,
None => return false,
};
let type_num = match expected {
Bson::Int32(n) => *n,
Bson::String(s) => match s.as_str() {
"double" => 1,
"string" => 2,
"object" => 3,
"array" => 4,
"binData" => 5,
"objectId" => 7,
"bool" => 8,
"date" => 9,
"null" => 10,
"regex" => 11,
"int" => 16,
"long" => 18,
"decimal" => 19,
"number" => -1, // special: any numeric type
_ => return false,
},
_ => return false,
};
if type_num == -1 {
return matches!(val, Bson::Int32(_) | Bson::Int64(_) | Bson::Double(_));
}
let actual_type = match val {
Bson::Double(_) => 1,
Bson::String(_) => 2,
Bson::Document(_) => 3,
Bson::Array(_) => 4,
Bson::Binary(_) => 5,
Bson::ObjectId(_) => 7,
Bson::Boolean(_) => 8,
Bson::DateTime(_) => 9,
Bson::Null => 10,
Bson::RegularExpression(_) => 11,
Bson::Int32(_) => 16,
Bson::Int64(_) => 18,
Bson::Decimal128(_) => 19,
_ => 0,
};
actual_type == type_num
}
/// `$regex`: tests a string field against a regular expression.
///
/// * Only string field values can match; anything else (or a missing field)
///   is rejected.
/// * The pattern may be a plain string or a BSON regular expression.
/// * Flags come from the sibling `$options` string when present, otherwise
///   from the BSON regular expression itself. Only `i`, `m` and `s` are
///   honoured; they are translated to inline groups prepended to the pattern.
/// * A pattern that fails to compile matches nothing.
fn op_regex(actual: &Option<Bson>, pattern: &Bson, options: Option<&Bson>) -> bool {
    let haystack = if let Some(Bson::String(text)) = actual {
        text.as_str()
    } else {
        return false;
    };
    let (source, embedded_flags) = match pattern {
        Bson::String(text) => (text.as_str(), ""),
        Bson::RegularExpression(re) => (re.pattern.as_str(), re.options.as_str()),
        _ => return false,
    };
    let flags = match options {
        Some(Bson::String(explicit)) => explicit.as_str(),
        _ => embedded_flags,
    };
    let mut full_pattern = String::new();
    for &(flag, inline_group) in [('i', "(?i)"), ('m', "(?m)"), ('s', "(?s)")].iter() {
        if flags.contains(flag) {
            full_pattern.push_str(inline_group);
        }
    }
    full_pattern.push_str(source);
    Regex::new(&full_pattern).map_or(false, |compiled| compiled.is_match(haystack))
}
/// Field-level `$not`: inverts the wrapped condition for `field`.
///
/// * An operator document (`{ "$not": { "$gt": 5 } }`) is evaluated with
///   `match_field_operators` and negated.
/// * A BSON regular expression (`{ "$not": /^a/ }`) is evaluated with
///   `op_regex` and negated. Previously this form fell through to the
///   catch-all arm and matched every document.
/// * Any other operand is ignored and does not reject the document.
fn op_not(doc: &Document, field: &str, condition: &Bson) -> bool {
    match condition {
        Bson::Document(cond_doc) => !Self::match_field_operators(doc, field, cond_doc),
        Bson::RegularExpression(_) => {
            let actual = Self::resolve_field(doc, field);
            // Flags are taken from the regular expression value itself.
            !Self::op_regex(&actual, condition, None)
        }
        _ => true,
    }
}
/// `$elemMatch`: the field must be an array with at least one document
/// element that satisfies the whole condition document.
///
/// Non-array fields and non-document conditions never match.
/// NOTE(review): non-document array elements are always rejected, so an
/// operator-only condition over an array of scalars cannot match here.
fn op_elem_match(actual: &Option<Bson>, condition: &Bson) -> bool {
    match (actual, condition) {
        (Some(Bson::Array(elements)), Bson::Document(criteria)) => {
            elements.iter().any(|element| {
                matches!(element, Bson::Document(sub_doc) if Self::matches_filter(sub_doc, criteria))
            })
        }
        _ => false,
    }
}
/// `$size`: the field must be an array with exactly the requested number of
/// elements.
///
/// The operand must be an Int32 or Int64; any other operand, a non-array
/// field or a missing field never matches. Negative sizes never match.
fn op_size(actual: &Option<Bson>, expected: &Bson) -> bool {
    let arr = match actual {
        Some(Bson::Array(a)) => a,
        _ => return false,
    };
    let wanted = match expected {
        Bson::Int32(n) => i64::from(*n),
        Bson::Int64(n) => *n,
        _ => return false,
    };
    // Compare in 64-bit space. Narrowing the operand to `usize` with `as`
    // would truncate large Int64 values on 32-bit targets (2^32 -> 0) and
    // could spuriously match; both casts below are lossless instead
    // (`usize` is at most 64 bits wide, and `wanted` is known non-negative).
    wanted >= 0 && (arr.len() as u64) == (wanted as u64)
}
/// `$all`: the field must be an array containing every value of the operand
/// list (in any order), compared with `bson_equals`.
///
/// Non-array fields, missing fields and non-array operands never match. An
/// empty operand list also matches nothing, following MongoDB's documented
/// behaviour for `$all: []`; without the explicit check it would be
/// vacuously true and match every array.
fn op_all(actual: &Option<Bson>, expected: &Bson) -> bool {
    let arr = match actual {
        Some(Bson::Array(a)) => a,
        _ => return false,
    };
    let required = match expected {
        Bson::Array(a) => a,
        _ => return false,
    };
    !required.is_empty()
        && required
            .iter()
            .all(|needle| arr.iter().any(|elem| Self::bson_equals(elem, needle)))
}
/// `$mod`: matches when `field % divisor == remainder`.
///
/// The operand must be a two-element array `[divisor, remainder]` of Int32 or
/// Int64 values. The field must be numeric; doubles are truncated to an
/// integer with an `as i64` cast before the remainder is taken. A zero
/// divisor, a malformed operand, a non-numeric field or a missing field never
/// matches.
fn op_mod(actual: &Option<Bson>, expected: &Bson) -> bool {
    let dividend = match actual {
        Some(Bson::Int32(n)) => i64::from(*n),
        Some(Bson::Int64(n)) => *n,
        Some(Bson::Double(n)) => *n as i64,
        _ => return false,
    };
    let as_integer = |value: &Bson| match value {
        Bson::Int32(n) => Some(i64::from(*n)),
        Bson::Int64(n) => Some(*n),
        _ => None,
    };
    let (divisor, remainder) = match expected {
        Bson::Array(pair) if pair.len() == 2 => {
            match (as_integer(&pair[0]), as_integer(&pair[1])) {
                (Some(divisor), Some(remainder)) => (divisor, remainder),
                _ => return false,
            }
        }
        _ => return false,
    };
    if divisor == 0 {
        return false;
    }
    // `i64::MIN % -1` overflows and panics with the plain `%` operator;
    // `wrapping_rem` yields the mathematically correct 0 for that case and is
    // identical to `%` everywhere else.
    dividend.wrapping_rem(divisor) == remainder
}
// --- Logical operators ---
/// `$and`: every clause of the operand array must be a document that `doc`
/// satisfies. A non-array operand or a non-document clause fails the match.
fn match_logical_and(doc: &Document, conditions: &Bson) -> bool {
    let clauses = match conditions {
        Bson::Array(list) => list,
        _ => return false,
    };
    clauses.iter().all(|clause| {
        matches!(clause, Bson::Document(sub_filter) if Self::matches_filter(doc, sub_filter))
    })
}
/// `$or`: at least one clause of the operand array must be a document that
/// `doc` satisfies. Non-document clauses are skipped, and a non-array
/// operand never matches.
fn match_logical_or(doc: &Document, conditions: &Bson) -> bool {
    let clauses = match conditions {
        Bson::Array(list) => list,
        _ => return false,
    };
    clauses.iter().any(|clause| {
        matches!(clause, Bson::Document(sub_filter) if Self::matches_filter(doc, sub_filter))
    })
}
/// `$nor`: exact negation of `$or`. Because `$or` rejects a non-array
/// operand, `$nor` with a non-array operand matches every document.
fn match_logical_nor(doc: &Document, conditions: &Bson) -> bool {
!Self::match_logical_or(doc, conditions)
}
/// Top-level `$not`: matches when `doc` does NOT satisfy the wrapped filter
/// document. A non-document operand is ignored and never rejects.
fn match_logical_not(doc: &Document, condition: &Bson) -> bool {
    !matches!(condition, Bson::Document(negated) if Self::matches_filter(doc, negated))
}
}
/// The four ordering operators shared by `$gt`, `$gte`, `$lt` and `$lte`.
#[derive(Debug, Clone, Copy)]
enum CmpOp {
    Gt,
    Gte,
    Lt,
    Lte,
}

impl CmpOp {
    /// Report whether `ord` (the ordering of the field value relative to the
    /// operand) satisfies this operator.
    fn check(self, ord: std::cmp::Ordering) -> bool {
        use std::cmp::Ordering::{Equal, Greater, Less};
        // Truth table: every accepted (operator, ordering) pair is listed.
        matches!(
            (self, ord),
            (CmpOp::Gt, Greater)
                | (CmpOp::Gte, Greater)
                | (CmpOp::Gte, Equal)
                | (CmpOp::Lt, Less)
                | (CmpOp::Lte, Less)
                | (CmpOp::Lte, Equal)
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for `QueryMatcher::matches` that takes the filter by value,
    /// so `doc!` literals can be passed inline.
    fn hit(doc: &bson::Document, filter: bson::Document) -> bool {
        QueryMatcher::matches(doc, &filter)
    }

    #[test]
    fn test_simple_equality() {
        let person = bson::doc! { "name": "Alice", "age": 30 };
        assert!(hit(&person, bson::doc! { "name": "Alice" }));
        assert!(!hit(&person, bson::doc! { "name": "Bob" }));
    }

    #[test]
    fn test_comparison_operators() {
        let person = bson::doc! { "age": 30 };
        let satisfied = [
            bson::doc! { "age": { "$gt": 25 } },
            bson::doc! { "age": { "$gte": 30 } },
            bson::doc! { "age": { "$lt": 35 } },
            bson::doc! { "age": { "$lte": 30 } },
        ];
        for filter in satisfied.iter() {
            assert!(QueryMatcher::matches(&person, filter));
        }
        assert!(!hit(&person, bson::doc! { "age": { "$gt": 30 } }));
    }

    #[test]
    fn test_in_operator() {
        let record = bson::doc! { "status": "active" };
        assert!(hit(
            &record,
            bson::doc! { "status": { "$in": ["active", "pending"] } }
        ));
        assert!(!hit(&record, bson::doc! { "status": { "$in": ["closed"] } }));
    }

    #[test]
    fn test_exists_operator() {
        let record = bson::doc! { "name": "Alice" };
        assert!(hit(&record, bson::doc! { "name": { "$exists": true } }));
        assert!(!hit(&record, bson::doc! { "age": { "$exists": true } }));
        assert!(hit(&record, bson::doc! { "age": { "$exists": false } }));
    }

    #[test]
    fn test_logical_or() {
        let record = bson::doc! { "age": 30 };
        let one_branch_hits = bson::doc! { "$or": [{ "age": 30 }, { "age": 40 }] };
        let no_branch_hits = bson::doc! { "$or": [{ "age": 20 }, { "age": 40 }] };
        assert!(QueryMatcher::matches(&record, &one_branch_hits));
        assert!(!QueryMatcher::matches(&record, &no_branch_hits));
    }

    #[test]
    fn test_logical_and() {
        let record = bson::doc! { "age": 30, "name": "Alice" };
        let all_hold = bson::doc! { "$and": [{ "age": 30 }, { "name": "Alice" }] };
        let one_fails = bson::doc! { "$and": [{ "age": 30 }, { "name": "Bob" }] };
        assert!(QueryMatcher::matches(&record, &all_hold));
        assert!(!QueryMatcher::matches(&record, &one_fails));
    }

    #[test]
    fn test_dot_notation() {
        let record = bson::doc! { "address": { "city": "NYC" } };
        assert!(hit(&record, bson::doc! { "address.city": "NYC" }));
    }

    #[test]
    fn test_ne_operator() {
        let record = bson::doc! { "status": "active" };
        assert!(hit(&record, bson::doc! { "status": { "$ne": "closed" } }));
        assert!(!hit(&record, bson::doc! { "status": { "$ne": "active" } }));
    }

    #[test]
    fn test_cross_type_numeric_equality() {
        // Stored as Int32, queried as Int64: must still be equal.
        let record = bson::doc! { "count": 5_i32 };
        assert!(hit(&record, bson::doc! { "count": 5_i64 }));
    }

    #[test]
    fn test_empty_filter_matches_all() {
        let record = bson::doc! { "x": 1 };
        assert!(hit(&record, bson::doc! {}));
    }
}