Files
smartdb/rust/crates/rustdb-query/src/distinct.rs
T

81 lines
2.1 KiB
Rust

use bson::{Bson, Document};
use std::collections::HashSet;
use crate::field_path::get_nested_value;
use crate::matcher::QueryMatcher;
/// Returns the distinct values found at `field` across `docs`, in first-seen order.
///
/// When `filter` is `Some`, only documents for which `QueryMatcher::matches`
/// returns `true` contribute values; when it is `None`, every document does.
///
/// A `field` containing a `.` is resolved through `get_nested_value`; any other
/// name is looked up directly on the document. Documents for which the lookup
/// yields nothing are skipped.
///
/// Array values are flattened: each element is considered a candidate value on
/// its own rather than the array as a whole.
pub fn distinct_values(
    docs: &[Document],
    field: &str,
    filter: Option<&Document>,
) -> Vec<Bson> {
    let mut seen = HashSet::new();
    let mut result = Vec::new();

    // The lookup strategy depends only on `field`, so decide it once up front
    // instead of re-scanning the field name for every document.
    let is_nested = field.contains('.');

    // Filter lazily: the matching documents are consumed exactly once, so
    // there is no need to materialize them in an intermediate `Vec`.
    let matching = docs.iter().filter(|d| match filter {
        Some(f) => QueryMatcher::matches(d, f),
        None => true,
    });

    for doc in matching {
        let value = if is_nested {
            get_nested_value(doc, field)
        } else {
            doc.get(field).cloned()
        };
        if let Some(val) = value {
            collect_distinct_values(&val, &mut seen, &mut result);
        }
    }

    result
}
/// Appends `value` to `result` unless an equivalent value was recorded before.
///
/// Arrays are never recorded themselves: each element is visited recursively,
/// so nested arrays are flattened at every depth.
///
/// Two values are considered equivalent when their `Debug` renderings are
/// identical; `seen` holds the renderings encountered so far.
fn collect_distinct_values(value: &Bson, seen: &mut HashSet<String>, result: &mut Vec<Bson>) {
    if let Bson::Array(items) = value {
        for item in items {
            collect_distinct_values(item, seen, result);
        }
        return;
    }

    // `insert` returns `true` only the first time a rendering is added.
    let fingerprint = format!("{:?}", value);
    let first_occurrence = seen.insert(fingerprint);
    if first_occurrence {
        result.push(value.clone());
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A scalar value repeated across documents is reported only once.
    #[test]
    fn test_distinct_simple() {
        let collection = [
            bson::doc! { "x": 1 },
            bson::doc! { "x": 2 },
            bson::doc! { "x": 1 },
            bson::doc! { "x": 3 },
        ];

        let unique = distinct_values(&collection, "x", None);

        // Expected distinct values: 1, 2, 3
        assert_eq!(unique.len(), 3);
    }

    /// Array fields contribute their individual elements, de-duplicated
    /// across documents.
    #[test]
    fn test_distinct_array_flattening() {
        let collection = [
            bson::doc! { "tags": ["a", "b"] },
            bson::doc! { "tags": ["b", "c"] },
        ];

        let unique = distinct_values(&collection, "tags", None);

        // Expected distinct values: a, b, c
        assert_eq!(unique.len(), 3);
    }
}