81 lines
2.1 KiB
Rust
81 lines
2.1 KiB
Rust
use bson::{Bson, Document};
|
|
use std::collections::HashSet;
|
|
|
|
use crate::field_path::get_nested_value;
|
|
use crate::matcher::QueryMatcher;
|
|
|
|
/// Get distinct values for a field across documents, with optional filter.
|
|
/// Handles array flattening (each array element counted separately).
|
|
pub fn distinct_values(
|
|
docs: &[Document],
|
|
field: &str,
|
|
filter: Option<&Document>,
|
|
) -> Vec<Bson> {
|
|
let filtered: Vec<&Document> = if let Some(f) = filter {
|
|
docs.iter().filter(|d| QueryMatcher::matches(d, f)).collect()
|
|
} else {
|
|
docs.iter().collect()
|
|
};
|
|
|
|
let mut seen = HashSet::new();
|
|
let mut result = Vec::new();
|
|
|
|
for doc in &filtered {
|
|
let value = if field.contains('.') {
|
|
get_nested_value(doc, field)
|
|
} else {
|
|
doc.get(field).cloned()
|
|
};
|
|
|
|
if let Some(val) = value {
|
|
collect_distinct_values(&val, &mut seen, &mut result);
|
|
}
|
|
}
|
|
|
|
result
|
|
}
|
|
|
|
fn collect_distinct_values(value: &Bson, seen: &mut HashSet<String>, result: &mut Vec<Bson>) {
|
|
match value {
|
|
Bson::Array(arr) => {
|
|
// Flatten: each array element is a separate value
|
|
for elem in arr {
|
|
collect_distinct_values(elem, seen, result);
|
|
}
|
|
}
|
|
_ => {
|
|
let key = format!("{:?}", value);
|
|
if seen.insert(key) {
|
|
result.push(value.clone());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Repeated scalars collapse to a single entry, kept in first-seen order.
    #[test]
    fn test_distinct_simple() {
        let docs = vec![
            bson::doc! { "x": 1 },
            bson::doc! { "x": 2 },
            bson::doc! { "x": 1 },
            bson::doc! { "x": 3 },
        ];
        let result = distinct_values(&docs, "x", None);
        // Pin the values and their order, not just the count.
        assert_eq!(
            result,
            vec![Bson::Int32(1), Bson::Int32(2), Bson::Int32(3)]
        );
    }

    /// Array fields contribute each element separately, de-duplicated
    /// across documents.
    #[test]
    fn test_distinct_array_flattening() {
        let docs = vec![
            bson::doc! { "tags": ["a", "b"] },
            bson::doc! { "tags": ["b", "c"] },
        ];
        let result = distinct_values(&docs, "tags", None);
        assert_eq!(
            result,
            vec![
                Bson::String("a".to_string()),
                Bson::String("b".to_string()),
                Bson::String("c".to_string()),
            ]
        );
    }

    /// Documents that do not contain the field contribute no values.
    #[test]
    fn test_distinct_missing_field_is_skipped() {
        let docs = vec![bson::doc! { "y": 1 }, bson::doc! { "y": 2 }];
        let result = distinct_values(&docs, "x", None);
        assert!(result.is_empty());
    }
}
|