feat(search): Improve search functionality: update documentation, refine Lucene query transformation, and add advanced search tests

This commit is contained in:
2025-04-21 15:27:55 +00:00
parent 0834ec5c91
commit 23b499b3a8
7 changed files with 373 additions and 76 deletions

View File

@@ -329,7 +329,16 @@ export class LuceneParser {
* FIXED VERSION - proper MongoDB query structure
*/
export class LuceneToMongoTransformer {
// Field names searched by term and phrase queries that arrive without explicit search fields.
private defaultFields: string[];

/**
 * @param defaultFields Fallback field names for term/phrase queries; defaults to an empty list.
 */
constructor(defaultFields: string[] = []) {
  this.defaultFields = defaultFields;
}
/**
 * Backslash-escape every RegExp metacharacter in `input` so the result
 * matches the original text literally when used as a pattern.
 *
 * @param input Raw text to embed in a regular expression.
 * @returns `input` with each metacharacter prefixed by a backslash.
 */
private escapeRegex(input: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // '$&' re-inserts the matched character after the added backslash.
  const escaped = input.replace(metaCharacters, '\\$&');
  return escaped;
}
/**
* Transform a Lucene AST node to a MongoDB query
@@ -366,18 +375,21 @@ export class LuceneToMongoTransformer {
* FIXED: properly structured $or query for multiple fields
*/
private transformTerm(node: TermNode, searchFields?: string[]): any {
  const term = node.value;

  // Caller-supplied fields take precedence; otherwise use the transformer's default fields.
  const fields = searchFields && searchFields.length > 0 ? searchFields : this.defaultFields;

  if (fields.length === 0) {
    // MongoDB rejects an empty $or array, so with no field to target fall back
    // to a text search (requires a text index on the collection).
    return { $text: { $search: term } };
  }

  // Terms containing the Lucene wildcards '*' or '?' are converted by
  // luceneWildcardToRegex; any other term is escaped so it matches literally.
  const hasWildcard = term.includes('*') || term.includes('?');
  const pattern = hasWildcard ? this.luceneWildcardToRegex(term) : this.escapeRegex(term);

  // One case-insensitive regex condition per field, combined with $or.
  return {
    $or: fields.map((field) => ({
      [field]: { $regex: pattern, $options: 'i' },
    })),
  };
}
/**
@@ -385,17 +397,14 @@ export class LuceneToMongoTransformer {
* FIXED: properly structured $or query for multiple fields
*/
private transformPhrase(node: PhraseNode, searchFields?: string[]): any {
  // Caller-supplied fields take precedence; otherwise use the transformer's default fields.
  const fields = searchFields && searchFields.length > 0 ? searchFields : this.defaultFields;

  if (fields.length === 0) {
    // MongoDB rejects an empty $or array, so with no field to target fall back
    // to a quoted text search (requires a text index on the collection).
    return { $text: { $search: `"${node.value}"` } };
  }

  // Trim first: leading/trailing whitespace would otherwise produce empty parts
  // and a pattern that demands whitespace around the phrase.
  const words = node.value
    .trim()
    .split(/\s+/)
    .map((word) => this.escapeRegex(word));

  // Words are matched literally, in order, separated by one or more whitespace characters.
  const pattern = words.join('\\s+');

  // One case-insensitive regex condition per field, combined with $or.
  return {
    $or: fields.map((field) => ({
      [field]: { $regex: pattern, $options: 'i' },
    })),
  };
}
/**
@@ -429,9 +438,14 @@ export class LuceneToMongoTransformer {
};
}
// Special case for exact term matches on fields
// Special case for exact term matches on fields (supporting wildcard characters)
if (node.value.type === 'TERM') {
return { [node.field]: { $regex: (node.value as TermNode).value, $options: 'i' } };
const val = (node.value as TermNode).value;
if (val.includes('*') || val.includes('?')) {
const regex = this.luceneWildcardToRegex(val);
return { [node.field]: { $regex: regex, $options: 'i' } };
}
return { [node.field]: { $regex: val, $options: 'i' } };
}
// Special case for phrase matches on fields
@@ -691,7 +705,8 @@ export class SmartdataLuceneAdapter {
*/
constructor(defaultSearchFields?: string[]) {
this.parser = new LuceneParser();
this.transformer = new LuceneToMongoTransformer();
// Pass default searchable fields into transformer
this.transformer = new LuceneToMongoTransformer(defaultSearchFields || []);
if (defaultSearchFields) {
this.defaultSearchFields = defaultSearchFields;
}
@@ -704,7 +719,7 @@ export class SmartdataLuceneAdapter {
*/
convert(luceneQuery: string, searchFields?: string[]): any {
try {
// For simple single term queries, create a simpler query structure
// For simple single-term queries (no field:, boolean, grouping), use simpler regex
if (
!luceneQuery.includes(':') &&
!luceneQuery.includes(' AND ') &&
@@ -713,13 +728,17 @@ export class SmartdataLuceneAdapter {
!luceneQuery.includes('(') &&
!luceneQuery.includes('[')
) {
// This is a simple term, use a more direct approach
const fieldsToSearch = searchFields || this.defaultSearchFields;
if (fieldsToSearch && fieldsToSearch.length > 0) {
// Handle wildcard characters in query
let pattern = luceneQuery;
if (luceneQuery.includes('*') || luceneQuery.includes('?')) {
// Use transformer to convert wildcard pattern
pattern = this.transformer.luceneWildcardToRegex(luceneQuery);
}
return {
$or: fieldsToSearch.map((field) => ({
[field]: { $regex: luceneQuery, $options: 'i' },
[field]: { $regex: pattern, $options: 'i' },
})),
};
}