feat(search): Improve search functionality: update documentation, refine Lucene query transformation, and add advanced search tests
This commit is contained in:
@ -3,6 +3,6 @@
|
||||
*/
|
||||
// Package metadata for this build. Each key appears exactly once: the stale
// '5.9.2' version entry was dropped so the literal no longer has a duplicate
// `version` property.
export const commitinfo = {
  name: '@push.rocks/smartdata',
  version: '5.10.0',
  description: 'An advanced library for NoSQL data organization and manipulation using TypeScript with support for MongoDB, data validation, collections, and custom data types.',
};
|
||||
|
@ -343,22 +343,72 @@ export class SmartDataDbDoc<T extends TImplements, TImplements, TManager extends
|
||||
if (searchableFields.length === 0) {
|
||||
throw new Error(`No searchable fields defined for class ${className}`);
|
||||
}
|
||||
// field:value exact match (case-sensitive for non-regex fields)
|
||||
const fv = query.match(/^(\w+):(.+)$/);
|
||||
if (fv) {
|
||||
const field = fv[1];
|
||||
const value = fv[2];
|
||||
// empty query -> return all
|
||||
const q = query.trim();
|
||||
if (!q) {
|
||||
return await (this as any).getInstances({});
|
||||
}
|
||||
// simple exact field:value (no spaces, no wildcards, no quotes)
|
||||
const simpleExact = q.match(/^(\w+):([^"'\*\?\s]+)$/);
|
||||
if (simpleExact) {
|
||||
const field = simpleExact[1];
|
||||
const value = simpleExact[2];
|
||||
if (!searchableFields.includes(field)) {
|
||||
throw new Error(`Field '${field}' is not searchable for class ${className}`);
|
||||
}
|
||||
return await (this as any).getInstances({ [field]: value });
|
||||
}
|
||||
// safe regex across all searchable fields (case-insensitive)
|
||||
const escaped = escapeForRegex(query);
|
||||
const orConditions = searchableFields.map((field) => ({
|
||||
[field]: { $regex: escaped, $options: 'i' },
|
||||
}));
|
||||
return await (this as any).getInstances({ $or: orConditions });
|
||||
// quoted phrase across all searchable fields: exact match of phrase
|
||||
const quoted = q.match(/^"(.+)"$|^'(.+)'$/);
|
||||
if (quoted) {
|
||||
const phrase = quoted[1] || quoted[2] || '';
|
||||
// build regex that matches the exact phrase (allowing flexible whitespace)
|
||||
const parts = phrase.split(/\s+/).map((t) => escapeForRegex(t));
|
||||
const pattern = parts.join('\\s+');
|
||||
const orConds = searchableFields.map((f) => ({ [f]: { $regex: pattern, $options: 'i' } }));
|
||||
return await (this as any).getInstances({ $or: orConds });
|
||||
}
|
||||
// wildcard field:value (supports * and ?) -> direct regex on that field
const wildcardField = q.match(/^(\w+):(.+[*?].*)$/);
if (wildcardField) {
  const field = wildcardField[1];
  const pattern = wildcardField[2];
  if (!searchableFields.includes(field)) {
    throw new Error(`Field '${field}' is not searchable for class ${className}`);
  }
  // Escape every regex metacharacter except the wildcards * and ?. The
  // character class must contain `\]` and `\\` *inside* the brackets;
  // otherwise the class closes early and nothing is ever escaped.
  const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&');
  // Translate wildcards: * -> any run of characters, ? -> exactly one character.
  const regexPattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.');
  return await (this as any).getInstances({ [field]: { $regex: regexPattern, $options: 'i' } });
}
// wildcard plain term across all fields (supports * and ?)
if (!q.includes(':') && (q.includes('*') || q.includes('?'))) {
  // Same two-step conversion as above: escape metacharacters, then expand
  // the wildcards into their regex equivalents.
  const escaped = q.replace(/[.+^${}()|[\]\\]/g, '\\$&');
  const pattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.');
  // One case-insensitive regex condition per searchable field, OR-ed together.
  const orConds = searchableFields.map((f) => ({ [f]: { $regex: pattern, $options: 'i' } }));
  return await (this as any).getInstances({ $or: orConds });
}
|
||||
// detect advanced Lucene syntax: field:value, wildcards, boolean, grouping
|
||||
const luceneSyntax = /(\w+:[^\s]+)|\*|\?|\bAND\b|\bOR\b|\bNOT\b|\(|\)/;
|
||||
if (luceneSyntax.test(q)) {
|
||||
const filter = (this as any).createSearchFilter(q);
|
||||
return await (this as any).getInstances(filter);
|
||||
}
|
||||
// multi-term unquoted -> AND of regex across fields for each term
|
||||
const terms = q.split(/\s+/);
|
||||
if (terms.length > 1) {
|
||||
const andConds = terms.map((term) => {
|
||||
const esc = escapeForRegex(term);
|
||||
const ors = searchableFields.map((f) => ({ [f]: { $regex: esc, $options: 'i' } }));
|
||||
return { $or: ors };
|
||||
});
|
||||
return await (this as any).getInstances({ $and: andConds });
|
||||
}
|
||||
// single term -> regex across all searchable fields
|
||||
const esc = escapeForRegex(q);
|
||||
const orConds = searchableFields.map((f) => ({ [f]: { $regex: esc, $options: 'i' } }));
|
||||
return await (this as any).getInstances({ $or: orConds });
|
||||
}
|
||||
|
||||
|
||||
|
@ -329,7 +329,16 @@ export class LuceneParser {
|
||||
* FIXED VERSION - proper MongoDB query structure
|
||||
*/
|
||||
export class LuceneToMongoTransformer {
|
||||
/** Fields searched when a term or phrase is transformed without an explicit field list. */
private defaultFields: string[];

/**
 * @param defaultFields fields to fall back to when a transform call supplies
 *   no search fields; defaults to an empty list, so `new LuceneToMongoTransformer()`
 *   keeps working for existing callers.
 */
constructor(defaultFields: string[] = []) {
  this.defaultFields = defaultFields;
}
|
||||
/**
 * Make a string safe to embed in a regular expression.
 *
 * Each of the characters . * + ? ^ $ { } ( ) | [ ] \ is prefixed with a
 * backslash so that it is matched literally.
 *
 * @param input raw text to neutralise
 * @returns the text with every listed metacharacter backslash-escaped
 */
private escapeRegex(input: string): string {
  const escapedText = input.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return escapedText;
}
|
||||
|
||||
/**
|
||||
* Transform a Lucene AST node to a MongoDB query
|
||||
@ -366,18 +375,21 @@ export class LuceneToMongoTransformer {
|
||||
* FIXED: properly structured $or query for multiple fields
|
||||
*/
|
||||
private transformTerm(node: TermNode, searchFields?: string[]): any {
  const term = node.value;

  // Terms containing * or ? are handed to luceneWildcardToRegex (defined
  // elsewhere in this class); every other term is escaped so that it is
  // matched literally rather than interpreted as a regular expression.
  const pattern =
    term.includes('*') || term.includes('?')
      ? this.luceneWildcardToRegex(term)
      : this.escapeRegex(term);

  // Explicit fields win; otherwise use the fields given to the constructor.
  const fields = searchFields && searchFields.length > 0 ? searchFields : this.defaultFields;

  // MongoDB rejects an empty $or array, so with no fields at all fall back
  // to a text search (requires a text index on the desired fields).
  if (!fields || fields.length === 0) {
    return { $text: { $search: term } };
  }

  // One case-insensitive regex condition per field, OR-ed together.
  const orConditions = fields.map((field) => ({
    [field]: { $regex: pattern, $options: 'i' },
  }));
  return { $or: orConditions };
}
|
||||
|
||||
/**
|
||||
@ -385,17 +397,14 @@ export class LuceneToMongoTransformer {
|
||||
* FIXED: properly structured $or query for multiple fields
|
||||
*/
|
||||
private transformPhrase(node: PhraseNode, searchFields?: string[]): any {
  // Explicit fields win; otherwise use the fields given to the constructor.
  const fields = searchFields && searchFields.length > 0 ? searchFields : this.defaultFields;

  // MongoDB rejects an empty $or array, so with no fields at all fall back
  // to a quoted text search (requires a text index on the desired fields).
  if (!fields || fields.length === 0) {
    return { $text: { $search: `"${node.value}"` } };
  }

  // Escape each word and join the words with \s+, so the phrase matches with
  // its words in order and any amount of whitespace between them. No
  // word-boundary anchors are added, so the phrase may match inside longer text.
  const pattern = node.value
    .split(/\s+/)
    .filter((word) => word.length > 0) // ignore leading/trailing whitespace
    .map((word) => this.escapeRegex(word))
    .join('\\s+');

  // One case-insensitive regex condition per field, OR-ed together.
  const orConditions = fields.map((field) => ({
    [field]: { $regex: pattern, $options: 'i' },
  }));
  return { $or: orConditions };
}
|
||||
|
||||
/**
|
||||
@ -429,9 +438,14 @@ export class LuceneToMongoTransformer {
|
||||
};
|
||||
}
|
||||
|
||||
// Special case for term matches on fields (supporting wildcard characters)
if (node.value.type === 'TERM') {
  const val = (node.value as TermNode).value;
  // Values containing * or ? go through luceneWildcardToRegex; any other
  // value is escaped so regex metacharacters in it are matched literally,
  // in line with how transformTerm treats unfielded terms.
  const regex =
    val.includes('*') || val.includes('?')
      ? this.luceneWildcardToRegex(val)
      : this.escapeRegex(val);
  return { [node.field]: { $regex: regex, $options: 'i' } };
}
|
||||
|
||||
// Special case for phrase matches on fields
|
||||
@ -691,7 +705,8 @@ export class SmartdataLuceneAdapter {
|
||||
*/
|
||||
constructor(defaultSearchFields?: string[]) {
|
||||
this.parser = new LuceneParser();
// Build the transformer once, passing the default searchable fields (or an
// empty list when none were given).
this.transformer = new LuceneToMongoTransformer(defaultSearchFields || []);
|
||||
if (defaultSearchFields) {
|
||||
this.defaultSearchFields = defaultSearchFields;
|
||||
}
|
||||
@ -704,7 +719,7 @@ export class SmartdataLuceneAdapter {
|
||||
*/
|
||||
convert(luceneQuery: string, searchFields?: string[]): any {
|
||||
try {
|
||||
// For simple single term queries, create a simpler query structure
|
||||
// For simple single-term queries (no field:, boolean, grouping), use simpler regex
|
||||
if (
|
||||
!luceneQuery.includes(':') &&
|
||||
!luceneQuery.includes(' AND ') &&
|
||||
@ -713,13 +728,17 @@ export class SmartdataLuceneAdapter {
|
||||
!luceneQuery.includes('(') &&
|
||||
!luceneQuery.includes('[')
|
||||
) {
|
||||
// This is a simple term, use a more direct approach
|
||||
const fieldsToSearch = searchFields || this.defaultSearchFields;
|
||||
|
||||
if (fieldsToSearch && fieldsToSearch.length > 0) {
  // Queries containing * or ? are converted by the transformer; any other
  // query is escaped here so characters such as '.', '+' or ')' are matched
  // literally instead of producing an unintended or invalid regex.
  const hasWildcard = luceneQuery.includes('*') || luceneQuery.includes('?');
  const pattern = hasWildcard
    ? this.transformer.luceneWildcardToRegex(luceneQuery)
    : luceneQuery.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // One case-insensitive regex condition per field, OR-ed together.
  return {
    $or: fieldsToSearch.map((field) => ({
      [field]: { $regex: pattern, $options: 'i' },
    })),
  };
}
|
||||
|
Reference in New Issue
Block a user