feat(search): Improve search functionality: update documentation, refine Lucene query transformation, and add advanced search tests

This commit is contained in:
2025-04-21 15:27:55 +00:00
parent 0834ec5c91
commit 23b499b3a8
7 changed files with 373 additions and 76 deletions

View File

@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@push.rocks/smartdata',
version: '5.9.2',
version: '5.10.0',
description: 'An advanced library for NoSQL data organization and manipulation using TypeScript with support for MongoDB, data validation, collections, and custom data types.'
}

View File

@ -343,22 +343,72 @@ export class SmartDataDbDoc<T extends TImplements, TImplements, TManager extends
if (searchableFields.length === 0) {
throw new Error(`No searchable fields defined for class ${className}`);
}
// field:value exact match (case-sensitive for non-regex fields)
const fv = query.match(/^(\w+):(.+)$/);
if (fv) {
const field = fv[1];
const value = fv[2];
// empty query -> return all
const q = query.trim();
if (!q) {
return await (this as any).getInstances({});
}
// simple exact field:value (no spaces, no wildcards, no quotes)
const simpleExact = q.match(/^(\w+):([^"'\*\?\s]+)$/);
if (simpleExact) {
const field = simpleExact[1];
const value = simpleExact[2];
if (!searchableFields.includes(field)) {
throw new Error(`Field '${field}' is not searchable for class ${className}`);
}
return await (this as any).getInstances({ [field]: value });
}
// safe regex across all searchable fields (case-insensitive)
const escaped = escapeForRegex(query);
const orConditions = searchableFields.map((field) => ({
[field]: { $regex: escaped, $options: 'i' },
}));
return await (this as any).getInstances({ $or: orConditions });
// quoted phrase across all searchable fields: exact match of phrase
const quoted = q.match(/^"(.+)"$|^'(.+)'$/);
if (quoted) {
const phrase = quoted[1] || quoted[2] || '';
// build regex that matches the exact phrase (allowing flexible whitespace)
const parts = phrase.split(/\s+/).map((t) => escapeForRegex(t));
const pattern = parts.join('\\s+');
const orConds = searchableFields.map((f) => ({ [f]: { $regex: pattern, $options: 'i' } }));
return await (this as any).getInstances({ $or: orConds });
}
// wildcard field:value (supports * and ?) -> direct regex on that field
const wildcardField = q.match(/^(\w+):(.+[*?].*)$/);
if (wildcardField) {
const field = wildcardField[1];
const pattern = wildcardField[2];
if (!searchableFields.includes(field)) {
throw new Error(`Field '${field}' is not searchable for class ${className}`);
}
// escape regex special chars except * and ?, then convert wildcards
const escaped = pattern.replace(/([.+^${}()|[\\]\\])/g, '\\$1');
const regexPattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.');
return await (this as any).getInstances({ [field]: { $regex: regexPattern, $options: 'i' } });
}
// wildcard plain term across all fields (supports * and ?)
if (!q.includes(':') && (q.includes('*') || q.includes('?'))) {
// build wildcard regex pattern: escape all except * and ? then convert
const escaped = q.replace(/([.+^${}()|[\\]\\])/g, '\\$1');
const pattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.');
const orConds = searchableFields.map((f) => ({ [f]: { $regex: pattern, $options: 'i' } }));
return await (this as any).getInstances({ $or: orConds });
}
// detect advanced Lucene syntax: field:value, wildcards, boolean, grouping
const luceneSyntax = /(\w+:[^\s]+)|\*|\?|\bAND\b|\bOR\b|\bNOT\b|\(|\)/;
if (luceneSyntax.test(q)) {
const filter = (this as any).createSearchFilter(q);
return await (this as any).getInstances(filter);
}
// multi-term unquoted -> AND of regex across fields for each term
const terms = q.split(/\s+/);
if (terms.length > 1) {
const andConds = terms.map((term) => {
const esc = escapeForRegex(term);
const ors = searchableFields.map((f) => ({ [f]: { $regex: esc, $options: 'i' } }));
return { $or: ors };
});
return await (this as any).getInstances({ $and: andConds });
}
// single term -> regex across all searchable fields
const esc = escapeForRegex(q);
const orConds = searchableFields.map((f) => ({ [f]: { $regex: esc, $options: 'i' } }));
return await (this as any).getInstances({ $or: orConds });
}

View File

@ -329,7 +329,16 @@ export class LuceneParser {
* FIXED VERSION - proper MongoDB query structure
*/
export class LuceneToMongoTransformer {
constructor() {}
private defaultFields: string[];
constructor(defaultFields: string[] = []) {
this.defaultFields = defaultFields;
}
/**
* Escape special characters for use in RegExp patterns
*/
private escapeRegex(input: string): string {
return input.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
/**
* Transform a Lucene AST node to a MongoDB query
@ -366,18 +375,21 @@ export class LuceneToMongoTransformer {
* FIXED: properly structured $or query for multiple fields
*/
private transformTerm(node: TermNode, searchFields?: string[]): any {
// If specific fields are provided, search across those fields
if (searchFields && searchFields.length > 0) {
// Create an $or query to search across multiple fields
const orConditions = searchFields.map((field) => ({
[field]: { $regex: node.value, $options: 'i' },
}));
return { $or: orConditions };
// Build regex pattern, support wildcard (*) and fuzzy (?) if present
const term = node.value;
// Determine regex pattern: wildcard conversion or exact escape
let pattern: string;
if (term.includes('*') || term.includes('?')) {
pattern = this.luceneWildcardToRegex(term);
} else {
pattern = this.escapeRegex(term);
}
// Otherwise, use text search (requires a text index on desired fields)
return { $text: { $search: node.value } };
// Search across provided fields or default fields
const fields = searchFields && searchFields.length > 0 ? searchFields : this.defaultFields;
const orConditions = fields.map((field) => ({
[field]: { $regex: pattern, $options: 'i' },
}));
return { $or: orConditions };
}
/**
@ -385,17 +397,14 @@ export class LuceneToMongoTransformer {
* FIXED: properly structured $or query for multiple fields
*/
private transformPhrase(node: PhraseNode, searchFields?: string[]): any {
// If specific fields are provided, search phrase across those fields
if (searchFields && searchFields.length > 0) {
const orConditions = searchFields.map((field) => ({
[field]: { $regex: `${node.value.replace(/\s+/g, '\\s+')}`, $options: 'i' },
}));
return { $or: orConditions };
}
// For phrases, we use a regex to ensure exact matches
return { $text: { $search: `"${node.value}"` } };
// Use regex across provided fields or default fields, respecting word boundaries
const parts = node.value.split(/\s+/).map((t) => this.escapeRegex(t));
const pattern = parts.join('\\s+');
const fields = searchFields && searchFields.length > 0 ? searchFields : this.defaultFields;
const orConditions = fields.map((field) => ({
[field]: { $regex: pattern, $options: 'i' },
}));
return { $or: orConditions };
}
/**
@ -429,9 +438,14 @@ export class LuceneToMongoTransformer {
};
}
// Special case for exact term matches on fields
// Special case for exact term matches on fields (supporting wildcard characters)
if (node.value.type === 'TERM') {
return { [node.field]: { $regex: (node.value as TermNode).value, $options: 'i' } };
const val = (node.value as TermNode).value;
if (val.includes('*') || val.includes('?')) {
const regex = this.luceneWildcardToRegex(val);
return { [node.field]: { $regex: regex, $options: 'i' } };
}
return { [node.field]: { $regex: val, $options: 'i' } };
}
// Special case for phrase matches on fields
@ -691,7 +705,8 @@ export class SmartdataLuceneAdapter {
*/
constructor(defaultSearchFields?: string[]) {
this.parser = new LuceneParser();
this.transformer = new LuceneToMongoTransformer();
// Pass default searchable fields into transformer
this.transformer = new LuceneToMongoTransformer(defaultSearchFields || []);
if (defaultSearchFields) {
this.defaultSearchFields = defaultSearchFields;
}
@ -704,7 +719,7 @@ export class SmartdataLuceneAdapter {
*/
convert(luceneQuery: string, searchFields?: string[]): any {
try {
// For simple single term queries, create a simpler query structure
// For simple single-term queries (no field:, boolean, grouping), use simpler regex
if (
!luceneQuery.includes(':') &&
!luceneQuery.includes(' AND ') &&
@ -713,13 +728,17 @@ export class SmartdataLuceneAdapter {
!luceneQuery.includes('(') &&
!luceneQuery.includes('[')
) {
// This is a simple term, use a more direct approach
const fieldsToSearch = searchFields || this.defaultSearchFields;
if (fieldsToSearch && fieldsToSearch.length > 0) {
// Handle wildcard characters in query
let pattern = luceneQuery;
if (luceneQuery.includes('*') || luceneQuery.includes('?')) {
// Use transformer to convert wildcard pattern
pattern = this.transformer.luceneWildcardToRegex(luceneQuery);
}
return {
$or: fieldsToSearch.map((field) => ({
[field]: { $regex: luceneQuery, $options: 'i' },
[field]: { $regex: pattern, $options: 'i' },
})),
};
}