feat(search): Improve search functionality: update documentation, refine Lucene query transformation, and add advanced search tests

This commit is contained in:
Philipp Kunz 2025-04-21 15:27:55 +00:00
parent 0834ec5c91
commit 23b499b3a8
7 changed files with 373 additions and 76 deletions

View File

@ -1,5 +1,13 @@
# Changelog
## 2025-04-21 - 5.10.0 - feat(search)
Improve search functionality: update documentation, refine Lucene query transformation, and add advanced search tests
- Updated readme.md with detailed Lucene-style search examples and use cases
- Enhanced LuceneToMongoTransformer to properly handle wildcard conversion and regex escaping
- Improved search query parsing in SmartDataDbDoc for field-specific, multi-term, and advanced Lucene syntax
- Added new advanced search tests covering boolean operators, grouping, quoted phrases, and wildcard queries
## 2025-04-18 - 5.9.2 - fix(documentation)
Update search API documentation to replace deprecated searchWithLucene examples with the unified search(query) API and clarify its behavior.

View File

@ -189,48 +189,42 @@ await user.delete(); // Delete the user from the database
### Search Functionality
SmartData provides powerful search capabilities with a Lucene-like query syntax and robust fallback mechanisms:
SmartData provides powerful, Lucene-style search capabilities with robust fallback mechanisms:
```typescript
// Define a model with searchable fields
@Collection(() => db)
class Product extends SmartDataDbDoc<Product, Product> {
@unI()
public id: string = 'product-id';
@svDb()
@searchable() // Mark this field as searchable
public name: string;
@svDb()
@searchable() // Mark this field as searchable
public description: string;
@svDb()
@searchable() // Mark this field as searchable
public category: string;
@svDb()
public price: number;
@unI() public id: string = 'product-id';
@svDb() @searchable() public name: string;
@svDb() @searchable() public description: string;
@svDb() @searchable() public category: string;
@svDb() public price: number;
}
// Get all fields marked as searchable for a class
const searchableFields = getSearchableFields('Product'); // ['name', 'description', 'category']
// List searchable fields
const searchableFields = getSearchableFields('Product');
// Basic search across all searchable fields
const iphoneProducts = await Product.search('iPhone');
// 1: Exact phrase across all fields
await Product.search('"Kindle Paperwhite"');
// Field-specific exact match
const electronicsProducts = await Product.search('category:Electronics');
// 2: Wildcard search across all fields
await Product.search('Air*');
// Partial word search (regex across all fields)
const laptopResults = await Product.search('laptop');
// 3: Field-scoped wildcard
await Product.search('name:Air*');
// Multi-word literal search
const paperwhite = await Product.search('Kindle Paperwhite');
// 4: Boolean AND/OR/NOT
await Product.search('category:Electronics AND name:iPhone');
// Empty query returns all documents
const allProducts = await Product.search('');
// 5: Grouping with parentheses
await Product.search('(Furniture OR Electronics) AND Chair');
// 6: Multi-term unquoted (terms AND'd across fields)
await Product.search('TypeScript Aufgabe');
// 7: Empty query returns all documents
await Product.search('');
```
The search functionality includes:
@ -238,11 +232,14 @@ The search functionality includes:
- `@searchable()` decorator for marking fields as searchable
- `getSearchableFields()` to list searchable fields for a model
- `search(query: string)` method supporting:
- Field-specific exact matches (`field:value`)
- Case-insensitive partial matches across all searchable fields
- Multi-word literal matching
- Exact phrase matches (`"my exact string"` or `'my exact string'`)
- Field-scoped exact & wildcard searches (`field:value`, `field:Air*`)
- Wildcard searches across all fields (`Air*`, `?Pods`)
- Boolean operators (`AND`, `OR`, `NOT`) with grouping (`(...)`)
- Multi-term unquoted queries AND'd across fields (`TypeScript Aufgabe`)
- Single/multi-term regex searches across fields
- Empty queries returning all documents
- Automatic escaping of special characters to prevent regex injection
- Automatic escaping & wildcard conversion to prevent regex injection
### EasyStore

View File

@ -0,0 +1,187 @@
import { tap, expect } from '@push.rocks/tapbundle';
import * as smartmongo from '@push.rocks/smartmongo';
import * as smartdata from '../ts/index.js';
import { searchable, getSearchableFields } from '../ts/classes.doc.js';
import { smartunique } from '../ts/plugins.js';
// Set up database connection
let smartmongoInstance: smartmongo.SmartMongo;
let testDb: smartdata.SmartdataDb;
// Define a test class for advanced search scenarios
/**
 * Fixture document used by the advanced search tests in this file.
 *
 * `name`, `description` and `category` are decorated with `@searchable()`;
 * `price` is not, which is what the 'simpleExact invalid field errors' test
 * relies on when it queries `price:29`.
 */
@smartdata.Collection(() => testDb)
class Product extends smartdata.SmartDataDbDoc<Product, Product> {
// Identifier; defaults to a freshly generated short id per instance.
@smartdata.unI()
public id: string = smartunique.shortId();
// Searchable product name.
@smartdata.svDb()
@searchable()
public name: string;
// Searchable free-text description.
@smartdata.svDb()
@searchable()
public description: string;
// Searchable category label.
@smartdata.svDb()
@searchable()
public category: string;
// Saved to the database but deliberately NOT marked searchable.
@smartdata.svDb()
public price: number;
/**
 * @param nameArg value for `name`
 * @param descriptionArg value for `description`
 * @param categoryArg value for `category`
 * @param priceArg value for `price`
 */
constructor(
nameArg: string,
descriptionArg: string,
categoryArg: string,
priceArg: number,
) {
super();
this.name = nameArg;
this.description = descriptionArg;
this.category = categoryArg;
this.price = priceArg;
}
}
// Start a SmartMongo instance and initialise the shared SmartdataDb handle against it.
tap.test('setup advanced search database', async () => {
  smartmongoInstance = await smartmongo.SmartMongo.createAndStart();
  const mongoDescriptor = await smartmongoInstance.getMongoDescriptor();
  testDb = new smartdata.SmartdataDb(mongoDescriptor);
  await testDb.init();
});
// Seed the collection with six fixture products and verify that all of them were stored.
tap.test('insert products for advanced search', async () => {
  // One row per product: [name, description, category, price].
  const fixtureRows: Array<[string, string, string, number]> = [
    ['Night Owl Lamp', 'Bright lamp for night reading', 'Lighting', 29],
    ['Day Light Lamp', 'Daytime lamp with adjustable brightness', 'Lighting', 39],
    ['Office Chair', 'Ergonomic chair for office', 'Furniture', 199],
    ['Gaming Chair', 'Comfortable for long gaming sessions', 'Furniture', 299],
    ['iPhone 12', 'Latest iPhone with A14 Bionic chip', 'Electronics', 999],
    ['AirPods', 'Wireless earbuds with noise cancellation', 'Electronics', 249],
  ];
  const products = fixtureRows.map(
    ([name, description, category, price]) => new Product(name, description, category, price),
  );
  // Save one at a time, in declaration order.
  for (const product of products) {
    await product.save();
  }
  const stored = await Product.getInstances({});
  expect(stored.length).toEqual(products.length);
});
// A bare field:value query is an exact match on that field.
tap.test('simpleExact: category:Furniture returns chairs', async () => {
  const matches = await Product.search('category:Furniture');
  expect(matches.length).toEqual(2);
  const sortedNames = Array.from(matches, (match) => match.name);
  sortedNames.sort();
  expect(sortedNames).toEqual(['Gaming Chair', 'Office Chair']);
});
// simpleExact on a field without @searchable() must reject with a "not searchable" error
tap.test('simpleExact invalid field errors', async () => {
  // Stays `undefined` unless the search rejects. Declaring the union keeps the
  // variable definitely assigned under strict compilation.
  let error: Error | undefined;
  try {
    await Product.search('price:29');
  } catch (e) {
    // Narrow the unknown catch value instead of asserting its type.
    error = e instanceof Error ? e : new Error(String(e));
  }
  expect(error).toBeTruthy();
  expect(error?.message).toMatch(/not searchable/);
});
// Quoted phrase search: double-quoted and single-quoted forms
tap.test('quoted phrase "Bright lamp" matches Night Owl Lamp', async () => {
  const hits = await Product.search('"Bright lamp"');
  expect(hits.length).toEqual(1);
  const [onlyHit] = hits;
  expect(onlyHit.name).toEqual('Night Owl Lamp');
});
tap.test("quoted phrase 'night reading' matches Night Owl Lamp", async () => {
  const hits = await Product.search("'night reading'");
  expect(hits.length).toEqual(1);
  const [onlyHit] = hits;
  expect(onlyHit.name).toEqual('Night Owl Lamp');
});
// Field-scoped wildcard: '*' on both sides of the term.
tap.test('wildcard description:*gaming* matches Gaming Chair', async () => {
  const hits = await Product.search('description:*gaming*');
  expect(hits.length).toEqual(1);
  const [onlyHit] = hits;
  expect(onlyHit.name).toEqual('Gaming Chair');
});
// Boolean operators: AND narrows the result set, OR widens it
tap.test('boolean AND: category:Lighting AND lamp', async () => {
  const { length: hitCount } = await Product.search('category:Lighting AND lamp');
  expect(hitCount).toEqual(2);
});
tap.test('boolean OR: Furniture OR Electronics', async () => {
  const { length: hitCount } = await Product.search('Furniture OR Electronics');
  expect(hitCount).toEqual(4);
});
// Unquoted multi-term queries: every term must match
tap.test('multi-term unquoted adjustable brightness', async () => {
  const hits = await Product.search('adjustable brightness');
  expect(hits.length).toEqual(1);
  const [onlyHit] = hits;
  expect(onlyHit.name).toEqual('Day Light Lamp');
});
tap.test('multi-term unquoted Night Lamp', async () => {
  const hits = await Product.search('Night Lamp');
  expect(hits.length).toEqual(1);
  const [onlyHit] = hits;
  expect(onlyHit.name).toEqual('Night Owl Lamp');
});
// Parenthesised group combined with AND
tap.test('grouping: (Furniture OR Electronics) AND Chair', async () => {
  const groupedQuery = '(Furniture OR Electronics) AND Chair';
  const matches = await Product.search(groupedQuery);
  expect(matches.length).toEqual(2);
  const sortedNames = Array.from(matches, (match) => match.name);
  sortedNames.sort();
  expect(sortedNames).toEqual(['Gaming Chair', 'Office Chair']);
});
// Teardown: drop the test database, close the connection, stop SmartMongo, then exit.
tap.test('cleanup advanced search database', async () => {
  await testDb.mongoDb.dropDatabase();
  await testDb.close();
  if (smartmongoInstance) {
    const dumpDir = `.nogit/dbdump/test.search.advanced.ts`;
    await smartmongoInstance.stopAndDumpToDir(dumpDir);
  }
  // Exit the process after a fixed delay.
  const exitDelayMs = 2000;
  setTimeout(() => {
    process.exit();
  }, exitDelayMs);
});
tap.start({ throwOnError: true });

View File

@ -221,6 +221,42 @@ tap.test('should search multi-word term across fields', async () => {
expect(termResults[0].name).toEqual('iPhone 12');
});
// Additional search scenarios
tap.test('should return zero results for non-existent terms', async () => {
  const matches = await Product.search('NonexistentTerm');
  expect(matches.length).toEqual(0);
});

// The remaining scenarios each expect exactly one hit.
// Row layout: [test title, query, expected product name]. Tests are registered in row order.
const singleHitScenarios: Array<[string, string, string]> = [
  ['should search products by description term "noise"', 'noise', 'AirPods'],
  ['should search products by description term "flagship"', 'flagship', 'Galaxy S21'],
  ['should search numeric strings "12"', '12', 'iPhone 12'],
  ['should search hyphenated terms "high-speed"', 'high-speed', 'Blender'],
  ['should search hyphenated terms "E-reader"', 'E-reader', 'Kindle Paperwhite'],
];
for (const [title, query, expectedName] of singleHitScenarios) {
  tap.test(title, async () => {
    const matches = await Product.search(query);
    expect(matches.length).toEqual(1);
    expect(matches[0].name).toEqual(expectedName);
  });
}
tap.test('close database connection', async () => {
await testDb.mongoDb.dropDatabase();
await testDb.close();

View File

@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@push.rocks/smartdata',
version: '5.9.2',
version: '5.10.0',
description: 'An advanced library for NoSQL data organization and manipulation using TypeScript with support for MongoDB, data validation, collections, and custom data types.'
}

View File

@ -343,22 +343,72 @@ export class SmartDataDbDoc<T extends TImplements, TImplements, TManager extends
if (searchableFields.length === 0) {
throw new Error(`No searchable fields defined for class ${className}`);
}
// field:value exact match (case-sensitive for non-regex fields)
const fv = query.match(/^(\w+):(.+)$/);
if (fv) {
const field = fv[1];
const value = fv[2];
// empty query -> return all
const q = query.trim();
if (!q) {
return await (this as any).getInstances({});
}
// simple exact field:value (no spaces, no wildcards, no quotes)
const simpleExact = q.match(/^(\w+):([^"'\*\?\s]+)$/);
if (simpleExact) {
const field = simpleExact[1];
const value = simpleExact[2];
if (!searchableFields.includes(field)) {
throw new Error(`Field '${field}' is not searchable for class ${className}`);
}
return await (this as any).getInstances({ [field]: value });
}
// safe regex across all searchable fields (case-insensitive)
const escaped = escapeForRegex(query);
const orConditions = searchableFields.map((field) => ({
[field]: { $regex: escaped, $options: 'i' },
}));
return await (this as any).getInstances({ $or: orConditions });
// quoted phrase across all searchable fields: exact match of phrase
const quoted = q.match(/^"(.+)"$|^'(.+)'$/);
if (quoted) {
const phrase = quoted[1] || quoted[2] || '';
// build regex that matches the exact phrase (allowing flexible whitespace)
const parts = phrase.split(/\s+/).map((t) => escapeForRegex(t));
const pattern = parts.join('\\s+');
const orConds = searchableFields.map((f) => ({ [f]: { $regex: pattern, $options: 'i' } }));
return await (this as any).getInstances({ $or: orConds });
}
// wildcard field:value (supports * and ?) -> direct regex on that field
// NOTE(review): the value part also matches whitespace, so a query such as
// `name:Air* AND category:X` is handled here as one pattern — confirm intended.
const wildcardField = q.match(/^(\w+):(.+[*?].*)$/);
if (wildcardField) {
  const field = wildcardField[1];
  const pattern = wildcardField[2];
  if (!searchableFields.includes(field)) {
    throw new Error(`Field '${field}' is not searchable for class ${className}`);
  }
  // Escape every regex metacharacter except the wildcards * and ?.
  // The character class has to contain `\]` and `\\`; the previous pattern
  // closed the class early and therefore never escaped anything, letting
  // user input reach $regex as raw regex syntax.
  const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&');
  // Translate the wildcards: * -> any run of characters, ? -> any single character.
  const regexPattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.');
  return await (this as any).getInstances({ [field]: { $regex: regexPattern, $options: 'i' } });
}
// wildcard plain term across all fields (supports * and ?)
// NOTE(review): any colon-free query containing * or ? lands here, including
// ones with AND/OR or parentheses — confirm that is intended.
if (!q.includes(':') && (q.includes('*') || q.includes('?'))) {
  // Escape every regex metacharacter except the wildcards * and ?.
  // The character class has to contain `\]` and `\\`; the previous pattern
  // closed the class early and therefore never escaped anything, letting
  // user input reach $regex as raw regex syntax.
  const escaped = q.replace(/[.+^${}()|[\]\\]/g, '\\$&');
  // Translate the wildcards: * -> any run of characters, ? -> any single character.
  const pattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.');
  const orConds = searchableFields.map((f) => ({ [f]: { $regex: pattern, $options: 'i' } }));
  return await (this as any).getInstances({ $or: orConds });
}
// detect advanced Lucene syntax: field:value, wildcards, boolean, grouping
const luceneSyntax = /(\w+:[^\s]+)|\*|\?|\bAND\b|\bOR\b|\bNOT\b|\(|\)/;
if (luceneSyntax.test(q)) {
const filter = (this as any).createSearchFilter(q);
return await (this as any).getInstances(filter);
}
// multi-term unquoted -> AND of regex across fields for each term
const terms = q.split(/\s+/);
if (terms.length > 1) {
const andConds = terms.map((term) => {
const esc = escapeForRegex(term);
const ors = searchableFields.map((f) => ({ [f]: { $regex: esc, $options: 'i' } }));
return { $or: ors };
});
return await (this as any).getInstances({ $and: andConds });
}
// single term -> regex across all searchable fields
const esc = escapeForRegex(q);
const orConds = searchableFields.map((f) => ({ [f]: { $regex: esc, $options: 'i' } }));
return await (this as any).getInstances({ $or: orConds });
}

View File

@ -329,7 +329,16 @@ export class LuceneParser {
* FIXED VERSION - proper MongoDB query structure
*/
export class LuceneToMongoTransformer {
constructor() {}
private defaultFields: string[];
constructor(defaultFields: string[] = []) {
this.defaultFields = defaultFields;
}
/**
 * Backslash-escape every RegExp metacharacter in `input`, so that the
 * returned string matches the original text literally when used as a pattern.
 *
 * @param input raw text to be embedded in a regular expression
 * @returns `input` with each metacharacter prefixed by a backslash
 */
private escapeRegex(input: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return input.replace(metaCharacters, (ch) => `\\${ch}`);
}
/**
* Transform a Lucene AST node to a MongoDB query
@ -366,18 +375,21 @@ export class LuceneToMongoTransformer {
* FIXED: properly structured $or query for multiple fields
*/
private transformTerm(node: TermNode, searchFields?: string[]): any {
// If specific fields are provided, search across those fields
if (searchFields && searchFields.length > 0) {
// Create an $or query to search across multiple fields
const orConditions = searchFields.map((field) => ({
[field]: { $regex: node.value, $options: 'i' },
}));
return { $or: orConditions };
// Build regex pattern; supports the multi-character (*) and single-character (?) wildcards if present
const term = node.value;
// Determine regex pattern: wildcard conversion or exact escape
let pattern: string;
if (term.includes('*') || term.includes('?')) {
pattern = this.luceneWildcardToRegex(term);
} else {
pattern = this.escapeRegex(term);
}
// Otherwise, use text search (requires a text index on desired fields)
return { $text: { $search: node.value } };
// Search across provided fields or default fields
const fields = searchFields && searchFields.length > 0 ? searchFields : this.defaultFields;
const orConditions = fields.map((field) => ({
[field]: { $regex: pattern, $options: 'i' },
}));
return { $or: orConditions };
}
/**
@ -385,17 +397,14 @@ export class LuceneToMongoTransformer {
* FIXED: properly structured $or query for multiple fields
*/
private transformPhrase(node: PhraseNode, searchFields?: string[]): any {
// If specific fields are provided, search phrase across those fields
if (searchFields && searchFields.length > 0) {
const orConditions = searchFields.map((field) => ({
[field]: { $regex: `${node.value.replace(/\s+/g, '\\s+')}`, $options: 'i' },
}));
return { $or: orConditions };
}
// For phrases, we use a regex to ensure exact matches
return { $text: { $search: `"${node.value}"` } };
// Use regex across provided fields or default fields; each word is escaped and runs of whitespace between words match flexibly
const parts = node.value.split(/\s+/).map((t) => this.escapeRegex(t));
const pattern = parts.join('\\s+');
const fields = searchFields && searchFields.length > 0 ? searchFields : this.defaultFields;
const orConditions = fields.map((field) => ({
[field]: { $regex: pattern, $options: 'i' },
}));
return { $or: orConditions };
}
/**
@ -429,9 +438,14 @@ export class LuceneToMongoTransformer {
};
}
// Special case for exact term matches on fields
// Special case for exact term matches on fields (supporting wildcard characters)
if (node.value.type === 'TERM') {
return { [node.field]: { $regex: (node.value as TermNode).value, $options: 'i' } };
const val = (node.value as TermNode).value;
if (val.includes('*') || val.includes('?')) {
  // Wildcard term: convert Lucene wildcards to their regex equivalent.
  const regex = this.luceneWildcardToRegex(val);
  return { [node.field]: { $regex: regex, $options: 'i' } };
}
// Plain term: escape regex metacharacters so the value is matched literally
// (e.g. `category:C++`), consistent with transformTerm and transformPhrase.
return { [node.field]: { $regex: this.escapeRegex(val), $options: 'i' } };
}
// Special case for phrase matches on fields
@ -691,7 +705,8 @@ export class SmartdataLuceneAdapter {
*/
constructor(defaultSearchFields?: string[]) {
this.parser = new LuceneParser();
this.transformer = new LuceneToMongoTransformer();
// Pass default searchable fields into transformer
this.transformer = new LuceneToMongoTransformer(defaultSearchFields || []);
if (defaultSearchFields) {
this.defaultSearchFields = defaultSearchFields;
}
@ -704,7 +719,7 @@ export class SmartdataLuceneAdapter {
*/
convert(luceneQuery: string, searchFields?: string[]): any {
try {
// For simple single term queries, create a simpler query structure
// For simple single-term queries (no field:, boolean, grouping), use simpler regex
if (
!luceneQuery.includes(':') &&
!luceneQuery.includes(' AND ') &&
@ -713,13 +728,17 @@ export class SmartdataLuceneAdapter {
!luceneQuery.includes('(') &&
!luceneQuery.includes('[')
) {
// This is a simple term, use a more direct approach
const fieldsToSearch = searchFields || this.defaultSearchFields;
if (fieldsToSearch && fieldsToSearch.length > 0) {
// Handle wildcard characters in query
let pattern = luceneQuery;
if (luceneQuery.includes('*') || luceneQuery.includes('?')) {
// Use transformer to convert wildcard pattern
pattern = this.transformer.luceneWildcardToRegex(luceneQuery);
}
return {
$or: fieldsToSearch.map((field) => ({
[field]: { $regex: luceneQuery, $options: 'i' },
[field]: { $regex: pattern, $options: 'i' },
})),
};
}