diff --git a/changelog.md b/changelog.md index a93c5ab..3f34dc4 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,13 @@ # Changelog +## 2025-04-21 - 5.10.0 - feat(search) +Improve search functionality: update documentation, refine Lucene query transformation, and add advanced search tests + +- Updated readme.md with detailed Lucene‑style search examples and use cases +- Enhanced LuceneToMongoTransformer to properly handle wildcard conversion and regex escaping +- Improved search query parsing in SmartDataDbDoc for field-specific, multi-term, and advanced Lucene syntax +- Added new advanced search tests covering boolean operators, grouping, quoted phrases, and wildcard queries + ## 2025-04-18 - 5.9.2 - fix(documentation) Update search API documentation to replace deprecated searchWithLucene examples with the unified search(query) API and clarify its behavior. diff --git a/readme.md b/readme.md index dcb3a99..4bf8719 100644 --- a/readme.md +++ b/readme.md @@ -189,48 +189,42 @@ await user.delete(); // Delete the user from the database ### Search Functionality -SmartData provides powerful search capabilities with a Lucene-like query syntax and robust fallback mechanisms: +SmartData provides powerful, Lucene‑style search capabilities with robust fallback mechanisms: ```typescript // Define a model with searchable fields @Collection(() => db) class Product extends SmartDataDbDoc { - @unI() - public id: string = 'product-id'; - - @svDb() - @searchable() // Mark this field as searchable - public name: string; - - @svDb() - @searchable() // Mark this field as searchable - public description: string; - - @svDb() - @searchable() // Mark this field as searchable - public category: string; - - @svDb() - public price: number; + @unI() public id: string = 'product-id'; + @svDb() @searchable() public name: string; + @svDb() @searchable() public description: string; + @svDb() @searchable() public category: string; + @svDb() public price: number; } -// Get all fields marked as 
searchable for a class -const searchableFields = getSearchableFields('Product'); // ['name', 'description', 'category'] +// List searchable fields +const searchableFields = getSearchableFields('Product'); -// Basic search across all searchable fields -const iphoneProducts = await Product.search('iPhone'); +// 1: Exact phrase across all fields +await Product.search('"Kindle Paperwhite"'); -// Field-specific exact match -const electronicsProducts = await Product.search('category:Electronics'); +// 2: Wildcard search across all fields +await Product.search('Air*'); -// Partial word search (regex across all fields) -const laptopResults = await Product.search('laptop'); +// 3: Field‑scoped wildcard +await Product.search('name:Air*'); -// Multi-word literal search -const paperwhite = await Product.search('Kindle Paperwhite'); +// 4: Boolean AND/OR/NOT +await Product.search('category:Electronics AND name:iPhone'); -// Empty query returns all documents -const allProducts = await Product.search(''); +// 5: Grouping with parentheses +await Product.search('(Furniture OR Electronics) AND Chair'); + +// 6: Multi‑term unquoted (terms AND’d across fields) +await Product.search('TypeScript Aufgabe'); + +// 7: Empty query returns all documents +await Product.search(''); ``` The search functionality includes: @@ -238,11 +232,14 @@ The search functionality includes: - `@searchable()` decorator for marking fields as searchable - `getSearchableFields()` to list searchable fields for a model - `search(query: string)` method supporting: - - Field-specific exact matches (`field:value`) - - Case-insensitive partial matches across all searchable fields - - Multi-word literal matching + - Exact phrase matches (`"my exact string"` or `'my exact string'`) + - Field‑scoped exact & wildcard searches (`field:value`, `field:Air*`) + - Wildcard searches across all fields (`Air*`, `?Pods`) + - Boolean operators (`AND`, `OR`, `NOT`) with grouping (`(...)`) + - Multi‑term unquoted queries AND’d across 
fields (`TypeScript Aufgabe`) + - Single/multi‑term regex searches across fields - Empty queries returning all documents -- Automatic escaping of special characters to prevent regex injection +- Automatic escaping & wildcard conversion to prevent regex injection ### EasyStore diff --git a/test/test.search.advanced.ts b/test/test.search.advanced.ts new file mode 100644 index 0000000..4856e71 --- /dev/null +++ b/test/test.search.advanced.ts @@ -0,0 +1,187 @@ +import { tap, expect } from '@push.rocks/tapbundle'; +import * as smartmongo from '@push.rocks/smartmongo'; +import * as smartdata from '../ts/index.js'; +import { searchable, getSearchableFields } from '../ts/classes.doc.js'; +import { smartunique } from '../ts/plugins.js'; + +// Set up database connection +let smartmongoInstance: smartmongo.SmartMongo; +let testDb: smartdata.SmartdataDb; + +// Define a test class for advanced search scenarios +@smartdata.Collection(() => testDb) +class Product extends smartdata.SmartDataDbDoc { + @smartdata.unI() + public id: string = smartunique.shortId(); + + @smartdata.svDb() + @searchable() + public name: string; + + @smartdata.svDb() + @searchable() + public description: string; + + @smartdata.svDb() + @searchable() + public category: string; + + @smartdata.svDb() + public price: number; + + constructor( + nameArg: string, + descriptionArg: string, + categoryArg: string, + priceArg: number, + ) { + super(); + this.name = nameArg; + this.description = descriptionArg; + this.category = categoryArg; + this.price = priceArg; + } +} + +// Initialize DB and insert sample products +tap.test('setup advanced search database', async () => { + smartmongoInstance = await smartmongo.SmartMongo.createAndStart(); + testDb = new smartdata.SmartdataDb( + await smartmongoInstance.getMongoDescriptor(), + ); + await testDb.init(); +}); + +tap.test('insert products for advanced search', async () => { + const products = [ + new Product( + 'Night Owl Lamp', + 'Bright lamp for night reading', + 
'Lighting', + 29, + ), + new Product( + 'Day Light Lamp', + 'Daytime lamp with adjustable brightness', + 'Lighting', + 39, + ), + new Product( + 'Office Chair', + 'Ergonomic chair for office', + 'Furniture', + 199, + ), + new Product( + 'Gaming Chair', + 'Comfortable for long gaming sessions', + 'Furniture', + 299, + ), + new Product( + 'iPhone 12', + 'Latest iPhone with A14 Bionic chip', + 'Electronics', + 999, + ), + new Product( + 'AirPods', + 'Wireless earbuds with noise cancellation', + 'Electronics', + 249, + ), + ]; + for (const p of products) { + await p.save(); + } + const all = await Product.getInstances({}); + expect(all.length).toEqual(products.length); +}); + +// Simple exact field:value matching +tap.test('simpleExact: category:Furniture returns chairs', async () => { + const res = await Product.search('category:Furniture'); + expect(res.length).toEqual(2); + const names = res.map((r) => r.name).sort(); + expect(names).toEqual(['Gaming Chair', 'Office Chair']); +}); + +// simpleExact invalid field should throw +tap.test('simpleExact invalid field errors', async () => { + let error: Error; + try { + await Product.search('price:29'); + } catch (e) { + error = e as Error; + } + expect(error).toBeTruthy(); + expect(error.message).toMatch(/not searchable/); +}); + +// Quoted phrase search +tap.test('quoted phrase "Bright lamp" matches Night Owl Lamp', async () => { + const res = await Product.search('"Bright lamp"'); + expect(res.length).toEqual(1); + expect(res[0].name).toEqual('Night Owl Lamp'); +}); + +tap.test("quoted phrase 'night reading' matches Night Owl Lamp", async () => { + const res = await Product.search("'night reading'"); + expect(res.length).toEqual(1); + expect(res[0].name).toEqual('Night Owl Lamp'); +}); + + +tap.test('wildcard description:*gaming* matches Gaming Chair', async () => { + const res = await Product.search('description:*gaming*'); + expect(res.length).toEqual(1); + expect(res[0].name).toEqual('Gaming Chair'); +}); + +// 
Boolean AND and OR +tap.test('boolean AND: category:Lighting AND lamp', async () => { + const res = await Product.search('category:Lighting AND lamp'); + expect(res.length).toEqual(2); +}); + +tap.test('boolean OR: Furniture OR Electronics', async () => { + const res = await Product.search('Furniture OR Electronics'); + expect(res.length).toEqual(4); +}); + +// Multi-term unquoted -> AND across terms +tap.test('multi-term unquoted adjustable brightness', async () => { + const res = await Product.search('adjustable brightness'); + expect(res.length).toEqual(1); + expect(res[0].name).toEqual('Day Light Lamp'); +}); + +tap.test('multi-term unquoted Night Lamp', async () => { + const res = await Product.search('Night Lamp'); + expect(res.length).toEqual(1); + expect(res[0].name).toEqual('Night Owl Lamp'); +}); + +// Grouping with parentheses +tap.test('grouping: (Furniture OR Electronics) AND Chair', async () => { + const res = await Product.search( + '(Furniture OR Electronics) AND Chair', + ); + expect(res.length).toEqual(2); + const names = res.map((r) => r.name).sort(); + expect(names).toEqual(['Gaming Chair', 'Office Chair']); +}); + +// Teardown +tap.test('cleanup advanced search database', async () => { + await testDb.mongoDb.dropDatabase(); + await testDb.close(); + if (smartmongoInstance) { + await smartmongoInstance.stopAndDumpToDir( + `.nogit/dbdump/test.search.advanced.ts`, + ); + } + setTimeout(() => process.exit(), 2000); +}); + +tap.start({ throwOnError: true }); \ No newline at end of file diff --git a/test/test.search.ts b/test/test.search.ts index 7237d29..b2695a2 100644 --- a/test/test.search.ts +++ b/test/test.search.ts @@ -221,6 +221,42 @@ tap.test('should search multi-word term across fields', async () => { expect(termResults[0].name).toEqual('iPhone 12'); }); +// Additional search scenarios +tap.test('should return zero results for non-existent terms', async () => { + const noResults = await Product.search('NonexistentTerm'); + 
expect(noResults.length).toEqual(0); +}); + +tap.test('should search products by description term "noise"', async () => { + const noiseResults = await Product.search('noise'); + expect(noiseResults.length).toEqual(1); + expect(noiseResults[0].name).toEqual('AirPods'); +}); + +tap.test('should search products by description term "flagship"', async () => { + const flagshipResults = await Product.search('flagship'); + expect(flagshipResults.length).toEqual(1); + expect(flagshipResults[0].name).toEqual('Galaxy S21'); +}); + +tap.test('should search numeric strings "12"', async () => { + const twelveResults = await Product.search('12'); + expect(twelveResults.length).toEqual(1); + expect(twelveResults[0].name).toEqual('iPhone 12'); +}); + +tap.test('should search hyphenated terms "high-speed"', async () => { + const hyphenResults = await Product.search('high-speed'); + expect(hyphenResults.length).toEqual(1); + expect(hyphenResults[0].name).toEqual('Blender'); +}); + +tap.test('should search hyphenated terms "E-reader"', async () => { + const ereaderResults = await Product.search('E-reader'); + expect(ereaderResults.length).toEqual(1); + expect(ereaderResults[0].name).toEqual('Kindle Paperwhite'); +}); + tap.test('close database connection', async () => { await testDb.mongoDb.dropDatabase(); await testDb.close(); diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index a3856b4..c3ce613 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: '@push.rocks/smartdata', - version: '5.9.2', + version: '5.10.0', description: 'An advanced library for NoSQL data organization and manipulation using TypeScript with support for MongoDB, data validation, collections, and custom data types.' 
} diff --git a/ts/classes.doc.ts b/ts/classes.doc.ts index 9776824..ac55da5 100644 --- a/ts/classes.doc.ts +++ b/ts/classes.doc.ts @@ -343,22 +343,72 @@ export class SmartDataDbDoc return all + const q = query.trim(); + if (!q) { + return await (this as any).getInstances({}); + } + // simple exact field:value (no spaces, no wildcards, no quotes) + const simpleExact = q.match(/^(\w+):([^"'\*\?\s]+)$/); + if (simpleExact) { + const field = simpleExact[1]; + const value = simpleExact[2]; if (!searchableFields.includes(field)) { throw new Error(`Field '${field}' is not searchable for class ${className}`); } return await (this as any).getInstances({ [field]: value }); } - // safe regex across all searchable fields (case-insensitive) - const escaped = escapeForRegex(query); - const orConditions = searchableFields.map((field) => ({ - [field]: { $regex: escaped, $options: 'i' }, - })); - return await (this as any).getInstances({ $or: orConditions }); + // quoted phrase across all searchable fields: exact match of phrase + const quoted = q.match(/^"(.+)"$|^'(.+)'$/); + if (quoted) { + const phrase = quoted[1] || quoted[2] || ''; + // build regex that matches the exact phrase (allowing flexible whitespace) + const parts = phrase.split(/\s+/).map((t) => escapeForRegex(t)); + const pattern = parts.join('\\s+'); + const orConds = searchableFields.map((f) => ({ [f]: { $regex: pattern, $options: 'i' } })); + return await (this as any).getInstances({ $or: orConds }); + } + // wildcard field:value (supports * and ?) 
-> direct regex on that field + const wildcardField = q.match(/^(\w+):(.+[*?].*)$/); + if (wildcardField) { + const field = wildcardField[1]; + const pattern = wildcardField[2]; + if (!searchableFields.includes(field)) { + throw new Error(`Field '${field}' is not searchable for class ${className}`); + } + // escape regex metacharacters (including [ ] and \) but leave * and ? intact, then convert those wildcards + const escaped = pattern.replace(/([.+^${}()|[\]\\])/g, '\\$1'); + const regexPattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.'); + return await (this as any).getInstances({ [field]: { $regex: regexPattern, $options: 'i' } }); + } + // wildcard plain term across all fields (supports * and ?) + if (!q.includes(':') && (q.includes('*') || q.includes('?'))) { + // build wildcard regex pattern: escape every metacharacter except * and ?, then convert * to .* and ? to . + const escaped = q.replace(/([.+^${}()|[\]\\])/g, '\\$1'); + const pattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.'); + const orConds = searchableFields.map((f) => ({ [f]: { $regex: pattern, $options: 'i' } })); + return await (this as any).getInstances({ $or: orConds }); + } + // detect advanced Lucene syntax: field:value, wildcards, boolean, grouping + const luceneSyntax = /(\w+:[^\s]+)|\*|\?|\bAND\b|\bOR\b|\bNOT\b|\(|\)/; + if (luceneSyntax.test(q)) { + const filter = (this as any).createSearchFilter(q); + return await (this as any).getInstances(filter); + } + // multi-term unquoted -> AND of regex across fields for each term + const terms = q.split(/\s+/); + if (terms.length > 1) { + const andConds = terms.map((term) => { + const esc = escapeForRegex(term); + const ors = searchableFields.map((f) => ({ [f]: { $regex: esc, $options: 'i' } })); + return { $or: ors }; + }); + return await (this as any).getInstances({ $and: andConds }); + } + // single term -> regex across all searchable fields + const esc = escapeForRegex(q); + const orConds = searchableFields.map((f) => ({ [f]: { $regex: esc, $options: 'i' } })); + return await (this as 
any).getInstances({ $or: orConds }); } diff --git a/ts/classes.lucene.adapter.ts b/ts/classes.lucene.adapter.ts index 74b2f95..2d16721 100644 --- a/ts/classes.lucene.adapter.ts +++ b/ts/classes.lucene.adapter.ts @@ -329,7 +329,16 @@ export class LuceneParser { * FIXED VERSION - proper MongoDB query structure */ export class LuceneToMongoTransformer { - constructor() {} + private defaultFields: string[]; + constructor(defaultFields: string[] = []) { + this.defaultFields = defaultFields; + } + /** + * Escape special characters for use in RegExp patterns + */ + private escapeRegex(input: string): string { + return input.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + } /** * Transform a Lucene AST node to a MongoDB query @@ -366,18 +375,21 @@ export class LuceneToMongoTransformer { * FIXED: properly structured $or query for multiple fields */ private transformTerm(node: TermNode, searchFields?: string[]): any { - // If specific fields are provided, search across those fields - if (searchFields && searchFields.length > 0) { - // Create an $or query to search across multiple fields - const orConditions = searchFields.map((field) => ({ - [field]: { $regex: node.value, $options: 'i' }, - })); - - return { $or: orConditions }; + // Build regex pattern, support multi-character (*) and single-character (?) wildcards if present + const term = node.value; + // Determine regex pattern: wildcard conversion or exact escape + let pattern: string; + if (term.includes('*') || term.includes('?')) { + pattern = this.luceneWildcardToRegex(term); + } else { + pattern = this.escapeRegex(term); } - - // Otherwise, use text search (requires a text index on desired fields) - return { $text: { $search: node.value } }; + // Search across provided fields or default fields + const fields = searchFields && searchFields.length > 0 ? 
searchFields : this.defaultFields; + const orConditions = fields.map((field) => ({ + [field]: { $regex: pattern, $options: 'i' }, + })); + return { $or: orConditions }; } /** @@ -385,17 +397,14 @@ export class LuceneToMongoTransformer { * FIXED: properly structured $or query for multiple fields */ private transformPhrase(node: PhraseNode, searchFields?: string[]): any { - // If specific fields are provided, search phrase across those fields - if (searchFields && searchFields.length > 0) { - const orConditions = searchFields.map((field) => ({ - [field]: { $regex: `${node.value.replace(/\s+/g, '\\s+')}`, $options: 'i' }, - })); - - return { $or: orConditions }; - } - - // For phrases, we use a regex to ensure exact matches - return { $text: { $search: `"${node.value}"` } }; + // Use regex across provided fields or default fields, allowing any whitespace between the phrase's escaped words + const parts = node.value.split(/\s+/).map((t) => this.escapeRegex(t)); + const pattern = parts.join('\\s+'); + const fields = searchFields && searchFields.length > 0 ? 
searchFields : this.defaultFields; + const orConditions = fields.map((field) => ({ + [field]: { $regex: pattern, $options: 'i' }, + })); + return { $or: orConditions }; } /** @@ -429,9 +438,14 @@ export class LuceneToMongoTransformer { }; } - // Special case for exact term matches on fields + // Special case for term matches on fields: wildcards are converted, literal terms are regex-escaped if (node.value.type === 'TERM') { - return { [node.field]: { $regex: (node.value as TermNode).value, $options: 'i' } }; + const val = (node.value as TermNode).value; + if (val.includes('*') || val.includes('?')) { + const regex = this.luceneWildcardToRegex(val); + return { [node.field]: { $regex: regex, $options: 'i' } }; + } + return { [node.field]: { $regex: this.escapeRegex(val), $options: 'i' } }; } // Special case for phrase matches on fields @@ -691,7 +705,8 @@ export class SmartdataLuceneAdapter { */ constructor(defaultSearchFields?: string[]) { this.parser = new LuceneParser(); - this.transformer = new LuceneToMongoTransformer(); + // Pass default searchable fields into transformer + this.transformer = new LuceneToMongoTransformer(defaultSearchFields || []); if (defaultSearchFields) { this.defaultSearchFields = defaultSearchFields; } @@ -704,7 +719,7 @@ export class SmartdataLuceneAdapter { */ convert(luceneQuery: string, searchFields?: string[]): any { try { - // For simple single term queries, create a simpler query structure + // For simple single-term queries (no field:, boolean, grouping), use simpler regex if ( !luceneQuery.includes(':') && !luceneQuery.includes(' AND ') && @@ -713,13 +728,17 @@ export class SmartdataLuceneAdapter { !luceneQuery.includes('(') && !luceneQuery.includes('[') ) { - // This is a simple term, use a more direct approach const fieldsToSearch = searchFields || this.defaultSearchFields; - if (fieldsToSearch && fieldsToSearch.length > 0) { + // Escape literal queries so regex metacharacters match themselves; wildcard queries are converted below + let pattern = luceneQuery.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + if (luceneQuery.includes('*') || luceneQuery.includes('?')) { + // 
Use transformer to convert wildcard pattern + pattern = this.transformer.luceneWildcardToRegex(luceneQuery); + } return { $or: fieldsToSearch.map((field) => ({ - [field]: { $regex: luceneQuery, $options: 'i' }, + [field]: { $regex: pattern, $options: 'i' }, })), }; }