From e2dc094afd84ab51e99b9911c831d3626f94b257 Mon Sep 17 00:00:00 2001 From: Philipp Kunz Date: Tue, 22 Apr 2025 20:42:11 +0000 Subject: [PATCH] fix(search): Improve search query parsing for implicit AND queries by preserving quoted substrings and better handling free terms, quoted phrases, and field:value tokens. --- changelog.md | 7 +++ ts/00_commitinfo_data.ts | 2 +- ts/classes.doc.ts | 96 ++++++++++++++++++---------------------- 3 files changed, 51 insertions(+), 54 deletions(-) diff --git a/changelog.md b/changelog.md index 273aea9..564a5e9 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,12 @@ # Changelog +## 2025-04-22 - 5.13.1 - fix(search) +Improve search query parsing for implicit AND queries by preserving quoted substrings and better handling free terms, quoted phrases, and field:value tokens. + +- Replace previous implicit AND logic with tokenization that preserves quoted substrings +- Support both free term and field:value tokens with wildcards inside quotes +- Ensure errors are thrown for non-searchable fields in field-specific queries + ## 2025-04-22 - 5.13.0 - feat(search) Improve search query handling and update documentation diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index b588f91..7553638 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: '@push.rocks/smartdata', - version: '5.13.0', + version: '5.13.1', description: 'An advanced library for NoSQL data organization and manipulation using TypeScript with support for MongoDB, data validation, collections, and custom data types.' } diff --git a/ts/classes.doc.ts b/ts/classes.doc.ts index 1296928..f667b82 100644 --- a/ts/classes.doc.ts +++ b/ts/classes.doc.ts @@ -419,62 +419,52 @@ export class SmartDataDbDoc ({ [f]: { $regex: pattern, $options: 'i' } })); return await (this as any).execQuery({ $or: orConds }, opts); } - // implicit AND: combine free terms and field:value terms (with or without wildcards) - const parts = q.split(/\s+/); - const hasColon = parts.some((t) => t.includes(':')); - if ( - parts.length > 1 && hasColon && - !q.includes(' AND ') && !q.includes(' OR ') && !q.includes(' NOT ') && - !q.includes('(') && !q.includes(')') && - !q.includes('[') && !q.includes(']') && - !q.includes('"') && !q.includes("'") - ) { - const andConds = parts.map((term) => { - const m = term.match(/^(\w+):(.+)$/); - if (m) { - const field = m[1]; - const value = m[2]; - if (!searchableFields.includes(field)) { - throw new Error(`Field '${field}' is not searchable for class ${this.name}`); + // implicit AND for multiple tokens: free terms, quoted phrases, and field:values + { + // Split query into tokens, preserving quoted substrings + const rawTokens = q.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g) || []; + // Only apply when more than one token and no boolean operators or grouping + if ( + rawTokens.length > 1 && + !/(\bAND\b|\bOR\b|\bNOT\b|\(|\))/i.test(q) && + !/\[|\]/.test(q) + ) { + const andConds: any[] = []; + for (let token of rawTokens) { + // field:value token + const fv = token.match(/^(\w+):(.+)$/); + if (fv) { + const field = fv[1]; + let value = fv[2]; + if (!searchableFields.includes(field)) { + throw new Error(`Field '${field}' is not searchable for class ${this.name}`); + } + // Strip surrounding quotes if present + if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) { + value = value.slice(1, -1); + } + // Wildcard search? + if (value.includes('*') || value.includes('?')) { + const escaped = value.replace(/([.+^${}()|[\\]\\])/g, '\\$1'); + const pattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.'); + andConds.push({ [field]: { $regex: pattern, $options: 'i' } }); + } else { + andConds.push({ [field]: value }); + } + } else if ((token.startsWith('"') && token.endsWith('"')) || (token.startsWith("'") && token.endsWith("'"))) { + // Quoted free phrase across all fields + const phrase = token.slice(1, -1); + const parts = phrase.split(/\s+/).map((t) => escapeForRegex(t)); + const pattern = parts.join('\\s+'); + andConds.push({ $or: searchableFields.map((f) => ({ [f]: { $regex: pattern, $options: 'i' } })) }); + } else { + // Free term across all fields + const esc = escapeForRegex(token); + andConds.push({ $or: searchableFields.map((f) => ({ [f]: { $regex: esc, $options: 'i' } })) }); } - if (value.includes('*') || value.includes('?')) { - // wildcard field search - const escaped = value.replace(/([.+^${}()|[\\]\\])/g, '\\$1'); - const pattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.'); - return { [field]: { $regex: pattern, $options: 'i' } }; - } - // exact field:value - return { [field]: value }; } - // free term -> regex across all searchable fields - const esc = escapeForRegex(term); - return { $or: searchableFields.map((f) => ({ [f]: { $regex: esc, $options: 'i' } })) }; - }); - return await (this as any).execQuery({ $and: andConds }, opts); - } - - // free term and quoted field phrase (exact or wildcard), e.g. 'term field:"phrase"' or 'term field:"ph*se"' - const freeWithQuotedField = q.match(/^(\S+)\s+(\w+):"(.+)"$/); - if (freeWithQuotedField) { - const freeTerm = freeWithQuotedField[1]; - const field = freeWithQuotedField[2]; - let phrase = freeWithQuotedField[3]; - if (!searchableFields.includes(field)) { - throw new Error(`Field '${field}' is not searchable for class ${this.name}`); + return await (this as any).execQuery({ $and: andConds }, opts); } - // free term condition across all searchable fields - const freeEsc = escapeForRegex(freeTerm); - const freeCond = { $or: searchableFields.map((f) => ({ [f]: { $regex: freeEsc, $options: 'i' } })) }; - // field condition: exact match or wildcard pattern - let fieldCond; - if (phrase.includes('*') || phrase.includes('?')) { - const escaped = phrase.replace(/([.+^${}()|[\\]\\])/g, '\\$1'); - const pattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.'); - fieldCond = { [field]: { $regex: pattern, $options: 'i' } }; - } else { - fieldCond = { [field]: phrase }; - } - return await (this as any).execQuery({ $and: [freeCond, fieldCond] }, opts); } // detect advanced Lucene syntax: field:value, wildcards, boolean, grouping const luceneSyntax = /(\w+:[^\s]+)|\*|\?|\bAND\b|\bOR\b|\bNOT\b|\(|\)/;