fix(search): Improve search query parsing for implicit AND queries by preserving quoted substrings and better handling free terms, quoted phrases, and field:value tokens.

This commit is contained in:
Philipp Kunz 2025-04-22 20:42:11 +00:00
parent 39d2957b7d
commit e2dc094afd
3 changed files with 51 additions and 54 deletions

View File

@ -1,5 +1,12 @@
# Changelog
## 2025-04-22 - 5.13.1 - fix(search)
Improve search query parsing for implicit AND queries by preserving quoted substrings and better handling free terms, quoted phrases, and field:value tokens.
- Replace previous implicit AND logic with tokenization that preserves quoted substrings
- Support both free term and field:value tokens with wildcards inside quotes
- Ensure errors are thrown for non-searchable fields in field-specific queries
## 2025-04-22 - 5.13.0 - feat(search)
Improve search query handling and update documentation

View File

@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@push.rocks/smartdata',
version: '5.13.0',
version: '5.13.1',
description: 'An advanced library for NoSQL data organization and manipulation using TypeScript with support for MongoDB, data validation, collections, and custom data types.'
}

View File

@ -419,62 +419,52 @@ export class SmartDataDbDoc<T extends TImplements, TImplements, TManager extends
const orConds = searchableFields.map((f) => ({ [f]: { $regex: pattern, $options: 'i' } }));
return await (this as any).execQuery({ $or: orConds }, opts);
}
// implicit AND: combine free terms and field:value terms (with or without wildcards)
const parts = q.split(/\s+/);
const hasColon = parts.some((t) => t.includes(':'));
if (
parts.length > 1 && hasColon &&
!q.includes(' AND ') && !q.includes(' OR ') && !q.includes(' NOT ') &&
!q.includes('(') && !q.includes(')') &&
!q.includes('[') && !q.includes(']') &&
!q.includes('"') && !q.includes("'")
) {
const andConds = parts.map((term) => {
const m = term.match(/^(\w+):(.+)$/);
if (m) {
const field = m[1];
const value = m[2];
if (!searchableFields.includes(field)) {
throw new Error(`Field '${field}' is not searchable for class ${this.name}`);
// implicit AND for multiple tokens: free terms, quoted phrases, and field:values
{
// Split query into tokens, preserving quoted substrings
const rawTokens = q.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g) || [];
// Only apply when more than one token and no boolean operators or grouping
if (
rawTokens.length > 1 &&
!/(\bAND\b|\bOR\b|\bNOT\b|\(|\))/i.test(q) &&
!/\[|\]/.test(q)
) {
const andConds: any[] = [];
for (let token of rawTokens) {
// field:value token
const fv = token.match(/^(\w+):(.+)$/);
if (fv) {
const field = fv[1];
let value = fv[2];
if (!searchableFields.includes(field)) {
throw new Error(`Field '${field}' is not searchable for class ${this.name}`);
}
// Strip surrounding quotes if present
if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) {
value = value.slice(1, -1);
}
// Wildcard search?
if (value.includes('*') || value.includes('?')) {
const escaped = value.replace(/([.+^${}()|[\\]\\])/g, '\\$1');
const pattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.');
andConds.push({ [field]: { $regex: pattern, $options: 'i' } });
} else {
andConds.push({ [field]: value });
}
} else if ((token.startsWith('"') && token.endsWith('"')) || (token.startsWith("'") && token.endsWith("'"))) {
// Quoted free phrase across all fields
const phrase = token.slice(1, -1);
const parts = phrase.split(/\s+/).map((t) => escapeForRegex(t));
const pattern = parts.join('\\s+');
andConds.push({ $or: searchableFields.map((f) => ({ [f]: { $regex: pattern, $options: 'i' } })) });
} else {
// Free term across all fields
const esc = escapeForRegex(token);
andConds.push({ $or: searchableFields.map((f) => ({ [f]: { $regex: esc, $options: 'i' } })) });
}
if (value.includes('*') || value.includes('?')) {
// wildcard field search
const escaped = value.replace(/([.+^${}()|[\\]\\])/g, '\\$1');
const pattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.');
return { [field]: { $regex: pattern, $options: 'i' } };
}
// exact field:value
return { [field]: value };
}
// free term -> regex across all searchable fields
const esc = escapeForRegex(term);
return { $or: searchableFields.map((f) => ({ [f]: { $regex: esc, $options: 'i' } })) };
});
return await (this as any).execQuery({ $and: andConds }, opts);
}
// free term and quoted field phrase (exact or wildcard), e.g. 'term field:"phrase"' or 'term field:"ph*se"'
const freeWithQuotedField = q.match(/^(\S+)\s+(\w+):"(.+)"$/);
if (freeWithQuotedField) {
const freeTerm = freeWithQuotedField[1];
const field = freeWithQuotedField[2];
let phrase = freeWithQuotedField[3];
if (!searchableFields.includes(field)) {
throw new Error(`Field '${field}' is not searchable for class ${this.name}`);
return await (this as any).execQuery({ $and: andConds }, opts);
}
// free term condition across all searchable fields
const freeEsc = escapeForRegex(freeTerm);
const freeCond = { $or: searchableFields.map((f) => ({ [f]: { $regex: freeEsc, $options: 'i' } })) };
// field condition: exact match or wildcard pattern
let fieldCond;
if (phrase.includes('*') || phrase.includes('?')) {
const escaped = phrase.replace(/([.+^${}()|[\\]\\])/g, '\\$1');
const pattern = escaped.replace(/\*/g, '.*').replace(/\?/g, '.');
fieldCond = { [field]: { $regex: pattern, $options: 'i' } };
} else {
fieldCond = { [field]: phrase };
}
return await (this as any).execQuery({ $and: [freeCond, fieldCond] }, opts);
}
// detect advanced Lucene syntax: field:value, wildcards, boolean, grouping
const luceneSyntax = /(\w+:[^\s]+)|\*|\?|\bAND\b|\bOR\b|\bNOT\b|\(|\)/;