ts/tsmdb/engine/QueryPlanner.ts

import * as plugins from '../tsmdb.plugins.js';
import type { Document, IStoredDocument } from '../types/interfaces.js';
import { IndexEngine } from './IndexEngine.js';

/**
 * Query execution plan types.
 *
 * The planner code in this file emits `'IXSCAN'` (index lookup without range
 * operators), `'IXSCAN_RANGE'` (index lookup that ends in a range condition)
 * and `'COLLSCAN'` (full collection scan / index not usable). `'FETCH'` is
 * part of the union but is not produced anywhere in this file.
 */
export type TQueryPlanType = 'IXSCAN' | 'COLLSCAN' | 'FETCH' | 'IXSCAN_RANGE';

/**
 * Represents a query execution plan as produced by `QueryPlanner`.
 *
 * Collection-scan plans leave `indexName`, `indexKey` and `residualFilter`
 * unset and report a selectivity of 1.0.
 */
export interface IQueryPlan {
  /** The type of scan used */
  type: TQueryPlanType;
  /** Index name if using an index */
  indexName?: string;
  /** Index key specification, copied from the chosen index definition */
  indexKey?: Record<string, 1 | -1 | string>;
  /** Whether the query can be fully satisfied by the index (no residual conditions remain) */
  indexCovering: boolean;
  /** Estimated selectivity (0-1, lower is more selective); a heuristic, not a measured value */
  selectivity: number;
  /** Whether range operators are used */
  usesRange: boolean;
  /** Fields used from the index, in index key order */
  indexFieldsUsed: string[];
  /** Filter conditions that must be applied post-index lookup */
  residualFilter?: Document;
  /** Explanation for debugging */
  explanation: string;
}

/**
 * Filter operator analysis: everything the planner learned about a single
 * field while walking a query filter.
 */
interface IFilterOperatorInfo {
  /** Full field path; nested filter objects are flattened to dotted notation */
  field: string;
  /** Operator names recorded for the field; a bare value is recorded as `'$eq'` */
  operators: string[];
  /** True when the field is compared by equality (bare value or `$eq`) */
  equality: boolean;
  /** True when any of `$gt`, `$gte`, `$lt`, `$lte` is present */
  range: boolean;
  /** True when `$in` is present */
  in: boolean;
  /** True when `$exists` is present (whatever its operand) */
  exists: boolean;
  /** True when a `$regex` condition is present */
  regex: boolean;
  /** Operand of each recorded operator, keyed by operator name */
  values: Record<string, any>;
}

/**
 * QueryPlanner - Analyzes queries and selects optimal execution plans
 *
 * The planner flattens a filter into per-field operator information, scores
 * every index reported by the `IndexEngine` against it, and returns the
 * highest scoring usable index plan or a collection scan when none qualifies.
 * All selectivity numbers are fixed heuristics, not statistics.
 */
export class QueryPlanner {
  private indexEngine: IndexEngine;

  /**
   * @param indexEngine Engine that is asked for the list of available indexes
   */
  constructor(indexEngine: IndexEngine) {
    this.indexEngine = indexEngine;
  }

  /**
   * Generate an execution plan for a query filter
   *
   * @param filter Query filter; a missing or empty filter yields a collection scan
   * @returns The best usable index plan, or a `COLLSCAN` plan when no index helps
   */
  async plan(filter: Document): Promise<IQueryPlan> {
    await this.indexEngine['initialize']();

    // Empty filter = full collection scan
    if (!filter || Object.keys(filter).length === 0) {
      return this.collectionScan('No filter specified, full collection scan required');
    }

    const operatorInfo = this.analyzeFilter(filter);
    const indexes = await this.indexEngine.listIndexes();

    // Keep the first plan with the strictly highest score
    let bestPlan: IQueryPlan | null = null;
    let bestScore = -1;

    for (const index of indexes) {
      const candidate = this.scoreIndex(index, operatorInfo, filter);
      // Unusable indexes come back with selectivity 1.0 and are skipped here
      if (candidate.selectivity < 1.0) {
        const score = this.calculateScore(candidate);
        if (score > bestScore) {
          bestScore = score;
          bestPlan = candidate;
        }
      }
    }

    // If no suitable index found, fall back to collection scan
    if (!bestPlan || bestScore <= 0) {
      return this.collectionScan('No suitable index found for this query');
    }

    return bestPlan;
  }

  /**
   * Build a plain collection-scan plan carrying the given explanation.
   */
  private collectionScan(explanation: string): IQueryPlan {
    return {
      type: 'COLLSCAN',
      indexCovering: false,
      selectivity: 1.0,
      usesRange: false,
      indexFieldsUsed: [],
      explanation,
    };
  }

  /**
   * Add `incoming` to `target`, combining it with any information already
   * stored for the same field instead of replacing it.
   *
   * NOTE: when both sides carry the same operator, `values` keeps only the
   * incoming operand (the operator name is still listed twice in `operators`).
   */
  private mergeOperatorInfo(
    target: Map<string, IFilterOperatorInfo>,
    incoming: IFilterOperatorInfo
  ): void {
    const existing = target.get(incoming.field);
    if (!existing) {
      target.set(incoming.field, incoming);
      return;
    }
    existing.operators.push(...incoming.operators);
    existing.equality = existing.equality || incoming.equality;
    existing.range = existing.range || incoming.range;
    existing.in = existing.in || incoming.in;
    existing.exists = existing.exists || incoming.exists;
    existing.regex = existing.regex || incoming.regex;
    Object.assign(existing.values, incoming.values);
  }

  /**
   * Analyze filter to extract operator information per field
   *
   * `$and` branches are merged into the result; every other top-level
   * `$`-operator is skipped here (see `collectUnanalyzedClauses`). Plain
   * nested objects are flattened into dotted field paths.
   *
   * NOTE(review): flattening `{ a: { b: 1 } }` to `a.b = 1` is looser than an
   * exact embedded-document comparison - confirm the executor re-checks it.
   *
   * @param filter Filter (or sub-filter) to walk
   * @param prefix Dotted path of the enclosing field, empty at the top level
   * @returns Map from full field path to the operators found for it
   */
  private analyzeFilter(filter: Document, prefix = ''): Map<string, IFilterOperatorInfo> {
    const result = new Map<string, IFilterOperatorInfo>();

    for (const [key, value] of Object.entries(filter)) {
      if (key.startsWith('$')) {
        if (key === '$and' && Array.isArray(value)) {
          for (const subFilter of value) {
            // Ignore malformed branches instead of throwing on Object.entries(null)
            if (!subFilter || typeof subFilter !== 'object') {
              continue;
            }
            for (const subInfo of this.analyzeFilter(subFilter, prefix).values()) {
              this.mergeOperatorInfo(result, subInfo);
            }
          }
        }
        continue;
      }

      const fullKey = prefix ? `${prefix}.${key}` : key;
      const info: IFilterOperatorInfo = {
        field: fullKey,
        operators: [],
        equality: false,
        range: false,
        in: false,
        exists: false,
        regex: false,
        values: {},
      };

      const isLiteral =
        typeof value !== 'object' ||
        value === null ||
        Array.isArray(value) ||
        value instanceof plugins.bson.ObjectId ||
        value instanceof Date;

      if (isLiteral) {
        // Bare value (including arrays): implicit equality
        info.equality = true;
        info.operators.push('$eq');
        info.values['$eq'] = value;
      } else if (value instanceof RegExp) {
        // A RegExp literal has no enumerable keys; record it as a $regex
        // condition so it is not silently lost from the plan
        info.regex = true;
        info.operators.push('$regex');
        info.values['$regex'] = value;
      } else {
        // Operator object, possibly mixed with nested field keys
        for (const [op, opValue] of Object.entries(value)) {
          if (!op.startsWith('$')) {
            // Nested object - recurse with the current path as prefix
            for (const nested of this.analyzeFilter({ [op]: opValue }, fullKey).values()) {
              this.mergeOperatorInfo(result, nested);
            }
            continue;
          }

          info.operators.push(op);
          info.values[op] = opValue;

          switch (op) {
            case '$eq':
              info.equality = true;
              break;
            case '$in':
              info.in = true;
              break;
            case '$gt':
            case '$gte':
            case '$lt':
            case '$lte':
              info.range = true;
              break;
            case '$exists':
              info.exists = true;
              break;
            case '$regex':
              info.regex = true;
              break;
            default:
              // $ne, $not, $nin, ...: recorded, but never drive an index lookup
              break;
          }
        }
      }

      if (info.operators.length > 0) {
        // Merge rather than overwrite: the same field may already have been
        // filled in by an earlier $and branch or nested path
        this.mergeOperatorInfo(result, info);
      }
    }

    return result;
  }

  /**
   * Collect the top-level `$`-clauses that `analyzeFilter` skips (`$or`,
   * `$nor`, ...), descending through `$and` branches, so they can be kept in
   * the residual filter.
   */
  private collectUnanalyzedClauses(filter: Document): Document[] {
    const clauses: Document[] = [];
    if (!filter || typeof filter !== 'object') {
      return clauses;
    }
    for (const [key, value] of Object.entries(filter)) {
      if (!key.startsWith('$')) {
        continue;
      }
      if (key === '$and' && Array.isArray(value)) {
        for (const subFilter of value) {
          clauses.push(...this.collectUnanalyzedClauses(subFilter));
        }
      } else {
        clauses.push({ [key]: value });
      }
    }
    return clauses;
  }

  /**
   * Score an index for the given filter
   *
   * Walks the index key fields in order and uses the longest prefix whose
   * fields carry an indexable condition (equality, `$in`, range, `$exists`).
   * A range condition ends the prefix. Every condition the lookup does not
   * account for is returned in `residualFilter`.
   *
   * @param index Index definition (name, key spec, optional flags)
   * @param operatorInfo Per-field analysis from `analyzeFilter`
   * @param filter Original filter, used to recover logical clauses for the residual
   * @returns An index plan, or a `COLLSCAN` plan (selectivity 1.0) when the
   *          index cannot serve the filter
   */
  private scoreIndex(
    index: { name: string; key: Record<string, any>; unique?: boolean; sparse?: boolean },
    operatorInfo: Map<string, IFilterOperatorInfo>,
    filter: Document
  ): IQueryPlan {
    const indexFields = Object.keys(index.key);
    const usedFields: string[] = [];
    const residualConditions: Record<string, any> = {};
    let usesRange = false;
    let allEquality = true;
    let selectivity = 1.0;

    for (const field of indexFields) {
      const info = operatorInfo.get(field);
      if (!info) {
        // Index field not in filter - the usable prefix ends here
        break;
      }

      // Operators the index lookup itself accounts for on this field
      let consumedOperators: string[];
      if (info.equality) {
        selectivity *= 0.01; // Assume 1% match
        consumedOperators = ['$eq'];
      } else if (info.in) {
        const inValues = info.values['$in'];
        selectivity *= Array.isArray(inValues) ? Math.min(0.5, inValues.length * 0.01) : 0.1;
        consumedOperators = ['$in'];
        allEquality = false;
      } else if (info.range) {
        selectivity *= 0.25;
        usesRange = true;
        consumedOperators = ['$gt', '$gte', '$lt', '$lte'];
        allEquality = false;
      } else if (info.exists) {
        selectivity *= 0.5;
        consumedOperators = ['$exists'];
        allEquality = false;
      } else {
        // Only non-indexable operators on this field: keep the prefix matched
        // so far and leave this field to the residual filter
        break;
      }

      usedFields.push(field);

      // Anything else on the same field ($ne, $regex, ...) must still be checked
      const leftover: Record<string, any> = {};
      for (const [op, opValue] of Object.entries(info.values)) {
        if (!consumedOperators.includes(op)) {
          leftover[op] = opValue;
        }
      }
      if (Object.keys(leftover).length > 0) {
        residualConditions[field] = leftover;
      }

      if (usesRange) {
        // After range, can't use more index fields efficiently
        break;
      }
    }

    if (usedFields.length === 0) {
      return this.collectionScan(`Index ${index.name} cannot be used for this query`);
    }

    // Conditions on fields outside the used index prefix
    const coveredFields = new Set(usedFields);
    for (const [field, info] of operatorInfo) {
      if (coveredFields.has(field)) {
        continue;
      }
      const ops = Object.keys(info.values);
      // Shorthand only for pure equality; otherwise keep every operator
      residualConditions[field] =
        ops.length === 1 && ops[0] === '$eq' ? info.values['$eq'] : info.values;
    }

    // Logical clauses never reach operatorInfo; carry them along explicitly
    const logicalClauses = this.collectUnanalyzedClauses(filter);
    if (logicalClauses.length > 0) {
      residualConditions['$and'] = logicalClauses;
    }

    const hasResidual = Object.keys(residualConditions).length > 0;

    // A unique index pins down at most one document only when every key
    // field is matched by equality
    if (index.unique && allEquality && usedFields.length === indexFields.length) {
      selectivity = Math.min(selectivity, 0.001);
    }

    return {
      type: usesRange ? 'IXSCAN_RANGE' : 'IXSCAN',
      indexName: index.name,
      indexKey: index.key,
      indexCovering: !hasResidual,
      selectivity,
      usesRange,
      indexFieldsUsed: usedFields,
      residualFilter: hasResidual ? residualConditions : undefined,
      explanation: `Using index ${index.name} on fields [${usedFields.join(', ')}]`,
    };
  }

  /**
   * Calculate overall score for a plan (higher is better)
   *
   * Up to 100 points for selectivity, 50 for a covering plan, 10 per index
   * field used, and 20 when no range scan is involved.
   */
  private calculateScore(plan: IQueryPlan): number {
    let score = 0;

    // Lower selectivity is better (fewer documents to fetch)
    score += (1 - plan.selectivity) * 100;

    // Index covering queries are best
    if (plan.indexCovering) {
      score += 50;
    }

    // More index fields used is better
    score += plan.indexFieldsUsed.length * 10;

    // Equality scans are better than range scans
    if (!plan.usesRange) {
      score += 20;
    }

    return score;
  }

  /**
   * Explain a query - returns detailed plan information
   *
   * The winning plan is chosen the same way as in `plan()`: the best scoring
   * usable index, otherwise the full collection scan. Rejected plans list the
   * remaining usable indexes by score, then the collection scan, then the
   * indexes that cannot serve the filter.
   *
   * @param filter Query filter; a missing filter is treated as empty
   */
  async explain(filter: Document): Promise<{
    queryPlanner: {
      plannerVersion: number;
      namespace: string;
      indexFilterSet: boolean;
      winningPlan: IQueryPlan;
      rejectedPlans: IQueryPlan[];
    };
  }> {
    await this.indexEngine['initialize']();

    // Match plan(): tolerate a null/undefined filter instead of throwing
    const safeFilter: Document = filter ? filter : {};
    const operatorInfo = this.analyzeFilter(safeFilter);
    const indexes = await this.indexEngine.listIndexes();

    const usable: IQueryPlan[] = [];
    const unusable: IQueryPlan[] = [];
    for (const index of indexes) {
      const candidate = this.scoreIndex(index, operatorInfo, safeFilter);
      (candidate.type === 'COLLSCAN' ? unusable : usable).push(candidate);
    }

    // Best first; the sort is stable, so ties keep index order like plan()
    usable.sort((a, b) => this.calculateScore(b) - this.calculateScore(a));

    // The real fallback outranks "index X cannot be used" placeholders
    const ranked = [...usable, this.collectionScan('Full collection scan'), ...unusable];

    return {
      queryPlanner: {
        plannerVersion: 1,
        namespace: `${this.indexEngine['dbName']}.${this.indexEngine['collName']}`,
        indexFilterSet: false,
        winningPlan: ranked[0],
        rejectedPlans: ranked.slice(1),
      },
    };
  }
}
BREAKING CHANGE(storage,engine,server): add session & transaction management, index/query planner, WAL and checksum support; integrate index-accelerated queries and update storage API (findByIds) to enable index optimizations (2026-02-01 16:02:03 +00:00)
			`import * as plugins from '../tsmdb.plugins.js';`
			`import type { Document, IStoredDocument } from '../types/interfaces.js';`
			`import { IndexEngine } from './IndexEngine.js';`

			`/**`
			`* Query execution plan types`
			`*/`
			`export type TQueryPlanType = 'IXSCAN' \| 'COLLSCAN' \| 'FETCH' \| 'IXSCAN_RANGE';`

			`/**`
			`* Represents a query execution plan`
			`*/`
			`export interface IQueryPlan {`
			`/** The type of scan used */`
			`type: TQueryPlanType;`
			`/** Index name if using an index */`
			`indexName?: string;`
			`/** Index key specification */`
			`indexKey?: Record<string, 1 \| -1 \| string>;`
			`/** Whether the query can be fully satisfied by the index */`
			`indexCovering: boolean;`
			`/** Estimated selectivity (0-1, lower is more selective) */`
			`selectivity: number;`
			`/** Whether range operators are used */`
			`usesRange: boolean;`
			`/** Fields used from the index */`
			`indexFieldsUsed: string[];`
			`/** Filter conditions that must be applied post-index lookup */`
			`residualFilter?: Document;`
			`/** Explanation for debugging */`
			`explanation: string;`
			`}`

			`/**`
			`* Filter operator analysis`
			`*/`
			`interface IFilterOperatorInfo {`
			`field: string;`
			`operators: string[];`
			`equality: boolean;`
			`range: boolean;`
			`in: boolean;`
			`exists: boolean;`
			`regex: boolean;`
			`values: Record<string, any>;`
			`}`

			`/**`
			`* QueryPlanner - Analyzes queries and selects optimal execution plans`
			`*/`
			`export class QueryPlanner {`
			`private indexEngine: IndexEngine;`

			`constructor(indexEngine: IndexEngine) {`
			`this.indexEngine = indexEngine;`
			`}`

			`/**`
			`* Generate an execution plan for a query filter`
			`*/`
			`async plan(filter: Document): Promise<IQueryPlan> {`
			`await this.indexEngine['initialize']();`

			`// Empty filter = full collection scan`
			`if (!filter \|\| Object.keys(filter).length === 0) {`
			`return {`
			`type: 'COLLSCAN',`
			`indexCovering: false,`
			`selectivity: 1.0,`
			`usesRange: false,`
			`indexFieldsUsed: [],`
			`explanation: 'No filter specified, full collection scan required',`
			`};`
			`}`

			`// Analyze the filter`
			`const operatorInfo = this.analyzeFilter(filter);`

			`// Get available indexes`
			`const indexes = await this.indexEngine.listIndexes();`

			`// Score each index`
			`let bestPlan: IQueryPlan \| null = null;`
			`let bestScore = -1;`

			`for (const index of indexes) {`
			`const plan = this.scoreIndex(index, operatorInfo, filter);`
			`if (plan.selectivity < 1.0) {`
			`const score = this.calculateScore(plan);`
			`if (score > bestScore) {`
			`bestScore = score;`
			`bestPlan = plan;`
			`}`
			`}`
			`}`

			`// If no suitable index found, fall back to collection scan`
			`if (!bestPlan \|\| bestScore <= 0) {`
			`return {`
			`type: 'COLLSCAN',`
			`indexCovering: false,`
			`selectivity: 1.0,`
			`usesRange: false,`
			`indexFieldsUsed: [],`
			`explanation: 'No suitable index found for this query',`
			`};`
			`}`

			`return bestPlan;`
			`}`

			`/**`
			`* Analyze filter to extract operator information per field`
			`*/`
			`private analyzeFilter(filter: Document, prefix = ''): Map<string, IFilterOperatorInfo> {`
			`const result = new Map<string, IFilterOperatorInfo>();`

			`for (const [key, value] of Object.entries(filter)) {`
			`// Skip logical operators at the top level`
			`if (key.startsWith('$')) {`
			`if (key === '$and' && Array.isArray(value)) {`
			`// Merge $and conditions`
			`for (const subFilter of value) {`
			`const subInfo = this.analyzeFilter(subFilter, prefix);`
			`for (const [field, info] of subInfo) {`
			`if (result.has(field)) {`
			`// Merge operators`
			`const existing = result.get(field)!;`
			`existing.operators.push(...info.operators);`
			`existing.equality = existing.equality \|\| info.equality;`
			`existing.range = existing.range \|\| info.range;`
			`existing.in = existing.in \|\| info.in;`
			`Object.assign(existing.values, info.values);`
			`} else {`
			`result.set(field, info);`
			`}`
			`}`
			`}`
			`}`
			`continue;`
			`}`

			const fullKey = prefix ? `${prefix}.${key}` : key;
			`const info: IFilterOperatorInfo = {`
			`field: fullKey,`
			`operators: [],`
			`equality: false,`
			`range: false,`
			`in: false,`
			`exists: false,`
			`regex: false,`
			`values: {},`
			`};`

			`if (typeof value !== 'object' \|\| value === null \|\| value instanceof plugins.bson.ObjectId \|\| value instanceof Date) {`
			`// Direct equality`
			`info.equality = true;`
			`info.operators.push('$eq');`
			`info.values['$eq'] = value;`
			`} else if (Array.isArray(value)) {`
			`// Array equality (rare, but possible)`
			`info.equality = true;`
			`info.operators.push('$eq');`
			`info.values['$eq'] = value;`
			`} else {`
			`// Operator object`
			`for (const [op, opValue] of Object.entries(value)) {`
			`if (op.startsWith('$')) {`
			`info.operators.push(op);`
			`info.values[op] = opValue;`

			`switch (op) {`
			`case '$eq':`
			`info.equality = true;`
			`break;`
			`case '$ne':`
			`case '$not':`
			`// These can use indexes but with low selectivity`
			`break;`
			`case '$in':`
			`info.in = true;`
			`break;`
			`case '$nin':`
			`// Can't efficiently use indexes`
			`break;`
			`case '$gt':`
			`case '$gte':`
			`case '$lt':`
			`case '$lte':`
			`info.range = true;`
			`break;`
			`case '$exists':`
			`info.exists = true;`
			`break;`
			`case '$regex':`
			`info.regex = true;`
			`break;`
			`}`
			`} else {`
			`// Nested object - recurse`
			`const nestedInfo = this.analyzeFilter({ [op]: opValue }, fullKey);`
			`for (const [nestedField, nestedFieldInfo] of nestedInfo) {`
			`result.set(nestedField, nestedFieldInfo);`
			`}`
			`}`
			`}`
			`}`

			`if (info.operators.length > 0) {`
			`result.set(fullKey, info);`
			`}`
			`}`

			`return result;`
			`}`

			`/**`
			`* Score an index for the given filter`
			`*/`
			`private scoreIndex(`
			`index: { name: string; key: Record<string, any>; unique?: boolean; sparse?: boolean },`
			`operatorInfo: Map<string, IFilterOperatorInfo>,`
			`filter: Document`
			`): IQueryPlan {`
			`const indexFields = Object.keys(index.key);`
			`const usedFields: string[] = [];`
			`let usesRange = false;`
			`let canUseIndex = true;`
			`let selectivity = 1.0;`
			`let residualFilter: Document \| undefined;`

			`// Check each index field in order`
			`for (const field of indexFields) {`
			`const info = operatorInfo.get(field);`
			`if (!info) {`
			`// Index field not in filter - stop here`
			`break;`
			`}`

			`usedFields.push(field);`

			`// Calculate selectivity based on operator`
			`if (info.equality) {`
			`// Equality has high selectivity`
			`selectivity *= 0.01; // Assume 1% match`
			`} else if (info.in) {`
			`// $in selectivity depends on array size`
			`const inValues = info.values['$in'];`
			`if (Array.isArray(inValues)) {`
			`selectivity *= Math.min(0.5, inValues.length * 0.01);`
			`} else {`
			`selectivity *= 0.1;`
			`}`
			`} else if (info.range) {`
			`// Range queries have moderate selectivity`
			`selectivity *= 0.25;`
			`usesRange = true;`
			`// After range, can't use more index fields efficiently`
			`break;`
			`} else if (info.exists) {`
			`// $exists can use sparse indexes`
			`selectivity *= 0.5;`
			`} else {`
			`// Other operators may not be indexable`
			`canUseIndex = false;`
			`break;`
			`}`
			`}`

			`if (!canUseIndex \|\| usedFields.length === 0) {`
			`return {`
			`type: 'COLLSCAN',`
			`indexCovering: false,`
			`selectivity: 1.0,`
			`usesRange: false,`
			`indexFieldsUsed: [],`
			explanation: `Index ${index.name} cannot be used for this query`,
			`};`
			`}`

			`// Build residual filter for conditions not covered by index`
			`const coveredFields = new Set(usedFields);`
			`const residualConditions: Record<string, any> = {};`
			`for (const [field, info] of operatorInfo) {`
			`if (!coveredFields.has(field)) {`
			`// This field isn't covered by the index`
			`if (info.equality) {`
			`residualConditions[field] = info.values['$eq'];`
			`} else {`
			`residualConditions[field] = info.values;`
			`}`
			`}`
			`}`

			`if (Object.keys(residualConditions).length > 0) {`
			`residualFilter = residualConditions;`
			`}`

			`// Unique indexes have better selectivity for equality`
			`if (index.unique && usedFields.length === indexFields.length) {`
			`selectivity = Math.min(selectivity, 0.001); // At most 1 document`
			`}`

			`return {`
			`type: usesRange ? 'IXSCAN_RANGE' : 'IXSCAN',`
			`indexName: index.name,`
			`indexKey: index.key,`
			`indexCovering: Object.keys(residualConditions).length === 0,`
			`selectivity,`
			`usesRange,`
			`indexFieldsUsed: usedFields,`
			`residualFilter,`
			explanation: `Using index ${index.name} on fields [${usedFields.join(', ')}]`,
			`};`
			`}`

			`/**`
			`* Calculate overall score for a plan (higher is better)`
			`*/`
			`private calculateScore(plan: IQueryPlan): number {`
			`let score = 0;`

			`// Lower selectivity is better (fewer documents to fetch)`
			`score += (1 - plan.selectivity) * 100;`

			`// Index covering queries are best`
			`if (plan.indexCovering) {`
			`score += 50;`
			`}`

			`// More index fields used is better`
			`score += plan.indexFieldsUsed.length * 10;`

			`// Equality scans are better than range scans`
			`if (!plan.usesRange) {`
			`score += 20;`
			`}`

			`return score;`
			`}`

			`/**`
			`* Explain a query - returns detailed plan information`
			`*/`
			`async explain(filter: Document): Promise<{`
			`queryPlanner: {`
			`plannerVersion: number;`
			`namespace: string;`
			`indexFilterSet: boolean;`
			`winningPlan: IQueryPlan;`
			`rejectedPlans: IQueryPlan[];`
			`};`
			`}> {`
			`await this.indexEngine['initialize']();`

			`// Analyze the filter`
			`const operatorInfo = this.analyzeFilter(filter);`

			`// Get available indexes`
			`const indexes = await this.indexEngine.listIndexes();`

			`// Score all indexes`
			`const plans: IQueryPlan[] = [];`

			`for (const index of indexes) {`
			`const plan = this.scoreIndex(index, operatorInfo, filter);`
			`plans.push(plan);`
			`}`

			`// Add collection scan as fallback`
			`plans.push({`
			`type: 'COLLSCAN',`
			`indexCovering: false,`
			`selectivity: 1.0,`
			`usesRange: false,`
			`indexFieldsUsed: [],`
			`explanation: 'Full collection scan',`
			`});`

			`// Sort by score (best first)`
			`plans.sort((a, b) => this.calculateScore(b) - this.calculateScore(a));`

			`return {`
			`queryPlanner: {`
			`plannerVersion: 1,`
			namespace: `${this.indexEngine['dbName']}.${this.indexEngine['collName']}`,
			`indexFilterSet: false,`
			`winningPlan: plans[0],`
			`rejectedPlans: plans.slice(1),`
			`},`
			`};`
			`}`
			`}`