feat(tsmdb): implement TsmDB Mongo-wire-compatible server, add storage/engine modules and reorganize exports

2026-02-01 23:33:35 +00:00
parent 678bf15eb4
commit fff77fbd8e
40 changed files with 261 additions and 95 deletions
--- a/ts/ts_tsmdb/engine/QueryPlanner.ts
+++ b/ts/ts_tsmdb/engine/QueryPlanner.ts
@@ -0,0 +1,393 @@
+import * as plugins from '../plugins.js';
+import type { Document, IStoredDocument } from '../types/interfaces.js';
+import { IndexEngine } from './IndexEngine.js';
+
+/**
+ * Query execution plan types
+ *
+ * - `COLLSCAN`: full collection scan; returned by the planner when the filter
+ *   is empty or when no index qualifies.
+ * - `IXSCAN`: index scan in which no range operator was applied to the index.
+ * - `IXSCAN_RANGE`: index scan whose last used index field is matched by a
+ *   range operator (`$gt`, `$gte`, `$lt`, `$lte`).
+ * - `FETCH`: declared here, but not produced by the QueryPlanner visible in
+ *   this file.
+ */
+export type TQueryPlanType = 'IXSCAN' | 'COLLSCAN' | 'FETCH' | 'IXSCAN_RANGE';
+
+/**
+ * Represents a query execution plan
+ *
+ * Returned by `QueryPlanner.plan()` and reported (winning and rejected) by
+ * `QueryPlanner.explain()`.
+ */
+export interface IQueryPlan {
+  /** The type of scan used */
+  type: TQueryPlanType;
+  /** Index name if using an index (left unset on COLLSCAN plans) */
+  indexName?: string;
+  /** Index key specification (left unset on COLLSCAN plans) */
+  indexKey?: Record<string, 1 | -1 | string>;
+  /** Whether the query can be fully satisfied by the index; the planner sets this when no residual conditions remain */
+  indexCovering: boolean;
+  /** Estimated selectivity (0-1, lower is more selective); a heuristic estimate, 1.0 for collection scans */
+  selectivity: number;
+  /** Whether range operators are used */
+  usesRange: boolean;
+  /** Fields used from the index, in index key order (empty for collection scans) */
+  indexFieldsUsed: string[];
+  /** Filter conditions that must be applied post-index lookup (unset when nothing remains to check) */
+  residualFilter?: Document;
+  /** Explanation for debugging (human-readable, not meant to be parsed) */
+  explanation: string;
+}
+
+/**
+ * Filter operator analysis
+ *
+ * One record per filtered field, built by `QueryPlanner.analyzeFilter()` and
+ * consumed by `QueryPlanner.scoreIndex()`.
+ */
+interface IFilterOperatorInfo {
+  field: string; // Field path; keys of nested objects are joined with '.'
+  operators: string[]; // Every `$` operator recorded for the field, in encounter order
+  equality: boolean; // Set for direct values, array values and `$eq`
+  range: boolean; // Set for `$gt`, `$gte`, `$lt`, `$lte`
+  in: boolean; // Set for `$in`
+  exists: boolean; // Set for `$exists`
+  regex: boolean; // Set for `$regex`
+  values: Record<string, any>; // Operand keyed by operator name; direct equality is stored under '$eq'
+}
+
+/**
+ * QueryPlanner - Analyzes queries and selects optimal execution plans
+ *
+ * The planner flattens a filter into per-field operator information, scores
+ * every index returned by the IndexEngine against it with fixed heuristic
+ * selectivities, and picks the highest scoring usable index. When no index is
+ * usable the result is a COLLSCAN plan.
+ */
+export class QueryPlanner {
+  /** Operators that turn an index lookup into a range scan. */
+  private static readonly RANGE_OPERATORS = ['$gt', '$gte', '$lt', '$lte'];
+
+  private indexEngine: IndexEngine;
+
+  /**
+   * @param indexEngine engine that owns the indexes of the collection being queried
+   */
+  constructor(indexEngine: IndexEngine) {
+    this.indexEngine = indexEngine;
+  }
+
+  /**
+   * Generate an execution plan for a query filter
+   *
+   * @param filter query filter; `null`, `undefined` and `{}` all mean "match everything"
+   * @returns the best index plan, or a COLLSCAN plan when the filter is empty
+   *   or no index can serve it
+   */
+  async plan(filter: Document): Promise<IQueryPlan> {
+    // Bracket access: `initialize` is not part of IndexEngine's public typing.
+    await this.indexEngine['initialize']();
+
+    // Empty filter = full collection scan
+    if (this.isEmptyFilter(filter)) {
+      return this.collectionScan('No filter specified, full collection scan required');
+    }
+
+    const best = (await this.rankIndexPlans(filter))[0];
+    if (best) {
+      return best;
+    }
+
+    // If no suitable index found, fall back to collection scan
+    return this.collectionScan('No suitable index found for this query');
+  }
+
+  /**
+   * True when the filter carries no conditions at all.
+   */
+  private isEmptyFilter(filter: Document): boolean {
+    return !filter || Object.keys(filter).length === 0;
+  }
+
+  /**
+   * Build a COLLSCAN plan carrying the given explanation.
+   */
+  private collectionScan(explanation: string): IQueryPlan {
+    return {
+      type: 'COLLSCAN',
+      indexCovering: false,
+      selectivity: 1.0,
+      usesRange: false,
+      indexFieldsUsed: [],
+      explanation,
+    };
+  }
+
+  /**
+   * Score every available index against the filter and return the usable
+   * index plans, best first.
+   *
+   * Indexes that cannot serve the filter are left out. The sort is stable, so
+   * among equally scored plans the index listed first by the engine wins.
+   */
+  private async rankIndexPlans(filter: Document): Promise<IQueryPlan[]> {
+    const operatorInfo = this.analyzeFilter(filter);
+    const indexes = await this.indexEngine.listIndexes();
+
+    const scored: Array<{ plan: IQueryPlan; score: number }> = [];
+    for (const index of indexes) {
+      const plan = this.scoreIndex(index, operatorInfo, filter);
+      // scoreIndex reports an unusable index as a plan with selectivity 1.0
+      if (plan.selectivity >= 1.0) {
+        continue;
+      }
+      const score = this.calculateScore(plan);
+      if (score > 0) {
+        scored.push({ plan, score });
+      }
+    }
+
+    scored.sort((a, b) => b.score - a.score);
+    return scored.map((entry) => entry.plan);
+  }
+
+  /**
+   * Analyze filter to extract operator information per field
+   *
+   * `$and` branches are flattened into the same map. Every other `$`-prefixed
+   * key at this level (`$or`, `$nor`, ...) is skipped here; scoreIndex puts
+   * those clauses into the residual filter instead.
+   *
+   * @param filter filter (or sub-filter) to analyze
+   * @param prefix dotted path of the enclosing field when recursing into a nested object
+   * @returns operator information keyed by dotted field path
+   */
+  private analyzeFilter(filter: Document, prefix = ''): Map<string, IFilterOperatorInfo> {
+    const result = new Map<string, IFilterOperatorInfo>();
+
+    for (const [key, value] of Object.entries(filter)) {
+      if (key.startsWith('$')) {
+        if (key === '$and' && Array.isArray(value)) {
+          for (const subFilter of value) {
+            // Ignore malformed branches instead of throwing on Object.entries(null)
+            if (!subFilter || typeof subFilter !== 'object') {
+              continue;
+            }
+            for (const info of this.analyzeFilter(subFilter, prefix).values()) {
+              this.mergeOperatorInfo(result, info);
+            }
+          }
+        }
+        continue;
+      }
+
+      const fullKey = prefix ? `${prefix}.${key}` : key;
+      const info: IFilterOperatorInfo = {
+        field: fullKey,
+        operators: [],
+        equality: false,
+        range: false,
+        in: false,
+        exists: false,
+        regex: false,
+        values: {},
+      };
+
+      if (value instanceof RegExp) {
+        // A bare RegExp is a pattern match, not an equality test. It has no
+        // enumerable keys, so without this branch it would be dropped silently.
+        info.regex = true;
+        info.operators.push('$regex');
+        info.values['$regex'] = value;
+      } else if (
+        typeof value !== 'object' ||
+        value === null ||
+        Array.isArray(value) ||
+        value instanceof Date ||
+        value instanceof plugins.bson.ObjectId
+      ) {
+        // Direct equality (primitives, null, dates, ObjectIds and whole arrays)
+        info.equality = true;
+        info.operators.push('$eq');
+        info.values['$eq'] = value;
+      } else {
+        // Operator object, possibly mixed with nested field names
+        for (const [op, opValue] of Object.entries(value)) {
+          if (!op.startsWith('$')) {
+            // Nested object - recurse with the current field as path prefix
+            for (const nested of this.analyzeFilter({ [op]: opValue }, fullKey).values()) {
+              this.mergeOperatorInfo(result, nested);
+            }
+            continue;
+          }
+
+          info.operators.push(op);
+          info.values[op] = opValue;
+
+          switch (op) {
+            case '$eq':
+              info.equality = true;
+              break;
+            case '$in':
+              info.in = true;
+              break;
+            case '$gt':
+            case '$gte':
+            case '$lt':
+            case '$lte':
+              info.range = true;
+              break;
+            case '$exists':
+              info.exists = true;
+              break;
+            case '$regex':
+              info.regex = true;
+              break;
+            default:
+              // $ne, $not, $nin and unknown operators are recorded but never
+              // make a field index-usable
+              break;
+          }
+        }
+      }
+
+      if (info.operators.length > 0) {
+        this.mergeOperatorInfo(result, info);
+      }
+    }
+
+    return result;
+  }
+
+  /**
+   * Add `incoming` to `target`, combining it with any information already
+   * collected for the same field so that no condition is lost.
+   */
+  private mergeOperatorInfo(
+    target: Map<string, IFilterOperatorInfo>,
+    incoming: IFilterOperatorInfo
+  ): void {
+    const existing = target.get(incoming.field);
+    if (!existing) {
+      target.set(incoming.field, incoming);
+      return;
+    }
+
+    existing.operators.push(...incoming.operators);
+    existing.equality = existing.equality || incoming.equality;
+    existing.range = existing.range || incoming.range;
+    existing.in = existing.in || incoming.in;
+    existing.exists = existing.exists || incoming.exists;
+    existing.regex = existing.regex || incoming.regex;
+    Object.assign(existing.values, incoming.values);
+  }
+
+  /**
+   * Collect the `$`-prefixed clauses that analyzeFilter does not break down
+   * per field (`$or`, `$nor`, ...), descending through `$and` branches.
+   *
+   * @returns one single-key document per clause
+   */
+  private collectLogicalClauses(filter: Document): Document[] {
+    const clauses: Document[] = [];
+    if (!filter || typeof filter !== 'object') {
+      return clauses;
+    }
+
+    for (const [key, value] of Object.entries(filter)) {
+      if (!key.startsWith('$')) {
+        continue;
+      }
+      if (key === '$and' && Array.isArray(value)) {
+        for (const subFilter of value) {
+          clauses.push(...this.collectLogicalClauses(subFilter));
+        }
+      } else {
+        clauses.push({ [key]: value });
+      }
+    }
+
+    return clauses;
+  }
+
+  /**
+   * Score an index for the given filter
+   *
+   * Index fields are consumed in key order for as long as the filter
+   * constrains them with an index-usable operator. A range operator ends the
+   * usable prefix; so does a field that is absent from the filter or only has
+   * non-indexable operators. Everything the index does not answer is returned
+   * as the residual filter.
+   *
+   * @param index index descriptor as returned by the IndexEngine
+   * @param operatorInfo per-field analysis from analyzeFilter
+   * @param filter original filter, used to carry over `$or`-style clauses
+   * @returns an IXSCAN / IXSCAN_RANGE plan, or a COLLSCAN plan with
+   *   selectivity 1.0 when the index cannot serve the filter
+   */
+  private scoreIndex(
+    index: { name: string; key: Record<string, any>; unique?: boolean; sparse?: boolean },
+    operatorInfo: Map<string, IFilterOperatorInfo>,
+    filter: Document
+  ): IQueryPlan {
+    const indexFields = Object.keys(index.key);
+    const usedFields: string[] = [];
+    // Operators answered by the index scan itself, per used field
+    const handledOperators = new Map<string, Set<string>>();
+    let usesRange = false;
+    let allEquality = true;
+    let selectivity = 1.0;
+
+    // Check each index field in order
+    for (const field of indexFields) {
+      const info = operatorInfo.get(field);
+      if (!info) {
+        // Index field not in filter - stop here
+        break;
+      }
+
+      let handled: string[];
+      if (info.equality) {
+        // Equality has high selectivity
+        selectivity *= 0.01; // Assume 1% match
+        handled = ['$eq'];
+      } else if (info.in) {
+        // $in selectivity depends on array size
+        const inValues = info.values['$in'];
+        selectivity *= Array.isArray(inValues) ? Math.min(0.5, inValues.length * 0.01) : 0.1;
+        allEquality = false;
+        handled = ['$in'];
+      } else if (info.range) {
+        // Range queries have moderate selectivity
+        selectivity *= 0.25;
+        usesRange = true;
+        allEquality = false;
+        handled = QueryPlanner.RANGE_OPERATORS;
+      } else if (info.exists) {
+        // $exists can use sparse indexes
+        selectivity *= 0.5;
+        allEquality = false;
+        handled = ['$exists'];
+      } else {
+        // Only non-indexable operators on this field: keep the prefix matched
+        // so far and leave this field to the residual filter
+        break;
+      }
+
+      usedFields.push(field);
+      handledOperators.set(field, new Set(handled));
+
+      if (usesRange) {
+        // After range, can't use more index fields efficiently
+        break;
+      }
+    }
+
+    if (usedFields.length === 0) {
+      return this.collectionScan(`Index ${index.name} cannot be used for this query`);
+    }
+
+    // Build residual filter for conditions not covered by index
+    const residualConditions: Record<string, any> = {};
+    for (const [field, info] of operatorInfo) {
+      const handled = handledOperators.get(field);
+      const leftover: Record<string, any> = {};
+      for (const [op, opValue] of Object.entries(info.values)) {
+        if (!handled || !handled.has(op)) {
+          leftover[op] = opValue;
+        }
+      }
+
+      const leftoverOps = Object.keys(leftover);
+      if (leftoverOps.length === 0) {
+        continue;
+      }
+      // A lone $eq is emitted in its shorthand form: { field: value }
+      residualConditions[field] =
+        leftoverOps.length === 1 && leftoverOps[0] === '$eq' ? leftover['$eq'] : leftover;
+    }
+
+    // $or / $nor / ... are never answered by the index and must be re-checked
+    const logicalClauses = this.collectLogicalClauses(filter);
+    if (logicalClauses.length === 1) {
+      Object.assign(residualConditions, logicalClauses[0]);
+    } else if (logicalClauses.length > 1) {
+      residualConditions['$and'] = logicalClauses;
+    }
+
+    const hasResidual = Object.keys(residualConditions).length > 0;
+
+    // Unique indexes have better selectivity, but only when every index field
+    // is pinned by equality
+    if (index.unique && allEquality && usedFields.length === indexFields.length) {
+      selectivity = Math.min(selectivity, 0.001); // At most 1 document
+    }
+
+    return {
+      type: usesRange ? 'IXSCAN_RANGE' : 'IXSCAN',
+      indexName: index.name,
+      indexKey: index.key,
+      indexCovering: !hasResidual,
+      selectivity,
+      usesRange,
+      indexFieldsUsed: usedFields,
+      residualFilter: hasResidual ? residualConditions : undefined,
+      explanation: `Using index ${index.name} on fields [${usedFields.join(', ')}]`,
+    };
+  }
+
+  /**
+   * Calculate overall score for a plan (higher is better)
+   *
+   * Adds up to 100 points for selectivity, 50 for an index-covering plan,
+   * 10 per index field used and 20 when no range scan is involved.
+   */
+  private calculateScore(plan: IQueryPlan): number {
+    // Lower selectivity is better (fewer documents to fetch)
+    let score = (1 - plan.selectivity) * 100;
+
+    // Index covering queries are best
+    if (plan.indexCovering) {
+      score += 50;
+    }
+
+    // More index fields used is better
+    score += plan.indexFieldsUsed.length * 10;
+
+    // Equality scans are better than range scans
+    if (!plan.usesRange) {
+      score += 20;
+    }
+
+    return score;
+  }
+
+  /**
+   * Explain a query - returns detailed plan information
+   *
+   * `winningPlan` is always the plan that `plan()` returns for the same
+   * filter. `rejectedPlans` lists the other usable index plans, best first,
+   * followed by the collection scan fallback. Indexes that cannot serve the
+   * filter are not listed.
+   *
+   * @param filter query filter; an empty or missing filter explains as a COLLSCAN
+   */
+  async explain(filter: Document): Promise<{
+    queryPlanner: {
+      plannerVersion: number;
+      namespace: string;
+      indexFilterSet: boolean;
+      winningPlan: IQueryPlan;
+      rejectedPlans: IQueryPlan[];
+    };
+  }> {
+    await this.indexEngine['initialize']();
+
+    let winningPlan: IQueryPlan;
+    let rejectedPlans: IQueryPlan[] = [];
+
+    if (this.isEmptyFilter(filter)) {
+      winningPlan = this.collectionScan('No filter specified, full collection scan required');
+    } else {
+      const ranked = await this.rankIndexPlans(filter);
+      const best = ranked[0];
+      if (best) {
+        winningPlan = best;
+        // The collection scan is always possible, so report it as rejected
+        rejectedPlans = [...ranked.slice(1), this.collectionScan('Full collection scan')];
+      } else {
+        winningPlan = this.collectionScan('No suitable index found for this query');
+      }
+    }
+
+    return {
+      queryPlanner: {
+        plannerVersion: 1,
+        namespace: `${this.indexEngine['dbName']}.${this.indexEngine['collName']}`,
+        indexFilterSet: false,
+        winningPlan,
+        rejectedPlans,
+      },
+    };
+  }
+}