import * as plugins from '../plugins.js';
import type { IStorageAdapter } from '../storage/IStorageAdapter.js';
import type {
  Document,
  IStoredDocument,
  IIndexSpecification,
  IIndexInfo,
  ICreateIndexOptions,
} from '../types/interfaces.js';
import { TsmdbDuplicateKeyError, TsmdbIndexError } from '../errors/TsmdbErrors.js';
import { QueryEngine } from './QueryEngine.js';

/**
 * Simple ordered map used for range queries.
 *
 * Since sorted-btree has ESM/CJS interop issues, a small custom implementation
 * is used instead. Values live in a Map keyed by the JSON serialization of the
 * key; a separate array keeps the keys sorted according to the comparator.
 */
class SimpleBTree<K, V> {
  private entries: Map<string, { key: K; value: V }> = new Map();
  private sortedKeys: K[] = [];
  private comparator: (a: K, b: K) => number;

  /**
   * @param _unused ignored; kept so the constructor signature stays stable
   * @param comparator ordering for keys; defaults to the `<` / `>` operators
   */
  constructor(_unused?: undefined, comparator?: (a: K, b: K) => number) {
    this.comparator =
      comparator ||
      ((a: K, b: K) => {
        if (a < b) return -1;
        if (a > b) return 1;
        return 0;
      });
  }

  /** Serializes a key into the string used as identity in the entries map. */
  private keyToString(key: K): string {
    return JSON.stringify(key);
  }

  /**
   * Binary search over the sorted keys.
   *
   * @returns the first index whose key is `>= target` (`strict === false`)
   *          or `> target` (`strict === true`); `sortedKeys.length` if none
   */
  private boundIndex(target: K, strict: boolean): number {
    let lo = 0;
    let hi = this.sortedKeys.length;
    while (lo < hi) {
      const mid = (lo + hi) >>> 1;
      const cmp = this.comparator(this.sortedKeys[mid] as K, target);
      if (cmp < 0 || (strict && cmp === 0)) {
        lo = mid + 1;
      } else {
        hi = mid;
      }
    }
    return lo;
  }

  /**
   * Stores a value under a key, replacing any previous value.
   *
   * @returns true if the key was newly added, false if it already existed
   */
  set(key: K, value: V): boolean {
    const keyStr = this.keyToString(key);
    const existed = this.entries.has(keyStr);
    this.entries.set(keyStr, { key, value });

    if (!existed) {
      // Insert after any keys that compare equal so the array stays sorted
      this.sortedKeys.splice(this.boundIndex(key, true), 0, key);
    }
    return !existed;
  }

  /** Returns the value stored under the key, or undefined if absent. */
  get(key: K): V | undefined {
    return this.entries.get(this.keyToString(key))?.value;
  }

  /**
   * Removes a key.
   *
   * @returns true if the key existed and was removed
   */
  delete(key: K): boolean {
    const keyStr = this.keyToString(key);
    if (!this.entries.delete(keyStr)) {
      return false;
    }

    // Entries are identified by their serialized form, so within the run of
    // keys that compare equal, remove the one whose serialization matches.
    for (let i = this.boundIndex(key, false); i < this.sortedKeys.length; i++) {
      const candidate = this.sortedKeys[i] as K;
      if (this.comparator(candidate, key) !== 0) break;
      if (this.keyToString(candidate) === keyStr) {
        this.sortedKeys.splice(i, 1);
        break;
      }
    }
    return true;
  }

  /**
   * Invokes the callback for every entry whose key lies within the range,
   * in ascending key order.
   *
   * @param lowKey lower bound, or undefined for no lower bound
   * @param highKey upper bound, or undefined for no upper bound
   * @param lowInclusive whether a key equal to lowKey is included
   * @param highInclusive whether a key equal to highKey is included
   * @param callback receives the stored value and its key
   */
  forRange(
    lowKey: K | undefined,
    highKey: K | undefined,
    lowInclusive: boolean,
    highInclusive: boolean,
    callback: (value: V, key: K) => void
  ): void {
    const start = lowKey === undefined ? 0 : this.boundIndex(lowKey, !lowInclusive);

    for (let i = start; i < this.sortedKeys.length; i++) {
      const key = this.sortedKeys[i] as K;

      // Keys are sorted, so the first key past the high bound ends the scan
      if (highKey !== undefined) {
        const cmp = this.comparator(key, highKey);
        if (cmp > 0 || (cmp === 0 && !highInclusive)) break;
      }

      const entry = this.entries.get(this.keyToString(key));
      if (entry) {
        callback(entry.value, key);
      }
    }
  }
}

/**
 * Rank used to order values of different types
 * (null < number < string < boolean < Date < ObjectId < everything else).
 */
function indexKeyTypeOrder(v: any): number {
  if (v === null || v === undefined) return 0;
  if (typeof v === 'number') return 1;
  if (typeof v === 'string') return 2;
  if (typeof v === 'boolean') return 3;
  if (v instanceof Date) return 4;
  if (v instanceof plugins.bson.ObjectId) return 5;
  return 6;
}

/**
 * Comparator for B-Tree that handles mixed types consistently.
 *
 * NOTE(review): strings are ordered with localeCompare; confirm this matches
 * the ordering QueryEngine applies to $gt/$lt on strings.
 */
function indexKeyComparator(a: any, b: any): number {
  // null/undefined compare equal to each other and sort before everything else
  if (a === null || a === undefined) {
    return b === null || b === undefined ? 0 : -1;
  }
  if (b === null || b === undefined) return 1;

  // Arrays (compound keys) compare element by element; a missing element
  // is undefined and therefore sorts first
  if (Array.isArray(a) && Array.isArray(b)) {
    const length = Math.max(a.length, b.length);
    for (let i = 0; i < length; i++) {
      const cmp = indexKeyComparator(a[i], b[i]);
      if (cmp !== 0) return cmp;
    }
    return 0;
  }

  // ObjectId
  if (a instanceof plugins.bson.ObjectId && b instanceof plugins.bson.ObjectId) {
    return a.toHexString().localeCompare(b.toHexString());
  }

  // Date
  if (a instanceof Date && b instanceof Date) {
    return a.getTime() - b.getTime();
  }

  // Different types - use type ordering
  const typeA = indexKeyTypeOrder(a);
  const typeB = indexKeyTypeOrder(b);
  if (typeA !== typeB) return typeA - typeB;

  // Same type comparison. Numbers are compared relationally rather than by
  // subtraction, which would yield NaN for two equal infinities.
  if (typeof a === 'number') return a < b ? -1 : a > b ? 1 : 0;
  if (typeof a === 'string') return a.localeCompare(b);
  if (typeof a === 'boolean') return (a ? 1 : 0) - (b ? 1 : 0);

  // Fallback to string comparison
  return String(a).localeCompare(String(b));
}

/**
 * Index data structure using B-Tree for range queries
 */
interface IIndexData {
  name: string;
  key: Record<string, 1 | -1 | string>;
  unique: boolean;
  sparse: boolean;
  expireAfterSeconds?: number;
  // B-Tree for ordered index lookups (supports range queries)
  btree: SimpleBTree<any, Set<string>>;
  // Hash map for fast equality lookups, keyed by JSON.stringify(keyValue)
  hashMap: Map<string, Set<string>>;
}

/**
 * Operators found for a single field of a query filter
 */
interface IFieldFilterInfo {
  equality: boolean;
  range: boolean;
  in: boolean;
  ops: Record<string, any>;
}

/**
 * Index engine for managing indexes and query optimization
 */
export class IndexEngine {
  private dbName: string;
  private collName: string;
  private storage: IStorageAdapter;
  private indexes: Map<string, IIndexData> = new Map();
  private initialized = false;

  constructor(dbName: string, collName: string, storage: IStorageAdapter) {
    this.dbName = dbName;
    this.collName = collName;
    this.storage = storage;
  }

  /**
   * Initialize indexes from storage.
   *
   * Loads the stored index definitions and all documents of the collection and
   * builds the in-memory structures. Does nothing once initialization completed.
   */
  async initialize(): Promise<void> {
    if (this.initialized) return;

    const storedIndexes = await this.storage.getIndexes(this.dbName, this.collName);
    const documents = await this.storage.findAll(this.dbName, this.collName);

    for (const indexSpec of storedIndexes) {
      const indexData: IIndexData = {
        name: indexSpec.name,
        key: indexSpec.key,
        unique: indexSpec.unique || false,
        sparse: indexSpec.sparse || false,
        expireAfterSeconds: indexSpec.expireAfterSeconds,
        btree: new SimpleBTree<any, Set<string>>(undefined, indexKeyComparator),
        hashMap: new Map(),
      };

      // Build index entries
      for (const doc of documents) {
        const keyValue = this.extractKeyValue(doc, indexSpec.key);
        if (this.isOmitted(indexData, keyValue)) continue;
        this.addEntry(indexData, keyValue, JSON.stringify(keyValue), doc._id.toHexString());
      }

      this.indexes.set(indexSpec.name, indexData);
    }

    this.initialized = true;
  }

  /**
   * Create a new index.
   *
   * If an index with the resulting name already exists, its name is returned
   * and nothing is changed.
   *
   * @param key index key specification (field -> direction/type)
   * @param options optional name, unique, sparse and expireAfterSeconds settings
   * @returns the index name
   * @throws TsmdbDuplicateKeyError if the index is unique and two existing
   *         documents share a key value; the index is not registered then
   */
  async createIndex(
    key: Record<string, 1 | -1 | 'text' | '2dsphere'>,
    options?: ICreateIndexOptions
  ): Promise<string> {
    await this.initialize();

    // Generate index name if not provided
    const name = options?.name || this.generateIndexName(key);

    // Check if index already exists
    if (this.indexes.has(name)) {
      return name;
    }

    // Create index data structure
    const indexData: IIndexData = {
      name,
      key: key as Record<string, 1 | -1 | string>,
      unique: options?.unique || false,
      sparse: options?.sparse || false,
      expireAfterSeconds: options?.expireAfterSeconds,
      btree: new SimpleBTree<any, Set<string>>(undefined, indexKeyComparator),
      hashMap: new Map(),
    };

    // Build index from existing documents
    const documents = await this.storage.findAll(this.dbName, this.collName);

    for (const doc of documents) {
      const keyValue = this.extractKeyValue(doc, key);
      if (this.isOmitted(indexData, keyValue)) continue;

      const keyStr = JSON.stringify(keyValue);

      if (indexData.unique && indexData.hashMap.has(keyStr)) {
        throw new TsmdbDuplicateKeyError(
          `E11000 duplicate key error index: ${this.dbName}.${this.collName}.$${name}`,
          key as Record<string, 1>,
          keyValue
        );
      }

      this.addEntry(indexData, keyValue, keyStr, doc._id.toHexString());
    }

    // Store index
    this.indexes.set(name, indexData);
    await this.storage.saveIndex(this.dbName, this.collName, name, {
      key,
      unique: options?.unique,
      sparse: options?.sparse,
      expireAfterSeconds: options?.expireAfterSeconds,
    });

    return name;
  }

  /**
   * Drop an index
   *
   * @throws TsmdbIndexError for the `_id_` index or an unknown index name
   */
  async dropIndex(name: string): Promise<void> {
    await this.initialize();

    if (name === '_id_') {
      throw new TsmdbIndexError('cannot drop _id index');
    }

    if (!this.indexes.has(name)) {
      throw new TsmdbIndexError(`index not found: ${name}`);
    }

    this.indexes.delete(name);
    await this.storage.dropIndex(this.dbName, this.collName, name);
  }

  /**
   * Drop all indexes except _id
   */
  async dropAllIndexes(): Promise<void> {
    await this.initialize();

    const names = Array.from(this.indexes.keys()).filter(n => n !== '_id_');
    for (const name of names) {
      this.indexes.delete(name);
      await this.storage.dropIndex(this.dbName, this.collName, name);
    }
  }

  /**
   * List all indexes
   */
  async listIndexes(): Promise<IIndexInfo[]> {
    await this.initialize();

    return Array.from(this.indexes.values()).map(idx => ({
      v: 2,
      key: idx.key,
      name: idx.name,
      // false is reported as undefined
      unique: idx.unique || undefined,
      sparse: idx.sparse || undefined,
      expireAfterSeconds: idx.expireAfterSeconds,
    }));
  }

  /**
   * Check if an index exists
   */
  async indexExists(name: string): Promise<boolean> {
    await this.initialize();
    return this.indexes.has(name);
  }

  /**
   * Update index entries after document insert.
   *
   * All unique constraints are checked before any index is modified, so a
   * duplicate key error leaves every index untouched.
   *
   * @throws TsmdbDuplicateKeyError if a unique index already holds the key
   */
  async onInsert(doc: IStoredDocument): Promise<void> {
    await this.initialize();

    const pending: Array<{ indexData: IIndexData; keyValue: any; keyStr: string }> = [];

    // Phase 1: compute keys and validate unique constraints
    for (const [name, indexData] of this.indexes) {
      const keyValue = this.extractKeyValue(doc, indexData.key);
      if (this.isOmitted(indexData, keyValue)) continue;

      const keyStr = JSON.stringify(keyValue);

      if (indexData.unique) {
        const existing = indexData.hashMap.get(keyStr);
        if (existing && existing.size > 0) {
          throw new TsmdbDuplicateKeyError(
            `E11000 duplicate key error collection: ${this.dbName}.${this.collName} index: ${name}`,
            indexData.key as Record<string, 1>,
            keyValue
          );
        }
      }

      pending.push({ indexData, keyValue, keyStr });
    }

    // Phase 2: apply
    const id = doc._id.toHexString();
    for (const { indexData, keyValue, keyStr } of pending) {
      this.addEntry(indexData, keyValue, keyStr, id);
    }
  }

  /**
   * Update index entries after document update.
   *
   * Only indexes whose key value changed are touched. All unique constraints
   * are checked before any index is modified, so a duplicate key error leaves
   * the entries of the old document in place.
   *
   * @throws TsmdbDuplicateKeyError if a unique index already holds the new key
   */
  async onUpdate(oldDoc: IStoredDocument, newDoc: IStoredDocument): Promise<void> {
    await this.initialize();

    const changes: Array<{
      indexData: IIndexData;
      oldKeyValue: any;
      oldKeyStr: string;
      newKeyValue: any;
      newKeyStr: string;
    }> = [];

    // Phase 1: find changed keys and validate unique constraints
    for (const [name, indexData] of this.indexes) {
      const oldKeyValue = this.extractKeyValue(oldDoc, indexData.key);
      const newKeyValue = this.extractKeyValue(newDoc, indexData.key);
      const oldKeyStr = JSON.stringify(oldKeyValue);
      const newKeyStr = JSON.stringify(newKeyValue);

      if (oldKeyStr === newKeyStr) continue;

      if (indexData.unique && !this.isOmitted(indexData, newKeyValue)) {
        const existing = indexData.hashMap.get(newKeyStr);
        if (existing && existing.size > 0) {
          throw new TsmdbDuplicateKeyError(
            `E11000 duplicate key error collection: ${this.dbName}.${this.collName} index: ${name}`,
            indexData.key as Record<string, 1>,
            newKeyValue
          );
        }
      }

      changes.push({ indexData, oldKeyValue, oldKeyStr, newKeyValue, newKeyStr });
    }

    // Phase 2: apply
    const oldId = oldDoc._id.toHexString();
    const newId = newDoc._id.toHexString();
    for (const { indexData, oldKeyValue, oldKeyStr, newKeyValue, newKeyStr } of changes) {
      if (!this.isOmitted(indexData, oldKeyValue)) {
        this.removeEntry(indexData, oldKeyValue, oldKeyStr, oldId);
      }
      if (!this.isOmitted(indexData, newKeyValue)) {
        this.addEntry(indexData, newKeyValue, newKeyStr, newId);
      }
    }
  }

  /**
   * Update index entries after document delete
   */
  async onDelete(doc: IStoredDocument): Promise<void> {
    await this.initialize();

    const id = doc._id.toHexString();
    for (const indexData of this.indexes.values()) {
      const keyValue = this.extractKeyValue(doc, indexData.key);
      if (this.isOmitted(indexData, keyValue)) continue;
      this.removeEntry(indexData, keyValue, JSON.stringify(keyValue), id);
    }
  }

  /**
   * Find the best index for a query.
   *
   * Index fields are scored in order (equality 2, $in 1.5, range 1) until a
   * field is not constrained by the filter; unique indexes get a 0.5 bonus.
   *
   * @returns the highest scoring index, or null if no index scores above 0
   */
  selectIndex(filter: Document): { name: string; data: IIndexData } | null {
    if (!filter || Object.keys(filter).length === 0) {
      return null;
    }

    // Get filter fields and operators
    const filterInfo = this.analyzeFilter(filter);

    // Score each index
    let bestIndex: { name: string; data: IIndexData } | null = null;
    let bestScore = 0;

    for (const [name, indexData] of this.indexes) {
      let score = 0;

      // Count how many leading index fields can be used
      for (const field of Object.keys(indexData.key)) {
        const info = filterInfo.get(field);
        if (!info) break;

        if (info.equality) {
          // Equality is best
          score += 2;
        } else if (info.range) {
          // Range queries can use B-tree
          score += 1;
        } else if (info.in) {
          score += 1.5;
        } else {
          break;
        }
      }

      // Prefer unique indexes
      if (indexData.unique && score > 0) {
        score += 0.5;
      }

      if (score > bestScore) {
        bestScore = score;
        bestIndex = { name, data: indexData };
      }
    }

    return bestIndex;
  }

  /**
   * Analyze filter to extract field operators.
   *
   * Top-level keys starting with `$` are ignored. Primitive, null, ObjectId
   * and Date values count as equality; for other objects the $eq, $in, $gt,
   * $gte, $lt and $lte operators are picked up.
   */
  private analyzeFilter(filter: Document): Map<string, IFieldFilterInfo> {
    const result = new Map<string, IFieldFilterInfo>();

    for (const [key, value] of Object.entries(filter)) {
      if (key.startsWith('$')) continue;

      const info: IFieldFilterInfo = { equality: false, range: false, in: false, ops: {} };

      const isDirectValue =
        typeof value !== 'object' ||
        value === null ||
        value instanceof plugins.bson.ObjectId ||
        value instanceof Date;

      if (isDirectValue) {
        info.equality = true;
        info.ops['$eq'] = value;
      } else {
        const ops = value as Record<string, any>;
        if (ops.$eq !== undefined) {
          info.equality = true;
          info.ops['$eq'] = ops.$eq;
        }
        if (ops.$in !== undefined) {
          info.in = true;
          info.ops['$in'] = ops.$in;
        }
        for (const op of ['$gt', '$gte', '$lt', '$lte']) {
          if (ops[op] !== undefined) {
            info.range = true;
            info.ops[op] = ops[op];
          }
        }
      }

      result.set(key, info);
    }

    return result;
  }

  /**
   * Use index to find candidate document IDs (supports range queries with B-tree).
   *
   * The returned set is a fresh copy that callers may modify freely.
   *
   * @returns the candidate ids (hex strings), or null when no index can be
   *          used for this filter
   */
  async findCandidateIds(filter: Document): Promise<Set<string> | null> {
    await this.initialize();

    const index = this.selectIndex(filter);
    if (!index) return null;

    const filterInfo = this.analyzeFilter(filter);
    const indexFields = Object.keys(index.data.key);

    if (indexFields.length === 1) {
      const info = filterInfo.get(indexFields[0]);
      return info ? this.findSingleFieldCandidates(index.data, info) : null;
    }

    return this.findCompoundCandidates(index.data, indexFields, filterInfo);
  }

  /**
   * Candidate lookup for a single-field index: hash map for equality and $in,
   * B-tree for ranges.
   */
  private findSingleFieldCandidates(
    indexData: IIndexData,
    info: IFieldFilterInfo
  ): Set<string> | null {
    // Handle equality using hash map (faster)
    if (info.equality) {
      // Missing fields are indexed as null, so undefined is looked up as null
      const value = info.ops['$eq'] ?? null;
      // A sparse index holds no entries for null keys, so it cannot answer this
      if (value === null && indexData.sparse) return null;
      return new Set(indexData.hashMap.get(JSON.stringify(value)) ?? []);
    }

    // Handle $in using hash map
    if (info.in) {
      const values: unknown = info.ops['$in'];
      if (!Array.isArray(values)) return null;
      if (indexData.sparse && values.some(v => v === null || v === undefined)) return null;

      const results = new Set<string>();
      for (const val of values) {
        const ids = indexData.hashMap.get(JSON.stringify(val ?? null));
        if (ids) {
          for (const id of ids) results.add(id);
        }
      }
      return results;
    }

    // Handle range queries using B-tree
    if (info.range) {
      return this.findRangeCandidates(indexData, info.ops);
    }

    return null;
  }

  /**
   * Candidate lookup for a compound index.
   *
   * The leading index fields that the filter constrains by equality or $in
   * form a prefix. If every index field has an equality constraint, the full
   * key is looked up in the hash map. Otherwise the index keys are scanned and
   * those matching the prefix are collected; trailing fields are left
   * unconstrained. The scan is linear in the number of distinct index keys.
   */
  private findCompoundCandidates(
    indexData: IIndexData,
    indexFields: string[],
    filterInfo: Map<string, IFieldFilterInfo>
  ): Set<string> | null {
    // allowedPerField[i] holds the serialized values accepted for index field i
    const allowedPerField: Array<Set<string>> = [];
    const equalityValues: Record<string, any> = {};
    let allEquality = true;

    for (const field of indexFields) {
      const info = filterInfo.get(field);
      if (!info) break;

      if (info.equality) {
        const value = info.ops['$eq'] ?? null;
        equalityValues[field] = value;
        allowedPerField.push(new Set([JSON.stringify(value)]));
      } else if (info.in && Array.isArray(info.ops['$in'])) {
        allEquality = false;
        allowedPerField.push(
          new Set<string>(info.ops['$in'].map((v: any) => JSON.stringify(v ?? null)))
        );
      } else {
        break; // Non-equality/in operator, stop here
      }
    }

    if (allowedPerField.length === 0) return null;

    // Every index field pinned to one value: exact lookup
    if (allEquality && allowedPerField.length === indexFields.length) {
      const keyStr = JSON.stringify(this.buildKeyValue(equalityValues, indexData.key));
      return new Set(indexData.hashMap.get(keyStr) ?? []);
    }

    // Prefix match over the ordered keys
    const results = new Set<string>();
    indexData.btree.forRange(undefined, undefined, true, true, (ids, key) => {
      if (!Array.isArray(key)) return;
      const matches = allowedPerField.every((allowed, i) =>
        allowed.has(JSON.stringify(key[i] ?? null))
      );
      if (matches) {
        for (const id of ids) results.add(id);
      }
    });
    return results;
  }

  /**
   * Find candidates using B-tree range scan.
   *
   * $gt/$gte set the lower bound and $lt/$lte the upper bound; when both
   * variants of a bound are given, the inclusive one ($gte / $lte) wins.
   */
  private findRangeCandidates(indexData: IIndexData, ops: Record<string, any>): Set<string> {
    const results = new Set<string>();

    let lowKey: any = undefined;
    let highKey: any = undefined;
    let lowInclusive = true;
    let highInclusive = true;

    if (ops['$gt'] !== undefined) {
      lowKey = ops['$gt'];
      lowInclusive = false;
    }
    if (ops['$gte'] !== undefined) {
      lowKey = ops['$gte'];
      lowInclusive = true;
    }
    if (ops['$lt'] !== undefined) {
      highKey = ops['$lt'];
      highInclusive = false;
    }
    if (ops['$lte'] !== undefined) {
      highKey = ops['$lte'];
      highInclusive = true;
    }

    // Use B-tree range iteration
    indexData.btree.forRange(lowKey, highKey, lowInclusive, highInclusive, ids => {
      for (const id of ids) results.add(id);
    });

    return results;
  }

  // ============================================================================
  // Helper Methods
  // ============================================================================

  /** True if a document with this key value is left out of the index (sparse + null key). */
  private isOmitted(indexData: IIndexData, keyValue: any): boolean {
    return keyValue === null && indexData.sparse;
  }

  /** Registers a document id under a key in both the hash map and the B-tree. */
  private addEntry(indexData: IIndexData, keyValue: any, keyStr: string, id: string): void {
    let hashSet = indexData.hashMap.get(keyStr);
    if (!hashSet) {
      hashSet = new Set();
      indexData.hashMap.set(keyStr, hashSet);
    }
    hashSet.add(id);

    const btreeSet = indexData.btree.get(keyValue);
    if (btreeSet) {
      btreeSet.add(id);
    } else {
      indexData.btree.set(keyValue, new Set([id]));
    }
  }

  /** Removes a document id from a key in both structures, dropping keys left empty. */
  private removeEntry(indexData: IIndexData, keyValue: any, keyStr: string, id: string): void {
    const hashSet = indexData.hashMap.get(keyStr);
    if (hashSet) {
      hashSet.delete(id);
      if (hashSet.size === 0) {
        indexData.hashMap.delete(keyStr);
      }
    }

    const btreeSet = indexData.btree.get(keyValue);
    if (btreeSet) {
      btreeSet.delete(id);
      if (btreeSet.size === 0) {
        indexData.btree.delete(keyValue);
      }
    }
  }

  /** Builds the default index name by joining `field_direction` pairs with `_`. */
  private generateIndexName(key: Record<string, any>): string {
    return Object.entries(key)
      .map(([field, dir]) => `${field}_${dir}`)
      .join('_');
  }

  /**
   * Reads the index key value from a document.
   *
   * Missing fields become null. A single-field index yields the value itself,
   * a compound index yields an array of values in index field order.
   */
  private extractKeyValue(doc: Document, key: Record<string, any>): any {
    const values: any[] = [];

    for (const field of Object.keys(key)) {
      const value = QueryEngine.getNestedValue(doc, field);
      values.push(value === undefined ? null : value);
    }

    // For single-field index, return the value directly
    if (values.length === 1) {
      return values[0];
    }

    return values;
  }

  /**
   * Builds a key value in index field order from per-field values, using null
   * for fields without a value. Mirrors the shape produced by extractKeyValue.
   */
  private buildKeyValue(values: Record<string, any>, key: Record<string, any>): any {
    const result: any[] = [];

    for (const field of Object.keys(key)) {
      result.push(values[field] !== undefined ? values[field] : null);
    }

    if (result.length === 1) {
      return result[0];
    }

    return result;
  }

  /**
   * Collects the field paths referenced by a filter, descending into
   * $and/$or/$nor and into nested plain objects whose first key is not an operator.
   */
  private getFilterFields(filter: Document, prefix = ''): string[] {
    const fields: string[] = [];

    for (const [key, value] of Object.entries(filter)) {
      if (key.startsWith('$')) {
        // Logical operator
        if (key === '$and' || key === '$or' || key === '$nor') {
          for (const subFilter of value as Document[]) {
            fields.push(...this.getFilterFields(subFilter, prefix));
          }
        }
      } else {
        const fullKey = prefix ? `${prefix}.${key}` : key;
        fields.push(fullKey);

        // Check for nested filters
        if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
          const subKeys = Object.keys(value);
          if (subKeys.length > 0 && !subKeys[0].startsWith('$')) {
            fields.push(...this.getFilterFields(value, fullKey));
          }
        }
      }
    }

    return fields;
  }

  /** Resolves a dot-notation path inside a filter; undefined if a segment is missing. */
  private getFilterValue(filter: Document, field: string): any {
    // Handle dot notation
    const parts = field.split('.');
    let current: any = filter;

    for (const part of parts) {
      if (current === null || current === undefined) {
        return undefined;
      }
      current = current[part];
    }

    return current;
  }
}