From abf84359b41b5aac44e1c6214f9309da76352d62 Mon Sep 17 00:00:00 2001 From: Juergen Kunz Date: Sun, 5 Apr 2026 02:49:28 +0000 Subject: [PATCH] fix(collection): improve index creation resilience and add collection integrity checks --- changelog.md | 9 ++ ts/00_commitinfo_data.ts | 2 +- ts/classes.collection.ts | 189 ++++++++++++++++++++++++++++++++++++--- ts/classes.doc.ts | 11 +++ 4 files changed, 196 insertions(+), 15 deletions(-) diff --git a/changelog.md b/changelog.md index 6b228f8..f489dac 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,14 @@ # Changelog +## 2026-04-05 - 7.1.4 - fix(collection) +improve index creation resilience and add collection integrity checks + +- Handle MongoDB index creation failures with structured logging instead of failing silently or racing on repeated attempts +- Log duplicate field values when unique index creation fails due to existing duplicate data +- Await unique and regular index creation during insert operations to ensure index setup completes predictably +- Add collection integrity checks for estimated vs actual document counts and duplicate values on tracked unique fields +- Expose collection integrity checks through the document class API + ## 2026-03-26 - 7.1.3 - fix(deps) bump development dependencies for tooling and Node types diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index 6f6d3e5..59a89ad 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: '@push.rocks/smartdata', - version: '7.1.3', + version: '7.1.4', description: 'An advanced library for NoSQL data organization and manipulation using TypeScript with support for MongoDB, data validation, collections, and custom data types.' } diff --git a/ts/classes.collection.ts b/ts/classes.collection.ts index 97a1fa4..83597b0 100644 --- a/ts/classes.collection.ts +++ b/ts/classes.collection.ts @@ -216,8 +216,15 @@ export class SmartdataCollection { const indexSpec: Record = {}; searchableFields.forEach(f => { indexSpec[f] = 'text'; }); // Cast to any to satisfy TypeScript IndexSpecification typing - await this.mongoDbCollection.createIndex(indexSpec as any, { name: 'smartdata_text_index' }); - this.textIndexCreated = true; + try { + await this.mongoDbCollection.createIndex(indexSpec as any, { name: 'smartdata_text_index' }); + this.textIndexCreated = true; + } catch (err: any) { + logger.log( + 'warn', + `Failed to create text index on fields [${searchableFields.join(', ')}] in collection "${this.collectionName}": ${err?.message || String(err)}` + ); + } } } } @@ -228,11 +235,25 @@ export class SmartdataCollection { public async markUniqueIndexes(keyArrayArg: string[] = []) { for (const key of keyArrayArg) { if (!this.uniqueIndexes.includes(key)) { - await this.mongoDbCollection.createIndex({ [key]: 1 }, { - unique: true, - }); - // make sure we only call this once and not for every doc we create + // Claim the slot immediately to prevent concurrent inserts from retrying this.uniqueIndexes.push(key); + try { + await this.mongoDbCollection.createIndex({ [key]: 1 }, { + unique: true, + }); + } catch (err: any) { + const errorCode = err?.code || err?.codeName || 'unknown'; + const errorMessage = err?.message || String(err); + logger.log( + 'error', + `Failed to create unique index on field "${key}" in collection "${this.collectionName}". ` + + `MongoDB error [${errorCode}]: ${errorMessage}. ` + + `Uniqueness constraint on "${key}" is NOT enforced.` + ); + if (errorCode === 11000 || errorCode === 'DuplicateKey' || String(errorMessage).includes('E11000')) { + await this.logDuplicatesForField(key); + } + } } } } @@ -245,16 +266,66 @@ export class SmartdataCollection { // Check if we've already created this index const indexKey = indexDef.field; if (!this.regularIndexes.some(i => i.field === indexKey)) { - await this.mongoDbCollection.createIndex( - { [indexDef.field]: 1 }, // Simple single-field index - indexDef.options - ); - // Track that we've created this index + // Claim the slot immediately to prevent concurrent retries this.regularIndexes.push(indexDef); + try { + await this.mongoDbCollection.createIndex( + { [indexDef.field]: 1 }, // Simple single-field index + indexDef.options + ); + } catch (err: any) { + const errorCode = err?.code || err?.codeName || 'unknown'; + const errorMessage = err?.message || String(err); + logger.log( + 'warn', + `Failed to create index on field "${indexKey}" in collection "${this.collectionName}". ` + + `MongoDB error [${errorCode}]: ${errorMessage}.` + ); + if ( + indexDef.options?.unique && + (errorCode === 11000 || errorCode === 'DuplicateKey' || String(errorMessage).includes('E11000')) + ) { + await this.logDuplicatesForField(indexKey); + } + } } } } + /** + * Logs duplicate values for a field to help diagnose unique index creation failures. + */ + private async logDuplicatesForField(field: string): Promise { + try { + const pipeline = [ + { $group: { _id: `$${field}`, count: { $sum: 1 }, ids: { $push: '$_id' } } }, + { $match: { count: { $gt: 1 } } }, + { $limit: 5 }, + ]; + const duplicates = await this.mongoDbCollection.aggregate(pipeline).toArray(); + if (duplicates.length > 0) { + for (const dup of duplicates) { + logger.log( + 'warn', + `Duplicate values for "${field}" in "${this.collectionName}": ` + + `value=${JSON.stringify(dup._id)} appears ${dup.count} times ` + + `(document _ids: ${JSON.stringify(dup.ids.slice(0, 5))})` + ); + } + logger.log( + 'warn', + `Unique index on "${field}" in "${this.collectionName}" was NOT created. ` + + `Resolve duplicates and restart to enforce uniqueness.` + ); + } + } catch (aggErr: any) { + logger.log( + 'warn', + `Could not identify duplicate documents for field "${field}" in "${this.collectionName}": ${aggErr?.message || String(aggErr)}` + ); + } + } + /** * adds a validation function that all newly inserted and updated objects have to pass */ @@ -295,6 +366,28 @@ export class SmartdataCollection { const cursor = this.mongoDbCollection.find(filterObject, { session: opts?.session }); const result = await cursor.toArray(); cursor.close(); + + // In-memory check for duplicate _id values (should never happen) + if (result.length > 0) { + const idSet = new Set(); + const duplicateIds: string[] = []; + for (const doc of result) { + const idStr = String(doc._id); + if (idSet.has(idStr)) { + duplicateIds.push(idStr); + } else { + idSet.add(idStr); + } + } + if (duplicateIds.length > 0) { + logger.log( + 'error', + `Integrity issue in "${this.collectionName}": found ${duplicateIds.length} duplicate _id values ` + + `in findAll results: [${duplicateIds.slice(0, 5).join(', ')}]. This should never happen.` + ); + } + } + return result; } @@ -346,11 +439,11 @@ export class SmartdataCollection { ): Promise { await this.init(); await this.checkDoc(dbDocArg); - this.markUniqueIndexes(dbDocArg.uniqueIndexes); - + await this.markUniqueIndexes(dbDocArg.uniqueIndexes); + // Create regular indexes if available if (dbDocArg.regularIndexes && dbDocArg.regularIndexes.length > 0) { - this.createRegularIndexes(dbDocArg.regularIndexes); + await this.createRegularIndexes(dbDocArg.regularIndexes); } const saveableObject = await dbDocArg.createSavableObject() as any; @@ -402,6 +495,74 @@ export class SmartdataCollection { return this.mongoDbCollection.countDocuments(filterObject, { session: opts?.session }); } + /** + * Runs an integrity check on the collection. + * Compares estimated vs actual document count and checks for duplicates on unique index fields. + */ + public async checkCollectionIntegrity(): Promise<{ + ok: boolean; + estimatedCount: number; + actualCount: number; + duplicateFields: Array<{ field: string; duplicateValues: number }>; + }> { + await this.init(); + const result = { + ok: true, + estimatedCount: 0, + actualCount: 0, + duplicateFields: [] as Array<{ field: string; duplicateValues: number }>, + }; + + try { + result.estimatedCount = await this.mongoDbCollection.estimatedDocumentCount(); + result.actualCount = await this.mongoDbCollection.countDocuments({}); + + if (result.estimatedCount !== result.actualCount) { + result.ok = false; + logger.log( + 'warn', + `Integrity check on "${this.collectionName}": estimatedDocumentCount=${result.estimatedCount} ` + + `but countDocuments=${result.actualCount}. Possible data inconsistency.` + ); + } + + // Check for duplicates on each tracked unique index field + for (const field of this.uniqueIndexes) { + try { + const pipeline = [ + { $group: { _id: `$${field}`, count: { $sum: 1 } } }, + { $match: { count: { $gt: 1 } } }, + { $count: 'total' }, + ]; + const countResult = await this.mongoDbCollection.aggregate(pipeline).toArray(); + const dupCount = countResult[0]?.total || 0; + if (dupCount > 0) { + result.ok = false; + result.duplicateFields.push({ field, duplicateValues: dupCount }); + logger.log( + 'warn', + `Integrity check on "${this.collectionName}": field "${field}" has ${dupCount} values with duplicates ` + + `despite being marked as unique.` + ); + } + } catch (fieldErr: any) { + logger.log( + 'warn', + `Integrity check: could not verify uniqueness of "${field}" in "${this.collectionName}": ${fieldErr?.message || String(fieldErr)}` + ); + } + } + } catch (err: any) { + result.ok = false; + logger.log( + 'error', + `Integrity check failed for "${this.collectionName}": ${err?.message || String(err)}` + ); + } + + return result; + } + /** * checks a Doc for constraints * if this.objectValidation is not set it passes. diff --git a/ts/classes.doc.ts b/ts/classes.doc.ts index d63972a..afdc80b 100644 --- a/ts/classes.doc.ts +++ b/ts/classes.doc.ts @@ -597,6 +597,17 @@ export class SmartDataDbDoc( + this: plugins.tsclass.typeFest.Class, + ) { + const collection: SmartdataCollection = (this as any).collection; + return await collection.checkCollectionIntegrity(); + } + /** * Create a MongoDB filter from a Lucene query string * @param luceneQuery Lucene query string