fix(collection): improve index creation resilience and add collection integrity checks

This commit is contained in:
2026-04-05 02:49:28 +00:00
parent 54fa433d1a
commit abf84359b4
4 changed files with 196 additions and 15 deletions

View File

@@ -1,5 +1,14 @@
# Changelog
## 2026-04-05 - 7.1.4 - fix(collection)
improve index creation resilience and add collection integrity checks
- Handle MongoDB index creation failures with structured logging instead of failing silently or racing on repeated attempts
- Log duplicate field values when unique index creation fails due to existing duplicate data
- Await unique and regular index creation during insert operations to ensure index setup completes predictably
- Add collection integrity checks for estimated vs actual document counts and duplicate values on tracked unique fields
- Expose collection integrity checks through the document class API
## 2026-03-26 - 7.1.3 - fix(deps)
bump development dependencies for tooling and Node types

View File

@@ -3,6 +3,6 @@
*/ */
export const commitinfo = { export const commitinfo = {
name: '@push.rocks/smartdata', name: '@push.rocks/smartdata',
version: '7.1.3', version: '7.1.4',
description: 'An advanced library for NoSQL data organization and manipulation using TypeScript with support for MongoDB, data validation, collections, and custom data types.' description: 'An advanced library for NoSQL data organization and manipulation using TypeScript with support for MongoDB, data validation, collections, and custom data types.'
} }

View File

@@ -216,8 +216,15 @@ export class SmartdataCollection<T> {
const indexSpec: Record<string, 'text'> = {}; const indexSpec: Record<string, 'text'> = {};
searchableFields.forEach(f => { indexSpec[f] = 'text'; }); searchableFields.forEach(f => { indexSpec[f] = 'text'; });
// Cast to any to satisfy TypeScript IndexSpecification typing // Cast to any to satisfy TypeScript IndexSpecification typing
await this.mongoDbCollection.createIndex(indexSpec as any, { name: 'smartdata_text_index' }); try {
this.textIndexCreated = true; await this.mongoDbCollection.createIndex(indexSpec as any, { name: 'smartdata_text_index' });
this.textIndexCreated = true;
} catch (err: any) {
logger.log(
'warn',
`Failed to create text index on fields [${searchableFields.join(', ')}] in collection "${this.collectionName}": ${err?.message || String(err)}`
);
}
} }
} }
} }
@@ -228,11 +235,25 @@ export class SmartdataCollection<T> {
public async markUniqueIndexes(keyArrayArg: string[] = []) { public async markUniqueIndexes(keyArrayArg: string[] = []) {
for (const key of keyArrayArg) { for (const key of keyArrayArg) {
if (!this.uniqueIndexes.includes(key)) { if (!this.uniqueIndexes.includes(key)) {
await this.mongoDbCollection.createIndex({ [key]: 1 }, { // Claim the slot immediately to prevent concurrent inserts from retrying
unique: true,
});
// make sure we only call this once and not for every doc we create
this.uniqueIndexes.push(key); this.uniqueIndexes.push(key);
try {
await this.mongoDbCollection.createIndex({ [key]: 1 }, {
unique: true,
});
} catch (err: any) {
const errorCode = err?.code || err?.codeName || 'unknown';
const errorMessage = err?.message || String(err);
logger.log(
'error',
`Failed to create unique index on field "${key}" in collection "${this.collectionName}". ` +
`MongoDB error [${errorCode}]: ${errorMessage}. ` +
`Uniqueness constraint on "${key}" is NOT enforced.`
);
if (errorCode === 11000 || errorCode === 'DuplicateKey' || String(errorMessage).includes('E11000')) {
await this.logDuplicatesForField(key);
}
}
} }
} }
} }
@@ -245,16 +266,66 @@ export class SmartdataCollection<T> {
// Check if we've already created this index // Check if we've already created this index
const indexKey = indexDef.field; const indexKey = indexDef.field;
if (!this.regularIndexes.some(i => i.field === indexKey)) { if (!this.regularIndexes.some(i => i.field === indexKey)) {
await this.mongoDbCollection.createIndex( // Claim the slot immediately to prevent concurrent retries
{ [indexDef.field]: 1 }, // Simple single-field index
indexDef.options
);
// Track that we've created this index
this.regularIndexes.push(indexDef); this.regularIndexes.push(indexDef);
try {
await this.mongoDbCollection.createIndex(
{ [indexDef.field]: 1 }, // Simple single-field index
indexDef.options
);
} catch (err: any) {
const errorCode = err?.code || err?.codeName || 'unknown';
const errorMessage = err?.message || String(err);
logger.log(
'warn',
`Failed to create index on field "${indexKey}" in collection "${this.collectionName}". ` +
`MongoDB error [${errorCode}]: ${errorMessage}.`
);
if (
indexDef.options?.unique &&
(errorCode === 11000 || errorCode === 'DuplicateKey' || String(errorMessage).includes('E11000'))
) {
await this.logDuplicatesForField(indexKey);
}
}
} }
} }
} }
/**
 * Diagnostic helper used after a unique index could not be created.
 *
 * Groups the collection's documents by the value of `field`, keeps the groups
 * that contain more than one document and logs up to five of them at `warn`
 * level, each with up to five of the affected document `_id`s. When at least
 * one duplicate group is found, a closing warning states that the unique index
 * was not created.
 *
 * Errors raised by the aggregation are caught and logged as a warning; this
 * method never rejects because of them.
 *
 * @param field name of the document field to inspect for repeated values
 */
private async logDuplicatesForField(field: string): Promise<void> {
  try {
    // One group per distinct value, remembering how many documents share it and which ones.
    const groupByValue = {
      $group: { _id: `$${field}`, count: { $sum: 1 }, ids: { $push: '$_id' } },
    };
    // Only values that occur more than once are of interest.
    const keepRepeated = { $match: { count: { $gt: 1 } } };
    // Cap the number of reported values to keep the log output short.
    const capSamples = { $limit: 5 };

    const duplicateGroups = await this.mongoDbCollection
      .aggregate([groupByValue, keepRepeated, capSamples])
      .toArray();

    if (duplicateGroups.length === 0) {
      return;
    }

    duplicateGroups.forEach((group) => {
      const valueJson = JSON.stringify(group._id);
      const sampleIdsJson = JSON.stringify(group.ids.slice(0, 5));
      logger.log(
        'warn',
        `Duplicate values for "${field}" in "${this.collectionName}": ` +
          `value=${valueJson} appears ${group.count} times ` +
          `(document _ids: ${sampleIdsJson})`
      );
    });

    logger.log(
      'warn',
      `Unique index on "${field}" in "${this.collectionName}" was NOT created. ` +
        `Resolve duplicates and restart to enforce uniqueness.`
    );
  } catch (lookupErr: any) {
    const reason = lookupErr?.message || String(lookupErr);
    logger.log(
      'warn',
      `Could not identify duplicate documents for field "${field}" in "${this.collectionName}": ${reason}`
    );
  }
}
/** /**
* adds a validation function that all newly inserted and updated objects have to pass * adds a validation function that all newly inserted and updated objects have to pass
*/ */
@@ -295,6 +366,28 @@ export class SmartdataCollection<T> {
const cursor = this.mongoDbCollection.find(filterObject, { session: opts?.session }); const cursor = this.mongoDbCollection.find(filterObject, { session: opts?.session });
const result = await cursor.toArray(); const result = await cursor.toArray();
cursor.close(); cursor.close();
// In-memory check for duplicate _id values (should never happen)
if (result.length > 0) {
const idSet = new Set<string>();
const duplicateIds: string[] = [];
for (const doc of result) {
const idStr = String(doc._id);
if (idSet.has(idStr)) {
duplicateIds.push(idStr);
} else {
idSet.add(idStr);
}
}
if (duplicateIds.length > 0) {
logger.log(
'error',
`Integrity issue in "${this.collectionName}": found ${duplicateIds.length} duplicate _id values ` +
`in findAll results: [${duplicateIds.slice(0, 5).join(', ')}]. This should never happen.`
);
}
}
return result; return result;
} }
@@ -346,11 +439,11 @@ export class SmartdataCollection<T> {
): Promise<any> { ): Promise<any> {
await this.init(); await this.init();
await this.checkDoc(dbDocArg); await this.checkDoc(dbDocArg);
this.markUniqueIndexes(dbDocArg.uniqueIndexes); await this.markUniqueIndexes(dbDocArg.uniqueIndexes);
// Create regular indexes if available // Create regular indexes if available
if (dbDocArg.regularIndexes && dbDocArg.regularIndexes.length > 0) { if (dbDocArg.regularIndexes && dbDocArg.regularIndexes.length > 0) {
this.createRegularIndexes(dbDocArg.regularIndexes); await this.createRegularIndexes(dbDocArg.regularIndexes);
} }
const saveableObject = await dbDocArg.createSavableObject() as any; const saveableObject = await dbDocArg.createSavableObject() as any;
@@ -402,6 +495,74 @@ export class SmartdataCollection<T> {
return this.mongoDbCollection.countDocuments(filterObject, { session: opts?.session }); return this.mongoDbCollection.countDocuments(filterObject, { session: opts?.session });
} }
/**
 * Runs an integrity check on the collection.
 *
 * Two checks are performed after `init()`:
 * 1. `estimatedDocumentCount()` is compared with `countDocuments({})`; a
 *    difference is logged as a warning and marks the result as not ok.
 * 2. For every field currently tracked in `this.uniqueIndexes`, an aggregation
 *    counts how many distinct values occur in more than one document. Fields
 *    with such values are listed in `duplicateFields`.
 *
 * This method does not throw for failures inside the checks: they are logged
 * and reflected in `ok`.
 *
 * @returns a summary object. `ok` is `true` only when the counts match, no
 *   tracked unique field has duplicate values, and every check could actually
 *   be executed. `duplicateValues` is the number of distinct values that appear
 *   more than once for the given field.
 */
public async checkCollectionIntegrity(): Promise<{
  ok: boolean;
  estimatedCount: number;
  actualCount: number;
  duplicateFields: Array<{ field: string; duplicateValues: number }>;
}> {
  await this.init();
  const result = {
    ok: true,
    estimatedCount: 0,
    actualCount: 0,
    duplicateFields: [] as Array<{ field: string; duplicateValues: number }>,
  };
  try {
    result.estimatedCount = await this.mongoDbCollection.estimatedDocumentCount();
    result.actualCount = await this.mongoDbCollection.countDocuments({});
    if (result.estimatedCount !== result.actualCount) {
      result.ok = false;
      logger.log(
        'warn',
        `Integrity check on "${this.collectionName}": estimatedDocumentCount=${result.estimatedCount} ` +
          `but countDocuments=${result.actualCount}. Possible data inconsistency.`
      );
    }

    // Check for duplicates on each tracked unique index field
    for (const field of this.uniqueIndexes) {
      try {
        const pipeline = [
          { $group: { _id: `$${field}`, count: { $sum: 1 } } },
          { $match: { count: { $gt: 1 } } },
          { $count: 'total' },
        ];
        const countResult = await this.mongoDbCollection.aggregate(pipeline).toArray();
        // `$count` emits no document at all when nothing reaches it, so default to 0.
        const dupCount: number = countResult[0]?.total ?? 0;
        if (dupCount > 0) {
          result.ok = false;
          result.duplicateFields.push({ field, duplicateValues: dupCount });
          logger.log(
            'warn',
            `Integrity check on "${this.collectionName}": field "${field}" has ${dupCount} values with duplicates ` +
              `despite being marked as unique.`
          );
        }
      } catch (fieldErr: any) {
        // A uniqueness check that could not run must not be reported as a pass;
        // keep going so the remaining fields are still inspected.
        result.ok = false;
        logger.log(
          'warn',
          `Integrity check: could not verify uniqueness of "${field}" in "${this.collectionName}": ${fieldErr?.message || String(fieldErr)}`
        );
      }
    }
  } catch (err: any) {
    result.ok = false;
    logger.log(
      'error',
      `Integrity check failed for "${this.collectionName}": ${err?.message || String(err)}`
    );
  }
  return result;
}
/** /**
* checks a Doc for constraints * checks a Doc for constraints
* if this.objectValidation is not set it passes. * if this.objectValidation is not set it passes.

View File

@@ -597,6 +597,17 @@ export class SmartDataDbDoc<T extends TImplements, TImplements, TManager extends
return await collection.getCount(filterArg); return await collection.getCount(filterArg);
} }
/**
 * Static entry point for the collection integrity check.
 *
 * Reads the `collection` attached to the document class this is called on and
 * delegates to that collection's `checkCollectionIntegrity()`, returning its
 * summary unchanged.
 */
public static async checkCollectionIntegrity<T>(
  this: plugins.tsclass.typeFest.Class<T>,
) {
  const backingCollection = (this as any).collection as SmartdataCollection<T>;
  const integrityReport = await backingCollection.checkCollectionIntegrity();
  return integrityReport;
}
/** /**
* Create a MongoDB filter from a Lucene query string * Create a MongoDB filter from a Lucene query string
* @param luceneQuery Lucene query string * @param luceneQuery Lucene query string