feat(migration): add lock heartbeats, predictive dry-run planning, and stricter ledger option validation

This commit is contained in:
2026-04-14 12:31:34 +00:00
parent 19ebdee31a
commit 1b4358aca5
17 changed files with 695 additions and 180 deletions
+246 -82
View File
@@ -22,6 +22,22 @@ const DEFAULT_LEDGER_NAME = 'smartmigration';
// Lock timing constants, all in milliseconds.
// NOTE(review): the code that applies the two defaults and the poll interval is
// outside this view; the descriptions below follow the constant names — confirm.

// Presumably the fallback for `options.lockWaitMs` (60 seconds).
const DEFAULT_LOCK_WAIT_MS = 60_000;
// Presumably the fallback for `options.lockTtlMs` (10 minutes).
const DEFAULT_LOCK_TTL_MS = 600_000;
// Presumably the delay between lock-acquisition attempts while waiting.
const LOCK_POLL_INTERVAL_MS = 500;
// Lower bound for the heartbeat interval returned by getLockHeartbeatMs().
const MIN_LOCK_HEARTBEAT_MS = 1;
/**
 * Result of resolving a ledger snapshot into a concrete starting version and
 * the list of steps to run from it. Produced by `resolveLedgerState()`.
 */
interface IResolvedLedgerState {
/** `currentVersion` exactly as found in the ledger data; null when none was recorded. */
currentVersionBefore: string | null;
/** Version that planning starts from: the recorded one, or the bootstrap choice when none was recorded. */
effectiveCurrentVersion: string;
/** Bootstrap version chosen because the ledger had no version; null when the ledger already had one. */
bootstrapVersionToPersist: string | null;
/**
 * How the starting version was chosen:
 * - 'none': the ledger already had a version.
 * - 'fresh-install-version': fresh install detected and `freshInstallVersion` is set.
 * - 'target-without-steps': no steps are registered, so the target version is used.
 * - 'chain-start': the first registered step's `fromVersion` is used.
 */
bootstrapMode: 'none' | 'fresh-install-version' | 'chain-start' | 'target-without-steps';
/**
 * True only when fresh-install detection succeeded and the mode is
 * 'fresh-install-version' or 'target-without-steps'; false in every other case.
 */
wasFreshInstall: boolean;
/** Steps from the effective version to the target; empty when the two are already equal. */
plannedSteps: IMigrationStepDefinition[];
/** `toVersion` of the last planned step, or the effective version when no steps are planned. */
predictedCurrentVersionAfter: string;
}
/**
 * Handle for the background lock-renewal loop created by `startLockHeartbeat()`.
 */
interface ILockHeartbeat {
/** Returns the error recorded when a lock renewal failed, or null if none has been recorded. */
getError(): SmartMigrationError | null;
/** Signals the renewal loop to end and resolves once the loop has finished. */
stop(): Promise<void>;
}
/**
* SmartMigration — the runner. See readme.md for the full API.
@@ -55,6 +71,10 @@ export class SmartMigration {
}
VersionResolver.assertValid(options.targetVersion, 'options.targetVersion');
if (options.freshInstallVersion !== undefined) {
VersionResolver.assertValid(options.freshInstallVersion, 'options.freshInstallVersion');
}
if (!options.db && !options.bucket) {
throw new SmartMigrationError(
'NO_RESOURCES',
@@ -78,6 +98,28 @@ export class SmartMigration {
);
}
if (
options.ledgerName !== undefined &&
(typeof options.ledgerName !== 'string' || options.ledgerName.trim() === '')
) {
throw new SmartMigrationError(
'INVALID_LEDGER_NAME',
'ledgerName must be a non-empty string when provided.',
);
}
if (options.lockWaitMs !== undefined) {
this.assertIntegerOption('INVALID_LOCK_WAIT_MS', 'lockWaitMs', options.lockWaitMs, {
min: 0,
});
}
if (options.lockTtlMs !== undefined) {
this.assertIntegerOption('INVALID_LOCK_TTL_MS', 'lockTtlMs', options.lockTtlMs, {
min: 1,
});
}
this.settings = {
targetVersion: options.targetVersion,
db: options.db,
@@ -176,70 +218,44 @@ export class SmartMigration {
const runStart = Date.now();
const applied: IMigrationStepResult[] = [];
let wasFreshInstall = false;
let currentVersionBefore: string | null = null;
let lockHeartbeat: ILockHeartbeat | null = null;
try {
lockHeartbeat = this.startLockHeartbeat(ledger);
// Re-read after acquiring lock (state may have changed while we waited).
let data = await ledger.read();
currentVersionBefore = data.currentVersion;
const resolvedState = await this.resolveLedgerState(data);
// Resolve initial version.
let currentVersion: string;
if (data.currentVersion === null) {
const fresh = await this.detectFreshInstall();
if (fresh && this.settings.freshInstallVersion) {
wasFreshInstall = true;
currentVersion = this.settings.freshInstallVersion;
VersionResolver.assertValid(currentVersion, 'freshInstallVersion');
data.currentVersion = currentVersion;
await ledger.write(data);
this.log.log('info', `smartmigration: fresh install detected, jumping to ${currentVersion}`);
} else {
if (this.steps.length === 0) {
// No steps and no current version — nothing to do.
data.currentVersion = this.settings.targetVersion;
await ledger.write(data);
return {
currentVersionBefore: null,
currentVersionAfter: this.settings.targetVersion,
targetVersion: this.settings.targetVersion,
wasUpToDate: false,
wasFreshInstall: true,
stepsApplied: [],
stepsSkipped: [],
totalDurationMs: Date.now() - runStart,
};
}
currentVersion = this.steps[0].fromVersion;
data.currentVersion = currentVersion;
await ledger.write(data);
if (resolvedState.bootstrapVersionToPersist !== null) {
data.currentVersion = resolvedState.bootstrapVersionToPersist;
await ledger.write(data);
if (resolvedState.bootstrapMode === 'fresh-install-version') {
this.log.log(
'info',
`smartmigration: fresh install detected, jumping to ${resolvedState.bootstrapVersionToPersist}`,
);
}
} else {
currentVersion = data.currentVersion;
}
// Already at target after fresh-install resolution?
if (VersionResolver.equals(currentVersion, this.settings.targetVersion)) {
this.assertLockHealthy(lockHeartbeat);
if (resolvedState.plannedSteps.length === 0) {
return {
currentVersionBefore,
currentVersionAfter: currentVersion,
currentVersionBefore: resolvedState.currentVersionBefore,
currentVersionAfter: resolvedState.predictedCurrentVersionAfter,
targetVersion: this.settings.targetVersion,
wasUpToDate: true,
wasFreshInstall,
wasUpToDate: resolvedState.bootstrapMode !== 'target-without-steps',
wasFreshInstall: resolvedState.wasFreshInstall,
stepsApplied: [],
stepsSkipped: [],
totalDurationMs: Date.now() - runStart,
};
}
const plan = VersionResolver.computePlan(
this.steps,
currentVersion,
this.settings.targetVersion,
);
let currentVersion = resolvedState.effectiveCurrentVersion;
for (const step of plan) {
for (const step of resolvedState.plannedSteps) {
const startedAt = new Date();
const stepStart = Date.now();
let entry: IMigrationLedgerEntry;
@@ -272,6 +288,7 @@ export class SmartMigration {
log: this.log,
});
await step.handler(ctx);
this.assertLockHealthy(lockHeartbeat, { stepId: step.id });
const finishedAt = new Date();
const durationMs = Date.now() - stepStart;
@@ -288,6 +305,7 @@ export class SmartMigration {
data = await ledger.read();
data.steps[step.id] = entry;
data.currentVersion = step.toVersion;
delete data.checkpoints[step.id];
await ledger.write(data);
// Advance the running cursor used by skip-forward detection.
currentVersion = step.toVersion;
@@ -300,6 +318,15 @@ export class SmartMigration {
const finishedAt = new Date();
const durationMs = Date.now() - stepStart;
const error = err as Error;
const lockError = this.getLockHealthError(lockHeartbeat, {
stepId: step.id,
originalError: error.message,
stack: error.stack,
});
if (lockError) {
throw lockError;
}
entry = {
id: step.id,
fromVersion: step.fromVersion,
@@ -330,18 +357,23 @@ export class SmartMigration {
}
}
this.assertLockHealthy(lockHeartbeat);
const finalData = await ledger.read();
return {
currentVersionBefore,
currentVersionBefore: resolvedState.currentVersionBefore,
currentVersionAfter: finalData.currentVersion ?? this.settings.targetVersion,
targetVersion: this.settings.targetVersion,
wasUpToDate: false,
wasFreshInstall,
wasFreshInstall: resolvedState.wasFreshInstall,
stepsApplied: applied,
stepsSkipped: [],
totalDurationMs: Date.now() - runStart,
};
} finally {
if (lockHeartbeat) {
await lockHeartbeat.stop();
}
await ledger.releaseLock(this.instanceId).catch((err) => {
this.log.log(
'warn',
@@ -355,31 +387,10 @@ export class SmartMigration {
* Resolve the plan against the current ledger state without acquiring a
* lock or executing anything. Used by `plan()` and `dryRun: true`.
*/
private computeResultWithoutRun(data: ISmartMigrationLedgerData): IMigrationRunResult {
const currentVersion =
data.currentVersion ??
(this.steps.length > 0 ? this.steps[0].fromVersion : this.settings.targetVersion);
private async computeResultWithoutRun(data: ISmartMigrationLedgerData): Promise<IMigrationRunResult> {
const resolvedState = await this.resolveLedgerState(data);
if (VersionResolver.equals(currentVersion, this.settings.targetVersion)) {
return {
currentVersionBefore: data.currentVersion,
currentVersionAfter: currentVersion,
targetVersion: this.settings.targetVersion,
wasUpToDate: true,
wasFreshInstall: false,
stepsApplied: [],
stepsSkipped: [],
totalDurationMs: 0,
};
}
const plan = VersionResolver.computePlan(
this.steps,
currentVersion,
this.settings.targetVersion,
);
const skipped: IMigrationStepResult[] = plan.map((step) => ({
const skipped: IMigrationStepResult[] = resolvedState.plannedSteps.map((step) => ({
id: step.id,
fromVersion: step.fromVersion,
toVersion: step.toVersion,
@@ -390,17 +401,73 @@ export class SmartMigration {
}));
return {
currentVersionBefore: data.currentVersion,
currentVersionAfter: currentVersion,
currentVersionBefore: resolvedState.currentVersionBefore,
currentVersionAfter: resolvedState.predictedCurrentVersionAfter,
targetVersion: this.settings.targetVersion,
wasUpToDate: false,
wasFreshInstall: false,
wasUpToDate:
resolvedState.plannedSteps.length === 0 &&
resolvedState.bootstrapMode !== 'target-without-steps',
wasFreshInstall: resolvedState.wasFreshInstall,
stepsApplied: [],
stepsSkipped: skipped,
totalDurationMs: 0,
};
}
/**
 * Turn a ledger snapshot into a starting version plus the steps needed to
 * reach `settings.targetVersion`. Nothing is written here; callers decide
 * whether to persist `bootstrapVersionToPersist`.
 *
 * When the ledger has no recorded version, the starting version is chosen in
 * this order:
 *  1. `settings.freshInstallVersion`, if fresh-install detection succeeds and
 *     that setting is truthy ('fresh-install-version');
 *  2. `settings.targetVersion`, if no steps are registered
 *     ('target-without-steps');
 *  3. the first registered step's `fromVersion` ('chain-start').
 *
 * @param data Ledger data to resolve; only `currentVersion` is read.
 * @returns The resolved state, including the planned steps and the version
 *   the ledger is predicted to hold once they have run.
 */
private async resolveLedgerState(
  data: ISmartMigrationLedgerData,
): Promise<IResolvedLedgerState> {
  const recordedVersion = data.currentVersion;

  let mode: IResolvedLedgerState['bootstrapMode'] = 'none';
  let freshInstall = false;
  let startVersion: string;

  if (recordedVersion !== null) {
    startVersion = recordedVersion;
  } else {
    // Fresh-install detection only runs when the ledger has no version yet.
    const looksFresh = await this.detectFreshInstall();
    if (looksFresh && this.settings.freshInstallVersion) {
      mode = 'fresh-install-version';
      startVersion = this.settings.freshInstallVersion;
      freshInstall = true;
    } else if (this.steps.length === 0) {
      mode = 'target-without-steps';
      startVersion = this.settings.targetVersion;
      freshInstall = looksFresh;
    } else {
      mode = 'chain-start';
      startVersion = this.steps[0].fromVersion;
    }
  }

  // Every bootstrap mode persists exactly the version it starts from.
  const versionToPersist = mode === 'none' ? null : startVersion;

  let plannedSteps: IResolvedLedgerState['plannedSteps'] = [];
  if (!VersionResolver.equals(startVersion, this.settings.targetVersion)) {
    plannedSteps = VersionResolver.computePlan(
      this.steps,
      startVersion,
      this.settings.targetVersion,
    );
  }

  const predictedCurrentVersionAfter =
    plannedSteps.length === 0
      ? startVersion
      : plannedSteps[plannedSteps.length - 1].toVersion;

  return {
    currentVersionBefore: recordedVersion,
    effectiveCurrentVersion: startVersion,
    bootstrapVersionToPersist: versionToPersist,
    bootstrapMode: mode,
    wasFreshInstall: freshInstall,
    plannedSteps,
    predictedCurrentVersionAfter,
  };
}
private async ensureLedger(): Promise<Ledger> {
if (this.ledger) return this.ledger;
const ledgerName = this.settings.ledgerName;
@@ -424,6 +491,60 @@ export class SmartMigration {
return false;
}
/**
 * Start a background loop that periodically renews the migration lock held
 * by this instance.
 *
 * The loop waits `getLockHeartbeatMs()` between renewals and then calls
 * `ledger.renewLock(this.instanceId, this.settings.lockTtlMs)`. It ends on the
 * first renewal that returns a falsy value or throws, recording a `LOCK_LOST`
 * error that callers read through `getError()`. The loop itself never throws.
 *
 * The wait between beats uses its own timer so that `stop()` can clear it.
 * Racing a plain sleep against a stop signal would leave the sleep's timer
 * scheduled after `stop()`, keeping the Node event loop alive for up to one
 * full heartbeat interval after the run has finished.
 *
 * @param ledger Ledger whose lock is renewed.
 * @returns Handle exposing the recorded error (if any) and a `stop()` that
 *   resolves after the loop has finished, including any renewal in flight.
 */
private startLockHeartbeat(ledger: Ledger): ILockHeartbeat {
  const intervalMs = this.getLockHeartbeatMs();
  let stopped = false;
  let lockError: SmartMigrationError | null = null;
  // Set while the loop is waiting for the next beat; calling it clears the
  // pending timer and wakes the loop immediately.
  let cancelWait: (() => void) | null = null;

  const waitForNextBeat = (): Promise<void> =>
    new Promise<void>((resolve) => {
      const timer = setTimeout(() => {
        cancelWait = null;
        resolve();
      }, intervalMs);
      cancelWait = () => {
        clearTimeout(timer);
        cancelWait = null;
        resolve();
      };
    });

  const loopPromise = (async () => {
    while (!stopped) {
      await waitForNextBeat();
      if (stopped) {
        return;
      }

      try {
        const renewed = await ledger.renewLock(this.instanceId, this.settings.lockTtlMs);
        if (!renewed) {
          lockError = new SmartMigrationError(
            'LOCK_LOST',
            'Lost the migration lock while running steps. Another instance may have taken over.',
            { holderId: this.instanceId },
          );
          stopped = true;
        }
      } catch (err) {
        // Anything can be thrown; only read `.message` from real Error objects.
        const reason = err instanceof Error ? err.message : String(err);
        lockError = new SmartMigrationError(
          'LOCK_LOST',
          `Failed to renew the migration lock: ${reason}`,
          { holderId: this.instanceId, originalError: reason },
        );
        stopped = true;
      }
    }
  })();

  return {
    getError: () => lockError,
    stop: async () => {
      if (!stopped) {
        stopped = true;
        // Wake the loop now and release the pending timer, if any.
        cancelWait?.();
      }
      // Also covers a renewal still in flight and a loop that already ended.
      await loopPromise;
    },
  };
}
/**
* Heuristic fresh-install detector. Returns true when neither mongo nor S3
* contain anything besides smartmigration's own ledger artifacts.
@@ -439,11 +560,10 @@ export class SmartMigration {
if (userCollections.length > 0) return false;
}
if (this.settings.bucket) {
const cursor = (this.settings.bucket as any).createCursor('', { pageSize: 5 });
const batch = (await cursor.next()) as string[] | undefined;
if (batch && batch.length > 0) {
const userKeys = batch.filter((k) => !k.startsWith('.smartmigration/'));
if (userKeys.length > 0) return false;
for await (const key of this.settings.bucket.listAllObjects('')) {
if (!key.startsWith('.smartmigration/')) {
return false;
}
}
}
return true;
@@ -454,6 +574,50 @@ export class SmartMigration {
return name === 'SmartdataEasyStore' || name.startsWith('system.');
}
/**
 * Interval between lock renewals: one third of `settings.lockTtlMs`, rounded
 * down, but never less than `MIN_LOCK_HEARTBEAT_MS`.
 */
private getLockHeartbeatMs(): number {
  const oneThirdOfTtl = Math.floor(this.settings.lockTtlMs / 3);
  return oneThirdOfTtl < MIN_LOCK_HEARTBEAT_MS ? MIN_LOCK_HEARTBEAT_MS : oneThirdOfTtl;
}
/**
 * Build a fresh error describing a lost lock, if the heartbeat has recorded one.
 *
 * @param lockHeartbeat Heartbeat handle to inspect; may be null when none was started.
 * @param details Extra context merged over the recorded error's details
 *   (keys in `details` win on conflict).
 * @returns A new `SmartMigrationError` with the recorded code and message, or
 *   null when there is no heartbeat or it has recorded no error.
 */
private getLockHealthError(
  lockHeartbeat: ILockHeartbeat | null,
  details?: Record<string, unknown>,
): SmartMigrationError | null {
  const recorded = lockHeartbeat ? lockHeartbeat.getError() : null;
  if (recorded) {
    const mergedDetails = { ...recorded.details, ...details };
    return new SmartMigrationError(recorded.code, recorded.message, mergedDetails);
  }
  return null;
}
/**
 * Throw if the heartbeat has recorded a lock failure; otherwise do nothing.
 *
 * @param lockHeartbeat Heartbeat handle to inspect; may be null.
 * @param details Extra context attached to the thrown error.
 * @throws SmartMigrationError when `getLockHealthError()` reports a failure.
 */
private assertLockHealthy(
  lockHeartbeat: ILockHeartbeat | null,
  details?: Record<string, unknown>,
): void {
  const failure = this.getLockHealthError(lockHeartbeat, details);
  if (failure !== null) {
    throw failure;
  }
}
/**
 * Validate that a numeric option is an integer no smaller than a minimum.
 *
 * @param code Error code used when validation fails.
 * @param optionName Option name, used in the message and as the details key.
 * @param value Value to validate.
 * @param constraints Bounds to enforce; `min` is inclusive.
 * @throws SmartMigrationError when `value` is not an integer or is below `min`.
 */
private assertIntegerOption(
  code: string,
  optionName: string,
  value: number,
  constraints: { min: number },
): void {
  const isAcceptable = Number.isInteger(value) && !(value < constraints.min);
  if (isAcceptable) {
    return;
  }

  const message = `${optionName} must be an integer >= ${constraints.min}.`;
  throw new SmartMigrationError(code, message, { [optionName]: value });
}
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param ms Delay passed to `setTimeout`.
 */
private sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}