feat(migration): add lock heartbeats, predictive dry-run planning, and stricter ledger option validation
This commit is contained in:
+246
-82
@@ -22,6 +22,22 @@ const DEFAULT_LEDGER_NAME = 'smartmigration';
|
||||
// NOTE(review): presumably the fallback for the `lockWaitMs` option (ms); its use is outside this view — confirm.
const DEFAULT_LOCK_WAIT_MS = 60 * 1_000;

// NOTE(review): presumably the fallback for the `lockTtlMs` option (ms); its use is outside this view — confirm.
const DEFAULT_LOCK_TTL_MS = 600 * 1_000;

// NOTE(review): presumably the delay between lock-acquisition polls (ms); its use is outside this view — confirm.
const LOCK_POLL_INTERVAL_MS = 500;

/** Lower bound, in milliseconds, applied to the lock heartbeat interval. */
const MIN_LOCK_HEARTBEAT_MS = 1;
|
||||
|
||||
/**
 * Snapshot of how a ledger's stored version maps onto the configured step
 * chain, produced by `resolveLedgerState()`.
 */
interface IResolvedLedgerState {
  /** The ledger's `currentVersion` exactly as it was read (null when unset). */
  currentVersionBefore: string | null;

  /**
   * The version planning starts from: the ledger's version when present,
   * otherwise the bootstrap version chosen according to `bootstrapMode`.
   */
  effectiveCurrentVersion: string;

  /**
   * The version a real run writes to the ledger before executing steps.
   * Non-null only when the ledger had no `currentVersion`.
   */
  bootstrapVersionToPersist: string | null;

  /**
   * How the starting version was derived when the ledger had none:
   * - `'none'` — the ledger already had a version;
   * - `'fresh-install-version'` — fresh install with `freshInstallVersion` configured;
   * - `'target-without-steps'` — no steps registered, so the target version is adopted;
   * - `'chain-start'` — the first registered step's `fromVersion` is used.
   */
  bootstrapMode:
    | 'none'
    | 'fresh-install-version'
    | 'chain-start'
    | 'target-without-steps';

  /** Fresh-install flag reported in the run result. */
  wasFreshInstall: boolean;

  /** Steps to execute, in order; empty when already at the target version. */
  plannedSteps: IMigrationStepDefinition[];

  /**
   * `toVersion` of the last planned step, or `effectiveCurrentVersion` when
   * no steps are planned.
   */
  predictedCurrentVersionAfter: string;
}
|
||||
|
||||
/** Handle for the background loop that keeps the migration lock renewed. */
interface ILockHeartbeat {
  /**
   * Returns the error recorded when a lock renewal failed or was refused,
   * or `null` if no such failure has been recorded.
   */
  getError(): SmartMigrationError | null;

  /** Stops the renewal loop and resolves once the loop has finished. */
  stop(): Promise<void>;
}
|
||||
|
||||
/**
|
||||
* SmartMigration — the runner. See readme.md for the full API.
|
||||
@@ -55,6 +71,10 @@ export class SmartMigration {
|
||||
}
|
||||
VersionResolver.assertValid(options.targetVersion, 'options.targetVersion');
|
||||
|
||||
if (options.freshInstallVersion !== undefined) {
|
||||
VersionResolver.assertValid(options.freshInstallVersion, 'options.freshInstallVersion');
|
||||
}
|
||||
|
||||
if (!options.db && !options.bucket) {
|
||||
throw new SmartMigrationError(
|
||||
'NO_RESOURCES',
|
||||
@@ -78,6 +98,28 @@ export class SmartMigration {
|
||||
);
|
||||
}
|
||||
|
||||
if (
|
||||
options.ledgerName !== undefined &&
|
||||
(typeof options.ledgerName !== 'string' || options.ledgerName.trim() === '')
|
||||
) {
|
||||
throw new SmartMigrationError(
|
||||
'INVALID_LEDGER_NAME',
|
||||
'ledgerName must be a non-empty string when provided.',
|
||||
);
|
||||
}
|
||||
|
||||
if (options.lockWaitMs !== undefined) {
|
||||
this.assertIntegerOption('INVALID_LOCK_WAIT_MS', 'lockWaitMs', options.lockWaitMs, {
|
||||
min: 0,
|
||||
});
|
||||
}
|
||||
|
||||
if (options.lockTtlMs !== undefined) {
|
||||
this.assertIntegerOption('INVALID_LOCK_TTL_MS', 'lockTtlMs', options.lockTtlMs, {
|
||||
min: 1,
|
||||
});
|
||||
}
|
||||
|
||||
this.settings = {
|
||||
targetVersion: options.targetVersion,
|
||||
db: options.db,
|
||||
@@ -176,70 +218,44 @@ export class SmartMigration {
|
||||
|
||||
const runStart = Date.now();
|
||||
const applied: IMigrationStepResult[] = [];
|
||||
let wasFreshInstall = false;
|
||||
let currentVersionBefore: string | null = null;
|
||||
let lockHeartbeat: ILockHeartbeat | null = null;
|
||||
|
||||
try {
|
||||
lockHeartbeat = this.startLockHeartbeat(ledger);
|
||||
|
||||
// Re-read after acquiring lock (state may have changed while we waited).
|
||||
let data = await ledger.read();
|
||||
currentVersionBefore = data.currentVersion;
|
||||
const resolvedState = await this.resolveLedgerState(data);
|
||||
|
||||
// Resolve initial version.
|
||||
let currentVersion: string;
|
||||
if (data.currentVersion === null) {
|
||||
const fresh = await this.detectFreshInstall();
|
||||
if (fresh && this.settings.freshInstallVersion) {
|
||||
wasFreshInstall = true;
|
||||
currentVersion = this.settings.freshInstallVersion;
|
||||
VersionResolver.assertValid(currentVersion, 'freshInstallVersion');
|
||||
data.currentVersion = currentVersion;
|
||||
await ledger.write(data);
|
||||
this.log.log('info', `smartmigration: fresh install detected, jumping to ${currentVersion}`);
|
||||
} else {
|
||||
if (this.steps.length === 0) {
|
||||
// No steps and no current version — nothing to do.
|
||||
data.currentVersion = this.settings.targetVersion;
|
||||
await ledger.write(data);
|
||||
return {
|
||||
currentVersionBefore: null,
|
||||
currentVersionAfter: this.settings.targetVersion,
|
||||
targetVersion: this.settings.targetVersion,
|
||||
wasUpToDate: false,
|
||||
wasFreshInstall: true,
|
||||
stepsApplied: [],
|
||||
stepsSkipped: [],
|
||||
totalDurationMs: Date.now() - runStart,
|
||||
};
|
||||
}
|
||||
currentVersion = this.steps[0].fromVersion;
|
||||
data.currentVersion = currentVersion;
|
||||
await ledger.write(data);
|
||||
if (resolvedState.bootstrapVersionToPersist !== null) {
|
||||
data.currentVersion = resolvedState.bootstrapVersionToPersist;
|
||||
await ledger.write(data);
|
||||
if (resolvedState.bootstrapMode === 'fresh-install-version') {
|
||||
this.log.log(
|
||||
'info',
|
||||
`smartmigration: fresh install detected, jumping to ${resolvedState.bootstrapVersionToPersist}`,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
currentVersion = data.currentVersion;
|
||||
}
|
||||
|
||||
// Already at target after fresh-install resolution?
|
||||
if (VersionResolver.equals(currentVersion, this.settings.targetVersion)) {
|
||||
this.assertLockHealthy(lockHeartbeat);
|
||||
|
||||
if (resolvedState.plannedSteps.length === 0) {
|
||||
return {
|
||||
currentVersionBefore,
|
||||
currentVersionAfter: currentVersion,
|
||||
currentVersionBefore: resolvedState.currentVersionBefore,
|
||||
currentVersionAfter: resolvedState.predictedCurrentVersionAfter,
|
||||
targetVersion: this.settings.targetVersion,
|
||||
wasUpToDate: true,
|
||||
wasFreshInstall,
|
||||
wasUpToDate: resolvedState.bootstrapMode !== 'target-without-steps',
|
||||
wasFreshInstall: resolvedState.wasFreshInstall,
|
||||
stepsApplied: [],
|
||||
stepsSkipped: [],
|
||||
totalDurationMs: Date.now() - runStart,
|
||||
};
|
||||
}
|
||||
|
||||
const plan = VersionResolver.computePlan(
|
||||
this.steps,
|
||||
currentVersion,
|
||||
this.settings.targetVersion,
|
||||
);
|
||||
let currentVersion = resolvedState.effectiveCurrentVersion;
|
||||
|
||||
for (const step of plan) {
|
||||
for (const step of resolvedState.plannedSteps) {
|
||||
const startedAt = new Date();
|
||||
const stepStart = Date.now();
|
||||
let entry: IMigrationLedgerEntry;
|
||||
@@ -272,6 +288,7 @@ export class SmartMigration {
|
||||
log: this.log,
|
||||
});
|
||||
await step.handler(ctx);
|
||||
this.assertLockHealthy(lockHeartbeat, { stepId: step.id });
|
||||
|
||||
const finishedAt = new Date();
|
||||
const durationMs = Date.now() - stepStart;
|
||||
@@ -288,6 +305,7 @@ export class SmartMigration {
|
||||
data = await ledger.read();
|
||||
data.steps[step.id] = entry;
|
||||
data.currentVersion = step.toVersion;
|
||||
delete data.checkpoints[step.id];
|
||||
await ledger.write(data);
|
||||
// Advance the running cursor used by skip-forward detection.
|
||||
currentVersion = step.toVersion;
|
||||
@@ -300,6 +318,15 @@ export class SmartMigration {
|
||||
const finishedAt = new Date();
|
||||
const durationMs = Date.now() - stepStart;
|
||||
const error = err as Error;
|
||||
const lockError = this.getLockHealthError(lockHeartbeat, {
|
||||
stepId: step.id,
|
||||
originalError: error.message,
|
||||
stack: error.stack,
|
||||
});
|
||||
if (lockError) {
|
||||
throw lockError;
|
||||
}
|
||||
|
||||
entry = {
|
||||
id: step.id,
|
||||
fromVersion: step.fromVersion,
|
||||
@@ -330,18 +357,23 @@ export class SmartMigration {
|
||||
}
|
||||
}
|
||||
|
||||
this.assertLockHealthy(lockHeartbeat);
|
||||
|
||||
const finalData = await ledger.read();
|
||||
return {
|
||||
currentVersionBefore,
|
||||
currentVersionBefore: resolvedState.currentVersionBefore,
|
||||
currentVersionAfter: finalData.currentVersion ?? this.settings.targetVersion,
|
||||
targetVersion: this.settings.targetVersion,
|
||||
wasUpToDate: false,
|
||||
wasFreshInstall,
|
||||
wasFreshInstall: resolvedState.wasFreshInstall,
|
||||
stepsApplied: applied,
|
||||
stepsSkipped: [],
|
||||
totalDurationMs: Date.now() - runStart,
|
||||
};
|
||||
} finally {
|
||||
if (lockHeartbeat) {
|
||||
await lockHeartbeat.stop();
|
||||
}
|
||||
await ledger.releaseLock(this.instanceId).catch((err) => {
|
||||
this.log.log(
|
||||
'warn',
|
||||
@@ -355,31 +387,10 @@ export class SmartMigration {
|
||||
* Resolve the plan against the current ledger state without acquiring a
|
||||
* lock or executing anything. Used by `plan()` and `dryRun: true`.
|
||||
*/
|
||||
private computeResultWithoutRun(data: ISmartMigrationLedgerData): IMigrationRunResult {
|
||||
const currentVersion =
|
||||
data.currentVersion ??
|
||||
(this.steps.length > 0 ? this.steps[0].fromVersion : this.settings.targetVersion);
|
||||
private async computeResultWithoutRun(data: ISmartMigrationLedgerData): Promise<IMigrationRunResult> {
|
||||
const resolvedState = await this.resolveLedgerState(data);
|
||||
|
||||
if (VersionResolver.equals(currentVersion, this.settings.targetVersion)) {
|
||||
return {
|
||||
currentVersionBefore: data.currentVersion,
|
||||
currentVersionAfter: currentVersion,
|
||||
targetVersion: this.settings.targetVersion,
|
||||
wasUpToDate: true,
|
||||
wasFreshInstall: false,
|
||||
stepsApplied: [],
|
||||
stepsSkipped: [],
|
||||
totalDurationMs: 0,
|
||||
};
|
||||
}
|
||||
|
||||
const plan = VersionResolver.computePlan(
|
||||
this.steps,
|
||||
currentVersion,
|
||||
this.settings.targetVersion,
|
||||
);
|
||||
|
||||
const skipped: IMigrationStepResult[] = plan.map((step) => ({
|
||||
const skipped: IMigrationStepResult[] = resolvedState.plannedSteps.map((step) => ({
|
||||
id: step.id,
|
||||
fromVersion: step.fromVersion,
|
||||
toVersion: step.toVersion,
|
||||
@@ -390,17 +401,73 @@ export class SmartMigration {
|
||||
}));
|
||||
|
||||
return {
|
||||
currentVersionBefore: data.currentVersion,
|
||||
currentVersionAfter: currentVersion,
|
||||
currentVersionBefore: resolvedState.currentVersionBefore,
|
||||
currentVersionAfter: resolvedState.predictedCurrentVersionAfter,
|
||||
targetVersion: this.settings.targetVersion,
|
||||
wasUpToDate: false,
|
||||
wasFreshInstall: false,
|
||||
wasUpToDate:
|
||||
resolvedState.plannedSteps.length === 0 &&
|
||||
resolvedState.bootstrapMode !== 'target-without-steps',
|
||||
wasFreshInstall: resolvedState.wasFreshInstall,
|
||||
stepsApplied: [],
|
||||
stepsSkipped: skipped,
|
||||
totalDurationMs: 0,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Work out where a migration run would start from and which steps it would
 * execute, without writing anything.
 *
 * When the ledger has no `currentVersion`, a starting version is chosen:
 * `freshInstallVersion` on a detected fresh install (if configured), the
 * target version when no steps are registered, otherwise the first step's
 * `fromVersion`. That chosen version is reported as
 * `bootstrapVersionToPersist` for the caller to persist.
 *
 * @param data - Ledger contents as read by the caller.
 * @returns The resolved starting state, planned steps and predicted end version.
 */
private async resolveLedgerState(
  data: ISmartMigrationLedgerData,
): Promise<IResolvedLedgerState> {
  let startVersion: string;
  let versionToPersist: string | null = null;
  let mode: IResolvedLedgerState['bootstrapMode'] = 'none';
  let freshInstall = false;

  if (data.currentVersion !== null) {
    // The ledger already knows its version; nothing to bootstrap.
    startVersion = data.currentVersion;
  } else {
    const looksFresh = await this.detectFreshInstall();

    if (looksFresh && this.settings.freshInstallVersion) {
      mode = 'fresh-install-version';
      startVersion = this.settings.freshInstallVersion;
      freshInstall = true;
    } else if (this.steps.length === 0) {
      mode = 'target-without-steps';
      startVersion = this.settings.targetVersion;
      freshInstall = looksFresh;
    } else {
      mode = 'chain-start';
      startVersion = this.steps[0].fromVersion;
    }

    // Every bootstrap branch persists the version it starts from.
    versionToPersist = startVersion;
  }

  let plannedSteps: IResolvedLedgerState['plannedSteps'] = [];
  if (!VersionResolver.equals(startVersion, this.settings.targetVersion)) {
    plannedSteps = VersionResolver.computePlan(
      this.steps,
      startVersion,
      this.settings.targetVersion,
    );
  }

  const predictedCurrentVersionAfter =
    plannedSteps.length === 0
      ? startVersion
      : plannedSteps[plannedSteps.length - 1].toVersion;

  return {
    currentVersionBefore: data.currentVersion,
    effectiveCurrentVersion: startVersion,
    bootstrapVersionToPersist: versionToPersist,
    bootstrapMode: mode,
    wasFreshInstall: freshInstall,
    plannedSteps,
    predictedCurrentVersionAfter,
  };
}
|
||||
|
||||
private async ensureLedger(): Promise<Ledger> {
|
||||
if (this.ledger) return this.ledger;
|
||||
const ledgerName = this.settings.ledgerName;
|
||||
@@ -424,6 +491,60 @@ export class SmartMigration {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Start a background loop that renews this instance's migration lock every
 * `getLockHeartbeatMs()` milliseconds.
 *
 * The loop ends when `stop()` is called, or on the first renewal that is
 * refused or throws; in the latter cases a `LOCK_LOST` error is recorded and
 * exposed through `getError()`.
 *
 * The wait between renewals is cancellable: `stop()` clears the pending timer
 * so no timer outlives the heartbeat and keeps the event loop alive.
 *
 * @param ledger - Ledger whose lock is renewed via `renewLock`.
 * @returns A handle to query the recorded lock error and to stop the loop.
 */
private startLockHeartbeat(ledger: Ledger): ILockHeartbeat {
  const intervalMs = this.getLockHeartbeatMs();
  let stopped = false;
  let lockError: SmartMigrationError | null = null;
  // Cancels the wait currently in progress, if any (clears its timer and wakes the loop).
  let cancelWait: (() => void) | null = null;

  const waitForNextBeat = (): Promise<void> =>
    new Promise<void>((resolve) => {
      const timer = setTimeout(() => {
        cancelWait = null;
        resolve();
      }, intervalMs);
      cancelWait = () => {
        clearTimeout(timer);
        cancelWait = null;
        resolve();
      };
    });

  const loopPromise = (async () => {
    while (!stopped) {
      await waitForNextBeat();
      if (stopped) {
        return;
      }

      try {
        const renewed = await ledger.renewLock(this.instanceId, this.settings.lockTtlMs);
        if (!renewed) {
          lockError = new SmartMigrationError(
            'LOCK_LOST',
            'Lost the migration lock while running steps. Another instance may have taken over.',
            { holderId: this.instanceId },
          );
          stopped = true;
        }
      } catch (err) {
        // A thrown value is not guaranteed to be an Error instance.
        const reason = err instanceof Error ? err.message : String(err);
        lockError = new SmartMigrationError(
          'LOCK_LOST',
          `Failed to renew the migration lock: ${reason}`,
          { holderId: this.instanceId, originalError: reason },
        );
        stopped = true;
      }
    }
  })();

  return {
    getError: () => lockError,
    stop: async () => {
      if (!stopped) {
        stopped = true;
        // Wake the loop immediately and release the pending timer.
        cancelWait?.();
      }
      // Also waits for any renewal that is currently in flight.
      await loopPromise;
    },
  };
}
|
||||
|
||||
/**
|
||||
* Heuristic fresh-install detector. Returns true when neither mongo nor S3
|
||||
* contain anything besides smartmigration's own ledger artifacts.
|
||||
@@ -439,11 +560,10 @@ export class SmartMigration {
|
||||
if (userCollections.length > 0) return false;
|
||||
}
|
||||
if (this.settings.bucket) {
|
||||
const cursor = (this.settings.bucket as any).createCursor('', { pageSize: 5 });
|
||||
const batch = (await cursor.next()) as string[] | undefined;
|
||||
if (batch && batch.length > 0) {
|
||||
const userKeys = batch.filter((k) => !k.startsWith('.smartmigration/'));
|
||||
if (userKeys.length > 0) return false;
|
||||
for await (const key of this.settings.bucket.listAllObjects('')) {
|
||||
if (!key.startsWith('.smartmigration/')) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
@@ -454,6 +574,50 @@ export class SmartMigration {
|
||||
return name === 'SmartdataEasyStore' || name.startsWith('system.');
|
||||
}
|
||||
|
||||
/**
 * Interval between lock renewals: one third of the lock TTL, rounded down,
 * but never below `MIN_LOCK_HEARTBEAT_MS`.
 */
private getLockHeartbeatMs(): number {
  const oneThirdOfTtl = Math.floor(this.settings.lockTtlMs / 3);
  return oneThirdOfTtl < MIN_LOCK_HEARTBEAT_MS ? MIN_LOCK_HEARTBEAT_MS : oneThirdOfTtl;
}
|
||||
|
||||
/**
 * Build a fresh error describing a heartbeat failure, if one was recorded.
 *
 * @param lockHeartbeat - Heartbeat handle to query; may be null.
 * @param details - Extra context merged over the recorded error's details.
 * @returns A new `SmartMigrationError` carrying the recorded code and message,
 *   or `null` when there is no heartbeat or no recorded error.
 */
private getLockHealthError(
  lockHeartbeat: ILockHeartbeat | null,
  details?: Record<string, unknown>,
): SmartMigrationError | null {
  const heartbeatError = lockHeartbeat?.getError() ?? null;
  if (heartbeatError === null) {
    return null;
  }

  // Caller-supplied details win over the ones recorded by the heartbeat.
  const mergedDetails = { ...heartbeatError.details, ...details };
  return new SmartMigrationError(heartbeatError.code, heartbeatError.message, mergedDetails);
}
|
||||
|
||||
/**
 * Throw if the heartbeat has recorded a lock failure; otherwise do nothing.
 *
 * @param lockHeartbeat - Heartbeat handle to check; may be null.
 * @param details - Extra context attached to the thrown error.
 * @throws SmartMigrationError when `getLockHealthError` reports a failure.
 */
private assertLockHealthy(
  lockHeartbeat: ILockHeartbeat | null,
  details?: Record<string, unknown>,
): void {
  const failure = this.getLockHealthError(lockHeartbeat, details);
  if (failure !== null) {
    throw failure;
  }
}
|
||||
|
||||
/**
 * Validate that a numeric option is an integer no smaller than a minimum.
 *
 * @param code - Error code used when validation fails.
 * @param optionName - Option name, used in the message and as the details key.
 * @param value - Value to validate.
 * @param constraints - Bounds; `min` is the smallest accepted value.
 * @throws SmartMigrationError when `value` is not an integer or is below `min`.
 */
private assertIntegerOption(
  code: string,
  optionName: string,
  value: number,
  constraints: { min: number },
): void {
  const isAcceptable = Number.isInteger(value) && !(value < constraints.min);
  if (isAcceptable) {
    return;
  }

  throw new SmartMigrationError(
    code,
    `${optionName} must be an integer >= ${constraints.min}.`,
    { [optionName]: value },
  );
}
|
||||
|
||||
/**
 * Resolve after the given delay.
 *
 * @param ms - Delay in milliseconds, passed to `setTimeout`.
 */
private sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
Reference in New Issue
Block a user