fix(smartacme): prevent SmartAcme startup from blocking router startup

This commit is contained in:
2026-05-31 21:05:34 +00:00
parent 3dc0371f7e
commit 3679cba3a4
2 changed files with 158 additions and 46 deletions
+7
View File
@@ -17,6 +17,13 @@
### Fixes
- prevent SmartAcme startup from blocking router startup (smartacme)
- Start SmartAcme in the background with bounded exponential retry handling
- Re-trigger certificate provisioning after SmartAcme becomes ready
- Cancel stale retry timers and clean up SmartAcme instances during shutdown or config updates
## 2026-05-31 - 13.41.0
### Features
+151 -46
View File
@@ -330,6 +330,11 @@ export class DcRouter {
public serviceManager: plugins.taskbuffer.ServiceManager;
private serviceSubjectSubscription?: plugins.smartrx.rxjs.Subscription;
// True only after smartAcme.start() has resolved for the current start generation;
// reset to false whenever SmartAcme is (re)started in the background or stopped.
public smartAcmeReady = false;
// Set by the SmartAcme service's start callback and cleared by its stop callback.
// Read when a new SmartAcme instance is created to decide whether to start it right away.
private smartAcmeServiceStarted = false;
// Incremented on every background start request and on every stop. A start attempt
// captures the value it was scheduled with and bails out when it no longer matches.
private smartAcmeStartGeneration = 0;
// Promise returned by the in-flight smartAcme.start() call; cleared once that attempt finishes.
private smartAcmeStartPromise?: Promise<void>;
// Handle of the pending timer that will run the next start attempt, if one is scheduled.
private smartAcmeRetryTimer?: ReturnType<typeof setTimeout>;
// Number of failed start attempts in the current run; reset on success, on stop and on a fresh start.
private smartAcmeRetryAttempt = 0;
// TypedRouter for API endpoints
public typedrouter = new plugins.typedrequest.TypedRouter();
@@ -549,45 +554,14 @@ export class DcRouter {
.optional()
.dependsOn('SmartProxy')
.withStart(async () => {
if (this.smartAcme) {
await this.smartAcme.start();
this.smartAcmeReady = true;
logger.log('info', 'SmartAcme DNS-01 provider is now ready');
// Re-trigger certificate provisioning for all auto-cert routes.
// During startup, certProvisionFunction returned 'http01' (SmartAcme not ready),
// but Rust ACME is disabled when certProvisionFunction is set — so all domains
// failed silently (SmartProxy doesn't emit certificate-failed for this path).
// Calling updateRoutes() re-triggers provisionCertificatesViaCallback internally,
// which calls certProvisionFunction again — now with smartAcmeReady === true.
if (this.routeConfigManager) {
// Go through RouteConfigManager to get the full merged route set
// and serialize via the route-update mutex (prevents stale overwrites)
logger.log('info', 'Re-triggering certificate provisioning via RouteConfigManager');
this.routeConfigManager.applyRoutes().catch((err: any) => {
logger.log('warn', `Failed to re-trigger cert provisioning: ${err?.message || err}`);
});
} else if (this.smartProxy) {
// No RouteConfigManager (DB disabled) — re-send current routes to trigger cert provisioning
if (this.certProvisionScheduler) {
this.certProvisionScheduler.clear();
}
const currentRoutes = this.smartProxy.routeManager.getRoutes();
logger.log('info', `Re-triggering certificate provisioning for ${currentRoutes.length} routes`);
this.smartProxy.updateRoutes(currentRoutes).catch((err: any) => {
logger.log('warn', `Failed to re-trigger cert provisioning: ${err?.message || err}`);
});
}
}
this.smartAcmeServiceStarted = true;
this.startSmartAcmeInBackground();
})
.withStop(async () => {
this.smartAcmeReady = false;
if (this.smartAcme) {
await this.smartAcme.stop();
this.smartAcme = undefined;
}
this.smartAcmeServiceStarted = false;
await this.stopSmartAcme();
})
.withRetry({ maxRetries: 20, baseDelayMs: 5000, maxDelayMs: 3_600_000, backoffFactor: 2 }),
.withRetry({ maxRetries: 0 }),
);
}
@@ -778,6 +752,138 @@ export class DcRouter {
});
}
/**
 * Kicks off a fresh SmartAcme start sequence without awaiting it.
 *
 * Readiness is always reset. When no SmartAcme instance exists nothing else happens;
 * otherwise a new start generation is opened, the retry counter is reset, any pending
 * retry timer is cancelled and the first attempt is scheduled with zero delay.
 */
private startSmartAcmeInBackground(): void {
  // Both the "no instance" path and the normal path begin from "not ready".
  this.smartAcmeReady = false;
  if (!this.smartAcme) {
    return;
  }

  // Opening a new generation invalidates every attempt scheduled before this call.
  this.smartAcmeStartGeneration += 1;
  const currentGeneration = this.smartAcmeStartGeneration;

  this.smartAcmeRetryAttempt = 0;
  this.clearSmartAcmeRetryTimer();
  this.scheduleSmartAcmeStart(currentGeneration, 0);
}
/**
 * Schedules a single SmartAcme start attempt after `delayMs` milliseconds.
 *
 * Any previously pending timer is cancelled first, so at most one attempt is queued.
 *
 * @param generation start generation the attempt belongs to
 * @param delayMs delay before the attempt runs, in milliseconds
 */
private scheduleSmartAcmeStart(generation: number, delayMs: number): void {
  this.clearSmartAcmeRetryTimer();

  const onTimerFired = (): void => {
    // The timer has fired, so there is nothing left to cancel.
    this.smartAcmeRetryTimer = undefined;
    this.runSmartAcmeStartAttempt(generation).catch((err) => {
      logger.log('error', `Unexpected SmartAcme startup error: ${(err as Error).message}`);
    });
  };

  const handle = setTimeout(onTimerFired, delayMs);
  this.smartAcmeRetryTimer = handle;

  // Call unref() on the handle when the runtime provides it (Node.js timers do).
  const maybeUnref = (handle as any)?.unref;
  if (typeof maybeUnref === 'function') {
    maybeUnref.call(handle);
  }
}
/**
 * Runs one SmartAcme start attempt for the given start generation.
 *
 * On success the instance is marked ready and certificate provisioning is re-triggered.
 * On failure the instance is stopped for cleanup and, unless more than 20 attempts have
 * failed, another attempt is scheduled with exponential backoff (5s base, 1h cap, ±20% jitter).
 *
 * An attempt is "superseded" when the start generation has moved on or `this.smartAcme`
 * no longer points at the instance this attempt started. A superseded attempt never
 * touches shared retry state or timers.
 *
 * @param generation start generation this attempt was scheduled for
 * @returns a promise that settles once the attempt (including cleanup) is finished
 */
private async runSmartAcmeStartAttempt(generation: number): Promise<void> {
  const smartAcme = this.smartAcme;
  if (!smartAcme || generation !== this.smartAcmeStartGeneration) {
    return;
  }

  const isSuperseded = (): boolean =>
    generation !== this.smartAcmeStartGeneration || this.smartAcme !== smartAcme;
  // Rejections are not guaranteed to be Error instances; avoid logging "undefined".
  const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let startPromise: Promise<void> | undefined;
  try {
    let startFailed = false;
    let startError: unknown;
    // Only the start itself is guarded here, so that problems in the follow-up work
    // below are never mistaken for a SmartAcme startup failure.
    try {
      startPromise = smartAcme.start();
      this.smartAcmeStartPromise = startPromise;
      await startPromise;
    } catch (err) {
      startFailed = true;
      startError = err;
    }

    if (!startFailed) {
      if (isSuperseded()) {
        // Started an instance nobody wants anymore: shut it down again.
        await smartAcme.stop().catch((err: unknown) => {
          logger.log('warn', `Failed to stop stale SmartAcme instance: ${describeError(err)}`);
        });
        return;
      }
      this.smartAcmeReady = true;
      this.smartAcmeRetryAttempt = 0;
      logger.log('info', 'SmartAcme DNS-01 provider is now ready');
      try {
        this.retriggerCertificateProvisioningAfterSmartAcmeReady();
      } catch (err) {
        // A synchronous failure while re-applying routes must not tear down the
        // successfully started SmartAcme instance or trigger a start retry.
        logger.log('warn', `Failed to re-trigger cert provisioning: ${describeError(err)}`);
      }
      return;
    }

    if (isSuperseded()) {
      return;
    }
    this.smartAcmeReady = false;
    await smartAcme.stop().catch((stopErr: unknown) => {
      logger.log('warn', `Failed to clean up SmartAcme after startup failure: ${describeError(stopErr)}`);
    });
    // The cleanup above yields; a stop or restart may have happened meanwhile. Bail out
    // so this attempt does not bump the new run's retry counter or cancel its timer.
    if (isSuperseded()) {
      return;
    }

    this.smartAcmeRetryAttempt++;
    if (this.smartAcmeRetryAttempt > 20) {
      logger.log('error', `SmartAcme DNS-01 provider failed after 20 startup attempts: ${describeError(startError)}`);
      return;
    }

    const baseDelayMs = 5000;
    const maxDelayMs = 3_600_000;
    const delayMs = Math.min(baseDelayMs * Math.pow(2, this.smartAcmeRetryAttempt - 1), maxDelayMs);
    // Spread retries by a random factor in [0.8, 1.2).
    const jitter = 0.8 + Math.random() * 0.4;
    const actualDelayMs = Math.floor(delayMs * jitter);
    logger.log('warn', `SmartAcme DNS-01 provider startup failed: ${describeError(startError)}; retrying in ${actualDelayMs}ms (attempt ${this.smartAcmeRetryAttempt}/20)`);
    this.scheduleSmartAcmeStart(generation, actualDelayMs);
  } finally {
    // Only clear the tracked promise if it still belongs to this attempt.
    if (startPromise !== undefined && this.smartAcmeStartPromise === startPromise) {
      this.smartAcmeStartPromise = undefined;
    }
  }
}
/**
 * Re-applies the current routes so certificate provisioning runs again.
 *
 * During startup, certProvisionFunction returns 'http01' while SmartAcme is not ready,
 * but Rust ACME is disabled when certProvisionFunction is set. Re-applying routes
 * retries provisioning now that DNS-01 is available.
 *
 * Uses RouteConfigManager when present; otherwise clears the cert provision scheduler
 * (if any) and re-sends SmartProxy's current routes. Failures are logged, not thrown.
 */
private retriggerCertificateProvisioningAfterSmartAcmeReady(): void {
  const warnOnFailure = (err: any): void => {
    logger.log('warn', `Failed to re-trigger cert provisioning: ${err?.message || err}`);
  };

  const routeConfigManager = this.routeConfigManager;
  if (routeConfigManager) {
    logger.log('info', 'Re-triggering certificate provisioning via RouteConfigManager');
    routeConfigManager.applyRoutes().catch(warnOnFailure);
    return;
  }

  const proxy = this.smartProxy;
  if (!proxy) {
    return;
  }

  // No RouteConfigManager: push the proxy's own current route list back to it.
  this.certProvisionScheduler?.clear();
  const routes = proxy.routeManager.getRoutes();
  logger.log('info', `Re-triggering certificate provisioning for ${routes.length} routes`);
  proxy.updateRoutes(routes).catch(warnOnFailure);
}
/**
 * Cancels the pending SmartAcme start/retry timer, if there is one, and forgets its handle.
 */
private clearSmartAcmeRetryTimer(): void {
  const pendingTimer = this.smartAcmeRetryTimer;
  if (!pendingTimer) {
    return;
  }
  clearTimeout(pendingTimer);
  this.smartAcmeRetryTimer = undefined;
}
/**
 * Stops the current SmartAcme instance and cancels any scheduled start attempt.
 *
 * The start generation is advanced first so that in-flight or queued attempts see
 * themselves as outdated. Errors from `stop()` are logged rather than rethrown, and the
 * instance reference is dropped afterwards unless it has been replaced in the meantime.
 */
private async stopSmartAcme(): Promise<void> {
  this.smartAcmeStartGeneration += 1;
  this.smartAcmeReady = false;
  this.smartAcmeRetryAttempt = 0;
  this.clearSmartAcmeRetryTimer();

  const instance = this.smartAcme;
  if (instance) {
    try {
      await instance.stop();
    } catch (stopError) {
      logger.log('error', 'Error stopping SmartAcme', { error: String(stopError) });
    } finally {
      // Leave a newer instance alone if one was assigned while stop() was pending.
      if (this.smartAcme === instance) {
        this.smartAcme = undefined;
      }
    }
  }
}
public async start() {
await this.checkSystemLimits();
logger.log('info', 'Starting DcRouter Services');
@@ -1093,17 +1199,13 @@ export class DcRouter {
// Initialize cert provision scheduler
this.certProvisionScheduler = new CertProvisionScheduler();
// If we have DNS challenge handlers, create SmartAcme instance and wire certProvisionFunction
// Note: SmartAcme.start() is NOT called here — it runs as a separate optional service
// via the ServiceManager, with aggressive retry for rate-limit resilience.
// If we have DNS challenge handlers, create SmartAcme instance and wire certProvisionFunction.
// SmartAcme starts in the background because ACME account setup can be slow or rate-limited,
// and must not block dcrouter's global startup timeout.
if (this.smartAcme) {
await this.stopSmartAcme();
}
if (challengeHandlers.length > 0) {
// Stop old SmartAcme if it exists (e.g., during updateSmartProxyConfig)
if (this.smartAcme) {
this.smartAcmeReady = false;
await this.smartAcme.stop().catch(err =>
logger.log('error', 'Error stopping old SmartAcme', { error: String(err) })
);
}
// Safe non-null: challengeHandlers.length > 0 implies both dnsManager
// and acmeConfig exist (enforced above).
this.smartAcme = new plugins.smartacme.SmartAcme({
@@ -1113,6 +1215,9 @@ export class DcRouter {
challengeHandlers: challengeHandlers,
challengePriority: ['dns-01'],
});
if (this.smartAcmeServiceStarted) {
this.startSmartAcmeInBackground();
}
const scheduler = this.certProvisionScheduler;
smartProxyConfig.certProvisionFallbackToAcme = false;