fix(smartacme): prevent SmartAcme startup from blocking router startup
This commit is contained in:
@@ -17,6 +17,13 @@
|
||||
|
||||
|
||||
|
||||
### Fixes
|
||||
|
||||
- prevent SmartAcme startup from blocking router startup (smartacme)
|
||||
- Start SmartAcme in the background with bounded exponential retry handling
|
||||
- Re-trigger certificate provisioning after SmartAcme becomes ready
|
||||
- Cancel stale retry timers and clean up SmartAcme instances during shutdown or config updates
|
||||
|
||||
## 2026-05-31 - 13.41.0
|
||||
|
||||
### Features
|
||||
|
||||
+151
-46
@@ -330,6 +330,11 @@ export class DcRouter {
|
||||
public serviceManager: plugins.taskbuffer.ServiceManager;
|
||||
private serviceSubjectSubscription?: plugins.smartrx.rxjs.Subscription;
|
||||
public smartAcmeReady = false;
|
||||
private smartAcmeServiceStarted = false;
|
||||
private smartAcmeStartGeneration = 0;
|
||||
private smartAcmeStartPromise?: Promise<void>;
|
||||
private smartAcmeRetryTimer?: ReturnType<typeof setTimeout>;
|
||||
private smartAcmeRetryAttempt = 0;
|
||||
|
||||
// TypedRouter for API endpoints
|
||||
public typedrouter = new plugins.typedrequest.TypedRouter();
|
||||
@@ -549,45 +554,14 @@ export class DcRouter {
|
||||
.optional()
|
||||
.dependsOn('SmartProxy')
|
||||
.withStart(async () => {
|
||||
if (this.smartAcme) {
|
||||
await this.smartAcme.start();
|
||||
this.smartAcmeReady = true;
|
||||
logger.log('info', 'SmartAcme DNS-01 provider is now ready');
|
||||
|
||||
// Re-trigger certificate provisioning for all auto-cert routes.
|
||||
// During startup, certProvisionFunction returned 'http01' (SmartAcme not ready),
|
||||
// but Rust ACME is disabled when certProvisionFunction is set — so all domains
|
||||
// failed silently (SmartProxy doesn't emit certificate-failed for this path).
|
||||
// Calling updateRoutes() re-triggers provisionCertificatesViaCallback internally,
|
||||
// which calls certProvisionFunction again — now with smartAcmeReady === true.
|
||||
if (this.routeConfigManager) {
|
||||
// Go through RouteConfigManager to get the full merged route set
|
||||
// and serialize via the route-update mutex (prevents stale overwrites)
|
||||
logger.log('info', 'Re-triggering certificate provisioning via RouteConfigManager');
|
||||
this.routeConfigManager.applyRoutes().catch((err: any) => {
|
||||
logger.log('warn', `Failed to re-trigger cert provisioning: ${err?.message || err}`);
|
||||
});
|
||||
} else if (this.smartProxy) {
|
||||
// No RouteConfigManager (DB disabled) — re-send current routes to trigger cert provisioning
|
||||
if (this.certProvisionScheduler) {
|
||||
this.certProvisionScheduler.clear();
|
||||
}
|
||||
const currentRoutes = this.smartProxy.routeManager.getRoutes();
|
||||
logger.log('info', `Re-triggering certificate provisioning for ${currentRoutes.length} routes`);
|
||||
this.smartProxy.updateRoutes(currentRoutes).catch((err: any) => {
|
||||
logger.log('warn', `Failed to re-trigger cert provisioning: ${err?.message || err}`);
|
||||
});
|
||||
}
|
||||
}
|
||||
this.smartAcmeServiceStarted = true;
|
||||
this.startSmartAcmeInBackground();
|
||||
})
|
||||
.withStop(async () => {
|
||||
this.smartAcmeReady = false;
|
||||
if (this.smartAcme) {
|
||||
await this.smartAcme.stop();
|
||||
this.smartAcme = undefined;
|
||||
}
|
||||
this.smartAcmeServiceStarted = false;
|
||||
await this.stopSmartAcme();
|
||||
})
|
||||
.withRetry({ maxRetries: 20, baseDelayMs: 5000, maxDelayMs: 3_600_000, backoffFactor: 2 }),
|
||||
.withRetry({ maxRetries: 0 }),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -778,6 +752,138 @@ export class DcRouter {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Kicks off a fresh SmartAcme startup cycle without awaiting it.
 *
 * The ready flag is always reset. When a SmartAcme instance exists, the
 * start generation is incremented, the retry counter is zeroed, any pending
 * retry timer is cancelled, and a first start attempt is scheduled with no delay.
 */
private startSmartAcmeInBackground(): void {
  // Both the "no instance" path and the normal path leave the provider not-ready.
  this.smartAcmeReady = false;

  if (this.smartAcme) {
    // A new generation number identifies this startup cycle.
    this.smartAcmeStartGeneration += 1;
    const currentGeneration = this.smartAcmeStartGeneration;

    this.smartAcmeRetryAttempt = 0;
    this.clearSmartAcmeRetryTimer();
    this.scheduleSmartAcmeStart(currentGeneration, 0);
  }
}
|
||||
|
||||
/**
 * Arms the (single) retry timer that will run one SmartAcme start attempt.
 *
 * Any previously pending timer is cancelled first. When the timer fires, the
 * stored handle is cleared and `runSmartAcmeStartAttempt` is invoked; a
 * rejection from that call is logged rather than propagated.
 *
 * @param generation start generation passed through to the attempt
 * @param delayMs delay before the attempt runs, in milliseconds
 */
private scheduleSmartAcmeStart(generation: number, delayMs: number): void {
  this.clearSmartAcmeRetryTimer();

  const fireAttempt = (): void => {
    // The timer has fired, so there is nothing left to cancel.
    this.smartAcmeRetryTimer = undefined;
    this.runSmartAcmeStartAttempt(generation).catch((err) => {
      logger.log('error', `Unexpected SmartAcme startup error: ${(err as Error).message}`);
    });
  };

  const handle = setTimeout(fireAttempt, delayMs);
  this.smartAcmeRetryTimer = handle;

  // Call unref() only where the timer handle provides it (Node.js timer
  // objects do; there it lets the process exit while the timer is pending).
  const maybeUnrefable = handle as any;
  if (typeof maybeUnrefable?.unref === 'function') {
    maybeUnrefable.unref();
  }
}
|
||||
|
||||
/**
 * Runs one SmartAcme startup attempt for the given start generation.
 *
 * On success the provider is marked ready, the retry counter is reset and
 * certificate provisioning is re-triggered. On failure the instance is
 * stopped and another attempt is scheduled with exponential backoff
 * (5s base, doubling per attempt, capped at 1h, multiplied by a random
 * factor in [0.8, 1.2)) until 20 retries have been used up.
 *
 * An attempt is "stale" once `smartAcmeStartGeneration` has moved on or
 * `this.smartAcme` no longer refers to the instance captured at the start of
 * the attempt. Staleness is re-checked after every `await`, because other
 * code can run while this method is suspended; a stale attempt must not
 * touch the retry counter or the retry timer.
 *
 * @param generation value of `smartAcmeStartGeneration` this attempt belongs to
 */
private async runSmartAcmeStartAttempt(generation: number): Promise<void> {
  const smartAcme = this.smartAcme;
  if (!smartAcme || generation !== this.smartAcmeStartGeneration) {
    return;
  }

  const maxRetries = 20;
  const baseDelayMs = 5000;
  const maxDelayMs = 3_600_000;

  const isStale = (): boolean =>
    generation !== this.smartAcmeStartGeneration || this.smartAcme !== smartAcme;
  // Thrown values are not guaranteed to be Error instances.
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  const startPromise = smartAcme.start();
  this.smartAcmeStartPromise = startPromise;

  try {
    await startPromise;

    if (isStale()) {
      // Superseded while starting: shut the captured instance down again.
      await smartAcme.stop().catch((err: unknown) => {
        logger.log('warn', `Failed to stop stale SmartAcme instance: ${describeError(err)}`);
      });
      return;
    }

    this.smartAcmeReady = true;
    this.smartAcmeRetryAttempt = 0;
    logger.log('info', 'SmartAcme DNS-01 provider is now ready');
    this.retriggerCertificateProvisioningAfterSmartAcmeReady();
  } catch (err: unknown) {
    if (isStale()) {
      return;
    }

    this.smartAcmeReady = false;
    await smartAcme.stop().catch((stopErr: unknown) => {
      logger.log('warn', `Failed to clean up SmartAcme after startup failure: ${describeError(stopErr)}`);
    });

    // The cleanup above suspended this method. If a shutdown or a new startup
    // cycle began in the meantime, bumping the counter or re-arming the timer
    // here would interfere with (or cancel) the newer cycle's pending attempt.
    if (isStale()) {
      return;
    }

    this.smartAcmeRetryAttempt++;
    if (this.smartAcmeRetryAttempt > maxRetries) {
      logger.log('error', `SmartAcme DNS-01 provider failed after ${maxRetries} startup attempts: ${describeError(err)}`);
      return;
    }

    const delayMs = Math.min(baseDelayMs * Math.pow(2, this.smartAcmeRetryAttempt - 1), maxDelayMs);
    const jitter = 0.8 + Math.random() * 0.4;
    const actualDelayMs = Math.floor(delayMs * jitter);
    logger.log('warn', `SmartAcme DNS-01 provider startup failed: ${describeError(err)}; retrying in ${actualDelayMs}ms (attempt ${this.smartAcmeRetryAttempt}/${maxRetries})`);
    this.scheduleSmartAcmeStart(generation, actualDelayMs);
  } finally {
    // Only clear the tracked promise if it is still the one from this attempt.
    if (this.smartAcmeStartPromise === startPromise) {
      this.smartAcmeStartPromise = undefined;
    }
  }
}
|
||||
|
||||
/**
 * Re-applies the current routes so certificate provisioning runs again once
 * SmartAcme is ready.
 *
 * Background: while SmartAcme is not ready, certProvisionFunction returns
 * 'http01', but Rust ACME is disabled whenever certProvisionFunction is set.
 * Re-applying routes retries provisioning now that DNS-01 is available.
 *
 * Uses RouteConfigManager when present; otherwise clears the cert provision
 * scheduler (if any) and re-sends SmartProxy's current routes. Failures are
 * logged as warnings and not propagated.
 */
private retriggerCertificateProvisioningAfterSmartAcmeReady(): void {
  const warnOnFailure = (err: any): void => {
    logger.log('warn', `Failed to re-trigger cert provisioning: ${err?.message || err}`);
  };

  const manager = this.routeConfigManager;
  if (manager) {
    logger.log('info', 'Re-triggering certificate provisioning via RouteConfigManager');
    manager.applyRoutes().catch(warnOnFailure);
    return;
  }

  const proxy = this.smartProxy;
  if (!proxy) {
    return;
  }

  // No RouteConfigManager: push the proxy's own current route list back to it.
  this.certProvisionScheduler?.clear();
  const routesToResend = proxy.routeManager.getRoutes();
  logger.log('info', `Re-triggering certificate provisioning for ${routesToResend.length} routes`);
  proxy.updateRoutes(routesToResend).catch(warnOnFailure);
}
|
||||
|
||||
/**
 * Cancels the pending SmartAcme retry timer, if one is stored, and forgets
 * its handle. Does nothing when no timer is stored.
 */
private clearSmartAcmeRetryTimer(): void {
  const pendingTimer = this.smartAcmeRetryTimer;
  if (!pendingTimer) {
    return;
  }

  clearTimeout(pendingTimer);
  this.smartAcmeRetryTimer = undefined;
}
|
||||
|
||||
/**
 * Shuts down the current SmartAcme startup cycle and instance.
 *
 * Increments the start generation, resets the ready flag and retry counter,
 * and cancels any pending retry timer. If an instance exists it is stopped;
 * a stop error is logged, not thrown. Afterwards `this.smartAcme` is cleared,
 * but only if it still refers to the instance that was stopped.
 */
private async stopSmartAcme(): Promise<void> {
  // Invalidate the running cycle before doing anything asynchronous.
  this.smartAcmeStartGeneration++;
  this.clearSmartAcmeRetryTimer();
  this.smartAcmeRetryAttempt = 0;
  this.smartAcmeReady = false;

  const instanceToStop = this.smartAcme;
  if (instanceToStop) {
    try {
      await instanceToStop.stop();
    } catch (err) {
      logger.log('error', 'Error stopping SmartAcme', { error: String(err) });
    } finally {
      // Leave a replacement instance installed during the await untouched.
      if (this.smartAcme === instanceToStop) {
        this.smartAcme = undefined;
      }
    }
  }
}
|
||||
|
||||
public async start() {
|
||||
await this.checkSystemLimits();
|
||||
logger.log('info', 'Starting DcRouter Services');
|
||||
@@ -1093,17 +1199,13 @@ export class DcRouter {
|
||||
// Initialize cert provision scheduler
|
||||
this.certProvisionScheduler = new CertProvisionScheduler();
|
||||
|
||||
// If we have DNS challenge handlers, create SmartAcme instance and wire certProvisionFunction
|
||||
// Note: SmartAcme.start() is NOT called here — it runs as a separate optional service
|
||||
// via the ServiceManager, with aggressive retry for rate-limit resilience.
|
||||
// If we have DNS challenge handlers, create SmartAcme instance and wire certProvisionFunction.
|
||||
// SmartAcme starts in the background because ACME account setup can be slow or rate-limited,
|
||||
// and must not block dcrouter's global startup timeout.
|
||||
if (this.smartAcme) {
|
||||
await this.stopSmartAcme();
|
||||
}
|
||||
if (challengeHandlers.length > 0) {
|
||||
// Stop old SmartAcme if it exists (e.g., during updateSmartProxyConfig)
|
||||
if (this.smartAcme) {
|
||||
this.smartAcmeReady = false;
|
||||
await this.smartAcme.stop().catch(err =>
|
||||
logger.log('error', 'Error stopping old SmartAcme', { error: String(err) })
|
||||
);
|
||||
}
|
||||
// Safe non-null: challengeHandlers.length > 0 implies both dnsManager
|
||||
// and acmeConfig exist (enforced above).
|
||||
this.smartAcme = new plugins.smartacme.SmartAcme({
|
||||
@@ -1113,6 +1215,9 @@ export class DcRouter {
|
||||
challengeHandlers: challengeHandlers,
|
||||
challengePriority: ['dns-01'],
|
||||
});
|
||||
if (this.smartAcmeServiceStarted) {
|
||||
this.startSmartAcmeInBackground();
|
||||
}
|
||||
|
||||
const scheduler = this.certProvisionScheduler;
|
||||
smartProxyConfig.certProvisionFallbackToAcme = false;
|
||||
|
||||
Reference in New Issue
Block a user