fix(rustproxy): Use cooperative cancellation for background tasks, prune stale caches and metric entries, and switch tests to dynamic port allocation to avoid port conflicts

This commit is contained in:
2026-02-24 20:56:37 +00:00
parent 755c81c042
commit 33cd5330c4
24 changed files with 535 additions and 560 deletions

View File

@@ -51,6 +51,7 @@ use rustproxy_passthrough::{TcpListenerManager, TlsCertConfig, ConnectionConfig}
use rustproxy_metrics::{MetricsCollector, Metrics, Statistics};
use rustproxy_tls::{CertManager, CertStore, CertBundle, CertMetadata, CertSource};
use rustproxy_nftables::{NftManager, rule_builder};
use tokio_util::sync::CancellationToken;
/// Certificate status.
#[derive(Debug, Clone)]
@@ -79,6 +80,8 @@ pub struct RustProxy {
socket_handler_relay: Arc<std::sync::RwLock<Option<String>>>,
/// Dynamically loaded certificates (via loadCertificate IPC), independent of CertManager.
loaded_certs: HashMap<String, TlsCertConfig>,
/// Cancellation token for cooperative shutdown of background tasks.
cancel_token: CancellationToken,
}
impl RustProxy {
@@ -121,6 +124,7 @@ impl RustProxy {
started_at: None,
socket_handler_relay: Arc::new(std::sync::RwLock::new(None)),
loaded_certs: HashMap::new(),
cancel_token: CancellationToken::new(),
})
}
@@ -299,18 +303,26 @@ impl RustProxy {
self.started = true;
self.started_at = Some(Instant::now());
// Start the throughput sampling task
// Start the throughput sampling task with cooperative cancellation
let metrics = Arc::clone(&self.metrics);
let conn_tracker = self.listener_manager.as_ref().unwrap().conn_tracker().clone();
let interval_ms = self.options.metrics.as_ref()
.and_then(|m| m.sample_interval_ms)
.unwrap_or(1000);
let sampling_cancel = self.cancel_token.clone();
self.sampling_handle = Some(tokio::spawn(async move {
let mut interval = tokio::time::interval(
std::time::Duration::from_millis(interval_ms)
);
loop {
interval.tick().await;
metrics.sample_all();
tokio::select! {
_ = sampling_cancel.cancelled() => break,
_ = interval.tick() => {
metrics.sample_all();
// Periodically clean up stale rate-limit timestamp entries
conn_tracker.cleanup_stale_timestamps();
}
}
}
}));
@@ -457,51 +469,59 @@ impl RustProxy {
.unwrap_or(80);
let interval = std::time::Duration::from_secs(check_interval_hours as u64 * 3600);
let renewal_cancel = self.cancel_token.clone();
let handle = tokio::spawn(async move {
loop {
tokio::time::sleep(interval).await;
debug!("Certificate renewal check triggered (interval: {}h)", check_interval_hours);
tokio::select! {
_ = renewal_cancel.cancelled() => {
debug!("Renewal timer shutting down");
break;
}
_ = tokio::time::sleep(interval) => {
debug!("Certificate renewal check triggered (interval: {}h)", check_interval_hours);
// Check which domains need renewal
let domains = {
let cm = cm_arc.lock().await;
cm.check_renewals()
};
// Check which domains need renewal
let domains = {
let cm = cm_arc.lock().await;
cm.check_renewals()
};
if domains.is_empty() {
debug!("No certificates need renewal");
continue;
}
info!("Renewing {} certificate(s)", domains.len());
// Start challenge server for renewals
let mut cs = challenge_server::ChallengeServer::new();
if let Err(e) = cs.start(acme_port).await {
error!("Failed to start challenge server for renewal: {}", e);
continue;
}
for domain in &domains {
let cs_ref = &cs;
let mut cm = cm_arc.lock().await;
let result = cm.renew_domain(domain, |token, key_auth| {
cs_ref.set_challenge(token, key_auth);
async {}
}).await;
match result {
Ok(_bundle) => {
info!("Successfully renewed certificate for {}", domain);
if domains.is_empty() {
debug!("No certificates need renewal");
continue;
}
Err(e) => {
error!("Failed to renew certificate for {}: {}", domain, e);
info!("Renewing {} certificate(s)", domains.len());
// Start challenge server for renewals
let mut cs = challenge_server::ChallengeServer::new();
if let Err(e) = cs.start(acme_port).await {
error!("Failed to start challenge server for renewal: {}", e);
continue;
}
for domain in &domains {
let cs_ref = &cs;
let mut cm = cm_arc.lock().await;
let result = cm.renew_domain(domain, |token, key_auth| {
cs_ref.set_challenge(token, key_auth);
async {}
}).await;
match result {
Ok(_bundle) => {
info!("Successfully renewed certificate for {}", domain);
}
Err(e) => {
error!("Failed to renew certificate for {}: {}", domain, e);
}
}
}
cs.stop().await;
}
}
cs.stop().await;
}
});
@@ -516,14 +536,17 @@ impl RustProxy {
info!("Stopping RustProxy...");
// Stop sampling task
// Signal all background tasks to stop cooperatively
self.cancel_token.cancel();
// Await sampling task (cooperative shutdown)
if let Some(handle) = self.sampling_handle.take() {
handle.abort();
let _ = handle.await;
}
// Stop renewal timer
// Await renewal timer (cooperative shutdown)
if let Some(handle) = self.renewal_handle.take() {
handle.abort();
let _ = handle.await;
}
// Stop challenge server if running
@@ -545,6 +568,8 @@ impl RustProxy {
}
self.listener_manager = None;
self.started = false;
// Reset cancel token so proxy can be restarted
self.cancel_token = CancellationToken::new();
info!("RustProxy stopped");
Ok(())
@@ -585,6 +610,8 @@ impl RustProxy {
// Update listener manager
if let Some(ref mut listener) = self.listener_manager {
listener.update_route_manager(Arc::clone(&new_manager));
// Prune HTTP proxy caches (rate limiters, regex cache, round-robin counters)
listener.prune_http_proxy_caches(&active_route_ids);
// Update TLS configs
let mut tls_configs = Self::extract_tls_configs(&routes);
@@ -983,3 +1010,21 @@ impl RustProxy {
configs
}
}
/// Safety net: abort background tasks if RustProxy is dropped without calling stop().
/// Normal shutdown should still use stop() for graceful behavior.
impl Drop for RustProxy {
fn drop(&mut self) {
self.cancel_token.cancel();
if let Some(handle) = self.sampling_handle.take() {
handle.abort();
}
if let Some(handle) = self.renewal_handle.take() {
handle.abort();
}
// Cancel the listener manager's token and abort accept loops
if let Some(ref mut listener) = self.listener_manager {
listener.stop_all();
}
}
}