fix(rustproxy-http,rustproxy-passthrough): improve upstream connection cleanup and graceful tunnel shutdown
@@ -97,16 +97,25 @@ pub async fn forward_bidirectional_with_timeouts(
     let last_activity = Arc::new(AtomicU64::new(0));
     let start = std::time::Instant::now();
 
+    // Per-connection cancellation token: the watchdog cancels this instead of
+    // aborting tasks, so the copy loops can shut down gracefully (TCP FIN instead
+    // of RST, TLS close_notify if the stream is TLS-wrapped).
+    let conn_cancel = CancellationToken::new();
+
     let la1 = Arc::clone(&last_activity);
     let initial_len = initial_data.map_or(0u64, |d| d.len() as u64);
     let metrics_c2b = metrics.clone();
+    let cc1 = conn_cancel.clone();
     let c2b = tokio::spawn(async move {
         let mut buf = vec![0u8; 65536];
         let mut total = initial_len;
         loop {
-            let n = match client_read.read(&mut buf).await {
-                Ok(0) | Err(_) => break,
-                Ok(n) => n,
+            let n = tokio::select! {
+                result = client_read.read(&mut buf) => match result {
+                    Ok(0) | Err(_) => break,
+                    Ok(n) => n,
+                },
+                _ = cc1.cancelled() => break,
             };
             if backend_write.write_all(&buf[..n]).await.is_err() {
                 break;
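Note (illustrative, not part of the diff): the core of this change is racing each read against a per-connection CancellationToken, so the watchdog can ask the loop to stop between reads instead of aborting the task mid-write. A minimal self-contained sketch of the pattern, with hypothetical names (copy_until_cancelled is not from the codebase):

    // Sketch only: mirrors the diff's select!-based copy loop.
    use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
    use tokio_util::sync::CancellationToken;

    async fn copy_until_cancelled<R, W>(mut read: R, mut write: W, cancel: CancellationToken) -> u64
    where
        R: AsyncRead + Unpin,
        W: AsyncWrite + Unpin,
    {
        let mut buf = vec![0u8; 65536];
        let mut total = 0u64;
        loop {
            let n = tokio::select! {
                // Cancellation is observed between reads, so the loop exits
                // at a clean boundary rather than being torn down mid-write.
                _ = cancel.cancelled() => break,
                result = read.read(&mut buf) => match result {
                    Ok(0) | Err(_) => break, // EOF or read error
                    Ok(n) => n,
                },
            };
            if write.write_all(&buf[..n]).await.is_err() {
                break;
            }
            total += n as u64;
        }
        // Exiting the loop (including via cancellation) falls through to a
        // graceful close: FIN for TCP, close_notify for a TLS writer.
        let _ = write.shutdown().await;
        total
    }

Because cancellation exits the loop rather than killing the task, the graceful shutdown path below still runs.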
@@ -117,19 +126,27 @@ pub async fn forward_bidirectional_with_timeouts(
                 ctx.collector.record_bytes(n as u64, 0, ctx.route_id.as_deref(), ctx.source_ip.as_deref());
             }
         }
-        let _ = backend_write.shutdown().await;
+        // Graceful shutdown with timeout (sends TCP FIN / TLS close_notify)
+        let _ = tokio::time::timeout(
+            std::time::Duration::from_secs(2),
+            backend_write.shutdown(),
+        ).await;
         total
     });
 
     let la2 = Arc::clone(&last_activity);
     let metrics_b2c = metrics;
+    let cc2 = conn_cancel.clone();
     let b2c = tokio::spawn(async move {
         let mut buf = vec![0u8; 65536];
         let mut total = 0u64;
         loop {
-            let n = match backend_read.read(&mut buf).await {
-                Ok(0) | Err(_) => break,
-                Ok(n) => n,
+            let n = tokio::select! {
+                result = backend_read.read(&mut buf) => match result {
+                    Ok(0) | Err(_) => break,
+                    Ok(n) => n,
+                },
+                _ = cc2.cancelled() => break,
             };
             if client_write.write_all(&buf[..n]).await.is_err() {
                 break;
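Note: AsyncWriteExt::shutdown flushes buffered data and performs the protocol-level close (TCP FIN, or a TLS close_notify for a wrapped stream), so it can hang if the peer has stopped reading. That is the reason for the 2-second bound above. A hedged sketch of the same idea as a standalone helper (shutdown_with_deadline is a hypothetical name):

    use tokio::io::{AsyncWrite, AsyncWriteExt};

    // Sketch: bound the graceful close so a dead peer cannot pin the task.
    async fn shutdown_with_deadline<W: AsyncWrite + Unpin>(mut w: W) {
        match tokio::time::timeout(std::time::Duration::from_secs(2), w.shutdown()).await {
            Ok(Ok(())) => {}       // clean FIN / close_notify delivered
            Ok(Err(_io_err)) => {} // peer already gone; nothing left to do
            Err(_elapsed) => {}    // peer not reading; dropping the socket closes it hard
        }
    }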
@@ -140,14 +157,20 @@ pub async fn forward_bidirectional_with_timeouts(
                 ctx.collector.record_bytes(0, n as u64, ctx.route_id.as_deref(), ctx.source_ip.as_deref());
             }
         }
-        let _ = client_write.shutdown().await;
+        // Graceful shutdown with timeout (sends TCP FIN / TLS close_notify)
+        let _ = tokio::time::timeout(
+            std::time::Duration::from_secs(2),
+            client_write.shutdown(),
+        ).await;
         total
     });
 
-    // Watchdog: inactivity, max lifetime, and cancellation
+    // Watchdog: inactivity, max lifetime, and cancellation.
+    // First cancels the per-connection token for graceful shutdown (FIN/close_notify),
+    // then falls back to abort if the tasks are stuck (e.g. on a blocked write_all).
     let la_watch = Arc::clone(&last_activity);
-    let c2b_handle = c2b.abort_handle();
-    let b2c_handle = b2c.abort_handle();
+    let c2b_abort = c2b.abort_handle();
+    let b2c_abort = b2c.abort_handle();
     let watchdog = tokio::spawn(async move {
         let check_interval = std::time::Duration::from_secs(5);
         let mut last_seen = 0u64;
@@ -155,16 +178,12 @@ pub async fn forward_bidirectional_with_timeouts(
             tokio::select! {
                 _ = cancel.cancelled() => {
                     debug!("Connection cancelled by shutdown");
-                    c2b_handle.abort();
-                    b2c_handle.abort();
                     break;
                 }
                 _ = tokio::time::sleep(check_interval) => {
                     // Check max lifetime
                     if start.elapsed() >= max_lifetime {
                         debug!("Connection exceeded max lifetime, closing");
-                        c2b_handle.abort();
-                        b2c_handle.abort();
                         break;
                     }
 
@@ -174,8 +193,6 @@ pub async fn forward_bidirectional_with_timeouts(
                     let elapsed_since_activity = start.elapsed().as_millis() as u64 - current;
                     if elapsed_since_activity >= inactivity_timeout.as_millis() as u64 {
                         debug!("Connection inactive for {}ms, closing", elapsed_since_activity);
-                        c2b_handle.abort();
-                        b2c_handle.abort();
                         break;
                     }
                 }
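Note: the inactivity check relies on the copy loops storing "milliseconds since connection start" into a shared AtomicU64 on every transfer, which lets the watchdog compute idle time without locking. A simplified sketch of that bookkeeping (helper names are illustrative, not from the codebase):

    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::Instant;

    // Copy loops call this after each successful transfer.
    fn record_activity(start: &Instant, last_activity: &AtomicU64) {
        last_activity.store(start.elapsed().as_millis() as u64, Ordering::Relaxed);
    }

    // Watchdog derives idle time the same way the diff does: load the last
    // reading first, then subtract it from a fresh elapsed() reading. Because
    // Instant::elapsed is monotonic and the stored value is always an earlier
    // reading, the subtraction cannot underflow.
    fn idle_ms(start: &Instant, last_activity: &AtomicU64) -> u64 {
        let current = last_activity.load(Ordering::Relaxed);
        start.elapsed().as_millis() as u64 - current
    }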
@@ -183,6 +200,13 @@ pub async fn forward_bidirectional_with_timeouts(
                 }
             }
         }
+        // Phase 1: Signal copy loops to exit gracefully (allows FIN/close_notify)
+        conn_cancel.cancel();
+        // Phase 2: Wait for graceful shutdown (2s shutdown timeout + 2s margin)
+        tokio::time::sleep(std::time::Duration::from_secs(4)).await;
+        // Phase 3: Force-abort if still stuck (e.g. blocked on write_all)
+        c2b_abort.abort();
+        b2c_abort.abort();
     });
 
     let bytes_in = c2b.await.unwrap_or(0);
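Note: the three phases form an escalation ladder: cooperative cancel, a grace window sized to cover the 2-second shutdown timeout, then abort as a last resort. A condensed sketch of that exit path (simplified signature, not the codebase's):

    use std::time::Duration;
    use tokio::task::AbortHandle;
    use tokio_util::sync::CancellationToken;

    // Sketch of the watchdog's exit path once it decides to close the tunnel.
    async fn shut_down_tunnel(conn_cancel: CancellationToken, handles: [AbortHandle; 2]) {
        // Phase 1: cooperative; the select! arms in the copy loops observe this.
        conn_cancel.cancel();
        // Phase 2: grace window: 2s shutdown timeout plus 2s margin.
        tokio::time::sleep(Duration::from_secs(4)).await;
        // Phase 3: anything still running is stuck (e.g. in write_all against
        // a peer that stopped reading); abort is the last resort.
        for h in handles {
            h.abort();
        }
    }

Calling abort() on a task that has already completed is a no-op, which is why the unconditional Phase 3 is harmless on the fast path.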
@@ -465,21 +465,19 @@ impl TcpListenerManager {
                 Ok((stream, peer_addr)) => {
                     let ip = peer_addr.ip();
 
-                    // Global connection limit — acquire semaphore permit with timeout
-                    let permit = match tokio::time::timeout(
-                        std::time::Duration::from_secs(5),
-                        conn_semaphore.clone().acquire_owned(),
-                    ).await {
-                        Ok(Ok(permit)) => permit,
-                        Ok(Err(_)) => {
-                            // Semaphore closed — shouldn't happen, but be safe
-                            debug!("Connection semaphore closed, dropping connection from {}", peer_addr);
+                    // Global connection limit — non-blocking check.
+                    // MUST NOT block the accept loop: a blocking acquire would stall
+                    // ALL connections to this port (not just the one over limit), because
+                    // listener.accept() is not polled while we await the semaphore.
+                    let permit = match conn_semaphore.clone().try_acquire_owned() {
+                        Ok(permit) => permit,
+                        Err(tokio::sync::TryAcquireError::NoPermits) => {
+                            debug!("Global connection limit reached, dropping connection from {}", peer_addr);
                             drop(stream);
                             continue;
                         }
-                        Err(_) => {
-                            // Timeout — global limit reached
-                            debug!("Global connection limit reached, dropping connection from {}", peer_addr);
+                        Err(tokio::sync::TryAcquireError::Closed) => {
+                            debug!("Connection semaphore closed, dropping connection from {}", peer_addr);
                             drop(stream);
                             continue;
                         }
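Note: the key invariant here is that the accept loop never awaits anything but accept() itself. A self-contained sketch of the admission pattern (accept_loop and its shape are illustrative, not the actual listener code):

    use std::sync::Arc;
    use tokio::net::TcpListener;
    use tokio::sync::{Semaphore, TryAcquireError};

    async fn accept_loop(listener: TcpListener, conn_semaphore: Arc<Semaphore>) {
        loop {
            let Ok((stream, peer_addr)) = listener.accept().await else { continue };
            // try_acquire_owned never awaits, so a saturated limit sheds the
            // new connection instead of stalling accept() for every client.
            let permit = match conn_semaphore.clone().try_acquire_owned() {
                Ok(permit) => permit,
                Err(TryAcquireError::NoPermits) | Err(TryAcquireError::Closed) => {
                    drop(stream); // close immediately
                    continue;
                }
            };
            tokio::spawn(async move {
                let _permit = permit; // released when the connection task ends
                // ... proxy `stream` from `peer_addr` ...
                let _ = (stream, peer_addr);
            });
        }
    }

The owned permit travels into the connection task, so the slot is released exactly when the connection finishes, however it ends.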
@@ -1396,15 +1394,24 @@ impl TcpListenerManager {
         let last_activity = Arc::new(AtomicU64::new(0));
         let start = std::time::Instant::now();
 
+        // Per-connection cancellation token: the watchdog cancels this instead of
+        // aborting tasks, so the copy loops can shut down gracefully (TLS close_notify
+        // for terminate/reencrypt mode, TCP FIN for passthrough mode).
+        let conn_cancel = CancellationToken::new();
+
         let la1 = Arc::clone(&last_activity);
         let metrics_c2b = metrics.clone();
+        let cc1 = conn_cancel.clone();
         let c2b = tokio::spawn(async move {
             let mut buf = vec![0u8; 65536];
             let mut total = 0u64;
             loop {
-                let n = match client_read.read(&mut buf).await {
-                    Ok(0) | Err(_) => break,
-                    Ok(n) => n,
+                let n = tokio::select! {
+                    result = client_read.read(&mut buf) => match result {
+                        Ok(0) | Err(_) => break,
+                        Ok(n) => n,
+                    },
+                    _ = cc1.cancelled() => break,
                };
                if backend_write.write_all(&buf[..n]).await.is_err() {
                    break;
@@ -1418,19 +1425,27 @@ impl TcpListenerManager {
                     ctx.collector.record_bytes(n as u64, 0, ctx.route_id.as_deref(), ctx.source_ip.as_deref());
                 }
             }
-            let _ = backend_write.shutdown().await;
+            // Graceful shutdown with timeout (sends TLS close_notify / TCP FIN)
+            let _ = tokio::time::timeout(
+                std::time::Duration::from_secs(2),
+                backend_write.shutdown(),
+            ).await;
             total
         });
 
         let la2 = Arc::clone(&last_activity);
         let metrics_b2c = metrics;
+        let cc2 = conn_cancel.clone();
         let b2c = tokio::spawn(async move {
             let mut buf = vec![0u8; 65536];
             let mut total = 0u64;
             loop {
-                let n = match backend_read.read(&mut buf).await {
-                    Ok(0) | Err(_) => break,
-                    Ok(n) => n,
+                let n = tokio::select! {
+                    result = backend_read.read(&mut buf) => match result {
+                        Ok(0) | Err(_) => break,
+                        Ok(n) => n,
+                    },
+                    _ = cc2.cancelled() => break,
                };
                if client_write.write_all(&buf[..n]).await.is_err() {
                    break;
@@ -1444,14 +1459,20 @@ impl TcpListenerManager {
                     ctx.collector.record_bytes(0, n as u64, ctx.route_id.as_deref(), ctx.source_ip.as_deref());
                 }
             }
-            let _ = client_write.shutdown().await;
+            // Graceful shutdown with timeout (sends TLS close_notify / TCP FIN)
+            let _ = tokio::time::timeout(
+                std::time::Duration::from_secs(2),
+                client_write.shutdown(),
+            ).await;
             total
         });
 
-        // Watchdog task: check for inactivity, max lifetime, and cancellation
+        // Watchdog task: check for inactivity, max lifetime, and cancellation.
+        // First cancels the per-connection token for graceful shutdown (close_notify/FIN),
+        // then falls back to abort if the tasks are stuck (e.g. on a blocked write_all).
         let la_watch = Arc::clone(&last_activity);
-        let c2b_handle = c2b.abort_handle();
-        let b2c_handle = b2c.abort_handle();
+        let c2b_abort = c2b.abort_handle();
+        let b2c_abort = b2c.abort_handle();
         let watchdog = tokio::spawn(async move {
             let check_interval = std::time::Duration::from_secs(5);
             let mut last_seen = 0u64;
@@ -1459,16 +1480,12 @@ impl TcpListenerManager {
                 tokio::select! {
                     _ = cancel.cancelled() => {
                         debug!("Split-stream connection cancelled by shutdown");
-                        c2b_handle.abort();
-                        b2c_handle.abort();
                         break;
                     }
                     _ = tokio::time::sleep(check_interval) => {
                         // Check max lifetime
                         if start.elapsed() >= max_lifetime {
                             debug!("Connection exceeded max lifetime, closing");
-                            c2b_handle.abort();
-                            b2c_handle.abort();
                             break;
                         }
 
@@ -1479,8 +1496,6 @@ impl TcpListenerManager {
                         let elapsed_since_activity = start.elapsed().as_millis() as u64 - current;
                         if elapsed_since_activity >= inactivity_timeout.as_millis() as u64 {
                             debug!("Connection inactive for {}ms, closing", elapsed_since_activity);
-                            c2b_handle.abort();
-                            b2c_handle.abort();
                             break;
                         }
                     }
@@ -1488,6 +1503,13 @@ impl TcpListenerManager {
                     }
                 }
             }
+            // Phase 1: Signal copy loops to exit gracefully (allows close_notify/FIN)
+            conn_cancel.cancel();
+            // Phase 2: Wait for graceful shutdown (2s shutdown timeout + 2s margin)
+            tokio::time::sleep(std::time::Duration::from_secs(4)).await;
+            // Phase 3: Force-abort if still stuck (e.g. blocked on write_all)
+            c2b_abort.abort();
+            b2c_abort.abort();
         });
 
         let bytes_in = c2b.await.unwrap_or(0);
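Note on observable behavior: the point of the graceful path is what the peer sees. After a FIN (or a TLS close_notify) the peer's next read yields a clean EOF; after an abort-and-drop it may instead see a connection reset, or, for TLS, an EOF without close_notify that looks like truncation. A small client-side sketch of the difference (illustrative only):

    use tokio::io::AsyncReadExt;
    use tokio::net::TcpStream;

    async fn observe_close(mut stream: TcpStream) {
        let mut buf = [0u8; 1024];
        match stream.read(&mut buf).await {
            Ok(0) => println!("clean EOF: peer sent FIN"),               // graceful path
            Ok(n) => println!("{n} bytes still in flight before close"),
            Err(e) => println!("hard close: {e}"),                       // e.g. connection reset
        }
    }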