fix(remoteingress-core): improve tunnel liveness handling and enable TCP keepalive for accepted client sockets

This commit is contained in:
2026-03-16 17:39:25 +00:00
parent 1d59a48648
commit f8a0171ef3
7 changed files with 45 additions and 6 deletions

13
rust/Cargo.lock generated
View File

@@ -558,6 +558,7 @@ dependencies = [
"rustls-pemfile",
"serde",
"serde_json",
"socket2 0.5.10",
"tokio",
"tokio-rustls",
"tokio-util",
@@ -701,6 +702,16 @@ version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "socket2"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "socket2"
version = "0.6.2"
@@ -765,7 +776,7 @@ dependencies = [
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2",
"socket2 0.6.2",
"tokio-macros",
"windows-sys 0.61.2",
]

View File

@@ -14,3 +14,4 @@ serde_json = "1"
log = "0.4"
rustls-pemfile = "2"
tokio-util = "0.7"
socket2 = "0.5"

View File

@@ -494,8 +494,10 @@ async fn connect_to_hub_and_run(
FRAME_PING => {
let pong_frame = encode_frame(0, FRAME_PONG, &[]);
if tunnel_writer_tx.try_send(pong_frame).is_err() {
log::warn!("Failed to send PONG, writer channel full/closed");
break EdgeLoopResult::Reconnect;
// Control channel full (WINDOW_UPDATE burst from many streams).
// DON'T disconnect — the 45s liveness timeout gives margin
// for the channel to drain and the next PONG to succeed.
log::warn!("PONG send failed, control channel full — skipping this cycle");
}
log::trace!("Received PING from hub, sent PONG");
}
@@ -588,6 +590,15 @@ fn apply_port_config(
accept_result = listener.accept() => {
match accept_result {
Ok((client_stream, client_addr)) => {
// TCP keepalive detects dead clients that disappear without FIN.
// Without this, zombie streams accumulate and never get cleaned up.
let _ = client_stream.set_nodelay(true);
let ka = socket2::TcpKeepalive::new()
.with_time(Duration::from_secs(60));
#[cfg(target_os = "linux")]
let ka = ka.with_interval(Duration::from_secs(60));
let _ = socket2::SockRef::from(&client_stream).set_tcp_keepalive(&ka);
let stream_id = next_stream_id.fetch_add(1, Ordering::Relaxed);
let tunnel_ctrl_tx = tunnel_ctrl_tx.clone();
let tunnel_data_tx = tunnel_data_tx.clone();

View File

@@ -726,8 +726,9 @@ async fn handle_edge_connection(
_ = ping_ticker.tick() => {
let ping_frame = encode_frame(0, FRAME_PING, &[]);
if frame_writer_tx.try_send(ping_frame).is_err() {
log::warn!("Failed to send PING to edge {}, writer channel full/closed", edge_id);
break;
// Control channel full — skip this PING cycle.
// The 45s liveness timeout gives margin for the channel to drain.
log::warn!("PING send to edge {} failed, control channel full — skipping", edge_id);
}
log::trace!("Sent PING to edge {}", edge_id);
}

View File

@@ -32,6 +32,15 @@ pub fn encode_window_update(stream_id: u32, frame_type: u8, increment: u32) -> V
encode_frame(stream_id, frame_type, &increment.to_be_bytes())
}
/// Derive the per-stream flow-control window from the number of active streams.
///
/// A fixed 32 MiB budget is divided evenly between `active` streams (a count of
/// zero is treated as one so the division is always defined). The resulting share
/// is then bounded below by 64 KiB and above by `INITIAL_STREAM_WINDOW`, so a
/// handful of streams each get the full initial window while a large number of
/// streams each get a smaller one.
///
/// Returns the window size in bytes.
pub fn compute_window_for_stream_count(active: u32) -> u32 {
    /// Total number of bytes shared by every stream's window.
    const TOTAL_BUDGET_BYTES: u64 = 32 * 1024 * 1024;
    /// Smallest window a stream is ever given, regardless of stream count.
    const MIN_WINDOW_BYTES: u64 = 64 * 1024;

    // Guard against division by zero when no stream is active yet.
    let stream_count = u64::from(active.max(1));
    let ceiling = INITIAL_STREAM_WINDOW as u64;
    let share = TOTAL_BUDGET_BYTES / stream_count;

    share.clamp(MIN_WINDOW_BYTES, ceiling) as u32
}
/// Decode a WINDOW_UPDATE payload into a byte increment. Returns None if payload is malformed.
pub fn decode_window_update(payload: &[u8]) -> Option<u32> {
if payload.len() != 4 {