Compare commits

..

4 Commits

5 changed files with 97 additions and 13 deletions

View File

@@ -1,5 +1,18 @@
# Changelog # Changelog
## 2026-03-17 - 4.7.2 - fix(remoteingress-core)
add tunnel write timeouts and scale initial stream windows by active stream count
- Wrap tunnel frame writes and flushes in a 30-second timeout on both edge and hub to detect stalled writers and trigger faster reconnect or cleanup.
- Compute each stream's initial send window from the current active stream count instead of using a fixed window to keep total in-flight data within the 32MB budget.
## 2026-03-17 - 4.7.1 - fix(remoteingress-core)
improve tunnel failure detection and reconnect handling
- Enable TCP keepalive on edge and hub connections to detect silent network failures sooner
- Trigger immediate reconnect or disconnect when tunnel writer tasks fail instead of waiting for liveness timeouts
- Prevent active stream counter underflow during concurrent connection cleanup
## 2026-03-16 - 4.7.0 - feat(edge,protocol,test) ## 2026-03-16 - 4.7.0 - feat(edge,protocol,test)
add configurable edge bind address and expand flow-control test coverage add configurable edge bind address and expand flow-control test coverage

View File

@@ -1,6 +1,6 @@
{ {
"name": "@serve.zone/remoteingress", "name": "@serve.zone/remoteingress",
"version": "4.7.0", "version": "4.7.2",
"private": false, "private": false,
"description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.", "description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.",
"main": "dist_ts/index.js", "main": "dist_ts/index.js",

View File

@@ -284,6 +284,13 @@ async fn connect_to_hub_and_run(
Ok(s) => { Ok(s) => {
// Disable Nagle's algorithm for low-latency control frames (PING/PONG, WINDOW_UPDATE) // Disable Nagle's algorithm for low-latency control frames (PING/PONG, WINDOW_UPDATE)
let _ = s.set_nodelay(true); let _ = s.set_nodelay(true);
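// TCP keepalive detects silent network failures (NAT timeout, path change)
// faster than the 45s application-level liveness timeout.
// NOTE(review): only the idle time (30s) and, on Linux, the probe interval (10s)
// are configured here; the probe count is left at the OS default, so the actual
// detection time may exceed 45s — confirm, or set the retry count explicitly.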
let ka = socket2::TcpKeepalive::new()
.with_time(Duration::from_secs(30));
#[cfg(target_os = "linux")]
let ka = ka.with_interval(Duration::from_secs(10));
let _ = socket2::SockRef::from(&s).set_tcp_keepalive(&ka);
s s
} }
Err(e) => { Err(e) => {
@@ -388,18 +395,26 @@ async fn connect_to_hub_and_run(
// Legacy alias — control channel for PONG, CLOSE, WINDOW_UPDATE, OPEN // Legacy alias — control channel for PONG, CLOSE, WINDOW_UPDATE, OPEN
let tunnel_writer_tx = tunnel_ctrl_tx.clone(); let tunnel_writer_tx = tunnel_ctrl_tx.clone();
let tw_token = connection_token.clone(); let tw_token = connection_token.clone();
// Oneshot to signal the reader loop when the writer dies from a write error.
// This avoids the 45s liveness timeout delay when the tunnel is already dead.
let (writer_dead_tx, mut writer_dead_rx) = tokio::sync::oneshot::channel::<()>();
let tunnel_writer_handle = tokio::spawn(async move { let tunnel_writer_handle = tokio::spawn(async move {
// BufWriter coalesces small writes (frame headers, control frames) into fewer // BufWriter coalesces small writes (frame headers, control frames) into fewer
// TLS records and syscalls. Flushed after each frame to avoid holding data. // TLS records and syscalls. Flushed after each frame to avoid holding data.
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half); let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
let mut write_error = false;
let write_timeout = Duration::from_secs(30);
loop { loop {
tokio::select! { tokio::select! {
biased; // control frames always take priority over data biased; // control frames always take priority over data
ctrl = tunnel_ctrl_rx.recv() => { ctrl = tunnel_ctrl_rx.recv() => {
match ctrl { match ctrl {
Some(frame_data) => { Some(frame_data) => {
if writer.write_all(&frame_data).await.is_err() { break; } let ok = tokio::time::timeout(write_timeout, async {
if writer.flush().await.is_err() { break; } writer.write_all(&frame_data).await?;
writer.flush().await
}).await;
if !matches!(ok, Ok(Ok(()))) { write_error = true; break; }
} }
None => break, None => break,
} }
@@ -407,8 +422,11 @@ async fn connect_to_hub_and_run(
data = tunnel_data_rx.recv() => { data = tunnel_data_rx.recv() => {
match data { match data {
Some(frame_data) => { Some(frame_data) => {
if writer.write_all(&frame_data).await.is_err() { break; } let ok = tokio::time::timeout(write_timeout, async {
if writer.flush().await.is_err() { break; } writer.write_all(&frame_data).await?;
writer.flush().await
}).await;
if !matches!(ok, Ok(Ok(()))) { write_error = true; break; }
} }
None => break, None => break,
} }
@@ -416,6 +434,10 @@ async fn connect_to_hub_and_run(
_ = tw_token.cancelled() => break, _ = tw_token.cancelled() => break,
} }
} }
if write_error {
log::error!("Tunnel writer failed or stalled, signalling reader for fast reconnect");
let _ = writer_dead_tx.send(());
}
}); });
// Start TCP listeners for initial ports (hot-reloadable) // Start TCP listeners for initial ports (hot-reloadable)
@@ -532,6 +554,10 @@ async fn connect_to_hub_and_run(
liveness_timeout_dur.as_secs()); liveness_timeout_dur.as_secs());
break EdgeLoopResult::Reconnect; break EdgeLoopResult::Reconnect;
} }
_ = &mut writer_dead_rx => {
log::error!("Tunnel writer died, reconnecting immediately");
break EdgeLoopResult::Reconnect;
}
_ = connection_token.cancelled() => { _ = connection_token.cancelled() => {
log::info!("Connection cancelled"); log::info!("Connection cancelled");
break EdgeLoopResult::Shutdown; break EdgeLoopResult::Shutdown;
@@ -636,7 +662,18 @@ fn apply_port_config(
Arc::clone(&active_streams), Arc::clone(&active_streams),
) )
.await; .await;
active_streams.fetch_sub(1, Ordering::Relaxed); // Saturating decrement: prevent underflow when
// edge_main_loop's store(0) races with task cleanup.
loop {
let current = active_streams.load(Ordering::Relaxed);
if current == 0 { break; }
if active_streams.compare_exchange_weak(
current, current - 1,
Ordering::Relaxed, Ordering::Relaxed,
).is_ok() {
break;
}
}
}); });
} }
Err(e) => { Err(e) => {
@@ -682,7 +719,12 @@ async fn handle_client_connection(
// Set up channel for data coming back from hub (capacity 256; flow control bounds in-flight data) // Set up channel for data coming back from hub (capacity 256; flow control bounds in-flight data)
let (back_tx, mut back_rx) = mpsc::channel::<Vec<u8>>(256); let (back_tx, mut back_rx) = mpsc::channel::<Vec<u8>>(256);
let send_window = Arc::new(AtomicU32::new(INITIAL_STREAM_WINDOW)); // Adaptive initial window: scale with current stream count to keep total in-flight
// data within the 32MB budget. Prevents burst flooding when many streams open.
let initial_window = remoteingress_protocol::compute_window_for_stream_count(
active_streams.load(Ordering::Relaxed),
);
let send_window = Arc::new(AtomicU32::new(initial_window));
let window_notify = Arc::new(Notify::new()); let window_notify = Arc::new(Notify::new());
{ {
let mut writers = client_writers.lock().await; let mut writers = client_writers.lock().await;

View File

@@ -300,6 +300,13 @@ async fn handle_edge_connection(
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
// Disable Nagle's algorithm for low-latency control frames (PING/PONG, WINDOW_UPDATE) // Disable Nagle's algorithm for low-latency control frames (PING/PONG, WINDOW_UPDATE)
stream.set_nodelay(true)?; stream.set_nodelay(true)?;
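// TCP keepalive detects silent network failures (NAT timeout, path change)
// faster than the 45s application-level liveness timeout.
// NOTE(review): only the idle time (30s) and, on Linux, the probe interval (10s)
// are configured here; the probe count is left at the OS default, so the actual
// detection time may exceed 45s — confirm, or set the retry count explicitly.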
let ka = socket2::TcpKeepalive::new()
.with_time(Duration::from_secs(30));
#[cfg(target_os = "linux")]
let ka = ka.with_interval(Duration::from_secs(10));
let _ = socket2::SockRef::from(&stream).set_tcp_keepalive(&ka);
let tls_stream = acceptor.accept(stream).await?; let tls_stream = acceptor.accept(stream).await?;
let (read_half, mut write_half) = tokio::io::split(tls_stream); let (read_half, mut write_half) = tokio::io::split(tls_stream);
let mut buf_reader = BufReader::new(read_half); let mut buf_reader = BufReader::new(read_half);
@@ -383,18 +390,24 @@ async fn handle_edge_connection(
// Legacy alias for code that sends both control and data (will be migrated) // Legacy alias for code that sends both control and data (will be migrated)
let frame_writer_tx = ctrl_tx.clone(); let frame_writer_tx = ctrl_tx.clone();
let writer_token = edge_token.clone(); let writer_token = edge_token.clone();
let (writer_dead_tx, mut writer_dead_rx) = tokio::sync::oneshot::channel::<()>();
let writer_handle = tokio::spawn(async move { let writer_handle = tokio::spawn(async move {
// BufWriter coalesces small writes (frame headers, control frames) into fewer // BufWriter coalesces small writes (frame headers, control frames) into fewer
// TLS records and syscalls. Flushed after each frame to avoid holding data. // TLS records and syscalls. Flushed after each frame to avoid holding data.
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half); let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
let mut write_error = false;
let write_timeout = Duration::from_secs(30);
loop { loop {
tokio::select! { tokio::select! {
biased; // control frames always take priority over data biased; // control frames always take priority over data
ctrl = ctrl_rx.recv() => { ctrl = ctrl_rx.recv() => {
match ctrl { match ctrl {
Some(frame_data) => { Some(frame_data) => {
if writer.write_all(&frame_data).await.is_err() { break; } let ok = tokio::time::timeout(write_timeout, async {
if writer.flush().await.is_err() { break; } writer.write_all(&frame_data).await?;
writer.flush().await
}).await;
if !matches!(ok, Ok(Ok(()))) { write_error = true; break; }
} }
None => break, None => break,
} }
@@ -402,8 +415,11 @@ async fn handle_edge_connection(
data = data_rx.recv() => { data = data_rx.recv() => {
match data { match data {
Some(frame_data) => { Some(frame_data) => {
if writer.write_all(&frame_data).await.is_err() { break; } let ok = tokio::time::timeout(write_timeout, async {
if writer.flush().await.is_err() { break; } writer.write_all(&frame_data).await?;
writer.flush().await
}).await;
if !matches!(ok, Ok(Ok(()))) { write_error = true; break; }
} }
None => break, None => break,
} }
@@ -411,6 +427,10 @@ async fn handle_edge_connection(
_ = writer_token.cancelled() => break, _ = writer_token.cancelled() => break,
} }
} }
if write_error {
log::error!("Tunnel writer to edge failed or stalled, signalling reader for fast cleanup");
let _ = writer_dead_tx.send(());
}
}); });
// Spawn task to forward config updates as FRAME_CONFIG frames // Spawn task to forward config updates as FRAME_CONFIG frames
@@ -499,7 +519,12 @@ async fn handle_edge_connection(
// Create channel for data from edge to this stream (capacity 256; flow control bounds in-flight data) // Create channel for data from edge to this stream (capacity 256; flow control bounds in-flight data)
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(256); let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(256);
let send_window = Arc::new(AtomicU32::new(INITIAL_STREAM_WINDOW)); // Adaptive initial window: scale with current stream count
// to keep total in-flight data within the 32MB budget.
let initial_window = compute_window_for_stream_count(
edge_stream_count.load(Ordering::Relaxed),
);
let send_window = Arc::new(AtomicU32::new(initial_window));
let window_notify = Arc::new(Notify::new()); let window_notify = Arc::new(Notify::new());
{ {
let mut s = streams.lock().await; let mut s = streams.lock().await;
@@ -754,6 +779,10 @@ async fn handle_edge_connection(
edge_id, liveness_timeout_dur.as_secs()); edge_id, liveness_timeout_dur.as_secs());
break; break;
} }
_ = &mut writer_dead_rx => {
log::error!("Tunnel writer to edge {} died, disconnecting immediately", edge_id);
break;
}
_ = edge_token.cancelled() => { _ = edge_token.cancelled() => {
log::info!("Edge {} cancelled by hub", edge_id); log::info!("Edge {} cancelled by hub", edge_id);
break; break;

View File

@@ -3,6 +3,6 @@
*/ */
export const commitinfo = { export const commitinfo = {
name: '@serve.zone/remoteingress', name: '@serve.zone/remoteingress',
version: '4.7.0', version: '4.7.2',
description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.' description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.'
} }