Compare commits

...

16 Commits

Author SHA1 Message Date
a808d4c9de v4.5.12 2026-03-16 17:39:25 +00:00
f8a0171ef3 fix(remoteingress-core): improve tunnel liveness handling and enable TCP keepalive for accepted client sockets 2026-03-16 17:39:25 +00:00
1d59a48648 v4.5.11 2026-03-16 13:55:02 +00:00
af2ec11a2d fix(repo): no changes to commit 2026-03-16 13:55:02 +00:00
b6e66a7fa6 v4.5.10 2026-03-16 13:48:35 +00:00
1391b39601 fix(remoteingress-core): guard zero-window reads to avoid false EOF handling on stalled streams 2026-03-16 13:48:35 +00:00
e813c2f044 v4.5.9 2026-03-16 11:29:38 +00:00
0b8c1f0b57 fix(remoteingress-core): delay stream close until downstream response draining finishes to prevent truncated transfers 2026-03-16 11:29:38 +00:00
a63dbf2502 v4.5.8 2026-03-16 10:51:59 +00:00
4b95a3c999 fix(remoteingress-core): ensure upstream writes cancel promptly and reliably deliver CLOSE_BACK frames 2026-03-16 10:51:59 +00:00
51ab32f6c3 v4.5.7 2026-03-16 09:44:31 +00:00
ed52520d50 fix(remoteingress-core): improve tunnel reconnect and frame write efficiency 2026-03-16 09:44:31 +00:00
a08011d2da v4.5.6 2026-03-16 09:36:03 +00:00
679b247c8a fix(remoteingress-core): disable Nagle's algorithm on edge, hub, and upstream TCP sockets to reduce control-frame latency 2026-03-16 09:36:03 +00:00
32f9845495 v4.5.5 2026-03-16 09:02:02 +00:00
c0e1daa0e4 fix(remoteingress-core): wait for hub-to-client draining before cleanup and reliably send close frames 2026-03-16 09:02:02 +00:00
8 changed files with 169 additions and 42 deletions

View File

@@ -1,5 +1,54 @@
# Changelog
## 2026-03-16 - 4.5.12 - fix(remoteingress-core)
improve tunnel liveness handling and enable TCP keepalive for accepted client sockets
- Avoid disconnecting edges when PING or PONG frames cannot be queued because the control channel is temporarily full.
- Enable TCP_NODELAY and TCP keepalive on accepted client connections to help detect stale or dropped clients.
## 2026-03-16 - 4.5.11 - fix(repo)
no changes to commit
## 2026-03-16 - 4.5.10 - fix(remoteingress-core)
guard zero-window reads to avoid false EOF handling on stalled streams
- Prevent upload and download loops from calling read on an empty buffer when the flow-control window remains at 0 after the stall timeout
- Log a warning and close the affected stream instead of misinterpreting Ok(0) as end-of-file
## 2026-03-16 - 4.5.9 - fix(remoteingress-core)
delay stream close until downstream response draining finishes to prevent truncated transfers
- Waits for the hub-to-client download task to finish before sending the stream CLOSE frame
- Prevents upstream reads from being cancelled mid-response during asymmetric transfers such as git fetch
- Retains the existing timeout so stalled downloads still clean up safely
## 2026-03-16 - 4.5.8 - fix(remoteingress-core)
ensure upstream writes cancel promptly and reliably deliver CLOSE_BACK frames
- listen for stream cancellation while waiting on upstream write timeouts so FRAME_CLOSE does not block for up to 60 seconds
- replace try_send with send().await when emitting CLOSE_BACK frames to avoid silently dropping close notifications when the data channel is full
## 2026-03-16 - 4.5.7 - fix(remoteingress-core)
improve tunnel reconnect and frame write efficiency
- Reuse the TLS connector across edge reconnections to preserve session resumption state and reduce reconnect latency.
- Buffer hub and edge frame writes to coalesce small control and data frames into fewer TLS records and syscalls while still flushing each frame promptly.
## 2026-03-16 - 4.5.6 - fix(remoteingress-core)
disable Nagle's algorithm on edge, hub, and upstream TCP sockets to reduce control-frame latency
- Enable TCP_NODELAY on the edge connection to the hub for faster PING/PONG and WINDOW_UPDATE delivery
- Apply TCP_NODELAY on accepted hub streams before TLS handling
- Enable TCP_NODELAY on SmartProxy upstream connections before sending the PROXY header
## 2026-03-16 - 4.5.5 - fix(remoteingress-core)
wait for hub-to-client draining before cleanup and reliably send close frames
- switch CLOSE frame delivery on the data channel from try_send to send().await to avoid dropping it when the channel is full
- delay stream cleanup until the hub-to-client task finishes or times out so large downstream responses continue after upload EOF
- add a bounded 5-minute wait for download draining to prevent premature termination of asymmetric transfers such as git fetch
## 2026-03-15 - 4.5.4 - fix(remoteingress-core)
preserve stream close ordering and add flow-control stall timeouts

View File

@@ -1,6 +1,6 @@
{
"name": "@serve.zone/remoteingress",
"version": "4.5.4",
"version": "4.5.12",
"private": false,
"description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.",
"main": "dist_ts/index.js",

13
rust/Cargo.lock generated
View File

@@ -558,6 +558,7 @@ dependencies = [
"rustls-pemfile",
"serde",
"serde_json",
"socket2 0.5.10",
"tokio",
"tokio-rustls",
"tokio-util",
@@ -701,6 +702,16 @@ version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "socket2"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "socket2"
version = "0.6.2"
@@ -765,7 +776,7 @@ dependencies = [
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2",
"socket2 0.6.2",
"tokio-macros",
"windows-sys 0.61.2",
]

View File

@@ -14,3 +14,4 @@ serde_json = "1"
log = "0.4"
rustls-pemfile = "2"
tokio-util = "0.7"
socket2 = "0.5"

View File

@@ -194,6 +194,14 @@ async fn edge_main_loop(
let mut backoff_ms: u64 = 1000;
let max_backoff_ms: u64 = 30000;
// Build TLS config ONCE outside the reconnect loop — preserves session
// cache across reconnections for TLS session resumption (saves 1 RTT).
let tls_config = rustls::ClientConfig::builder()
.dangerous()
.with_custom_certificate_verifier(Arc::new(NoCertVerifier))
.with_no_client_auth();
let connector = TlsConnector::from(Arc::new(tls_config));
loop {
// Create a per-connection child token
let connection_token = cancel_token.child_token();
@@ -209,6 +217,7 @@ async fn edge_main_loop(
&listen_ports,
&mut shutdown_rx,
&connection_token,
&connector,
)
.await;
@@ -259,18 +268,16 @@ async fn connect_to_hub_and_run(
listen_ports: &Arc<RwLock<Vec<u16>>>,
shutdown_rx: &mut mpsc::Receiver<()>,
connection_token: &CancellationToken,
connector: &TlsConnector,
) -> EdgeLoopResult {
// Build TLS connector that skips cert verification (auth is via secret)
let tls_config = rustls::ClientConfig::builder()
.dangerous()
.with_custom_certificate_verifier(Arc::new(NoCertVerifier))
.with_no_client_auth();
let connector = TlsConnector::from(Arc::new(tls_config));
let addr = format!("{}:{}", config.hub_host, config.hub_port);
let tcp = match TcpStream::connect(&addr).await {
Ok(s) => s,
Ok(s) => {
// Disable Nagle's algorithm for low-latency control frames (PING/PONG, WINDOW_UPDATE)
let _ = s.set_nodelay(true);
s
}
Err(e) => {
log::error!("Failed to connect to hub at {}: {}", addr, e);
return EdgeLoopResult::Reconnect;
@@ -374,15 +381,17 @@ async fn connect_to_hub_and_run(
let tunnel_writer_tx = tunnel_ctrl_tx.clone();
let tw_token = connection_token.clone();
let tunnel_writer_handle = tokio::spawn(async move {
// BufWriter coalesces small writes (frame headers, control frames) into fewer
// TLS records and syscalls. Flushed after each frame to avoid holding data.
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
loop {
tokio::select! {
biased; // control frames always take priority over data
ctrl = tunnel_ctrl_rx.recv() => {
match ctrl {
Some(frame_data) => {
if write_half.write_all(&frame_data).await.is_err() {
break;
}
if writer.write_all(&frame_data).await.is_err() { break; }
if writer.flush().await.is_err() { break; }
}
None => break,
}
@@ -390,9 +399,8 @@ async fn connect_to_hub_and_run(
data = tunnel_data_rx.recv() => {
match data {
Some(frame_data) => {
if write_half.write_all(&frame_data).await.is_err() {
break;
}
if writer.write_all(&frame_data).await.is_err() { break; }
if writer.flush().await.is_err() { break; }
}
None => break,
}
@@ -486,8 +494,10 @@ async fn connect_to_hub_and_run(
FRAME_PING => {
let pong_frame = encode_frame(0, FRAME_PONG, &[]);
if tunnel_writer_tx.try_send(pong_frame).is_err() {
log::warn!("Failed to send PONG, writer channel full/closed");
break EdgeLoopResult::Reconnect;
// Control channel full (WINDOW_UPDATE burst from many streams).
// DON'T disconnect — the 45s liveness timeout gives margin
// for the channel to drain and the next PONG to succeed.
log::warn!("PONG send failed, control channel full — skipping this cycle");
}
log::trace!("Received PING from hub, sent PONG");
}
@@ -580,6 +590,15 @@ fn apply_port_config(
accept_result = listener.accept() => {
match accept_result {
Ok((client_stream, client_addr)) => {
// TCP keepalive detects dead clients that disappear without FIN.
// Without this, zombie streams accumulate and never get cleaned up.
let _ = client_stream.set_nodelay(true);
let ka = socket2::TcpKeepalive::new()
.with_time(Duration::from_secs(60));
#[cfg(target_os = "linux")]
let ka = ka.with_interval(Duration::from_secs(60));
let _ = socket2::SockRef::from(&client_stream).set_tcp_keepalive(&ka);
let stream_id = next_stream_id.fetch_add(1, Ordering::Relaxed);
let tunnel_ctrl_tx = tunnel_ctrl_tx.clone();
let tunnel_data_tx = tunnel_data_tx.clone();
@@ -665,7 +684,7 @@ async fn handle_client_connection(
// After writing to client TCP, send WINDOW_UPDATE to hub so it can send more
let hub_to_client_token = client_token.clone();
let wu_tx = tunnel_ctrl_tx.clone();
let hub_to_client = tokio::spawn(async move {
let mut hub_to_client = tokio::spawn(async move {
let mut consumed_since_update: u32 = 0;
loop {
tokio::select! {
@@ -718,8 +737,15 @@ async fn handle_client_connection(
}
if client_token.is_cancelled() { break; }
// Limit read size to available window
// Limit read size to available window.
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
// which would be falsely interpreted as EOF.
let w = send_window.load(Ordering::Acquire) as usize;
if w == 0 {
log::warn!("Stream {} upload: window still 0 after stall timeout, closing", stream_id);
break;
}
let max_read = w.min(buf.len());
tokio::select! {
@@ -741,18 +767,29 @@ async fn handle_client_connection(
}
}
// Send CLOSE frame via DATA channel (must arrive AFTER last DATA for this stream)
// Wait for the download task (hub → client) to finish BEFORE sending CLOSE.
// Upload EOF (client done sending) does NOT mean the response is done.
// For asymmetric transfers like git fetch (small request, large response),
// the response is still streaming when the upload finishes.
// Sending CLOSE before the response finishes would cause the hub to cancel
// the upstream reader mid-response, truncating the data.
let _ = tokio::time::timeout(
Duration::from_secs(300), // 5 min max wait for download to finish
&mut hub_to_client,
).await;
// NOW send CLOSE — the response has been fully delivered (or timed out).
if !client_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE, &[]);
let _ = tunnel_data_tx.try_send(close_frame);
let _ = tunnel_data_tx.send(close_frame).await;
}
// Cleanup
// Clean up
{
let mut writers = client_writers.lock().await;
writers.remove(&stream_id);
}
hub_to_client.abort();
hub_to_client.abort(); // No-op if already finished; safety net if timeout fired
let _ = edge_id; // used for logging context
}

View File

@@ -298,6 +298,8 @@ async fn handle_edge_connection(
edge_token: CancellationToken,
peer_addr: String,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
// Disable Nagle's algorithm for low-latency control frames (PING/PONG, WINDOW_UPDATE)
stream.set_nodelay(true)?;
let tls_stream = acceptor.accept(stream).await?;
let (read_half, mut write_half) = tokio::io::split(tls_stream);
let mut buf_reader = BufReader::new(read_half);
@@ -379,15 +381,17 @@ async fn handle_edge_connection(
let frame_writer_tx = ctrl_tx.clone();
let writer_token = edge_token.clone();
let writer_handle = tokio::spawn(async move {
// BufWriter coalesces small writes (frame headers, control frames) into fewer
// TLS records and syscalls. Flushed after each frame to avoid holding data.
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
loop {
tokio::select! {
biased; // control frames always take priority over data
ctrl = ctrl_rx.recv() => {
match ctrl {
Some(frame_data) => {
if write_half.write_all(&frame_data).await.is_err() {
break;
}
if writer.write_all(&frame_data).await.is_err() { break; }
if writer.flush().await.is_err() { break; }
}
None => break,
}
@@ -395,9 +399,8 @@ async fn handle_edge_connection(
data = data_rx.recv() => {
match data {
Some(frame_data) => {
if write_half.write_all(&frame_data).await.is_err() {
break;
}
if writer.write_all(&frame_data).await.is_err() { break; }
if writer.flush().await.is_err() { break; }
}
None => break,
}
@@ -520,6 +523,7 @@ async fn handle_edge_connection(
format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into()
})??;
upstream.set_nodelay(true)?;
upstream.write_all(proxy_header.as_bytes()).await?;
let (mut up_read, mut up_write) =
@@ -537,10 +541,16 @@ async fn handle_edge_connection(
match data {
Some(data) => {
let len = data.len() as u32;
match tokio::time::timeout(
Duration::from_secs(60),
up_write.write_all(&data),
).await {
// Check cancellation alongside the write so we respond
// promptly to FRAME_CLOSE instead of blocking up to 60s.
let write_result = tokio::select! {
r = tokio::time::timeout(
Duration::from_secs(60),
up_write.write_all(&data),
) => r,
_ = writer_token.cancelled() => break,
};
match write_result {
Ok(Ok(())) => {}
Ok(Err(_)) => break,
Err(_) => {
@@ -591,8 +601,15 @@ async fn handle_edge_connection(
}
if stream_token.is_cancelled() { break; }
// Limit read size to available window
// Limit read size to available window.
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
// which would be falsely interpreted as EOF.
let w = send_window.load(Ordering::Acquire) as usize;
if w == 0 {
log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id);
break;
}
let max_read = w.min(buf.len());
tokio::select! {
@@ -615,10 +632,11 @@ async fn handle_edge_connection(
}
}
// Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK)
// Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK).
// Use send().await to guarantee delivery (try_send silently drops if full).
if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let _ = data_writer_tx.try_send(close_frame);
let _ = data_writer_tx.send(close_frame).await;
}
writer_for_edge_data.abort();
@@ -628,10 +646,11 @@ async fn handle_edge_connection(
if let Err(e) = result {
log::error!("Stream {} error: {}", stream_id, e);
// Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK)
// Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK).
// Use send().await to guarantee delivery.
if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let _ = data_writer_tx.try_send(close_frame);
let _ = data_writer_tx.send(close_frame).await;
}
}
@@ -707,8 +726,9 @@ async fn handle_edge_connection(
_ = ping_ticker.tick() => {
let ping_frame = encode_frame(0, FRAME_PING, &[]);
if frame_writer_tx.try_send(ping_frame).is_err() {
log::warn!("Failed to send PING to edge {}, writer channel full/closed", edge_id);
break;
// Control channel full — skip this PING cycle.
// The 45s liveness timeout gives margin for the channel to drain.
log::warn!("PING send to edge {} failed, control channel full — skipping", edge_id);
}
log::trace!("Sent PING to edge {}", edge_id);
}

View File

@@ -32,6 +32,15 @@ pub fn encode_window_update(stream_id: u32, frame_type: u8, increment: u32) -> V
encode_frame(stream_id, frame_type, &increment.to_be_bytes())
}
/// Derive the per-stream flow-control window from the number of active streams.
///
/// A fixed budget of 32 MiB is split evenly between the active streams, so a
/// handful of streams each get a large window while a large number of streams
/// each get a small one (less memory held, fewer control frames in flight).
///
/// * `active` — number of currently active streams; `0` is treated as `1` so
///   the division is always defined.
///
/// Returns the even share of the budget, never below 64 KiB and never above
/// `INITIAL_STREAM_WINDOW`.
pub fn compute_window_for_stream_count(active: u32) -> u32 {
    const TOTAL_BUDGET_BYTES: u64 = 32 * 1024 * 1024;
    const MIN_WINDOW_BYTES: u64 = 64 * 1024;

    // Guard against a zero divisor before widening to 64 bits.
    let stream_count = u64::from(active.max(1));
    let fair_share = TOTAL_BUDGET_BYTES / stream_count;

    // The upper bound fits in u32, so narrowing the clamped value is lossless.
    fair_share.clamp(MIN_WINDOW_BYTES, INITIAL_STREAM_WINDOW as u64) as u32
}
/// Decode a WINDOW_UPDATE payload into a byte increment. Returns None if payload is malformed.
pub fn decode_window_update(payload: &[u8]) -> Option<u32> {
if payload.len() != 4 {

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@serve.zone/remoteingress',
version: '4.5.4',
version: '4.5.12',
description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.'
}