Compare commits

...

16 Commits

Author SHA1 Message Date
51ab32f6c3 v4.5.7 2026-03-16 09:44:31 +00:00
ed52520d50 fix(remoteingress-core): improve tunnel reconnect and frame write efficiency 2026-03-16 09:44:31 +00:00
a08011d2da v4.5.6 2026-03-16 09:36:03 +00:00
679b247c8a fix(remoteingress-core): disable Nagle's algorithm on edge, hub, and upstream TCP sockets to reduce control-frame latency 2026-03-16 09:36:03 +00:00
32f9845495 v4.5.5 2026-03-16 09:02:02 +00:00
c0e1daa0e4 fix(remoteingress-core): wait for hub-to-client draining before cleanup and reliably send close frames 2026-03-16 09:02:02 +00:00
fd511c8a5c v4.5.4 2026-03-15 21:06:44 +00:00
c490e35a8f fix(remoteingress-core): preserve stream close ordering and add flow-control stall timeouts 2026-03-15 21:06:44 +00:00
579e553da0 v4.5.3 2026-03-15 19:26:39 +00:00
a8ee0b33d7 fix(remoteingress-core): prioritize control frames over data in edge and hub tunnel writers 2026-03-15 19:26:39 +00:00
43e320a36d v4.5.2 2026-03-15 18:16:10 +00:00
6ac4b37532 fix(remoteingress-core): improve stream flow control retries and increase channel buffer capacity 2026-03-15 18:16:10 +00:00
f456b0ba4f v4.5.1 2026-03-15 17:52:45 +00:00
69530f73aa fix(protocol): increase per-stream flow control window and channel buffers to improve high-RTT throughput 2026-03-15 17:52:45 +00:00
207b4a5cec v4.5.0 2026-03-15 17:33:59 +00:00
761551596b feat(remoteingress-core): add per-stream flow control for edge and hub tunnel data transfer 2026-03-15 17:33:59 +00:00
6 changed files with 369 additions and 84 deletions

View File

@@ -1,5 +1,58 @@
# Changelog # Changelog
## 2026-03-16 - 4.5.7 - fix(remoteingress-core)
improve tunnel reconnect and frame write efficiency
- Reuse the TLS connector across edge reconnections to preserve session resumption state and reduce reconnect latency.
- Buffer hub and edge frame writes to coalesce small control and data frames into fewer TLS records and syscalls while still flushing each frame promptly.
## 2026-03-16 - 4.5.6 - fix(remoteingress-core)
disable Nagle's algorithm on edge, hub, and upstream TCP sockets to reduce control-frame latency
- Enable TCP_NODELAY on the edge connection to the hub for faster PING/PONG and WINDOW_UPDATE delivery
- Apply TCP_NODELAY on accepted hub streams before TLS handling
- Enable TCP_NODELAY on SmartProxy upstream connections before sending the PROXY header
## 2026-03-16 - 4.5.5 - fix(remoteingress-core)
wait for hub-to-client draining before cleanup and reliably send close frames
- switch CLOSE frame delivery on the data channel from try_send to send().await to avoid dropping it when the channel is full
- delay stream cleanup until the hub-to-client task finishes or times out so large downstream responses continue after upload EOF
- add a bounded 5-minute wait for download draining to prevent premature termination of asymmetric transfers such as git fetch
## 2026-03-15 - 4.5.4 - fix(remoteingress-core)
preserve stream close ordering and add flow-control stall timeouts
- Send CLOSE and CLOSE_BACK frames on the data channel so they arrive after the final stream data frames.
- Log and abort stalled upload and download paths when flow-control windows stay empty for 120 seconds.
- Apply a 60-second timeout when writing buffered stream data to the upstream connection to prevent hung streams.
## 2026-03-15 - 4.5.3 - fix(remoteingress-core)
prioritize control frames over data in edge and hub tunnel writers
- Split tunnel/frame writers into separate control and data channels in edge and hub
- Use biased select loops so PING, PONG, WINDOW_UPDATE, OPEN, and CLOSE frames are sent before data frames
- Route stream data through dedicated data channels while keeping OPEN, CLOSE, and flow-control updates on control channels to prevent keepalive starvation under load
## 2026-03-15 - 4.5.2 - fix(remoteingress-core)
improve stream flow control retries and increase channel buffer capacity
- increase per-stream mpsc channel capacity from 128 to 256 on both edge and hub paths
- only reset accumulated window update bytes after a successful try_send to avoid dropping flow-control credits when the update channel is busy
## 2026-03-15 - 4.5.1 - fix(protocol)
increase per-stream flow control window and channel buffers to improve high-RTT throughput
- raise the initial stream window from 256 KB to 4 MB to allow more in-flight data per stream
- increase edge and hub mpsc channel capacities from 16 to 128 to better absorb throughput under flow control
## 2026-03-15 - 4.5.0 - feat(remoteingress-core)
add per-stream flow control for edge and hub tunnel data transfer
- introduce WINDOW_UPDATE frame types and protocol helpers for per-stream flow control
- track per-stream send windows on both edge and hub to limit reads based on available capacity
- send window updates after downstream writes to reduce channel pressure during large transfers
## 2026-03-15 - 4.4.1 - fix(remoteingress-core) ## 2026-03-15 - 4.4.1 - fix(remoteingress-core)
prevent stream data loss by applying backpressure and closing saturated channels prevent stream data loss by applying backpressure and closing saturated channels

View File

@@ -1,6 +1,6 @@
{ {
"name": "@serve.zone/remoteingress", "name": "@serve.zone/remoteingress",
"version": "4.4.1", "version": "4.5.7",
"private": false, "private": false,
"description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.", "description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.",
"main": "dist_ts/index.js", "main": "dist_ts/index.js",

View File

@@ -4,7 +4,7 @@ use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader}; use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
use tokio::net::{TcpListener, TcpStream}; use tokio::net::{TcpListener, TcpStream};
use tokio::sync::{mpsc, Mutex, RwLock}; use tokio::sync::{mpsc, Mutex, Notify, RwLock};
use tokio::task::JoinHandle; use tokio::task::JoinHandle;
use tokio::time::{Instant, sleep_until}; use tokio::time::{Instant, sleep_until};
use tokio_rustls::TlsConnector; use tokio_rustls::TlsConnector;
@@ -13,6 +13,17 @@ use serde::{Deserialize, Serialize};
use remoteingress_protocol::*; use remoteingress_protocol::*;
/// Per-stream state tracked in the edge's client_writers map.
///
/// An entry is inserted (keyed by stream id) when a client connection is set up,
/// and removed when the stream is cleaned up, when the hub sends FRAME_CLOSE_BACK,
/// or when the back channel overflows.
struct EdgeStreamState {
/// Channel to deliver FRAME_DATA_BACK payloads to the hub_to_client task.
/// The frame dispatcher uses a non-blocking `try_send` on this sender.
back_tx: mpsc::Sender<Vec<u8>>,
/// Send window for FRAME_DATA (upload direction); starts at INITIAL_STREAM_WINDOW.
/// Decremented by the client reader, incremented by FRAME_WINDOW_UPDATE_BACK from hub.
send_window: Arc<AtomicU32>,
/// Notifier to wake the client reader when the window opens.
/// Signalled via `notify_one` after a window update has been applied.
window_notify: Arc<Notify>,
}
/// Edge configuration (hub-host + credentials only; ports come from hub). /// Edge configuration (hub-host + credentials only; ports come from hub).
#[derive(Debug, Clone, Deserialize, Serialize)] #[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
@@ -183,6 +194,14 @@ async fn edge_main_loop(
let mut backoff_ms: u64 = 1000; let mut backoff_ms: u64 = 1000;
let max_backoff_ms: u64 = 30000; let max_backoff_ms: u64 = 30000;
// Build TLS config ONCE outside the reconnect loop — preserves session
// cache across reconnections for TLS session resumption (saves 1 RTT).
let tls_config = rustls::ClientConfig::builder()
.dangerous()
.with_custom_certificate_verifier(Arc::new(NoCertVerifier))
.with_no_client_auth();
let connector = TlsConnector::from(Arc::new(tls_config));
loop { loop {
// Create a per-connection child token // Create a per-connection child token
let connection_token = cancel_token.child_token(); let connection_token = cancel_token.child_token();
@@ -198,6 +217,7 @@ async fn edge_main_loop(
&listen_ports, &listen_ports,
&mut shutdown_rx, &mut shutdown_rx,
&connection_token, &connection_token,
&connector,
) )
.await; .await;
@@ -248,18 +268,16 @@ async fn connect_to_hub_and_run(
listen_ports: &Arc<RwLock<Vec<u16>>>, listen_ports: &Arc<RwLock<Vec<u16>>>,
shutdown_rx: &mut mpsc::Receiver<()>, shutdown_rx: &mut mpsc::Receiver<()>,
connection_token: &CancellationToken, connection_token: &CancellationToken,
connector: &TlsConnector,
) -> EdgeLoopResult { ) -> EdgeLoopResult {
// Build TLS connector that skips cert verification (auth is via secret)
let tls_config = rustls::ClientConfig::builder()
.dangerous()
.with_custom_certificate_verifier(Arc::new(NoCertVerifier))
.with_no_client_auth();
let connector = TlsConnector::from(Arc::new(tls_config));
let addr = format!("{}:{}", config.hub_host, config.hub_port); let addr = format!("{}:{}", config.hub_host, config.hub_port);
let tcp = match TcpStream::connect(&addr).await { let tcp = match TcpStream::connect(&addr).await {
Ok(s) => s, Ok(s) => {
// Disable Nagle's algorithm for low-latency control frames (PING/PONG, WINDOW_UPDATE)
let _ = s.set_nodelay(true);
s
}
Err(e) => { Err(e) => {
log::error!("Failed to connect to hub at {}: {}", addr, e); log::error!("Failed to connect to hub at {}: {}", addr, e);
return EdgeLoopResult::Reconnect; return EdgeLoopResult::Reconnect;
@@ -351,22 +369,38 @@ async fn connect_to_hub_and_run(
} }
}); });
// Client socket map: stream_id -> sender for writing data back to client // Client socket map: stream_id -> per-stream state (back channel + flow control)
let client_writers: Arc<Mutex<HashMap<u32, mpsc::Sender<Vec<u8>>>>> = let client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>> =
Arc::new(Mutex::new(HashMap::new())); Arc::new(Mutex::new(HashMap::new()));
// A5: Channel-based tunnel writer replaces Arc<Mutex<WriteHalf>> // QoS dual-channel tunnel writer: control frames (PONG/WINDOW_UPDATE/CLOSE/OPEN)
let (tunnel_writer_tx, mut tunnel_writer_rx) = mpsc::channel::<Vec<u8>>(4096); // have priority over data frames (DATA). Prevents PING starvation under load.
let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Vec<u8>>(64);
let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Vec<u8>>(4096);
// Legacy alias — control channel for PONG, CLOSE, WINDOW_UPDATE, OPEN
let tunnel_writer_tx = tunnel_ctrl_tx.clone();
let tw_token = connection_token.clone(); let tw_token = connection_token.clone();
let tunnel_writer_handle = tokio::spawn(async move { let tunnel_writer_handle = tokio::spawn(async move {
// BufWriter coalesces small writes (frame headers, control frames) into fewer
// TLS records and syscalls. Flushed after each frame to avoid holding data.
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
loop { loop {
tokio::select! { tokio::select! {
data = tunnel_writer_rx.recv() => { biased; // control frames always take priority over data
ctrl = tunnel_ctrl_rx.recv() => {
match ctrl {
Some(frame_data) => {
if writer.write_all(&frame_data).await.is_err() { break; }
if writer.flush().await.is_err() { break; }
}
None => break,
}
}
data = tunnel_data_rx.recv() => {
match data { match data {
Some(frame_data) => { Some(frame_data) => {
if write_half.write_all(&frame_data).await.is_err() { if writer.write_all(&frame_data).await.is_err() { break; }
break; if writer.flush().await.is_err() { break; }
}
} }
None => break, None => break,
} }
@@ -382,6 +416,7 @@ async fn connect_to_hub_and_run(
&handshake.listen_ports, &handshake.listen_ports,
&mut port_listeners, &mut port_listeners,
&tunnel_writer_tx, &tunnel_writer_tx,
&tunnel_data_tx,
&client_writers, &client_writers,
active_streams, active_streams,
next_stream_id, next_stream_id,
@@ -407,17 +442,31 @@ async fn connect_to_hub_and_run(
match frame.frame_type { match frame.frame_type {
FRAME_DATA_BACK => { FRAME_DATA_BACK => {
// Non-blocking send to prevent head-of-line blocking in the main dispatch loop. // Non-blocking dispatch to per-stream channel.
// If the per-stream channel is full, close the stream rather than silently // With flow control, the sender should rarely exceed the channel capacity.
// dropping data (which would corrupt the TCP stream).
let mut writers = client_writers.lock().await; let mut writers = client_writers.lock().await;
if let Some(tx) = writers.get(&frame.stream_id) { if let Some(state) = writers.get(&frame.stream_id) {
if tx.try_send(frame.payload).is_err() { if state.back_tx.try_send(frame.payload).is_err() {
log::warn!("Stream {} back-channel full, closing stream to prevent data corruption", frame.stream_id); log::warn!("Stream {} back-channel full, closing stream", frame.stream_id);
writers.remove(&frame.stream_id); writers.remove(&frame.stream_id);
} }
} }
} }
FRAME_WINDOW_UPDATE_BACK => {
// Hub consumed data — increase our send window for this stream (upload direction)
if let Some(increment) = decode_window_update(&frame.payload) {
if increment > 0 {
let writers = client_writers.lock().await;
if let Some(state) = writers.get(&frame.stream_id) {
let prev = state.send_window.fetch_add(increment, Ordering::Release);
if prev + increment > MAX_WINDOW_SIZE {
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
}
state.window_notify.notify_one();
}
}
}
}
FRAME_CLOSE_BACK => { FRAME_CLOSE_BACK => {
let mut writers = client_writers.lock().await; let mut writers = client_writers.lock().await;
writers.remove(&frame.stream_id); writers.remove(&frame.stream_id);
@@ -433,6 +482,7 @@ async fn connect_to_hub_and_run(
&update.listen_ports, &update.listen_ports,
&mut port_listeners, &mut port_listeners,
&tunnel_writer_tx, &tunnel_writer_tx,
&tunnel_data_tx,
&client_writers, &client_writers,
active_streams, active_streams,
next_stream_id, next_stream_id,
@@ -494,8 +544,9 @@ async fn connect_to_hub_and_run(
fn apply_port_config( fn apply_port_config(
new_ports: &[u16], new_ports: &[u16],
port_listeners: &mut HashMap<u16, JoinHandle<()>>, port_listeners: &mut HashMap<u16, JoinHandle<()>>,
tunnel_writer_tx: &mpsc::Sender<Vec<u8>>, tunnel_ctrl_tx: &mpsc::Sender<Vec<u8>>,
client_writers: &Arc<Mutex<HashMap<u32, mpsc::Sender<Vec<u8>>>>>, tunnel_data_tx: &mpsc::Sender<Vec<u8>>,
client_writers: &Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
active_streams: &Arc<AtomicU32>, active_streams: &Arc<AtomicU32>,
next_stream_id: &Arc<AtomicU32>, next_stream_id: &Arc<AtomicU32>,
edge_id: &str, edge_id: &str,
@@ -514,7 +565,8 @@ fn apply_port_config(
// Add new ports // Add new ports
for &port in new_set.difference(&old_set) { for &port in new_set.difference(&old_set) {
let tunnel_writer_tx = tunnel_writer_tx.clone(); let tunnel_ctrl_tx = tunnel_ctrl_tx.clone();
let tunnel_data_tx = tunnel_data_tx.clone();
let client_writers = client_writers.clone(); let client_writers = client_writers.clone();
let active_streams = active_streams.clone(); let active_streams = active_streams.clone();
let next_stream_id = next_stream_id.clone(); let next_stream_id = next_stream_id.clone();
@@ -537,7 +589,8 @@ fn apply_port_config(
match accept_result { match accept_result {
Ok((client_stream, client_addr)) => { Ok((client_stream, client_addr)) => {
let stream_id = next_stream_id.fetch_add(1, Ordering::Relaxed); let stream_id = next_stream_id.fetch_add(1, Ordering::Relaxed);
let tunnel_writer_tx = tunnel_writer_tx.clone(); let tunnel_ctrl_tx = tunnel_ctrl_tx.clone();
let tunnel_data_tx = tunnel_data_tx.clone();
let client_writers = client_writers.clone(); let client_writers = client_writers.clone();
let active_streams = active_streams.clone(); let active_streams = active_streams.clone();
let edge_id = edge_id.clone(); let edge_id = edge_id.clone();
@@ -552,7 +605,8 @@ fn apply_port_config(
stream_id, stream_id,
port, port,
&edge_id, &edge_id,
tunnel_writer_tx, tunnel_ctrl_tx,
tunnel_data_tx,
client_writers, client_writers,
client_token, client_token,
) )
@@ -582,8 +636,9 @@ async fn handle_client_connection(
stream_id: u32, stream_id: u32,
dest_port: u16, dest_port: u16,
edge_id: &str, edge_id: &str,
tunnel_writer_tx: mpsc::Sender<Vec<u8>>, tunnel_ctrl_tx: mpsc::Sender<Vec<u8>>,
client_writers: Arc<Mutex<HashMap<u32, mpsc::Sender<Vec<u8>>>>>, tunnel_data_tx: mpsc::Sender<Vec<u8>>,
client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
client_token: CancellationToken, client_token: CancellationToken,
) { ) {
let client_ip = client_addr.ip().to_string(); let client_ip = client_addr.ip().to_string();
@@ -592,33 +647,52 @@ async fn handle_client_connection(
// Determine edge IP (use 0.0.0.0 as placeholder — hub doesn't use it for routing) // Determine edge IP (use 0.0.0.0 as placeholder — hub doesn't use it for routing)
let edge_ip = "0.0.0.0"; let edge_ip = "0.0.0.0";
// Send OPEN frame with PROXY v1 header via writer channel // Send OPEN frame with PROXY v1 header via control channel
let proxy_header = build_proxy_v1_header(&client_ip, edge_ip, client_port, dest_port); let proxy_header = build_proxy_v1_header(&client_ip, edge_ip, client_port, dest_port);
let open_frame = encode_frame(stream_id, FRAME_OPEN, proxy_header.as_bytes()); let open_frame = encode_frame(stream_id, FRAME_OPEN, proxy_header.as_bytes());
if tunnel_writer_tx.send(open_frame).await.is_err() { if tunnel_ctrl_tx.send(open_frame).await.is_err() {
return; return;
} }
// Set up channel for data coming back from hub // Set up channel for data coming back from hub (capacity 256 is sufficient with flow control)
let (back_tx, mut back_rx) = mpsc::channel::<Vec<u8>>(256); let (back_tx, mut back_rx) = mpsc::channel::<Vec<u8>>(256);
let send_window = Arc::new(AtomicU32::new(INITIAL_STREAM_WINDOW));
let window_notify = Arc::new(Notify::new());
{ {
let mut writers = client_writers.lock().await; let mut writers = client_writers.lock().await;
writers.insert(stream_id, back_tx); writers.insert(stream_id, EdgeStreamState {
back_tx,
send_window: Arc::clone(&send_window),
window_notify: Arc::clone(&window_notify),
});
} }
let (mut client_read, mut client_write) = client_stream.into_split(); let (mut client_read, mut client_write) = client_stream.into_split();
// Task: hub -> client // Task: hub -> client (download direction)
// After writing to client TCP, send WINDOW_UPDATE to hub so it can send more
let hub_to_client_token = client_token.clone(); let hub_to_client_token = client_token.clone();
let hub_to_client = tokio::spawn(async move { let wu_tx = tunnel_ctrl_tx.clone();
let mut hub_to_client = tokio::spawn(async move {
let mut consumed_since_update: u32 = 0;
loop { loop {
tokio::select! { tokio::select! {
data = back_rx.recv() => { data = back_rx.recv() => {
match data { match data {
Some(data) => { Some(data) => {
let len = data.len() as u32;
if client_write.write_all(&data).await.is_err() { if client_write.write_all(&data).await.is_err() {
break; break;
} }
// Track consumption for flow control
consumed_since_update += len;
if consumed_since_update >= WINDOW_UPDATE_THRESHOLD {
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, consumed_since_update);
if wu_tx.try_send(frame).is_ok() {
consumed_since_update = 0;
}
// If try_send fails, keep accumulating — retry on next threshold
}
} }
None => break, None => break,
} }
@@ -626,23 +700,45 @@ async fn handle_client_connection(
_ = hub_to_client_token.cancelled() => break, _ = hub_to_client_token.cancelled() => break,
} }
} }
// Send final window update for any remaining consumed bytes
if consumed_since_update > 0 {
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, consumed_since_update);
let _ = wu_tx.try_send(frame);
}
let _ = client_write.shutdown().await; let _ = client_write.shutdown().await;
}); });
// Task: client -> hub (via writer channel) // Task: client -> hub (upload direction) with per-stream flow control
let mut buf = vec![0u8; 32768]; let mut buf = vec![0u8; 32768];
loop { loop {
// Wait for send window to have capacity (with stall timeout)
loop {
let w = send_window.load(Ordering::Acquire);
if w > 0 { break; }
tokio::select! {
_ = window_notify.notified() => continue,
_ = client_token.cancelled() => break,
_ = tokio::time::sleep(Duration::from_secs(120)) => {
log::warn!("Stream {} upload stalled (window empty for 120s)", stream_id);
break;
}
}
}
if client_token.is_cancelled() { break; }
// Limit read size to available window
let w = send_window.load(Ordering::Acquire) as usize;
let max_read = w.min(buf.len());
tokio::select! { tokio::select! {
read_result = client_read.read(&mut buf) => { read_result = client_read.read(&mut buf[..max_read]) => {
match read_result { match read_result {
Ok(0) => break, Ok(0) => break,
Ok(n) => { Ok(n) => {
send_window.fetch_sub(n as u32, Ordering::Release);
let data_frame = encode_frame(stream_id, FRAME_DATA, &buf[..n]); let data_frame = encode_frame(stream_id, FRAME_DATA, &buf[..n]);
// Use send().await for backpressure — this is a per-stream task so if tunnel_data_tx.send(data_frame).await.is_err() {
// blocking only stalls this stream, not others. Prevents data loss log::warn!("Stream {} data channel closed, closing", stream_id);
// for large transfers (e.g. 352MB Docker layers).
if tunnel_writer_tx.send(data_frame).await.is_err() {
log::warn!("Stream {} tunnel writer closed, closing", stream_id);
break; break;
} }
} }
@@ -653,18 +749,32 @@ async fn handle_client_connection(
} }
} }
// Send CLOSE frame (only if not cancelled) // Send CLOSE frame via DATA channel (must arrive AFTER last DATA for this stream).
// Use send().await to guarantee delivery (try_send silently drops if channel full).
if !client_token.is_cancelled() { if !client_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE, &[]); let close_frame = encode_frame(stream_id, FRAME_CLOSE, &[]);
let _ = tunnel_writer_tx.try_send(close_frame); let _ = tunnel_data_tx.send(close_frame).await;
} }
// Cleanup // Wait for the download task (hub → client) to finish draining all buffered
// response data. Upload EOF just means the client is done sending; the download
// must continue until all response data has been written to the client.
// This is critical for asymmetric transfers like git fetch (small request, large response).
// The download task will exit when:
// - back_rx returns None (back_tx dropped below after await, or hub sent CLOSE_BACK)
// - client_write fails (client disconnected)
// - client_token is cancelled
let _ = tokio::time::timeout(
Duration::from_secs(300), // 5 min max wait for download to finish
&mut hub_to_client,
).await;
// Now safe to clean up — download has finished or timed out
{ {
let mut writers = client_writers.lock().await; let mut writers = client_writers.lock().await;
writers.remove(&stream_id); writers.remove(&stream_id);
} }
hub_to_client.abort(); hub_to_client.abort(); // No-op if already finished; safety net if timeout fired
let _ = edge_id; // used for logging context let _ = edge_id; // used for logging context
} }

View File

@@ -1,9 +1,10 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use std::sync::atomic::{AtomicU32, Ordering};
use std::time::Duration; use std::time::Duration;
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader}; use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
use tokio::net::{TcpListener, TcpStream}; use tokio::net::{TcpListener, TcpStream};
use tokio::sync::{mpsc, Mutex, RwLock, Semaphore}; use tokio::sync::{mpsc, Mutex, Notify, RwLock, Semaphore};
use tokio::time::{interval, sleep_until, Instant}; use tokio::time::{interval, sleep_until, Instant};
use tokio_rustls::TlsAcceptor; use tokio_rustls::TlsAcceptor;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
@@ -11,6 +12,19 @@ use serde::{Deserialize, Serialize};
use remoteingress_protocol::*; use remoteingress_protocol::*;
/// Per-stream state tracked in the hub's stream map.
///
/// An entry is inserted (keyed by stream id) when the hub sets up a new stream
/// for an edge, before the upstream connection task is spawned.
struct HubStreamState {
/// Channel to deliver FRAME_DATA payloads to the upstream writer task.
data_tx: mpsc::Sender<Vec<u8>>,
/// Cancellation token for this stream (a child of the edge connection's token).
cancel_token: CancellationToken,
/// Send window for FRAME_DATA_BACK (download direction); starts at INITIAL_STREAM_WINDOW.
/// Decremented by the upstream reader, incremented by FRAME_WINDOW_UPDATE from edge.
send_window: Arc<AtomicU32>,
/// Notifier to wake the upstream reader when the window opens.
window_notify: Arc<Notify>,
}
/// Hub configuration. /// Hub configuration.
#[derive(Debug, Clone, Deserialize, Serialize)] #[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
@@ -109,7 +123,7 @@ pub struct TunnelHub {
struct ConnectedEdgeInfo { struct ConnectedEdgeInfo {
connected_at: u64, connected_at: u64,
peer_addr: String, peer_addr: String,
active_streams: Arc<Mutex<HashMap<u32, (mpsc::Sender<Vec<u8>>, CancellationToken)>>>, active_streams: Arc<Mutex<HashMap<u32, HubStreamState>>>,
config_tx: mpsc::Sender<EdgeConfigUpdate>, config_tx: mpsc::Sender<EdgeConfigUpdate>,
#[allow(dead_code)] // kept alive for Drop — cancels child tokens when edge is removed #[allow(dead_code)] // kept alive for Drop — cancels child tokens when edge is removed
cancel_token: CancellationToken, cancel_token: CancellationToken,
@@ -284,6 +298,8 @@ async fn handle_edge_connection(
edge_token: CancellationToken, edge_token: CancellationToken,
peer_addr: String, peer_addr: String,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
// Disable Nagle's algorithm for low-latency control frames (PING/PONG, WINDOW_UPDATE)
stream.set_nodelay(true)?;
let tls_stream = acceptor.accept(stream).await?; let tls_stream = acceptor.accept(stream).await?;
let (read_half, mut write_half) = tokio::io::split(tls_stream); let (read_half, mut write_half) = tokio::io::split(tls_stream);
let mut buf_reader = BufReader::new(read_half); let mut buf_reader = BufReader::new(read_half);
@@ -333,7 +349,7 @@ async fn handle_edge_connection(
write_half.write_all(handshake_json.as_bytes()).await?; write_half.write_all(handshake_json.as_bytes()).await?;
// Track this edge // Track this edge
let streams: Arc<Mutex<HashMap<u32, (mpsc::Sender<Vec<u8>>, CancellationToken)>>> = let streams: Arc<Mutex<HashMap<u32, HubStreamState>>> =
Arc::new(Mutex::new(HashMap::new())); Arc::new(Mutex::new(HashMap::new()));
let now = std::time::SystemTime::now() let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH) .duration_since(std::time::UNIX_EPOCH)
@@ -357,19 +373,34 @@ async fn handle_edge_connection(
); );
} }
// A5: Channel-based writer replaces Arc<Mutex<WriteHalf>> // QoS dual-channel tunnel writer: control frames (PING/PONG/WINDOW_UPDATE/CLOSE)
// All frame writes go through this channel → dedicated writer task serializes them // have priority over data frames (DATA_BACK). This prevents PING starvation under load.
let (frame_writer_tx, mut frame_writer_rx) = mpsc::channel::<Vec<u8>>(4096); let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Vec<u8>>(64);
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(4096);
// Legacy alias for code that sends both control and data (will be migrated)
let frame_writer_tx = ctrl_tx.clone();
let writer_token = edge_token.clone(); let writer_token = edge_token.clone();
let writer_handle = tokio::spawn(async move { let writer_handle = tokio::spawn(async move {
// BufWriter coalesces small writes (frame headers, control frames) into fewer
// TLS records and syscalls. Flushed after each frame to avoid holding data.
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
loop { loop {
tokio::select! { tokio::select! {
data = frame_writer_rx.recv() => { biased; // control frames always take priority over data
ctrl = ctrl_rx.recv() => {
match ctrl {
Some(frame_data) => {
if writer.write_all(&frame_data).await.is_err() { break; }
if writer.flush().await.is_err() { break; }
}
None => break,
}
}
data = data_rx.recv() => {
match data { match data {
Some(frame_data) => { Some(frame_data) => {
if write_half.write_all(&frame_data).await.is_err() { if writer.write_all(&frame_data).await.is_err() { break; }
break; if writer.flush().await.is_err() { break; }
}
} }
None => break, None => break,
} }
@@ -453,7 +484,8 @@ async fn handle_edge_connection(
let edge_id_clone = edge_id.clone(); let edge_id_clone = edge_id.clone();
let event_tx_clone = event_tx.clone(); let event_tx_clone = event_tx.clone();
let streams_clone = streams.clone(); let streams_clone = streams.clone();
let writer_tx = frame_writer_tx.clone(); let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK
let data_writer_tx = data_tx.clone(); // data: DATA_BACK
let target = target_host.clone(); let target = target_host.clone();
let stream_token = edge_token.child_token(); let stream_token = edge_token.child_token();
@@ -462,11 +494,18 @@ async fn handle_edge_connection(
stream_id, stream_id,
}); });
// Create channel for data from edge to this stream // Create channel for data from edge to this stream (capacity 256 is sufficient with flow control)
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(256); let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(256);
let send_window = Arc::new(AtomicU32::new(INITIAL_STREAM_WINDOW));
let window_notify = Arc::new(Notify::new());
{ {
let mut s = streams.lock().await; let mut s = streams.lock().await;
s.insert(stream_id, (data_tx, stream_token.clone())); s.insert(stream_id, HubStreamState {
data_tx,
cancel_token: stream_token.clone(),
send_window: Arc::clone(&send_window),
window_notify: Arc::clone(&window_notify),
});
} }
// Spawn task: connect to SmartProxy, send PROXY header, pipe data // Spawn task: connect to SmartProxy, send PROXY header, pipe data
@@ -484,21 +523,43 @@ async fn handle_edge_connection(
format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into() format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into()
})??; })??;
upstream.set_nodelay(true)?;
upstream.write_all(proxy_header.as_bytes()).await?; upstream.write_all(proxy_header.as_bytes()).await?;
let (mut up_read, mut up_write) = let (mut up_read, mut up_write) =
upstream.into_split(); upstream.into_split();
// Forward data from edge (via channel) to SmartProxy // Forward data from edge (via channel) to SmartProxy
// After writing to upstream, send WINDOW_UPDATE_BACK to edge
let writer_token = stream_token.clone(); let writer_token = stream_token.clone();
let wub_tx = writer_tx.clone();
let writer_for_edge_data = tokio::spawn(async move { let writer_for_edge_data = tokio::spawn(async move {
let mut consumed_since_update: u32 = 0;
loop { loop {
tokio::select! { tokio::select! {
data = data_rx.recv() => { data = data_rx.recv() => {
match data { match data {
Some(data) => { Some(data) => {
if up_write.write_all(&data).await.is_err() { let len = data.len() as u32;
break; match tokio::time::timeout(
Duration::from_secs(60),
up_write.write_all(&data),
).await {
Ok(Ok(())) => {}
Ok(Err(_)) => break,
Err(_) => {
log::warn!("Stream {} write to upstream timed out (60s)", stream_id);
break;
}
}
// Track consumption for flow control
consumed_since_update += len;
if consumed_since_update >= WINDOW_UPDATE_THRESHOLD {
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update);
if wub_tx.try_send(frame).is_ok() {
consumed_since_update = 0;
}
// If try_send fails, keep accumulating — retry on next threshold
} }
} }
None => break, None => break,
@@ -507,24 +568,47 @@ async fn handle_edge_connection(
_ = writer_token.cancelled() => break, _ = writer_token.cancelled() => break,
} }
} }
// Send final window update for remaining consumed bytes
if consumed_since_update > 0 {
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update);
let _ = wub_tx.try_send(frame);
}
let _ = up_write.shutdown().await; let _ = up_write.shutdown().await;
}); });
// Forward data from SmartProxy back to edge via writer channel // Forward data from SmartProxy back to edge via writer channel
// with per-stream flow control (check send_window before reading)
let mut buf = vec![0u8; 32768]; let mut buf = vec![0u8; 32768];
loop { loop {
// Wait for send window to have capacity (with stall timeout)
loop {
let w = send_window.load(Ordering::Acquire);
if w > 0 { break; }
tokio::select! {
_ = window_notify.notified() => continue,
_ = stream_token.cancelled() => break,
_ = tokio::time::sleep(Duration::from_secs(120)) => {
log::warn!("Stream {} download stalled (window empty for 120s)", stream_id);
break;
}
}
}
if stream_token.is_cancelled() { break; }
// Limit read size to available window
let w = send_window.load(Ordering::Acquire) as usize;
let max_read = w.min(buf.len());
tokio::select! { tokio::select! {
read_result = up_read.read(&mut buf) => { read_result = up_read.read(&mut buf[..max_read]) => {
match read_result { match read_result {
Ok(0) => break, Ok(0) => break,
Ok(n) => { Ok(n) => {
send_window.fetch_sub(n as u32, Ordering::Release);
let frame = let frame =
encode_frame(stream_id, FRAME_DATA_BACK, &buf[..n]); encode_frame(stream_id, FRAME_DATA_BACK, &buf[..n]);
// Use send().await for backpressure — this is a per-stream task so if data_writer_tx.send(frame).await.is_err() {
// blocking only stalls this stream, not others. Prevents data loss log::warn!("Stream {} data channel closed, closing", stream_id);
// for large transfers (e.g. 352MB Docker layers).
if writer_tx.send(frame).await.is_err() {
log::warn!("Stream {} writer channel closed, closing", stream_id);
break; break;
} }
} }
@@ -535,10 +619,10 @@ async fn handle_edge_connection(
} }
} }
// Send CLOSE_BACK to edge (only if not cancelled) // Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK)
if !stream_token.is_cancelled() { if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]); let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let _ = writer_tx.try_send(close_frame); let _ = data_writer_tx.try_send(close_frame);
} }
writer_for_edge_data.abort(); writer_for_edge_data.abort();
@@ -548,10 +632,10 @@ async fn handle_edge_connection(
if let Err(e) = result { if let Err(e) = result {
log::error!("Stream {} error: {}", stream_id, e); log::error!("Stream {} error: {}", stream_id, e);
// Send CLOSE_BACK on error (only if not cancelled) // Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK)
if !stream_token.is_cancelled() { if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]); let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let _ = writer_tx.try_send(close_frame); let _ = data_writer_tx.try_send(close_frame);
} }
} }
@@ -569,23 +653,37 @@ async fn handle_edge_connection(
}); });
} }
FRAME_DATA => { FRAME_DATA => {
// Non-blocking send to prevent head-of-line blocking in the main dispatch loop. // Non-blocking dispatch to per-stream channel.
// If the per-stream channel is full, close the stream rather than silently // With flow control, the sender should rarely exceed the channel capacity.
// dropping data (which would corrupt the TCP stream).
let mut s = streams.lock().await; let mut s = streams.lock().await;
if let Some((tx, _)) = s.get(&frame.stream_id) { if let Some(state) = s.get(&frame.stream_id) {
if tx.try_send(frame.payload).is_err() { if state.data_tx.try_send(frame.payload).is_err() {
log::warn!("Stream {} data channel full, closing stream to prevent data corruption", frame.stream_id); log::warn!("Stream {} data channel full, closing stream", frame.stream_id);
if let Some((_, token)) = s.remove(&frame.stream_id) { if let Some(state) = s.remove(&frame.stream_id) {
token.cancel(); state.cancel_token.cancel();
}
}
}
}
FRAME_WINDOW_UPDATE => {
// Edge consumed data — increase our send window for this stream
if let Some(increment) = decode_window_update(&frame.payload) {
if increment > 0 {
let s = streams.lock().await;
if let Some(state) = s.get(&frame.stream_id) {
let prev = state.send_window.fetch_add(increment, Ordering::Release);
if prev + increment > MAX_WINDOW_SIZE {
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
}
state.window_notify.notify_one();
} }
} }
} }
} }
FRAME_CLOSE => { FRAME_CLOSE => {
let mut s = streams.lock().await; let mut s = streams.lock().await;
if let Some((_, token)) = s.remove(&frame.stream_id) { if let Some(state) = s.remove(&frame.stream_id) {
token.cancel(); state.cancel_token.cancel();
let _ = event_tx.try_send(HubEvent::StreamClosed { let _ = event_tx.try_send(HubEvent::StreamClosed {
edge_id: edge_id.clone(), edge_id: edge_id.clone(),
stream_id: frame.stream_id, stream_id: frame.stream_id,

View File

@@ -9,6 +9,8 @@ pub const FRAME_CLOSE_BACK: u8 = 0x05;
pub const FRAME_CONFIG: u8 = 0x06; // Hub -> Edge: configuration update pub const FRAME_CONFIG: u8 = 0x06; // Hub -> Edge: configuration update
pub const FRAME_PING: u8 = 0x07; // Hub -> Edge: heartbeat probe pub const FRAME_PING: u8 = 0x07; // Hub -> Edge: heartbeat probe
pub const FRAME_PONG: u8 = 0x08; // Edge -> Hub: heartbeat response pub const FRAME_PONG: u8 = 0x08; // Edge -> Hub: heartbeat response
pub const FRAME_WINDOW_UPDATE: u8 = 0x09; // Edge -> Hub: per-stream flow control
pub const FRAME_WINDOW_UPDATE_BACK: u8 = 0x0A; // Hub -> Edge: per-stream flow control
// Frame header size: 4 (stream_id) + 1 (type) + 4 (length) = 9 bytes // Frame header size: 4 (stream_id) + 1 (type) + 4 (length) = 9 bytes
pub const FRAME_HEADER_SIZE: usize = 9; pub const FRAME_HEADER_SIZE: usize = 9;
@@ -16,6 +18,28 @@ pub const FRAME_HEADER_SIZE: usize = 9;
// Maximum payload size (16 MB) // Maximum payload size (16 MB)
pub const MAX_PAYLOAD_SIZE: u32 = 16 * 1024 * 1024; pub const MAX_PAYLOAD_SIZE: u32 = 16 * 1024 * 1024;
// Per-stream flow control tuning.

/// Window every stream starts with: 4 MiB. Chosen so that a single stream can
/// still move roughly 40 MB/s when the round-trip time is 100ms.
pub const INITIAL_STREAM_WINDOW: u32 = 4 << 20;
/// Consumed-byte count at which a WINDOW_UPDATE is sent
/// (one half of the initial window).
pub const WINDOW_UPDATE_THRESHOLD: u32 = INITIAL_STREAM_WINDOW >> 1;
/// Ceiling for a stream's window (16 MiB), guarding against overflow.
pub const MAX_WINDOW_SIZE: u32 = 16 << 20;
/// Build a window-update frame for `stream_id`.
///
/// `frame_type` selects the direction-specific frame type, and `increment` is
/// carried as the frame payload in 4 big-endian bytes.
pub fn encode_window_update(stream_id: u32, frame_type: u8, increment: u32) -> Vec<u8> {
    let payload = increment.to_be_bytes();
    encode_frame(stream_id, frame_type, &payload)
}
/// Parse the payload of a WINDOW_UPDATE frame.
///
/// The payload must be exactly 4 bytes, read as a big-endian `u32` byte
/// increment. Any other length is treated as malformed and yields `None`.
pub fn decode_window_update(payload: &[u8]) -> Option<u32> {
    match *payload {
        [b0, b1, b2, b3] => Some(u32::from_be_bytes([b0, b1, b2, b3])),
        _ => None,
    }
}
/// A single multiplexed frame. /// A single multiplexed frame.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Frame { pub struct Frame {

View File

@@ -3,6 +3,6 @@
*/ */
export const commitinfo = { export const commitinfo = {
name: '@serve.zone/remoteingress', name: '@serve.zone/remoteingress',
version: '4.4.1', version: '4.5.7',
description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.' description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.'
} }