fix(rust-core,protocol): eliminate edge stream registration races and reduce frame buffering copies

This commit is contained in:
2026-03-17 16:37:43 +00:00
parent e8d429f117
commit 156b17135f
8 changed files with 283 additions and 174 deletions

View File

@@ -9,6 +9,7 @@ use tokio::task::JoinHandle;
use tokio::time::{Instant, sleep_until};
use tokio_rustls::TlsConnector;
use tokio_util::sync::CancellationToken;
use bytes::Bytes;
use serde::{Deserialize, Serialize};
use remoteingress_protocol::*;
@@ -22,11 +23,11 @@ enum EdgeFrameAction {
Disconnect(String),
}
/// Per-stream state tracked in the edge's client_writers map.
/// Per-stream state tracked in the edge's stream map.
struct EdgeStreamState {
/// Unbounded channel to deliver FRAME_DATA_BACK payloads to the hub_to_client task.
/// Unbounded because flow control (WINDOW_UPDATE) already limits bytes-in-flight.
back_tx: mpsc::UnboundedSender<Vec<u8>>,
back_tx: mpsc::UnboundedSender<Bytes>,
/// Send window for FRAME_DATA (upload direction).
/// Decremented by the client reader, incremented by FRAME_WINDOW_UPDATE_BACK from hub.
send_window: Arc<AtomicU32>,
@@ -34,6 +35,12 @@ struct EdgeStreamState {
window_notify: Arc<Notify>,
}
/// Registration message sent from per-stream tasks to the main I/O loop.
struct StreamRegistration {
stream_id: u32,
state: EdgeStreamState,
}
/// Edge configuration (hub-host + credentials only; ports come from hub).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
@@ -284,39 +291,29 @@ enum EdgeLoopResult {
/// Process a single frame received from the hub side of the tunnel.
/// Handles FRAME_DATA_BACK, FRAME_WINDOW_UPDATE_BACK, FRAME_CLOSE_BACK, FRAME_CONFIG, FRAME_PING.
async fn handle_edge_frame(
/// No mutex — edge_streams is owned by the main I/O loop (same pattern as hub.rs).
fn handle_edge_frame(
frame: Frame,
tunnel_io: &mut remoteingress_protocol::TunnelIo<EdgeTlsStream>,
client_writers: &Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
listen_ports: &Arc<RwLock<Vec<u16>>>,
event_tx: &mpsc::Sender<EdgeEvent>,
tunnel_writer_tx: &mpsc::Sender<Vec<u8>>,
tunnel_data_tx: &mpsc::Sender<Vec<u8>>,
port_listeners: &mut HashMap<u16, JoinHandle<()>>,
active_streams: &Arc<AtomicU32>,
next_stream_id: &Arc<AtomicU32>,
edge_id: &str,
connection_token: &CancellationToken,
bind_address: &str,
edge_streams: &mut HashMap<u32, EdgeStreamState>,
listen_ports_update: &mut Option<Vec<u16>>,
) -> EdgeFrameAction {
match frame.frame_type {
FRAME_DATA_BACK => {
// Dispatch to per-stream unbounded channel. Flow control (WINDOW_UPDATE)
// limits bytes-in-flight, so the channel won't grow unbounded. send() only
// fails if the receiver is dropped (hub_to_client task already exited).
let mut writers = client_writers.lock().await;
if let Some(state) = writers.get(&frame.stream_id) {
if let Some(state) = edge_streams.get(&frame.stream_id) {
if state.back_tx.send(frame.payload).is_err() {
// Receiver dropped — hub_to_client task already exited, clean up
writers.remove(&frame.stream_id);
edge_streams.remove(&frame.stream_id);
}
}
}
FRAME_WINDOW_UPDATE_BACK => {
if let Some(increment) = decode_window_update(&frame.payload) {
if increment > 0 {
let writers = client_writers.lock().await;
if let Some(state) = writers.get(&frame.stream_id) {
if let Some(state) = edge_streams.get(&frame.stream_id) {
let prev = state.send_window.fetch_add(increment, Ordering::Release);
if prev + increment > MAX_WINDOW_SIZE {
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
@@ -327,28 +324,12 @@ async fn handle_edge_frame(
}
}
FRAME_CLOSE_BACK => {
let mut writers = client_writers.lock().await;
writers.remove(&frame.stream_id);
edge_streams.remove(&frame.stream_id);
}
FRAME_CONFIG => {
if let Ok(update) = serde_json::from_slice::<ConfigUpdate>(&frame.payload) {
log::info!("Config update from hub: ports {:?}", update.listen_ports);
*listen_ports.write().await = update.listen_ports.clone();
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
listen_ports: update.listen_ports.clone(),
});
apply_port_config(
&update.listen_ports,
port_listeners,
tunnel_writer_tx,
tunnel_data_tx,
client_writers,
active_streams,
next_stream_id,
edge_id,
connection_token,
bind_address,
);
*listen_ports_update = Some(update.listen_ports);
}
}
FRAME_PING => {
@@ -490,14 +471,17 @@ async fn connect_to_hub_and_run(
}
});
// Client socket map: stream_id -> per-stream state (back channel + flow control)
let client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>> =
Arc::new(Mutex::new(HashMap::new()));
// Stream map owned by the main I/O loop — no mutex, matching hub.rs pattern.
let mut edge_streams: HashMap<u32, EdgeStreamState> = HashMap::new();
// Channel for per-stream tasks to register their stream state with the main loop.
let (register_tx, mut register_rx) = mpsc::channel::<StreamRegistration>(256);
// Channel for per-stream tasks to deregister when done.
let (cleanup_tx, mut cleanup_rx) = mpsc::channel::<u32>(256);
// QoS dual-channel: ctrl frames have priority over data frames.
// Stream handlers send through these channels → TunnelIo drains them.
let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Vec<u8>>(256);
let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Vec<u8>>(4096);
let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Bytes>(256);
let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Bytes>(4096);
let tunnel_writer_tx = tunnel_ctrl_tx.clone();
// Start TCP listeners for initial ports
@@ -508,7 +492,8 @@ async fn connect_to_hub_and_run(
&mut port_listeners,
&tunnel_writer_tx,
&tunnel_data_tx,
&client_writers,
&register_tx,
&cleanup_tx,
active_streams,
next_stream_id,
&config.edge_id,
@@ -525,7 +510,18 @@ async fn connect_to_hub_and_run(
let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur));
let result = 'io_loop: loop {
// Drain stream registrations from per-stream tasks (before poll_step so
// registrations are processed before OPEN frames are sent to the hub).
while let Ok(reg) = register_rx.try_recv() {
edge_streams.insert(reg.stream_id, reg.state);
}
// Drain stream cleanups from per-stream tasks
while let Ok(stream_id) = cleanup_rx.try_recv() {
edge_streams.remove(&stream_id);
}
// Drain any buffered frames
let mut listen_ports_update = None;
loop {
let frame = match tunnel_io.try_parse_frame() {
Some(Ok(f)) => f,
@@ -538,28 +534,55 @@ async fn connect_to_hub_and_run(
last_activity = Instant::now();
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
if let EdgeFrameAction::Disconnect(reason) = handle_edge_frame(
frame, &mut tunnel_io, &client_writers, listen_ports, event_tx,
&tunnel_writer_tx, &tunnel_data_tx, &mut port_listeners,
active_streams, next_stream_id, &config.edge_id, connection_token, bind_address,
).await {
frame, &mut tunnel_io, &mut edge_streams, &mut listen_ports_update,
) {
break 'io_loop EdgeLoopResult::Reconnect(reason);
}
}
// Apply port config update if handle_edge_frame signalled one
if let Some(new_ports) = listen_ports_update.take() {
*listen_ports.write().await = new_ports.clone();
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
listen_ports: new_ports.clone(),
});
apply_port_config(
&new_ports,
&mut port_listeners,
&tunnel_writer_tx,
&tunnel_data_tx,
&register_tx,
&cleanup_tx,
active_streams,
next_stream_id,
&config.edge_id,
connection_token,
bind_address,
);
}
// Poll I/O: write(ctrl→data), flush, read, channels, timers
let event = std::future::poll_fn(|cx| {
tunnel_io.poll_step(cx, &mut tunnel_ctrl_rx, &mut tunnel_data_rx, &mut liveness_deadline, connection_token)
}).await;
// Drain registrations/cleanups before processing the event — registrations
// may have arrived while poll_step was running (multiple poll cycles inside .await).
while let Ok(reg) = register_rx.try_recv() {
edge_streams.insert(reg.stream_id, reg.state);
}
while let Ok(stream_id) = cleanup_rx.try_recv() {
edge_streams.remove(&stream_id);
}
let mut listen_ports_update = None;
match event {
remoteingress_protocol::TunnelEvent::Frame(frame) => {
last_activity = Instant::now();
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
if let EdgeFrameAction::Disconnect(reason) = handle_edge_frame(
frame, &mut tunnel_io, &client_writers, listen_ports, event_tx,
&tunnel_writer_tx, &tunnel_data_tx, &mut port_listeners,
active_streams, next_stream_id, &config.edge_id, connection_token, bind_address,
).await {
frame, &mut tunnel_io, &mut edge_streams, &mut listen_ports_update,
) {
break EdgeLoopResult::Reconnect(reason);
}
}
@@ -586,6 +609,27 @@ async fn connect_to_hub_and_run(
break EdgeLoopResult::Shutdown;
}
}
// Apply port config update if handle_edge_frame signalled one
if let Some(new_ports) = listen_ports_update.take() {
*listen_ports.write().await = new_ports.clone();
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
listen_ports: new_ports.clone(),
});
apply_port_config(
&new_ports,
&mut port_listeners,
&tunnel_writer_tx,
&tunnel_data_tx,
&register_tx,
&cleanup_tx,
active_streams,
next_stream_id,
&config.edge_id,
connection_token,
bind_address,
);
}
};
// Cancel stream tokens FIRST so stream handlers exit immediately.
@@ -612,9 +656,10 @@ async fn connect_to_hub_and_run(
fn apply_port_config(
new_ports: &[u16],
port_listeners: &mut HashMap<u16, JoinHandle<()>>,
tunnel_ctrl_tx: &mpsc::Sender<Vec<u8>>,
tunnel_data_tx: &mpsc::Sender<Vec<u8>>,
client_writers: &Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
tunnel_ctrl_tx: &mpsc::Sender<Bytes>,
tunnel_data_tx: &mpsc::Sender<Bytes>,
register_tx: &mpsc::Sender<StreamRegistration>,
cleanup_tx: &mpsc::Sender<u32>,
active_streams: &Arc<AtomicU32>,
next_stream_id: &Arc<AtomicU32>,
edge_id: &str,
@@ -636,7 +681,8 @@ fn apply_port_config(
for &port in new_set.difference(&old_set) {
let tunnel_ctrl_tx = tunnel_ctrl_tx.clone();
let tunnel_data_tx = tunnel_data_tx.clone();
let client_writers = client_writers.clone();
let register_tx = register_tx.clone();
let cleanup_tx = cleanup_tx.clone();
let active_streams = active_streams.clone();
let next_stream_id = next_stream_id.clone();
let edge_id = edge_id.to_string();
@@ -670,7 +716,8 @@ fn apply_port_config(
let stream_id = next_stream_id.fetch_add(1, Ordering::Relaxed);
let tunnel_ctrl_tx = tunnel_ctrl_tx.clone();
let tunnel_data_tx = tunnel_data_tx.clone();
let client_writers = client_writers.clone();
let register_tx = register_tx.clone();
let cleanup_tx = cleanup_tx.clone();
let active_streams = active_streams.clone();
let edge_id = edge_id.clone();
let client_token = port_token.child_token();
@@ -686,7 +733,8 @@ fn apply_port_config(
&edge_id,
tunnel_ctrl_tx,
tunnel_data_tx,
client_writers,
register_tx,
cleanup_tx,
client_token,
Arc::clone(&active_streams),
)
@@ -727,9 +775,10 @@ async fn handle_client_connection(
stream_id: u32,
dest_port: u16,
edge_id: &str,
tunnel_ctrl_tx: mpsc::Sender<Vec<u8>>,
tunnel_data_tx: mpsc::Sender<Vec<u8>>,
client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
tunnel_ctrl_tx: mpsc::Sender<Bytes>,
tunnel_data_tx: mpsc::Sender<Bytes>,
register_tx: mpsc::Sender<StreamRegistration>,
cleanup_tx: mpsc::Sender<u32>,
client_token: CancellationToken,
active_streams: Arc<AtomicU32>,
) {
@@ -739,6 +788,36 @@ async fn handle_client_connection(
// Determine edge IP (use 0.0.0.0 as placeholder — hub doesn't use it for routing)
let edge_ip = "0.0.0.0";
// Per-stream unbounded back-channel. Flow control (WINDOW_UPDATE) limits
// bytes-in-flight, so this won't grow unbounded. Unbounded avoids killing
// streams due to channel overflow — backpressure slows streams, never kills them.
let (back_tx, mut back_rx) = mpsc::unbounded_channel::<Bytes>();
// Adaptive initial window: scale with current stream count to keep total in-flight
// data within the 32MB budget. Prevents burst flooding when many streams open.
let initial_window = remoteingress_protocol::compute_window_for_stream_count(
active_streams.load(Ordering::Relaxed),
);
let send_window = Arc::new(AtomicU32::new(initial_window));
let window_notify = Arc::new(Notify::new());
// Register with the main I/O loop BEFORE sending OPEN. The main loop drains
// register_rx before poll_step drains ctrl_rx, guaranteeing the stream is
// registered before the OPEN frame reaches the hub and DATA_BACK arrives.
let reg_ok = tokio::select! {
result = register_tx.send(StreamRegistration {
stream_id,
state: EdgeStreamState {
back_tx,
send_window: Arc::clone(&send_window),
window_notify: Arc::clone(&window_notify),
},
}) => result.is_ok(),
_ = client_token.cancelled() => false,
};
if !reg_ok {
return;
}
// Send OPEN frame with PROXY v1 header via control channel
let proxy_header = build_proxy_v1_header(&client_ip, edge_ip, client_port, dest_port);
let open_frame = encode_frame(stream_id, FRAME_OPEN, proxy_header.as_bytes());
@@ -747,29 +826,10 @@ async fn handle_client_connection(
_ = client_token.cancelled() => false,
};
if !send_ok {
let _ = cleanup_tx.try_send(stream_id);
return;
}
// Per-stream unbounded back-channel. Flow control (WINDOW_UPDATE) limits
// bytes-in-flight, so this won't grow unbounded. Unbounded avoids killing
// streams due to channel overflow — backpressure slows streams, never kills them.
let (back_tx, mut back_rx) = mpsc::unbounded_channel::<Vec<u8>>();
// Adaptive initial window: scale with current stream count to keep total in-flight
// data within the 32MB budget. Prevents burst flooding when many streams open.
let initial_window = remoteingress_protocol::compute_window_for_stream_count(
active_streams.load(Ordering::Relaxed),
);
let send_window = Arc::new(AtomicU32::new(initial_window));
let window_notify = Arc::new(Notify::new());
{
let mut writers = client_writers.lock().await;
writers.insert(stream_id, EdgeStreamState {
back_tx,
send_window: Arc::clone(&send_window),
window_notify: Arc::clone(&window_notify),
});
}
let (mut client_read, mut client_write) = client_stream.into_split();
// Task: hub -> client (download direction)
@@ -853,20 +913,17 @@ async fn handle_client_connection(
}
if client_token.is_cancelled() { break; }
// Limit read size to available window.
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
// which would be falsely interpreted as EOF.
let w = send_window.load(Ordering::Acquire) as usize;
// Proactive QoS: clamp send_window to current adaptive target so existing
// streams converge immediately when concurrency increases (no drain cycle).
let adaptive_target = remoteingress_protocol::compute_window_for_stream_count(
active_streams.load(Ordering::Relaxed),
);
let w = remoteingress_protocol::clamp_send_window(&send_window, adaptive_target) as usize;
if w == 0 {
log::warn!("Stream {} upload: window still 0 after stall timeout, closing", stream_id);
break;
}
// Adaptive: cap read to current per-stream target window
let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count(
active_streams.load(Ordering::Relaxed),
) as usize;
let max_read = w.min(32768).min(adaptive_cap);
let max_read = w.min(32768);
tokio::select! {
read_result = client_read.read(&mut buf[FRAME_HEADER_SIZE..FRAME_HEADER_SIZE + max_read]) => {
@@ -875,7 +932,7 @@ async fn handle_client_connection(
Ok(n) => {
send_window.fetch_sub(n as u32, Ordering::Release);
encode_frame_header(&mut buf, stream_id, FRAME_DATA, n);
let data_frame = buf[..FRAME_HEADER_SIZE + n].to_vec();
let data_frame = Bytes::copy_from_slice(&buf[..FRAME_HEADER_SIZE + n]);
let sent = tokio::select! {
result = tunnel_data_tx.send(data_frame) => result.is_ok(),
_ = client_token.cancelled() => false,
@@ -910,11 +967,8 @@ async fn handle_client_connection(
}
}
// Clean up
{
let mut writers = client_writers.lock().await;
writers.remove(&stream_id);
}
// Clean up — notify main loop to remove stream state
let _ = cleanup_tx.try_send(stream_id);
hub_to_client.abort(); // No-op if already finished; safety net if timeout fired
let _ = edge_id; // used for logging context
}

View File

@@ -10,6 +10,7 @@ use tokio_rustls::TlsAcceptor;
use tokio_util::sync::CancellationToken;
use serde::{Deserialize, Serialize};
use bytes::Bytes;
use remoteingress_protocol::*;
type HubTlsStream = tokio_rustls::server::TlsStream<TcpStream>;
@@ -26,7 +27,7 @@ struct HubStreamState {
/// Unbounded channel to deliver FRAME_DATA payloads to the upstream writer task.
/// Unbounded because flow control (WINDOW_UPDATE) already limits bytes-in-flight.
/// A bounded channel would kill streams instead of applying backpressure.
data_tx: mpsc::UnboundedSender<Vec<u8>>,
data_tx: mpsc::UnboundedSender<Bytes>,
/// Cancellation token for this stream.
cancel_token: CancellationToken,
/// Send window for FRAME_DATA_BACK (download direction).
@@ -307,8 +308,8 @@ async fn handle_hub_frame(
edge_stream_count: &Arc<AtomicU32>,
edge_id: &str,
event_tx: &mpsc::Sender<HubEvent>,
ctrl_tx: &mpsc::Sender<Vec<u8>>,
data_tx: &mpsc::Sender<Vec<u8>>,
ctrl_tx: &mpsc::Sender<Bytes>,
data_tx: &mpsc::Sender<Bytes>,
target_host: &str,
edge_token: &CancellationToken,
cleanup_tx: &mpsc::Sender<u32>,
@@ -346,7 +347,7 @@ async fn handle_hub_frame(
});
// Create channel for data from edge to this stream
let (stream_data_tx, mut stream_data_rx) = mpsc::unbounded_channel::<Vec<u8>>();
let (stream_data_tx, mut stream_data_rx) = mpsc::unbounded_channel::<Bytes>();
// Adaptive initial window: scale with current stream count
// to keep total in-flight data within the 32MB budget.
let initial_window = compute_window_for_stream_count(
@@ -478,20 +479,17 @@ async fn handle_hub_frame(
}
if stream_token.is_cancelled() { break; }
// Limit read size to available window.
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
// which would be falsely interpreted as EOF.
let w = send_window.load(Ordering::Acquire) as usize;
// Proactive QoS: clamp send_window to current adaptive target so existing
// streams converge immediately when concurrency increases (no drain cycle).
let adaptive_target = remoteingress_protocol::compute_window_for_stream_count(
stream_counter.load(Ordering::Relaxed),
);
let w = remoteingress_protocol::clamp_send_window(&send_window, adaptive_target) as usize;
if w == 0 {
log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id);
break;
}
// Adaptive: cap read to current per-stream target window
let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count(
stream_counter.load(Ordering::Relaxed),
) as usize;
let max_read = w.min(32768).min(adaptive_cap);
let max_read = w.min(32768);
tokio::select! {
read_result = up_read.read(&mut buf[FRAME_HEADER_SIZE..FRAME_HEADER_SIZE + max_read]) => {
@@ -500,7 +498,7 @@ async fn handle_hub_frame(
Ok(n) => {
send_window.fetch_sub(n as u32, Ordering::Release);
encode_frame_header(&mut buf, stream_id, FRAME_DATA_BACK, n);
let frame = buf[..FRAME_HEADER_SIZE + n].to_vec();
let frame = Bytes::copy_from_slice(&buf[..FRAME_HEADER_SIZE + n]);
let sent = tokio::select! {
result = data_writer_tx.send(frame) => result.is_ok(),
_ = stream_token.cancelled() => false,
@@ -711,8 +709,8 @@ async fn handle_edge_connection(
// QoS dual-channel: ctrl frames have priority over data frames.
// Stream handlers send through these channels -> TunnelIo drains them.
let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Vec<u8>>(256);
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(4096);
let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Bytes>(256);
let (data_tx, mut data_rx) = mpsc::channel::<Bytes>(4096);
// Spawn task to forward config updates as FRAME_CONFIG frames
let config_writer_tx = ctrl_tx.clone();