fix(protocol,edge): optimize tunnel frame handling and zero-copy uploads in edge I/O

This commit is contained in:
2026-03-17 11:15:18 +00:00
parent 1979910f6f
commit 2e5ceeaf5c
5 changed files with 555 additions and 732 deletions

View File

@@ -13,6 +13,15 @@ use serde::{Deserialize, Serialize};
use remoteingress_protocol::*;
type EdgeTlsStream = tokio_rustls::client::TlsStream<TcpStream>;
/// Result of processing a frame (shared with hub.rs pattern).
#[allow(dead_code)]
enum EdgeFrameAction {
Continue,
Disconnect(String),
}
/// Per-stream state tracked in the edge's client_writers map.
struct EdgeStreamState {
/// Channel to deliver FRAME_DATA_BACK payloads to the hub_to_client task.
@@ -272,6 +281,83 @@ enum EdgeLoopResult {
Reconnect(String), // reason for disconnection
}
/// Process a single frame received from the hub side of the tunnel.
/// Handles FRAME_DATA_BACK, FRAME_WINDOW_UPDATE_BACK, FRAME_CLOSE_BACK, FRAME_CONFIG, FRAME_PING.
async fn handle_edge_frame(
frame: Frame,
tunnel_io: &mut remoteingress_protocol::TunnelIo<EdgeTlsStream>,
client_writers: &Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
listen_ports: &Arc<RwLock<Vec<u16>>>,
event_tx: &mpsc::Sender<EdgeEvent>,
tunnel_writer_tx: &mpsc::Sender<Vec<u8>>,
tunnel_data_tx: &mpsc::Sender<Vec<u8>>,
port_listeners: &mut HashMap<u16, JoinHandle<()>>,
active_streams: &Arc<AtomicU32>,
next_stream_id: &Arc<AtomicU32>,
edge_id: &str,
connection_token: &CancellationToken,
bind_address: &str,
) -> EdgeFrameAction {
match frame.frame_type {
FRAME_DATA_BACK => {
let mut writers = client_writers.lock().await;
if let Some(state) = writers.get(&frame.stream_id) {
if state.back_tx.try_send(frame.payload).is_err() {
log::warn!("Stream {} back-channel full, closing", frame.stream_id);
writers.remove(&frame.stream_id);
}
}
}
FRAME_WINDOW_UPDATE_BACK => {
if let Some(increment) = decode_window_update(&frame.payload) {
if increment > 0 {
let writers = client_writers.lock().await;
if let Some(state) = writers.get(&frame.stream_id) {
let prev = state.send_window.fetch_add(increment, Ordering::Release);
if prev + increment > MAX_WINDOW_SIZE {
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
}
state.window_notify.notify_one();
}
}
}
}
FRAME_CLOSE_BACK => {
let mut writers = client_writers.lock().await;
writers.remove(&frame.stream_id);
}
FRAME_CONFIG => {
if let Ok(update) = serde_json::from_slice::<ConfigUpdate>(&frame.payload) {
log::info!("Config update from hub: ports {:?}", update.listen_ports);
*listen_ports.write().await = update.listen_ports.clone();
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
listen_ports: update.listen_ports.clone(),
});
apply_port_config(
&update.listen_ports,
port_listeners,
tunnel_writer_tx,
tunnel_data_tx,
client_writers,
active_streams,
next_stream_id,
edge_id,
connection_token,
bind_address,
);
}
}
FRAME_PING => {
// Queue PONG directly — no channel round-trip, guaranteed delivery
tunnel_io.queue_ctrl(encode_frame(0, FRAME_PONG, &[]));
}
_ => {
log::warn!("Unexpected frame type {} from hub", frame.frame_type);
}
}
EdgeFrameAction::Continue
}
async fn connect_to_hub_and_run(
config: &EdgeConfig,
connected: &Arc<RwLock<bool>>,
@@ -436,73 +522,22 @@ async fn connect_to_hub_and_run(
let result = 'io_loop: loop {
// Drain any buffered frames
loop {
match tunnel_io.try_parse_frame() {
Some(Ok(frame)) => {
last_activity = Instant::now();
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
match frame.frame_type {
FRAME_DATA_BACK => {
let mut writers = client_writers.lock().await;
if let Some(state) = writers.get(&frame.stream_id) {
if state.back_tx.try_send(frame.payload).is_err() {
log::warn!("Stream {} back-channel full, closing", frame.stream_id);
writers.remove(&frame.stream_id);
}
}
}
FRAME_WINDOW_UPDATE_BACK => {
if let Some(increment) = decode_window_update(&frame.payload) {
if increment > 0 {
let writers = client_writers.lock().await;
if let Some(state) = writers.get(&frame.stream_id) {
let prev = state.send_window.fetch_add(increment, Ordering::Release);
if prev + increment > MAX_WINDOW_SIZE {
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
}
state.window_notify.notify_one();
}
}
}
}
FRAME_CLOSE_BACK => {
let mut writers = client_writers.lock().await;
writers.remove(&frame.stream_id);
}
FRAME_CONFIG => {
if let Ok(update) = serde_json::from_slice::<ConfigUpdate>(&frame.payload) {
log::info!("Config update from hub: ports {:?}", update.listen_ports);
*listen_ports.write().await = update.listen_ports.clone();
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
listen_ports: update.listen_ports.clone(),
});
apply_port_config(
&update.listen_ports,
&mut port_listeners,
&tunnel_writer_tx,
&tunnel_data_tx,
&client_writers,
active_streams,
next_stream_id,
&config.edge_id,
connection_token,
bind_address,
);
}
}
FRAME_PING => {
// Queue PONG directly — no channel round-trip, guaranteed delivery
tunnel_io.queue_ctrl(encode_frame(0, FRAME_PONG, &[]));
}
_ => {
log::warn!("Unexpected frame type {} from hub", frame.frame_type);
}
}
}
let frame = match tunnel_io.try_parse_frame() {
Some(Ok(f)) => f,
Some(Err(e)) => {
log::error!("Hub frame error: {}", e);
break 'io_loop EdgeLoopResult::Reconnect(format!("hub_frame_error: {}", e));
}
None => break,
};
last_activity = Instant::now();
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
if let EdgeFrameAction::Disconnect(reason) = handle_edge_frame(
frame, &mut tunnel_io, &client_writers, listen_ports, event_tx,
&tunnel_writer_tx, &tunnel_data_tx, &mut port_listeners,
active_streams, next_stream_id, &config.edge_id, connection_token, bind_address,
).await {
break 'io_loop EdgeLoopResult::Reconnect(reason);
}
}
@@ -515,61 +550,12 @@ async fn connect_to_hub_and_run(
remoteingress_protocol::TunnelEvent::Frame(frame) => {
last_activity = Instant::now();
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
match frame.frame_type {
FRAME_DATA_BACK => {
let mut writers = client_writers.lock().await;
if let Some(state) = writers.get(&frame.stream_id) {
if state.back_tx.try_send(frame.payload).is_err() {
log::warn!("Stream {} back-channel full, closing", frame.stream_id);
writers.remove(&frame.stream_id);
}
}
}
FRAME_WINDOW_UPDATE_BACK => {
if let Some(increment) = decode_window_update(&frame.payload) {
if increment > 0 {
let writers = client_writers.lock().await;
if let Some(state) = writers.get(&frame.stream_id) {
let prev = state.send_window.fetch_add(increment, Ordering::Release);
if prev + increment > MAX_WINDOW_SIZE {
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
}
state.window_notify.notify_one();
}
}
}
}
FRAME_CLOSE_BACK => {
let mut writers = client_writers.lock().await;
writers.remove(&frame.stream_id);
}
FRAME_CONFIG => {
if let Ok(update) = serde_json::from_slice::<ConfigUpdate>(&frame.payload) {
log::info!("Config update from hub: ports {:?}", update.listen_ports);
*listen_ports.write().await = update.listen_ports.clone();
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
listen_ports: update.listen_ports.clone(),
});
apply_port_config(
&update.listen_ports,
&mut port_listeners,
&tunnel_writer_tx,
&tunnel_data_tx,
&client_writers,
active_streams,
next_stream_id,
&config.edge_id,
connection_token,
bind_address,
);
}
}
FRAME_PING => {
tunnel_io.queue_ctrl(encode_frame(0, FRAME_PONG, &[]));
}
_ => {
log::warn!("Unexpected frame type {} from hub", frame.frame_type);
}
if let EdgeFrameAction::Disconnect(reason) = handle_edge_frame(
frame, &mut tunnel_io, &client_writers, listen_ports, event_tx,
&tunnel_writer_tx, &tunnel_data_tx, &mut port_listeners,
active_streams, next_stream_id, &config.edge_id, connection_token, bind_address,
).await {
break EdgeLoopResult::Reconnect(reason);
}
}
remoteingress_protocol::TunnelEvent::Eof => {
@@ -813,15 +799,21 @@ async fn handle_client_connection(
let _ = client_write.shutdown().await;
});
// Task: client -> hub (upload direction) with per-stream flow control
let mut buf = vec![0u8; 32768];
// Task: client -> hub (upload direction) with per-stream flow control.
// Zero-copy: read payload directly after the header, then prepend header.
let mut buf = vec![0u8; FRAME_HEADER_SIZE + 32768];
loop {
// Wait for send window to have capacity (with stall timeout)
// Wait for send window to have capacity (with stall timeout).
// Safe pattern: register notified BEFORE checking the condition
// to avoid missing a notify_one that fires between load and select.
loop {
let notified = window_notify.notified();
tokio::pin!(notified);
notified.as_mut().enable();
let w = send_window.load(Ordering::Acquire);
if w > 0 { break; }
tokio::select! {
_ = window_notify.notified() => continue,
_ = notified => continue,
_ = client_token.cancelled() => break,
_ = tokio::time::sleep(Duration::from_secs(120)) => {
log::warn!("Stream {} upload stalled (window empty for 120s)", stream_id);
@@ -844,15 +836,16 @@ async fn handle_client_connection(
let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count(
active_streams.load(Ordering::Relaxed),
) as usize;
let max_read = w.min(buf.len()).min(adaptive_cap);
let max_read = w.min(32768).min(adaptive_cap);
tokio::select! {
read_result = client_read.read(&mut buf[..max_read]) => {
read_result = client_read.read(&mut buf[FRAME_HEADER_SIZE..FRAME_HEADER_SIZE + max_read]) => {
match read_result {
Ok(0) => break,
Ok(n) => {
send_window.fetch_sub(n as u32, Ordering::Release);
let data_frame = encode_frame(stream_id, FRAME_DATA, &buf[..n]);
encode_frame_header(&mut buf, stream_id, FRAME_DATA, n);
let data_frame = buf[..FRAME_HEADER_SIZE + n].to_vec();
if tunnel_data_tx.send(data_frame).await.is_err() {
log::warn!("Stream {} data channel closed, closing", stream_id);
break;

View File

@@ -12,6 +12,19 @@ use serde::{Deserialize, Serialize};
use remoteingress_protocol::*;
type HubTlsStream = tokio_rustls::server::TlsStream<TcpStream>;
/// Per-stream data channel capacity. With 4MB window and 32KB frames,
/// at most ~128 frames are in-flight. 256 provides comfortable headroom.
const PER_STREAM_DATA_CAPACITY: usize = 256;
/// Result of processing a frame.
#[allow(dead_code)]
enum FrameAction {
Continue,
Disconnect(String),
}
/// Per-stream state tracked in the hub's stream map.
struct HubStreamState {
/// Channel to deliver FRAME_DATA payloads to the upstream writer task.
@@ -123,7 +136,7 @@ pub struct TunnelHub {
struct ConnectedEdgeInfo {
connected_at: u64,
peer_addr: String,
active_streams: Arc<Mutex<HashMap<u32, HubStreamState>>>,
edge_stream_count: Arc<AtomicU32>,
config_tx: mpsc::Sender<EdgeConfigUpdate>,
#[allow(dead_code)] // kept alive for Drop — cancels child tokens when edge is removed
cancel_token: CancellationToken,
@@ -189,11 +202,10 @@ impl TunnelHub {
let mut connected = Vec::new();
for (id, info) in edges.iter() {
let streams = info.active_streams.lock().await;
connected.push(ConnectedEdgeStatus {
edge_id: id.clone(),
connected_at: info.connected_at,
active_streams: streams.len(),
active_streams: info.edge_stream_count.load(Ordering::Relaxed) as usize,
peer_addr: info.peer_addr.clone(),
});
}
@@ -287,6 +299,285 @@ impl Drop for TunnelHub {
/// Maximum concurrent streams per edge connection.
const MAX_STREAMS_PER_EDGE: usize = 1024;
/// Process a single frame received from the edge side of the tunnel.
/// Handles FRAME_OPEN, FRAME_DATA, FRAME_WINDOW_UPDATE, FRAME_CLOSE, and FRAME_PONG.
async fn handle_hub_frame(
frame: Frame,
tunnel_io: &mut remoteingress_protocol::TunnelIo<HubTlsStream>,
streams: &mut HashMap<u32, HubStreamState>,
stream_semaphore: &Arc<Semaphore>,
edge_stream_count: &Arc<AtomicU32>,
edge_id: &str,
event_tx: &mpsc::Sender<HubEvent>,
ctrl_tx: &mpsc::Sender<Vec<u8>>,
data_tx: &mpsc::Sender<Vec<u8>>,
target_host: &str,
edge_token: &CancellationToken,
cleanup_tx: &mpsc::Sender<u32>,
) -> FrameAction {
match frame.frame_type {
FRAME_OPEN => {
// A4: Check stream limit before processing
let permit = match stream_semaphore.clone().try_acquire_owned() {
Ok(p) => p,
Err(_) => {
log::warn!("Edge {} exceeded max streams ({}), rejecting stream {}",
edge_id, MAX_STREAMS_PER_EDGE, frame.stream_id);
let close_frame = encode_frame(frame.stream_id, FRAME_CLOSE_BACK, &[]);
tunnel_io.queue_ctrl(close_frame);
return FrameAction::Continue;
}
};
// Payload is PROXY v1 header line
let proxy_header = String::from_utf8_lossy(&frame.payload).to_string();
// Parse destination port from PROXY header
let dest_port = parse_dest_port_from_proxy(&proxy_header).unwrap_or(443);
let stream_id = frame.stream_id;
let cleanup = cleanup_tx.clone();
let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK
let data_writer_tx = data_tx.clone(); // data: DATA_BACK
let target = target_host.to_string();
let stream_token = edge_token.child_token();
let _ = event_tx.try_send(HubEvent::StreamOpened {
edge_id: edge_id.to_string(),
stream_id,
});
// Create channel for data from edge to this stream
let (stream_data_tx, mut stream_data_rx) = mpsc::channel::<Vec<u8>>(PER_STREAM_DATA_CAPACITY);
// Adaptive initial window: scale with current stream count
// to keep total in-flight data within the 32MB budget.
let initial_window = compute_window_for_stream_count(
edge_stream_count.load(Ordering::Relaxed),
);
let send_window = Arc::new(AtomicU32::new(initial_window));
let window_notify = Arc::new(Notify::new());
streams.insert(stream_id, HubStreamState {
data_tx: stream_data_tx,
cancel_token: stream_token.clone(),
send_window: Arc::clone(&send_window),
window_notify: Arc::clone(&window_notify),
});
// Spawn task: connect to SmartProxy, send PROXY header, pipe data
let stream_counter = Arc::clone(edge_stream_count);
tokio::spawn(async move {
let _permit = permit; // hold semaphore permit until stream completes
stream_counter.fetch_add(1, Ordering::Relaxed);
let result = async {
// A2: Connect to SmartProxy with timeout
let mut upstream = tokio::time::timeout(
Duration::from_secs(10),
TcpStream::connect((target.as_str(), dest_port)),
)
.await
.map_err(|_| -> Box<dyn std::error::Error + Send + Sync> {
format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into()
})??;
upstream.set_nodelay(true)?;
upstream.write_all(proxy_header.as_bytes()).await?;
let (mut up_read, mut up_write) =
upstream.into_split();
// Forward data from edge (via channel) to SmartProxy
// After writing to upstream, send WINDOW_UPDATE_BACK to edge
let writer_token = stream_token.clone();
let wub_tx = writer_tx.clone();
let stream_counter_w = Arc::clone(&stream_counter);
let writer_for_edge_data = tokio::spawn(async move {
let mut consumed_since_update: u32 = 0;
loop {
tokio::select! {
data = stream_data_rx.recv() => {
match data {
Some(data) => {
let len = data.len() as u32;
// Check cancellation alongside the write so we respond
// promptly to FRAME_CLOSE instead of blocking up to 60s.
let write_result = tokio::select! {
r = tokio::time::timeout(
Duration::from_secs(60),
up_write.write_all(&data),
) => r,
_ = writer_token.cancelled() => break,
};
match write_result {
Ok(Ok(())) => {}
Ok(Err(_)) => break,
Err(_) => {
log::warn!("Stream {} write to upstream timed out (60s)", stream_id);
break;
}
}
// Track consumption for adaptive flow control.
// Increment capped to adaptive window to limit per-stream in-flight data.
consumed_since_update += len;
let adaptive_window = remoteingress_protocol::compute_window_for_stream_count(
stream_counter_w.load(Ordering::Relaxed),
);
let threshold = adaptive_window / 2;
if consumed_since_update >= threshold {
let increment = consumed_since_update.min(adaptive_window);
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, increment);
if wub_tx.try_send(frame).is_ok() {
consumed_since_update -= increment;
}
// If try_send fails, keep accumulating — retry on next threshold
}
}
None => break,
}
}
_ = writer_token.cancelled() => break,
}
}
// Send final window update for remaining consumed bytes
if consumed_since_update > 0 {
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update);
let _ = wub_tx.try_send(frame);
}
let _ = up_write.shutdown().await;
});
// Forward data from SmartProxy back to edge via writer channel
// with per-stream flow control (check send_window before reading).
// Zero-copy: read payload directly after the header, then prepend header.
let mut buf = vec![0u8; FRAME_HEADER_SIZE + 32768];
loop {
// Wait for send window to have capacity (with stall timeout).
// Safe pattern: register notified BEFORE checking the condition
// to avoid missing a notify_one that fires between load and select.
loop {
let notified = window_notify.notified();
tokio::pin!(notified);
notified.as_mut().enable();
let w = send_window.load(Ordering::Acquire);
if w > 0 { break; }
tokio::select! {
_ = notified => continue,
_ = stream_token.cancelled() => break,
_ = tokio::time::sleep(Duration::from_secs(120)) => {
log::warn!("Stream {} download stalled (window empty for 120s)", stream_id);
break;
}
}
}
if stream_token.is_cancelled() { break; }
// Limit read size to available window.
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
// which would be falsely interpreted as EOF.
let w = send_window.load(Ordering::Acquire) as usize;
if w == 0 {
log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id);
break;
}
// Adaptive: cap read to current per-stream target window
let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count(
stream_counter.load(Ordering::Relaxed),
) as usize;
let max_read = w.min(32768).min(adaptive_cap);
tokio::select! {
read_result = up_read.read(&mut buf[FRAME_HEADER_SIZE..FRAME_HEADER_SIZE + max_read]) => {
match read_result {
Ok(0) => break,
Ok(n) => {
send_window.fetch_sub(n as u32, Ordering::Release);
encode_frame_header(&mut buf, stream_id, FRAME_DATA_BACK, n);
let frame = buf[..FRAME_HEADER_SIZE + n].to_vec();
if data_writer_tx.send(frame).await.is_err() {
log::warn!("Stream {} data channel closed, closing", stream_id);
break;
}
}
Err(_) => break,
}
}
_ = stream_token.cancelled() => break,
}
}
// Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK).
// Use send().await to guarantee delivery (try_send silently drops if full).
if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let _ = data_writer_tx.send(close_frame).await;
}
writer_for_edge_data.abort();
Ok::<(), Box<dyn std::error::Error + Send + Sync>>(())
}
.await;
if let Err(e) = result {
log::error!("Stream {} error: {}", stream_id, e);
// Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK).
// Use send().await to guarantee delivery.
if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let _ = data_writer_tx.send(close_frame).await;
}
}
// Signal main loop to remove stream from the map
let _ = cleanup.send(stream_id).await;
stream_counter.fetch_sub(1, Ordering::Relaxed);
});
}
FRAME_DATA => {
// Non-blocking dispatch to per-stream channel.
// With flow control, the sender should rarely exceed the channel capacity.
if let Some(state) = streams.get(&frame.stream_id) {
if state.data_tx.try_send(frame.payload).is_err() {
log::warn!("Stream {} data channel full, closing stream", frame.stream_id);
if let Some(state) = streams.remove(&frame.stream_id) {
state.cancel_token.cancel();
}
}
}
}
FRAME_WINDOW_UPDATE => {
// Edge consumed data — increase our send window for this stream
if let Some(increment) = decode_window_update(&frame.payload) {
if increment > 0 {
if let Some(state) = streams.get(&frame.stream_id) {
let prev = state.send_window.fetch_add(increment, Ordering::Release);
if prev + increment > MAX_WINDOW_SIZE {
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
}
state.window_notify.notify_one();
}
}
}
}
FRAME_CLOSE => {
if let Some(state) = streams.remove(&frame.stream_id) {
state.cancel_token.cancel();
let _ = event_tx.try_send(HubEvent::StreamClosed {
edge_id: edge_id.to_string(),
stream_id: frame.stream_id,
});
}
}
FRAME_PONG => {
log::debug!("Received PONG from edge {}", edge_id);
}
_ => {
log::warn!("Unexpected frame type {} from edge", frame.frame_type);
}
}
FrameAction::Continue
}
/// Handle a single edge connection: authenticate, then enter frame loop.
async fn handle_edge_connection(
stream: TcpStream,
@@ -368,8 +659,11 @@ async fn handle_edge_connection(
tls_stream.flush().await?;
// Track this edge
let streams: Arc<Mutex<HashMap<u32, HubStreamState>>> =
Arc::new(Mutex::new(HashMap::new()));
let mut streams: HashMap<u32, HubStreamState> = HashMap::new();
// Per-edge active stream counter for adaptive flow control
let edge_stream_count = Arc::new(AtomicU32::new(0));
// Cleanup channel: spawned stream tasks send stream_id here when done
let (cleanup_tx, mut cleanup_rx) = mpsc::channel::<u32>(256);
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
@@ -385,16 +679,13 @@ async fn handle_edge_connection(
ConnectedEdgeInfo {
connected_at: now,
peer_addr,
active_streams: streams.clone(),
edge_stream_count: edge_stream_count.clone(),
config_tx,
cancel_token: edge_token.clone(),
},
);
}
// Per-edge active stream counter for adaptive flow control
let edge_stream_count = Arc::new(AtomicU32::new(0));
// QoS dual-channel: ctrl frames have priority over data frames.
// Stream handlers send through these channels -> TunnelIo drains them.
let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Vec<u8>>(256);
@@ -444,292 +735,36 @@ async fn handle_edge_connection(
let mut disconnect_reason = "unknown".to_string();
'hub_loop: loop {
// Drain completed stream cleanups from spawned tasks
while let Ok(stream_id) = cleanup_rx.try_recv() {
if streams.remove(&stream_id).is_some() {
let _ = event_tx.try_send(HubEvent::StreamClosed {
edge_id: edge_id.clone(),
stream_id,
});
}
}
// Drain any buffered frames
loop {
match tunnel_io.try_parse_frame() {
Some(Ok(frame)) => {
// Reset liveness on any received frame
last_activity = Instant::now();
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
match frame.frame_type {
FRAME_OPEN => {
// A4: Check stream limit before processing
let permit = match stream_semaphore.clone().try_acquire_owned() {
Ok(p) => p,
Err(_) => {
log::warn!("Edge {} exceeded max streams ({}), rejecting stream {}",
edge_id, MAX_STREAMS_PER_EDGE, frame.stream_id);
let close_frame = encode_frame(frame.stream_id, FRAME_CLOSE_BACK, &[]);
tunnel_io.queue_ctrl(close_frame);
continue;
}
};
// Payload is PROXY v1 header line
let proxy_header = String::from_utf8_lossy(&frame.payload).to_string();
// Parse destination port from PROXY header
let dest_port = parse_dest_port_from_proxy(&proxy_header).unwrap_or(443);
let stream_id = frame.stream_id;
let edge_id_clone = edge_id.clone();
let event_tx_clone = event_tx.clone();
let streams_clone = streams.clone();
let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK
let data_writer_tx = data_tx.clone(); // data: DATA_BACK
let target = target_host.clone();
let stream_token = edge_token.child_token();
let _ = event_tx.try_send(HubEvent::StreamOpened {
edge_id: edge_id.clone(),
stream_id,
});
// Create channel for data from edge to this stream (capacity 16 is sufficient with flow control)
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(1024);
// Adaptive initial window: scale with current stream count
// to keep total in-flight data within the 32MB budget.
let initial_window = compute_window_for_stream_count(
edge_stream_count.load(Ordering::Relaxed),
);
let send_window = Arc::new(AtomicU32::new(initial_window));
let window_notify = Arc::new(Notify::new());
{
let mut s = streams.lock().await;
s.insert(stream_id, HubStreamState {
data_tx,
cancel_token: stream_token.clone(),
send_window: Arc::clone(&send_window),
window_notify: Arc::clone(&window_notify),
});
}
// Spawn task: connect to SmartProxy, send PROXY header, pipe data
let stream_counter = Arc::clone(&edge_stream_count);
tokio::spawn(async move {
let _permit = permit; // hold semaphore permit until stream completes
stream_counter.fetch_add(1, Ordering::Relaxed);
let result = async {
// A2: Connect to SmartProxy with timeout
let mut upstream = tokio::time::timeout(
Duration::from_secs(10),
TcpStream::connect((target.as_str(), dest_port)),
)
.await
.map_err(|_| -> Box<dyn std::error::Error + Send + Sync> {
format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into()
})??;
upstream.set_nodelay(true)?;
upstream.write_all(proxy_header.as_bytes()).await?;
let (mut up_read, mut up_write) =
upstream.into_split();
// Forward data from edge (via channel) to SmartProxy
// After writing to upstream, send WINDOW_UPDATE_BACK to edge
let writer_token = stream_token.clone();
let wub_tx = writer_tx.clone();
let stream_counter_w = Arc::clone(&stream_counter);
let writer_for_edge_data = tokio::spawn(async move {
let mut consumed_since_update: u32 = 0;
loop {
tokio::select! {
data = data_rx.recv() => {
match data {
Some(data) => {
let len = data.len() as u32;
// Check cancellation alongside the write so we respond
// promptly to FRAME_CLOSE instead of blocking up to 60s.
let write_result = tokio::select! {
r = tokio::time::timeout(
Duration::from_secs(60),
up_write.write_all(&data),
) => r,
_ = writer_token.cancelled() => break,
};
match write_result {
Ok(Ok(())) => {}
Ok(Err(_)) => break,
Err(_) => {
log::warn!("Stream {} write to upstream timed out (60s)", stream_id);
break;
}
}
// Track consumption for adaptive flow control.
// Increment capped to adaptive window to limit per-stream in-flight data.
consumed_since_update += len;
let adaptive_window = remoteingress_protocol::compute_window_for_stream_count(
stream_counter_w.load(Ordering::Relaxed),
);
let threshold = adaptive_window / 2;
if consumed_since_update >= threshold {
let increment = consumed_since_update.min(adaptive_window);
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, increment);
if wub_tx.try_send(frame).is_ok() {
consumed_since_update -= increment;
}
// If try_send fails, keep accumulating — retry on next threshold
}
}
None => break,
}
}
_ = writer_token.cancelled() => break,
}
}
// Send final window update for remaining consumed bytes
if consumed_since_update > 0 {
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update);
let _ = wub_tx.try_send(frame);
}
let _ = up_write.shutdown().await;
});
// Forward data from SmartProxy back to edge via writer channel
// with per-stream flow control (check send_window before reading)
let mut buf = vec![0u8; 32768];
loop {
// Wait for send window to have capacity (with stall timeout)
loop {
let w = send_window.load(Ordering::Acquire);
if w > 0 { break; }
tokio::select! {
_ = window_notify.notified() => continue,
_ = stream_token.cancelled() => break,
_ = tokio::time::sleep(Duration::from_secs(120)) => {
log::warn!("Stream {} download stalled (window empty for 120s)", stream_id);
break;
}
}
}
if stream_token.is_cancelled() { break; }
// Limit read size to available window.
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
// which would be falsely interpreted as EOF.
let w = send_window.load(Ordering::Acquire) as usize;
if w == 0 {
log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id);
break;
}
// Adaptive: cap read to current per-stream target window
let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count(
stream_counter.load(Ordering::Relaxed),
) as usize;
let max_read = w.min(buf.len()).min(adaptive_cap);
tokio::select! {
read_result = up_read.read(&mut buf[..max_read]) => {
match read_result {
Ok(0) => break,
Ok(n) => {
send_window.fetch_sub(n as u32, Ordering::Release);
let frame =
encode_frame(stream_id, FRAME_DATA_BACK, &buf[..n]);
if data_writer_tx.send(frame).await.is_err() {
log::warn!("Stream {} data channel closed, closing", stream_id);
break;
}
}
Err(_) => break,
}
}
_ = stream_token.cancelled() => break,
}
}
// Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK).
// Use send().await to guarantee delivery (try_send silently drops if full).
if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let _ = data_writer_tx.send(close_frame).await;
}
writer_for_edge_data.abort();
Ok::<(), Box<dyn std::error::Error + Send + Sync>>(())
}
.await;
if let Err(e) = result {
log::error!("Stream {} error: {}", stream_id, e);
// Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK).
// Use send().await to guarantee delivery.
if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let _ = data_writer_tx.send(close_frame).await;
}
}
// Clean up stream (guard against duplicate if FRAME_CLOSE already removed it)
let was_present = {
let mut s = streams_clone.lock().await;
s.remove(&stream_id).is_some()
};
if was_present {
let _ = event_tx_clone.try_send(HubEvent::StreamClosed {
edge_id: edge_id_clone,
stream_id,
});
}
stream_counter.fetch_sub(1, Ordering::Relaxed);
});
}
FRAME_DATA => {
// Non-blocking dispatch to per-stream channel.
// With flow control, the sender should rarely exceed the channel capacity.
let mut s = streams.lock().await;
if let Some(state) = s.get(&frame.stream_id) {
if state.data_tx.try_send(frame.payload).is_err() {
log::warn!("Stream {} data channel full, closing stream", frame.stream_id);
if let Some(state) = s.remove(&frame.stream_id) {
state.cancel_token.cancel();
}
}
}
}
FRAME_WINDOW_UPDATE => {
// Edge consumed data — increase our send window for this stream
if let Some(increment) = decode_window_update(&frame.payload) {
if increment > 0 {
let s = streams.lock().await;
if let Some(state) = s.get(&frame.stream_id) {
let prev = state.send_window.fetch_add(increment, Ordering::Release);
if prev + increment > MAX_WINDOW_SIZE {
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
}
state.window_notify.notify_one();
}
}
}
}
FRAME_CLOSE => {
let mut s = streams.lock().await;
if let Some(state) = s.remove(&frame.stream_id) {
state.cancel_token.cancel();
let _ = event_tx.try_send(HubEvent::StreamClosed {
edge_id: edge_id.clone(),
stream_id: frame.stream_id,
});
}
}
FRAME_PONG => {
log::debug!("Received PONG from edge {}", edge_id);
}
_ => {
log::warn!("Unexpected frame type {} from edge", frame.frame_type);
}
}
}
let frame = match tunnel_io.try_parse_frame() {
Some(Ok(f)) => f,
Some(Err(e)) => {
log::error!("Edge {} frame error: {}", edge_id, e);
disconnect_reason = format!("edge_frame_error: {}", e);
break 'hub_loop;
}
None => break,
};
last_activity = Instant::now();
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
if let FrameAction::Disconnect(reason) = handle_hub_frame(
frame, &mut tunnel_io, &mut streams, &stream_semaphore, &edge_stream_count,
&edge_id, &event_tx, &ctrl_tx, &data_tx, &target_host, &edge_token,
&cleanup_tx,
).await {
disconnect_reason = reason;
break 'hub_loop;
}
}
@@ -744,280 +779,15 @@ async fn handle_edge_connection(
match event {
remoteingress_protocol::TunnelEvent::Frame(frame) => {
// Reset liveness on any received frame
last_activity = Instant::now();
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
match frame.frame_type {
FRAME_OPEN => {
// A4: Check stream limit before processing
let permit = match stream_semaphore.clone().try_acquire_owned() {
Ok(p) => p,
Err(_) => {
log::warn!("Edge {} exceeded max streams ({}), rejecting stream {}",
edge_id, MAX_STREAMS_PER_EDGE, frame.stream_id);
let close_frame = encode_frame(frame.stream_id, FRAME_CLOSE_BACK, &[]);
tunnel_io.queue_ctrl(close_frame);
continue;
}
};
// Payload is PROXY v1 header line
let proxy_header = String::from_utf8_lossy(&frame.payload).to_string();
// Parse destination port from PROXY header
let dest_port = parse_dest_port_from_proxy(&proxy_header).unwrap_or(443);
let stream_id = frame.stream_id;
let edge_id_clone = edge_id.clone();
let event_tx_clone = event_tx.clone();
let streams_clone = streams.clone();
let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK
let data_writer_tx = data_tx.clone(); // data: DATA_BACK
let target = target_host.clone();
let stream_token = edge_token.child_token();
let _ = event_tx.try_send(HubEvent::StreamOpened {
edge_id: edge_id.clone(),
stream_id,
});
// Create channel for data from edge to this stream (capacity 16 is sufficient with flow control)
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(256);
// Adaptive initial window: scale with current stream count
// to keep total in-flight data within the 32MB budget.
let initial_window = compute_window_for_stream_count(
edge_stream_count.load(Ordering::Relaxed),
);
let send_window = Arc::new(AtomicU32::new(initial_window));
let window_notify = Arc::new(Notify::new());
{
let mut s = streams.lock().await;
s.insert(stream_id, HubStreamState {
data_tx,
cancel_token: stream_token.clone(),
send_window: Arc::clone(&send_window),
window_notify: Arc::clone(&window_notify),
});
}
// Spawn task: connect to SmartProxy, send PROXY header, pipe data
let stream_counter = Arc::clone(&edge_stream_count);
tokio::spawn(async move {
let _permit = permit; // hold semaphore permit until stream completes
stream_counter.fetch_add(1, Ordering::Relaxed);
let result = async {
// A2: Connect to SmartProxy with timeout
let mut upstream = tokio::time::timeout(
Duration::from_secs(10),
TcpStream::connect((target.as_str(), dest_port)),
)
.await
.map_err(|_| -> Box<dyn std::error::Error + Send + Sync> {
format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into()
})??;
upstream.set_nodelay(true)?;
upstream.write_all(proxy_header.as_bytes()).await?;
let (mut up_read, mut up_write) =
upstream.into_split();
// Forward data from edge (via channel) to SmartProxy
// After writing to upstream, send WINDOW_UPDATE_BACK to edge
let writer_token = stream_token.clone();
let wub_tx = writer_tx.clone();
let stream_counter_w = Arc::clone(&stream_counter);
let writer_for_edge_data = tokio::spawn(async move {
let mut consumed_since_update: u32 = 0;
loop {
tokio::select! {
data = data_rx.recv() => {
match data {
Some(data) => {
let len = data.len() as u32;
// Check cancellation alongside the write so we respond
// promptly to FRAME_CLOSE instead of blocking up to 60s.
let write_result = tokio::select! {
r = tokio::time::timeout(
Duration::from_secs(60),
up_write.write_all(&data),
) => r,
_ = writer_token.cancelled() => break,
};
match write_result {
Ok(Ok(())) => {}
Ok(Err(_)) => break,
Err(_) => {
log::warn!("Stream {} write to upstream timed out (60s)", stream_id);
break;
}
}
// Track consumption for adaptive flow control.
// Increment capped to adaptive window to limit per-stream in-flight data.
consumed_since_update += len;
let adaptive_window = remoteingress_protocol::compute_window_for_stream_count(
stream_counter_w.load(Ordering::Relaxed),
);
let threshold = adaptive_window / 2;
if consumed_since_update >= threshold {
let increment = consumed_since_update.min(adaptive_window);
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, increment);
if wub_tx.try_send(frame).is_ok() {
consumed_since_update -= increment;
}
// If try_send fails, keep accumulating — retry on next threshold
}
}
None => break,
}
}
_ = writer_token.cancelled() => break,
}
}
// Send final window update for remaining consumed bytes
if consumed_since_update > 0 {
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update);
let _ = wub_tx.try_send(frame);
}
let _ = up_write.shutdown().await;
});
// Forward data from SmartProxy back to edge via writer channel
// with per-stream flow control (check send_window before reading)
let mut buf = vec![0u8; 32768];
loop {
// Wait for send window to have capacity (with stall timeout)
loop {
let w = send_window.load(Ordering::Acquire);
if w > 0 { break; }
tokio::select! {
_ = window_notify.notified() => continue,
_ = stream_token.cancelled() => break,
_ = tokio::time::sleep(Duration::from_secs(120)) => {
log::warn!("Stream {} download stalled (window empty for 120s)", stream_id);
break;
}
}
}
if stream_token.is_cancelled() { break; }
// Limit read size to available window.
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
// which would be falsely interpreted as EOF.
let w = send_window.load(Ordering::Acquire) as usize;
if w == 0 {
log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id);
break;
}
// Adaptive: cap read to current per-stream target window
let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count(
stream_counter.load(Ordering::Relaxed),
) as usize;
let max_read = w.min(buf.len()).min(adaptive_cap);
tokio::select! {
read_result = up_read.read(&mut buf[..max_read]) => {
match read_result {
Ok(0) => break,
Ok(n) => {
send_window.fetch_sub(n as u32, Ordering::Release);
let frame =
encode_frame(stream_id, FRAME_DATA_BACK, &buf[..n]);
if data_writer_tx.send(frame).await.is_err() {
log::warn!("Stream {} data channel closed, closing", stream_id);
break;
}
}
Err(_) => break,
}
}
_ = stream_token.cancelled() => break,
}
}
// Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK).
// Use send().await to guarantee delivery (try_send silently drops if full).
if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let _ = data_writer_tx.send(close_frame).await;
}
writer_for_edge_data.abort();
Ok::<(), Box<dyn std::error::Error + Send + Sync>>(())
}
.await;
if let Err(e) = result {
log::error!("Stream {} error: {}", stream_id, e);
// Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK).
// Use send().await to guarantee delivery.
if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let _ = data_writer_tx.send(close_frame).await;
}
}
// Clean up stream (guard against duplicate if FRAME_CLOSE already removed it)
let was_present = {
let mut s = streams_clone.lock().await;
s.remove(&stream_id).is_some()
};
if was_present {
let _ = event_tx_clone.try_send(HubEvent::StreamClosed {
edge_id: edge_id_clone,
stream_id,
});
}
stream_counter.fetch_sub(1, Ordering::Relaxed);
});
}
FRAME_DATA => {
// Non-blocking dispatch to per-stream channel.
// With flow control, the sender should rarely exceed the channel capacity.
let mut s = streams.lock().await;
if let Some(state) = s.get(&frame.stream_id) {
if state.data_tx.try_send(frame.payload).is_err() {
log::warn!("Stream {} data channel full, closing stream", frame.stream_id);
if let Some(state) = s.remove(&frame.stream_id) {
state.cancel_token.cancel();
}
}
}
}
FRAME_WINDOW_UPDATE => {
// Edge consumed data — increase our send window for this stream
if let Some(increment) = decode_window_update(&frame.payload) {
if increment > 0 {
let s = streams.lock().await;
if let Some(state) = s.get(&frame.stream_id) {
let prev = state.send_window.fetch_add(increment, Ordering::Release);
if prev + increment > MAX_WINDOW_SIZE {
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
}
state.window_notify.notify_one();
}
}
}
}
FRAME_CLOSE => {
let mut s = streams.lock().await;
if let Some(state) = s.remove(&frame.stream_id) {
state.cancel_token.cancel();
let _ = event_tx.try_send(HubEvent::StreamClosed {
edge_id: edge_id.clone(),
stream_id: frame.stream_id,
});
}
}
FRAME_PONG => {
log::debug!("Received PONG from edge {}", edge_id);
}
_ => {
log::warn!("Unexpected frame type {} from edge", frame.frame_type);
}
if let FrameAction::Disconnect(reason) = handle_hub_frame(
frame, &mut tunnel_io, &mut streams, &stream_semaphore, &edge_stream_count,
&edge_id, &event_tx, &ctrl_tx, &data_tx, &target_host, &edge_token,
&cleanup_tx,
).await {
disconnect_reason = reason;
break;
}
}
remoteingress_protocol::TunnelEvent::Eof => {