diff --git a/changelog.md b/changelog.md index 6bf1f6f..904f711 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,12 @@ # Changelog +## 2026-03-17 - 4.8.2 - fix(rust-edge) +refactor tunnel I/O to preserve TLS state and prioritize control frames + +- replace split TLS handling with a single-owner TunnelIo to avoid handshake and buffered read corruption +- prioritize control frames over data frames to prevent WINDOW_UPDATE starvation and flow-control deadlocks +- improve tunnel reliability with incremental frame parsing, liveness/error events, and corrupt frame header logging + ## 2026-03-17 - 4.8.1 - fix(remoteingress-core) remove tunnel writer timeouts from edge and hub buffered writes diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 81bc61e..900f3a7 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -568,7 +568,9 @@ dependencies = [ name = "remoteingress-protocol" version = "2.0.0" dependencies = [ + "log", "tokio", + "tokio-util", ] [[package]] diff --git a/rust/crates/remoteingress-core/src/edge.rs b/rust/crates/remoteingress-core/src/edge.rs index e25cc70..ceb41a6 100644 --- a/rust/crates/remoteingress-core/src/edge.rs +++ b/rust/crates/remoteingress-core/src/edge.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::Arc; use std::time::Duration; -use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader}; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::net::{TcpListener, TcpStream}; use tokio::sync::{mpsc, Mutex, Notify, RwLock}; use tokio::task::JoinHandle; @@ -308,7 +308,7 @@ async fn connect_to_hub_and_run( let server_name = rustls::pki_types::ServerName::try_from(config.hub_host.clone()) .unwrap_or_else(|_| rustls::pki_types::ServerName::try_from("remoteingress-hub".to_string()).unwrap()); - let tls_stream = match connector.connect(server_name, tcp).await { + let mut tls_stream = match connector.connect(server_name, tcp).await { Ok(s) => s, Err(e) => { 
log::error!("TLS handshake failed: {}", e); @@ -316,28 +316,38 @@ async fn connect_to_hub_and_run( } }; - let (read_half, mut write_half) = tokio::io::split(tls_stream); - - // Send auth line + // Send auth line (we own the whole stream — no split) let auth_line = format!("EDGE {} {}\n", config.edge_id, config.secret); - if write_half.write_all(auth_line.as_bytes()).await.is_err() { + if tls_stream.write_all(auth_line.as_bytes()).await.is_err() { return EdgeLoopResult::Reconnect("auth_write_failed".to_string()); } + if tls_stream.flush().await.is_err() { + return EdgeLoopResult::Reconnect("auth_flush_failed".to_string()); + } - // Read handshake response line from hub (JSON with initial config) - let mut buf_reader = BufReader::new(read_half); - let mut handshake_line = String::new(); - match buf_reader.read_line(&mut handshake_line).await { - Ok(0) => { - log::error!("Hub rejected connection (EOF before handshake)"); - return EdgeLoopResult::Reconnect("hub_rejected_eof".to_string()); - } - Ok(_) => {} - Err(e) => { - log::error!("Failed to read handshake response: {}", e); - return EdgeLoopResult::Reconnect(format!("handshake_read_failed: {}", e)); + // Read handshake line byte-by-byte (no BufReader — into_inner corrupts TLS state) + let mut handshake_bytes = Vec::with_capacity(512); + let mut byte = [0u8; 1]; + loop { + match tls_stream.read_exact(&mut byte).await { + Ok(_) => { + handshake_bytes.push(byte[0]); + if byte[0] == b'\n' { break; } + if handshake_bytes.len() > 8192 { + return EdgeLoopResult::Reconnect("handshake_too_long".to_string()); + } + } + Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => { + log::error!("Hub rejected connection (EOF before handshake)"); + return EdgeLoopResult::Reconnect("hub_rejected_eof".to_string()); + } + Err(e) => { + log::error!("Failed to read handshake response: {}", e); + return EdgeLoopResult::Reconnect(format!("handshake_read_failed: {}", e)); + } } } + let handshake_line = 
String::from_utf8_lossy(&handshake_bytes); let handshake: HandshakeConfig = match serde_json::from_str(handshake_line.trim()) { Ok(h) => h, @@ -394,52 +404,13 @@ async fn connect_to_hub_and_run( let client_writers: Arc>> = Arc::new(Mutex::new(HashMap::new())); - // QoS dual-channel tunnel writer: control frames (PONG/WINDOW_UPDATE/CLOSE/OPEN) - // have priority over data frames (DATA). Prevents PING starvation under load. + // QoS dual-channel: ctrl frames have priority over data frames. + // Stream handlers send through these channels → TunnelIo drains them. let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::>(256); let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::>(4096); - // Legacy alias — control channel for PONG, CLOSE, WINDOW_UPDATE, OPEN let tunnel_writer_tx = tunnel_ctrl_tx.clone(); - let tw_token = connection_token.clone(); - // Oneshot to signal the reader loop when the writer dies from a write error. - // This avoids the 45s liveness timeout delay when the tunnel is already dead. - let (writer_dead_tx, mut writer_dead_rx) = tokio::sync::oneshot::channel::<()>(); - let tunnel_writer_handle = tokio::spawn(async move { - // BufWriter coalesces small writes (frame headers, control frames) into fewer - // TLS records and syscalls. Flushed after each frame to avoid holding data. - let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half); - let mut write_error = false; - loop { - tokio::select! 
{ - biased; // control frames always take priority over data - ctrl = tunnel_ctrl_rx.recv() => { - match ctrl { - Some(frame_data) => { - if writer.write_all(&frame_data).await.is_err() { write_error = true; break; } - if writer.flush().await.is_err() { write_error = true; break; } - } - None => break, - } - } - data = tunnel_data_rx.recv() => { - match data { - Some(frame_data) => { - if writer.write_all(&frame_data).await.is_err() { write_error = true; break; } - if writer.flush().await.is_err() { write_error = true; break; } - } - None => break, - } - } - _ = tw_token.cancelled() => break, - } - } - if write_error { - log::error!("Tunnel writer failed, signalling reader for fast reconnect"); - let _ = writer_dead_tx.send(()); - } - }); - // Start TCP listeners for initial ports (hot-reloadable) + // Start TCP listeners for initial ports let mut port_listeners: HashMap> = HashMap::new(); let bind_address = config.bind_address.as_deref().unwrap_or("0.0.0.0"); apply_port_config( @@ -455,122 +426,180 @@ async fn connect_to_hub_and_run( bind_address, ); - // Heartbeat: liveness timeout detects silent hub failures + // Single-owner I/O engine — no tokio::io::split, no mutex + let mut tunnel_io = remoteingress_protocol::TunnelIo::new(tls_stream, Vec::new()); + let liveness_timeout_dur = Duration::from_secs(45); let mut last_activity = Instant::now(); let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur)); - // Read frames from hub - let mut frame_reader = FrameReader::new(buf_reader); - let result = loop { - tokio::select! { - frame_result = frame_reader.next_frame() => { - match frame_result { - Ok(Some(frame)) => { - // Reset liveness on any received frame - last_activity = Instant::now(); - liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur); - - match frame.frame_type { - FRAME_DATA_BACK => { - // Non-blocking dispatch to per-stream channel. 
- // With flow control, the sender should rarely exceed the channel capacity. - let mut writers = client_writers.lock().await; - if let Some(state) = writers.get(&frame.stream_id) { - if state.back_tx.try_send(frame.payload).is_err() { - log::warn!("Stream {} back-channel full, closing stream", frame.stream_id); - writers.remove(&frame.stream_id); - } + let result = 'io_loop: loop { + // Drain any buffered frames + loop { + match tunnel_io.try_parse_frame() { + Some(Ok(frame)) => { + last_activity = Instant::now(); + liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur); + match frame.frame_type { + FRAME_DATA_BACK => { + let mut writers = client_writers.lock().await; + if let Some(state) = writers.get(&frame.stream_id) { + if state.back_tx.try_send(frame.payload).is_err() { + log::warn!("Stream {} back-channel full, closing", frame.stream_id); + writers.remove(&frame.stream_id); } } - FRAME_WINDOW_UPDATE_BACK => { - // Hub consumed data — increase our send window for this stream (upload direction) - if let Some(increment) = decode_window_update(&frame.payload) { - if increment > 0 { - let writers = client_writers.lock().await; - if let Some(state) = writers.get(&frame.stream_id) { - let prev = state.send_window.fetch_add(increment, Ordering::Release); - if prev + increment > MAX_WINDOW_SIZE { - state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release); - } - state.window_notify.notify_one(); + } + FRAME_WINDOW_UPDATE_BACK => { + if let Some(increment) = decode_window_update(&frame.payload) { + if increment > 0 { + let writers = client_writers.lock().await; + if let Some(state) = writers.get(&frame.stream_id) { + let prev = state.send_window.fetch_add(increment, Ordering::Release); + if prev + increment > MAX_WINDOW_SIZE { + state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release); } + state.window_notify.notify_one(); } } } - FRAME_CLOSE_BACK => { - let mut writers = client_writers.lock().await; + } + FRAME_CLOSE_BACK => { + let mut writers = 
client_writers.lock().await; + writers.remove(&frame.stream_id); + } + FRAME_CONFIG => { + if let Ok(update) = serde_json::from_slice::(&frame.payload) { + log::info!("Config update from hub: ports {:?}", update.listen_ports); + *listen_ports.write().await = update.listen_ports.clone(); + let _ = event_tx.try_send(EdgeEvent::PortsUpdated { + listen_ports: update.listen_ports.clone(), + }); + apply_port_config( + &update.listen_ports, + &mut port_listeners, + &tunnel_writer_tx, + &tunnel_data_tx, + &client_writers, + active_streams, + next_stream_id, + &config.edge_id, + connection_token, + bind_address, + ); + } + } + FRAME_PING => { + // Queue PONG directly — no channel round-trip, guaranteed delivery + tunnel_io.queue_ctrl(encode_frame(0, FRAME_PONG, &[])); + } + _ => { + log::warn!("Unexpected frame type {} from hub", frame.frame_type); + } + } + } + Some(Err(e)) => { + log::error!("Hub frame error: {}", e); + break 'io_loop EdgeLoopResult::Reconnect(format!("hub_frame_error: {}", e)); + } + None => break, + } + } + + // Poll I/O: write(ctrl→data), flush, read, channels, timers + let event = std::future::poll_fn(|cx| { + tunnel_io.poll_step(cx, &mut tunnel_ctrl_rx, &mut tunnel_data_rx, &mut liveness_deadline, connection_token) + }).await; + + match event { + remoteingress_protocol::TunnelEvent::Frame(frame) => { + last_activity = Instant::now(); + liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur); + match frame.frame_type { + FRAME_DATA_BACK => { + let mut writers = client_writers.lock().await; + if let Some(state) = writers.get(&frame.stream_id) { + if state.back_tx.try_send(frame.payload).is_err() { + log::warn!("Stream {} back-channel full, closing", frame.stream_id); writers.remove(&frame.stream_id); } - FRAME_CONFIG => { - if let Ok(update) = serde_json::from_slice::(&frame.payload) { - log::info!("Config update from hub: ports {:?}", update.listen_ports); - *listen_ports.write().await = update.listen_ports.clone(); - let _ = 
event_tx.try_send(EdgeEvent::PortsUpdated { - listen_ports: update.listen_ports.clone(), - }); - apply_port_config( - &update.listen_ports, - &mut port_listeners, - &tunnel_writer_tx, - &tunnel_data_tx, - &client_writers, - active_streams, - next_stream_id, - &config.edge_id, - connection_token, - bind_address, - ); - } - } - FRAME_PING => { - let pong_frame = encode_frame(0, FRAME_PONG, &[]); - if tunnel_writer_tx.try_send(pong_frame).is_err() { - // Control channel full (WINDOW_UPDATE burst from many streams). - // DON'T disconnect — the 45s liveness timeout gives margin - // for the channel to drain and the next PONG to succeed. - log::warn!("PONG send failed, control channel full — skipping this cycle"); - } - log::trace!("Received PING from hub, sent PONG"); - } - _ => { - log::warn!("Unexpected frame type {} from hub", frame.frame_type); - } } } - Ok(None) => { - log::info!("Hub disconnected (EOF)"); - break EdgeLoopResult::Reconnect("hub_eof".to_string()); + FRAME_WINDOW_UPDATE_BACK => { + if let Some(increment) = decode_window_update(&frame.payload) { + if increment > 0 { + let writers = client_writers.lock().await; + if let Some(state) = writers.get(&frame.stream_id) { + let prev = state.send_window.fetch_add(increment, Ordering::Release); + if prev + increment > MAX_WINDOW_SIZE { + state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release); + } + state.window_notify.notify_one(); + } + } + } } - Err(e) => { - log::error!("Hub frame error: {}", e); - break EdgeLoopResult::Reconnect(format!("hub_frame_error: {}", e)); + FRAME_CLOSE_BACK => { + let mut writers = client_writers.lock().await; + writers.remove(&frame.stream_id); + } + FRAME_CONFIG => { + if let Ok(update) = serde_json::from_slice::(&frame.payload) { + log::info!("Config update from hub: ports {:?}", update.listen_ports); + *listen_ports.write().await = update.listen_ports.clone(); + let _ = event_tx.try_send(EdgeEvent::PortsUpdated { + listen_ports: update.listen_ports.clone(), + }); + 
apply_port_config( + &update.listen_ports, + &mut port_listeners, + &tunnel_writer_tx, + &tunnel_data_tx, + &client_writers, + active_streams, + next_stream_id, + &config.edge_id, + connection_token, + bind_address, + ); + } + } + FRAME_PING => { + tunnel_io.queue_ctrl(encode_frame(0, FRAME_PONG, &[])); + } + _ => { + log::warn!("Unexpected frame type {} from hub", frame.frame_type); } } } - _ = &mut liveness_deadline => { - log::warn!("Hub liveness timeout (no frames for {}s), reconnecting", - liveness_timeout_dur.as_secs()); + remoteingress_protocol::TunnelEvent::Eof => { + log::info!("Hub disconnected (EOF)"); + break EdgeLoopResult::Reconnect("hub_eof".to_string()); + } + remoteingress_protocol::TunnelEvent::ReadError(e) => { + log::error!("Hub frame read error: {}", e); + break EdgeLoopResult::Reconnect(format!("hub_frame_error: {}", e)); + } + remoteingress_protocol::TunnelEvent::WriteError(e) => { + log::error!("Tunnel write error: {}", e); + break EdgeLoopResult::Reconnect(format!("tunnel_write_error: {}", e)); + } + remoteingress_protocol::TunnelEvent::LivenessTimeout => { + log::warn!("Hub liveness timeout (no frames for {}s), reconnecting", liveness_timeout_dur.as_secs()); break EdgeLoopResult::Reconnect("liveness_timeout".to_string()); } - _ = &mut writer_dead_rx => { - log::error!("Tunnel writer died, reconnecting immediately"); - break EdgeLoopResult::Reconnect("writer_dead".to_string()); - } - _ = connection_token.cancelled() => { - log::info!("Connection cancelled"); - break EdgeLoopResult::Shutdown; - } - _ = shutdown_rx.recv() => { + remoteingress_protocol::TunnelEvent::Cancelled => { + if shutdown_rx.try_recv().is_ok() { + break EdgeLoopResult::Shutdown; + } break EdgeLoopResult::Shutdown; } } }; - // Cancel connection token to propagate to all child tasks BEFORE aborting + // Cleanup connection_token.cancel(); stun_handle.abort(); - tunnel_writer_handle.abort(); for (_, h) in port_listeners.drain() { h.abort(); } @@ -717,7 +746,7 @@ async fn 
handle_client_connection( } // Set up channel for data coming back from hub (capacity 16 is sufficient with flow control) - let (back_tx, mut back_rx) = mpsc::channel::>(256); + let (back_tx, mut back_rx) = mpsc::channel::>(1024); // Adaptive initial window: scale with current stream count to keep total in-flight // data within the 32MB budget. Prevents burst flooding when many streams open. let initial_window = remoteingress_protocol::compute_window_for_stream_count( diff --git a/rust/crates/remoteingress-core/src/hub.rs b/rust/crates/remoteingress-core/src/hub.rs index 9245a39..57028eb 100644 --- a/rust/crates/remoteingress-core/src/hub.rs +++ b/rust/crates/remoteingress-core/src/hub.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::sync::Arc; use std::sync::atomic::{AtomicU32, Ordering}; use std::time::Duration; -use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader}; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::net::{TcpListener, TcpStream}; use tokio::sync::{mpsc, Mutex, Notify, RwLock, Semaphore}; use tokio::time::{interval, sleep_until, Instant}; @@ -307,13 +307,24 @@ async fn handle_edge_connection( #[cfg(target_os = "linux")] let ka = ka.with_interval(Duration::from_secs(10)); let _ = socket2::SockRef::from(&stream).set_tcp_keepalive(&ka); - let tls_stream = acceptor.accept(stream).await?; - let (read_half, mut write_half) = tokio::io::split(tls_stream); - let mut buf_reader = BufReader::new(read_half); + let mut tls_stream = acceptor.accept(stream).await?; - // Read auth line: "EDGE \n" - let mut auth_line = String::new(); - buf_reader.read_line(&mut auth_line).await?; + // Byte-by-byte auth line reading (no BufReader). 
+ // Auth line: "EDGE \n" + let mut auth_buf = Vec::with_capacity(512); + loop { + let mut byte = [0u8; 1]; + tls_stream.read_exact(&mut byte).await?; + if byte[0] == b'\n' { + break; + } + auth_buf.push(byte[0]); + if auth_buf.len() > 4096 { + return Err("auth line too long".into()); + } + } + let auth_line = String::from_utf8(auth_buf) + .map_err(|_| "auth line not valid UTF-8")?; let auth_line = auth_line.trim(); let parts: Vec<&str> = auth_line.splitn(3, ' ').collect(); @@ -353,7 +364,8 @@ async fn handle_edge_connection( }; let mut handshake_json = serde_json::to_string(&handshake)?; handshake_json.push('\n'); - write_half.write_all(handshake_json.as_bytes()).await?; + tls_stream.write_all(handshake_json.as_bytes()).await?; + tls_stream.flush().await?; // Track this edge let streams: Arc>> = @@ -383,51 +395,13 @@ async fn handle_edge_connection( // Per-edge active stream counter for adaptive flow control let edge_stream_count = Arc::new(AtomicU32::new(0)); - // QoS dual-channel tunnel writer: control frames (PING/PONG/WINDOW_UPDATE/CLOSE) - // have priority over data frames (DATA_BACK). This prevents PING starvation under load. + // QoS dual-channel: ctrl frames have priority over data frames. + // Stream handlers send through these channels -> TunnelIo drains them. let (ctrl_tx, mut ctrl_rx) = mpsc::channel::>(256); let (data_tx, mut data_rx) = mpsc::channel::>(4096); - // Legacy alias for code that sends both control and data (will be migrated) - let frame_writer_tx = ctrl_tx.clone(); - let writer_token = edge_token.clone(); - let (writer_dead_tx, mut writer_dead_rx) = tokio::sync::oneshot::channel::<()>(); - let writer_handle = tokio::spawn(async move { - // BufWriter coalesces small writes (frame headers, control frames) into fewer - // TLS records and syscalls. Flushed after each frame to avoid holding data. - let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half); - let mut write_error = false; - loop { - tokio::select! 
{ - biased; // control frames always take priority over data - ctrl = ctrl_rx.recv() => { - match ctrl { - Some(frame_data) => { - if writer.write_all(&frame_data).await.is_err() { write_error = true; break; } - if writer.flush().await.is_err() { write_error = true; break; } - } - None => break, - } - } - data = data_rx.recv() => { - match data { - Some(frame_data) => { - if writer.write_all(&frame_data).await.is_err() { write_error = true; break; } - if writer.flush().await.is_err() { write_error = true; break; } - } - None => break, - } - } - _ = writer_token.cancelled() => break, - } - } - if write_error { - log::error!("Tunnel writer to edge failed, signalling reader for fast cleanup"); - let _ = writer_dead_tx.send(()); - } - }); // Spawn task to forward config updates as FRAME_CONFIG frames - let config_writer_tx = frame_writer_tx.clone(); + let config_writer_tx = ctrl_tx.clone(); let config_edge_id = edge_id.clone(); let config_token = edge_token.clone(); let config_handle = tokio::spawn(async move { @@ -464,324 +438,610 @@ async fn handle_edge_connection( let mut last_activity = Instant::now(); let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur)); - // Frame reading loop - let mut frame_reader = FrameReader::new(buf_reader); + // Single-owner I/O engine — no tokio::io::split, no mutex + let mut tunnel_io = remoteingress_protocol::TunnelIo::new(tls_stream, Vec::new()); + let mut disconnect_reason = "unknown".to_string(); - loop { - tokio::select! 
{ - frame_result = frame_reader.next_frame() => { - match frame_result { - Ok(Some(frame)) => { - // Reset liveness on any received frame - last_activity = Instant::now(); - liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur); + 'hub_loop: loop { + // Drain any buffered frames + loop { + match tunnel_io.try_parse_frame() { + Some(Ok(frame)) => { + // Reset liveness on any received frame + last_activity = Instant::now(); + liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur); - match frame.frame_type { - FRAME_OPEN => { - // A4: Check stream limit before processing - let permit = match stream_semaphore.clone().try_acquire_owned() { - Ok(p) => p, - Err(_) => { - log::warn!("Edge {} exceeded max streams ({}), rejecting stream {}", - edge_id, MAX_STREAMS_PER_EDGE, frame.stream_id); - let close_frame = encode_frame(frame.stream_id, FRAME_CLOSE_BACK, &[]); - let _ = frame_writer_tx.try_send(close_frame); - continue; - } - }; - - // Payload is PROXY v1 header line - let proxy_header = String::from_utf8_lossy(&frame.payload).to_string(); - - // Parse destination port from PROXY header - let dest_port = parse_dest_port_from_proxy(&proxy_header).unwrap_or(443); - - let stream_id = frame.stream_id; - let edge_id_clone = edge_id.clone(); - let event_tx_clone = event_tx.clone(); - let streams_clone = streams.clone(); - let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK - let data_writer_tx = data_tx.clone(); // data: DATA_BACK - let target = target_host.clone(); - let stream_token = edge_token.child_token(); - - let _ = event_tx.try_send(HubEvent::StreamOpened { - edge_id: edge_id.clone(), - stream_id, - }); - - // Create channel for data from edge to this stream (capacity 16 is sufficient with flow control) - let (data_tx, mut data_rx) = mpsc::channel::>(256); - // Adaptive initial window: scale with current stream count - // to keep total in-flight data within the 32MB budget. 
- let initial_window = compute_window_for_stream_count( - edge_stream_count.load(Ordering::Relaxed), - ); - let send_window = Arc::new(AtomicU32::new(initial_window)); - let window_notify = Arc::new(Notify::new()); - { - let mut s = streams.lock().await; - s.insert(stream_id, HubStreamState { - data_tx, - cancel_token: stream_token.clone(), - send_window: Arc::clone(&send_window), - window_notify: Arc::clone(&window_notify), - }); + match frame.frame_type { + FRAME_OPEN => { + // A4: Check stream limit before processing + let permit = match stream_semaphore.clone().try_acquire_owned() { + Ok(p) => p, + Err(_) => { + log::warn!("Edge {} exceeded max streams ({}), rejecting stream {}", + edge_id, MAX_STREAMS_PER_EDGE, frame.stream_id); + let close_frame = encode_frame(frame.stream_id, FRAME_CLOSE_BACK, &[]); + tunnel_io.queue_ctrl(close_frame); + continue; } + }; - // Spawn task: connect to SmartProxy, send PROXY header, pipe data - let stream_counter = Arc::clone(&edge_stream_count); - tokio::spawn(async move { - let _permit = permit; // hold semaphore permit until stream completes - stream_counter.fetch_add(1, Ordering::Relaxed); + // Payload is PROXY v1 header line + let proxy_header = String::from_utf8_lossy(&frame.payload).to_string(); - let result = async { - // A2: Connect to SmartProxy with timeout - let mut upstream = tokio::time::timeout( - Duration::from_secs(10), - TcpStream::connect((target.as_str(), dest_port)), - ) - .await - .map_err(|_| -> Box { - format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into() - })??; + // Parse destination port from PROXY header + let dest_port = parse_dest_port_from_proxy(&proxy_header).unwrap_or(443); - upstream.set_nodelay(true)?; - upstream.write_all(proxy_header.as_bytes()).await?; + let stream_id = frame.stream_id; + let edge_id_clone = edge_id.clone(); + let event_tx_clone = event_tx.clone(); + let streams_clone = streams.clone(); + let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, 
WINDOW_UPDATE_BACK + let data_writer_tx = data_tx.clone(); // data: DATA_BACK + let target = target_host.clone(); + let stream_token = edge_token.child_token(); - let (mut up_read, mut up_write) = - upstream.into_split(); + let _ = event_tx.try_send(HubEvent::StreamOpened { + edge_id: edge_id.clone(), + stream_id, + }); - // Forward data from edge (via channel) to SmartProxy - // After writing to upstream, send WINDOW_UPDATE_BACK to edge - let writer_token = stream_token.clone(); - let wub_tx = writer_tx.clone(); - let stream_counter_w = Arc::clone(&stream_counter); - let writer_for_edge_data = tokio::spawn(async move { - let mut consumed_since_update: u32 = 0; - loop { - tokio::select! { - data = data_rx.recv() => { - match data { - Some(data) => { - let len = data.len() as u32; - // Check cancellation alongside the write so we respond - // promptly to FRAME_CLOSE instead of blocking up to 60s. - let write_result = tokio::select! { - r = tokio::time::timeout( - Duration::from_secs(60), - up_write.write_all(&data), - ) => r, - _ = writer_token.cancelled() => break, - }; - match write_result { - Ok(Ok(())) => {} - Ok(Err(_)) => break, - Err(_) => { - log::warn!("Stream {} write to upstream timed out (60s)", stream_id); - break; - } - } - // Track consumption for adaptive flow control. - // Increment capped to adaptive window to limit per-stream in-flight data. 
- consumed_since_update += len; - let adaptive_window = remoteingress_protocol::compute_window_for_stream_count( - stream_counter_w.load(Ordering::Relaxed), - ); - let threshold = adaptive_window / 2; - if consumed_since_update >= threshold { - let increment = consumed_since_update.min(adaptive_window); - let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, increment); - if wub_tx.try_send(frame).is_ok() { - consumed_since_update -= increment; - } - // If try_send fails, keep accumulating — retry on next threshold + // Create channel for data from edge to this stream (capacity 1024 is sufficient with flow control) + let (data_tx, mut data_rx) = mpsc::channel::>(1024); + // Adaptive initial window: scale with current stream count + // to keep total in-flight data within the 32MB budget. + let initial_window = compute_window_for_stream_count( + edge_stream_count.load(Ordering::Relaxed), + ); + let send_window = Arc::new(AtomicU32::new(initial_window)); + let window_notify = Arc::new(Notify::new()); + { + let mut s = streams.lock().await; + s.insert(stream_id, HubStreamState { + data_tx, + cancel_token: stream_token.clone(), + send_window: Arc::clone(&send_window), + window_notify: Arc::clone(&window_notify), + }); + } + + // Spawn task: connect to SmartProxy, send PROXY header, pipe data + let stream_counter = Arc::clone(&edge_stream_count); + tokio::spawn(async move { + let _permit = permit; // hold semaphore permit until stream completes + stream_counter.fetch_add(1, Ordering::Relaxed); + + let result = async { + // A2: Connect to SmartProxy with timeout + let mut upstream = tokio::time::timeout( + Duration::from_secs(10), + TcpStream::connect((target.as_str(), dest_port)), + ) + .await + .map_err(|_| -> Box { + format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into() + })??; + + upstream.set_nodelay(true)?; + upstream.write_all(proxy_header.as_bytes()).await?; + + let (mut up_read, mut up_write) = + upstream.into_split(); + 
// Forward data from edge (via channel) to SmartProxy + // After writing to upstream, send WINDOW_UPDATE_BACK to edge + let writer_token = stream_token.clone(); + let wub_tx = writer_tx.clone(); + let stream_counter_w = Arc::clone(&stream_counter); + let writer_for_edge_data = tokio::spawn(async move { + let mut consumed_since_update: u32 = 0; + loop { + tokio::select! { + data = data_rx.recv() => { + match data { + Some(data) => { + let len = data.len() as u32; + // Check cancellation alongside the write so we respond + // promptly to FRAME_CLOSE instead of blocking up to 60s. + let write_result = tokio::select! { + r = tokio::time::timeout( + Duration::from_secs(60), + up_write.write_all(&data), + ) => r, + _ = writer_token.cancelled() => break, + }; + match write_result { + Ok(Ok(())) => {} + Ok(Err(_)) => break, + Err(_) => { + log::warn!("Stream {} write to upstream timed out (60s)", stream_id); + break; } } - None => break, + // Track consumption for adaptive flow control. + // Increment capped to adaptive window to limit per-stream in-flight data. 
+ consumed_since_update += len; + let adaptive_window = remoteingress_protocol::compute_window_for_stream_count( + stream_counter_w.load(Ordering::Relaxed), + ); + let threshold = adaptive_window / 2; + if consumed_since_update >= threshold { + let increment = consumed_since_update.min(adaptive_window); + let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, increment); + if wub_tx.try_send(frame).is_ok() { + consumed_since_update -= increment; + } + // If try_send fails, keep accumulating — retry on next threshold + } + } + None => break, + } + } + _ = writer_token.cancelled() => break, + } + } + // Send final window update for remaining consumed bytes + if consumed_since_update > 0 { + let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update); + let _ = wub_tx.try_send(frame); + } + let _ = up_write.shutdown().await; + }); + + // Forward data from SmartProxy back to edge via writer channel + // with per-stream flow control (check send_window before reading) + let mut buf = vec![0u8; 32768]; + loop { + // Wait for send window to have capacity (with stall timeout) + loop { + let w = send_window.load(Ordering::Acquire); + if w > 0 { break; } + tokio::select! { + _ = window_notify.notified() => continue, + _ = stream_token.cancelled() => break, + _ = tokio::time::sleep(Duration::from_secs(120)) => { + log::warn!("Stream {} download stalled (window empty for 120s)", stream_id); + break; + } + } + } + if stream_token.is_cancelled() { break; } + + // Limit read size to available window. + // IMPORTANT: if window is 0 (stall timeout fired), we must NOT + // read into an empty buffer — read(&mut buf[..0]) returns Ok(0) + // which would be falsely interpreted as EOF. 
+ let w = send_window.load(Ordering::Acquire) as usize; + if w == 0 { + log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id); + break; + } + // Adaptive: cap read to current per-stream target window + let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count( + stream_counter.load(Ordering::Relaxed), + ) as usize; + let max_read = w.min(buf.len()).min(adaptive_cap); + + tokio::select! { + read_result = up_read.read(&mut buf[..max_read]) => { + match read_result { + Ok(0) => break, + Ok(n) => { + send_window.fetch_sub(n as u32, Ordering::Release); + let frame = + encode_frame(stream_id, FRAME_DATA_BACK, &buf[..n]); + if data_writer_tx.send(frame).await.is_err() { + log::warn!("Stream {} data channel closed, closing", stream_id); + break; } } - _ = writer_token.cancelled() => break, + Err(_) => break, } } - // Send final window update for remaining consumed bytes - if consumed_since_update > 0 { - let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update); - let _ = wub_tx.try_send(frame); - } - let _ = up_write.shutdown().await; - }); + _ = stream_token.cancelled() => break, + } + } - // Forward data from SmartProxy back to edge via writer channel - // with per-stream flow control (check send_window before reading) - let mut buf = vec![0u8; 32768]; - loop { - // Wait for send window to have capacity (with stall timeout) - loop { - let w = send_window.load(Ordering::Acquire); - if w > 0 { break; } - tokio::select! { - _ = window_notify.notified() => continue, - _ = stream_token.cancelled() => break, - _ = tokio::time::sleep(Duration::from_secs(120)) => { - log::warn!("Stream {} download stalled (window empty for 120s)", stream_id); - break; - } - } - } - if stream_token.is_cancelled() { break; } + // Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK). + // Use send().await to guarantee delivery (try_send silently drops if full). 
+ if !stream_token.is_cancelled() { + let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]); + let _ = data_writer_tx.send(close_frame).await; + } - // Limit read size to available window. - // IMPORTANT: if window is 0 (stall timeout fired), we must NOT - // read into an empty buffer — read(&mut buf[..0]) returns Ok(0) - // which would be falsely interpreted as EOF. - let w = send_window.load(Ordering::Acquire) as usize; - if w == 0 { - log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id); - break; - } - // Adaptive: cap read to current per-stream target window - let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count( - stream_counter.load(Ordering::Relaxed), - ) as usize; - let max_read = w.min(buf.len()).min(adaptive_cap); + writer_for_edge_data.abort(); + Ok::<(), Box>(()) + } + .await; - tokio::select! { - read_result = up_read.read(&mut buf[..max_read]) => { - match read_result { - Ok(0) => break, - Ok(n) => { - send_window.fetch_sub(n as u32, Ordering::Release); - let frame = - encode_frame(stream_id, FRAME_DATA_BACK, &buf[..n]); - if data_writer_tx.send(frame).await.is_err() { - log::warn!("Stream {} data channel closed, closing", stream_id); + if let Err(e) = result { + log::error!("Stream {} error: {}", stream_id, e); + // Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK). + // Use send().await to guarantee delivery. 
+ if !stream_token.is_cancelled() { + let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]); + let _ = data_writer_tx.send(close_frame).await; + } + } + + // Clean up stream (guard against duplicate if FRAME_CLOSE already removed it) + let was_present = { + let mut s = streams_clone.lock().await; + s.remove(&stream_id).is_some() + }; + if was_present { + let _ = event_tx_clone.try_send(HubEvent::StreamClosed { + edge_id: edge_id_clone, + stream_id, + }); + } + stream_counter.fetch_sub(1, Ordering::Relaxed); + }); + } + FRAME_DATA => { + // Non-blocking dispatch to per-stream channel. + // With flow control, the sender should rarely exceed the channel capacity. + let mut s = streams.lock().await; + if let Some(state) = s.get(&frame.stream_id) { + if state.data_tx.try_send(frame.payload).is_err() { + log::warn!("Stream {} data channel full, closing stream", frame.stream_id); + if let Some(state) = s.remove(&frame.stream_id) { + state.cancel_token.cancel(); + } + } + } + } + FRAME_WINDOW_UPDATE => { + // Edge consumed data — increase our send window for this stream + if let Some(increment) = decode_window_update(&frame.payload) { + if increment > 0 { + let s = streams.lock().await; + if let Some(state) = s.get(&frame.stream_id) { + let prev = state.send_window.fetch_add(increment, Ordering::Release); + if prev + increment > MAX_WINDOW_SIZE { + state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release); + } + state.window_notify.notify_one(); + } + } + } + } + FRAME_CLOSE => { + let mut s = streams.lock().await; + if let Some(state) = s.remove(&frame.stream_id) { + state.cancel_token.cancel(); + let _ = event_tx.try_send(HubEvent::StreamClosed { + edge_id: edge_id.clone(), + stream_id: frame.stream_id, + }); + } + } + FRAME_PONG => { + log::debug!("Received PONG from edge {}", edge_id); + } + _ => { + log::warn!("Unexpected frame type {} from edge", frame.frame_type); + } + } + } + Some(Err(e)) => { + log::error!("Edge {} frame error: {}", edge_id, e); + 
disconnect_reason = format!("edge_frame_error: {}", e); + break 'hub_loop; + } + None => break, + } + } + + // Poll I/O: write(ctrl->data), flush, read, channels, timers + let event = std::future::poll_fn(|cx| { + // Queue PING if ticker fires + if ping_ticker.poll_tick(cx).is_ready() { + tunnel_io.queue_ctrl(encode_frame(0, FRAME_PING, &[])); + } + tunnel_io.poll_step(cx, &mut ctrl_rx, &mut data_rx, &mut liveness_deadline, &edge_token) + }).await; + + match event { + remoteingress_protocol::TunnelEvent::Frame(frame) => { + // Reset liveness on any received frame + last_activity = Instant::now(); + liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur); + + match frame.frame_type { + FRAME_OPEN => { + // A4: Check stream limit before processing + let permit = match stream_semaphore.clone().try_acquire_owned() { + Ok(p) => p, + Err(_) => { + log::warn!("Edge {} exceeded max streams ({}), rejecting stream {}", + edge_id, MAX_STREAMS_PER_EDGE, frame.stream_id); + let close_frame = encode_frame(frame.stream_id, FRAME_CLOSE_BACK, &[]); + tunnel_io.queue_ctrl(close_frame); + continue; + } + }; + + // Payload is PROXY v1 header line + let proxy_header = String::from_utf8_lossy(&frame.payload).to_string(); + + // Parse destination port from PROXY header + let dest_port = parse_dest_port_from_proxy(&proxy_header).unwrap_or(443); + + let stream_id = frame.stream_id; + let edge_id_clone = edge_id.clone(); + let event_tx_clone = event_tx.clone(); + let streams_clone = streams.clone(); + let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK + let data_writer_tx = data_tx.clone(); // data: DATA_BACK + let target = target_host.clone(); + let stream_token = edge_token.child_token(); + + let _ = event_tx.try_send(HubEvent::StreamOpened { + edge_id: edge_id.clone(), + stream_id, + }); + + // Create channel for data from edge to this stream (capacity 16 is sufficient with flow control) + let (data_tx, mut data_rx) = mpsc::channel::>(256); + // 
Adaptive initial window: scale with current stream count + // to keep total in-flight data within the 32MB budget. + let initial_window = compute_window_for_stream_count( + edge_stream_count.load(Ordering::Relaxed), + ); + let send_window = Arc::new(AtomicU32::new(initial_window)); + let window_notify = Arc::new(Notify::new()); + { + let mut s = streams.lock().await; + s.insert(stream_id, HubStreamState { + data_tx, + cancel_token: stream_token.clone(), + send_window: Arc::clone(&send_window), + window_notify: Arc::clone(&window_notify), + }); + } + + // Spawn task: connect to SmartProxy, send PROXY header, pipe data + let stream_counter = Arc::clone(&edge_stream_count); + tokio::spawn(async move { + let _permit = permit; // hold semaphore permit until stream completes + stream_counter.fetch_add(1, Ordering::Relaxed); + + let result = async { + // A2: Connect to SmartProxy with timeout + let mut upstream = tokio::time::timeout( + Duration::from_secs(10), + TcpStream::connect((target.as_str(), dest_port)), + ) + .await + .map_err(|_| -> Box { + format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into() + })??; + + upstream.set_nodelay(true)?; + upstream.write_all(proxy_header.as_bytes()).await?; + + let (mut up_read, mut up_write) = + upstream.into_split(); + + // Forward data from edge (via channel) to SmartProxy + // After writing to upstream, send WINDOW_UPDATE_BACK to edge + let writer_token = stream_token.clone(); + let wub_tx = writer_tx.clone(); + let stream_counter_w = Arc::clone(&stream_counter); + let writer_for_edge_data = tokio::spawn(async move { + let mut consumed_since_update: u32 = 0; + loop { + tokio::select! { + data = data_rx.recv() => { + match data { + Some(data) => { + let len = data.len() as u32; + // Check cancellation alongside the write so we respond + // promptly to FRAME_CLOSE instead of blocking up to 60s. + let write_result = tokio::select! 
{ + r = tokio::time::timeout( + Duration::from_secs(60), + up_write.write_all(&data), + ) => r, + _ = writer_token.cancelled() => break, + }; + match write_result { + Ok(Ok(())) => {} + Ok(Err(_)) => break, + Err(_) => { + log::warn!("Stream {} write to upstream timed out (60s)", stream_id); break; } } - Err(_) => break, + // Track consumption for adaptive flow control. + // Increment capped to adaptive window to limit per-stream in-flight data. + consumed_since_update += len; + let adaptive_window = remoteingress_protocol::compute_window_for_stream_count( + stream_counter_w.load(Ordering::Relaxed), + ); + let threshold = adaptive_window / 2; + if consumed_since_update >= threshold { + let increment = consumed_since_update.min(adaptive_window); + let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, increment); + if wub_tx.try_send(frame).is_ok() { + consumed_since_update -= increment; + } + // If try_send fails, keep accumulating — retry on next threshold + } + } + None => break, + } + } + _ = writer_token.cancelled() => break, + } + } + // Send final window update for remaining consumed bytes + if consumed_since_update > 0 { + let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update); + let _ = wub_tx.try_send(frame); + } + let _ = up_write.shutdown().await; + }); + + // Forward data from SmartProxy back to edge via writer channel + // with per-stream flow control (check send_window before reading) + let mut buf = vec![0u8; 32768]; + loop { + // Wait for send window to have capacity (with stall timeout) + loop { + let w = send_window.load(Ordering::Acquire); + if w > 0 { break; } + tokio::select! 
{ + _ = window_notify.notified() => continue, + _ = stream_token.cancelled() => break, + _ = tokio::time::sleep(Duration::from_secs(120)) => { + log::warn!("Stream {} download stalled (window empty for 120s)", stream_id); + break; + } + } + } + if stream_token.is_cancelled() { break; } + + // Limit read size to available window. + // IMPORTANT: if window is 0 (stall timeout fired), we must NOT + // read into an empty buffer — read(&mut buf[..0]) returns Ok(0) + // which would be falsely interpreted as EOF. + let w = send_window.load(Ordering::Acquire) as usize; + if w == 0 { + log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id); + break; + } + // Adaptive: cap read to current per-stream target window + let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count( + stream_counter.load(Ordering::Relaxed), + ) as usize; + let max_read = w.min(buf.len()).min(adaptive_cap); + + tokio::select! { + read_result = up_read.read(&mut buf[..max_read]) => { + match read_result { + Ok(0) => break, + Ok(n) => { + send_window.fetch_sub(n as u32, Ordering::Release); + let frame = + encode_frame(stream_id, FRAME_DATA_BACK, &buf[..n]); + if data_writer_tx.send(frame).await.is_err() { + log::warn!("Stream {} data channel closed, closing", stream_id); + break; } } - _ = stream_token.cancelled() => break, + Err(_) => break, } } - - // Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK). - // Use send().await to guarantee delivery (try_send silently drops if full). - if !stream_token.is_cancelled() { - let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]); - let _ = data_writer_tx.send(close_frame).await; - } - - writer_for_edge_data.abort(); - Ok::<(), Box>(()) + _ = stream_token.cancelled() => break, } - .await; + } - if let Err(e) = result { - log::error!("Stream {} error: {}", stream_id, e); - // Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK). 
- // Use send().await to guarantee delivery. - if !stream_token.is_cancelled() { - let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]); - let _ = data_writer_tx.send(close_frame).await; - } - } + // Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK). + // Use send().await to guarantee delivery (try_send silently drops if full). + if !stream_token.is_cancelled() { + let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]); + let _ = data_writer_tx.send(close_frame).await; + } - // Clean up stream (guard against duplicate if FRAME_CLOSE already removed it) - let was_present = { - let mut s = streams_clone.lock().await; - s.remove(&stream_id).is_some() - }; - if was_present { - let _ = event_tx_clone.try_send(HubEvent::StreamClosed { - edge_id: edge_id_clone, - stream_id, - }); - } - stream_counter.fetch_sub(1, Ordering::Relaxed); + writer_for_edge_data.abort(); + Ok::<(), Box>(()) + } + .await; + + if let Err(e) = result { + log::error!("Stream {} error: {}", stream_id, e); + // Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK). + // Use send().await to guarantee delivery. + if !stream_token.is_cancelled() { + let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]); + let _ = data_writer_tx.send(close_frame).await; + } + } + + // Clean up stream (guard against duplicate if FRAME_CLOSE already removed it) + let was_present = { + let mut s = streams_clone.lock().await; + s.remove(&stream_id).is_some() + }; + if was_present { + let _ = event_tx_clone.try_send(HubEvent::StreamClosed { + edge_id: edge_id_clone, + stream_id, }); } - FRAME_DATA => { - // Non-blocking dispatch to per-stream channel. - // With flow control, the sender should rarely exceed the channel capacity. 
- let mut s = streams.lock().await; - if let Some(state) = s.get(&frame.stream_id) { - if state.data_tx.try_send(frame.payload).is_err() { - log::warn!("Stream {} data channel full, closing stream", frame.stream_id); - if let Some(state) = s.remove(&frame.stream_id) { - state.cancel_token.cancel(); - } - } - } - } - FRAME_WINDOW_UPDATE => { - // Edge consumed data — increase our send window for this stream - if let Some(increment) = decode_window_update(&frame.payload) { - if increment > 0 { - let s = streams.lock().await; - if let Some(state) = s.get(&frame.stream_id) { - let prev = state.send_window.fetch_add(increment, Ordering::Release); - if prev + increment > MAX_WINDOW_SIZE { - state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release); - } - state.window_notify.notify_one(); - } - } - } - } - FRAME_CLOSE => { - let mut s = streams.lock().await; + stream_counter.fetch_sub(1, Ordering::Relaxed); + }); + } + FRAME_DATA => { + // Non-blocking dispatch to per-stream channel. + // With flow control, the sender should rarely exceed the channel capacity. 
+ let mut s = streams.lock().await; + if let Some(state) = s.get(&frame.stream_id) { + if state.data_tx.try_send(frame.payload).is_err() { + log::warn!("Stream {} data channel full, closing stream", frame.stream_id); if let Some(state) = s.remove(&frame.stream_id) { state.cancel_token.cancel(); - let _ = event_tx.try_send(HubEvent::StreamClosed { - edge_id: edge_id.clone(), - stream_id: frame.stream_id, - }); } } - FRAME_PONG => { - log::debug!("Received PONG from edge {}", edge_id); - } - _ => { - log::warn!("Unexpected frame type {} from edge", frame.frame_type); - } } } - Ok(None) => { - log::info!("Edge {} disconnected (EOF)", edge_id); - disconnect_reason = "edge_eof".to_string(); - break; + FRAME_WINDOW_UPDATE => { + // Edge consumed data — increase our send window for this stream + if let Some(increment) = decode_window_update(&frame.payload) { + if increment > 0 { + let s = streams.lock().await; + if let Some(state) = s.get(&frame.stream_id) { + let prev = state.send_window.fetch_add(increment, Ordering::Release); + if prev + increment > MAX_WINDOW_SIZE { + state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release); + } + state.window_notify.notify_one(); + } + } + } } - Err(e) => { - log::error!("Edge {} frame error: {}", edge_id, e); - disconnect_reason = format!("edge_frame_error: {}", e); - break; + FRAME_CLOSE => { + let mut s = streams.lock().await; + if let Some(state) = s.remove(&frame.stream_id) { + state.cancel_token.cancel(); + let _ = event_tx.try_send(HubEvent::StreamClosed { + edge_id: edge_id.clone(), + stream_id: frame.stream_id, + }); + } + } + FRAME_PONG => { + log::debug!("Received PONG from edge {}", edge_id); + } + _ => { + log::warn!("Unexpected frame type {} from edge", frame.frame_type); } } } - _ = ping_ticker.tick() => { - let ping_frame = encode_frame(0, FRAME_PING, &[]); - if frame_writer_tx.try_send(ping_frame).is_err() { - // Control channel full — skip this PING cycle. 
- // The 45s liveness timeout gives margin for the channel to drain. - log::warn!("PING send to edge {} failed, control channel full — skipping", edge_id); - } - log::trace!("Sent PING to edge {}", edge_id); + remoteingress_protocol::TunnelEvent::Eof => { + log::info!("Edge {} disconnected (EOF)", edge_id); + disconnect_reason = "edge_eof".to_string(); + break; } - _ = &mut liveness_deadline => { + remoteingress_protocol::TunnelEvent::ReadError(e) => { + log::error!("Edge {} frame error: {}", edge_id, e); + disconnect_reason = format!("edge_frame_error: {}", e); + break; + } + remoteingress_protocol::TunnelEvent::WriteError(e) => { + log::error!("Tunnel write error to edge {}: {}", edge_id, e); + disconnect_reason = format!("tunnel_write_error: {}", e); + break; + } + remoteingress_protocol::TunnelEvent::LivenessTimeout => { log::warn!("Edge {} liveness timeout (no frames for {}s), disconnecting", edge_id, liveness_timeout_dur.as_secs()); disconnect_reason = "liveness_timeout".to_string(); break; } - _ = &mut writer_dead_rx => { - log::error!("Tunnel writer to edge {} died, disconnecting immediately", edge_id); - disconnect_reason = "writer_dead".to_string(); - break; - } - _ = edge_token.cancelled() => { + remoteingress_protocol::TunnelEvent::Cancelled => { log::info!("Edge {} cancelled by hub", edge_id); disconnect_reason = "cancelled_by_hub".to_string(); break; @@ -792,7 +1052,6 @@ async fn handle_edge_connection( // Cleanup: cancel edge token to propagate to all child tasks edge_token.cancel(); config_handle.abort(); - writer_handle.abort(); { let mut edges = connected.lock().await; edges.remove(&edge_id); diff --git a/rust/crates/remoteingress-protocol/Cargo.toml b/rust/crates/remoteingress-protocol/Cargo.toml index 3d4d8aa..b72b423 100644 --- a/rust/crates/remoteingress-protocol/Cargo.toml +++ b/rust/crates/remoteingress-protocol/Cargo.toml @@ -4,7 +4,9 @@ version = "2.0.0" edition = "2021" [dependencies] -tokio = { version = "1", features = ["io-util"] } 
+tokio = { version = "1", features = ["io-util", "sync", "time"] } +tokio-util = "0.7" +log = "0.4" [dev-dependencies] tokio = { version = "1", features = ["io-util", "macros", "rt"] } diff --git a/rust/crates/remoteingress-protocol/src/lib.rs b/rust/crates/remoteingress-protocol/src/lib.rs index 38b6089..c9323fe 100644 --- a/rust/crates/remoteingress-protocol/src/lib.rs +++ b/rust/crates/remoteingress-protocol/src/lib.rs @@ -1,4 +1,8 @@ -use tokio::io::{AsyncRead, AsyncReadExt}; +use std::collections::VecDeque; +use std::future::Future; +use std::pin::Pin; +use std::task::{Context, Poll}; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf}; // Frame type constants pub const FRAME_OPEN: u8 = 0x01; @@ -120,9 +124,13 @@ impl FrameReader { ]); if length > MAX_PAYLOAD_SIZE { + log::error!( + "CORRUPT FRAME HEADER: raw={:02x?} stream_id={} type=0x{:02x} length={}", + self.header_buf, stream_id, frame_type, length + ); return Err(std::io::Error::new( std::io::ErrorKind::InvalidData, - format!("frame payload too large: {} bytes", length), + format!("frame payload too large: {} bytes (header={:02x?})", length, self.header_buf), )); } @@ -144,6 +152,256 @@ impl FrameReader { } } +// --------------------------------------------------------------------------- +// TunnelIo: single-owner I/O multiplexer for the TLS tunnel connection +// --------------------------------------------------------------------------- + +/// Events produced by the TunnelIo event loop. +#[derive(Debug)] +pub enum TunnelEvent { + /// A complete frame was read from the remote side. + Frame(Frame), + /// The remote side closed the connection (EOF). + Eof, + /// A read error occurred. + ReadError(std::io::Error), + /// A write error occurred. + WriteError(std::io::Error), + /// No frames received for the liveness timeout duration. + LivenessTimeout, + /// The cancellation token was triggered. + Cancelled, +} + +/// Single-owner I/O engine for the tunnel TLS connection. 
+/// +/// Owns the TLS stream directly — no `tokio::io::split()`, no mutex. +/// Uses two priority write queues: ctrl frames (PONG, WINDOW_UPDATE, CLOSE, OPEN) +/// are ALWAYS written before data frames (DATA, DATA_BACK). This prevents +/// WINDOW_UPDATE starvation that causes flow control deadlocks. +pub struct TunnelIo { + stream: S, + // Read state: accumulate bytes, parse frames incrementally + read_buf: Vec, + read_pos: usize, + // Write state: dual priority queues + ctrl_queue: VecDeque>, // PONG, WINDOW_UPDATE, CLOSE, OPEN — always first + data_queue: VecDeque>, // DATA, DATA_BACK — only when ctrl is empty + write_offset: usize, // progress within current frame being written + flush_needed: bool, +} + +impl TunnelIo { + pub fn new(stream: S, initial_data: Vec) -> Self { + let read_pos = initial_data.len(); + let mut read_buf = initial_data; + if read_buf.capacity() < 65536 { + read_buf.reserve(65536 - read_buf.len()); + } + Self { + stream, + read_buf, + read_pos, + ctrl_queue: VecDeque::new(), + data_queue: VecDeque::new(), + write_offset: 0, + flush_needed: false, + } + } + + /// Queue a high-priority control frame (PONG, WINDOW_UPDATE, CLOSE, OPEN). + pub fn queue_ctrl(&mut self, frame: Vec) { + self.ctrl_queue.push_back(frame); + } + + /// Queue a lower-priority data frame (DATA, DATA_BACK). + pub fn queue_data(&mut self, frame: Vec) { + self.data_queue.push_back(frame); + } + + /// Try to parse a complete frame from the read buffer. 
+ pub fn try_parse_frame(&mut self) -> Option> { + if self.read_pos < FRAME_HEADER_SIZE { + return None; + } + + let stream_id = u32::from_be_bytes([ + self.read_buf[0], self.read_buf[1], self.read_buf[2], self.read_buf[3], + ]); + let frame_type = self.read_buf[4]; + let length = u32::from_be_bytes([ + self.read_buf[5], self.read_buf[6], self.read_buf[7], self.read_buf[8], + ]); + + if length > MAX_PAYLOAD_SIZE { + let header = [ + self.read_buf[0], self.read_buf[1], self.read_buf[2], self.read_buf[3], + self.read_buf[4], self.read_buf[5], self.read_buf[6], self.read_buf[7], + self.read_buf[8], + ]; + log::error!( + "CORRUPT FRAME HEADER: raw={:02x?} stream_id={} type=0x{:02x} length={}", + header, stream_id, frame_type, length + ); + return Some(Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("frame payload too large: {} bytes (header={:02x?})", length, header), + ))); + } + + let total_frame_size = FRAME_HEADER_SIZE + length as usize; + if self.read_pos < total_frame_size { + return None; + } + + let payload = self.read_buf[FRAME_HEADER_SIZE..total_frame_size].to_vec(); + self.read_buf.drain(..total_frame_size); + self.read_pos -= total_frame_size; + + Some(Ok(Frame { stream_id, frame_type, payload })) + } + + fn has_write_work(&self) -> bool { + !self.ctrl_queue.is_empty() || !self.data_queue.is_empty() + } + + /// Poll-based I/O step. Returns Ready on events, Pending when idle. + /// + /// Order: write(ctrl→data) → flush → read → channels → timers + pub fn poll_step( + &mut self, + cx: &mut Context<'_>, + ctrl_rx: &mut tokio::sync::mpsc::Receiver>, + data_rx: &mut tokio::sync::mpsc::Receiver>, + liveness_deadline: &mut Pin>, + cancel_token: &tokio_util::sync::CancellationToken, + ) -> Poll { + // 1. WRITE: drain ctrl queue first, then data queue. + // TLS poll_write writes plaintext to session buffer (always Ready). + // Batch up to 16 frames per poll cycle. 
+ let mut writes = 0; + while self.has_write_work() && writes < 16 { + // Determine which queue to write from and the frame data. + // We access the queues via raw pointers to avoid borrow conflicts with self.stream. + let from_ctrl = !self.ctrl_queue.is_empty(); + let frame_ptr: *const Vec = if from_ctrl { + self.ctrl_queue.front().unwrap() + } else { + self.data_queue.front().unwrap() + }; + // SAFETY: the frame is not modified while we hold the pointer — poll_write + // only writes to self.stream, and advance_write only runs after poll_write returns. + let frame = unsafe { &*frame_ptr }; + let remaining = &frame[self.write_offset..]; + + match Pin::new(&mut self.stream).poll_write(cx, remaining) { + Poll::Ready(Ok(0)) => { + return Poll::Ready(TunnelEvent::WriteError( + std::io::Error::new(std::io::ErrorKind::WriteZero, "write zero"), + )); + } + Poll::Ready(Ok(n)) => { + self.write_offset += n; + self.flush_needed = true; + if self.write_offset >= frame.len() { + if from_ctrl { self.ctrl_queue.pop_front(); } + else { self.data_queue.pop_front(); } + self.write_offset = 0; + writes += 1; + } + } + Poll::Ready(Err(e)) => return Poll::Ready(TunnelEvent::WriteError(e)), + Poll::Pending => break, + } + } + + // 2. FLUSH: push encrypted data from TLS session to TCP. + if self.flush_needed { + match Pin::new(&mut self.stream).poll_flush(cx) { + Poll::Ready(Ok(())) => self.flush_needed = false, + Poll::Ready(Err(e)) => return Poll::Ready(TunnelEvent::WriteError(e)), + Poll::Pending => {} // TCP waker will notify us + } + } + + // 3. READ: drain stream until Pending to ensure the TCP waker is always registered. + // Without this loop, a Ready return with partial frame data would consume + // the waker without re-registering it, causing the task to sleep until a + // timer or channel wakes it (potentially 15+ seconds of lost reads). 
+ loop { + if self.read_buf.len() < self.read_pos + 32768 { + self.read_buf.resize(self.read_pos + 32768, 0); + } + let mut rbuf = ReadBuf::new(&mut self.read_buf[self.read_pos..]); + match Pin::new(&mut self.stream).poll_read(cx, &mut rbuf) { + Poll::Ready(Ok(())) => { + let n = rbuf.filled().len(); + if n == 0 { + return Poll::Ready(TunnelEvent::Eof); + } + self.read_pos += n; + if let Some(result) = self.try_parse_frame() { + return match result { + Ok(frame) => Poll::Ready(TunnelEvent::Frame(frame)), + Err(e) => Poll::Ready(TunnelEvent::ReadError(e)), + }; + } + // Partial data — loop to call poll_read again so the TCP + // waker is re-registered when it finally returns Pending. + } + Poll::Ready(Err(e)) => return Poll::Ready(TunnelEvent::ReadError(e)), + Poll::Pending => break, + } + } + + // 4. CHANNELS: drain ctrl into ctrl_queue, data into data_queue. + let mut got_new = false; + loop { + match ctrl_rx.poll_recv(cx) { + Poll::Ready(Some(frame)) => { self.ctrl_queue.push_back(frame); got_new = true; } + Poll::Ready(None) => { + return Poll::Ready(TunnelEvent::WriteError( + std::io::Error::new(std::io::ErrorKind::BrokenPipe, "ctrl channel closed"), + )); + } + Poll::Pending => break, + } + } + loop { + match data_rx.poll_recv(cx) { + Poll::Ready(Some(frame)) => { self.data_queue.push_back(frame); got_new = true; } + Poll::Ready(None) => { + return Poll::Ready(TunnelEvent::WriteError( + std::io::Error::new(std::io::ErrorKind::BrokenPipe, "data channel closed"), + )); + } + Poll::Pending => break, + } + } + + // 5. TIMERS + if liveness_deadline.as_mut().poll(cx).is_ready() { + return Poll::Ready(TunnelEvent::LivenessTimeout); + } + if cancel_token.is_cancelled() { + return Poll::Ready(TunnelEvent::Cancelled); + } + + // 6. SELF-WAKE: only when we have frames AND flush is done. + // If flush is pending, the TCP write-readiness waker will notify us. + // If we got new channel frames, wake to write them. 
+ if got_new || (!self.flush_needed && self.has_write_work()) { + cx.waker().wake_by_ref(); + } + + Poll::Pending + } + + pub fn into_inner(self) -> S { + self.stream + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index 4b59805..9eb320c 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: '@serve.zone/remoteingress', - version: '4.8.1', + version: '4.8.2', description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.' }