Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1979910f6f | |||
| edfad2dffe | |||
| d907943ae5 | |||
| 4bfb1244fc | |||
| e31c3421a6 | |||
| de8422966a | |||
| a87e9578eb | |||
| b851bc7994 |
27
changelog.md
27
changelog.md
@@ -1,5 +1,32 @@
|
||||
# Changelog
|
||||
|
||||
## 2026-03-17 - 4.8.2 - fix(rust-edge)
|
||||
refactor tunnel I/O to preserve TLS state and prioritize control frames
|
||||
|
||||
- replace split TLS handling with a single-owner TunnelIo to avoid handshake and buffered read corruption
|
||||
- prioritize control frames over data frames to prevent WINDOW_UPDATE starvation and flow-control deadlocks
|
||||
- improve tunnel reliability with incremental frame parsing, liveness/error events, and corrupt frame header logging
|
||||
|
||||
## 2026-03-17 - 4.8.1 - fix(remoteingress-core)
|
||||
remove tunnel writer timeouts from edge and hub buffered writes
|
||||
|
||||
- Drops the 30 second timeout wrapper around writer.write_all and writer.flush in both edge and hub tunnel writers.
|
||||
- Updates error logging to report write failures without referring to stalled writes.
|
||||
|
||||
## 2026-03-17 - 4.8.0 - feat(events)
|
||||
include disconnect reasons in edge and hub management events
|
||||
|
||||
- Add reason fields to tunnelDisconnected and edgeDisconnected events emitted from the Rust core and binary bridge
|
||||
- Propagate specific disconnect causes such as EOF, liveness timeout, writer failure, handshake failure, and hub cancellation
|
||||
- Update TypeScript edge and hub classes to log and forward disconnect reason data
|
||||
- Extend serialization tests to cover the new reason fields
|
||||
|
||||
## 2026-03-17 - 4.7.2 - fix(remoteingress-core)
|
||||
add tunnel write timeouts and scale initial stream windows by active stream count
|
||||
|
||||
- Wrap tunnel frame writes and flushes in a 30-second timeout on both edge and hub to detect stalled writers and trigger faster reconnect or cleanup.
|
||||
- Compute each stream's initial send window from the current active stream count instead of using a fixed window to keep total in-flight data within the 32MB budget.
|
||||
|
||||
## 2026-03-17 - 4.7.1 - fix(remoteingress-core)
|
||||
improve tunnel failure detection and reconnect handling
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@serve.zone/remoteingress",
|
||||
"version": "4.7.1",
|
||||
"version": "4.8.2",
|
||||
"private": false,
|
||||
"description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.",
|
||||
"main": "dist_ts/index.js",
|
||||
|
||||
2
rust/Cargo.lock
generated
2
rust/Cargo.lock
generated
@@ -568,7 +568,9 @@ dependencies = [
|
||||
name = "remoteingress-protocol"
|
||||
version = "2.0.0"
|
||||
dependencies = [
|
||||
"log",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -173,10 +173,10 @@ async fn handle_request(
|
||||
serde_json::json!({ "edgeId": edge_id, "peerAddr": peer_addr }),
|
||||
);
|
||||
}
|
||||
HubEvent::EdgeDisconnected { edge_id } => {
|
||||
HubEvent::EdgeDisconnected { edge_id, reason } => {
|
||||
send_event(
|
||||
"edgeDisconnected",
|
||||
serde_json::json!({ "edgeId": edge_id }),
|
||||
serde_json::json!({ "edgeId": edge_id, "reason": reason }),
|
||||
);
|
||||
}
|
||||
HubEvent::StreamOpened {
|
||||
@@ -295,8 +295,8 @@ async fn handle_request(
|
||||
EdgeEvent::TunnelConnected => {
|
||||
send_event("tunnelConnected", serde_json::json!({}));
|
||||
}
|
||||
EdgeEvent::TunnelDisconnected => {
|
||||
send_event("tunnelDisconnected", serde_json::json!({}));
|
||||
EdgeEvent::TunnelDisconnected { reason } => {
|
||||
send_event("tunnelDisconnected", serde_json::json!({ "reason": reason }));
|
||||
}
|
||||
EdgeEvent::PublicIpDiscovered { ip } => {
|
||||
send_event(
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::{TcpListener, TcpStream};
|
||||
use tokio::sync::{mpsc, Mutex, Notify, RwLock};
|
||||
use tokio::task::JoinHandle;
|
||||
@@ -64,7 +64,8 @@ struct ConfigUpdate {
|
||||
#[serde(tag = "type")]
|
||||
pub enum EdgeEvent {
|
||||
TunnelConnected,
|
||||
TunnelDisconnected,
|
||||
#[serde(rename_all = "camelCase")]
|
||||
TunnelDisconnected { reason: String },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
PublicIpDiscovered { ip: String },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -236,10 +237,15 @@ async fn edge_main_loop(
|
||||
}
|
||||
|
||||
*connected.write().await = false;
|
||||
// Extract reason for disconnect event
|
||||
let reason = match &result {
|
||||
EdgeLoopResult::Reconnect(r) => r.clone(),
|
||||
EdgeLoopResult::Shutdown => "shutdown".to_string(),
|
||||
};
|
||||
// Only emit disconnect event on actual disconnection, not on failed reconnects.
|
||||
// Failed reconnects never reach the handshake-success point, so was_connected is false.
|
||||
if was_connected {
|
||||
let _ = event_tx.try_send(EdgeEvent::TunnelDisconnected);
|
||||
let _ = event_tx.try_send(EdgeEvent::TunnelDisconnected { reason: reason.clone() });
|
||||
}
|
||||
active_streams.store(0, Ordering::Relaxed);
|
||||
// Reset stream ID counter for next connection cycle
|
||||
@@ -248,7 +254,7 @@ async fn edge_main_loop(
|
||||
|
||||
match result {
|
||||
EdgeLoopResult::Shutdown => break,
|
||||
EdgeLoopResult::Reconnect => {
|
||||
EdgeLoopResult::Reconnect(_) => {
|
||||
log::info!("Reconnecting in {}ms...", backoff_ms);
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(Duration::from_millis(backoff_ms)) => {}
|
||||
@@ -263,7 +269,7 @@ async fn edge_main_loop(
|
||||
|
||||
enum EdgeLoopResult {
|
||||
Shutdown,
|
||||
Reconnect,
|
||||
Reconnect(String), // reason for disconnection
|
||||
}
|
||||
|
||||
async fn connect_to_hub_and_run(
|
||||
@@ -295,49 +301,59 @@ async fn connect_to_hub_and_run(
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("Failed to connect to hub at {}: {}", addr, e);
|
||||
return EdgeLoopResult::Reconnect;
|
||||
return EdgeLoopResult::Reconnect(format!("tcp_connect_failed: {}", e));
|
||||
}
|
||||
};
|
||||
|
||||
let server_name = rustls::pki_types::ServerName::try_from(config.hub_host.clone())
|
||||
.unwrap_or_else(|_| rustls::pki_types::ServerName::try_from("remoteingress-hub".to_string()).unwrap());
|
||||
|
||||
let tls_stream = match connector.connect(server_name, tcp).await {
|
||||
let mut tls_stream = match connector.connect(server_name, tcp).await {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
log::error!("TLS handshake failed: {}", e);
|
||||
return EdgeLoopResult::Reconnect;
|
||||
return EdgeLoopResult::Reconnect(format!("tls_handshake_failed: {}", e));
|
||||
}
|
||||
};
|
||||
|
||||
let (read_half, mut write_half) = tokio::io::split(tls_stream);
|
||||
|
||||
// Send auth line
|
||||
// Send auth line (we own the whole stream — no split)
|
||||
let auth_line = format!("EDGE {} {}\n", config.edge_id, config.secret);
|
||||
if write_half.write_all(auth_line.as_bytes()).await.is_err() {
|
||||
return EdgeLoopResult::Reconnect;
|
||||
if tls_stream.write_all(auth_line.as_bytes()).await.is_err() {
|
||||
return EdgeLoopResult::Reconnect("auth_write_failed".to_string());
|
||||
}
|
||||
if tls_stream.flush().await.is_err() {
|
||||
return EdgeLoopResult::Reconnect("auth_flush_failed".to_string());
|
||||
}
|
||||
|
||||
// Read handshake response line from hub (JSON with initial config)
|
||||
let mut buf_reader = BufReader::new(read_half);
|
||||
let mut handshake_line = String::new();
|
||||
match buf_reader.read_line(&mut handshake_line).await {
|
||||
Ok(0) => {
|
||||
log::error!("Hub rejected connection (EOF before handshake)");
|
||||
return EdgeLoopResult::Reconnect;
|
||||
}
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
log::error!("Failed to read handshake response: {}", e);
|
||||
return EdgeLoopResult::Reconnect;
|
||||
// Read handshake line byte-by-byte (no BufReader — into_inner corrupts TLS state)
|
||||
let mut handshake_bytes = Vec::with_capacity(512);
|
||||
let mut byte = [0u8; 1];
|
||||
loop {
|
||||
match tls_stream.read_exact(&mut byte).await {
|
||||
Ok(_) => {
|
||||
handshake_bytes.push(byte[0]);
|
||||
if byte[0] == b'\n' { break; }
|
||||
if handshake_bytes.len() > 8192 {
|
||||
return EdgeLoopResult::Reconnect("handshake_too_long".to_string());
|
||||
}
|
||||
}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
|
||||
log::error!("Hub rejected connection (EOF before handshake)");
|
||||
return EdgeLoopResult::Reconnect("hub_rejected_eof".to_string());
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("Failed to read handshake response: {}", e);
|
||||
return EdgeLoopResult::Reconnect(format!("handshake_read_failed: {}", e));
|
||||
}
|
||||
}
|
||||
}
|
||||
let handshake_line = String::from_utf8_lossy(&handshake_bytes);
|
||||
|
||||
let handshake: HandshakeConfig = match serde_json::from_str(handshake_line.trim()) {
|
||||
Ok(h) => h,
|
||||
Err(e) => {
|
||||
log::error!("Invalid handshake response: {}", e);
|
||||
return EdgeLoopResult::Reconnect;
|
||||
return EdgeLoopResult::Reconnect(format!("handshake_invalid: {}", e));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -388,52 +404,13 @@ async fn connect_to_hub_and_run(
|
||||
let client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>> =
|
||||
Arc::new(Mutex::new(HashMap::new()));
|
||||
|
||||
// QoS dual-channel tunnel writer: control frames (PONG/WINDOW_UPDATE/CLOSE/OPEN)
|
||||
// have priority over data frames (DATA). Prevents PING starvation under load.
|
||||
// QoS dual-channel: ctrl frames have priority over data frames.
|
||||
// Stream handlers send through these channels → TunnelIo drains them.
|
||||
let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Vec<u8>>(256);
|
||||
let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Vec<u8>>(4096);
|
||||
// Legacy alias — control channel for PONG, CLOSE, WINDOW_UPDATE, OPEN
|
||||
let tunnel_writer_tx = tunnel_ctrl_tx.clone();
|
||||
let tw_token = connection_token.clone();
|
||||
// Oneshot to signal the reader loop when the writer dies from a write error.
|
||||
// This avoids the 45s liveness timeout delay when the tunnel is already dead.
|
||||
let (writer_dead_tx, mut writer_dead_rx) = tokio::sync::oneshot::channel::<()>();
|
||||
let tunnel_writer_handle = tokio::spawn(async move {
|
||||
// BufWriter coalesces small writes (frame headers, control frames) into fewer
|
||||
// TLS records and syscalls. Flushed after each frame to avoid holding data.
|
||||
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
|
||||
let mut write_error = false;
|
||||
loop {
|
||||
tokio::select! {
|
||||
biased; // control frames always take priority over data
|
||||
ctrl = tunnel_ctrl_rx.recv() => {
|
||||
match ctrl {
|
||||
Some(frame_data) => {
|
||||
if writer.write_all(&frame_data).await.is_err() { write_error = true; break; }
|
||||
if writer.flush().await.is_err() { write_error = true; break; }
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
data = tunnel_data_rx.recv() => {
|
||||
match data {
|
||||
Some(frame_data) => {
|
||||
if writer.write_all(&frame_data).await.is_err() { write_error = true; break; }
|
||||
if writer.flush().await.is_err() { write_error = true; break; }
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
_ = tw_token.cancelled() => break,
|
||||
}
|
||||
}
|
||||
if write_error {
|
||||
log::error!("Tunnel writer failed, signalling reader for fast reconnect");
|
||||
let _ = writer_dead_tx.send(());
|
||||
}
|
||||
});
|
||||
|
||||
// Start TCP listeners for initial ports (hot-reloadable)
|
||||
// Start TCP listeners for initial ports
|
||||
let mut port_listeners: HashMap<u16, JoinHandle<()>> = HashMap::new();
|
||||
let bind_address = config.bind_address.as_deref().unwrap_or("0.0.0.0");
|
||||
apply_port_config(
|
||||
@@ -449,122 +426,180 @@ async fn connect_to_hub_and_run(
|
||||
bind_address,
|
||||
);
|
||||
|
||||
// Heartbeat: liveness timeout detects silent hub failures
|
||||
// Single-owner I/O engine — no tokio::io::split, no mutex
|
||||
let mut tunnel_io = remoteingress_protocol::TunnelIo::new(tls_stream, Vec::new());
|
||||
|
||||
let liveness_timeout_dur = Duration::from_secs(45);
|
||||
let mut last_activity = Instant::now();
|
||||
let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur));
|
||||
|
||||
// Read frames from hub
|
||||
let mut frame_reader = FrameReader::new(buf_reader);
|
||||
let result = loop {
|
||||
tokio::select! {
|
||||
frame_result = frame_reader.next_frame() => {
|
||||
match frame_result {
|
||||
Ok(Some(frame)) => {
|
||||
// Reset liveness on any received frame
|
||||
last_activity = Instant::now();
|
||||
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||
|
||||
match frame.frame_type {
|
||||
FRAME_DATA_BACK => {
|
||||
// Non-blocking dispatch to per-stream channel.
|
||||
// With flow control, the sender should rarely exceed the channel capacity.
|
||||
let mut writers = client_writers.lock().await;
|
||||
if let Some(state) = writers.get(&frame.stream_id) {
|
||||
if state.back_tx.try_send(frame.payload).is_err() {
|
||||
log::warn!("Stream {} back-channel full, closing stream", frame.stream_id);
|
||||
writers.remove(&frame.stream_id);
|
||||
}
|
||||
let result = 'io_loop: loop {
|
||||
// Drain any buffered frames
|
||||
loop {
|
||||
match tunnel_io.try_parse_frame() {
|
||||
Some(Ok(frame)) => {
|
||||
last_activity = Instant::now();
|
||||
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||
match frame.frame_type {
|
||||
FRAME_DATA_BACK => {
|
||||
let mut writers = client_writers.lock().await;
|
||||
if let Some(state) = writers.get(&frame.stream_id) {
|
||||
if state.back_tx.try_send(frame.payload).is_err() {
|
||||
log::warn!("Stream {} back-channel full, closing", frame.stream_id);
|
||||
writers.remove(&frame.stream_id);
|
||||
}
|
||||
}
|
||||
FRAME_WINDOW_UPDATE_BACK => {
|
||||
// Hub consumed data — increase our send window for this stream (upload direction)
|
||||
if let Some(increment) = decode_window_update(&frame.payload) {
|
||||
if increment > 0 {
|
||||
let writers = client_writers.lock().await;
|
||||
if let Some(state) = writers.get(&frame.stream_id) {
|
||||
let prev = state.send_window.fetch_add(increment, Ordering::Release);
|
||||
if prev + increment > MAX_WINDOW_SIZE {
|
||||
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
|
||||
}
|
||||
state.window_notify.notify_one();
|
||||
}
|
||||
FRAME_WINDOW_UPDATE_BACK => {
|
||||
if let Some(increment) = decode_window_update(&frame.payload) {
|
||||
if increment > 0 {
|
||||
let writers = client_writers.lock().await;
|
||||
if let Some(state) = writers.get(&frame.stream_id) {
|
||||
let prev = state.send_window.fetch_add(increment, Ordering::Release);
|
||||
if prev + increment > MAX_WINDOW_SIZE {
|
||||
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
|
||||
}
|
||||
state.window_notify.notify_one();
|
||||
}
|
||||
}
|
||||
}
|
||||
FRAME_CLOSE_BACK => {
|
||||
let mut writers = client_writers.lock().await;
|
||||
}
|
||||
FRAME_CLOSE_BACK => {
|
||||
let mut writers = client_writers.lock().await;
|
||||
writers.remove(&frame.stream_id);
|
||||
}
|
||||
FRAME_CONFIG => {
|
||||
if let Ok(update) = serde_json::from_slice::<ConfigUpdate>(&frame.payload) {
|
||||
log::info!("Config update from hub: ports {:?}", update.listen_ports);
|
||||
*listen_ports.write().await = update.listen_ports.clone();
|
||||
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
|
||||
listen_ports: update.listen_ports.clone(),
|
||||
});
|
||||
apply_port_config(
|
||||
&update.listen_ports,
|
||||
&mut port_listeners,
|
||||
&tunnel_writer_tx,
|
||||
&tunnel_data_tx,
|
||||
&client_writers,
|
||||
active_streams,
|
||||
next_stream_id,
|
||||
&config.edge_id,
|
||||
connection_token,
|
||||
bind_address,
|
||||
);
|
||||
}
|
||||
}
|
||||
FRAME_PING => {
|
||||
// Queue PONG directly — no channel round-trip, guaranteed delivery
|
||||
tunnel_io.queue_ctrl(encode_frame(0, FRAME_PONG, &[]));
|
||||
}
|
||||
_ => {
|
||||
log::warn!("Unexpected frame type {} from hub", frame.frame_type);
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(Err(e)) => {
|
||||
log::error!("Hub frame error: {}", e);
|
||||
break 'io_loop EdgeLoopResult::Reconnect(format!("hub_frame_error: {}", e));
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
|
||||
// Poll I/O: write(ctrl→data), flush, read, channels, timers
|
||||
let event = std::future::poll_fn(|cx| {
|
||||
tunnel_io.poll_step(cx, &mut tunnel_ctrl_rx, &mut tunnel_data_rx, &mut liveness_deadline, connection_token)
|
||||
}).await;
|
||||
|
||||
match event {
|
||||
remoteingress_protocol::TunnelEvent::Frame(frame) => {
|
||||
last_activity = Instant::now();
|
||||
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||
match frame.frame_type {
|
||||
FRAME_DATA_BACK => {
|
||||
let mut writers = client_writers.lock().await;
|
||||
if let Some(state) = writers.get(&frame.stream_id) {
|
||||
if state.back_tx.try_send(frame.payload).is_err() {
|
||||
log::warn!("Stream {} back-channel full, closing", frame.stream_id);
|
||||
writers.remove(&frame.stream_id);
|
||||
}
|
||||
FRAME_CONFIG => {
|
||||
if let Ok(update) = serde_json::from_slice::<ConfigUpdate>(&frame.payload) {
|
||||
log::info!("Config update from hub: ports {:?}", update.listen_ports);
|
||||
*listen_ports.write().await = update.listen_ports.clone();
|
||||
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
|
||||
listen_ports: update.listen_ports.clone(),
|
||||
});
|
||||
apply_port_config(
|
||||
&update.listen_ports,
|
||||
&mut port_listeners,
|
||||
&tunnel_writer_tx,
|
||||
&tunnel_data_tx,
|
||||
&client_writers,
|
||||
active_streams,
|
||||
next_stream_id,
|
||||
&config.edge_id,
|
||||
connection_token,
|
||||
bind_address,
|
||||
);
|
||||
}
|
||||
}
|
||||
FRAME_PING => {
|
||||
let pong_frame = encode_frame(0, FRAME_PONG, &[]);
|
||||
if tunnel_writer_tx.try_send(pong_frame).is_err() {
|
||||
// Control channel full (WINDOW_UPDATE burst from many streams).
|
||||
// DON'T disconnect — the 45s liveness timeout gives margin
|
||||
// for the channel to drain and the next PONG to succeed.
|
||||
log::warn!("PONG send failed, control channel full — skipping this cycle");
|
||||
}
|
||||
log::trace!("Received PING from hub, sent PONG");
|
||||
}
|
||||
_ => {
|
||||
log::warn!("Unexpected frame type {} from hub", frame.frame_type);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
log::info!("Hub disconnected (EOF)");
|
||||
break EdgeLoopResult::Reconnect;
|
||||
FRAME_WINDOW_UPDATE_BACK => {
|
||||
if let Some(increment) = decode_window_update(&frame.payload) {
|
||||
if increment > 0 {
|
||||
let writers = client_writers.lock().await;
|
||||
if let Some(state) = writers.get(&frame.stream_id) {
|
||||
let prev = state.send_window.fetch_add(increment, Ordering::Release);
|
||||
if prev + increment > MAX_WINDOW_SIZE {
|
||||
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
|
||||
}
|
||||
state.window_notify.notify_one();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("Hub frame error: {}", e);
|
||||
break EdgeLoopResult::Reconnect;
|
||||
FRAME_CLOSE_BACK => {
|
||||
let mut writers = client_writers.lock().await;
|
||||
writers.remove(&frame.stream_id);
|
||||
}
|
||||
FRAME_CONFIG => {
|
||||
if let Ok(update) = serde_json::from_slice::<ConfigUpdate>(&frame.payload) {
|
||||
log::info!("Config update from hub: ports {:?}", update.listen_ports);
|
||||
*listen_ports.write().await = update.listen_ports.clone();
|
||||
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
|
||||
listen_ports: update.listen_ports.clone(),
|
||||
});
|
||||
apply_port_config(
|
||||
&update.listen_ports,
|
||||
&mut port_listeners,
|
||||
&tunnel_writer_tx,
|
||||
&tunnel_data_tx,
|
||||
&client_writers,
|
||||
active_streams,
|
||||
next_stream_id,
|
||||
&config.edge_id,
|
||||
connection_token,
|
||||
bind_address,
|
||||
);
|
||||
}
|
||||
}
|
||||
FRAME_PING => {
|
||||
tunnel_io.queue_ctrl(encode_frame(0, FRAME_PONG, &[]));
|
||||
}
|
||||
_ => {
|
||||
log::warn!("Unexpected frame type {} from hub", frame.frame_type);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ = &mut liveness_deadline => {
|
||||
log::warn!("Hub liveness timeout (no frames for {}s), reconnecting",
|
||||
liveness_timeout_dur.as_secs());
|
||||
break EdgeLoopResult::Reconnect;
|
||||
remoteingress_protocol::TunnelEvent::Eof => {
|
||||
log::info!("Hub disconnected (EOF)");
|
||||
break EdgeLoopResult::Reconnect("hub_eof".to_string());
|
||||
}
|
||||
_ = &mut writer_dead_rx => {
|
||||
log::error!("Tunnel writer died, reconnecting immediately");
|
||||
break EdgeLoopResult::Reconnect;
|
||||
remoteingress_protocol::TunnelEvent::ReadError(e) => {
|
||||
log::error!("Hub frame read error: {}", e);
|
||||
break EdgeLoopResult::Reconnect(format!("hub_frame_error: {}", e));
|
||||
}
|
||||
_ = connection_token.cancelled() => {
|
||||
log::info!("Connection cancelled");
|
||||
break EdgeLoopResult::Shutdown;
|
||||
remoteingress_protocol::TunnelEvent::WriteError(e) => {
|
||||
log::error!("Tunnel write error: {}", e);
|
||||
break EdgeLoopResult::Reconnect(format!("tunnel_write_error: {}", e));
|
||||
}
|
||||
_ = shutdown_rx.recv() => {
|
||||
remoteingress_protocol::TunnelEvent::LivenessTimeout => {
|
||||
log::warn!("Hub liveness timeout (no frames for {}s), reconnecting", liveness_timeout_dur.as_secs());
|
||||
break EdgeLoopResult::Reconnect("liveness_timeout".to_string());
|
||||
}
|
||||
remoteingress_protocol::TunnelEvent::Cancelled => {
|
||||
if shutdown_rx.try_recv().is_ok() {
|
||||
break EdgeLoopResult::Shutdown;
|
||||
}
|
||||
break EdgeLoopResult::Shutdown;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Cancel connection token to propagate to all child tasks BEFORE aborting
|
||||
// Cleanup
|
||||
connection_token.cancel();
|
||||
stun_handle.abort();
|
||||
tunnel_writer_handle.abort();
|
||||
for (_, h) in port_listeners.drain() {
|
||||
h.abort();
|
||||
}
|
||||
@@ -711,8 +746,13 @@ async fn handle_client_connection(
|
||||
}
|
||||
|
||||
// Set up a bounded channel for data coming back from hub (with flow control the sender should rarely exceed its capacity)
|
||||
let (back_tx, mut back_rx) = mpsc::channel::<Vec<u8>>(256);
|
||||
let send_window = Arc::new(AtomicU32::new(INITIAL_STREAM_WINDOW));
|
||||
let (back_tx, mut back_rx) = mpsc::channel::<Vec<u8>>(1024);
|
||||
// Adaptive initial window: scale with current stream count to keep total in-flight
|
||||
// data within the 32MB budget. Prevents burst flooding when many streams open.
|
||||
let initial_window = remoteingress_protocol::compute_window_for_stream_count(
|
||||
active_streams.load(Ordering::Relaxed),
|
||||
);
|
||||
let send_window = Arc::new(AtomicU32::new(initial_window));
|
||||
let window_notify = Arc::new(Notify::new());
|
||||
{
|
||||
let mut writers = client_writers.lock().await;
|
||||
@@ -951,9 +991,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_edge_event_tunnel_disconnected() {
|
||||
let event = EdgeEvent::TunnelDisconnected;
|
||||
let event = EdgeEvent::TunnelDisconnected { reason: "hub_eof".to_string() };
|
||||
let json = serde_json::to_value(&event).unwrap();
|
||||
assert_eq!(json["type"], "tunnelDisconnected");
|
||||
assert_eq!(json["reason"], "hub_eof");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -4,7 +4,9 @@ version = "2.0.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
tokio = { version = "1", features = ["io-util"] }
|
||||
tokio = { version = "1", features = ["io-util", "sync", "time"] }
|
||||
tokio-util = "0.7"
|
||||
log = "0.4"
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { version = "1", features = ["io-util", "macros", "rt"] }
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
use tokio::io::{AsyncRead, AsyncReadExt};
|
||||
use std::collections::VecDeque;
|
||||
use std::future::Future;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf};
|
||||
|
||||
// Frame type constants
|
||||
pub const FRAME_OPEN: u8 = 0x01;
|
||||
@@ -120,9 +124,13 @@ impl<R: AsyncRead + Unpin> FrameReader<R> {
|
||||
]);
|
||||
|
||||
if length > MAX_PAYLOAD_SIZE {
|
||||
log::error!(
|
||||
"CORRUPT FRAME HEADER: raw={:02x?} stream_id={} type=0x{:02x} length={}",
|
||||
self.header_buf, stream_id, frame_type, length
|
||||
);
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!("frame payload too large: {} bytes", length),
|
||||
format!("frame payload too large: {} bytes (header={:02x?})", length, self.header_buf),
|
||||
));
|
||||
}
|
||||
|
||||
@@ -144,6 +152,256 @@ impl<R: AsyncRead + Unpin> FrameReader<R> {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TunnelIo: single-owner I/O multiplexer for the TLS tunnel connection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Events produced by the TunnelIo event loop.
|
||||
#[derive(Debug)]
|
||||
pub enum TunnelEvent {
|
||||
/// A complete frame was read from the remote side.
|
||||
Frame(Frame),
|
||||
/// The remote side closed the connection (EOF).
|
||||
Eof,
|
||||
/// A read error occurred.
|
||||
ReadError(std::io::Error),
|
||||
/// A write error occurred.
|
||||
WriteError(std::io::Error),
|
||||
/// No frames received for the liveness timeout duration.
|
||||
LivenessTimeout,
|
||||
/// The cancellation token was triggered.
|
||||
Cancelled,
|
||||
}
|
||||
|
||||
/// I/O engine that is the sole owner of the tunnel's TLS stream.
///
/// The stream is held directly — it is never passed through
/// `tokio::io::split()` and never guarded by a mutex. Outbound frames sit in
/// two queues with strict priority: control frames (PONG, WINDOW_UPDATE,
/// CLOSE, OPEN) are ALWAYS written ahead of data frames (DATA, DATA_BACK), so
/// a WINDOW_UPDATE can never starve behind bulk data and deadlock flow control.
pub struct TunnelIo<S> {
    /// The underlying (TLS) stream.
    stream: S,
    /// Receive buffer; bytes accumulate here and frames are parsed incrementally.
    read_buf: Vec<u8>,
    /// Count of valid received bytes at the front of `read_buf`.
    read_pos: usize,
    /// High-priority queue: PONG, WINDOW_UPDATE, CLOSE, OPEN — always written first.
    ctrl_queue: VecDeque<Vec<u8>>,
    /// Low-priority queue: DATA, DATA_BACK — written only when `ctrl_queue` is empty.
    data_queue: VecDeque<Vec<u8>>,
    /// How many bytes of the frame currently being written have gone out so far.
    write_offset: usize,
    /// Set once bytes have been written; cleared when a flush completes.
    flush_needed: bool,
}
|
||||
|
||||
impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
|
||||
pub fn new(stream: S, initial_data: Vec<u8>) -> Self {
|
||||
let read_pos = initial_data.len();
|
||||
let mut read_buf = initial_data;
|
||||
if read_buf.capacity() < 65536 {
|
||||
read_buf.reserve(65536 - read_buf.len());
|
||||
}
|
||||
Self {
|
||||
stream,
|
||||
read_buf,
|
||||
read_pos,
|
||||
ctrl_queue: VecDeque::new(),
|
||||
data_queue: VecDeque::new(),
|
||||
write_offset: 0,
|
||||
flush_needed: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Queue a high-priority control frame (PONG, WINDOW_UPDATE, CLOSE, OPEN).
|
||||
pub fn queue_ctrl(&mut self, frame: Vec<u8>) {
|
||||
self.ctrl_queue.push_back(frame);
|
||||
}
|
||||
|
||||
/// Queue a lower-priority data frame (DATA, DATA_BACK).
|
||||
pub fn queue_data(&mut self, frame: Vec<u8>) {
|
||||
self.data_queue.push_back(frame);
|
||||
}
|
||||
|
||||
/// Try to parse a complete frame from the read buffer.
|
||||
pub fn try_parse_frame(&mut self) -> Option<Result<Frame, std::io::Error>> {
|
||||
if self.read_pos < FRAME_HEADER_SIZE {
|
||||
return None;
|
||||
}
|
||||
|
||||
let stream_id = u32::from_be_bytes([
|
||||
self.read_buf[0], self.read_buf[1], self.read_buf[2], self.read_buf[3],
|
||||
]);
|
||||
let frame_type = self.read_buf[4];
|
||||
let length = u32::from_be_bytes([
|
||||
self.read_buf[5], self.read_buf[6], self.read_buf[7], self.read_buf[8],
|
||||
]);
|
||||
|
||||
if length > MAX_PAYLOAD_SIZE {
|
||||
let header = [
|
||||
self.read_buf[0], self.read_buf[1], self.read_buf[2], self.read_buf[3],
|
||||
self.read_buf[4], self.read_buf[5], self.read_buf[6], self.read_buf[7],
|
||||
self.read_buf[8],
|
||||
];
|
||||
log::error!(
|
||||
"CORRUPT FRAME HEADER: raw={:02x?} stream_id={} type=0x{:02x} length={}",
|
||||
header, stream_id, frame_type, length
|
||||
);
|
||||
return Some(Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!("frame payload too large: {} bytes (header={:02x?})", length, header),
|
||||
)));
|
||||
}
|
||||
|
||||
let total_frame_size = FRAME_HEADER_SIZE + length as usize;
|
||||
if self.read_pos < total_frame_size {
|
||||
return None;
|
||||
}
|
||||
|
||||
let payload = self.read_buf[FRAME_HEADER_SIZE..total_frame_size].to_vec();
|
||||
self.read_buf.drain(..total_frame_size);
|
||||
self.read_pos -= total_frame_size;
|
||||
|
||||
Some(Ok(Frame { stream_id, frame_type, payload }))
|
||||
}
|
||||
|
||||
fn has_write_work(&self) -> bool {
|
||||
!self.ctrl_queue.is_empty() || !self.data_queue.is_empty()
|
||||
}
|
||||
|
||||
/// Poll-based I/O step. Returns Ready on events, Pending when idle.
|
||||
///
|
||||
/// Order: write(ctrl→data) → flush → read → channels → timers
|
||||
pub fn poll_step(
|
||||
&mut self,
|
||||
cx: &mut Context<'_>,
|
||||
ctrl_rx: &mut tokio::sync::mpsc::Receiver<Vec<u8>>,
|
||||
data_rx: &mut tokio::sync::mpsc::Receiver<Vec<u8>>,
|
||||
liveness_deadline: &mut Pin<Box<tokio::time::Sleep>>,
|
||||
cancel_token: &tokio_util::sync::CancellationToken,
|
||||
) -> Poll<TunnelEvent> {
|
||||
// 1. WRITE: drain ctrl queue first, then data queue.
|
||||
// TLS poll_write writes plaintext to session buffer (always Ready).
|
||||
// Batch up to 16 frames per poll cycle.
|
||||
let mut writes = 0;
|
||||
while self.has_write_work() && writes < 16 {
|
||||
// Determine which queue to write from and the frame data.
|
||||
// We access the queues via raw pointers to avoid borrow conflicts with self.stream.
|
||||
let from_ctrl = !self.ctrl_queue.is_empty();
|
||||
let frame_ptr: *const Vec<u8> = if from_ctrl {
|
||||
self.ctrl_queue.front().unwrap()
|
||||
} else {
|
||||
self.data_queue.front().unwrap()
|
||||
};
|
||||
// SAFETY: the frame is not modified while we hold the pointer — poll_write
|
||||
// only writes to self.stream, and advance_write only runs after poll_write returns.
|
||||
let frame = unsafe { &*frame_ptr };
|
||||
let remaining = &frame[self.write_offset..];
|
||||
|
||||
match Pin::new(&mut self.stream).poll_write(cx, remaining) {
|
||||
Poll::Ready(Ok(0)) => {
|
||||
return Poll::Ready(TunnelEvent::WriteError(
|
||||
std::io::Error::new(std::io::ErrorKind::WriteZero, "write zero"),
|
||||
));
|
||||
}
|
||||
Poll::Ready(Ok(n)) => {
|
||||
self.write_offset += n;
|
||||
self.flush_needed = true;
|
||||
if self.write_offset >= frame.len() {
|
||||
if from_ctrl { self.ctrl_queue.pop_front(); }
|
||||
else { self.data_queue.pop_front(); }
|
||||
self.write_offset = 0;
|
||||
writes += 1;
|
||||
}
|
||||
}
|
||||
Poll::Ready(Err(e)) => return Poll::Ready(TunnelEvent::WriteError(e)),
|
||||
Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
|
||||
// 2. FLUSH: push encrypted data from TLS session to TCP.
|
||||
if self.flush_needed {
|
||||
match Pin::new(&mut self.stream).poll_flush(cx) {
|
||||
Poll::Ready(Ok(())) => self.flush_needed = false,
|
||||
Poll::Ready(Err(e)) => return Poll::Ready(TunnelEvent::WriteError(e)),
|
||||
Poll::Pending => {} // TCP waker will notify us
|
||||
}
|
||||
}
|
||||
|
||||
// 3. READ: drain stream until Pending to ensure the TCP waker is always registered.
|
||||
// Without this loop, a Ready return with partial frame data would consume
|
||||
// the waker without re-registering it, causing the task to sleep until a
|
||||
// timer or channel wakes it (potentially 15+ seconds of lost reads).
|
||||
loop {
|
||||
if self.read_buf.len() < self.read_pos + 32768 {
|
||||
self.read_buf.resize(self.read_pos + 32768, 0);
|
||||
}
|
||||
let mut rbuf = ReadBuf::new(&mut self.read_buf[self.read_pos..]);
|
||||
match Pin::new(&mut self.stream).poll_read(cx, &mut rbuf) {
|
||||
Poll::Ready(Ok(())) => {
|
||||
let n = rbuf.filled().len();
|
||||
if n == 0 {
|
||||
return Poll::Ready(TunnelEvent::Eof);
|
||||
}
|
||||
self.read_pos += n;
|
||||
if let Some(result) = self.try_parse_frame() {
|
||||
return match result {
|
||||
Ok(frame) => Poll::Ready(TunnelEvent::Frame(frame)),
|
||||
Err(e) => Poll::Ready(TunnelEvent::ReadError(e)),
|
||||
};
|
||||
}
|
||||
// Partial data — loop to call poll_read again so the TCP
|
||||
// waker is re-registered when it finally returns Pending.
|
||||
}
|
||||
Poll::Ready(Err(e)) => return Poll::Ready(TunnelEvent::ReadError(e)),
|
||||
Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
|
||||
// 4. CHANNELS: drain ctrl into ctrl_queue, data into data_queue.
|
||||
let mut got_new = false;
|
||||
loop {
|
||||
match ctrl_rx.poll_recv(cx) {
|
||||
Poll::Ready(Some(frame)) => { self.ctrl_queue.push_back(frame); got_new = true; }
|
||||
Poll::Ready(None) => {
|
||||
return Poll::Ready(TunnelEvent::WriteError(
|
||||
std::io::Error::new(std::io::ErrorKind::BrokenPipe, "ctrl channel closed"),
|
||||
));
|
||||
}
|
||||
Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
loop {
|
||||
match data_rx.poll_recv(cx) {
|
||||
Poll::Ready(Some(frame)) => { self.data_queue.push_back(frame); got_new = true; }
|
||||
Poll::Ready(None) => {
|
||||
return Poll::Ready(TunnelEvent::WriteError(
|
||||
std::io::Error::new(std::io::ErrorKind::BrokenPipe, "data channel closed"),
|
||||
));
|
||||
}
|
||||
Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
|
||||
// 5. TIMERS
|
||||
if liveness_deadline.as_mut().poll(cx).is_ready() {
|
||||
return Poll::Ready(TunnelEvent::LivenessTimeout);
|
||||
}
|
||||
if cancel_token.is_cancelled() {
|
||||
return Poll::Ready(TunnelEvent::Cancelled);
|
||||
}
|
||||
|
||||
// 6. SELF-WAKE: only when we have frames AND flush is done.
|
||||
// If flush is pending, the TCP write-readiness waker will notify us.
|
||||
// If we got new channel frames, wake to write them.
|
||||
if got_new || (!self.flush_needed && self.has_write_work()) {
|
||||
cx.waker().wake_by_ref();
|
||||
}
|
||||
|
||||
Poll::Pending
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> S {
|
||||
self.stream
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -3,6 +3,6 @@
|
||||
*/
|
||||
// Package metadata for this module.
// Only one `version` key is kept: repeating a key inside an object literal is a
// TypeScript error, and the later value ('4.8.2') is the one that took effect
// at runtime anyway.
export const commitinfo = {
  name: '@serve.zone/remoteingress',
  version: '4.8.2',
  description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.'
}
|
||||
|
||||
@@ -83,8 +83,10 @@ export class RemoteIngressEdge extends EventEmitter {
|
||||
this.bridge.on('management:tunnelConnected', () => {
|
||||
this.emit('tunnelConnected');
|
||||
});
|
||||
this.bridge.on('management:tunnelDisconnected', () => {
|
||||
this.emit('tunnelDisconnected');
|
||||
this.bridge.on('management:tunnelDisconnected', (data: { reason?: string }) => {
|
||||
const reason = data?.reason ?? 'unknown';
|
||||
console.log(`[RemoteIngressEdge] Tunnel disconnected: ${reason}`);
|
||||
this.emit('tunnelDisconnected', data);
|
||||
});
|
||||
this.bridge.on('management:publicIpDiscovered', (data: { ip: string }) => {
|
||||
this.emit('publicIpDiscovered', data);
|
||||
|
||||
@@ -93,7 +93,9 @@ export class RemoteIngressHub extends EventEmitter {
|
||||
this.bridge.on('management:edgeConnected', (data: { edgeId: string; peerAddr: string }) => {
|
||||
this.emit('edgeConnected', data);
|
||||
});
|
||||
this.bridge.on('management:edgeDisconnected', (data: { edgeId: string }) => {
|
||||
this.bridge.on('management:edgeDisconnected', (data: { edgeId: string; reason?: string }) => {
|
||||
const reason = data?.reason ?? 'unknown';
|
||||
console.log(`[RemoteIngressHub] Edge ${data.edgeId} disconnected: ${reason}`);
|
||||
this.emit('edgeDisconnected', data);
|
||||
});
|
||||
this.bridge.on('management:streamOpened', (data: { edgeId: string; streamId: number }) => {
|
||||
|
||||
Reference in New Issue
Block a user