Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 04586aab39 | |||
| f9a739858d | |||
| da01fbeecd | |||
| 264e8eeb97 | |||
| 9922c3b020 | |||
| 38cde37cff | |||
| 64572827e5 | |||
| c4e26198b9 | |||
| 0b5d72de28 | |||
| e8431c0174 | |||
| d57d6395dd | |||
| 2e5ceeaf5c | |||
| 1979910f6f | |||
| edfad2dffe | |||
| d907943ae5 | |||
| 4bfb1244fc |
49
changelog.md
49
changelog.md
@@ -1,5 +1,54 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 2026-03-17 - 4.8.8 - fix(remoteingress-core)
|
||||||
|
cancel stale edge connections when an edge reconnects
|
||||||
|
|
||||||
|
- Remove any existing edge entry before registering a reconnected edge
|
||||||
|
- Trigger the previous connection's cancellation token so stale sessions shut down immediately instead of waiting for TCP keepalive
|
||||||
|
|
||||||
|
## 2026-03-17 - 4.8.7 - fix(remoteingress-core)
|
||||||
|
perform graceful TLS shutdown on edge and hub tunnel streams
|
||||||
|
|
||||||
|
- Send TLS close_notify before cleanup to avoid peer disconnect warnings on both tunnel endpoints
|
||||||
|
- Wrap stream shutdown in a 2-second timeout so connection teardown does not block cleanup
|
||||||
|
|
||||||
|
## 2026-03-17 - 4.8.6 - fix(remoteingress-core)
|
||||||
|
initialize disconnect reason only when set in hub loop break paths
|
||||||
|
|
||||||
|
- Replace the default "unknown" disconnect reason with an explicitly assigned string and document that all hub loop exits set it before use
|
||||||
|
- Add an allow attribute for unused assignments to avoid warnings around the deferred initialization pattern
|
||||||
|
|
||||||
|
## 2026-03-17 - 4.8.5 - fix(repo)
|
||||||
|
no changes to commit
|
||||||
|
|
||||||
|
|
||||||
|
## 2026-03-17 - 4.8.4 - fix(remoteingress-core)
|
||||||
|
prevent stream stalls by guaranteeing flow-control updates and avoiding bounded per-stream channel overflows
|
||||||
|
|
||||||
|
- Replace bounded per-stream data channels with unbounded channels on edge and hub, relying on existing WINDOW_UPDATE flow control to limit bytes in flight
|
||||||
|
- Use awaited sends for FRAME_WINDOW_UPDATE and FRAME_WINDOW_UPDATE_BACK so updates are not dropped and streams do not deadlock under backpressure
|
||||||
|
- Clean up stream state when channel receivers have already exited instead of closing active streams because a bounded queue filled
|
||||||
|
|
||||||
|
## 2026-03-17 - 4.8.3 - fix(protocol,edge)
|
||||||
|
optimize tunnel frame handling and zero-copy uploads in edge I/O
|
||||||
|
|
||||||
|
- extract hub frame processing into a shared edge handler to remove duplicated tunnel logic
|
||||||
|
- add zero-copy frame header encoding and read payloads directly into framed buffers for client-to-hub uploads
|
||||||
|
- refactor TunnelIo read/write state to avoid unsafe queue access and reduce buffer churn with incremental parsing
|
||||||
|
|
||||||
|
## 2026-03-17 - 4.8.2 - fix(rust-edge)
|
||||||
|
refactor tunnel I/O to preserve TLS state and prioritize control frames
|
||||||
|
|
||||||
|
- replace split TLS handling with a single-owner TunnelIo to avoid handshake and buffered read corruption
|
||||||
|
- prioritize control frames over data frames to prevent WINDOW_UPDATE starvation and flow-control deadlocks
|
||||||
|
- improve tunnel reliability with incremental frame parsing, liveness/error events, and corrupt frame header logging
|
||||||
|
|
||||||
|
## 2026-03-17 - 4.8.1 - fix(remoteingress-core)
|
||||||
|
remove tunnel writer timeouts from edge and hub buffered writes
|
||||||
|
|
||||||
|
- Drops the 30-second timeout wrapper around writer.write_all and writer.flush in both edge and hub tunnel writers.
|
||||||
|
- Updates error logging to report write failures without referring to stalled writes.
|
||||||
|
|
||||||
## 2026-03-17 - 4.8.0 - feat(events)
|
## 2026-03-17 - 4.8.0 - feat(events)
|
||||||
include disconnect reasons in edge and hub management events
|
include disconnect reasons in edge and hub management events
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@serve.zone/remoteingress",
|
"name": "@serve.zone/remoteingress",
|
||||||
"version": "4.8.0",
|
"version": "4.8.8",
|
||||||
"private": false,
|
"private": false,
|
||||||
"description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.",
|
"description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.",
|
||||||
"main": "dist_ts/index.js",
|
"main": "dist_ts/index.js",
|
||||||
|
|||||||
2
rust/Cargo.lock
generated
2
rust/Cargo.lock
generated
@@ -568,7 +568,9 @@ dependencies = [
|
|||||||
name = "remoteingress-protocol"
|
name = "remoteingress-protocol"
|
||||||
version = "2.0.0"
|
version = "2.0.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"log",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
"tokio-util",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ use std::collections::HashMap;
|
|||||||
use std::sync::atomic::{AtomicU32, Ordering};
|
use std::sync::atomic::{AtomicU32, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
|
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||||
use tokio::net::{TcpListener, TcpStream};
|
use tokio::net::{TcpListener, TcpStream};
|
||||||
use tokio::sync::{mpsc, Mutex, Notify, RwLock};
|
use tokio::sync::{mpsc, Mutex, Notify, RwLock};
|
||||||
use tokio::task::JoinHandle;
|
use tokio::task::JoinHandle;
|
||||||
@@ -13,10 +13,20 @@ use serde::{Deserialize, Serialize};
|
|||||||
|
|
||||||
use remoteingress_protocol::*;
|
use remoteingress_protocol::*;
|
||||||
|
|
||||||
|
type EdgeTlsStream = tokio_rustls::client::TlsStream<TcpStream>;
|
||||||
|
|
||||||
|
/// Result of processing a frame (shared with hub.rs pattern).
|
||||||
|
#[allow(dead_code)]
|
||||||
|
enum EdgeFrameAction {
|
||||||
|
Continue,
|
||||||
|
Disconnect(String),
|
||||||
|
}
|
||||||
|
|
||||||
/// Per-stream state tracked in the edge's client_writers map.
|
/// Per-stream state tracked in the edge's client_writers map.
|
||||||
struct EdgeStreamState {
|
struct EdgeStreamState {
|
||||||
/// Channel to deliver FRAME_DATA_BACK payloads to the hub_to_client task.
|
/// Unbounded channel to deliver FRAME_DATA_BACK payloads to the hub_to_client task.
|
||||||
back_tx: mpsc::Sender<Vec<u8>>,
|
/// Unbounded because flow control (WINDOW_UPDATE) already limits bytes-in-flight.
|
||||||
|
back_tx: mpsc::UnboundedSender<Vec<u8>>,
|
||||||
/// Send window for FRAME_DATA (upload direction).
|
/// Send window for FRAME_DATA (upload direction).
|
||||||
/// Decremented by the client reader, incremented by FRAME_WINDOW_UPDATE_BACK from hub.
|
/// Decremented by the client reader, incremented by FRAME_WINDOW_UPDATE_BACK from hub.
|
||||||
send_window: Arc<AtomicU32>,
|
send_window: Arc<AtomicU32>,
|
||||||
@@ -272,6 +282,86 @@ enum EdgeLoopResult {
|
|||||||
Reconnect(String), // reason for disconnection
|
Reconnect(String), // reason for disconnection
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Process a single frame received from the hub side of the tunnel.
|
||||||
|
/// Handles FRAME_DATA_BACK, FRAME_WINDOW_UPDATE_BACK, FRAME_CLOSE_BACK, FRAME_CONFIG, FRAME_PING.
|
||||||
|
async fn handle_edge_frame(
|
||||||
|
frame: Frame,
|
||||||
|
tunnel_io: &mut remoteingress_protocol::TunnelIo<EdgeTlsStream>,
|
||||||
|
client_writers: &Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
|
||||||
|
listen_ports: &Arc<RwLock<Vec<u16>>>,
|
||||||
|
event_tx: &mpsc::Sender<EdgeEvent>,
|
||||||
|
tunnel_writer_tx: &mpsc::Sender<Vec<u8>>,
|
||||||
|
tunnel_data_tx: &mpsc::Sender<Vec<u8>>,
|
||||||
|
port_listeners: &mut HashMap<u16, JoinHandle<()>>,
|
||||||
|
active_streams: &Arc<AtomicU32>,
|
||||||
|
next_stream_id: &Arc<AtomicU32>,
|
||||||
|
edge_id: &str,
|
||||||
|
connection_token: &CancellationToken,
|
||||||
|
bind_address: &str,
|
||||||
|
) -> EdgeFrameAction {
|
||||||
|
match frame.frame_type {
|
||||||
|
FRAME_DATA_BACK => {
|
||||||
|
// Dispatch to per-stream unbounded channel. Flow control (WINDOW_UPDATE)
|
||||||
|
// limits bytes-in-flight, so the channel won't grow unbounded. send() only
|
||||||
|
// fails if the receiver is dropped (hub_to_client task already exited).
|
||||||
|
let mut writers = client_writers.lock().await;
|
||||||
|
if let Some(state) = writers.get(&frame.stream_id) {
|
||||||
|
if state.back_tx.send(frame.payload).is_err() {
|
||||||
|
// Receiver dropped — hub_to_client task already exited, clean up
|
||||||
|
writers.remove(&frame.stream_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FRAME_WINDOW_UPDATE_BACK => {
|
||||||
|
if let Some(increment) = decode_window_update(&frame.payload) {
|
||||||
|
if increment > 0 {
|
||||||
|
let writers = client_writers.lock().await;
|
||||||
|
if let Some(state) = writers.get(&frame.stream_id) {
|
||||||
|
let prev = state.send_window.fetch_add(increment, Ordering::Release);
|
||||||
|
if prev + increment > MAX_WINDOW_SIZE {
|
||||||
|
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
|
||||||
|
}
|
||||||
|
state.window_notify.notify_one();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FRAME_CLOSE_BACK => {
|
||||||
|
let mut writers = client_writers.lock().await;
|
||||||
|
writers.remove(&frame.stream_id);
|
||||||
|
}
|
||||||
|
FRAME_CONFIG => {
|
||||||
|
if let Ok(update) = serde_json::from_slice::<ConfigUpdate>(&frame.payload) {
|
||||||
|
log::info!("Config update from hub: ports {:?}", update.listen_ports);
|
||||||
|
*listen_ports.write().await = update.listen_ports.clone();
|
||||||
|
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
|
||||||
|
listen_ports: update.listen_ports.clone(),
|
||||||
|
});
|
||||||
|
apply_port_config(
|
||||||
|
&update.listen_ports,
|
||||||
|
port_listeners,
|
||||||
|
tunnel_writer_tx,
|
||||||
|
tunnel_data_tx,
|
||||||
|
client_writers,
|
||||||
|
active_streams,
|
||||||
|
next_stream_id,
|
||||||
|
edge_id,
|
||||||
|
connection_token,
|
||||||
|
bind_address,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FRAME_PING => {
|
||||||
|
// Queue PONG directly — no channel round-trip, guaranteed delivery
|
||||||
|
tunnel_io.queue_ctrl(encode_frame(0, FRAME_PONG, &[]));
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
log::warn!("Unexpected frame type {} from hub", frame.frame_type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EdgeFrameAction::Continue
|
||||||
|
}
|
||||||
|
|
||||||
async fn connect_to_hub_and_run(
|
async fn connect_to_hub_and_run(
|
||||||
config: &EdgeConfig,
|
config: &EdgeConfig,
|
||||||
connected: &Arc<RwLock<bool>>,
|
connected: &Arc<RwLock<bool>>,
|
||||||
@@ -308,7 +398,7 @@ async fn connect_to_hub_and_run(
|
|||||||
let server_name = rustls::pki_types::ServerName::try_from(config.hub_host.clone())
|
let server_name = rustls::pki_types::ServerName::try_from(config.hub_host.clone())
|
||||||
.unwrap_or_else(|_| rustls::pki_types::ServerName::try_from("remoteingress-hub".to_string()).unwrap());
|
.unwrap_or_else(|_| rustls::pki_types::ServerName::try_from("remoteingress-hub".to_string()).unwrap());
|
||||||
|
|
||||||
let tls_stream = match connector.connect(server_name, tcp).await {
|
let mut tls_stream = match connector.connect(server_name, tcp).await {
|
||||||
Ok(s) => s,
|
Ok(s) => s,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
log::error!("TLS handshake failed: {}", e);
|
log::error!("TLS handshake failed: {}", e);
|
||||||
@@ -316,28 +406,38 @@ async fn connect_to_hub_and_run(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let (read_half, mut write_half) = tokio::io::split(tls_stream);
|
// Send auth line (we own the whole stream — no split)
|
||||||
|
|
||||||
// Send auth line
|
|
||||||
let auth_line = format!("EDGE {} {}\n", config.edge_id, config.secret);
|
let auth_line = format!("EDGE {} {}\n", config.edge_id, config.secret);
|
||||||
if write_half.write_all(auth_line.as_bytes()).await.is_err() {
|
if tls_stream.write_all(auth_line.as_bytes()).await.is_err() {
|
||||||
return EdgeLoopResult::Reconnect("auth_write_failed".to_string());
|
return EdgeLoopResult::Reconnect("auth_write_failed".to_string());
|
||||||
}
|
}
|
||||||
|
if tls_stream.flush().await.is_err() {
|
||||||
|
return EdgeLoopResult::Reconnect("auth_flush_failed".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
// Read handshake response line from hub (JSON with initial config)
|
// Read handshake line byte-by-byte (no BufReader — into_inner would discard any bytes it had read ahead past the newline, desynchronizing the tunnel stream)
|
||||||
let mut buf_reader = BufReader::new(read_half);
|
let mut handshake_bytes = Vec::with_capacity(512);
|
||||||
let mut handshake_line = String::new();
|
let mut byte = [0u8; 1];
|
||||||
match buf_reader.read_line(&mut handshake_line).await {
|
loop {
|
||||||
Ok(0) => {
|
match tls_stream.read_exact(&mut byte).await {
|
||||||
log::error!("Hub rejected connection (EOF before handshake)");
|
Ok(_) => {
|
||||||
return EdgeLoopResult::Reconnect("hub_rejected_eof".to_string());
|
handshake_bytes.push(byte[0]);
|
||||||
}
|
if byte[0] == b'\n' { break; }
|
||||||
Ok(_) => {}
|
if handshake_bytes.len() > 8192 {
|
||||||
Err(e) => {
|
return EdgeLoopResult::Reconnect("handshake_too_long".to_string());
|
||||||
log::error!("Failed to read handshake response: {}", e);
|
}
|
||||||
return EdgeLoopResult::Reconnect(format!("handshake_read_failed: {}", e));
|
}
|
||||||
|
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
|
||||||
|
log::error!("Hub rejected connection (EOF before handshake)");
|
||||||
|
return EdgeLoopResult::Reconnect("hub_rejected_eof".to_string());
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::error!("Failed to read handshake response: {}", e);
|
||||||
|
return EdgeLoopResult::Reconnect(format!("handshake_read_failed: {}", e));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
let handshake_line = String::from_utf8_lossy(&handshake_bytes);
|
||||||
|
|
||||||
let handshake: HandshakeConfig = match serde_json::from_str(handshake_line.trim()) {
|
let handshake: HandshakeConfig = match serde_json::from_str(handshake_line.trim()) {
|
||||||
Ok(h) => h,
|
Ok(h) => h,
|
||||||
@@ -394,59 +494,13 @@ async fn connect_to_hub_and_run(
|
|||||||
let client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>> =
|
let client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>> =
|
||||||
Arc::new(Mutex::new(HashMap::new()));
|
Arc::new(Mutex::new(HashMap::new()));
|
||||||
|
|
||||||
// QoS dual-channel tunnel writer: control frames (PONG/WINDOW_UPDATE/CLOSE/OPEN)
|
// QoS dual-channel: ctrl frames have priority over data frames.
|
||||||
// have priority over data frames (DATA). Prevents PING starvation under load.
|
// Stream handlers send through these channels → TunnelIo drains them.
|
||||||
let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Vec<u8>>(256);
|
let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Vec<u8>>(256);
|
||||||
let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Vec<u8>>(4096);
|
let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Vec<u8>>(4096);
|
||||||
// Legacy alias — control channel for PONG, CLOSE, WINDOW_UPDATE, OPEN
|
|
||||||
let tunnel_writer_tx = tunnel_ctrl_tx.clone();
|
let tunnel_writer_tx = tunnel_ctrl_tx.clone();
|
||||||
let tw_token = connection_token.clone();
|
|
||||||
// Oneshot to signal the reader loop when the writer dies from a write error.
|
|
||||||
// This avoids the 45s liveness timeout delay when the tunnel is already dead.
|
|
||||||
let (writer_dead_tx, mut writer_dead_rx) = tokio::sync::oneshot::channel::<()>();
|
|
||||||
let tunnel_writer_handle = tokio::spawn(async move {
|
|
||||||
// BufWriter coalesces small writes (frame headers, control frames) into fewer
|
|
||||||
// TLS records and syscalls. Flushed after each frame to avoid holding data.
|
|
||||||
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
|
|
||||||
let mut write_error = false;
|
|
||||||
let write_timeout = Duration::from_secs(30);
|
|
||||||
loop {
|
|
||||||
tokio::select! {
|
|
||||||
biased; // control frames always take priority over data
|
|
||||||
ctrl = tunnel_ctrl_rx.recv() => {
|
|
||||||
match ctrl {
|
|
||||||
Some(frame_data) => {
|
|
||||||
let ok = tokio::time::timeout(write_timeout, async {
|
|
||||||
writer.write_all(&frame_data).await?;
|
|
||||||
writer.flush().await
|
|
||||||
}).await;
|
|
||||||
if !matches!(ok, Ok(Ok(()))) { write_error = true; break; }
|
|
||||||
}
|
|
||||||
None => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
data = tunnel_data_rx.recv() => {
|
|
||||||
match data {
|
|
||||||
Some(frame_data) => {
|
|
||||||
let ok = tokio::time::timeout(write_timeout, async {
|
|
||||||
writer.write_all(&frame_data).await?;
|
|
||||||
writer.flush().await
|
|
||||||
}).await;
|
|
||||||
if !matches!(ok, Ok(Ok(()))) { write_error = true; break; }
|
|
||||||
}
|
|
||||||
None => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ = tw_token.cancelled() => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if write_error {
|
|
||||||
log::error!("Tunnel writer failed or stalled, signalling reader for fast reconnect");
|
|
||||||
let _ = writer_dead_tx.send(());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Start TCP listeners for initial ports (hot-reloadable)
|
// Start TCP listeners for initial ports
|
||||||
let mut port_listeners: HashMap<u16, JoinHandle<()>> = HashMap::new();
|
let mut port_listeners: HashMap<u16, JoinHandle<()>> = HashMap::new();
|
||||||
let bind_address = config.bind_address.as_deref().unwrap_or("0.0.0.0");
|
let bind_address = config.bind_address.as_deref().unwrap_or("0.0.0.0");
|
||||||
apply_port_config(
|
apply_port_config(
|
||||||
@@ -462,122 +516,88 @@ async fn connect_to_hub_and_run(
|
|||||||
bind_address,
|
bind_address,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Heartbeat: liveness timeout detects silent hub failures
|
// Single-owner I/O engine — no tokio::io::split, no mutex
|
||||||
|
let mut tunnel_io = remoteingress_protocol::TunnelIo::new(tls_stream, Vec::new());
|
||||||
|
|
||||||
let liveness_timeout_dur = Duration::from_secs(45);
|
let liveness_timeout_dur = Duration::from_secs(45);
|
||||||
let mut last_activity = Instant::now();
|
let mut last_activity = Instant::now();
|
||||||
let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur));
|
let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur));
|
||||||
|
|
||||||
// Read frames from hub
|
let result = 'io_loop: loop {
|
||||||
let mut frame_reader = FrameReader::new(buf_reader);
|
// Drain any buffered frames
|
||||||
let result = loop {
|
loop {
|
||||||
tokio::select! {
|
let frame = match tunnel_io.try_parse_frame() {
|
||||||
frame_result = frame_reader.next_frame() => {
|
Some(Ok(f)) => f,
|
||||||
match frame_result {
|
Some(Err(e)) => {
|
||||||
Ok(Some(frame)) => {
|
log::error!("Hub frame error: {}", e);
|
||||||
// Reset liveness on any received frame
|
break 'io_loop EdgeLoopResult::Reconnect(format!("hub_frame_error: {}", e));
|
||||||
last_activity = Instant::now();
|
}
|
||||||
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
None => break,
|
||||||
|
};
|
||||||
|
last_activity = Instant::now();
|
||||||
|
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||||
|
if let EdgeFrameAction::Disconnect(reason) = handle_edge_frame(
|
||||||
|
frame, &mut tunnel_io, &client_writers, listen_ports, event_tx,
|
||||||
|
&tunnel_writer_tx, &tunnel_data_tx, &mut port_listeners,
|
||||||
|
active_streams, next_stream_id, &config.edge_id, connection_token, bind_address,
|
||||||
|
).await {
|
||||||
|
break 'io_loop EdgeLoopResult::Reconnect(reason);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
match frame.frame_type {
|
// Poll I/O: write(ctrl→data), flush, read, channels, timers
|
||||||
FRAME_DATA_BACK => {
|
let event = std::future::poll_fn(|cx| {
|
||||||
// Non-blocking dispatch to per-stream channel.
|
tunnel_io.poll_step(cx, &mut tunnel_ctrl_rx, &mut tunnel_data_rx, &mut liveness_deadline, connection_token)
|
||||||
// With flow control, the sender should rarely exceed the channel capacity.
|
}).await;
|
||||||
let mut writers = client_writers.lock().await;
|
|
||||||
if let Some(state) = writers.get(&frame.stream_id) {
|
match event {
|
||||||
if state.back_tx.try_send(frame.payload).is_err() {
|
remoteingress_protocol::TunnelEvent::Frame(frame) => {
|
||||||
log::warn!("Stream {} back-channel full, closing stream", frame.stream_id);
|
last_activity = Instant::now();
|
||||||
writers.remove(&frame.stream_id);
|
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||||
}
|
if let EdgeFrameAction::Disconnect(reason) = handle_edge_frame(
|
||||||
}
|
frame, &mut tunnel_io, &client_writers, listen_ports, event_tx,
|
||||||
}
|
&tunnel_writer_tx, &tunnel_data_tx, &mut port_listeners,
|
||||||
FRAME_WINDOW_UPDATE_BACK => {
|
active_streams, next_stream_id, &config.edge_id, connection_token, bind_address,
|
||||||
// Hub consumed data — increase our send window for this stream (upload direction)
|
).await {
|
||||||
if let Some(increment) = decode_window_update(&frame.payload) {
|
break EdgeLoopResult::Reconnect(reason);
|
||||||
if increment > 0 {
|
|
||||||
let writers = client_writers.lock().await;
|
|
||||||
if let Some(state) = writers.get(&frame.stream_id) {
|
|
||||||
let prev = state.send_window.fetch_add(increment, Ordering::Release);
|
|
||||||
if prev + increment > MAX_WINDOW_SIZE {
|
|
||||||
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
|
|
||||||
}
|
|
||||||
state.window_notify.notify_one();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
FRAME_CLOSE_BACK => {
|
|
||||||
let mut writers = client_writers.lock().await;
|
|
||||||
writers.remove(&frame.stream_id);
|
|
||||||
}
|
|
||||||
FRAME_CONFIG => {
|
|
||||||
if let Ok(update) = serde_json::from_slice::<ConfigUpdate>(&frame.payload) {
|
|
||||||
log::info!("Config update from hub: ports {:?}", update.listen_ports);
|
|
||||||
*listen_ports.write().await = update.listen_ports.clone();
|
|
||||||
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
|
|
||||||
listen_ports: update.listen_ports.clone(),
|
|
||||||
});
|
|
||||||
apply_port_config(
|
|
||||||
&update.listen_ports,
|
|
||||||
&mut port_listeners,
|
|
||||||
&tunnel_writer_tx,
|
|
||||||
&tunnel_data_tx,
|
|
||||||
&client_writers,
|
|
||||||
active_streams,
|
|
||||||
next_stream_id,
|
|
||||||
&config.edge_id,
|
|
||||||
connection_token,
|
|
||||||
bind_address,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
FRAME_PING => {
|
|
||||||
let pong_frame = encode_frame(0, FRAME_PONG, &[]);
|
|
||||||
if tunnel_writer_tx.try_send(pong_frame).is_err() {
|
|
||||||
// Control channel full (WINDOW_UPDATE burst from many streams).
|
|
||||||
// DON'T disconnect — the 45s liveness timeout gives margin
|
|
||||||
// for the channel to drain and the next PONG to succeed.
|
|
||||||
log::warn!("PONG send failed, control channel full — skipping this cycle");
|
|
||||||
}
|
|
||||||
log::trace!("Received PING from hub, sent PONG");
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
log::warn!("Unexpected frame type {} from hub", frame.frame_type);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(None) => {
|
|
||||||
log::info!("Hub disconnected (EOF)");
|
|
||||||
break EdgeLoopResult::Reconnect("hub_eof".to_string());
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
log::error!("Hub frame error: {}", e);
|
|
||||||
break EdgeLoopResult::Reconnect(format!("hub_frame_error: {}", e));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ = &mut liveness_deadline => {
|
remoteingress_protocol::TunnelEvent::Eof => {
|
||||||
log::warn!("Hub liveness timeout (no frames for {}s), reconnecting",
|
log::info!("Hub disconnected (EOF)");
|
||||||
liveness_timeout_dur.as_secs());
|
break EdgeLoopResult::Reconnect("hub_eof".to_string());
|
||||||
|
}
|
||||||
|
remoteingress_protocol::TunnelEvent::ReadError(e) => {
|
||||||
|
log::error!("Hub frame read error: {}", e);
|
||||||
|
break EdgeLoopResult::Reconnect(format!("hub_frame_error: {}", e));
|
||||||
|
}
|
||||||
|
remoteingress_protocol::TunnelEvent::WriteError(e) => {
|
||||||
|
log::error!("Tunnel write error: {}", e);
|
||||||
|
break EdgeLoopResult::Reconnect(format!("tunnel_write_error: {}", e));
|
||||||
|
}
|
||||||
|
remoteingress_protocol::TunnelEvent::LivenessTimeout => {
|
||||||
|
log::warn!("Hub liveness timeout (no frames for {}s), reconnecting", liveness_timeout_dur.as_secs());
|
||||||
break EdgeLoopResult::Reconnect("liveness_timeout".to_string());
|
break EdgeLoopResult::Reconnect("liveness_timeout".to_string());
|
||||||
}
|
}
|
||||||
_ = &mut writer_dead_rx => {
|
remoteingress_protocol::TunnelEvent::Cancelled => {
|
||||||
log::error!("Tunnel writer died, reconnecting immediately");
|
if shutdown_rx.try_recv().is_ok() {
|
||||||
break EdgeLoopResult::Reconnect("writer_dead".to_string());
|
break EdgeLoopResult::Shutdown;
|
||||||
}
|
}
|
||||||
_ = connection_token.cancelled() => {
|
|
||||||
log::info!("Connection cancelled");
|
|
||||||
break EdgeLoopResult::Shutdown;
|
|
||||||
}
|
|
||||||
_ = shutdown_rx.recv() => {
|
|
||||||
break EdgeLoopResult::Shutdown;
|
break EdgeLoopResult::Shutdown;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Cancel connection token to propagate to all child tasks BEFORE aborting
|
// Graceful TLS shutdown: send close_notify so the hub sees a clean disconnect
|
||||||
|
// instead of "peer closed connection without sending TLS close_notify".
|
||||||
|
let mut tls_stream = tunnel_io.into_inner();
|
||||||
|
let _ = tokio::time::timeout(
|
||||||
|
Duration::from_secs(2),
|
||||||
|
tls_stream.shutdown(),
|
||||||
|
).await;
|
||||||
|
|
||||||
|
// Cleanup
|
||||||
connection_token.cancel();
|
connection_token.cancel();
|
||||||
stun_handle.abort();
|
stun_handle.abort();
|
||||||
tunnel_writer_handle.abort();
|
|
||||||
for (_, h) in port_listeners.drain() {
|
for (_, h) in port_listeners.drain() {
|
||||||
h.abort();
|
h.abort();
|
||||||
}
|
}
|
||||||
@@ -723,8 +743,10 @@ async fn handle_client_connection(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set up channel for data coming back from hub (capacity 16 is sufficient with flow control)
|
// Per-stream unbounded back-channel. Flow control (WINDOW_UPDATE) limits
|
||||||
let (back_tx, mut back_rx) = mpsc::channel::<Vec<u8>>(256);
|
// bytes-in-flight, so this won't grow unbounded. Unbounded avoids killing
|
||||||
|
// streams due to channel overflow — backpressure slows streams, never kills them.
|
||||||
|
let (back_tx, mut back_rx) = mpsc::unbounded_channel::<Vec<u8>>();
|
||||||
// Adaptive initial window: scale with current stream count to keep total in-flight
|
// Adaptive initial window: scale with current stream count to keep total in-flight
|
||||||
// data within the 32MB budget. Prevents burst flooding when many streams open.
|
// data within the 32MB budget. Prevents burst flooding when many streams open.
|
||||||
let initial_window = remoteingress_protocol::compute_window_for_stream_count(
|
let initial_window = remoteingress_protocol::compute_window_for_stream_count(
|
||||||
@@ -771,10 +793,16 @@ async fn handle_client_connection(
|
|||||||
if consumed_since_update >= threshold {
|
if consumed_since_update >= threshold {
|
||||||
let increment = consumed_since_update.min(adaptive_window);
|
let increment = consumed_since_update.min(adaptive_window);
|
||||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, increment);
|
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, increment);
|
||||||
if wu_tx.try_send(frame).is_ok() {
|
// Use send().await for guaranteed delivery — dropping WINDOW_UPDATEs
|
||||||
consumed_since_update -= increment;
|
// causes permanent flow stalls. Safe: runs in per-stream task, not main loop.
|
||||||
|
tokio::select! {
|
||||||
|
result = wu_tx.send(frame) => {
|
||||||
|
if result.is_ok() {
|
||||||
|
consumed_since_update -= increment;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ = hub_to_client_token.cancelled() => break,
|
||||||
}
|
}
|
||||||
// If try_send fails, keep accumulating — retry on next threshold
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => break,
|
None => break,
|
||||||
@@ -786,20 +814,26 @@ async fn handle_client_connection(
|
|||||||
// Send final window update for any remaining consumed bytes
|
// Send final window update for any remaining consumed bytes
|
||||||
if consumed_since_update > 0 {
|
if consumed_since_update > 0 {
|
||||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, consumed_since_update);
|
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, consumed_since_update);
|
||||||
let _ = wu_tx.try_send(frame);
|
let _ = wu_tx.send(frame).await;
|
||||||
}
|
}
|
||||||
let _ = client_write.shutdown().await;
|
let _ = client_write.shutdown().await;
|
||||||
});
|
});
|
||||||
|
|
||||||
// Task: client -> hub (upload direction) with per-stream flow control
|
// Task: client -> hub (upload direction) with per-stream flow control.
|
||||||
let mut buf = vec![0u8; 32768];
|
// Zero-copy: read payload directly after the header, then prepend header.
|
||||||
|
let mut buf = vec![0u8; FRAME_HEADER_SIZE + 32768];
|
||||||
loop {
|
loop {
|
||||||
// Wait for send window to have capacity (with stall timeout)
|
// Wait for send window to have capacity (with stall timeout).
|
||||||
|
// Safe pattern: register notified BEFORE checking the condition
|
||||||
|
// to avoid missing a notify_one that fires between load and select.
|
||||||
loop {
|
loop {
|
||||||
|
let notified = window_notify.notified();
|
||||||
|
tokio::pin!(notified);
|
||||||
|
notified.as_mut().enable();
|
||||||
let w = send_window.load(Ordering::Acquire);
|
let w = send_window.load(Ordering::Acquire);
|
||||||
if w > 0 { break; }
|
if w > 0 { break; }
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
_ = window_notify.notified() => continue,
|
_ = notified => continue,
|
||||||
_ = client_token.cancelled() => break,
|
_ = client_token.cancelled() => break,
|
||||||
_ = tokio::time::sleep(Duration::from_secs(120)) => {
|
_ = tokio::time::sleep(Duration::from_secs(120)) => {
|
||||||
log::warn!("Stream {} upload stalled (window empty for 120s)", stream_id);
|
log::warn!("Stream {} upload stalled (window empty for 120s)", stream_id);
|
||||||
@@ -822,15 +856,16 @@ async fn handle_client_connection(
|
|||||||
let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count(
|
let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count(
|
||||||
active_streams.load(Ordering::Relaxed),
|
active_streams.load(Ordering::Relaxed),
|
||||||
) as usize;
|
) as usize;
|
||||||
let max_read = w.min(buf.len()).min(adaptive_cap);
|
let max_read = w.min(32768).min(adaptive_cap);
|
||||||
|
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
read_result = client_read.read(&mut buf[..max_read]) => {
|
read_result = client_read.read(&mut buf[FRAME_HEADER_SIZE..FRAME_HEADER_SIZE + max_read]) => {
|
||||||
match read_result {
|
match read_result {
|
||||||
Ok(0) => break,
|
Ok(0) => break,
|
||||||
Ok(n) => {
|
Ok(n) => {
|
||||||
send_window.fetch_sub(n as u32, Ordering::Release);
|
send_window.fetch_sub(n as u32, Ordering::Release);
|
||||||
let data_frame = encode_frame(stream_id, FRAME_DATA, &buf[..n]);
|
encode_frame_header(&mut buf, stream_id, FRAME_DATA, n);
|
||||||
|
let data_frame = buf[..FRAME_HEADER_SIZE + n].to_vec();
|
||||||
if tunnel_data_tx.send(data_frame).await.is_err() {
|
if tunnel_data_tx.send(data_frame).await.is_err() {
|
||||||
log::warn!("Stream {} data channel closed, closing", stream_id);
|
log::warn!("Stream {} data channel closed, closing", stream_id);
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ use std::collections::HashMap;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::sync::atomic::{AtomicU32, Ordering};
|
use std::sync::atomic::{AtomicU32, Ordering};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
|
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||||
use tokio::net::{TcpListener, TcpStream};
|
use tokio::net::{TcpListener, TcpStream};
|
||||||
use tokio::sync::{mpsc, Mutex, Notify, RwLock, Semaphore};
|
use tokio::sync::{mpsc, Mutex, Notify, RwLock, Semaphore};
|
||||||
use tokio::time::{interval, sleep_until, Instant};
|
use tokio::time::{interval, sleep_until, Instant};
|
||||||
@@ -12,10 +12,21 @@ use serde::{Deserialize, Serialize};
|
|||||||
|
|
||||||
use remoteingress_protocol::*;
|
use remoteingress_protocol::*;
|
||||||
|
|
||||||
|
type HubTlsStream = tokio_rustls::server::TlsStream<TcpStream>;
|
||||||
|
|
||||||
|
/// Result of processing a frame.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
enum FrameAction {
|
||||||
|
Continue,
|
||||||
|
Disconnect(String),
|
||||||
|
}
|
||||||
|
|
||||||
/// Per-stream state tracked in the hub's stream map.
|
/// Per-stream state tracked in the hub's stream map.
|
||||||
struct HubStreamState {
|
struct HubStreamState {
|
||||||
/// Channel to deliver FRAME_DATA payloads to the upstream writer task.
|
/// Unbounded channel to deliver FRAME_DATA payloads to the upstream writer task.
|
||||||
data_tx: mpsc::Sender<Vec<u8>>,
|
/// Unbounded because flow control (WINDOW_UPDATE) already limits bytes-in-flight.
|
||||||
|
/// A bounded channel would kill streams instead of applying backpressure.
|
||||||
|
data_tx: mpsc::UnboundedSender<Vec<u8>>,
|
||||||
/// Cancellation token for this stream.
|
/// Cancellation token for this stream.
|
||||||
cancel_token: CancellationToken,
|
cancel_token: CancellationToken,
|
||||||
/// Send window for FRAME_DATA_BACK (download direction).
|
/// Send window for FRAME_DATA_BACK (download direction).
|
||||||
@@ -123,9 +134,9 @@ pub struct TunnelHub {
|
|||||||
struct ConnectedEdgeInfo {
|
struct ConnectedEdgeInfo {
|
||||||
connected_at: u64,
|
connected_at: u64,
|
||||||
peer_addr: String,
|
peer_addr: String,
|
||||||
active_streams: Arc<Mutex<HashMap<u32, HubStreamState>>>,
|
edge_stream_count: Arc<AtomicU32>,
|
||||||
config_tx: mpsc::Sender<EdgeConfigUpdate>,
|
config_tx: mpsc::Sender<EdgeConfigUpdate>,
|
||||||
#[allow(dead_code)] // kept alive for Drop — cancels child tokens when edge is removed
|
/// Used to cancel the old connection when an edge reconnects.
|
||||||
cancel_token: CancellationToken,
|
cancel_token: CancellationToken,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -189,11 +200,10 @@ impl TunnelHub {
|
|||||||
|
|
||||||
let mut connected = Vec::new();
|
let mut connected = Vec::new();
|
||||||
for (id, info) in edges.iter() {
|
for (id, info) in edges.iter() {
|
||||||
let streams = info.active_streams.lock().await;
|
|
||||||
connected.push(ConnectedEdgeStatus {
|
connected.push(ConnectedEdgeStatus {
|
||||||
edge_id: id.clone(),
|
edge_id: id.clone(),
|
||||||
connected_at: info.connected_at,
|
connected_at: info.connected_at,
|
||||||
active_streams: streams.len(),
|
active_streams: info.edge_stream_count.load(Ordering::Relaxed) as usize,
|
||||||
peer_addr: info.peer_addr.clone(),
|
peer_addr: info.peer_addr.clone(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -287,6 +297,290 @@ impl Drop for TunnelHub {
|
|||||||
/// Maximum concurrent streams per edge connection.
|
/// Maximum concurrent streams per edge connection.
|
||||||
const MAX_STREAMS_PER_EDGE: usize = 1024;
|
const MAX_STREAMS_PER_EDGE: usize = 1024;
|
||||||
|
|
||||||
|
/// Process a single frame received from the edge side of the tunnel.
|
||||||
|
/// Handles FRAME_OPEN, FRAME_DATA, FRAME_WINDOW_UPDATE, FRAME_CLOSE, and FRAME_PONG.
|
||||||
|
async fn handle_hub_frame(
|
||||||
|
frame: Frame,
|
||||||
|
tunnel_io: &mut remoteingress_protocol::TunnelIo<HubTlsStream>,
|
||||||
|
streams: &mut HashMap<u32, HubStreamState>,
|
||||||
|
stream_semaphore: &Arc<Semaphore>,
|
||||||
|
edge_stream_count: &Arc<AtomicU32>,
|
||||||
|
edge_id: &str,
|
||||||
|
event_tx: &mpsc::Sender<HubEvent>,
|
||||||
|
ctrl_tx: &mpsc::Sender<Vec<u8>>,
|
||||||
|
data_tx: &mpsc::Sender<Vec<u8>>,
|
||||||
|
target_host: &str,
|
||||||
|
edge_token: &CancellationToken,
|
||||||
|
cleanup_tx: &mpsc::Sender<u32>,
|
||||||
|
) -> FrameAction {
|
||||||
|
match frame.frame_type {
|
||||||
|
FRAME_OPEN => {
|
||||||
|
// A4: Check stream limit before processing
|
||||||
|
let permit = match stream_semaphore.clone().try_acquire_owned() {
|
||||||
|
Ok(p) => p,
|
||||||
|
Err(_) => {
|
||||||
|
log::warn!("Edge {} exceeded max streams ({}), rejecting stream {}",
|
||||||
|
edge_id, MAX_STREAMS_PER_EDGE, frame.stream_id);
|
||||||
|
let close_frame = encode_frame(frame.stream_id, FRAME_CLOSE_BACK, &[]);
|
||||||
|
tunnel_io.queue_ctrl(close_frame);
|
||||||
|
return FrameAction::Continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Payload is PROXY v1 header line
|
||||||
|
let proxy_header = String::from_utf8_lossy(&frame.payload).to_string();
|
||||||
|
|
||||||
|
// Parse destination port from PROXY header
|
||||||
|
let dest_port = parse_dest_port_from_proxy(&proxy_header).unwrap_or(443);
|
||||||
|
|
||||||
|
let stream_id = frame.stream_id;
|
||||||
|
let cleanup = cleanup_tx.clone();
|
||||||
|
let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK
|
||||||
|
let data_writer_tx = data_tx.clone(); // data: DATA_BACK
|
||||||
|
let target = target_host.to_string();
|
||||||
|
let stream_token = edge_token.child_token();
|
||||||
|
|
||||||
|
let _ = event_tx.try_send(HubEvent::StreamOpened {
|
||||||
|
edge_id: edge_id.to_string(),
|
||||||
|
stream_id,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Create channel for data from edge to this stream
|
||||||
|
let (stream_data_tx, mut stream_data_rx) = mpsc::unbounded_channel::<Vec<u8>>();
|
||||||
|
// Adaptive initial window: scale with current stream count
|
||||||
|
// to keep total in-flight data within the 32MB budget.
|
||||||
|
let initial_window = compute_window_for_stream_count(
|
||||||
|
edge_stream_count.load(Ordering::Relaxed),
|
||||||
|
);
|
||||||
|
let send_window = Arc::new(AtomicU32::new(initial_window));
|
||||||
|
let window_notify = Arc::new(Notify::new());
|
||||||
|
streams.insert(stream_id, HubStreamState {
|
||||||
|
data_tx: stream_data_tx,
|
||||||
|
cancel_token: stream_token.clone(),
|
||||||
|
send_window: Arc::clone(&send_window),
|
||||||
|
window_notify: Arc::clone(&window_notify),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Spawn task: connect to SmartProxy, send PROXY header, pipe data
|
||||||
|
let stream_counter = Arc::clone(edge_stream_count);
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let _permit = permit; // hold semaphore permit until stream completes
|
||||||
|
stream_counter.fetch_add(1, Ordering::Relaxed);
|
||||||
|
|
||||||
|
let result = async {
|
||||||
|
// A2: Connect to SmartProxy with timeout
|
||||||
|
let mut upstream = tokio::time::timeout(
|
||||||
|
Duration::from_secs(10),
|
||||||
|
TcpStream::connect((target.as_str(), dest_port)),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.map_err(|_| -> Box<dyn std::error::Error + Send + Sync> {
|
||||||
|
format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into()
|
||||||
|
})??;
|
||||||
|
|
||||||
|
upstream.set_nodelay(true)?;
|
||||||
|
upstream.write_all(proxy_header.as_bytes()).await?;
|
||||||
|
|
||||||
|
let (mut up_read, mut up_write) =
|
||||||
|
upstream.into_split();
|
||||||
|
|
||||||
|
// Forward data from edge (via channel) to SmartProxy
|
||||||
|
// After writing to upstream, send WINDOW_UPDATE_BACK to edge
|
||||||
|
let writer_token = stream_token.clone();
|
||||||
|
let wub_tx = writer_tx.clone();
|
||||||
|
let stream_counter_w = Arc::clone(&stream_counter);
|
||||||
|
let writer_for_edge_data = tokio::spawn(async move {
|
||||||
|
let mut consumed_since_update: u32 = 0;
|
||||||
|
loop {
|
||||||
|
tokio::select! {
|
||||||
|
data = stream_data_rx.recv() => {
|
||||||
|
match data {
|
||||||
|
Some(data) => {
|
||||||
|
let len = data.len() as u32;
|
||||||
|
// Check cancellation alongside the write so we respond
|
||||||
|
// promptly to FRAME_CLOSE instead of blocking up to 60s.
|
||||||
|
let write_result = tokio::select! {
|
||||||
|
r = tokio::time::timeout(
|
||||||
|
Duration::from_secs(60),
|
||||||
|
up_write.write_all(&data),
|
||||||
|
) => r,
|
||||||
|
_ = writer_token.cancelled() => break,
|
||||||
|
};
|
||||||
|
match write_result {
|
||||||
|
Ok(Ok(())) => {}
|
||||||
|
Ok(Err(_)) => break,
|
||||||
|
Err(_) => {
|
||||||
|
log::warn!("Stream {} write to upstream timed out (60s)", stream_id);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Track consumption for adaptive flow control.
|
||||||
|
// Increment capped to adaptive window to limit per-stream in-flight data.
|
||||||
|
consumed_since_update += len;
|
||||||
|
let adaptive_window = remoteingress_protocol::compute_window_for_stream_count(
|
||||||
|
stream_counter_w.load(Ordering::Relaxed),
|
||||||
|
);
|
||||||
|
let threshold = adaptive_window / 2;
|
||||||
|
if consumed_since_update >= threshold {
|
||||||
|
let increment = consumed_since_update.min(adaptive_window);
|
||||||
|
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, increment);
|
||||||
|
// Use send().await for guaranteed delivery — dropping WINDOW_UPDATEs
|
||||||
|
// causes permanent flow stalls. Safe: runs in per-stream task, not main loop.
|
||||||
|
tokio::select! {
|
||||||
|
result = wub_tx.send(frame) => {
|
||||||
|
if result.is_ok() {
|
||||||
|
consumed_since_update -= increment;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ = writer_token.cancelled() => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ = writer_token.cancelled() => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Send final window update for remaining consumed bytes
|
||||||
|
if consumed_since_update > 0 {
|
||||||
|
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update);
|
||||||
|
let _ = wub_tx.send(frame).await;
|
||||||
|
}
|
||||||
|
let _ = up_write.shutdown().await;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Forward data from SmartProxy back to edge via writer channel
|
||||||
|
// with per-stream flow control (check send_window before reading).
|
||||||
|
// Zero-copy: read payload directly after the header, then prepend header.
|
||||||
|
let mut buf = vec![0u8; FRAME_HEADER_SIZE + 32768];
|
||||||
|
loop {
|
||||||
|
// Wait for send window to have capacity (with stall timeout).
|
||||||
|
// Safe pattern: register notified BEFORE checking the condition
|
||||||
|
// to avoid missing a notify_one that fires between load and select.
|
||||||
|
loop {
|
||||||
|
let notified = window_notify.notified();
|
||||||
|
tokio::pin!(notified);
|
||||||
|
notified.as_mut().enable();
|
||||||
|
let w = send_window.load(Ordering::Acquire);
|
||||||
|
if w > 0 { break; }
|
||||||
|
tokio::select! {
|
||||||
|
_ = notified => continue,
|
||||||
|
_ = stream_token.cancelled() => break,
|
||||||
|
_ = tokio::time::sleep(Duration::from_secs(120)) => {
|
||||||
|
log::warn!("Stream {} download stalled (window empty for 120s)", stream_id);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if stream_token.is_cancelled() { break; }
|
||||||
|
|
||||||
|
// Limit read size to available window.
|
||||||
|
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
|
||||||
|
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
|
||||||
|
// which would be falsely interpreted as EOF.
|
||||||
|
let w = send_window.load(Ordering::Acquire) as usize;
|
||||||
|
if w == 0 {
|
||||||
|
log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Adaptive: cap read to current per-stream target window
|
||||||
|
let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count(
|
||||||
|
stream_counter.load(Ordering::Relaxed),
|
||||||
|
) as usize;
|
||||||
|
let max_read = w.min(32768).min(adaptive_cap);
|
||||||
|
|
||||||
|
tokio::select! {
|
||||||
|
read_result = up_read.read(&mut buf[FRAME_HEADER_SIZE..FRAME_HEADER_SIZE + max_read]) => {
|
||||||
|
match read_result {
|
||||||
|
Ok(0) => break,
|
||||||
|
Ok(n) => {
|
||||||
|
send_window.fetch_sub(n as u32, Ordering::Release);
|
||||||
|
encode_frame_header(&mut buf, stream_id, FRAME_DATA_BACK, n);
|
||||||
|
let frame = buf[..FRAME_HEADER_SIZE + n].to_vec();
|
||||||
|
if data_writer_tx.send(frame).await.is_err() {
|
||||||
|
log::warn!("Stream {} data channel closed, closing", stream_id);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ = stream_token.cancelled() => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK).
|
||||||
|
// Use send().await to guarantee delivery (try_send silently drops if full).
|
||||||
|
if !stream_token.is_cancelled() {
|
||||||
|
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
|
||||||
|
let _ = data_writer_tx.send(close_frame).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
writer_for_edge_data.abort();
|
||||||
|
Ok::<(), Box<dyn std::error::Error + Send + Sync>>(())
|
||||||
|
}
|
||||||
|
.await;
|
||||||
|
|
||||||
|
if let Err(e) = result {
|
||||||
|
log::error!("Stream {} error: {}", stream_id, e);
|
||||||
|
// Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK).
|
||||||
|
// Use send().await to guarantee delivery.
|
||||||
|
if !stream_token.is_cancelled() {
|
||||||
|
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
|
||||||
|
let _ = data_writer_tx.send(close_frame).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Signal main loop to remove stream from the map
|
||||||
|
let _ = cleanup.send(stream_id).await;
|
||||||
|
stream_counter.fetch_sub(1, Ordering::Relaxed);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
FRAME_DATA => {
|
||||||
|
// Dispatch to per-stream unbounded channel. Flow control (WINDOW_UPDATE)
|
||||||
|
// limits bytes-in-flight, so the channel won't grow unbounded. send() only
|
||||||
|
// fails if the receiver is dropped (stream handler already exited).
|
||||||
|
if let Some(state) = streams.get(&frame.stream_id) {
|
||||||
|
if state.data_tx.send(frame.payload).is_err() {
|
||||||
|
// Receiver dropped — stream handler already exited, clean up
|
||||||
|
streams.remove(&frame.stream_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FRAME_WINDOW_UPDATE => {
|
||||||
|
// Edge consumed data — increase our send window for this stream
|
||||||
|
if let Some(increment) = decode_window_update(&frame.payload) {
|
||||||
|
if increment > 0 {
|
||||||
|
if let Some(state) = streams.get(&frame.stream_id) {
|
||||||
|
let prev = state.send_window.fetch_add(increment, Ordering::Release);
|
||||||
|
if prev + increment > MAX_WINDOW_SIZE {
|
||||||
|
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
|
||||||
|
}
|
||||||
|
state.window_notify.notify_one();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FRAME_CLOSE => {
|
||||||
|
if let Some(state) = streams.remove(&frame.stream_id) {
|
||||||
|
state.cancel_token.cancel();
|
||||||
|
let _ = event_tx.try_send(HubEvent::StreamClosed {
|
||||||
|
edge_id: edge_id.to_string(),
|
||||||
|
stream_id: frame.stream_id,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FRAME_PONG => {
|
||||||
|
log::debug!("Received PONG from edge {}", edge_id);
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
log::warn!("Unexpected frame type {} from edge", frame.frame_type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FrameAction::Continue
|
||||||
|
}
|
||||||
|
|
||||||
/// Handle a single edge connection: authenticate, then enter frame loop.
|
/// Handle a single edge connection: authenticate, then enter frame loop.
|
||||||
async fn handle_edge_connection(
|
async fn handle_edge_connection(
|
||||||
stream: TcpStream,
|
stream: TcpStream,
|
||||||
@@ -307,13 +601,24 @@ async fn handle_edge_connection(
|
|||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
let ka = ka.with_interval(Duration::from_secs(10));
|
let ka = ka.with_interval(Duration::from_secs(10));
|
||||||
let _ = socket2::SockRef::from(&stream).set_tcp_keepalive(&ka);
|
let _ = socket2::SockRef::from(&stream).set_tcp_keepalive(&ka);
|
||||||
let tls_stream = acceptor.accept(stream).await?;
|
let mut tls_stream = acceptor.accept(stream).await?;
|
||||||
let (read_half, mut write_half) = tokio::io::split(tls_stream);
|
|
||||||
let mut buf_reader = BufReader::new(read_half);
|
|
||||||
|
|
||||||
// Read auth line: "EDGE <edgeId> <secret>\n"
|
// Byte-by-byte auth line reading (no BufReader).
|
||||||
let mut auth_line = String::new();
|
// Auth line: "EDGE <edgeId> <secret>\n"
|
||||||
buf_reader.read_line(&mut auth_line).await?;
|
let mut auth_buf = Vec::with_capacity(512);
|
||||||
|
loop {
|
||||||
|
let mut byte = [0u8; 1];
|
||||||
|
tls_stream.read_exact(&mut byte).await?;
|
||||||
|
if byte[0] == b'\n' {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
auth_buf.push(byte[0]);
|
||||||
|
if auth_buf.len() > 4096 {
|
||||||
|
return Err("auth line too long".into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let auth_line = String::from_utf8(auth_buf)
|
||||||
|
.map_err(|_| "auth line not valid UTF-8")?;
|
||||||
let auth_line = auth_line.trim();
|
let auth_line = auth_line.trim();
|
||||||
|
|
||||||
let parts: Vec<&str> = auth_line.splitn(3, ' ').collect();
|
let parts: Vec<&str> = auth_line.splitn(3, ' ').collect();
|
||||||
@@ -353,11 +658,15 @@ async fn handle_edge_connection(
|
|||||||
};
|
};
|
||||||
let mut handshake_json = serde_json::to_string(&handshake)?;
|
let mut handshake_json = serde_json::to_string(&handshake)?;
|
||||||
handshake_json.push('\n');
|
handshake_json.push('\n');
|
||||||
write_half.write_all(handshake_json.as_bytes()).await?;
|
tls_stream.write_all(handshake_json.as_bytes()).await?;
|
||||||
|
tls_stream.flush().await?;
|
||||||
|
|
||||||
// Track this edge
|
// Track this edge
|
||||||
let streams: Arc<Mutex<HashMap<u32, HubStreamState>>> =
|
let mut streams: HashMap<u32, HubStreamState> = HashMap::new();
|
||||||
Arc::new(Mutex::new(HashMap::new()));
|
// Per-edge active stream counter for adaptive flow control
|
||||||
|
let edge_stream_count = Arc::new(AtomicU32::new(0));
|
||||||
|
// Cleanup channel: spawned stream tasks send stream_id here when done
|
||||||
|
let (cleanup_tx, mut cleanup_rx) = mpsc::channel::<u32>(256);
|
||||||
let now = std::time::SystemTime::now()
|
let now = std::time::SystemTime::now()
|
||||||
.duration_since(std::time::UNIX_EPOCH)
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
.unwrap_or_default()
|
.unwrap_or_default()
|
||||||
@@ -368,73 +677,32 @@ async fn handle_edge_connection(
|
|||||||
|
|
||||||
{
|
{
|
||||||
let mut edges = connected.lock().await;
|
let mut edges = connected.lock().await;
|
||||||
|
// If this edge already has an active connection (reconnect scenario),
|
||||||
|
// cancel the old connection so it shuts down immediately instead of
|
||||||
|
// lingering until TCP keepalive detects the dead socket.
|
||||||
|
if let Some(old) = edges.remove(&edge_id) {
|
||||||
|
log::info!("Edge {} reconnected, cancelling old connection", edge_id);
|
||||||
|
old.cancel_token.cancel();
|
||||||
|
}
|
||||||
edges.insert(
|
edges.insert(
|
||||||
edge_id.clone(),
|
edge_id.clone(),
|
||||||
ConnectedEdgeInfo {
|
ConnectedEdgeInfo {
|
||||||
connected_at: now,
|
connected_at: now,
|
||||||
peer_addr,
|
peer_addr,
|
||||||
active_streams: streams.clone(),
|
edge_stream_count: edge_stream_count.clone(),
|
||||||
config_tx,
|
config_tx,
|
||||||
cancel_token: edge_token.clone(),
|
cancel_token: edge_token.clone(),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Per-edge active stream counter for adaptive flow control
|
// QoS dual-channel: ctrl frames have priority over data frames.
|
||||||
let edge_stream_count = Arc::new(AtomicU32::new(0));
|
// Stream handlers send through these channels -> TunnelIo drains them.
|
||||||
|
|
||||||
// QoS dual-channel tunnel writer: control frames (PING/PONG/WINDOW_UPDATE/CLOSE)
|
|
||||||
// have priority over data frames (DATA_BACK). This prevents PING starvation under load.
|
|
||||||
let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Vec<u8>>(256);
|
let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Vec<u8>>(256);
|
||||||
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(4096);
|
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(4096);
|
||||||
// Legacy alias for code that sends both control and data (will be migrated)
|
|
||||||
let frame_writer_tx = ctrl_tx.clone();
|
|
||||||
let writer_token = edge_token.clone();
|
|
||||||
let (writer_dead_tx, mut writer_dead_rx) = tokio::sync::oneshot::channel::<()>();
|
|
||||||
let writer_handle = tokio::spawn(async move {
|
|
||||||
// BufWriter coalesces small writes (frame headers, control frames) into fewer
|
|
||||||
// TLS records and syscalls. Flushed after each frame to avoid holding data.
|
|
||||||
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
|
|
||||||
let mut write_error = false;
|
|
||||||
let write_timeout = Duration::from_secs(30);
|
|
||||||
loop {
|
|
||||||
tokio::select! {
|
|
||||||
biased; // control frames always take priority over data
|
|
||||||
ctrl = ctrl_rx.recv() => {
|
|
||||||
match ctrl {
|
|
||||||
Some(frame_data) => {
|
|
||||||
let ok = tokio::time::timeout(write_timeout, async {
|
|
||||||
writer.write_all(&frame_data).await?;
|
|
||||||
writer.flush().await
|
|
||||||
}).await;
|
|
||||||
if !matches!(ok, Ok(Ok(()))) { write_error = true; break; }
|
|
||||||
}
|
|
||||||
None => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
data = data_rx.recv() => {
|
|
||||||
match data {
|
|
||||||
Some(frame_data) => {
|
|
||||||
let ok = tokio::time::timeout(write_timeout, async {
|
|
||||||
writer.write_all(&frame_data).await?;
|
|
||||||
writer.flush().await
|
|
||||||
}).await;
|
|
||||||
if !matches!(ok, Ok(Ok(()))) { write_error = true; break; }
|
|
||||||
}
|
|
||||||
None => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ = writer_token.cancelled() => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if write_error {
|
|
||||||
log::error!("Tunnel writer to edge failed or stalled, signalling reader for fast cleanup");
|
|
||||||
let _ = writer_dead_tx.send(());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Spawn task to forward config updates as FRAME_CONFIG frames
|
// Spawn task to forward config updates as FRAME_CONFIG frames
|
||||||
let config_writer_tx = frame_writer_tx.clone();
|
let config_writer_tx = ctrl_tx.clone();
|
||||||
let config_edge_id = edge_id.clone();
|
let config_edge_id = edge_id.clone();
|
||||||
let config_token = edge_token.clone();
|
let config_token = edge_token.clone();
|
||||||
let config_handle = tokio::spawn(async move {
|
let config_handle = tokio::spawn(async move {
|
||||||
@@ -471,324 +739,91 @@ async fn handle_edge_connection(
|
|||||||
let mut last_activity = Instant::now();
|
let mut last_activity = Instant::now();
|
||||||
let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur));
|
let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur));
|
||||||
|
|
||||||
// Frame reading loop
|
// Single-owner I/O engine — no tokio::io::split, no mutex
|
||||||
let mut frame_reader = FrameReader::new(buf_reader);
|
let mut tunnel_io = remoteingress_protocol::TunnelIo::new(tls_stream, Vec::new());
|
||||||
let mut disconnect_reason = "unknown".to_string();
|
|
||||||
|
|
||||||
loop {
|
// Assigned in every break path of the hub_loop before use at the end.
|
||||||
tokio::select! {
|
#[allow(unused_assignments)]
|
||||||
frame_result = frame_reader.next_frame() => {
|
let mut disconnect_reason = String::new();
|
||||||
match frame_result {
|
|
||||||
Ok(Some(frame)) => {
|
|
||||||
// Reset liveness on any received frame
|
|
||||||
last_activity = Instant::now();
|
|
||||||
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
|
||||||
|
|
||||||
match frame.frame_type {
|
'hub_loop: loop {
|
||||||
FRAME_OPEN => {
|
// Drain completed stream cleanups from spawned tasks
|
||||||
// A4: Check stream limit before processing
|
while let Ok(stream_id) = cleanup_rx.try_recv() {
|
||||||
let permit = match stream_semaphore.clone().try_acquire_owned() {
|
if streams.remove(&stream_id).is_some() {
|
||||||
Ok(p) => p,
|
let _ = event_tx.try_send(HubEvent::StreamClosed {
|
||||||
Err(_) => {
|
edge_id: edge_id.clone(),
|
||||||
log::warn!("Edge {} exceeded max streams ({}), rejecting stream {}",
|
stream_id,
|
||||||
edge_id, MAX_STREAMS_PER_EDGE, frame.stream_id);
|
});
|
||||||
let close_frame = encode_frame(frame.stream_id, FRAME_CLOSE_BACK, &[]);
|
}
|
||||||
let _ = frame_writer_tx.try_send(close_frame);
|
}
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Payload is PROXY v1 header line
|
// Drain any buffered frames
|
||||||
let proxy_header = String::from_utf8_lossy(&frame.payload).to_string();
|
loop {
|
||||||
|
let frame = match tunnel_io.try_parse_frame() {
|
||||||
|
Some(Ok(f)) => f,
|
||||||
|
Some(Err(e)) => {
|
||||||
|
log::error!("Edge {} frame error: {}", edge_id, e);
|
||||||
|
disconnect_reason = format!("edge_frame_error: {}", e);
|
||||||
|
break 'hub_loop;
|
||||||
|
}
|
||||||
|
None => break,
|
||||||
|
};
|
||||||
|
last_activity = Instant::now();
|
||||||
|
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||||
|
if let FrameAction::Disconnect(reason) = handle_hub_frame(
|
||||||
|
frame, &mut tunnel_io, &mut streams, &stream_semaphore, &edge_stream_count,
|
||||||
|
&edge_id, &event_tx, &ctrl_tx, &data_tx, &target_host, &edge_token,
|
||||||
|
&cleanup_tx,
|
||||||
|
).await {
|
||||||
|
disconnect_reason = reason;
|
||||||
|
break 'hub_loop;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Parse destination port from PROXY header
|
// Poll I/O: write(ctrl->data), flush, read, channels, timers
|
||||||
let dest_port = parse_dest_port_from_proxy(&proxy_header).unwrap_or(443);
|
let event = std::future::poll_fn(|cx| {
|
||||||
|
// Queue PING if ticker fires
|
||||||
|
if ping_ticker.poll_tick(cx).is_ready() {
|
||||||
|
tunnel_io.queue_ctrl(encode_frame(0, FRAME_PING, &[]));
|
||||||
|
}
|
||||||
|
tunnel_io.poll_step(cx, &mut ctrl_rx, &mut data_rx, &mut liveness_deadline, &edge_token)
|
||||||
|
}).await;
|
||||||
|
|
||||||
let stream_id = frame.stream_id;
|
match event {
|
||||||
let edge_id_clone = edge_id.clone();
|
remoteingress_protocol::TunnelEvent::Frame(frame) => {
|
||||||
let event_tx_clone = event_tx.clone();
|
last_activity = Instant::now();
|
||||||
let streams_clone = streams.clone();
|
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||||
let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK
|
if let FrameAction::Disconnect(reason) = handle_hub_frame(
|
||||||
let data_writer_tx = data_tx.clone(); // data: DATA_BACK
|
frame, &mut tunnel_io, &mut streams, &stream_semaphore, &edge_stream_count,
|
||||||
let target = target_host.clone();
|
&edge_id, &event_tx, &ctrl_tx, &data_tx, &target_host, &edge_token,
|
||||||
let stream_token = edge_token.child_token();
|
&cleanup_tx,
|
||||||
|
).await {
|
||||||
let _ = event_tx.try_send(HubEvent::StreamOpened {
|
disconnect_reason = reason;
|
||||||
edge_id: edge_id.clone(),
|
break;
|
||||||
stream_id,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Create channel for data from edge to this stream (capacity 16 is sufficient with flow control)
|
|
||||||
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(256);
|
|
||||||
// Adaptive initial window: scale with current stream count
|
|
||||||
// to keep total in-flight data within the 32MB budget.
|
|
||||||
let initial_window = compute_window_for_stream_count(
|
|
||||||
edge_stream_count.load(Ordering::Relaxed),
|
|
||||||
);
|
|
||||||
let send_window = Arc::new(AtomicU32::new(initial_window));
|
|
||||||
let window_notify = Arc::new(Notify::new());
|
|
||||||
{
|
|
||||||
let mut s = streams.lock().await;
|
|
||||||
s.insert(stream_id, HubStreamState {
|
|
||||||
data_tx,
|
|
||||||
cancel_token: stream_token.clone(),
|
|
||||||
send_window: Arc::clone(&send_window),
|
|
||||||
window_notify: Arc::clone(&window_notify),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Spawn task: connect to SmartProxy, send PROXY header, pipe data
|
|
||||||
let stream_counter = Arc::clone(&edge_stream_count);
|
|
||||||
tokio::spawn(async move {
|
|
||||||
let _permit = permit; // hold semaphore permit until stream completes
|
|
||||||
stream_counter.fetch_add(1, Ordering::Relaxed);
|
|
||||||
|
|
||||||
let result = async {
|
|
||||||
// A2: Connect to SmartProxy with timeout
|
|
||||||
let mut upstream = tokio::time::timeout(
|
|
||||||
Duration::from_secs(10),
|
|
||||||
TcpStream::connect((target.as_str(), dest_port)),
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
.map_err(|_| -> Box<dyn std::error::Error + Send + Sync> {
|
|
||||||
format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into()
|
|
||||||
})??;
|
|
||||||
|
|
||||||
upstream.set_nodelay(true)?;
|
|
||||||
upstream.write_all(proxy_header.as_bytes()).await?;
|
|
||||||
|
|
||||||
let (mut up_read, mut up_write) =
|
|
||||||
upstream.into_split();
|
|
||||||
|
|
||||||
// Forward data from edge (via channel) to SmartProxy
|
|
||||||
// After writing to upstream, send WINDOW_UPDATE_BACK to edge
|
|
||||||
let writer_token = stream_token.clone();
|
|
||||||
let wub_tx = writer_tx.clone();
|
|
||||||
let stream_counter_w = Arc::clone(&stream_counter);
|
|
||||||
let writer_for_edge_data = tokio::spawn(async move {
|
|
||||||
let mut consumed_since_update: u32 = 0;
|
|
||||||
loop {
|
|
||||||
tokio::select! {
|
|
||||||
data = data_rx.recv() => {
|
|
||||||
match data {
|
|
||||||
Some(data) => {
|
|
||||||
let len = data.len() as u32;
|
|
||||||
// Check cancellation alongside the write so we respond
|
|
||||||
// promptly to FRAME_CLOSE instead of blocking up to 60s.
|
|
||||||
let write_result = tokio::select! {
|
|
||||||
r = tokio::time::timeout(
|
|
||||||
Duration::from_secs(60),
|
|
||||||
up_write.write_all(&data),
|
|
||||||
) => r,
|
|
||||||
_ = writer_token.cancelled() => break,
|
|
||||||
};
|
|
||||||
match write_result {
|
|
||||||
Ok(Ok(())) => {}
|
|
||||||
Ok(Err(_)) => break,
|
|
||||||
Err(_) => {
|
|
||||||
log::warn!("Stream {} write to upstream timed out (60s)", stream_id);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Track consumption for adaptive flow control.
|
|
||||||
// Increment capped to adaptive window to limit per-stream in-flight data.
|
|
||||||
consumed_since_update += len;
|
|
||||||
let adaptive_window = remoteingress_protocol::compute_window_for_stream_count(
|
|
||||||
stream_counter_w.load(Ordering::Relaxed),
|
|
||||||
);
|
|
||||||
let threshold = adaptive_window / 2;
|
|
||||||
if consumed_since_update >= threshold {
|
|
||||||
let increment = consumed_since_update.min(adaptive_window);
|
|
||||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, increment);
|
|
||||||
if wub_tx.try_send(frame).is_ok() {
|
|
||||||
consumed_since_update -= increment;
|
|
||||||
}
|
|
||||||
// If try_send fails, keep accumulating — retry on next threshold
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ = writer_token.cancelled() => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Send final window update for remaining consumed bytes
|
|
||||||
if consumed_since_update > 0 {
|
|
||||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update);
|
|
||||||
let _ = wub_tx.try_send(frame);
|
|
||||||
}
|
|
||||||
let _ = up_write.shutdown().await;
|
|
||||||
});
|
|
||||||
|
|
||||||
// Forward data from SmartProxy back to edge via writer channel
|
|
||||||
// with per-stream flow control (check send_window before reading)
|
|
||||||
let mut buf = vec![0u8; 32768];
|
|
||||||
loop {
|
|
||||||
// Wait for send window to have capacity (with stall timeout)
|
|
||||||
loop {
|
|
||||||
let w = send_window.load(Ordering::Acquire);
|
|
||||||
if w > 0 { break; }
|
|
||||||
tokio::select! {
|
|
||||||
_ = window_notify.notified() => continue,
|
|
||||||
_ = stream_token.cancelled() => break,
|
|
||||||
_ = tokio::time::sleep(Duration::from_secs(120)) => {
|
|
||||||
log::warn!("Stream {} download stalled (window empty for 120s)", stream_id);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if stream_token.is_cancelled() { break; }
|
|
||||||
|
|
||||||
// Limit read size to available window.
|
|
||||||
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
|
|
||||||
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
|
|
||||||
// which would be falsely interpreted as EOF.
|
|
||||||
let w = send_window.load(Ordering::Acquire) as usize;
|
|
||||||
if w == 0 {
|
|
||||||
log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Adaptive: cap read to current per-stream target window
|
|
||||||
let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count(
|
|
||||||
stream_counter.load(Ordering::Relaxed),
|
|
||||||
) as usize;
|
|
||||||
let max_read = w.min(buf.len()).min(adaptive_cap);
|
|
||||||
|
|
||||||
tokio::select! {
|
|
||||||
read_result = up_read.read(&mut buf[..max_read]) => {
|
|
||||||
match read_result {
|
|
||||||
Ok(0) => break,
|
|
||||||
Ok(n) => {
|
|
||||||
send_window.fetch_sub(n as u32, Ordering::Release);
|
|
||||||
let frame =
|
|
||||||
encode_frame(stream_id, FRAME_DATA_BACK, &buf[..n]);
|
|
||||||
if data_writer_tx.send(frame).await.is_err() {
|
|
||||||
log::warn!("Stream {} data channel closed, closing", stream_id);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(_) => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ = stream_token.cancelled() => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK).
|
|
||||||
// Use send().await to guarantee delivery (try_send silently drops if full).
|
|
||||||
if !stream_token.is_cancelled() {
|
|
||||||
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
|
|
||||||
let _ = data_writer_tx.send(close_frame).await;
|
|
||||||
}
|
|
||||||
|
|
||||||
writer_for_edge_data.abort();
|
|
||||||
Ok::<(), Box<dyn std::error::Error + Send + Sync>>(())
|
|
||||||
}
|
|
||||||
.await;
|
|
||||||
|
|
||||||
if let Err(e) = result {
|
|
||||||
log::error!("Stream {} error: {}", stream_id, e);
|
|
||||||
// Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK).
|
|
||||||
// Use send().await to guarantee delivery.
|
|
||||||
if !stream_token.is_cancelled() {
|
|
||||||
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
|
|
||||||
let _ = data_writer_tx.send(close_frame).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean up stream (guard against duplicate if FRAME_CLOSE already removed it)
|
|
||||||
let was_present = {
|
|
||||||
let mut s = streams_clone.lock().await;
|
|
||||||
s.remove(&stream_id).is_some()
|
|
||||||
};
|
|
||||||
if was_present {
|
|
||||||
let _ = event_tx_clone.try_send(HubEvent::StreamClosed {
|
|
||||||
edge_id: edge_id_clone,
|
|
||||||
stream_id,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
stream_counter.fetch_sub(1, Ordering::Relaxed);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
FRAME_DATA => {
|
|
||||||
// Non-blocking dispatch to per-stream channel.
|
|
||||||
// With flow control, the sender should rarely exceed the channel capacity.
|
|
||||||
let mut s = streams.lock().await;
|
|
||||||
if let Some(state) = s.get(&frame.stream_id) {
|
|
||||||
if state.data_tx.try_send(frame.payload).is_err() {
|
|
||||||
log::warn!("Stream {} data channel full, closing stream", frame.stream_id);
|
|
||||||
if let Some(state) = s.remove(&frame.stream_id) {
|
|
||||||
state.cancel_token.cancel();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
FRAME_WINDOW_UPDATE => {
|
|
||||||
// Edge consumed data — increase our send window for this stream
|
|
||||||
if let Some(increment) = decode_window_update(&frame.payload) {
|
|
||||||
if increment > 0 {
|
|
||||||
let s = streams.lock().await;
|
|
||||||
if let Some(state) = s.get(&frame.stream_id) {
|
|
||||||
let prev = state.send_window.fetch_add(increment, Ordering::Release);
|
|
||||||
if prev + increment > MAX_WINDOW_SIZE {
|
|
||||||
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
|
|
||||||
}
|
|
||||||
state.window_notify.notify_one();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
FRAME_CLOSE => {
|
|
||||||
let mut s = streams.lock().await;
|
|
||||||
if let Some(state) = s.remove(&frame.stream_id) {
|
|
||||||
state.cancel_token.cancel();
|
|
||||||
let _ = event_tx.try_send(HubEvent::StreamClosed {
|
|
||||||
edge_id: edge_id.clone(),
|
|
||||||
stream_id: frame.stream_id,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
FRAME_PONG => {
|
|
||||||
log::debug!("Received PONG from edge {}", edge_id);
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
log::warn!("Unexpected frame type {} from edge", frame.frame_type);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(None) => {
|
|
||||||
log::info!("Edge {} disconnected (EOF)", edge_id);
|
|
||||||
disconnect_reason = "edge_eof".to_string();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
log::error!("Edge {} frame error: {}", edge_id, e);
|
|
||||||
disconnect_reason = format!("edge_frame_error: {}", e);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ = ping_ticker.tick() => {
|
remoteingress_protocol::TunnelEvent::Eof => {
|
||||||
let ping_frame = encode_frame(0, FRAME_PING, &[]);
|
log::info!("Edge {} disconnected (EOF)", edge_id);
|
||||||
if frame_writer_tx.try_send(ping_frame).is_err() {
|
disconnect_reason = "edge_eof".to_string();
|
||||||
// Control channel full — skip this PING cycle.
|
break;
|
||||||
// The 45s liveness timeout gives margin for the channel to drain.
|
|
||||||
log::warn!("PING send to edge {} failed, control channel full — skipping", edge_id);
|
|
||||||
}
|
|
||||||
log::trace!("Sent PING to edge {}", edge_id);
|
|
||||||
}
|
}
|
||||||
_ = &mut liveness_deadline => {
|
remoteingress_protocol::TunnelEvent::ReadError(e) => {
|
||||||
|
log::error!("Edge {} frame error: {}", edge_id, e);
|
||||||
|
disconnect_reason = format!("edge_frame_error: {}", e);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
remoteingress_protocol::TunnelEvent::WriteError(e) => {
|
||||||
|
log::error!("Tunnel write error to edge {}: {}", edge_id, e);
|
||||||
|
disconnect_reason = format!("tunnel_write_error: {}", e);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
remoteingress_protocol::TunnelEvent::LivenessTimeout => {
|
||||||
log::warn!("Edge {} liveness timeout (no frames for {}s), disconnecting",
|
log::warn!("Edge {} liveness timeout (no frames for {}s), disconnecting",
|
||||||
edge_id, liveness_timeout_dur.as_secs());
|
edge_id, liveness_timeout_dur.as_secs());
|
||||||
disconnect_reason = "liveness_timeout".to_string();
|
disconnect_reason = "liveness_timeout".to_string();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
_ = &mut writer_dead_rx => {
|
remoteingress_protocol::TunnelEvent::Cancelled => {
|
||||||
log::error!("Tunnel writer to edge {} died, disconnecting immediately", edge_id);
|
|
||||||
disconnect_reason = "writer_dead".to_string();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
_ = edge_token.cancelled() => {
|
|
||||||
log::info!("Edge {} cancelled by hub", edge_id);
|
log::info!("Edge {} cancelled by hub", edge_id);
|
||||||
disconnect_reason = "cancelled_by_hub".to_string();
|
disconnect_reason = "cancelled_by_hub".to_string();
|
||||||
break;
|
break;
|
||||||
@@ -796,10 +831,17 @@ async fn handle_edge_connection(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Graceful TLS shutdown: send close_notify so the edge sees a clean disconnect
|
||||||
|
// instead of "peer closed connection without sending TLS close_notify".
|
||||||
|
let mut tls_stream = tunnel_io.into_inner();
|
||||||
|
let _ = tokio::time::timeout(
|
||||||
|
Duration::from_secs(2),
|
||||||
|
tls_stream.shutdown(),
|
||||||
|
).await;
|
||||||
|
|
||||||
// Cleanup: cancel edge token to propagate to all child tasks
|
// Cleanup: cancel edge token to propagate to all child tasks
|
||||||
edge_token.cancel();
|
edge_token.cancel();
|
||||||
config_handle.abort();
|
config_handle.abort();
|
||||||
writer_handle.abort();
|
|
||||||
{
|
{
|
||||||
let mut edges = connected.lock().await;
|
let mut edges = connected.lock().await;
|
||||||
edges.remove(&edge_id);
|
edges.remove(&edge_id);
|
||||||
|
|||||||
@@ -4,7 +4,9 @@ version = "2.0.0"
|
|||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
tokio = { version = "1", features = ["io-util"] }
|
tokio = { version = "1", features = ["io-util", "sync", "time"] }
|
||||||
|
tokio-util = "0.7"
|
||||||
|
log = "0.4"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tokio = { version = "1", features = ["io-util", "macros", "rt"] }
|
tokio = { version = "1", features = ["io-util", "macros", "rt"] }
|
||||||
|
|||||||
@@ -1,4 +1,8 @@
|
|||||||
use tokio::io::{AsyncRead, AsyncReadExt};
|
use std::collections::VecDeque;
|
||||||
|
use std::future::Future;
|
||||||
|
use std::pin::Pin;
|
||||||
|
use std::task::{Context, Poll};
|
||||||
|
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf};
|
||||||
|
|
||||||
// Frame type constants
|
// Frame type constants
|
||||||
pub const FRAME_OPEN: u8 = 0x01;
|
pub const FRAME_OPEN: u8 = 0x01;
|
||||||
@@ -68,6 +72,16 @@ pub fn encode_frame(stream_id: u32, frame_type: u8, payload: &[u8]) -> Vec<u8> {
|
|||||||
buf
|
buf
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Write a frame header into `buf[0..FRAME_HEADER_SIZE]`.
///
/// Header layout (9 bytes): 4-byte big-endian `stream_id`, 1-byte `frame_type`,
/// 4-byte big-endian payload length.
///
/// The caller must ensure the payload is already at
/// `buf[FRAME_HEADER_SIZE..FRAME_HEADER_SIZE + payload_len]`. This enables zero-copy
/// encoding: read directly into `buf[FRAME_HEADER_SIZE..]`, then prepend the header
/// without copying the payload. Bytes after the header are never touched here.
///
/// # Panics
///
/// Panics if `buf` is shorter than the 9 header bytes. In debug builds it also
/// panics if `payload_len` does not fit in a `u32`; in release builds such a length
/// is truncated to its low 32 bits, exactly as before.
pub fn encode_frame_header(buf: &mut [u8], stream_id: u32, frame_type: u8, payload_len: usize) {
    debug_assert!(
        buf.len() >= 9,
        "frame header needs 9 bytes, buffer has {}",
        buf.len()
    );
    // The wire format only has 32 bits for the length; anything larger would be
    // silently truncated by the cast below and desynchronise the peer's parser.
    debug_assert!(
        payload_len <= u32::MAX as usize,
        "payload length {} does not fit the 32-bit length field",
        payload_len
    );
    buf[0..4].copy_from_slice(&stream_id.to_be_bytes());
    buf[4] = frame_type;
    buf[5..9].copy_from_slice(&(payload_len as u32).to_be_bytes());
}
|
||||||
|
|
||||||
/// Build a PROXY protocol v1 header line.
|
/// Build a PROXY protocol v1 header line.
|
||||||
/// Format: `PROXY TCP4 <client_ip> <edge_ip> <client_port> <dest_port>\r\n`
|
/// Format: `PROXY TCP4 <client_ip> <edge_ip> <client_port> <dest_port>\r\n`
|
||||||
pub fn build_proxy_v1_header(
|
pub fn build_proxy_v1_header(
|
||||||
@@ -120,9 +134,13 @@ impl<R: AsyncRead + Unpin> FrameReader<R> {
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
if length > MAX_PAYLOAD_SIZE {
|
if length > MAX_PAYLOAD_SIZE {
|
||||||
|
log::error!(
|
||||||
|
"CORRUPT FRAME HEADER: raw={:02x?} stream_id={} type=0x{:02x} length={}",
|
||||||
|
self.header_buf, stream_id, frame_type, length
|
||||||
|
);
|
||||||
return Err(std::io::Error::new(
|
return Err(std::io::Error::new(
|
||||||
std::io::ErrorKind::InvalidData,
|
std::io::ErrorKind::InvalidData,
|
||||||
format!("frame payload too large: {} bytes", length),
|
format!("frame payload too large: {} bytes (header={:02x?})", length, self.header_buf),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -144,10 +162,303 @@ impl<R: AsyncRead + Unpin> FrameReader<R> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// TunnelIo: single-owner I/O multiplexer for the TLS tunnel connection
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Events produced by the TunnelIo event loop.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum TunnelEvent {
|
||||||
|
/// A complete frame was read from the remote side.
|
||||||
|
Frame(Frame),
|
||||||
|
/// The remote side closed the connection (EOF).
|
||||||
|
Eof,
|
||||||
|
/// A read error occurred.
|
||||||
|
ReadError(std::io::Error),
|
||||||
|
/// A write error occurred.
|
||||||
|
WriteError(std::io::Error),
|
||||||
|
/// No frames received for the liveness timeout duration.
|
||||||
|
LivenessTimeout,
|
||||||
|
/// The cancellation token was triggered.
|
||||||
|
Cancelled,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write state extracted into a sub-struct so the borrow checker can see
|
||||||
|
/// disjoint field access between `self.write` and `self.stream`.
|
||||||
|
struct WriteState {
|
||||||
|
ctrl_queue: VecDeque<Vec<u8>>, // PONG, WINDOW_UPDATE, CLOSE, OPEN — always first
|
||||||
|
data_queue: VecDeque<Vec<u8>>, // DATA, DATA_BACK — only when ctrl is empty
|
||||||
|
offset: usize, // progress within current frame being written
|
||||||
|
flush_needed: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WriteState {
|
||||||
|
fn has_work(&self) -> bool {
|
||||||
|
!self.ctrl_queue.is_empty() || !self.data_queue.is_empty()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Single-owner I/O engine for the tunnel TLS connection.
|
||||||
|
///
|
||||||
|
/// Owns the TLS stream directly — no `tokio::io::split()`, no mutex.
|
||||||
|
/// Uses two priority write queues: ctrl frames (PONG, WINDOW_UPDATE, CLOSE, OPEN)
|
||||||
|
/// are ALWAYS written before data frames (DATA, DATA_BACK). This prevents
|
||||||
|
/// WINDOW_UPDATE starvation that causes flow control deadlocks.
|
||||||
|
pub struct TunnelIo<S> {
|
||||||
|
stream: S,
|
||||||
|
// Read state: accumulate bytes, parse frames incrementally
|
||||||
|
read_buf: Vec<u8>,
|
||||||
|
read_pos: usize,
|
||||||
|
parse_pos: usize,
|
||||||
|
// Write state: extracted sub-struct for safe disjoint borrows
|
||||||
|
write: WriteState,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
|
||||||
|
pub fn new(stream: S, initial_data: Vec<u8>) -> Self {
|
||||||
|
let read_pos = initial_data.len();
|
||||||
|
let mut read_buf = initial_data;
|
||||||
|
if read_buf.capacity() < 65536 {
|
||||||
|
read_buf.reserve(65536 - read_buf.len());
|
||||||
|
}
|
||||||
|
Self {
|
||||||
|
stream,
|
||||||
|
read_buf,
|
||||||
|
read_pos,
|
||||||
|
parse_pos: 0,
|
||||||
|
write: WriteState {
|
||||||
|
ctrl_queue: VecDeque::new(),
|
||||||
|
data_queue: VecDeque::new(),
|
||||||
|
offset: 0,
|
||||||
|
flush_needed: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Queue a high-priority control frame (PONG, WINDOW_UPDATE, CLOSE, OPEN).
|
||||||
|
pub fn queue_ctrl(&mut self, frame: Vec<u8>) {
|
||||||
|
self.write.ctrl_queue.push_back(frame);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Queue a lower-priority data frame (DATA, DATA_BACK).
|
||||||
|
pub fn queue_data(&mut self, frame: Vec<u8>) {
|
||||||
|
self.write.data_queue.push_back(frame);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Try to parse a complete frame from the read buffer.
|
||||||
|
/// Uses a parse_pos cursor to avoid drain() on every frame.
|
||||||
|
pub fn try_parse_frame(&mut self) -> Option<Result<Frame, std::io::Error>> {
|
||||||
|
let available = self.read_pos - self.parse_pos;
|
||||||
|
if available < FRAME_HEADER_SIZE {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let base = self.parse_pos;
|
||||||
|
let stream_id = u32::from_be_bytes([
|
||||||
|
self.read_buf[base], self.read_buf[base + 1],
|
||||||
|
self.read_buf[base + 2], self.read_buf[base + 3],
|
||||||
|
]);
|
||||||
|
let frame_type = self.read_buf[base + 4];
|
||||||
|
let length = u32::from_be_bytes([
|
||||||
|
self.read_buf[base + 5], self.read_buf[base + 6],
|
||||||
|
self.read_buf[base + 7], self.read_buf[base + 8],
|
||||||
|
]);
|
||||||
|
|
||||||
|
if length > MAX_PAYLOAD_SIZE {
|
||||||
|
let header = [
|
||||||
|
self.read_buf[base], self.read_buf[base + 1],
|
||||||
|
self.read_buf[base + 2], self.read_buf[base + 3],
|
||||||
|
self.read_buf[base + 4], self.read_buf[base + 5],
|
||||||
|
self.read_buf[base + 6], self.read_buf[base + 7],
|
||||||
|
self.read_buf[base + 8],
|
||||||
|
];
|
||||||
|
log::error!(
|
||||||
|
"CORRUPT FRAME HEADER: raw={:02x?} stream_id={} type=0x{:02x} length={}",
|
||||||
|
header, stream_id, frame_type, length
|
||||||
|
);
|
||||||
|
return Some(Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::InvalidData,
|
||||||
|
format!("frame payload too large: {} bytes (header={:02x?})", length, header),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let total_frame_size = FRAME_HEADER_SIZE + length as usize;
|
||||||
|
if available < total_frame_size {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let payload = self.read_buf[base + FRAME_HEADER_SIZE..base + total_frame_size].to_vec();
|
||||||
|
self.parse_pos += total_frame_size;
|
||||||
|
|
||||||
|
// Compact when parse_pos > half the data to reclaim memory
|
||||||
|
if self.parse_pos > self.read_pos / 2 && self.parse_pos > 0 {
|
||||||
|
self.read_buf.drain(..self.parse_pos);
|
||||||
|
self.read_pos -= self.parse_pos;
|
||||||
|
self.parse_pos = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(Ok(Frame { stream_id, frame_type, payload }))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Poll-based I/O step. Returns Ready on events, Pending when idle.
|
||||||
|
///
|
||||||
|
/// Order: write(ctrl→data) → flush → read → channels → timers
|
||||||
|
pub fn poll_step(
|
||||||
|
&mut self,
|
||||||
|
cx: &mut Context<'_>,
|
||||||
|
ctrl_rx: &mut tokio::sync::mpsc::Receiver<Vec<u8>>,
|
||||||
|
data_rx: &mut tokio::sync::mpsc::Receiver<Vec<u8>>,
|
||||||
|
liveness_deadline: &mut Pin<Box<tokio::time::Sleep>>,
|
||||||
|
cancel_token: &tokio_util::sync::CancellationToken,
|
||||||
|
) -> Poll<TunnelEvent> {
|
||||||
|
// 1. WRITE: drain ctrl queue first, then data queue.
|
||||||
|
// TLS poll_write writes plaintext to session buffer (always Ready).
|
||||||
|
// Batch up to 16 frames per poll cycle.
|
||||||
|
// Safe: `self.write` and `self.stream` are disjoint fields.
|
||||||
|
let mut writes = 0;
|
||||||
|
while self.write.has_work() && writes < 16 {
|
||||||
|
let from_ctrl = !self.write.ctrl_queue.is_empty();
|
||||||
|
let frame = if from_ctrl {
|
||||||
|
self.write.ctrl_queue.front().unwrap()
|
||||||
|
} else {
|
||||||
|
self.write.data_queue.front().unwrap()
|
||||||
|
};
|
||||||
|
let remaining = &frame[self.write.offset..];
|
||||||
|
|
||||||
|
match Pin::new(&mut self.stream).poll_write(cx, remaining) {
|
||||||
|
Poll::Ready(Ok(0)) => {
|
||||||
|
return Poll::Ready(TunnelEvent::WriteError(
|
||||||
|
std::io::Error::new(std::io::ErrorKind::WriteZero, "write zero"),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Poll::Ready(Ok(n)) => {
|
||||||
|
self.write.offset += n;
|
||||||
|
self.write.flush_needed = true;
|
||||||
|
if self.write.offset >= frame.len() {
|
||||||
|
if from_ctrl { self.write.ctrl_queue.pop_front(); }
|
||||||
|
else { self.write.data_queue.pop_front(); }
|
||||||
|
self.write.offset = 0;
|
||||||
|
writes += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Poll::Ready(Err(e)) => return Poll::Ready(TunnelEvent::WriteError(e)),
|
||||||
|
Poll::Pending => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. FLUSH: push encrypted data from TLS session to TCP.
|
||||||
|
if self.write.flush_needed {
|
||||||
|
match Pin::new(&mut self.stream).poll_flush(cx) {
|
||||||
|
Poll::Ready(Ok(())) => self.write.flush_needed = false,
|
||||||
|
Poll::Ready(Err(e)) => return Poll::Ready(TunnelEvent::WriteError(e)),
|
||||||
|
Poll::Pending => {} // TCP waker will notify us
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. READ: drain stream until Pending to ensure the TCP waker is always registered.
|
||||||
|
// Without this loop, a Ready return with partial frame data would consume
|
||||||
|
// the waker without re-registering it, causing the task to sleep until a
|
||||||
|
// timer or channel wakes it (potentially 15+ seconds of lost reads).
|
||||||
|
loop {
|
||||||
|
// Compact if needed to make room for reads
|
||||||
|
if self.parse_pos > 0 && self.read_buf.len() - self.read_pos < 32768 {
|
||||||
|
self.read_buf.drain(..self.parse_pos);
|
||||||
|
self.read_pos -= self.parse_pos;
|
||||||
|
self.parse_pos = 0;
|
||||||
|
}
|
||||||
|
if self.read_buf.len() < self.read_pos + 32768 {
|
||||||
|
self.read_buf.resize(self.read_pos + 32768, 0);
|
||||||
|
}
|
||||||
|
let mut rbuf = ReadBuf::new(&mut self.read_buf[self.read_pos..]);
|
||||||
|
match Pin::new(&mut self.stream).poll_read(cx, &mut rbuf) {
|
||||||
|
Poll::Ready(Ok(())) => {
|
||||||
|
let n = rbuf.filled().len();
|
||||||
|
if n == 0 {
|
||||||
|
return Poll::Ready(TunnelEvent::Eof);
|
||||||
|
}
|
||||||
|
self.read_pos += n;
|
||||||
|
if let Some(result) = self.try_parse_frame() {
|
||||||
|
return match result {
|
||||||
|
Ok(frame) => Poll::Ready(TunnelEvent::Frame(frame)),
|
||||||
|
Err(e) => Poll::Ready(TunnelEvent::ReadError(e)),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
// Partial data — loop to call poll_read again so the TCP
|
||||||
|
// waker is re-registered when it finally returns Pending.
|
||||||
|
}
|
||||||
|
Poll::Ready(Err(e)) => return Poll::Ready(TunnelEvent::ReadError(e)),
|
||||||
|
Poll::Pending => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. CHANNELS: drain ctrl into ctrl_queue, data into data_queue.
|
||||||
|
let mut got_new = false;
|
||||||
|
loop {
|
||||||
|
match ctrl_rx.poll_recv(cx) {
|
||||||
|
Poll::Ready(Some(frame)) => { self.write.ctrl_queue.push_back(frame); got_new = true; }
|
||||||
|
Poll::Ready(None) => {
|
||||||
|
return Poll::Ready(TunnelEvent::WriteError(
|
||||||
|
std::io::Error::new(std::io::ErrorKind::BrokenPipe, "ctrl channel closed"),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Poll::Pending => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
loop {
|
||||||
|
match data_rx.poll_recv(cx) {
|
||||||
|
Poll::Ready(Some(frame)) => { self.write.data_queue.push_back(frame); got_new = true; }
|
||||||
|
Poll::Ready(None) => {
|
||||||
|
return Poll::Ready(TunnelEvent::WriteError(
|
||||||
|
std::io::Error::new(std::io::ErrorKind::BrokenPipe, "data channel closed"),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Poll::Pending => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. TIMERS
|
||||||
|
if liveness_deadline.as_mut().poll(cx).is_ready() {
|
||||||
|
return Poll::Ready(TunnelEvent::LivenessTimeout);
|
||||||
|
}
|
||||||
|
if cancel_token.is_cancelled() {
|
||||||
|
return Poll::Ready(TunnelEvent::Cancelled);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 6. SELF-WAKE: only when we have frames AND flush is done.
|
||||||
|
// If flush is pending, the TCP write-readiness waker will notify us.
|
||||||
|
// If we got new channel frames, wake to write them.
|
||||||
|
if got_new || (!self.write.flush_needed && self.write.has_work()) {
|
||||||
|
cx.waker().wake_by_ref();
|
||||||
|
}
|
||||||
|
|
||||||
|
Poll::Pending
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_inner(self) -> S {
|
||||||
|
self.stream
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
fn test_encode_frame_header() {
    // Stamping a header in front of an in-place payload must produce exactly
    // the bytes that the allocating encoder produces.
    let body = b"hello";
    let expected = encode_frame(42, FRAME_DATA, body);

    let mut in_place = vec![0u8; FRAME_HEADER_SIZE + body.len()];
    in_place[FRAME_HEADER_SIZE..].copy_from_slice(body);
    encode_frame_header(&mut in_place, 42, FRAME_DATA, body.len());

    assert_eq!(in_place, expected);
}
|
||||||
|
|
||||||
|
#[test]
fn test_encode_frame_header_empty_payload() {
    // A header-only buffer (zero-length payload) must match the allocating encoder.
    let expected = encode_frame(99, FRAME_CLOSE, &[]);

    let mut header_only = vec![0u8; FRAME_HEADER_SIZE];
    encode_frame_header(&mut header_only, 99, FRAME_CLOSE, 0);

    assert_eq!(header_only, expected);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_encode_frame() {
|
fn test_encode_frame() {
|
||||||
let data = b"hello";
|
let data = b"hello";
|
||||||
|
|||||||
@@ -3,6 +3,6 @@
|
|||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: '@serve.zone/remoteingress',
|
name: '@serve.zone/remoteingress',
|
||||||
version: '4.8.0',
|
version: '4.8.8',
|
||||||
description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.'
|
description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.'
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user