Compare commits
55 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d89d1cfbbf | |||
| 6cbe8bee5e | |||
| a63247af3e | |||
| 28a0c769d9 | |||
| ce7ccd83dc | |||
| 93578d7034 | |||
| 4cfc518301 | |||
| 124df129ec | |||
| 0b8420aac9 | |||
| afd193336a | |||
| e8d429f117 | |||
| 3c2299430a | |||
| 8b5df9a0b7 | |||
| 236d6d16ee | |||
| 81bbb33016 | |||
| 79af6fd425 | |||
| f71b2f1876 | |||
| 0161a2589c | |||
| bfd9e58b4f | |||
| 9a8760c18d | |||
| c77caa89fc | |||
| 04586aab39 | |||
| f9a739858d | |||
| da01fbeecd | |||
| 264e8eeb97 | |||
| 9922c3b020 | |||
| 38cde37cff | |||
| 64572827e5 | |||
| c4e26198b9 | |||
| 0b5d72de28 | |||
| e8431c0174 | |||
| d57d6395dd | |||
| 2e5ceeaf5c | |||
| 1979910f6f | |||
| edfad2dffe | |||
| d907943ae5 | |||
| 4bfb1244fc | |||
| e31c3421a6 | |||
| de8422966a | |||
| a87e9578eb | |||
| b851bc7994 | |||
| 1284bb5b73 | |||
| 1afd0e5347 | |||
| 96e7ab00cf | |||
| 17d1a795cd | |||
| 982f648928 | |||
| 3a2a060a85 | |||
| e0c469147e | |||
| 0fdcdf566e | |||
| a808d4c9de | |||
| f8a0171ef3 | |||
| 1d59a48648 | |||
| af2ec11a2d | |||
| b6e66a7fa6 | |||
| 1391b39601 |
171
changelog.md
171
changelog.md
@@ -1,5 +1,176 @@
|
||||
# Changelog
|
||||
|
||||
## 2026-03-18 - 4.9.0 - feat(protocol)
|
||||
add sustained-stream tunnel scheduling to isolate high-throughput traffic
|
||||
|
||||
- Introduce a third low-priority sustained queue in TunnelIo with a forced drain budget to prevent long-lived high-bandwidth streams from starving control and normal data frames.
|
||||
- Classify upload and download streams as sustained after exceeding the throughput threshold for the minimum duration, and route their DATA and CLOSE frames through the sustained channel.
|
||||
- Wire the new sustained channel through edge and hub stream handling so sustained traffic is scheduled consistently on both sides of the tunnel.
|
||||
|
||||
## 2026-03-18 - 4.8.19 - fix(remoteingress-protocol)
|
||||
reduce per-stream flow control windows and increase control channel buffering
|
||||
|
||||
- Lower the initial and maximum per-stream window from 16MB to 4MB and scale adaptive windows against a 200MB total budget with a 1MB minimum.
|
||||
- Increase edge and hub control frame channel capacity from 256 to 512 to better handle prioritized control traffic.
|
||||
- Update flow-control tests and comments to reflect the new window sizing and budget behavior.
|
||||
|
||||
## 2026-03-17 - 4.8.18 - fix(rust-protocol)
|
||||
switch tunnel frame buffers from `Vec<u8>` to `Bytes` to reduce copying and memory overhead
|
||||
|
||||
- Add the bytes crate to core and protocol crates
|
||||
- Update frame encoding, reader payloads, channel queues, and stream backchannels to use Bytes
|
||||
- Adjust edge and hub data/control paths to send framed payloads as Bytes
|
||||
|
||||
## 2026-03-17 - 4.8.17 - fix(protocol)
|
||||
increase per-stream flow control windows and remove adaptive read caps
|
||||
|
||||
- Raise the initial per-stream window from 4MB to 16MB and expand the adaptive window budget to 800MB with a 4MB floor
|
||||
- Stop limiting edge and hub reads by the adaptive per-stream target window, keeping reads capped only by the current window and 32KB chunk size
|
||||
- Update protocol tests to match the new adaptive window scaling and budget boundaries
|
||||
|
||||
## 2026-03-17 - 4.8.16 - fix(release)
|
||||
bump package version to 4.8.15
|
||||
|
||||
- Updates the package.json version field from 4.8.13 to 4.8.15.
|
||||
|
||||
## 2026-03-17 - 4.8.13 - fix(remoteingress-protocol)
|
||||
require a flush after each written frame to bound TLS buffer growth
|
||||
|
||||
- Remove the unflushed byte threshold and stop queueing additional writes while a flush is pending
|
||||
- Simplify write and flush error logging after dropping unflushed byte tracking
|
||||
- Update tunnel I/O comments to reflect the stricter flush behavior that avoids OOM and connection resets
|
||||
|
||||
## 2026-03-17 - 4.8.12 - fix(tunnel)
|
||||
prevent tunnel backpressure buffering from exhausting memory and cancel stream handlers before TLS shutdown
|
||||
|
||||
- stop self-waking and writing new frames while a flush is pending to avoid unbounded TLS session buffer growth under load
|
||||
- reorder edge and hub shutdown cleanup so stream cancellation happens before TLS close_notify, preventing handlers from blocking on dead channels
|
||||
- add load tests covering sustained large transfers, burst traffic, and rapid stream churn to verify tunnel stability
|
||||
|
||||
## 2026-03-17 - 4.8.11 - fix(remoteingress-core)
|
||||
stop data frame send loops promptly when stream cancellation is triggered
|
||||
|
||||
- Use cancellation-aware tokio::select! around data channel sends in both edge and hub stream forwarding paths
|
||||
- Prevent stalled or noisy shutdown behavior when stream or client cancellation happens while awaiting frame delivery
|
||||
|
||||
## 2026-03-17 - 4.8.10 - fix(remoteingress-core)
|
||||
guard tunnel frame sends with cancellation to prevent async send deadlocks
|
||||
|
||||
- Wrap OPEN, CLOSE, CLOSE_BACK, WINDOW_UPDATE, and cleanup channel sends in cancellation-aware tokio::select! blocks.
|
||||
- Avoid indefinite blocking when tunnel, stream, or writer tasks are cancelled while awaiting channel capacity.
|
||||
- Improve shutdown reliability for edge and hub stream handling under tunnel failure conditions.
|
||||
|
||||
## 2026-03-17 - 4.8.9 - fix(repo)
|
||||
no changes to commit
|
||||
|
||||
|
||||
## 2026-03-17 - 4.8.8 - fix(remoteingress-core)
|
||||
cancel stale edge connections when an edge reconnects
|
||||
|
||||
- Remove any existing edge entry before registering a reconnected edge
|
||||
- Trigger the previous connection's cancellation token so stale sessions shut down immediately instead of waiting for TCP keepalive
|
||||
|
||||
## 2026-03-17 - 4.8.7 - fix(remoteingress-core)
|
||||
perform graceful TLS shutdown on edge and hub tunnel streams
|
||||
|
||||
- Send TLS close_notify before cleanup to avoid peer disconnect warnings on both tunnel endpoints
|
||||
- Wrap stream shutdown in a 2 second timeout so connection teardown does not block cleanup
|
||||
|
||||
## 2026-03-17 - 4.8.6 - fix(remoteingress-core)
|
||||
initialize disconnect reason only when set in hub loop break paths
|
||||
|
||||
- Replace the default "unknown" disconnect reason with an explicitly assigned string and document that all hub loop exits set it before use
|
||||
- Add an allow attribute for unused assignments to avoid warnings around the deferred initialization pattern
|
||||
|
||||
## 2026-03-17 - 4.8.5 - fix(repo)
|
||||
no changes to commit
|
||||
|
||||
|
||||
## 2026-03-17 - 4.8.4 - fix(remoteingress-core)
|
||||
prevent stream stalls by guaranteeing flow-control updates and avoiding bounded per-stream channel overflows
|
||||
|
||||
- Replace bounded per-stream data channels with unbounded channels on edge and hub, relying on existing WINDOW_UPDATE flow control to limit bytes in flight
|
||||
- Use awaited sends for FRAME_WINDOW_UPDATE and FRAME_WINDOW_UPDATE_BACK so updates are not dropped and streams do not deadlock under backpressure
|
||||
- Clean up stream state when channel receivers have already exited instead of closing active streams because a bounded queue filled
|
||||
|
||||
## 2026-03-17 - 4.8.3 - fix(protocol,edge)
|
||||
optimize tunnel frame handling and zero-copy uploads in edge I/O
|
||||
|
||||
- extract hub frame processing into a shared edge handler to remove duplicated tunnel logic
|
||||
- add zero-copy frame header encoding and read payloads directly into framed buffers for client-to-hub uploads
|
||||
- refactor TunnelIo read/write state to avoid unsafe queue access and reduce buffer churn with incremental parsing
|
||||
|
||||
## 2026-03-17 - 4.8.2 - fix(rust-edge)
|
||||
refactor tunnel I/O to preserve TLS state and prioritize control frames
|
||||
|
||||
- replace split TLS handling with a single-owner TunnelIo to avoid handshake and buffered read corruption
|
||||
- prioritize control frames over data frames to prevent WINDOW_UPDATE starvation and flow-control deadlocks
|
||||
- improve tunnel reliability with incremental frame parsing, liveness/error events, and corrupt frame header logging
|
||||
|
||||
## 2026-03-17 - 4.8.1 - fix(remoteingress-core)
|
||||
remove tunnel writer timeouts from edge and hub buffered writes
|
||||
|
||||
- Drops the 30 second timeout wrapper around writer.write_all and writer.flush in both edge and hub tunnel writers.
|
||||
- Updates error logging to report write failures without referring to stalled writes.
|
||||
|
||||
## 2026-03-17 - 4.8.0 - feat(events)
|
||||
include disconnect reasons in edge and hub management events
|
||||
|
||||
- Add reason fields to tunnelDisconnected and edgeDisconnected events emitted from the Rust core and binary bridge
|
||||
- Propagate specific disconnect causes such as EOF, liveness timeout, writer failure, handshake failure, and hub cancellation
|
||||
- Update TypeScript edge and hub classes to log and forward disconnect reason data
|
||||
- Extend serialization tests to cover the new reason fields
|
||||
|
||||
## 2026-03-17 - 4.7.2 - fix(remoteingress-core)
|
||||
add tunnel write timeouts and scale initial stream windows by active stream count
|
||||
|
||||
- Wrap tunnel frame writes and flushes in a 30-second timeout on both edge and hub to detect stalled writers and trigger faster reconnect or cleanup.
|
||||
- Compute each stream's initial send window from the current active stream count instead of using a fixed window to keep total in-flight data within the 32MB budget.
|
||||
|
||||
## 2026-03-17 - 4.7.1 - fix(remoteingress-core)
|
||||
improve tunnel failure detection and reconnect handling
|
||||
|
||||
- Enable TCP keepalive on edge and hub connections to detect silent network failures sooner
|
||||
- Trigger immediate reconnect or disconnect when tunnel writer tasks fail instead of waiting for liveness timeouts
|
||||
- Prevent active stream counter underflow during concurrent connection cleanup
|
||||
|
||||
## 2026-03-16 - 4.7.0 - feat(edge,protocol,test)
|
||||
add configurable edge bind address and expand flow-control test coverage
|
||||
|
||||
- adds an optional bindAddress configuration for edge TCP listeners, defaulting to 0.0.0.0 when not provided
|
||||
- passes bindAddress through the TypeScript edge client and Rust edge runtime so local test setups can bind to localhost
|
||||
- adds protocol unit tests for adaptive stream window sizing and window update frame encoding/decoding
|
||||
- introduces end-to-end flow-control tests and updates the test script to build before running tests
|
||||
|
||||
## 2026-03-16 - 4.6.1 - fix(remoteingress-core)
|
||||
avoid spurious tunnel disconnect events and increase control channel capacity
|
||||
|
||||
- Emit TunnelDisconnected only after an established connection is actually lost, preventing false disconnect events during failed reconnect attempts.
|
||||
- Increase edge and hub control-channel buffer sizes from 64 to 256 to better prioritize control frames under load.
|
||||
|
||||
## 2026-03-16 - 4.6.0 - feat(remoteingress-core)
|
||||
add adaptive per-stream flow control based on active stream counts
|
||||
|
||||
- Track active stream counts on edge and hub connections to size per-stream flow control windows dynamically.
|
||||
- Cap WINDOW_UPDATE increments and read sizes to the adaptive window so bandwidth is shared more evenly across concurrent streams.
|
||||
- Apply the adaptive logic to both upload and download paths on edge and hub stream handlers.
|
||||
|
||||
## 2026-03-16 - 4.5.12 - fix(remoteingress-core)
|
||||
improve tunnel liveness handling and enable TCP keepalive for accepted client sockets
|
||||
|
||||
- Avoid disconnecting edges when PING or PONG frames cannot be queued because the control channel is temporarily full.
|
||||
- Enable TCP_NODELAY and TCP keepalive on accepted client connections to help detect stale or dropped clients.
|
||||
|
||||
## 2026-03-16 - 4.5.11 - fix(repo)
|
||||
no changes to commit
|
||||
|
||||
|
||||
## 2026-03-16 - 4.5.10 - fix(remoteingress-core)
|
||||
guard zero-window reads to avoid false EOF handling on stalled streams
|
||||
|
||||
- Prevent upload and download loops from calling read on an empty buffer when the flow-control window remains at 0 after the stall timeout
|
||||
- Log a warning and close the affected stream instead of misinterpreting Ok(0) as end-of-file
|
||||
|
||||
## 2026-03-16 - 4.5.9 - fix(remoteingress-core)
|
||||
delay stream close until downstream response draining finishes to prevent truncated transfers
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@serve.zone/remoteingress",
|
||||
"version": "4.5.9",
|
||||
"version": "4.9.0",
|
||||
"private": false,
|
||||
"description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.",
|
||||
"main": "dist_ts/index.js",
|
||||
@@ -9,7 +9,7 @@
|
||||
"author": "Task Venture Capital GmbH",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
"test": "(tstest test/ --verbose --logfile --timeout 60)",
|
||||
"test": "(pnpm run build && tstest test/ --verbose --logfile --timeout 60)",
|
||||
"build": "(tsbuild tsfolders --allowimplicitany && tsrust)",
|
||||
"buildDocs": "(tsdoc)"
|
||||
},
|
||||
|
||||
17
rust/Cargo.lock
generated
17
rust/Cargo.lock
generated
@@ -551,6 +551,7 @@ dependencies = [
|
||||
name = "remoteingress-core"
|
||||
version = "2.0.0"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"log",
|
||||
"rcgen",
|
||||
"remoteingress-protocol",
|
||||
@@ -558,6 +559,7 @@ dependencies = [
|
||||
"rustls-pemfile",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"socket2 0.5.10",
|
||||
"tokio",
|
||||
"tokio-rustls",
|
||||
"tokio-util",
|
||||
@@ -567,7 +569,10 @@ dependencies = [
|
||||
name = "remoteingress-protocol"
|
||||
version = "2.0.0"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"log",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -701,6 +706,16 @@ version = "1.15.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.5.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.6.2"
|
||||
@@ -765,7 +780,7 @@ dependencies = [
|
||||
"parking_lot",
|
||||
"pin-project-lite",
|
||||
"signal-hook-registry",
|
||||
"socket2",
|
||||
"socket2 0.6.2",
|
||||
"tokio-macros",
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
@@ -173,10 +173,10 @@ async fn handle_request(
|
||||
serde_json::json!({ "edgeId": edge_id, "peerAddr": peer_addr }),
|
||||
);
|
||||
}
|
||||
HubEvent::EdgeDisconnected { edge_id } => {
|
||||
HubEvent::EdgeDisconnected { edge_id, reason } => {
|
||||
send_event(
|
||||
"edgeDisconnected",
|
||||
serde_json::json!({ "edgeId": edge_id }),
|
||||
serde_json::json!({ "edgeId": edge_id, "reason": reason }),
|
||||
);
|
||||
}
|
||||
HubEvent::StreamOpened {
|
||||
@@ -295,8 +295,8 @@ async fn handle_request(
|
||||
EdgeEvent::TunnelConnected => {
|
||||
send_event("tunnelConnected", serde_json::json!({}));
|
||||
}
|
||||
EdgeEvent::TunnelDisconnected => {
|
||||
send_event("tunnelDisconnected", serde_json::json!({}));
|
||||
EdgeEvent::TunnelDisconnected { reason } => {
|
||||
send_event("tunnelDisconnected", serde_json::json!({ "reason": reason }));
|
||||
}
|
||||
EdgeEvent::PublicIpDiscovered { ip } => {
|
||||
send_event(
|
||||
|
||||
@@ -7,6 +7,7 @@ edition = "2021"
|
||||
remoteingress-protocol = { path = "../remoteingress-protocol" }
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
tokio-rustls = "0.26"
|
||||
bytes = "1"
|
||||
rustls = { version = "0.23", default-features = false, features = ["ring", "logging", "std", "tls12"] }
|
||||
rcgen = "0.13"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
@@ -14,3 +15,4 @@ serde_json = "1"
|
||||
log = "0.4"
|
||||
rustls-pemfile = "2"
|
||||
tokio-util = "0.7"
|
||||
socket2 = "0.5"
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::{TcpListener, TcpStream};
|
||||
use tokio::sync::{mpsc, Mutex, Notify, RwLock};
|
||||
use tokio::task::JoinHandle;
|
||||
@@ -11,12 +11,23 @@ use tokio_rustls::TlsConnector;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use bytes::Bytes;
|
||||
use remoteingress_protocol::*;
|
||||
|
||||
type EdgeTlsStream = tokio_rustls::client::TlsStream<TcpStream>;
|
||||
|
||||
/// Result of processing a frame (mirrors the pattern used in hub.rs).
|
||||
#[allow(dead_code)]
|
||||
enum EdgeFrameAction {
|
||||
Continue,
|
||||
Disconnect(String),
|
||||
}
|
||||
|
||||
/// Per-stream state tracked in the edge's client_writers map.
|
||||
struct EdgeStreamState {
|
||||
/// Channel to deliver FRAME_DATA_BACK payloads to the hub_to_client task.
|
||||
back_tx: mpsc::Sender<Vec<u8>>,
|
||||
/// Unbounded channel to deliver FRAME_DATA_BACK payloads to the hub_to_client task.
|
||||
/// Unbounded because flow control (WINDOW_UPDATE) already limits bytes-in-flight.
|
||||
back_tx: mpsc::UnboundedSender<Bytes>,
|
||||
/// Send window for FRAME_DATA (upload direction).
|
||||
/// Decremented by the client reader, incremented by FRAME_WINDOW_UPDATE_BACK from hub.
|
||||
send_window: Arc<AtomicU32>,
|
||||
@@ -32,6 +43,10 @@ pub struct EdgeConfig {
|
||||
pub hub_port: u16,
|
||||
pub edge_id: String,
|
||||
pub secret: String,
|
||||
/// Optional bind address for TCP listeners (defaults to "0.0.0.0").
|
||||
/// Useful for testing on localhost where edge and upstream share the same machine.
|
||||
#[serde(default)]
|
||||
pub bind_address: Option<String>,
|
||||
}
|
||||
|
||||
/// Handshake config received from hub after authentication.
|
||||
@@ -60,7 +75,8 @@ struct ConfigUpdate {
|
||||
#[serde(tag = "type")]
|
||||
pub enum EdgeEvent {
|
||||
TunnelConnected,
|
||||
TunnelDisconnected,
|
||||
#[serde(rename_all = "camelCase")]
|
||||
TunnelDisconnected { reason: String },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
PublicIpDiscovered { ip: String },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -232,7 +248,16 @@ async fn edge_main_loop(
|
||||
}
|
||||
|
||||
*connected.write().await = false;
|
||||
let _ = event_tx.try_send(EdgeEvent::TunnelDisconnected);
|
||||
// Extract reason for disconnect event
|
||||
let reason = match &result {
|
||||
EdgeLoopResult::Reconnect(r) => r.clone(),
|
||||
EdgeLoopResult::Shutdown => "shutdown".to_string(),
|
||||
};
|
||||
// Only emit disconnect event on actual disconnection, not on failed reconnects.
|
||||
// Failed reconnects never reach the handshake-success point, so was_connected is false.
|
||||
if was_connected {
|
||||
let _ = event_tx.try_send(EdgeEvent::TunnelDisconnected { reason: reason.clone() });
|
||||
}
|
||||
active_streams.store(0, Ordering::Relaxed);
|
||||
// Reset stream ID counter for next connection cycle
|
||||
next_stream_id.store(1, Ordering::Relaxed);
|
||||
@@ -240,7 +265,7 @@ async fn edge_main_loop(
|
||||
|
||||
match result {
|
||||
EdgeLoopResult::Shutdown => break,
|
||||
EdgeLoopResult::Reconnect => {
|
||||
EdgeLoopResult::Reconnect(_) => {
|
||||
log::info!("Reconnecting in {}ms...", backoff_ms);
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(Duration::from_millis(backoff_ms)) => {}
|
||||
@@ -255,7 +280,89 @@ async fn edge_main_loop(
|
||||
|
||||
enum EdgeLoopResult {
|
||||
Shutdown,
|
||||
Reconnect,
|
||||
Reconnect(String), // reason for disconnection
|
||||
}
|
||||
|
||||
/// Process a single frame received from the hub side of the tunnel.
|
||||
/// Handles FRAME_DATA_BACK, FRAME_WINDOW_UPDATE_BACK, FRAME_CLOSE_BACK, FRAME_CONFIG, FRAME_PING.
|
||||
async fn handle_edge_frame(
|
||||
frame: Frame,
|
||||
tunnel_io: &mut remoteingress_protocol::TunnelIo<EdgeTlsStream>,
|
||||
client_writers: &Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
|
||||
listen_ports: &Arc<RwLock<Vec<u16>>>,
|
||||
event_tx: &mpsc::Sender<EdgeEvent>,
|
||||
tunnel_writer_tx: &mpsc::Sender<Bytes>,
|
||||
tunnel_data_tx: &mpsc::Sender<Bytes>,
|
||||
tunnel_sustained_tx: &mpsc::Sender<Bytes>,
|
||||
port_listeners: &mut HashMap<u16, JoinHandle<()>>,
|
||||
active_streams: &Arc<AtomicU32>,
|
||||
next_stream_id: &Arc<AtomicU32>,
|
||||
edge_id: &str,
|
||||
connection_token: &CancellationToken,
|
||||
bind_address: &str,
|
||||
) -> EdgeFrameAction {
|
||||
match frame.frame_type {
|
||||
FRAME_DATA_BACK => {
|
||||
// Dispatch to per-stream unbounded channel. Flow control (WINDOW_UPDATE)
|
||||
// limits bytes-in-flight, so the channel won't grow unbounded. send() only
|
||||
// fails if the receiver is dropped (hub_to_client task already exited).
|
||||
let mut writers = client_writers.lock().await;
|
||||
if let Some(state) = writers.get(&frame.stream_id) {
|
||||
if state.back_tx.send(frame.payload).is_err() {
|
||||
// Receiver dropped — hub_to_client task already exited, clean up
|
||||
writers.remove(&frame.stream_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
FRAME_WINDOW_UPDATE_BACK => {
|
||||
if let Some(increment) = decode_window_update(&frame.payload) {
|
||||
if increment > 0 {
|
||||
let writers = client_writers.lock().await;
|
||||
if let Some(state) = writers.get(&frame.stream_id) {
|
||||
let prev = state.send_window.fetch_add(increment, Ordering::Release);
|
||||
if prev + increment > MAX_WINDOW_SIZE {
|
||||
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
|
||||
}
|
||||
state.window_notify.notify_one();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
FRAME_CLOSE_BACK => {
|
||||
let mut writers = client_writers.lock().await;
|
||||
writers.remove(&frame.stream_id);
|
||||
}
|
||||
FRAME_CONFIG => {
|
||||
if let Ok(update) = serde_json::from_slice::<ConfigUpdate>(&frame.payload) {
|
||||
log::info!("Config update from hub: ports {:?}", update.listen_ports);
|
||||
*listen_ports.write().await = update.listen_ports.clone();
|
||||
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
|
||||
listen_ports: update.listen_ports.clone(),
|
||||
});
|
||||
apply_port_config(
|
||||
&update.listen_ports,
|
||||
port_listeners,
|
||||
tunnel_writer_tx,
|
||||
tunnel_data_tx,
|
||||
tunnel_sustained_tx,
|
||||
client_writers,
|
||||
active_streams,
|
||||
next_stream_id,
|
||||
edge_id,
|
||||
connection_token,
|
||||
bind_address,
|
||||
);
|
||||
}
|
||||
}
|
||||
FRAME_PING => {
|
||||
// Queue PONG directly — no channel round-trip, guaranteed delivery
|
||||
tunnel_io.queue_ctrl(encode_frame(0, FRAME_PONG, &[]));
|
||||
}
|
||||
_ => {
|
||||
log::warn!("Unexpected frame type {} from hub", frame.frame_type);
|
||||
}
|
||||
}
|
||||
EdgeFrameAction::Continue
|
||||
}
|
||||
|
||||
async fn connect_to_hub_and_run(
|
||||
@@ -276,53 +383,70 @@ async fn connect_to_hub_and_run(
|
||||
Ok(s) => {
|
||||
// Disable Nagle's algorithm for low-latency control frames (PING/PONG, WINDOW_UPDATE)
|
||||
let _ = s.set_nodelay(true);
|
||||
// TCP keepalive detects silent network failures (NAT timeout, path change)
|
||||
// faster than the 45s application-level liveness timeout.
|
||||
let ka = socket2::TcpKeepalive::new()
|
||||
.with_time(Duration::from_secs(30));
|
||||
#[cfg(target_os = "linux")]
|
||||
let ka = ka.with_interval(Duration::from_secs(10));
|
||||
let _ = socket2::SockRef::from(&s).set_tcp_keepalive(&ka);
|
||||
s
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("Failed to connect to hub at {}: {}", addr, e);
|
||||
return EdgeLoopResult::Reconnect;
|
||||
return EdgeLoopResult::Reconnect(format!("tcp_connect_failed: {}", e));
|
||||
}
|
||||
};
|
||||
|
||||
let server_name = rustls::pki_types::ServerName::try_from(config.hub_host.clone())
|
||||
.unwrap_or_else(|_| rustls::pki_types::ServerName::try_from("remoteingress-hub".to_string()).unwrap());
|
||||
|
||||
let tls_stream = match connector.connect(server_name, tcp).await {
|
||||
let mut tls_stream = match connector.connect(server_name, tcp).await {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
log::error!("TLS handshake failed: {}", e);
|
||||
return EdgeLoopResult::Reconnect;
|
||||
return EdgeLoopResult::Reconnect(format!("tls_handshake_failed: {}", e));
|
||||
}
|
||||
};
|
||||
|
||||
let (read_half, mut write_half) = tokio::io::split(tls_stream);
|
||||
|
||||
// Send auth line
|
||||
// Send auth line (we own the whole stream — no split)
|
||||
let auth_line = format!("EDGE {} {}\n", config.edge_id, config.secret);
|
||||
if write_half.write_all(auth_line.as_bytes()).await.is_err() {
|
||||
return EdgeLoopResult::Reconnect;
|
||||
if tls_stream.write_all(auth_line.as_bytes()).await.is_err() {
|
||||
return EdgeLoopResult::Reconnect("auth_write_failed".to_string());
|
||||
}
|
||||
if tls_stream.flush().await.is_err() {
|
||||
return EdgeLoopResult::Reconnect("auth_flush_failed".to_string());
|
||||
}
|
||||
|
||||
// Read handshake response line from hub (JSON with initial config)
|
||||
let mut buf_reader = BufReader::new(read_half);
|
||||
let mut handshake_line = String::new();
|
||||
match buf_reader.read_line(&mut handshake_line).await {
|
||||
Ok(0) => {
|
||||
log::error!("Hub rejected connection (EOF before handshake)");
|
||||
return EdgeLoopResult::Reconnect;
|
||||
}
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
log::error!("Failed to read handshake response: {}", e);
|
||||
return EdgeLoopResult::Reconnect;
|
||||
// Read handshake line byte-by-byte (no BufReader — into_inner corrupts TLS state)
|
||||
let mut handshake_bytes = Vec::with_capacity(512);
|
||||
let mut byte = [0u8; 1];
|
||||
loop {
|
||||
match tls_stream.read_exact(&mut byte).await {
|
||||
Ok(_) => {
|
||||
handshake_bytes.push(byte[0]);
|
||||
if byte[0] == b'\n' { break; }
|
||||
if handshake_bytes.len() > 8192 {
|
||||
return EdgeLoopResult::Reconnect("handshake_too_long".to_string());
|
||||
}
|
||||
}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
|
||||
log::error!("Hub rejected connection (EOF before handshake)");
|
||||
return EdgeLoopResult::Reconnect("hub_rejected_eof".to_string());
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("Failed to read handshake response: {}", e);
|
||||
return EdgeLoopResult::Reconnect(format!("handshake_read_failed: {}", e));
|
||||
}
|
||||
}
|
||||
}
|
||||
let handshake_line = String::from_utf8_lossy(&handshake_bytes);
|
||||
|
||||
let handshake: HandshakeConfig = match serde_json::from_str(handshake_line.trim()) {
|
||||
Ok(h) => h,
|
||||
Err(e) => {
|
||||
log::error!("Invalid handshake response: {}", e);
|
||||
return EdgeLoopResult::Reconnect;
|
||||
return EdgeLoopResult::Reconnect(format!("handshake_invalid: {}", e));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -373,170 +497,119 @@ async fn connect_to_hub_and_run(
|
||||
let client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>> =
|
||||
Arc::new(Mutex::new(HashMap::new()));
|
||||
|
||||
// QoS dual-channel tunnel writer: control frames (PONG/WINDOW_UPDATE/CLOSE/OPEN)
|
||||
// have priority over data frames (DATA). Prevents PING starvation under load.
|
||||
let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Vec<u8>>(64);
|
||||
let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Vec<u8>>(4096);
|
||||
// Legacy alias — control channel for PONG, CLOSE, WINDOW_UPDATE, OPEN
|
||||
// QoS dual-channel: ctrl frames have priority over data frames.
|
||||
// Stream handlers send through these channels → TunnelIo drains them.
|
||||
let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Bytes>(512);
|
||||
let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Bytes>(4096);
|
||||
let (tunnel_sustained_tx, mut tunnel_sustained_rx) = mpsc::channel::<Bytes>(4096);
|
||||
let tunnel_writer_tx = tunnel_ctrl_tx.clone();
|
||||
let tw_token = connection_token.clone();
|
||||
let tunnel_writer_handle = tokio::spawn(async move {
|
||||
// BufWriter coalesces small writes (frame headers, control frames) into fewer
|
||||
// TLS records and syscalls. Flushed after each frame to avoid holding data.
|
||||
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
|
||||
loop {
|
||||
tokio::select! {
|
||||
biased; // control frames always take priority over data
|
||||
ctrl = tunnel_ctrl_rx.recv() => {
|
||||
match ctrl {
|
||||
Some(frame_data) => {
|
||||
if writer.write_all(&frame_data).await.is_err() { break; }
|
||||
if writer.flush().await.is_err() { break; }
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
data = tunnel_data_rx.recv() => {
|
||||
match data {
|
||||
Some(frame_data) => {
|
||||
if writer.write_all(&frame_data).await.is_err() { break; }
|
||||
if writer.flush().await.is_err() { break; }
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
_ = tw_token.cancelled() => break,
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Start TCP listeners for initial ports (hot-reloadable)
|
||||
// Start TCP listeners for initial ports
|
||||
let mut port_listeners: HashMap<u16, JoinHandle<()>> = HashMap::new();
|
||||
let bind_address = config.bind_address.as_deref().unwrap_or("0.0.0.0");
|
||||
apply_port_config(
|
||||
&handshake.listen_ports,
|
||||
&mut port_listeners,
|
||||
&tunnel_writer_tx,
|
||||
&tunnel_data_tx,
|
||||
&tunnel_sustained_tx,
|
||||
&client_writers,
|
||||
active_streams,
|
||||
next_stream_id,
|
||||
&config.edge_id,
|
||||
connection_token,
|
||||
bind_address,
|
||||
);
|
||||
|
||||
// Heartbeat: liveness timeout detects silent hub failures
|
||||
// Single-owner I/O engine — no tokio::io::split, no mutex
|
||||
let mut tunnel_io = remoteingress_protocol::TunnelIo::new(tls_stream, Vec::new());
|
||||
|
||||
|
||||
let liveness_timeout_dur = Duration::from_secs(45);
|
||||
let mut last_activity = Instant::now();
|
||||
let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur));
|
||||
|
||||
// Read frames from hub
|
||||
let mut frame_reader = FrameReader::new(buf_reader);
|
||||
let result = loop {
|
||||
tokio::select! {
|
||||
frame_result = frame_reader.next_frame() => {
|
||||
match frame_result {
|
||||
Ok(Some(frame)) => {
|
||||
// Reset liveness on any received frame
|
||||
last_activity = Instant::now();
|
||||
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||
let result = 'io_loop: loop {
|
||||
// Drain any buffered frames
|
||||
loop {
|
||||
let frame = match tunnel_io.try_parse_frame() {
|
||||
Some(Ok(f)) => f,
|
||||
Some(Err(e)) => {
|
||||
log::error!("Hub frame error: {}", e);
|
||||
break 'io_loop EdgeLoopResult::Reconnect(format!("hub_frame_error: {}", e));
|
||||
}
|
||||
None => break,
|
||||
};
|
||||
last_activity = Instant::now();
|
||||
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||
if let EdgeFrameAction::Disconnect(reason) = handle_edge_frame(
|
||||
frame, &mut tunnel_io, &client_writers, listen_ports, event_tx,
|
||||
&tunnel_writer_tx, &tunnel_data_tx, &tunnel_sustained_tx, &mut port_listeners,
|
||||
active_streams, next_stream_id, &config.edge_id, connection_token, bind_address,
|
||||
).await {
|
||||
break 'io_loop EdgeLoopResult::Reconnect(reason);
|
||||
}
|
||||
}
|
||||
|
||||
match frame.frame_type {
|
||||
FRAME_DATA_BACK => {
|
||||
// Non-blocking dispatch to per-stream channel.
|
||||
// With flow control, the sender should rarely exceed the channel capacity.
|
||||
let mut writers = client_writers.lock().await;
|
||||
if let Some(state) = writers.get(&frame.stream_id) {
|
||||
if state.back_tx.try_send(frame.payload).is_err() {
|
||||
log::warn!("Stream {} back-channel full, closing stream", frame.stream_id);
|
||||
writers.remove(&frame.stream_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
FRAME_WINDOW_UPDATE_BACK => {
|
||||
// Hub consumed data — increase our send window for this stream (upload direction)
|
||||
if let Some(increment) = decode_window_update(&frame.payload) {
|
||||
if increment > 0 {
|
||||
let writers = client_writers.lock().await;
|
||||
if let Some(state) = writers.get(&frame.stream_id) {
|
||||
let prev = state.send_window.fetch_add(increment, Ordering::Release);
|
||||
if prev + increment > MAX_WINDOW_SIZE {
|
||||
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
|
||||
}
|
||||
state.window_notify.notify_one();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
FRAME_CLOSE_BACK => {
|
||||
let mut writers = client_writers.lock().await;
|
||||
writers.remove(&frame.stream_id);
|
||||
}
|
||||
FRAME_CONFIG => {
|
||||
if let Ok(update) = serde_json::from_slice::<ConfigUpdate>(&frame.payload) {
|
||||
log::info!("Config update from hub: ports {:?}", update.listen_ports);
|
||||
*listen_ports.write().await = update.listen_ports.clone();
|
||||
let _ = event_tx.try_send(EdgeEvent::PortsUpdated {
|
||||
listen_ports: update.listen_ports.clone(),
|
||||
});
|
||||
apply_port_config(
|
||||
&update.listen_ports,
|
||||
&mut port_listeners,
|
||||
&tunnel_writer_tx,
|
||||
&tunnel_data_tx,
|
||||
&client_writers,
|
||||
active_streams,
|
||||
next_stream_id,
|
||||
&config.edge_id,
|
||||
connection_token,
|
||||
);
|
||||
}
|
||||
}
|
||||
FRAME_PING => {
|
||||
let pong_frame = encode_frame(0, FRAME_PONG, &[]);
|
||||
if tunnel_writer_tx.try_send(pong_frame).is_err() {
|
||||
log::warn!("Failed to send PONG, writer channel full/closed");
|
||||
break EdgeLoopResult::Reconnect;
|
||||
}
|
||||
log::trace!("Received PING from hub, sent PONG");
|
||||
}
|
||||
_ => {
|
||||
log::warn!("Unexpected frame type {} from hub", frame.frame_type);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
log::info!("Hub disconnected (EOF)");
|
||||
break EdgeLoopResult::Reconnect;
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("Hub frame error: {}", e);
|
||||
break EdgeLoopResult::Reconnect;
|
||||
}
|
||||
// Poll I/O: write(ctrl→data), flush, read, channels, timers
|
||||
let event = std::future::poll_fn(|cx| {
|
||||
tunnel_io.poll_step(cx, &mut tunnel_ctrl_rx, &mut tunnel_data_rx, &mut tunnel_sustained_rx, &mut liveness_deadline, connection_token)
|
||||
}).await;
|
||||
|
||||
match event {
|
||||
remoteingress_protocol::TunnelEvent::Frame(frame) => {
|
||||
last_activity = Instant::now();
|
||||
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||
if let EdgeFrameAction::Disconnect(reason) = handle_edge_frame(
|
||||
frame, &mut tunnel_io, &client_writers, listen_ports, event_tx,
|
||||
&tunnel_writer_tx, &tunnel_data_tx, &tunnel_sustained_tx, &mut port_listeners,
|
||||
active_streams, next_stream_id, &config.edge_id, connection_token, bind_address,
|
||||
).await {
|
||||
break EdgeLoopResult::Reconnect(reason);
|
||||
}
|
||||
}
|
||||
_ = &mut liveness_deadline => {
|
||||
log::warn!("Hub liveness timeout (no frames for {}s), reconnecting",
|
||||
liveness_timeout_dur.as_secs());
|
||||
break EdgeLoopResult::Reconnect;
|
||||
remoteingress_protocol::TunnelEvent::Eof => {
|
||||
log::info!("Hub disconnected (EOF)");
|
||||
break EdgeLoopResult::Reconnect("hub_eof".to_string());
|
||||
}
|
||||
_ = connection_token.cancelled() => {
|
||||
log::info!("Connection cancelled");
|
||||
break EdgeLoopResult::Shutdown;
|
||||
remoteingress_protocol::TunnelEvent::ReadError(e) => {
|
||||
log::error!("Hub frame read error: {}", e);
|
||||
break EdgeLoopResult::Reconnect(format!("hub_frame_error: {}", e));
|
||||
}
|
||||
_ = shutdown_rx.recv() => {
|
||||
remoteingress_protocol::TunnelEvent::WriteError(e) => {
|
||||
log::error!("Tunnel write error: {}", e);
|
||||
break EdgeLoopResult::Reconnect(format!("tunnel_write_error: {}", e));
|
||||
}
|
||||
remoteingress_protocol::TunnelEvent::LivenessTimeout => {
|
||||
log::warn!("Hub liveness timeout (no frames for {}s), reconnecting", liveness_timeout_dur.as_secs());
|
||||
break EdgeLoopResult::Reconnect("liveness_timeout".to_string());
|
||||
}
|
||||
remoteingress_protocol::TunnelEvent::Cancelled => {
|
||||
if shutdown_rx.try_recv().is_ok() {
|
||||
break EdgeLoopResult::Shutdown;
|
||||
}
|
||||
break EdgeLoopResult::Shutdown;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Cancel connection token to propagate to all child tasks BEFORE aborting
|
||||
// Cancel stream tokens FIRST so stream handlers exit immediately.
|
||||
// If we TLS-shutdown first, stream handlers are stuck sending to dead channels
|
||||
// for up to 2 seconds while the shutdown times out on a dead connection.
|
||||
connection_token.cancel();
|
||||
stun_handle.abort();
|
||||
tunnel_writer_handle.abort();
|
||||
for (_, h) in port_listeners.drain() {
|
||||
h.abort();
|
||||
}
|
||||
|
||||
// Graceful TLS shutdown: send close_notify so the hub sees a clean disconnect.
|
||||
// Stream handlers are already cancelled, so no new data is being produced.
|
||||
let mut tls_stream = tunnel_io.into_inner();
|
||||
let _ = tokio::time::timeout(
|
||||
Duration::from_secs(2),
|
||||
tls_stream.shutdown(),
|
||||
).await;
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
@@ -544,13 +617,15 @@ async fn connect_to_hub_and_run(
|
||||
fn apply_port_config(
|
||||
new_ports: &[u16],
|
||||
port_listeners: &mut HashMap<u16, JoinHandle<()>>,
|
||||
tunnel_ctrl_tx: &mpsc::Sender<Vec<u8>>,
|
||||
tunnel_data_tx: &mpsc::Sender<Vec<u8>>,
|
||||
tunnel_ctrl_tx: &mpsc::Sender<Bytes>,
|
||||
tunnel_data_tx: &mpsc::Sender<Bytes>,
|
||||
tunnel_sustained_tx: &mpsc::Sender<Bytes>,
|
||||
client_writers: &Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
|
||||
active_streams: &Arc<AtomicU32>,
|
||||
next_stream_id: &Arc<AtomicU32>,
|
||||
edge_id: &str,
|
||||
connection_token: &CancellationToken,
|
||||
bind_address: &str,
|
||||
) {
|
||||
let new_set: std::collections::HashSet<u16> = new_ports.iter().copied().collect();
|
||||
let old_set: std::collections::HashSet<u16> = port_listeners.keys().copied().collect();
|
||||
@@ -567,14 +642,16 @@ fn apply_port_config(
|
||||
for &port in new_set.difference(&old_set) {
|
||||
let tunnel_ctrl_tx = tunnel_ctrl_tx.clone();
|
||||
let tunnel_data_tx = tunnel_data_tx.clone();
|
||||
let tunnel_sustained_tx = tunnel_sustained_tx.clone();
|
||||
let client_writers = client_writers.clone();
|
||||
let active_streams = active_streams.clone();
|
||||
let next_stream_id = next_stream_id.clone();
|
||||
let edge_id = edge_id.to_string();
|
||||
let port_token = connection_token.child_token();
|
||||
|
||||
let bind_addr = bind_address.to_string();
|
||||
let handle = tokio::spawn(async move {
|
||||
let listener = match TcpListener::bind(("0.0.0.0", port)).await {
|
||||
let listener = match TcpListener::bind((bind_addr.as_str(), port)).await {
|
||||
Ok(l) => l,
|
||||
Err(e) => {
|
||||
log::error!("Failed to bind port {}: {}", port, e);
|
||||
@@ -588,9 +665,19 @@ fn apply_port_config(
|
||||
accept_result = listener.accept() => {
|
||||
match accept_result {
|
||||
Ok((client_stream, client_addr)) => {
|
||||
// TCP keepalive detects dead clients that disappear without FIN.
|
||||
// Without this, zombie streams accumulate and never get cleaned up.
|
||||
let _ = client_stream.set_nodelay(true);
|
||||
let ka = socket2::TcpKeepalive::new()
|
||||
.with_time(Duration::from_secs(60));
|
||||
#[cfg(target_os = "linux")]
|
||||
let ka = ka.with_interval(Duration::from_secs(60));
|
||||
let _ = socket2::SockRef::from(&client_stream).set_tcp_keepalive(&ka);
|
||||
|
||||
let stream_id = next_stream_id.fetch_add(1, Ordering::Relaxed);
|
||||
let tunnel_ctrl_tx = tunnel_ctrl_tx.clone();
|
||||
let tunnel_data_tx = tunnel_data_tx.clone();
|
||||
let tunnel_sustained_tx = tunnel_sustained_tx.clone();
|
||||
let client_writers = client_writers.clone();
|
||||
let active_streams = active_streams.clone();
|
||||
let edge_id = edge_id.clone();
|
||||
@@ -607,11 +694,24 @@ fn apply_port_config(
|
||||
&edge_id,
|
||||
tunnel_ctrl_tx,
|
||||
tunnel_data_tx,
|
||||
tunnel_sustained_tx,
|
||||
client_writers,
|
||||
client_token,
|
||||
Arc::clone(&active_streams),
|
||||
)
|
||||
.await;
|
||||
active_streams.fetch_sub(1, Ordering::Relaxed);
|
||||
// Saturating decrement: prevent underflow when
|
||||
// edge_main_loop's store(0) races with task cleanup.
|
||||
loop {
|
||||
let current = active_streams.load(Ordering::Relaxed);
|
||||
if current == 0 { break; }
|
||||
if active_streams.compare_exchange_weak(
|
||||
current, current - 1,
|
||||
Ordering::Relaxed, Ordering::Relaxed,
|
||||
).is_ok() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
@@ -636,10 +736,12 @@ async fn handle_client_connection(
|
||||
stream_id: u32,
|
||||
dest_port: u16,
|
||||
edge_id: &str,
|
||||
tunnel_ctrl_tx: mpsc::Sender<Vec<u8>>,
|
||||
tunnel_data_tx: mpsc::Sender<Vec<u8>>,
|
||||
tunnel_ctrl_tx: mpsc::Sender<Bytes>,
|
||||
tunnel_data_tx: mpsc::Sender<Bytes>,
|
||||
tunnel_sustained_tx: mpsc::Sender<Bytes>,
|
||||
client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
|
||||
client_token: CancellationToken,
|
||||
active_streams: Arc<AtomicU32>,
|
||||
) {
|
||||
let client_ip = client_addr.ip().to_string();
|
||||
let client_port = client_addr.port();
|
||||
@@ -650,13 +752,24 @@ async fn handle_client_connection(
|
||||
// Send OPEN frame with PROXY v1 header via control channel
|
||||
let proxy_header = build_proxy_v1_header(&client_ip, edge_ip, client_port, dest_port);
|
||||
let open_frame = encode_frame(stream_id, FRAME_OPEN, proxy_header.as_bytes());
|
||||
if tunnel_ctrl_tx.send(open_frame).await.is_err() {
|
||||
let send_ok = tokio::select! {
|
||||
result = tunnel_ctrl_tx.send(open_frame) => result.is_ok(),
|
||||
_ = client_token.cancelled() => false,
|
||||
};
|
||||
if !send_ok {
|
||||
return;
|
||||
}
|
||||
|
||||
// Set up channel for data coming back from hub (capacity 16 is sufficient with flow control)
|
||||
let (back_tx, mut back_rx) = mpsc::channel::<Vec<u8>>(256);
|
||||
let send_window = Arc::new(AtomicU32::new(INITIAL_STREAM_WINDOW));
|
||||
// Per-stream unbounded back-channel. Flow control (WINDOW_UPDATE) limits
|
||||
// bytes-in-flight, so this won't grow unbounded. Unbounded avoids killing
|
||||
// streams due to channel overflow — backpressure slows streams, never kills them.
|
||||
let (back_tx, mut back_rx) = mpsc::unbounded_channel::<Bytes>();
|
||||
// Adaptive initial window: scale with current stream count to keep total in-flight
|
||||
// data within the 200MB budget. Prevents burst flooding when many streams open.
|
||||
let initial_window = remoteingress_protocol::compute_window_for_stream_count(
|
||||
active_streams.load(Ordering::Relaxed),
|
||||
);
|
||||
let send_window = Arc::new(AtomicU32::new(initial_window));
|
||||
let window_notify = Arc::new(Notify::new());
|
||||
{
|
||||
let mut writers = client_writers.lock().await;
|
||||
@@ -673,6 +786,7 @@ async fn handle_client_connection(
|
||||
// After writing to client TCP, send WINDOW_UPDATE to hub so it can send more
|
||||
let hub_to_client_token = client_token.clone();
|
||||
let wu_tx = tunnel_ctrl_tx.clone();
|
||||
let active_streams_h2c = Arc::clone(&active_streams);
|
||||
let mut hub_to_client = tokio::spawn(async move {
|
||||
let mut consumed_since_update: u32 = 0;
|
||||
loop {
|
||||
@@ -684,14 +798,28 @@ async fn handle_client_connection(
|
||||
if client_write.write_all(&data).await.is_err() {
|
||||
break;
|
||||
}
|
||||
// Track consumption for flow control
|
||||
// Track consumption for adaptive flow control.
|
||||
// The increment is capped to the adaptive window so the sender's
|
||||
// effective window shrinks to match current demand (fewer streams
|
||||
// = larger window, more streams = smaller window per stream).
|
||||
consumed_since_update += len;
|
||||
if consumed_since_update >= WINDOW_UPDATE_THRESHOLD {
|
||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, consumed_since_update);
|
||||
if wu_tx.try_send(frame).is_ok() {
|
||||
consumed_since_update = 0;
|
||||
let adaptive_window = remoteingress_protocol::compute_window_for_stream_count(
|
||||
active_streams_h2c.load(Ordering::Relaxed),
|
||||
);
|
||||
let threshold = adaptive_window / 2;
|
||||
if consumed_since_update >= threshold {
|
||||
let increment = consumed_since_update.min(adaptive_window);
|
||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, increment);
|
||||
// Use send().await for guaranteed delivery — dropping WINDOW_UPDATEs
|
||||
// causes permanent flow stalls. Safe: runs in per-stream task, not main loop.
|
||||
tokio::select! {
|
||||
result = wu_tx.send(frame) => {
|
||||
if result.is_ok() {
|
||||
consumed_since_update -= increment;
|
||||
}
|
||||
}
|
||||
_ = hub_to_client_token.cancelled() => break,
|
||||
}
|
||||
// If try_send fails, keep accumulating — retry on next threshold
|
||||
}
|
||||
}
|
||||
None => break,
|
||||
@@ -703,20 +831,32 @@ async fn handle_client_connection(
|
||||
// Send final window update for any remaining consumed bytes
|
||||
if consumed_since_update > 0 {
|
||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, consumed_since_update);
|
||||
let _ = wu_tx.try_send(frame);
|
||||
tokio::select! {
|
||||
_ = wu_tx.send(frame) => {}
|
||||
_ = hub_to_client_token.cancelled() => {}
|
||||
}
|
||||
}
|
||||
let _ = client_write.shutdown().await;
|
||||
});
|
||||
|
||||
// Task: client -> hub (upload direction) with per-stream flow control
|
||||
let mut buf = vec![0u8; 32768];
|
||||
// Task: client -> hub (upload direction) with per-stream flow control.
|
||||
// Zero-copy: read payload directly after the header, then prepend header.
|
||||
let mut buf = vec![0u8; FRAME_HEADER_SIZE + 32768];
|
||||
let mut stream_bytes_sent: u64 = 0;
|
||||
let stream_start = tokio::time::Instant::now();
|
||||
let mut is_sustained = false;
|
||||
loop {
|
||||
// Wait for send window to have capacity (with stall timeout)
|
||||
// Wait for send window to have capacity (with stall timeout).
|
||||
// Safe pattern: register notified BEFORE checking the condition
|
||||
// to avoid missing a notify_one that fires between load and select.
|
||||
loop {
|
||||
let notified = window_notify.notified();
|
||||
tokio::pin!(notified);
|
||||
notified.as_mut().enable();
|
||||
let w = send_window.load(Ordering::Acquire);
|
||||
if w > 0 { break; }
|
||||
tokio::select! {
|
||||
_ = window_notify.notified() => continue,
|
||||
_ = notified => continue,
|
||||
_ = client_token.cancelled() => break,
|
||||
_ = tokio::time::sleep(Duration::from_secs(120)) => {
|
||||
log::warn!("Stream {} upload stalled (window empty for 120s)", stream_id);
|
||||
@@ -726,21 +866,43 @@ async fn handle_client_connection(
|
||||
}
|
||||
if client_token.is_cancelled() { break; }
|
||||
|
||||
// Limit read size to available window
|
||||
// Limit read size to available window.
|
||||
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
|
||||
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
|
||||
// which would be falsely interpreted as EOF.
|
||||
let w = send_window.load(Ordering::Acquire) as usize;
|
||||
let max_read = w.min(buf.len());
|
||||
if w == 0 {
|
||||
log::warn!("Stream {} upload: window still 0 after stall timeout, closing", stream_id);
|
||||
break;
|
||||
}
|
||||
let max_read = w.min(32768);
|
||||
|
||||
tokio::select! {
|
||||
read_result = client_read.read(&mut buf[..max_read]) => {
|
||||
read_result = client_read.read(&mut buf[FRAME_HEADER_SIZE..FRAME_HEADER_SIZE + max_read]) => {
|
||||
match read_result {
|
||||
Ok(0) => break,
|
||||
Ok(n) => {
|
||||
send_window.fetch_sub(n as u32, Ordering::Release);
|
||||
let data_frame = encode_frame(stream_id, FRAME_DATA, &buf[..n]);
|
||||
if tunnel_data_tx.send(data_frame).await.is_err() {
|
||||
log::warn!("Stream {} data channel closed, closing", stream_id);
|
||||
break;
|
||||
encode_frame_header(&mut buf, stream_id, FRAME_DATA, n);
|
||||
let data_frame = Bytes::copy_from_slice(&buf[..FRAME_HEADER_SIZE + n]);
|
||||
// Sustained classification: >2.5 MB/s for >10 seconds
|
||||
stream_bytes_sent += n as u64;
|
||||
if !is_sustained {
|
||||
let elapsed = stream_start.elapsed().as_secs();
|
||||
if elapsed >= remoteingress_protocol::SUSTAINED_MIN_DURATION_SECS
|
||||
&& stream_bytes_sent / elapsed >= remoteingress_protocol::SUSTAINED_THRESHOLD_BPS
|
||||
{
|
||||
is_sustained = true;
|
||||
log::debug!("Stream {} classified as sustained (upload, {} bytes in {}s)",
|
||||
stream_id, stream_bytes_sent, elapsed);
|
||||
}
|
||||
}
|
||||
let tx = if is_sustained { &tunnel_sustained_tx } else { &tunnel_data_tx };
|
||||
let sent = tokio::select! {
|
||||
result = tx.send(data_frame) => result.is_ok(),
|
||||
_ = client_token.cancelled() => false,
|
||||
};
|
||||
if !sent { break; }
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
@@ -761,9 +923,14 @@ async fn handle_client_connection(
|
||||
).await;
|
||||
|
||||
// NOW send CLOSE — the response has been fully delivered (or timed out).
|
||||
// select! with cancellation guard prevents indefinite blocking if tunnel dies.
|
||||
if !client_token.is_cancelled() {
|
||||
let close_frame = encode_frame(stream_id, FRAME_CLOSE, &[]);
|
||||
let _ = tunnel_data_tx.send(close_frame).await;
|
||||
let tx = if is_sustained { &tunnel_sustained_tx } else { &tunnel_data_tx };
|
||||
tokio::select! {
|
||||
_ = tx.send(close_frame) => {}
|
||||
_ = client_token.cancelled() => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up
|
||||
@@ -803,6 +970,7 @@ mod tests {
|
||||
hub_port: 9999,
|
||||
edge_id: "e1".to_string(),
|
||||
secret: "sec".to_string(),
|
||||
bind_address: None,
|
||||
};
|
||||
let json = serde_json::to_string(&config).unwrap();
|
||||
let back: EdgeConfig = serde_json::from_str(&json).unwrap();
|
||||
@@ -874,9 +1042,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_edge_event_tunnel_disconnected() {
|
||||
let event = EdgeEvent::TunnelDisconnected;
|
||||
let event = EdgeEvent::TunnelDisconnected { reason: "hub_eof".to_string() };
|
||||
let json = serde_json::to_value(&event).unwrap();
|
||||
assert_eq!(json["type"], "tunnelDisconnected");
|
||||
assert_eq!(json["reason"], "hub_eof");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -918,6 +1087,7 @@ mod tests {
|
||||
hub_port: 8443,
|
||||
edge_id: "test-edge".to_string(),
|
||||
secret: "test-secret".to_string(),
|
||||
bind_address: None,
|
||||
});
|
||||
let status = edge.get_status().await;
|
||||
assert!(!status.running);
|
||||
@@ -934,6 +1104,7 @@ mod tests {
|
||||
hub_port: 8443,
|
||||
edge_id: "e".to_string(),
|
||||
secret: "s".to_string(),
|
||||
bind_address: None,
|
||||
});
|
||||
let rx1 = edge.take_event_rx().await;
|
||||
assert!(rx1.is_some());
|
||||
@@ -948,6 +1119,7 @@ mod tests {
|
||||
hub_port: 8443,
|
||||
edge_id: "e".to_string(),
|
||||
secret: "s".to_string(),
|
||||
bind_address: None,
|
||||
});
|
||||
edge.stop().await; // should not panic
|
||||
let status = edge.get_status().await;
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::time::Duration;
|
||||
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::{TcpListener, TcpStream};
|
||||
use tokio::sync::{mpsc, Mutex, Notify, RwLock, Semaphore};
|
||||
use tokio::time::{interval, sleep_until, Instant};
|
||||
@@ -10,12 +10,24 @@ use tokio_rustls::TlsAcceptor;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use bytes::Bytes;
|
||||
use remoteingress_protocol::*;
|
||||
|
||||
type HubTlsStream = tokio_rustls::server::TlsStream<TcpStream>;
|
||||
|
||||
/// Result of processing a frame.
|
||||
#[allow(dead_code)]
|
||||
enum FrameAction {
|
||||
Continue,
|
||||
Disconnect(String),
|
||||
}
|
||||
|
||||
/// Per-stream state tracked in the hub's stream map.
|
||||
struct HubStreamState {
|
||||
/// Channel to deliver FRAME_DATA payloads to the upstream writer task.
|
||||
data_tx: mpsc::Sender<Vec<u8>>,
|
||||
/// Unbounded channel to deliver FRAME_DATA payloads to the upstream writer task.
|
||||
/// Unbounded because flow control (WINDOW_UPDATE) already limits bytes-in-flight.
|
||||
/// A bounded channel would kill streams instead of applying backpressure.
|
||||
data_tx: mpsc::UnboundedSender<Bytes>,
|
||||
/// Cancellation token for this stream.
|
||||
cancel_token: CancellationToken,
|
||||
/// Send window for FRAME_DATA_BACK (download direction).
|
||||
@@ -92,7 +104,7 @@ pub enum HubEvent {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
EdgeConnected { edge_id: String, peer_addr: String },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
EdgeDisconnected { edge_id: String },
|
||||
EdgeDisconnected { edge_id: String, reason: String },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
StreamOpened { edge_id: String, stream_id: u32 },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -123,9 +135,9 @@ pub struct TunnelHub {
|
||||
struct ConnectedEdgeInfo {
|
||||
connected_at: u64,
|
||||
peer_addr: String,
|
||||
active_streams: Arc<Mutex<HashMap<u32, HubStreamState>>>,
|
||||
edge_stream_count: Arc<AtomicU32>,
|
||||
config_tx: mpsc::Sender<EdgeConfigUpdate>,
|
||||
#[allow(dead_code)] // kept alive for Drop — cancels child tokens when edge is removed
|
||||
/// Used to cancel the old connection when an edge reconnects.
|
||||
cancel_token: CancellationToken,
|
||||
}
|
||||
|
||||
@@ -189,11 +201,10 @@ impl TunnelHub {
|
||||
|
||||
let mut connected = Vec::new();
|
||||
for (id, info) in edges.iter() {
|
||||
let streams = info.active_streams.lock().await;
|
||||
connected.push(ConnectedEdgeStatus {
|
||||
edge_id: id.clone(),
|
||||
connected_at: info.connected_at,
|
||||
active_streams: streams.len(),
|
||||
active_streams: info.edge_stream_count.load(Ordering::Relaxed) as usize,
|
||||
peer_addr: info.peer_addr.clone(),
|
||||
});
|
||||
}
|
||||
@@ -287,6 +298,320 @@ impl Drop for TunnelHub {
|
||||
/// Maximum concurrent streams per edge connection.
|
||||
const MAX_STREAMS_PER_EDGE: usize = 1024;
|
||||
|
||||
/// Process a single frame received from the edge side of the tunnel.
|
||||
/// Handles FRAME_OPEN, FRAME_DATA, FRAME_WINDOW_UPDATE, FRAME_CLOSE, and FRAME_PONG.
|
||||
async fn handle_hub_frame(
|
||||
frame: Frame,
|
||||
tunnel_io: &mut remoteingress_protocol::TunnelIo<HubTlsStream>,
|
||||
streams: &mut HashMap<u32, HubStreamState>,
|
||||
stream_semaphore: &Arc<Semaphore>,
|
||||
edge_stream_count: &Arc<AtomicU32>,
|
||||
edge_id: &str,
|
||||
event_tx: &mpsc::Sender<HubEvent>,
|
||||
ctrl_tx: &mpsc::Sender<Bytes>,
|
||||
data_tx: &mpsc::Sender<Bytes>,
|
||||
sustained_tx: &mpsc::Sender<Bytes>,
|
||||
target_host: &str,
|
||||
edge_token: &CancellationToken,
|
||||
cleanup_tx: &mpsc::Sender<u32>,
|
||||
) -> FrameAction {
|
||||
match frame.frame_type {
|
||||
FRAME_OPEN => {
|
||||
// A4: Check stream limit before processing
|
||||
let permit = match stream_semaphore.clone().try_acquire_owned() {
|
||||
Ok(p) => p,
|
||||
Err(_) => {
|
||||
log::warn!("Edge {} exceeded max streams ({}), rejecting stream {}",
|
||||
edge_id, MAX_STREAMS_PER_EDGE, frame.stream_id);
|
||||
let close_frame = encode_frame(frame.stream_id, FRAME_CLOSE_BACK, &[]);
|
||||
tunnel_io.queue_ctrl(close_frame);
|
||||
return FrameAction::Continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Payload is PROXY v1 header line
|
||||
let proxy_header = String::from_utf8_lossy(&frame.payload).to_string();
|
||||
|
||||
// Parse destination port from PROXY header
|
||||
let dest_port = parse_dest_port_from_proxy(&proxy_header).unwrap_or(443);
|
||||
|
||||
let stream_id = frame.stream_id;
|
||||
let cleanup = cleanup_tx.clone();
|
||||
let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK
|
||||
let data_writer_tx = data_tx.clone(); // data: DATA_BACK
|
||||
let sustained_writer_tx = sustained_tx.clone(); // sustained: DATA_BACK from elephant flows
|
||||
let target = target_host.to_string();
|
||||
let stream_token = edge_token.child_token();
|
||||
|
||||
let _ = event_tx.try_send(HubEvent::StreamOpened {
|
||||
edge_id: edge_id.to_string(),
|
||||
stream_id,
|
||||
});
|
||||
|
||||
// Create channel for data from edge to this stream
|
||||
let (stream_data_tx, mut stream_data_rx) = mpsc::unbounded_channel::<Bytes>();
|
||||
// Adaptive initial window: scale with current stream count
|
||||
// to keep total in-flight data within the 200MB budget.
|
||||
let initial_window = compute_window_for_stream_count(
|
||||
edge_stream_count.load(Ordering::Relaxed),
|
||||
);
|
||||
let send_window = Arc::new(AtomicU32::new(initial_window));
|
||||
let window_notify = Arc::new(Notify::new());
|
||||
streams.insert(stream_id, HubStreamState {
|
||||
data_tx: stream_data_tx,
|
||||
cancel_token: stream_token.clone(),
|
||||
send_window: Arc::clone(&send_window),
|
||||
window_notify: Arc::clone(&window_notify),
|
||||
});
|
||||
|
||||
// Spawn task: connect to SmartProxy, send PROXY header, pipe data
|
||||
let stream_counter = Arc::clone(edge_stream_count);
|
||||
tokio::spawn(async move {
|
||||
let _permit = permit; // hold semaphore permit until stream completes
|
||||
stream_counter.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
let result = async {
|
||||
// A2: Connect to SmartProxy with timeout
|
||||
let mut upstream = tokio::time::timeout(
|
||||
Duration::from_secs(10),
|
||||
TcpStream::connect((target.as_str(), dest_port)),
|
||||
)
|
||||
.await
|
||||
.map_err(|_| -> Box<dyn std::error::Error + Send + Sync> {
|
||||
format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into()
|
||||
})??;
|
||||
|
||||
upstream.set_nodelay(true)?;
|
||||
upstream.write_all(proxy_header.as_bytes()).await?;
|
||||
|
||||
let (mut up_read, mut up_write) =
|
||||
upstream.into_split();
|
||||
|
||||
// Forward data from edge (via channel) to SmartProxy
|
||||
// After writing to upstream, send WINDOW_UPDATE_BACK to edge
|
||||
let writer_token = stream_token.clone();
|
||||
let wub_tx = writer_tx.clone();
|
||||
let stream_counter_w = Arc::clone(&stream_counter);
|
||||
let writer_for_edge_data = tokio::spawn(async move {
|
||||
let mut consumed_since_update: u32 = 0;
|
||||
loop {
|
||||
tokio::select! {
|
||||
data = stream_data_rx.recv() => {
|
||||
match data {
|
||||
Some(data) => {
|
||||
let len = data.len() as u32;
|
||||
// Check cancellation alongside the write so we respond
|
||||
// promptly to FRAME_CLOSE instead of blocking up to 60s.
|
||||
let write_result = tokio::select! {
|
||||
r = tokio::time::timeout(
|
||||
Duration::from_secs(60),
|
||||
up_write.write_all(&data),
|
||||
) => r,
|
||||
_ = writer_token.cancelled() => break,
|
||||
};
|
||||
match write_result {
|
||||
Ok(Ok(())) => {}
|
||||
Ok(Err(_)) => break,
|
||||
Err(_) => {
|
||||
log::warn!("Stream {} write to upstream timed out (60s)", stream_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Track consumption for adaptive flow control.
|
||||
// Increment capped to adaptive window to limit per-stream in-flight data.
|
||||
consumed_since_update += len;
|
||||
let adaptive_window = remoteingress_protocol::compute_window_for_stream_count(
|
||||
stream_counter_w.load(Ordering::Relaxed),
|
||||
);
|
||||
let threshold = adaptive_window / 2;
|
||||
if consumed_since_update >= threshold {
|
||||
let increment = consumed_since_update.min(adaptive_window);
|
||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, increment);
|
||||
// Use send().await for guaranteed delivery — dropping WINDOW_UPDATEs
|
||||
// causes permanent flow stalls. Safe: runs in per-stream task, not main loop.
|
||||
tokio::select! {
|
||||
result = wub_tx.send(frame) => {
|
||||
if result.is_ok() {
|
||||
consumed_since_update -= increment;
|
||||
}
|
||||
}
|
||||
_ = writer_token.cancelled() => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
_ = writer_token.cancelled() => break,
|
||||
}
|
||||
}
|
||||
// Send final window update for remaining consumed bytes
|
||||
if consumed_since_update > 0 {
|
||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update);
|
||||
tokio::select! {
|
||||
_ = wub_tx.send(frame) => {}
|
||||
_ = writer_token.cancelled() => {}
|
||||
}
|
||||
}
|
||||
let _ = up_write.shutdown().await;
|
||||
});
|
||||
|
||||
// Forward data from SmartProxy back to edge via writer channel
|
||||
// with per-stream flow control (check send_window before reading).
|
||||
// Zero-copy: read payload directly after the header, then prepend header.
|
||||
let mut buf = vec![0u8; FRAME_HEADER_SIZE + 32768];
|
||||
let mut dl_bytes_sent: u64 = 0;
|
||||
let dl_start = tokio::time::Instant::now();
|
||||
let mut is_sustained = false;
|
||||
loop {
|
||||
// Wait for send window to have capacity (with stall timeout).
|
||||
// Safe pattern: register notified BEFORE checking the condition
|
||||
// to avoid missing a notify_one that fires between load and select.
|
||||
loop {
|
||||
let notified = window_notify.notified();
|
||||
tokio::pin!(notified);
|
||||
notified.as_mut().enable();
|
||||
let w = send_window.load(Ordering::Acquire);
|
||||
if w > 0 { break; }
|
||||
tokio::select! {
|
||||
_ = notified => continue,
|
||||
_ = stream_token.cancelled() => break,
|
||||
_ = tokio::time::sleep(Duration::from_secs(120)) => {
|
||||
log::warn!("Stream {} download stalled (window empty for 120s)", stream_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if stream_token.is_cancelled() { break; }
|
||||
|
||||
// Limit read size to available window.
|
||||
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
|
||||
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
|
||||
// which would be falsely interpreted as EOF.
|
||||
let w = send_window.load(Ordering::Acquire) as usize;
|
||||
if w == 0 {
|
||||
log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id);
|
||||
break;
|
||||
}
|
||||
let max_read = w.min(32768);
|
||||
|
||||
tokio::select! {
|
||||
read_result = up_read.read(&mut buf[FRAME_HEADER_SIZE..FRAME_HEADER_SIZE + max_read]) => {
|
||||
match read_result {
|
||||
Ok(0) => break,
|
||||
Ok(n) => {
|
||||
send_window.fetch_sub(n as u32, Ordering::Release);
|
||||
encode_frame_header(&mut buf, stream_id, FRAME_DATA_BACK, n);
|
||||
let frame = Bytes::copy_from_slice(&buf[..FRAME_HEADER_SIZE + n]);
|
||||
// Sustained classification: >2.5 MB/s for >10 seconds
|
||||
dl_bytes_sent += n as u64;
|
||||
if !is_sustained {
|
||||
let elapsed = dl_start.elapsed().as_secs();
|
||||
if elapsed >= remoteingress_protocol::SUSTAINED_MIN_DURATION_SECS
|
||||
&& dl_bytes_sent / elapsed >= remoteingress_protocol::SUSTAINED_THRESHOLD_BPS
|
||||
{
|
||||
is_sustained = true;
|
||||
log::debug!("Stream {} classified as sustained (download, {} bytes in {}s)",
|
||||
stream_id, dl_bytes_sent, elapsed);
|
||||
}
|
||||
}
|
||||
let tx = if is_sustained { &sustained_writer_tx } else { &data_writer_tx };
|
||||
let sent = tokio::select! {
|
||||
result = tx.send(frame) => result.is_ok(),
|
||||
_ = stream_token.cancelled() => false,
|
||||
};
|
||||
if !sent { break; }
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
_ = stream_token.cancelled() => break,
|
||||
}
|
||||
}
|
||||
|
||||
// Send CLOSE_BACK via same channel as DATA_BACK (must arrive AFTER last DATA_BACK).
|
||||
// select! with cancellation guard prevents indefinite blocking if tunnel dies.
|
||||
if !stream_token.is_cancelled() {
|
||||
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
|
||||
let tx = if is_sustained { &sustained_writer_tx } else { &data_writer_tx };
|
||||
tokio::select! {
|
||||
_ = tx.send(close_frame) => {}
|
||||
_ = stream_token.cancelled() => {}
|
||||
}
|
||||
}
|
||||
|
||||
writer_for_edge_data.abort();
|
||||
Ok::<(), Box<dyn std::error::Error + Send + Sync>>(())
|
||||
}
|
||||
.await;
|
||||
|
||||
if let Err(e) = result {
|
||||
log::error!("Stream {} error: {}", stream_id, e);
|
||||
// Send CLOSE_BACK on error (must arrive after any DATA_BACK).
|
||||
// Error path: is_sustained not available here, use data channel (safe —
|
||||
// if error occurs before classification, no sustained frames were sent).
|
||||
if !stream_token.is_cancelled() {
|
||||
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
|
||||
tokio::select! {
|
||||
_ = data_writer_tx.send(close_frame) => {}
|
||||
_ = stream_token.cancelled() => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Signal main loop to remove stream from the map.
|
||||
// Cancellation guard prevents indefinite blocking if cleanup channel is full.
|
||||
tokio::select! {
|
||||
_ = cleanup.send(stream_id) => {}
|
||||
_ = stream_token.cancelled() => {}
|
||||
}
|
||||
stream_counter.fetch_sub(1, Ordering::Relaxed);
|
||||
});
|
||||
}
|
||||
FRAME_DATA => {
|
||||
// Dispatch to per-stream unbounded channel. Flow control (WINDOW_UPDATE)
|
||||
// limits bytes-in-flight, so the channel won't grow unbounded. send() only
|
||||
// fails if the receiver is dropped (stream handler already exited).
|
||||
if let Some(state) = streams.get(&frame.stream_id) {
|
||||
if state.data_tx.send(frame.payload).is_err() {
|
||||
// Receiver dropped — stream handler already exited, clean up
|
||||
streams.remove(&frame.stream_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
FRAME_WINDOW_UPDATE => {
|
||||
// Edge consumed data — increase our send window for this stream
|
||||
if let Some(increment) = decode_window_update(&frame.payload) {
|
||||
if increment > 0 {
|
||||
if let Some(state) = streams.get(&frame.stream_id) {
|
||||
let prev = state.send_window.fetch_add(increment, Ordering::Release);
|
||||
if prev + increment > MAX_WINDOW_SIZE {
|
||||
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
|
||||
}
|
||||
state.window_notify.notify_one();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
FRAME_CLOSE => {
|
||||
if let Some(state) = streams.remove(&frame.stream_id) {
|
||||
state.cancel_token.cancel();
|
||||
let _ = event_tx.try_send(HubEvent::StreamClosed {
|
||||
edge_id: edge_id.to_string(),
|
||||
stream_id: frame.stream_id,
|
||||
});
|
||||
}
|
||||
}
|
||||
FRAME_PONG => {
|
||||
log::debug!("Received PONG from edge {}", edge_id);
|
||||
}
|
||||
_ => {
|
||||
log::warn!("Unexpected frame type {} from edge", frame.frame_type);
|
||||
}
|
||||
}
|
||||
FrameAction::Continue
|
||||
}
|
||||
|
||||
/// Handle a single edge connection: authenticate, then enter frame loop.
|
||||
async fn handle_edge_connection(
|
||||
stream: TcpStream,
|
||||
@@ -300,13 +625,31 @@ async fn handle_edge_connection(
|
||||
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
// Disable Nagle's algorithm for low-latency control frames (PING/PONG, WINDOW_UPDATE)
|
||||
stream.set_nodelay(true)?;
|
||||
let tls_stream = acceptor.accept(stream).await?;
|
||||
let (read_half, mut write_half) = tokio::io::split(tls_stream);
|
||||
let mut buf_reader = BufReader::new(read_half);
|
||||
// TCP keepalive detects silent network failures (NAT timeout, path change)
|
||||
// faster than the 45s application-level liveness timeout.
|
||||
let ka = socket2::TcpKeepalive::new()
|
||||
.with_time(Duration::from_secs(30));
|
||||
#[cfg(target_os = "linux")]
|
||||
let ka = ka.with_interval(Duration::from_secs(10));
|
||||
let _ = socket2::SockRef::from(&stream).set_tcp_keepalive(&ka);
|
||||
let mut tls_stream = acceptor.accept(stream).await?;
|
||||
|
||||
// Read auth line: "EDGE <edgeId> <secret>\n"
|
||||
let mut auth_line = String::new();
|
||||
buf_reader.read_line(&mut auth_line).await?;
|
||||
// Byte-by-byte auth line reading (no BufReader).
|
||||
// Auth line: "EDGE <edgeId> <secret>\n"
|
||||
let mut auth_buf = Vec::with_capacity(512);
|
||||
loop {
|
||||
let mut byte = [0u8; 1];
|
||||
tls_stream.read_exact(&mut byte).await?;
|
||||
if byte[0] == b'\n' {
|
||||
break;
|
||||
}
|
||||
auth_buf.push(byte[0]);
|
||||
if auth_buf.len() > 4096 {
|
||||
return Err("auth line too long".into());
|
||||
}
|
||||
}
|
||||
let auth_line = String::from_utf8(auth_buf)
|
||||
.map_err(|_| "auth line not valid UTF-8")?;
|
||||
let auth_line = auth_line.trim();
|
||||
|
||||
let parts: Vec<&str> = auth_line.splitn(3, ' ').collect();
|
||||
@@ -346,11 +689,15 @@ async fn handle_edge_connection(
|
||||
};
|
||||
let mut handshake_json = serde_json::to_string(&handshake)?;
|
||||
handshake_json.push('\n');
|
||||
write_half.write_all(handshake_json.as_bytes()).await?;
|
||||
tls_stream.write_all(handshake_json.as_bytes()).await?;
|
||||
tls_stream.flush().await?;
|
||||
|
||||
// Track this edge
|
||||
let streams: Arc<Mutex<HashMap<u32, HubStreamState>>> =
|
||||
Arc::new(Mutex::new(HashMap::new()));
|
||||
let mut streams: HashMap<u32, HubStreamState> = HashMap::new();
|
||||
// Per-edge active stream counter for adaptive flow control
|
||||
let edge_stream_count = Arc::new(AtomicU32::new(0));
|
||||
// Cleanup channel: spawned stream tasks send stream_id here when done
|
||||
let (cleanup_tx, mut cleanup_rx) = mpsc::channel::<u32>(256);
|
||||
let now = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
@@ -361,57 +708,33 @@ async fn handle_edge_connection(
|
||||
|
||||
{
|
||||
let mut edges = connected.lock().await;
|
||||
// If this edge already has an active connection (reconnect scenario),
|
||||
// cancel the old connection so it shuts down immediately instead of
|
||||
// lingering until TCP keepalive detects the dead socket.
|
||||
if let Some(old) = edges.remove(&edge_id) {
|
||||
log::info!("Edge {} reconnected, cancelling old connection", edge_id);
|
||||
old.cancel_token.cancel();
|
||||
}
|
||||
edges.insert(
|
||||
edge_id.clone(),
|
||||
ConnectedEdgeInfo {
|
||||
connected_at: now,
|
||||
peer_addr,
|
||||
active_streams: streams.clone(),
|
||||
edge_stream_count: edge_stream_count.clone(),
|
||||
config_tx,
|
||||
cancel_token: edge_token.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
// QoS dual-channel tunnel writer: control frames (PING/PONG/WINDOW_UPDATE/CLOSE)
|
||||
// have priority over data frames (DATA_BACK). This prevents PING starvation under load.
|
||||
let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Vec<u8>>(64);
|
||||
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(4096);
|
||||
// Legacy alias for code that sends both control and data (will be migrated)
|
||||
let frame_writer_tx = ctrl_tx.clone();
|
||||
let writer_token = edge_token.clone();
|
||||
let writer_handle = tokio::spawn(async move {
|
||||
// BufWriter coalesces small writes (frame headers, control frames) into fewer
|
||||
// TLS records and syscalls. Flushed after each frame to avoid holding data.
|
||||
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
|
||||
loop {
|
||||
tokio::select! {
|
||||
biased; // control frames always take priority over data
|
||||
ctrl = ctrl_rx.recv() => {
|
||||
match ctrl {
|
||||
Some(frame_data) => {
|
||||
if writer.write_all(&frame_data).await.is_err() { break; }
|
||||
if writer.flush().await.is_err() { break; }
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
data = data_rx.recv() => {
|
||||
match data {
|
||||
Some(frame_data) => {
|
||||
if writer.write_all(&frame_data).await.is_err() { break; }
|
||||
if writer.flush().await.is_err() { break; }
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
_ = writer_token.cancelled() => break,
|
||||
}
|
||||
}
|
||||
});
|
||||
// QoS dual-channel: ctrl frames have priority over data frames.
|
||||
// Stream handlers send through these channels -> TunnelIo drains them.
|
||||
let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Bytes>(512);
|
||||
let (data_tx, mut data_rx) = mpsc::channel::<Bytes>(4096);
|
||||
let (sustained_tx, mut sustained_rx) = mpsc::channel::<Bytes>(4096);
|
||||
|
||||
// Spawn task to forward config updates as FRAME_CONFIG frames
|
||||
let config_writer_tx = frame_writer_tx.clone();
|
||||
let config_writer_tx = ctrl_tx.clone();
|
||||
let config_edge_id = edge_id.clone();
|
||||
let config_token = edge_token.clone();
|
||||
let config_handle = tokio::spawn(async move {
|
||||
@@ -448,304 +771,119 @@ async fn handle_edge_connection(
|
||||
let mut last_activity = Instant::now();
|
||||
let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur));
|
||||
|
||||
// Frame reading loop
|
||||
let mut frame_reader = FrameReader::new(buf_reader);
|
||||
// Single-owner I/O engine — no tokio::io::split, no mutex
|
||||
let mut tunnel_io = remoteingress_protocol::TunnelIo::new(tls_stream, Vec::new());
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
frame_result = frame_reader.next_frame() => {
|
||||
match frame_result {
|
||||
Ok(Some(frame)) => {
|
||||
// Reset liveness on any received frame
|
||||
last_activity = Instant::now();
|
||||
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||
|
||||
match frame.frame_type {
|
||||
FRAME_OPEN => {
|
||||
// A4: Check stream limit before processing
|
||||
let permit = match stream_semaphore.clone().try_acquire_owned() {
|
||||
Ok(p) => p,
|
||||
Err(_) => {
|
||||
log::warn!("Edge {} exceeded max streams ({}), rejecting stream {}",
|
||||
edge_id, MAX_STREAMS_PER_EDGE, frame.stream_id);
|
||||
let close_frame = encode_frame(frame.stream_id, FRAME_CLOSE_BACK, &[]);
|
||||
let _ = frame_writer_tx.try_send(close_frame);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
// Assigned in every break path of the hub_loop before use at the end.
|
||||
#[allow(unused_assignments)]
|
||||
let mut disconnect_reason = String::new();
|
||||
|
||||
// Payload is PROXY v1 header line
|
||||
let proxy_header = String::from_utf8_lossy(&frame.payload).to_string();
|
||||
|
||||
// Parse destination port from PROXY header
|
||||
let dest_port = parse_dest_port_from_proxy(&proxy_header).unwrap_or(443);
|
||||
|
||||
let stream_id = frame.stream_id;
|
||||
let edge_id_clone = edge_id.clone();
|
||||
let event_tx_clone = event_tx.clone();
|
||||
let streams_clone = streams.clone();
|
||||
let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK
|
||||
let data_writer_tx = data_tx.clone(); // data: DATA_BACK
|
||||
let target = target_host.clone();
|
||||
let stream_token = edge_token.child_token();
|
||||
|
||||
let _ = event_tx.try_send(HubEvent::StreamOpened {
|
||||
edge_id: edge_id.clone(),
|
||||
stream_id,
|
||||
});
|
||||
|
||||
// Create channel for data from edge to this stream (capacity 16 is sufficient with flow control)
|
||||
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(256);
|
||||
let send_window = Arc::new(AtomicU32::new(INITIAL_STREAM_WINDOW));
|
||||
let window_notify = Arc::new(Notify::new());
|
||||
{
|
||||
let mut s = streams.lock().await;
|
||||
s.insert(stream_id, HubStreamState {
|
||||
data_tx,
|
||||
cancel_token: stream_token.clone(),
|
||||
send_window: Arc::clone(&send_window),
|
||||
window_notify: Arc::clone(&window_notify),
|
||||
});
|
||||
}
|
||||
|
||||
// Spawn task: connect to SmartProxy, send PROXY header, pipe data
|
||||
tokio::spawn(async move {
|
||||
let _permit = permit; // hold semaphore permit until stream completes
|
||||
|
||||
let result = async {
|
||||
// A2: Connect to SmartProxy with timeout
|
||||
let mut upstream = tokio::time::timeout(
|
||||
Duration::from_secs(10),
|
||||
TcpStream::connect((target.as_str(), dest_port)),
|
||||
)
|
||||
.await
|
||||
.map_err(|_| -> Box<dyn std::error::Error + Send + Sync> {
|
||||
format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into()
|
||||
})??;
|
||||
|
||||
upstream.set_nodelay(true)?;
|
||||
upstream.write_all(proxy_header.as_bytes()).await?;
|
||||
|
||||
let (mut up_read, mut up_write) =
|
||||
upstream.into_split();
|
||||
|
||||
// Forward data from edge (via channel) to SmartProxy
|
||||
// After writing to upstream, send WINDOW_UPDATE_BACK to edge
|
||||
let writer_token = stream_token.clone();
|
||||
let wub_tx = writer_tx.clone();
|
||||
let writer_for_edge_data = tokio::spawn(async move {
|
||||
let mut consumed_since_update: u32 = 0;
|
||||
loop {
|
||||
tokio::select! {
|
||||
data = data_rx.recv() => {
|
||||
match data {
|
||||
Some(data) => {
|
||||
let len = data.len() as u32;
|
||||
// Check cancellation alongside the write so we respond
|
||||
// promptly to FRAME_CLOSE instead of blocking up to 60s.
|
||||
let write_result = tokio::select! {
|
||||
r = tokio::time::timeout(
|
||||
Duration::from_secs(60),
|
||||
up_write.write_all(&data),
|
||||
) => r,
|
||||
_ = writer_token.cancelled() => break,
|
||||
};
|
||||
match write_result {
|
||||
Ok(Ok(())) => {}
|
||||
Ok(Err(_)) => break,
|
||||
Err(_) => {
|
||||
log::warn!("Stream {} write to upstream timed out (60s)", stream_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Track consumption for flow control
|
||||
consumed_since_update += len;
|
||||
if consumed_since_update >= WINDOW_UPDATE_THRESHOLD {
|
||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update);
|
||||
if wub_tx.try_send(frame).is_ok() {
|
||||
consumed_since_update = 0;
|
||||
}
|
||||
// If try_send fails, keep accumulating — retry on next threshold
|
||||
}
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
_ = writer_token.cancelled() => break,
|
||||
}
|
||||
}
|
||||
// Send final window update for remaining consumed bytes
|
||||
if consumed_since_update > 0 {
|
||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update);
|
||||
let _ = wub_tx.try_send(frame);
|
||||
}
|
||||
let _ = up_write.shutdown().await;
|
||||
});
|
||||
|
||||
// Forward data from SmartProxy back to edge via writer channel
|
||||
// with per-stream flow control (check send_window before reading)
|
||||
let mut buf = vec![0u8; 32768];
|
||||
loop {
|
||||
// Wait for send window to have capacity (with stall timeout)
|
||||
loop {
|
||||
let w = send_window.load(Ordering::Acquire);
|
||||
if w > 0 { break; }
|
||||
tokio::select! {
|
||||
_ = window_notify.notified() => continue,
|
||||
_ = stream_token.cancelled() => break,
|
||||
_ = tokio::time::sleep(Duration::from_secs(120)) => {
|
||||
log::warn!("Stream {} download stalled (window empty for 120s)", stream_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if stream_token.is_cancelled() { break; }
|
||||
|
||||
// Limit read size to available window
|
||||
let w = send_window.load(Ordering::Acquire) as usize;
|
||||
let max_read = w.min(buf.len());
|
||||
|
||||
tokio::select! {
|
||||
read_result = up_read.read(&mut buf[..max_read]) => {
|
||||
match read_result {
|
||||
Ok(0) => break,
|
||||
Ok(n) => {
|
||||
send_window.fetch_sub(n as u32, Ordering::Release);
|
||||
let frame =
|
||||
encode_frame(stream_id, FRAME_DATA_BACK, &buf[..n]);
|
||||
if data_writer_tx.send(frame).await.is_err() {
|
||||
log::warn!("Stream {} data channel closed, closing", stream_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
_ = stream_token.cancelled() => break,
|
||||
}
|
||||
}
|
||||
|
||||
// Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK).
|
||||
// Use send().await to guarantee delivery (try_send silently drops if full).
|
||||
if !stream_token.is_cancelled() {
|
||||
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
|
||||
let _ = data_writer_tx.send(close_frame).await;
|
||||
}
|
||||
|
||||
writer_for_edge_data.abort();
|
||||
Ok::<(), Box<dyn std::error::Error + Send + Sync>>(())
|
||||
}
|
||||
.await;
|
||||
|
||||
if let Err(e) = result {
|
||||
log::error!("Stream {} error: {}", stream_id, e);
|
||||
// Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK).
|
||||
// Use send().await to guarantee delivery.
|
||||
if !stream_token.is_cancelled() {
|
||||
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
|
||||
let _ = data_writer_tx.send(close_frame).await;
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up stream (guard against duplicate if FRAME_CLOSE already removed it)
|
||||
let was_present = {
|
||||
let mut s = streams_clone.lock().await;
|
||||
s.remove(&stream_id).is_some()
|
||||
};
|
||||
if was_present {
|
||||
let _ = event_tx_clone.try_send(HubEvent::StreamClosed {
|
||||
edge_id: edge_id_clone,
|
||||
stream_id,
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
FRAME_DATA => {
|
||||
// Non-blocking dispatch to per-stream channel.
|
||||
// With flow control, the sender should rarely exceed the channel capacity.
|
||||
let mut s = streams.lock().await;
|
||||
if let Some(state) = s.get(&frame.stream_id) {
|
||||
if state.data_tx.try_send(frame.payload).is_err() {
|
||||
log::warn!("Stream {} data channel full, closing stream", frame.stream_id);
|
||||
if let Some(state) = s.remove(&frame.stream_id) {
|
||||
state.cancel_token.cancel();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
FRAME_WINDOW_UPDATE => {
|
||||
// Edge consumed data — increase our send window for this stream
|
||||
if let Some(increment) = decode_window_update(&frame.payload) {
|
||||
if increment > 0 {
|
||||
let s = streams.lock().await;
|
||||
if let Some(state) = s.get(&frame.stream_id) {
|
||||
let prev = state.send_window.fetch_add(increment, Ordering::Release);
|
||||
if prev + increment > MAX_WINDOW_SIZE {
|
||||
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
|
||||
}
|
||||
state.window_notify.notify_one();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
FRAME_CLOSE => {
|
||||
let mut s = streams.lock().await;
|
||||
if let Some(state) = s.remove(&frame.stream_id) {
|
||||
state.cancel_token.cancel();
|
||||
let _ = event_tx.try_send(HubEvent::StreamClosed {
|
||||
edge_id: edge_id.clone(),
|
||||
stream_id: frame.stream_id,
|
||||
});
|
||||
}
|
||||
}
|
||||
FRAME_PONG => {
|
||||
log::debug!("Received PONG from edge {}", edge_id);
|
||||
}
|
||||
_ => {
|
||||
log::warn!("Unexpected frame type {} from edge", frame.frame_type);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
log::info!("Edge {} disconnected (EOF)", edge_id);
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("Edge {} frame error: {}", edge_id, e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
'hub_loop: loop {
|
||||
// Drain completed stream cleanups from spawned tasks
|
||||
while let Ok(stream_id) = cleanup_rx.try_recv() {
|
||||
if streams.remove(&stream_id).is_some() {
|
||||
let _ = event_tx.try_send(HubEvent::StreamClosed {
|
||||
edge_id: edge_id.clone(),
|
||||
stream_id,
|
||||
});
|
||||
}
|
||||
_ = ping_ticker.tick() => {
|
||||
let ping_frame = encode_frame(0, FRAME_PING, &[]);
|
||||
if frame_writer_tx.try_send(ping_frame).is_err() {
|
||||
log::warn!("Failed to send PING to edge {}, writer channel full/closed", edge_id);
|
||||
}
|
||||
|
||||
// Drain any buffered frames
|
||||
loop {
|
||||
let frame = match tunnel_io.try_parse_frame() {
|
||||
Some(Ok(f)) => f,
|
||||
Some(Err(e)) => {
|
||||
log::error!("Edge {} frame error: {}", edge_id, e);
|
||||
disconnect_reason = format!("edge_frame_error: {}", e);
|
||||
break 'hub_loop;
|
||||
}
|
||||
None => break,
|
||||
};
|
||||
last_activity = Instant::now();
|
||||
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||
if let FrameAction::Disconnect(reason) = handle_hub_frame(
|
||||
frame, &mut tunnel_io, &mut streams, &stream_semaphore, &edge_stream_count,
|
||||
&edge_id, &event_tx, &ctrl_tx, &data_tx, &sustained_tx, &target_host, &edge_token,
|
||||
&cleanup_tx,
|
||||
).await {
|
||||
disconnect_reason = reason;
|
||||
break 'hub_loop;
|
||||
}
|
||||
}
|
||||
|
||||
// Poll I/O: write(ctrl->data), flush, read, channels, timers
|
||||
let event = std::future::poll_fn(|cx| {
|
||||
// Queue PING if ticker fires
|
||||
if ping_ticker.poll_tick(cx).is_ready() {
|
||||
tunnel_io.queue_ctrl(encode_frame(0, FRAME_PING, &[]));
|
||||
}
|
||||
tunnel_io.poll_step(cx, &mut ctrl_rx, &mut data_rx, &mut sustained_rx, &mut liveness_deadline, &edge_token)
|
||||
}).await;
|
||||
|
||||
match event {
|
||||
remoteingress_protocol::TunnelEvent::Frame(frame) => {
|
||||
last_activity = Instant::now();
|
||||
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
|
||||
if let FrameAction::Disconnect(reason) = handle_hub_frame(
|
||||
frame, &mut tunnel_io, &mut streams, &stream_semaphore, &edge_stream_count,
|
||||
&edge_id, &event_tx, &ctrl_tx, &data_tx, &sustained_tx, &target_host, &edge_token,
|
||||
&cleanup_tx,
|
||||
).await {
|
||||
disconnect_reason = reason;
|
||||
break;
|
||||
}
|
||||
log::trace!("Sent PING to edge {}", edge_id);
|
||||
}
|
||||
_ = &mut liveness_deadline => {
|
||||
log::warn!("Edge {} liveness timeout (no frames for {}s), disconnecting",
|
||||
edge_id, liveness_timeout_dur.as_secs());
|
||||
remoteingress_protocol::TunnelEvent::Eof => {
|
||||
log::info!("Edge {} disconnected (EOF)", edge_id);
|
||||
disconnect_reason = "edge_eof".to_string();
|
||||
break;
|
||||
}
|
||||
_ = edge_token.cancelled() => {
|
||||
remoteingress_protocol::TunnelEvent::ReadError(e) => {
|
||||
log::error!("Edge {} frame error: {}", edge_id, e);
|
||||
disconnect_reason = format!("edge_frame_error: {}", e);
|
||||
break;
|
||||
}
|
||||
remoteingress_protocol::TunnelEvent::WriteError(e) => {
|
||||
log::error!("Tunnel write error to edge {}: {}", edge_id, e);
|
||||
disconnect_reason = format!("tunnel_write_error: {}", e);
|
||||
break;
|
||||
}
|
||||
remoteingress_protocol::TunnelEvent::LivenessTimeout => {
|
||||
log::warn!("Edge {} liveness timeout (no frames for {}s), disconnecting",
|
||||
edge_id, liveness_timeout_dur.as_secs());
|
||||
disconnect_reason = "liveness_timeout".to_string();
|
||||
break;
|
||||
}
|
||||
remoteingress_protocol::TunnelEvent::Cancelled => {
|
||||
log::info!("Edge {} cancelled by hub", edge_id);
|
||||
disconnect_reason = "cancelled_by_hub".to_string();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup: cancel edge token to propagate to all child tasks
|
||||
// Cancel stream tokens FIRST so stream handlers exit immediately.
|
||||
// If we TLS-shutdown first, stream handlers are stuck sending to dead channels
|
||||
// for up to 2 seconds while the shutdown times out on a dead connection.
|
||||
edge_token.cancel();
|
||||
config_handle.abort();
|
||||
writer_handle.abort();
|
||||
|
||||
// Graceful TLS shutdown: send close_notify so the edge sees a clean disconnect.
|
||||
// Stream handlers are already cancelled, so no new data is being produced.
|
||||
let mut tls_stream = tunnel_io.into_inner();
|
||||
let _ = tokio::time::timeout(
|
||||
Duration::from_secs(2),
|
||||
tls_stream.shutdown(),
|
||||
).await;
|
||||
{
|
||||
let mut edges = connected.lock().await;
|
||||
edges.remove(&edge_id);
|
||||
}
|
||||
let _ = event_tx.try_send(HubEvent::EdgeDisconnected {
|
||||
edge_id: edge_id.clone(),
|
||||
reason: disconnect_reason,
|
||||
});
|
||||
|
||||
Ok(())
|
||||
@@ -968,10 +1106,12 @@ mod tests {
|
||||
fn test_hub_event_edge_disconnected_serialize() {
|
||||
let event = HubEvent::EdgeDisconnected {
|
||||
edge_id: "edge-2".to_string(),
|
||||
reason: "liveness_timeout".to_string(),
|
||||
};
|
||||
let json = serde_json::to_value(&event).unwrap();
|
||||
assert_eq!(json["type"], "edgeDisconnected");
|
||||
assert_eq!(json["edgeId"], "edge-2");
|
||||
assert_eq!(json["reason"], "liveness_timeout");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -4,4 +4,10 @@ version = "2.0.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
tokio = { version = "1", features = ["io-util"] }
|
||||
tokio = { version = "1", features = ["io-util", "sync", "time"] }
|
||||
tokio-util = "0.7"
|
||||
bytes = "1"
|
||||
log = "0.4"
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { version = "1", features = ["io-util", "macros", "rt"] }
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
use tokio::io::{AsyncRead, AsyncReadExt};
|
||||
use std::collections::VecDeque;
|
||||
use std::future::Future;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use std::time::Duration;
|
||||
use bytes::{Bytes, BytesMut, BufMut};
|
||||
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf};
|
||||
use tokio::time::Instant;
|
||||
|
||||
// Frame type constants
|
||||
pub const FRAME_OPEN: u8 = 0x01;
|
||||
@@ -19,19 +26,36 @@ pub const FRAME_HEADER_SIZE: usize = 9;
|
||||
pub const MAX_PAYLOAD_SIZE: u32 = 16 * 1024 * 1024;
|
||||
|
||||
// Per-stream flow control constants
|
||||
/// Initial per-stream window size (4 MB). Sized for full throughput at high RTT:
|
||||
/// at 100ms RTT, this sustains ~40 MB/s per stream.
|
||||
/// Initial (and maximum) per-stream window size (4 MB).
|
||||
pub const INITIAL_STREAM_WINDOW: u32 = 4 * 1024 * 1024;
|
||||
/// Send WINDOW_UPDATE after consuming this many bytes (half the initial window).
|
||||
pub const WINDOW_UPDATE_THRESHOLD: u32 = INITIAL_STREAM_WINDOW / 2;
|
||||
/// Maximum window size to prevent overflow.
|
||||
pub const MAX_WINDOW_SIZE: u32 = 16 * 1024 * 1024;
|
||||
pub const MAX_WINDOW_SIZE: u32 = 4 * 1024 * 1024;
|
||||
|
||||
// Sustained stream classification constants
|
||||
/// Throughput threshold for sustained classification (2.5 MB/s = 20 Mbit/s).
|
||||
pub const SUSTAINED_THRESHOLD_BPS: u64 = 2_500_000;
|
||||
/// Minimum duration before a stream can be classified as sustained.
|
||||
pub const SUSTAINED_MIN_DURATION_SECS: u64 = 10;
|
||||
/// Fixed window for sustained streams (1 MB — the floor).
|
||||
pub const SUSTAINED_WINDOW: u32 = 1 * 1024 * 1024;
|
||||
/// Maximum bytes written from sustained queue per forced drain (1 MB/s guarantee).
|
||||
pub const SUSTAINED_FORCED_DRAIN_CAP: usize = 1_048_576;
|
||||
|
||||
/// Encode a WINDOW_UPDATE frame for a specific stream.
|
||||
pub fn encode_window_update(stream_id: u32, frame_type: u8, increment: u32) -> Vec<u8> {
|
||||
pub fn encode_window_update(stream_id: u32, frame_type: u8, increment: u32) -> Bytes {
|
||||
encode_frame(stream_id, frame_type, &increment.to_be_bytes())
|
||||
}
|
||||
|
||||
/// Compute the target per-stream window size based on the number of active streams.
|
||||
/// Total memory budget is ~200MB shared across all streams. Up to 50 streams get the
|
||||
/// full 4MB window; above that the window scales down to a 1MB floor at 200+ streams.
|
||||
pub fn compute_window_for_stream_count(active: u32) -> u32 {
|
||||
let per_stream = (200 * 1024 * 1024u64) / (active.max(1) as u64);
|
||||
per_stream.clamp(1 * 1024 * 1024, INITIAL_STREAM_WINDOW as u64) as u32
|
||||
}
|
||||
|
||||
/// Decode a WINDOW_UPDATE payload into a byte increment. Returns None if payload is malformed.
|
||||
pub fn decode_window_update(payload: &[u8]) -> Option<u32> {
|
||||
if payload.len() != 4 {
|
||||
@@ -45,18 +69,28 @@ pub fn decode_window_update(payload: &[u8]) -> Option<u32> {
|
||||
pub struct Frame {
|
||||
pub stream_id: u32,
|
||||
pub frame_type: u8,
|
||||
pub payload: Vec<u8>,
|
||||
pub payload: Bytes,
|
||||
}
|
||||
|
||||
/// Encode a frame into bytes: [stream_id:4][type:1][length:4][payload]
|
||||
pub fn encode_frame(stream_id: u32, frame_type: u8, payload: &[u8]) -> Vec<u8> {
|
||||
pub fn encode_frame(stream_id: u32, frame_type: u8, payload: &[u8]) -> Bytes {
|
||||
let len = payload.len() as u32;
|
||||
let mut buf = Vec::with_capacity(FRAME_HEADER_SIZE + payload.len());
|
||||
buf.extend_from_slice(&stream_id.to_be_bytes());
|
||||
buf.push(frame_type);
|
||||
buf.extend_from_slice(&len.to_be_bytes());
|
||||
buf.extend_from_slice(payload);
|
||||
buf
|
||||
let mut buf = BytesMut::with_capacity(FRAME_HEADER_SIZE + payload.len());
|
||||
buf.put_slice(&stream_id.to_be_bytes());
|
||||
buf.put_u8(frame_type);
|
||||
buf.put_slice(&len.to_be_bytes());
|
||||
buf.put_slice(payload);
|
||||
buf.freeze()
|
||||
}
|
||||
|
||||
/// Fill the first nine bytes of `buf` with a frame header.
///
/// Layout: bytes 0..4 hold `stream_id` (big-endian), byte 4 holds `frame_type`,
/// bytes 5..9 hold `payload_len` narrowed to `u32` (big-endian).
///
/// Only the header is written; the payload is expected to already sit directly
/// behind it in the same buffer, so a caller can read data into the tail of `buf`
/// and then stamp the header in front without moving the payload.
///
/// Panics if `buf` is shorter than nine bytes.
pub fn encode_frame_header(buf: &mut [u8], stream_id: u32, frame_type: u8, payload_len: usize) {
    let id_be = stream_id.to_be_bytes();
    let len_be = (payload_len as u32).to_be_bytes();

    buf[..4].copy_from_slice(&id_be);
    buf[4] = frame_type;
    buf[5..9].copy_from_slice(&len_be);
}
|
||||
|
||||
/// Build a PROXY protocol v1 header line.
|
||||
@@ -111,13 +145,17 @@ impl<R: AsyncRead + Unpin> FrameReader<R> {
|
||||
]);
|
||||
|
||||
if length > MAX_PAYLOAD_SIZE {
|
||||
log::error!(
|
||||
"CORRUPT FRAME HEADER: raw={:02x?} stream_id={} type=0x{:02x} length={}",
|
||||
self.header_buf, stream_id, frame_type, length
|
||||
);
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!("frame payload too large: {} bytes", length),
|
||||
format!("frame payload too large: {} bytes (header={:02x?})", length, self.header_buf),
|
||||
));
|
||||
}
|
||||
|
||||
let mut payload = vec![0u8; length as usize];
|
||||
let mut payload = BytesMut::zeroed(length as usize);
|
||||
if length > 0 {
|
||||
self.reader.read_exact(&mut payload).await?;
|
||||
}
|
||||
@@ -125,7 +163,7 @@ impl<R: AsyncRead + Unpin> FrameReader<R> {
|
||||
Ok(Some(Frame {
|
||||
stream_id,
|
||||
frame_type,
|
||||
payload,
|
||||
payload: payload.freeze(),
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -135,10 +173,409 @@ impl<R: AsyncRead + Unpin> FrameReader<R> {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TunnelIo: single-owner I/O multiplexer for the TLS tunnel connection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Events produced by the TunnelIo event loop.
|
||||
#[derive(Debug)]
|
||||
pub enum TunnelEvent {
|
||||
/// A complete frame was read from the remote side.
|
||||
Frame(Frame),
|
||||
/// The remote side closed the connection (EOF).
|
||||
Eof,
|
||||
/// A read error occurred.
|
||||
ReadError(std::io::Error),
|
||||
/// A write error occurred.
|
||||
WriteError(std::io::Error),
|
||||
/// No frames received for the liveness timeout duration.
|
||||
LivenessTimeout,
|
||||
/// The cancellation token was triggered.
|
||||
Cancelled,
|
||||
}
|
||||
|
||||
/// Write state extracted into a sub-struct so the borrow checker can see
|
||||
/// disjoint field access between `self.write` and `self.stream`.
|
||||
struct WriteState {
|
||||
ctrl_queue: VecDeque<Bytes>, // PONG, WINDOW_UPDATE, CLOSE, OPEN — always first
|
||||
data_queue: VecDeque<Bytes>, // DATA, DATA_BACK — only when ctrl is empty
|
||||
sustained_queue: VecDeque<Bytes>, // DATA, DATA_BACK from sustained streams — lowest priority
|
||||
offset: usize, // progress within current frame being written
|
||||
flush_needed: bool,
|
||||
// Sustained starvation prevention: guaranteed 1 MB/s drain
|
||||
sustained_last_drain: Instant,
|
||||
sustained_bytes_this_period: usize,
|
||||
}
|
||||
|
||||
impl WriteState {
|
||||
fn has_work(&self) -> bool {
|
||||
!self.ctrl_queue.is_empty() || !self.data_queue.is_empty() || !self.sustained_queue.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// Single-owner I/O engine for the tunnel TLS connection.
|
||||
///
|
||||
/// Owns the TLS stream directly — no `tokio::io::split()`, no mutex.
|
||||
/// Uses three priority write queues:
|
||||
/// 1. ctrl (PONG, WINDOW_UPDATE, CLOSE, OPEN) — always first
|
||||
/// 2. data (DATA, DATA_BACK from normal streams) — when ctrl empty
|
||||
/// 3. sustained (DATA, DATA_BACK from sustained streams) — lowest priority,
|
||||
/// drained freely when ctrl+data empty, or forced 1MB/s when they're not
|
||||
pub struct TunnelIo<S> {
|
||||
stream: S,
|
||||
// Read state: accumulate bytes, parse frames incrementally
|
||||
read_buf: Vec<u8>,
|
||||
read_pos: usize,
|
||||
parse_pos: usize,
|
||||
// Write state: extracted sub-struct for safe disjoint borrows
|
||||
write: WriteState,
|
||||
}
|
||||
|
||||
impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
|
||||
pub fn new(stream: S, initial_data: Vec<u8>) -> Self {
|
||||
let read_pos = initial_data.len();
|
||||
let mut read_buf = initial_data;
|
||||
if read_buf.capacity() < 65536 {
|
||||
read_buf.reserve(65536 - read_buf.len());
|
||||
}
|
||||
Self {
|
||||
stream,
|
||||
read_buf,
|
||||
read_pos,
|
||||
parse_pos: 0,
|
||||
write: WriteState {
|
||||
ctrl_queue: VecDeque::new(),
|
||||
data_queue: VecDeque::new(),
|
||||
sustained_queue: VecDeque::new(),
|
||||
offset: 0,
|
||||
flush_needed: false,
|
||||
sustained_last_drain: Instant::now(),
|
||||
sustained_bytes_this_period: 0,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Queue a high-priority control frame (PONG, WINDOW_UPDATE, CLOSE, OPEN).
|
||||
pub fn queue_ctrl(&mut self, frame: Bytes) {
|
||||
self.write.ctrl_queue.push_back(frame);
|
||||
}
|
||||
|
||||
/// Queue a lower-priority data frame (DATA, DATA_BACK).
|
||||
pub fn queue_data(&mut self, frame: Bytes) {
|
||||
self.write.data_queue.push_back(frame);
|
||||
}
|
||||
|
||||
/// Queue a lowest-priority sustained data frame.
|
||||
pub fn queue_sustained(&mut self, frame: Bytes) {
|
||||
self.write.sustained_queue.push_back(frame);
|
||||
}
|
||||
|
||||
/// Try to parse a complete frame from the read buffer.
|
||||
/// Uses a parse_pos cursor to avoid drain() on every frame.
|
||||
pub fn try_parse_frame(&mut self) -> Option<Result<Frame, std::io::Error>> {
|
||||
let available = self.read_pos - self.parse_pos;
|
||||
if available < FRAME_HEADER_SIZE {
|
||||
return None;
|
||||
}
|
||||
|
||||
let base = self.parse_pos;
|
||||
let stream_id = u32::from_be_bytes([
|
||||
self.read_buf[base], self.read_buf[base + 1],
|
||||
self.read_buf[base + 2], self.read_buf[base + 3],
|
||||
]);
|
||||
let frame_type = self.read_buf[base + 4];
|
||||
let length = u32::from_be_bytes([
|
||||
self.read_buf[base + 5], self.read_buf[base + 6],
|
||||
self.read_buf[base + 7], self.read_buf[base + 8],
|
||||
]);
|
||||
|
||||
if length > MAX_PAYLOAD_SIZE {
|
||||
let header = [
|
||||
self.read_buf[base], self.read_buf[base + 1],
|
||||
self.read_buf[base + 2], self.read_buf[base + 3],
|
||||
self.read_buf[base + 4], self.read_buf[base + 5],
|
||||
self.read_buf[base + 6], self.read_buf[base + 7],
|
||||
self.read_buf[base + 8],
|
||||
];
|
||||
log::error!(
|
||||
"CORRUPT FRAME HEADER: raw={:02x?} stream_id={} type=0x{:02x} length={}",
|
||||
header, stream_id, frame_type, length
|
||||
);
|
||||
return Some(Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!("frame payload too large: {} bytes (header={:02x?})", length, header),
|
||||
)));
|
||||
}
|
||||
|
||||
let total_frame_size = FRAME_HEADER_SIZE + length as usize;
|
||||
if available < total_frame_size {
|
||||
return None;
|
||||
}
|
||||
|
||||
let payload = Bytes::copy_from_slice(
|
||||
&self.read_buf[base + FRAME_HEADER_SIZE..base + total_frame_size],
|
||||
);
|
||||
self.parse_pos += total_frame_size;
|
||||
|
||||
// Compact when parse_pos > half the data to reclaim memory
|
||||
if self.parse_pos > self.read_pos / 2 && self.parse_pos > 0 {
|
||||
self.read_buf.drain(..self.parse_pos);
|
||||
self.read_pos -= self.parse_pos;
|
||||
self.parse_pos = 0;
|
||||
}
|
||||
|
||||
Some(Ok(Frame { stream_id, frame_type, payload }))
|
||||
}
|
||||
|
||||
/// Poll-based I/O step. Returns Ready on events, Pending when idle.
|
||||
///
|
||||
/// Order: write(ctrl->data->sustained) -> flush -> read -> channels -> timers
|
||||
pub fn poll_step(
|
||||
&mut self,
|
||||
cx: &mut Context<'_>,
|
||||
ctrl_rx: &mut tokio::sync::mpsc::Receiver<Bytes>,
|
||||
data_rx: &mut tokio::sync::mpsc::Receiver<Bytes>,
|
||||
sustained_rx: &mut tokio::sync::mpsc::Receiver<Bytes>,
|
||||
liveness_deadline: &mut Pin<Box<tokio::time::Sleep>>,
|
||||
cancel_token: &tokio_util::sync::CancellationToken,
|
||||
) -> Poll<TunnelEvent> {
|
||||
// 1. WRITE: 3-tier priority — ctrl first, then data, then sustained.
|
||||
// Sustained drains freely when ctrl+data are empty.
|
||||
// Write one frame, set flush_needed, then flush must complete before
|
||||
// writing more. This prevents unbounded TLS session buffer growth.
|
||||
// Safe: `self.write` and `self.stream` are disjoint fields.
|
||||
let mut writes = 0;
|
||||
while self.write.has_work() && writes < 16 && !self.write.flush_needed {
|
||||
// Pick queue: ctrl > data > sustained
|
||||
let queue_id = if !self.write.ctrl_queue.is_empty() {
|
||||
0 // ctrl
|
||||
} else if !self.write.data_queue.is_empty() {
|
||||
1 // data
|
||||
} else {
|
||||
2 // sustained
|
||||
};
|
||||
let frame = match queue_id {
|
||||
0 => self.write.ctrl_queue.front().unwrap(),
|
||||
1 => self.write.data_queue.front().unwrap(),
|
||||
_ => self.write.sustained_queue.front().unwrap(),
|
||||
};
|
||||
let remaining = &frame[self.write.offset..];
|
||||
|
||||
match Pin::new(&mut self.stream).poll_write(cx, remaining) {
|
||||
Poll::Ready(Ok(0)) => {
|
||||
log::error!("TunnelIo: poll_write returned 0 (write zero), ctrl_q={} data_q={} sustained_q={}",
|
||||
self.write.ctrl_queue.len(), self.write.data_queue.len(), self.write.sustained_queue.len());
|
||||
return Poll::Ready(TunnelEvent::WriteError(
|
||||
std::io::Error::new(std::io::ErrorKind::WriteZero, "write zero"),
|
||||
));
|
||||
}
|
||||
Poll::Ready(Ok(n)) => {
|
||||
self.write.offset += n;
|
||||
self.write.flush_needed = true;
|
||||
if self.write.offset >= frame.len() {
|
||||
match queue_id {
|
||||
0 => { self.write.ctrl_queue.pop_front(); }
|
||||
1 => { self.write.data_queue.pop_front(); }
|
||||
_ => {
|
||||
self.write.sustained_queue.pop_front();
|
||||
self.write.sustained_last_drain = Instant::now();
|
||||
self.write.sustained_bytes_this_period = 0;
|
||||
}
|
||||
}
|
||||
self.write.offset = 0;
|
||||
writes += 1;
|
||||
}
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
log::error!("TunnelIo: poll_write error: {} (ctrl_q={} data_q={} sustained_q={})",
|
||||
e, self.write.ctrl_queue.len(), self.write.data_queue.len(), self.write.sustained_queue.len());
|
||||
return Poll::Ready(TunnelEvent::WriteError(e));
|
||||
}
|
||||
Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
|
||||
// 1b. FORCED SUSTAINED DRAIN: when ctrl/data have work but sustained is waiting,
|
||||
// guarantee at least 1 MB/s by draining up to SUSTAINED_FORCED_DRAIN_CAP
|
||||
// once per second.
|
||||
if !self.write.sustained_queue.is_empty()
|
||||
&& (!self.write.ctrl_queue.is_empty() || !self.write.data_queue.is_empty())
|
||||
&& !self.write.flush_needed
|
||||
{
|
||||
let now = Instant::now();
|
||||
if now.duration_since(self.write.sustained_last_drain) >= Duration::from_secs(1) {
|
||||
self.write.sustained_bytes_this_period = 0;
|
||||
self.write.sustained_last_drain = now;
|
||||
|
||||
while !self.write.sustained_queue.is_empty()
|
||||
&& self.write.sustained_bytes_this_period < SUSTAINED_FORCED_DRAIN_CAP
|
||||
&& !self.write.flush_needed
|
||||
{
|
||||
let frame = self.write.sustained_queue.front().unwrap();
|
||||
let remaining = &frame[self.write.offset..];
|
||||
match Pin::new(&mut self.stream).poll_write(cx, remaining) {
|
||||
Poll::Ready(Ok(0)) => {
|
||||
return Poll::Ready(TunnelEvent::WriteError(
|
||||
std::io::Error::new(std::io::ErrorKind::WriteZero, "write zero"),
|
||||
));
|
||||
}
|
||||
Poll::Ready(Ok(n)) => {
|
||||
self.write.offset += n;
|
||||
self.write.flush_needed = true;
|
||||
self.write.sustained_bytes_this_period += n;
|
||||
if self.write.offset >= frame.len() {
|
||||
self.write.sustained_queue.pop_front();
|
||||
self.write.offset = 0;
|
||||
}
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
return Poll::Ready(TunnelEvent::WriteError(e));
|
||||
}
|
||||
Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. FLUSH: push encrypted data from TLS session to TCP.
|
||||
if self.write.flush_needed {
|
||||
match Pin::new(&mut self.stream).poll_flush(cx) {
|
||||
Poll::Ready(Ok(())) => {
|
||||
self.write.flush_needed = false;
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
log::error!("TunnelIo: poll_flush error: {}", e);
|
||||
return Poll::Ready(TunnelEvent::WriteError(e));
|
||||
}
|
||||
Poll::Pending => {} // TCP waker will notify us
|
||||
}
|
||||
}
|
||||
|
||||
// 3. READ: drain stream until Pending to ensure the TCP waker is always registered.
|
||||
// Without this loop, a Ready return with partial frame data would consume
|
||||
// the waker without re-registering it, causing the task to sleep until a
|
||||
// timer or channel wakes it (potentially 15+ seconds of lost reads).
|
||||
loop {
|
||||
// Compact if needed to make room for reads
|
||||
if self.parse_pos > 0 && self.read_buf.len() - self.read_pos < 32768 {
|
||||
self.read_buf.drain(..self.parse_pos);
|
||||
self.read_pos -= self.parse_pos;
|
||||
self.parse_pos = 0;
|
||||
}
|
||||
if self.read_buf.len() < self.read_pos + 32768 {
|
||||
self.read_buf.resize(self.read_pos + 32768, 0);
|
||||
}
|
||||
let mut rbuf = ReadBuf::new(&mut self.read_buf[self.read_pos..]);
|
||||
match Pin::new(&mut self.stream).poll_read(cx, &mut rbuf) {
|
||||
Poll::Ready(Ok(())) => {
|
||||
let n = rbuf.filled().len();
|
||||
if n == 0 {
|
||||
return Poll::Ready(TunnelEvent::Eof);
|
||||
}
|
||||
self.read_pos += n;
|
||||
if let Some(result) = self.try_parse_frame() {
|
||||
return match result {
|
||||
Ok(frame) => Poll::Ready(TunnelEvent::Frame(frame)),
|
||||
Err(e) => Poll::Ready(TunnelEvent::ReadError(e)),
|
||||
};
|
||||
}
|
||||
// Partial data — loop to call poll_read again so the TCP
|
||||
// waker is re-registered when it finally returns Pending.
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
log::error!("TunnelIo: poll_read error: {}", e);
|
||||
return Poll::Ready(TunnelEvent::ReadError(e));
|
||||
}
|
||||
Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
|
||||
// 4. CHANNELS: drain ctrl (always — priority), data (only if queue is small).
|
||||
// Ctrl frames must never be delayed — always drain fully.
|
||||
// Data frames are gated: keep data in the bounded channel for proper
|
||||
// backpressure when TLS writes are slow. Without this gate, the internal
|
||||
// data_queue (unbounded VecDeque) grows to hundreds of MB under throttle -> OOM.
|
||||
let mut got_new = false;
|
||||
loop {
|
||||
match ctrl_rx.poll_recv(cx) {
|
||||
Poll::Ready(Some(frame)) => { self.write.ctrl_queue.push_back(frame); got_new = true; }
|
||||
Poll::Ready(None) => {
|
||||
return Poll::Ready(TunnelEvent::WriteError(
|
||||
std::io::Error::new(std::io::ErrorKind::BrokenPipe, "ctrl channel closed"),
|
||||
));
|
||||
}
|
||||
Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
if self.write.data_queue.len() < 64 {
|
||||
loop {
|
||||
match data_rx.poll_recv(cx) {
|
||||
Poll::Ready(Some(frame)) => { self.write.data_queue.push_back(frame); got_new = true; }
|
||||
Poll::Ready(None) => {
|
||||
return Poll::Ready(TunnelEvent::WriteError(
|
||||
std::io::Error::new(std::io::ErrorKind::BrokenPipe, "data channel closed"),
|
||||
));
|
||||
}
|
||||
Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sustained channel: drain when sustained_queue is small (same backpressure pattern).
|
||||
// Channel close is non-fatal — not all connections have sustained streams.
|
||||
if self.write.sustained_queue.len() < 64 {
|
||||
loop {
|
||||
match sustained_rx.poll_recv(cx) {
|
||||
Poll::Ready(Some(frame)) => { self.write.sustained_queue.push_back(frame); got_new = true; }
|
||||
Poll::Ready(None) | Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 5. TIMERS
|
||||
if liveness_deadline.as_mut().poll(cx).is_ready() {
|
||||
return Poll::Ready(TunnelEvent::LivenessTimeout);
|
||||
}
|
||||
if cancel_token.is_cancelled() {
|
||||
return Poll::Ready(TunnelEvent::Cancelled);
|
||||
}
|
||||
|
||||
// 6. SELF-WAKE: only when flush is complete AND we have work.
|
||||
// When flush is Pending, the TCP write-readiness waker will notify us.
|
||||
// CRITICAL: do NOT self-wake when flush_needed — poll_write always returns
|
||||
// Ready (TLS buffers in-memory), so self-waking causes a tight spin loop
|
||||
// that fills the TLS session buffer unboundedly -> OOM -> ECONNRESET.
|
||||
if !self.write.flush_needed && (got_new || self.write.has_work()) {
|
||||
cx.waker().wake_by_ref();
|
||||
}
|
||||
|
||||
Poll::Pending
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> S {
|
||||
self.stream
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
fn test_encode_frame_header() {
    // Writing the header in place around an existing payload must produce the
    // same bytes as `encode_frame`.
    let payload = b"hello";
    let total_len = FRAME_HEADER_SIZE + payload.len();
    let mut buf = vec![0u8; total_len];
    buf[FRAME_HEADER_SIZE..].copy_from_slice(payload);
    encode_frame_header(&mut buf, 42, FRAME_DATA, payload.len());

    let expected = encode_frame(42, FRAME_DATA, payload);
    assert_eq!(buf, &expected[..]);
}
|
||||
|
||||
#[test]
fn test_encode_frame_header_empty_payload() {
    // Header-only frame: in-place encoding must match `encode_frame` with no payload.
    let expected = encode_frame(99, FRAME_CLOSE, &[]);
    let mut buf = vec![0u8; FRAME_HEADER_SIZE];
    encode_frame_header(&mut buf, 99, FRAME_CLOSE, 0);
    assert_eq!(buf, &expected[..]);
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_frame() {
|
||||
let data = b"hello";
|
||||
@@ -304,7 +741,7 @@ mod tests {
|
||||
let frame = reader.next_frame().await.unwrap().unwrap();
|
||||
assert_eq!(frame.stream_id, i as u32);
|
||||
assert_eq!(frame.frame_type, ft);
|
||||
assert_eq!(frame.payload, format!("payload_{}", i).as_bytes());
|
||||
assert_eq!(&frame.payload[..], format!("payload_{}", i).as_bytes());
|
||||
}
|
||||
|
||||
assert!(reader.next_frame().await.unwrap().is_none());
|
||||
@@ -313,7 +750,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_frame_reader_zero_length_payload() {
|
||||
let data = encode_frame(42, FRAME_CLOSE, &[]);
|
||||
let cursor = std::io::Cursor::new(data);
|
||||
let cursor = std::io::Cursor::new(data.to_vec());
|
||||
let mut reader = FrameReader::new(cursor);
|
||||
|
||||
let frame = reader.next_frame().await.unwrap().unwrap();
|
||||
@@ -336,4 +773,101 @@ mod tests {
|
||||
assert_eq!(&pong[0..4], &0u32.to_be_bytes());
|
||||
assert_eq!(pong.len(), FRAME_HEADER_SIZE);
|
||||
}
|
||||
|
||||
// --- compute_window_for_stream_count tests ---
|
||||
|
||||
#[test]
fn test_adaptive_window_zero_streams() {
    // 0 streams treated as 1: 200MB/1 -> clamped to 4MB max
    let window = compute_window_for_stream_count(0);
    assert_eq!(window, INITIAL_STREAM_WINDOW);
}
|
||||
|
||||
#[test]
fn test_adaptive_window_one_stream() {
    // A single stream gets the full initial window.
    let window = compute_window_for_stream_count(1);
    assert_eq!(window, INITIAL_STREAM_WINDOW);
}
|
||||
|
||||
#[test]
fn test_adaptive_window_50_streams_full() {
    // 200MB/50 = 4MB = exactly INITIAL_STREAM_WINDOW
    let window = compute_window_for_stream_count(50);
    assert_eq!(window, INITIAL_STREAM_WINDOW);
}
|
||||
|
||||
#[test]
fn test_adaptive_window_51_streams_starts_scaling() {
    // 200MB/51 < 4MB — first value below max
    let expected = (200 * 1024 * 1024u64 / 51) as u32;
    let window = compute_window_for_stream_count(51);
    assert!(window < INITIAL_STREAM_WINDOW);
    assert_eq!(window, expected);
}
|
||||
|
||||
#[test]
fn test_adaptive_window_100_streams() {
    // 200MB/100 = 2MB
    let window = compute_window_for_stream_count(100);
    assert_eq!(window, 2 * 1024 * 1024);
}
|
||||
|
||||
#[test]
fn test_adaptive_window_200_streams_at_floor() {
    // 200MB/200 = 1MB = exactly the floor
    let window = compute_window_for_stream_count(200);
    assert_eq!(window, 1024 * 1024);
}
|
||||
|
||||
#[test]
fn test_adaptive_window_500_streams_clamped() {
    // 200MB/500 = 0.4MB -> clamped up to 1MB floor
    let window = compute_window_for_stream_count(500);
    assert_eq!(window, 1024 * 1024);
}
|
||||
|
||||
#[test]
fn test_adaptive_window_max_u32() {
    // Extreme: u32::MAX streams -> tiny value -> clamped to 1MB
    let window = compute_window_for_stream_count(u32::MAX);
    assert_eq!(window, 1024 * 1024);
}
|
||||
|
||||
#[test]
fn test_adaptive_window_monotonically_decreasing() {
    // The window must never grow as the stream count grows.
    let stream_counts = [2, 10, 50, 51, 100, 200, 500, 1000];
    let mut prev = compute_window_for_stream_count(1);
    for n in stream_counts {
        let w = compute_window_for_stream_count(n);
        assert!(w <= prev, "window increased from {} to {} at n={}", prev, w, n);
        prev = w;
    }
}
|
||||
|
||||
#[test]
fn test_adaptive_window_total_budget_bounded() {
    // active x per_stream_window should never exceed 200MB (+ clamp overhead for high N)
    const BUDGET: u64 = 200 * 1024 * 1024;
    for n in [1, 10, 50, 100, 200] {
        let w = compute_window_for_stream_count(n);
        let total = w as u64 * n as u64;
        assert!(total <= BUDGET, "total {}MB exceeds budget at n={}", total / (1024*1024), n);
    }
}
|
||||
|
||||
// --- encode/decode window_update roundtrip ---
|
||||
|
||||
#[test]
fn test_window_update_roundtrip() {
    // Every increment must survive encode -> decode, and the frame type byte
    // (index 4) must be the one passed in.
    let increments = [0u32, 1, 64 * 1024, INITIAL_STREAM_WINDOW, MAX_WINDOW_SIZE, u32::MAX];
    for increment in increments {
        let frame = encode_window_update(42, FRAME_WINDOW_UPDATE, increment);
        assert_eq!(frame[4], FRAME_WINDOW_UPDATE);
        let payload = &frame[FRAME_HEADER_SIZE..];
        assert_eq!(decode_window_update(payload), Some(increment));
    }
}
|
||||
|
||||
#[test]
fn test_window_update_back_roundtrip() {
    // Same roundtrip for the WINDOW_UPDATE_BACK frame type.
    let frame = encode_window_update(7, FRAME_WINDOW_UPDATE_BACK, 1234567);
    assert_eq!(frame[4], FRAME_WINDOW_UPDATE_BACK);
    let decoded = decode_window_update(&frame[FRAME_HEADER_SIZE..]);
    assert_eq!(decoded, Some(1234567));
}
|
||||
|
||||
#[test]
fn test_decode_window_update_malformed() {
    // Payloads of 0, 3 and 5 bytes must all be rejected.
    let malformed: [&[u8]; 3] = [&[], &[0, 0, 0], &[0, 0, 0, 0, 0]];
    for payload in malformed {
        assert_eq!(decode_window_update(payload), None);
    }
}
|
||||
}
|
||||
|
||||
475
test/test.flowcontrol.node.ts
Normal file
475
test/test.flowcontrol.node.ts
Normal file
@@ -0,0 +1,475 @@
|
||||
import { expect, tap } from '@push.rocks/tapbundle';
|
||||
import * as net from 'net';
|
||||
import * as crypto from 'crypto';
|
||||
import { RemoteIngressHub, RemoteIngressEdge } from '../ts/index.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Finds `count` free TCP ports on 127.0.0.1.
 *
 * Each probe server listens on port 0 so the OS assigns the port; all probes
 * stay open until every port has been collected and are closed before
 * returning.
 */
async function findFreePorts(count: number): Promise<number[]> {
  const probes: net.Server[] = [];
  const ports: number[] = [];

  while (probes.length < count) {
    const probe = net.createServer();
    await new Promise<void>((listening) => probe.listen(0, '127.0.0.1', listening));
    const { port } = probe.address() as net.AddressInfo;
    ports.push(port);
    probes.push(probe);
  }

  const closing = probes.map(
    (probe) => new Promise<void>((closed) => probe.close(() => closed())),
  );
  await Promise.all(closing);
  return ports;
}
|
||||
|
||||
/** A `net.Server` extended with `destroyAll()`, used by the helpers below to force-close connections. */
type TrackingServer = net.Server & { destroyAll: () => void };
|
||||
|
||||
/**
 * Starts a TCP echo server on `host:port`.
 *
 * Everything up to and including the first `\r\n` of a connection (the PROXY
 * protocol v1 header line) is dropped; every byte after it is written back.
 * Open sockets are tracked so `destroyAll()` can force-close them.
 */
function startEchoServer(port: number, host: string): Promise<TrackingServer> {
  const openSockets = new Set<net.Socket>();

  const handleConnection = (socket: net.Socket): void => {
    openSockets.add(socket);
    socket.on('close', () => openSockets.delete(socket));
    socket.on('error', () => {});

    // Skip PROXY protocol v1 header line before echoing
    let headerSkipped = false;
    let headerBuf = Buffer.alloc(0);
    socket.on('data', (data: Buffer) => {
      if (headerSkipped) {
        socket.write(data);
        return;
      }
      headerBuf = Buffer.concat([headerBuf, data]);
      const lineEnd = headerBuf.indexOf('\r\n');
      if (lineEnd === -1) {
        return; // header line still incomplete
      }
      headerSkipped = true;
      const afterHeader = headerBuf.subarray(lineEnd + 2);
      if (afterHeader.length > 0) {
        socket.write(afterHeader);
      }
    });
  };

  return new Promise((resolve, reject) => {
    const server = net.createServer(handleConnection) as TrackingServer;

    server.destroyAll = () => {
      for (const conn of openSockets) conn.destroy();
      openSockets.clear();
    };

    server.on('error', reject);
    server.listen(port, host, () => resolve(server));
  });
}
|
||||
|
||||
/**
 * Starts a server that answers each connection with `responseSize` bytes.
 *
 * The response is triggered by the first data received after the PROXY header
 * line (terminated by `\r\n`) and is sent at most once per connection. The
 * server does NOT wait for the client to end the connection (the tunnel
 * protocol has no half-close).
 */
function startLargeResponseServer(port: number, host: string, responseSize: number): Promise<TrackingServer> {
  return new Promise((resolve, reject) => {
    const liveSockets = new Set<net.Socket>();

    const server = net.createServer((socket) => {
      liveSockets.add(socket);
      socket.on('close', () => liveSockets.delete(socket));

      let headerSeen = false;
      let headerBuf = Buffer.alloc(0);
      let replied = false;

      /** Sends the large response the first time it is called, then does nothing. */
      const replyOnce = (): void => {
        if (replied) {
          return;
        }
        replied = true;
        sendLargeResponse(socket, responseSize);
      };

      socket.on('data', (data: Buffer) => {
        if (headerSeen) {
          replyOnce();
          return;
        }
        headerBuf = Buffer.concat([headerBuf, data]);
        const lineEnd = headerBuf.indexOf('\r\n');
        if (lineEnd !== -1) {
          headerSeen = true;
          // Request bytes that arrived in the same chunk as the header count too.
          if (headerBuf.subarray(lineEnd + 2).length > 0) {
            replyOnce();
          }
        }
      });
      socket.on('error', () => {});
    }) as TrackingServer;

    server.destroyAll = () => {
      for (const conn of liveSockets) conn.destroy();
      liveSockets.clear();
    };

    server.on('error', reject);
    server.listen(port, host, () => resolve(server));
  });
}
|
||||
|
||||
/**
 * Writes `totalBytes` bytes to `socket` in chunks of up to 32 KiB, then ends it.
 *
 * Honors backpressure: when `socket.write()` returns `false`, writing resumes
 * on the next `'drain'` event.
 *
 * Each chunk is filled with its chunk index modulo 256, giving a deterministic
 * pattern that differs between neighbouring chunks. (The previous fill value,
 * `sent % 256`, was always 0 because `sent` only ever advances in multiples of
 * the 32 KiB chunk size.)
 */
function sendLargeResponse(socket: net.Socket, totalBytes: number): void {
  const chunkSize = 32 * 1024;
  let sent = 0;
  const writeChunk = (): void => {
    while (sent < totalBytes) {
      const toWrite = Math.min(chunkSize, totalBytes - sent);
      // Use a deterministic pattern for verification
      const fill = Math.floor(sent / chunkSize) & 0xff;
      const chunk = Buffer.alloc(toWrite, fill);
      const canContinue = socket.write(chunk);
      sent += toWrite;
      if (!canContinue) {
        socket.once('drain', writeChunk);
        return;
      }
    }
    socket.end();
  };
  writeChunk();
}
|
||||
|
||||
/**
 * Force-closes `server`: destroys every tracked connection first, then waits
 * for the server's `close()` callback.
 */
async function forceCloseServer(server: TrackingServer): Promise<void> {
  server.destroyAll();
  const closed = new Promise<void>((done) => {
    server.close(() => done());
  });
  await closed;
}
|
||||
|
||||
/** Handles returned by `startTunnel()` for a running hub + edge pair. */
interface TestTunnel {
  /** The hub instance that was started. */
  hub: RemoteIngressHub;
  /** The edge instance that was started. */
  edge: RemoteIngressEdge;
  /** The port passed to `startTunnel()` as the edge listen port. */
  edgePort: number;
  /** Stops the edge, then the hub. */
  cleanup: () => Promise<void>;
}
|
||||
|
||||
/**
 * Start a full hub + edge tunnel.
 * Edge binds to 127.0.0.1, upstream server binds to 127.0.0.2.
 * Hub targetHost = 127.0.0.2 so hub -> upstream doesn't loop back to edge.
 *
 * Resolves once the edge has emitted `tunnelConnected` (plus a 500 ms settle
 * delay). Rejects if `edge.start()` fails or if the edge does not connect
 * within 10 s; in both cases the connect timer is cleared so it cannot keep
 * the process alive or surface later as an unhandled rejection.
 */
async function startTunnel(edgePort: number, hubPort: number): Promise<TestTunnel> {
  const hub = new RemoteIngressHub();
  const edge = new RemoteIngressEdge();

  await hub.start({
    tunnelPort: hubPort,
    targetHost: '127.0.0.2',
  });

  await hub.updateAllowedEdges([
    { id: 'test-edge', secret: 'test-secret', listenPorts: [edgePort] },
  ]);

  let connectTimer: ReturnType<typeof setTimeout> | undefined;
  const connectedPromise = new Promise<void>((resolve, reject) => {
    connectTimer = setTimeout(() => reject(new Error('Edge did not connect within 10s')), 10000);
    edge.once('tunnelConnected', () => {
      clearTimeout(connectTimer);
      resolve();
    });
  });

  try {
    // Await both together so a rejection of either promise is always handled,
    // even while the other one is still pending.
    await Promise.all([
      edge.start({
        hubHost: '127.0.0.1',
        hubPort,
        edgeId: 'test-edge',
        secret: 'test-secret',
        bindAddress: '127.0.0.1',
      }),
      connectedPromise,
    ]);
  } catch (err) {
    clearTimeout(connectTimer);
    throw err;
  }

  // Brief settle delay after the tunnel reports connected.
  await new Promise((resolve) => setTimeout(resolve, 500));

  return {
    hub,
    edge,
    edgePort,
    cleanup: async () => {
      await edge.stop();
      await hub.stop();
    },
  };
}
|
||||
|
||||
/**
 * Sends `data` to 127.0.0.1:`port`, ends the write side, and collects the reply.
 *
 * Resolves with everything received as soon as at least `data.length` bytes
 * have arrived, or with whatever was received when the peer ends the
 * connection. Rejects on a socket error or after `timeoutMs`.
 */
function sendAndReceive(port: number, data: Buffer, timeoutMs = 30000): Promise<Buffer> {
  return new Promise((resolve, reject) => {
    const received: Buffer[] = [];
    const expectedLength = data.length;
    let totalReceived = 0;
    let settled = false;

    /** Runs `outcome` for the first terminal event only and stops the timeout. */
    const settleOnce = (outcome: () => void): void => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      outcome();
    };

    const client = net.createConnection({ host: '127.0.0.1', port }, () => {
      client.write(data);
      client.end();
    });

    const timer = setTimeout(() => {
      settleOnce(() => {
        client.destroy();
        reject(new Error(`Timeout after ${timeoutMs}ms — received ${totalReceived}/${expectedLength} bytes`));
      });
    }, timeoutMs);

    client.on('data', (chunk: Buffer) => {
      received.push(chunk);
      totalReceived += chunk.length;
      if (totalReceived < expectedLength) {
        return;
      }
      settleOnce(() => {
        client.destroy();
        resolve(Buffer.concat(received));
      });
    });

    client.on('end', () => {
      settleOnce(() => resolve(Buffer.concat(received)));
    });

    client.on('error', (err) => {
      settleOnce(() => reject(err));
    });
  });
}
|
||||
|
||||
/**
 * Connects to 127.0.0.1:`port`, sends a (small) request and collects a large
 * response.
 *
 * Does NOT call `end()` — the tunnel has no half-close. Resolves as soon as
 * at least `expectedResponseSize` bytes have arrived, or with whatever was
 * received when the peer ends the connection. Rejects on a socket error or
 * after `timeoutMs`.
 */
function sendAndReceiveLarge(
  port: number,
  data: Buffer,
  expectedResponseSize: number,
  timeoutMs = 60000,
): Promise<Buffer> {
  return new Promise((resolve, reject) => {
    const received: Buffer[] = [];
    let totalReceived = 0;
    let settled = false;

    /** Runs `outcome` for the first terminal event only and stops the timeout. */
    const settleOnce = (outcome: () => void): void => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      outcome();
    };

    const client = net.createConnection({ host: '127.0.0.1', port }, () => {
      // Do NOT call client.end() — the server will respond immediately
      // and the tunnel CLOSE will happen when the download finishes
      client.write(data);
    });

    const timer = setTimeout(() => {
      settleOnce(() => {
        client.destroy();
        reject(new Error(`Timeout after ${timeoutMs}ms — received ${totalReceived}/${expectedResponseSize} bytes`));
      });
    }, timeoutMs);

    client.on('data', (chunk: Buffer) => {
      received.push(chunk);
      totalReceived += chunk.length;
      if (totalReceived < expectedResponseSize) {
        return;
      }
      settleOnce(() => {
        client.destroy();
        resolve(Buffer.concat(received));
      });
    });

    client.on('end', () => {
      settleOnce(() => resolve(Buffer.concat(received)));
    });

    client.on('error', (err) => {
      settleOnce(() => reject(err));
    });
  });
}
|
||||
|
||||
/** Returns the SHA-256 digest of `buf` as a lowercase hex string. */
function sha256(buf: Buffer): string {
  const hasher = crypto.createHash('sha256');
  hasher.update(buf);
  return hasher.digest('hex');
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
let tunnel: TestTunnel;
|
||||
let echoServer: TrackingServer;
|
||||
let hubPort: number;
|
||||
let edgePort: number;
|
||||
|
||||
// Allocates two free ports, starts the upstream echo server on 127.0.0.2 and
// the hub + edge tunnel, then checks that the hub reports itself as running.
tap.test('setup: start echo server and tunnel', async () => {
  const freePorts = await findFreePorts(2);
  hubPort = freePorts[0];
  edgePort = freePorts[1];

  echoServer = await startEchoServer(edgePort, '127.0.0.2');
  tunnel = await startTunnel(edgePort, hubPort);

  expect(tunnel.hub.running).toBeTrue();
});
|
||||
|
||||
// Echoes 32 MB of random data through one stream and verifies size and hash.
tap.test('single stream: 32MB transfer exceeding initial 4MB window (multiple refills)', async () => {
  const size = 32 * 1024 * 1024;
  const payload = crypto.randomBytes(size);
  const payloadHash = sha256(payload);

  const echoed = await sendAndReceive(edgePort, payload, 60000);

  expect(echoed.length).toEqual(size);
  expect(sha256(echoed)).toEqual(payloadHash);
});
|
||||
|
||||
// Runs 200 echo round-trips of 64 KB concurrently; every stream must return
// exactly the bytes it sent.
tap.test('200 concurrent streams with 64KB each', async () => {
  const streamCount = 200;
  const payloadSize = 64 * 1024;

  const roundTrip = async () => {
    const payload = crypto.randomBytes(payloadSize);
    const sent = sha256(payload);
    const echoed = await sendAndReceive(edgePort, payload, 30000);
    return {
      sent,
      received: sha256(echoed),
      sizeOk: echoed.length === payloadSize,
    };
  };

  const results = await Promise.all(Array.from({ length: streamCount }, () => roundTrip()));
  const failures = results.filter((r) => !(r.sizeOk && r.sent === r.received));

  expect(failures.length).toEqual(0);
});
|
||||
|
||||
// Runs 512 echo round-trips of 16 KB concurrently; every stream must return
// exactly the bytes it sent.
tap.test('512 concurrent streams at minimum window boundary (16KB each)', async () => {
  const streamCount = 512;
  const payloadSize = 16 * 1024;

  const roundTrip = async () => {
    const payload = crypto.randomBytes(payloadSize);
    const sent = sha256(payload);
    const echoed = await sendAndReceive(edgePort, payload, 60000);
    return {
      sent,
      received: sha256(echoed),
      sizeOk: echoed.length === payloadSize,
    };
  };

  const results = await Promise.all(Array.from({ length: streamCount }, () => roundTrip()));
  const failures = results.filter((r) => !(r.sizeOk && r.sent === r.received));

  expect(failures.length).toEqual(0);
});
|
||||
|
||||
// Replaces the echo server with the large-response server, sends a 4 KB
// request and expects a 4 MB response; the echo server is restored afterwards.
tap.test('asymmetric transfer: 4KB request -> 4MB response', async () => {
  const requestSize = 4 * 1024; // 4 KB
  const responseSize = 4 * 1024 * 1024; // 4 MB

  // Swap to large-response server
  await forceCloseServer(echoServer);
  const largeServer = await startLargeResponseServer(edgePort, '127.0.0.2', responseSize);

  try {
    const request = crypto.randomBytes(requestSize);
    const response = await sendAndReceiveLarge(edgePort, request, responseSize, 60000);
    expect(response.length).toEqual(responseSize);
  } finally {
    // Always restore echo server even on failure
    await forceCloseServer(largeServer);
    echoServer = await startEchoServer(edgePort, '127.0.0.2');
  }
});
|
||||
|
||||
// Runs 100 echo round-trips of 1 MB concurrently; every stream must return
// exactly the bytes it sent.
// NOTE(review): the title says "exceeding 200MB budget", but 100 x 1 MB is
// 100 MB in total — confirm which figure is intended.
tap.test('100 streams x 1MB each (100MB total exceeding 200MB budget)', async () => {
  const streamCount = 100;
  const payloadSize = 1 * 1024 * 1024;

  const roundTrip = async () => {
    const payload = crypto.randomBytes(payloadSize);
    const sent = sha256(payload);
    const echoed = await sendAndReceive(edgePort, payload, 120000);
    return {
      sent,
      received: sha256(echoed),
      sizeOk: echoed.length === payloadSize,
    };
  };

  const results = await Promise.all(Array.from({ length: streamCount }, () => roundTrip()));
  const failures = results.filter((r) => !(r.sizeOk && r.sent === r.received));

  expect(failures.length).toEqual(0);
});
|
||||
|
||||
// Opens N idle connections through the edge and checks that the edge's
// reported active-stream count reflects them.
tap.test('active stream counter tracks concurrent connections', async () => {
  const N = 50;

  // Every socket is recorded the moment it is created so the `finally` below
  // can destroy it even when a connect attempt or an assertion fails.
  const sockets: net.Socket[] = [];

  const openSocket = (): Promise<net.Socket> =>
    new Promise<net.Socket>((resolve, reject) => {
      const sock = net.createConnection({ host: '127.0.0.1', port: edgePort });
      sockets.push(sock);

      const timer = setTimeout(() => reject(new Error('connect timeout')), 5000);

      sock.once('connect', () => {
        clearTimeout(timer); // don't leave a 5s timer pending per socket
        resolve(sock);
      });

      // Before connect: fail fast with the real error instead of waiting for
      // the timeout. After connect: reject() is a no-op, so late socket
      // errors are still absorbed rather than crashing the process.
      sock.on('error', (err) => {
        clearTimeout(timer);
        reject(err);
      });
    });

  try {
    // Open N connections and keep them alive (nothing is sent or closed yet).
    await Promise.all(Array.from({ length: N }, () => openSocket()));

    // Brief delay for stream registration to propagate
    await new Promise((resolve) => setTimeout(resolve, 500));

    // Verify the edge reports >= N active streams.
    // Per the original author's note, this counter is the input to
    // compute_window_for_stream_count(), so its accuracy determines whether
    // adaptive window sizing is correct.
    const status = await tunnel.edge.getStatus();
    expect(status.activeStreams).toBeGreaterThanOrEqual(N);
  } finally {
    // Clean up client side; the original note says the tunnel's 300s stream
    // timeout handles any remaining tunnel-side cleanup.
    for (const sock of sockets) {
      sock.destroy();
    }
  }
});
|
||||
|
||||
// 50 concurrent streams of 2 MB each, verified by length and SHA-256.
tap.test('50 streams x 2MB each (forces multiple window refills per stream)', async () => {
  // Original author's note: at 50 concurrent streams the adaptive window is
  // 200MB/50 = 4MB per stream, and each 2MB stream needs ~3 WINDOW_UPDATE
  // refill cycles.
  // NOTE(review): a 2MB payload inside a 4MB window would not obviously need
  // refills — confirm the intended window size against the protocol code.
  const streamCount = 50;
  const payloadSize = 2 * 1024 * 1024;

  // Each call starts its transfer immediately, so all streams overlap.
  const runStream = async () => {
    const data = crypto.randomBytes(payloadSize);
    const hash = sha256(data);
    const echoed = await sendAndReceive(edgePort, data, 120000);
    return {
      sent: hash,
      received: sha256(echoed),
      sizeOk: echoed.length === payloadSize,
    };
  };

  const outcomes = await Promise.all(Array.from({ length: streamCount }, () => runStream()));

  const failures = outcomes.filter((o) => !(o.sizeOk && o.sent === o.received));
  expect(failures.length).toEqual(0);
});
|
||||
|
||||
// Final step of the suite: shut the tunnel down first, then force-close the
// echo server it was forwarding to.
tap.test('teardown: stop tunnel and echo server', async () => {
  await tunnel.cleanup();

  await forceCloseServer(echoServer);
});
|
||||
|
||||
export default tap.start();
|
||||
402
test/test.loadtest.node.ts
Normal file
402
test/test.loadtest.node.ts
Normal file
@@ -0,0 +1,402 @@
|
||||
import { expect, tap } from '@push.rocks/tapbundle';
|
||||
import * as net from 'net';
|
||||
import * as stream from 'stream';
|
||||
import * as crypto from 'crypto';
|
||||
import { RemoteIngressHub, RemoteIngressEdge } from '../ts/index.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers (self-contained — same patterns as test.flowcontrol.node.ts)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Picks `count` free TCP ports on 127.0.0.1.
 *
 * A throwaway server is bound to port 0 for each requested port so the OS
 * chooses one. All probe servers stay open until every port has been
 * collected, then they are closed.
 *
 * @param count Number of ports to find.
 * @returns The port numbers, in the order they were assigned. The ports are
 *          released before returning, so nothing stops another process from
 *          taking one before the caller binds it.
 */
async function findFreePorts(count: number): Promise<number[]> {
  const probes: net.Server[] = [];
  const ports: number[] = [];

  while (probes.length < count) {
    const probe = net.createServer();
    await new Promise<void>((done) => {
      probe.listen(0, '127.0.0.1', () => done());
    });
    const bound = probe.address() as net.AddressInfo;
    ports.push(bound.port);
    probes.push(probe);
  }

  // Release everything only after all ports were handed out.
  const closing = probes.map(
    (probe) =>
      new Promise<void>((done) => {
        probe.close(() => done());
      }),
  );
  await Promise.all(closing);

  return ports;
}
|
||||
|
||||
/** A `net.Server` that can also force-close every client socket it is tracking. */
type TrackingServer = net.Server & { destroyAll: () => void };

/**
 * Starts a TCP echo server that skips a leading header line.
 *
 * For each connection, everything up to and including the first CRLF is
 * discarded (the code treats it as a proxy header); all bytes after it are
 * written straight back to the sender.
 *
 * @param port Port to listen on.
 * @param host Address to bind.
 * @returns Resolves with the server once it is listening; rejects if the
 *          server emits `'error'` (e.g. the bind fails).
 */
function startEchoServer(port: number, host: string): Promise<TrackingServer> {
  return new Promise((resolve, reject) => {
    const openSockets = new Set<net.Socket>();

    const handleConnection = (socket: net.Socket): void => {
      openSockets.add(socket);
      socket.on('close', () => openSockets.delete(socket));
      // Socket errors are ignored on purpose; 'close' still does the bookkeeping.
      socket.on('error', () => {});

      let headerDone = false;
      let headerBuf = Buffer.alloc(0);

      socket.on('data', (data: Buffer) => {
        if (headerDone) {
          socket.write(data);
          return;
        }

        // The header line may arrive split across chunks: buffer until CRLF.
        headerBuf = Buffer.concat([headerBuf, data]);
        const crlfAt = headerBuf.indexOf('\r\n');
        if (crlfAt === -1) return;

        headerDone = true;
        // Payload bytes that shared a chunk with the header get echoed too.
        const rest = headerBuf.subarray(crlfAt + 2);
        if (rest.length > 0) socket.write(rest);
      });
    };

    const server: TrackingServer = Object.assign(net.createServer(handleConnection), {
      destroyAll: () => {
        for (const sock of openSockets) sock.destroy();
        openSockets.clear();
      },
    });

    server.on('error', reject);
    server.listen(port, host, () => resolve(server));
  });
}
|
||||
|
||||
/**
 * Connects to 127.0.0.1:`port`, sends `data`, half-closes the socket, and
 * collects whatever comes back.
 *
 * The promise settles exactly once, on whichever happens first:
 *  - at least `data.length` bytes were received → resolves (socket destroyed);
 *  - the peer ended the stream → resolves with what was received so far;
 *  - a socket error → rejects with that error;
 *  - `timeoutMs` elapsed → rejects with a progress message (socket destroyed).
 *
 * @param port      Local port to connect to.
 * @param data      Payload to send; its length is the expected reply length.
 * @param timeoutMs Overall deadline in milliseconds (default 30000).
 * @returns All received chunks concatenated.
 */
function sendAndReceive(port: number, data: Buffer, timeoutMs = 30000): Promise<Buffer> {
  return new Promise((resolve, reject) => {
    const expectedLength = data.length;
    const chunks: Buffer[] = [];
    let totalReceived = 0;
    let settled = false;

    // Runs `action` only for the first caller; later settle attempts are ignored.
    const settleOnce = (action: () => void): void => {
      if (settled) return;
      settled = true;
      action();
    };

    const client = net.createConnection({ host: '127.0.0.1', port }, () => {
      client.write(data);
      client.end();
    });

    const timer = setTimeout(() => {
      settleOnce(() => {
        client.destroy();
        reject(new Error(`Timeout after ${timeoutMs}ms — received ${totalReceived}/${expectedLength} bytes`));
      });
    }, timeoutMs);

    client.on('data', (chunk: Buffer) => {
      chunks.push(chunk);
      totalReceived += chunk.length;
      if (totalReceived < expectedLength) return;

      // Full reply is in; don't wait for the peer to close.
      settleOnce(() => {
        clearTimeout(timer);
        client.destroy();
        resolve(Buffer.concat(chunks));
      });
    });

    client.on('end', () => {
      // Peer finished early: return the (possibly short) data and let the caller judge.
      settleOnce(() => {
        clearTimeout(timer);
        resolve(Buffer.concat(chunks));
      });
    });

    client.on('error', (err) => {
      settleOnce(() => {
        clearTimeout(timer);
        reject(err);
      });
    });
  });
}
|
||||
|
||||
/**
 * Computes the SHA-256 digest of a buffer.
 *
 * @param buf Bytes to hash.
 * @returns The digest as a lowercase hex string.
 */
function sha256(buf: Buffer): string {
  const hasher = crypto.createHash('sha256');
  hasher.update(buf);
  return hasher.digest('hex');
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Throttle Proxy: rate-limits TCP traffic between edge and hub
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Pass-through stream that limits throughput with a simple byte budget.
 *
 * The budget ("bucket") starts full at one second's worth of bytes, is topped
 * up by elapsed time on every chunk, and never exceeds one second's worth.
 * A chunk that fits is forwarded at once; a chunk that does not fit is held
 * back whole (never split) for the time its shortfall would take to accrue,
 * capped at one second.
 */
class ThrottleTransform extends stream.Transform {
  /** Target rate in bytes per second; also the bucket's capacity. */
  private bytesPerSec: number;
  /** Bytes that may currently be forwarded without delay. */
  private bucket: number;
  /** Timestamp (ms) of the last bucket refill. */
  private lastRefill: number;
  /** Set by `_destroy`; lets in-flight work bail out without emitting data. */
  private destroyed_: boolean = false;

  /**
   * @param bytesPerSecond Maximum sustained throughput in bytes per second.
   */
  constructor(bytesPerSecond: number) {
    super();
    this.bytesPerSec = bytesPerSecond;
    this.bucket = bytesPerSecond;
    this.lastRefill = Date.now();
  }

  /**
   * Forwards `chunk` unchanged, either immediately or after a delay,
   * depending on the remaining budget.
   */
  _transform(chunk: Buffer, _encoding: BufferEncoding, callback: stream.TransformCallback) {
    if (this.destroyed_) return;

    // Top up the bucket for the time that passed since the last refill.
    const now = Date.now();
    const elapsedSec = (now - this.lastRefill) / 1000;
    this.bucket = Math.min(this.bytesPerSec, this.bucket + elapsedSec * this.bytesPerSec);
    this.lastRefill = now;

    if (chunk.length > this.bucket) {
      // Over budget: spend what is left and hold the whole chunk back.
      const shortfall = chunk.length - this.bucket;
      this.bucket = 0;
      const waitMs = Math.min((shortfall / this.bytesPerSec) * 1000, 1000);

      setTimeout(() => {
        if (this.destroyed_) {
          // Stream went away while waiting: complete the callback, emit nothing.
          callback();
          return;
        }
        // The wait itself paid for this chunk, so restart accounting from empty.
        this.lastRefill = Date.now();
        this.bucket = 0;
        callback(null, chunk);
      }, waitMs);
      return;
    }

    this.bucket -= chunk.length;
    callback(null, chunk);
  }

  /** Marks the stream destroyed so pending/delayed chunks are dropped. */
  _destroy(err: Error | null, callback: (error: Error | null) => void) {
    this.destroyed_ = true;
    callback(err);
  }
}
|
||||
|
||||
/** Handle for a running throttle proxy. */
interface ThrottleProxy {
  /** The listening proxy server. */
  server: net.Server;
  /** Destroys all tracked sockets and stops the server. */
  close: () => Promise<void>;
}

/**
 * Starts a TCP proxy on 127.0.0.1:`listenPort` that forwards every client
 * connection to `targetHost:targetPort`, rate-limiting each direction of each
 * connection with its own `ThrottleTransform`.
 *
 * @param listenPort     Local port the proxy listens on.
 * @param targetHost     Host to forward connections to.
 * @param targetPort     Port to forward connections to.
 * @param bytesPerSecond Rate limit applied per direction, per connection.
 * @returns Resolves once the proxy is listening. Rejects if the server fails
 *          to start listening (for example when the port is already in use).
 */
async function startThrottleProxy(
  listenPort: number,
  targetHost: string,
  targetPort: number,
  bytesPerSecond: number,
): Promise<ThrottleProxy> {
  // Both halves of every proxied connection, so close() can tear them down.
  const connections = new Set<net.Socket>();

  const server = net.createServer((clientSock) => {
    connections.add(clientSock);
    const upstream = net.createConnection({ host: targetHost, port: targetPort });
    connections.add(upstream);

    const throttleUp = new ThrottleTransform(bytesPerSecond);
    const throttleDown = new ThrottleTransform(bytesPerSecond);

    clientSock.pipe(throttleUp).pipe(upstream);
    upstream.pipe(throttleDown).pipe(clientSock);

    // Tear down the whole pair exactly once, whichever side fails or closes first.
    let cleaned = false;
    const cleanup = (source: string, err?: Error) => {
      if (cleaned) return;
      cleaned = true;
      if (err) {
        console.error(`[ThrottleProxy] cleanup triggered by ${source}: ${err.message}`);
      } else {
        console.error(`[ThrottleProxy] cleanup triggered by ${source} (no error)`);
      }
      console.error(`[ThrottleProxy] stack:`, new Error().stack);
      throttleUp.destroy();
      throttleDown.destroy();
      clientSock.destroy();
      upstream.destroy();
      connections.delete(clientSock);
      connections.delete(upstream);
    };

    clientSock.on('error', (e) => cleanup('clientSock.error', e));
    upstream.on('error', (e) => cleanup('upstream.error', e));
    throttleUp.on('error', (e) => cleanup('throttleUp.error', e));
    throttleDown.on('error', (e) => cleanup('throttleDown.error', e));
    clientSock.on('close', () => cleanup('clientSock.close'));
    upstream.on('close', () => cleanup('upstream.close'));
  });

  // A failed bind must reject instead of leaving this promise pending forever.
  // The temporary error listener is removed once listening succeeds, so later
  // server errors behave exactly as before.
  await new Promise<void>((resolve, reject) => {
    const onListenError = (err: Error) => reject(err);
    server.once('error', onListenError);
    server.listen(listenPort, '127.0.0.1', () => {
      server.off('error', onListenError);
      resolve();
    });
  });

  return {
    server,
    close: async () => {
      for (const c of connections) c.destroy();
      connections.clear();
      await new Promise<void>((resolve) => server.close(() => resolve()));
    },
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test state
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
let hub: RemoteIngressHub;
|
||||
let edge: RemoteIngressEdge;
|
||||
let echoServer: TrackingServer;
|
||||
let throttle: ThrottleProxy;
|
||||
let hubPort: number;
|
||||
let proxyPort: number;
|
||||
let edgePort: number;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Builds the fixture for this suite: echo backend, hub, throttle proxy, and an
// edge that reaches the hub through the proxy.
tap.test('setup: start throttled tunnel (100 Mbit/s)', async () => {
  [hubPort, proxyPort, edgePort] = await findFreePorts(3);

  echoServer = await startEchoServer(edgePort, '127.0.0.2');

  // Throttle proxy: edge → proxy → hub at 100 Mbit/s (12.5 MB/s)
  throttle = await startThrottleProxy(proxyPort, '127.0.0.1', hubPort, 12.5 * 1024 * 1024);

  hub = new RemoteIngressHub();
  edge = new RemoteIngressEdge();

  await hub.start({ tunnelPort: hubPort, targetHost: '127.0.0.2' });
  await hub.updateAllowedEdges([
    { id: 'test-edge', secret: 'test-secret', listenPorts: [edgePort] },
  ]);

  // Subscribe before starting the edge so the event cannot be missed.
  let connectTimer: ReturnType<typeof setTimeout> | undefined;
  const connectedPromise = new Promise<void>((resolve, reject) => {
    connectTimer = setTimeout(() => reject(new Error('Edge did not connect within 10s')), 10000);
    edge.once('tunnelConnected', () => resolve());
  });

  try {
    // Edge connects through throttle proxy. Awaiting both together means a
    // failing edge.start() cannot leave connectedPromise to reject later as
    // an unhandled rejection.
    await Promise.all([
      edge.start({
        hubHost: '127.0.0.1',
        hubPort: proxyPort,
        edgeId: 'test-edge',
        secret: 'test-secret',
        bindAddress: '127.0.0.1',
      }),
      connectedPromise,
    ]);
  } finally {
    // Always drop the 10s timer, on success and on failure.
    clearTimeout(connectTimer);
  }

  // Short settle time before checking the reported status.
  await new Promise((resolve) => setTimeout(resolve, 500));

  const status = await edge.getStatus();
  expect(status.connected).toBeTrue();
});
|
||||
|
||||
// Five concurrent 20 MB round-trips through the throttled tunnel; every
// stream must return identical bytes and the tunnel must stay connected.
tap.test('throttled: 5 streams x 20MB each through 100Mbit tunnel', async () => {
  const streamCount = 5;
  const payloadSize = 20 * 1024 * 1024; // 20MB per stream = 100MB total round-trip

  // Generate all payloads up front, then start every transfer.
  const payloads: Buffer[] = [];
  for (let i = 0; i < streamCount; i++) {
    payloads.push(crypto.randomBytes(payloadSize));
  }

  const roundTrips = payloads.map(async (data) => {
    const hash = sha256(data);
    const echoed = await sendAndReceive(edgePort, data, 300000);
    return {
      sent: hash,
      received: sha256(echoed),
      sizeOk: echoed.length === payloadSize,
    };
  });

  const outcomes = await Promise.all(roundTrips);
  const failures = outcomes.filter((o) => !(o.sizeOk && o.sent === o.received));
  expect(failures.length).toEqual(0);

  const { connected } = await edge.getStatus();
  expect(connected).toBeTrue();
});
|
||||
|
||||
// One client uploads 20 MB and then never reads the reply, so its
// download path backs up. Five normal 20 MB streams started afterwards must
// still complete intact, and the tunnel must stay connected.
tap.test('throttled: slow consumer with 20MB does not kill other streams', async () => {
  // Open a connection that creates download-direction backpressure:
  // send 20MB but DON'T read the response — client TCP receive buffer fills
  const slowSock = net.createConnection({ host: '127.0.0.1', port: edgePort });

  // If the tunnel resets the stalled stream, that must not crash the test
  // process with an unhandled 'error' event.
  slowSock.on('error', () => {});

  try {
    // Fail promptly if the connection cannot be established at all.
    await new Promise<void>((resolve, reject) => {
      slowSock.once('connect', () => resolve());
      slowSock.once('error', reject);
    });

    const slowData = crypto.randomBytes(20 * 1024 * 1024);
    slowSock.write(slowData);
    slowSock.end();
    // Don't read — backpressure builds on the download path
    // (no 'data' listener is attached, so the socket never starts flowing).

    // Wait for backpressure to develop
    await new Promise((r) => setTimeout(r, 2000));

    // Meanwhile, 5 normal echo streams with 20MB each must complete
    const payload = crypto.randomBytes(20 * 1024 * 1024);
    const hash = sha256(payload);
    const promises = Array.from({ length: 5 }, () =>
      sendAndReceive(edgePort, payload, 300000).then((r) => ({
        hash: sha256(r),
        sizeOk: r.length === payload.length,
      })),
    );
    const results = await Promise.all(promises);
    const failures = results.filter((r) => !r.sizeOk || r.hash !== hash);
    expect(failures.length).toEqual(0);

    // Tunnel still alive
    const status = await edge.getStatus();
    expect(status.connected).toBeTrue();
  } finally {
    // Release the stalled connection even when an assertion above failed, so
    // it cannot linger into later tests or teardown.
    slowSock.destroy();
  }
});
|
||||
|
||||
// Mixed load: a few long transfers running alongside many short ones.
// All of them must come back intact and the tunnel must stay connected.
tap.test('throttled: rapid churn — 3 x 20MB long + 50 x 1MB short streams', async () => {
  // Starts `copies` concurrent round-trips of the same payload and reports,
  // per stream, the hash of what came back and whether the length matched.
  const launch = (payload: Buffer, copies: number) =>
    Array.from({ length: copies }, async () => {
      const echoed = await sendAndReceive(edgePort, payload, 300000);
      return {
        hash: sha256(echoed),
        sizeOk: echoed.length === payload.length,
      };
    });

  // 3 long streams (20MB each) running alongside 50 short streams (1MB each)
  const longPayload = crypto.randomBytes(20 * 1024 * 1024);
  const longHash = sha256(longPayload);
  const longPromises = launch(longPayload, 3);

  const shortPayload = crypto.randomBytes(1024 * 1024);
  const shortHash = sha256(shortPayload);
  const shortPromises = launch(shortPayload, 50);

  const [longResults, shortResults] = await Promise.all([
    Promise.all(longPromises),
    Promise.all(shortPromises),
  ]);

  const longFails = longResults.filter((r) => !(r.sizeOk && r.hash === longHash));
  const shortFails = shortResults.filter((r) => !(r.sizeOk && r.hash === shortHash));
  expect(longFails.length).toEqual(0);
  expect(shortFails.length).toEqual(0);

  const { connected } = await edge.getStatus();
  expect(connected).toBeTrue();
});
|
||||
|
||||
// Three back-to-back bursts of five 20 MB streams. Only the returned length
// is checked here (no hash comparison). After each wave the tunnel must
// still report itself connected.
tap.test('throttled: 3 burst waves of 5 streams x 20MB each', async () => {
  const waveCount = 3;

  for (let wave = 0; wave < waveCount; wave++) {
    const streamCount = 5;
    const payloadSize = 20 * 1024 * 1024; // 20MB per stream = 100MB per wave

    const lengthChecks = Array.from({ length: streamCount }, async () => {
      const data = crypto.randomBytes(payloadSize);
      const echoed = await sendAndReceive(edgePort, data, 300000);
      return echoed.length === payloadSize;
    });

    const results = await Promise.all(lengthChecks);
    const ok = results.reduce((count, passed) => (passed ? count + 1 : count), 0);
    expect(ok).toEqual(streamCount);

    // Brief pause between waves
    await new Promise((r) => setTimeout(r, 500));

    const { connected } = await edge.getStatus();
    expect(connected).toBeTrue();
  }
});
|
||||
|
||||
// Sanity check after the heavy tests: a single 1 KB round-trip must still
// come back intact and the edge must still report a live connection.
tap.test('throttled: tunnel still works after all load tests', async () => {
  const data = crypto.randomBytes(1024);
  const expected = sha256(data);

  const echoed = await sendAndReceive(edgePort, data, 30000);
  expect(sha256(echoed)).toEqual(expected);

  const { connected } = await edge.getStatus();
  expect(connected).toBeTrue();
});
|
||||
|
||||
// Shuts the fixture down in order: edge, hub, throttle proxy, echo backend.
tap.test('teardown: stop tunnel', async () => {
  await edge.stop();
  await hub.stop();
  if (throttle) await throttle.close();

  // Guarded like `throttle`, so a failed setup does not also break teardown.
  if (echoServer) {
    // server.close() only completes once every connection has ended, so drop
    // any connection still open first; otherwise teardown can hang.
    echoServer.destroyAll();
    await new Promise<void>((resolve) => echoServer.close(() => resolve()));
  }
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -3,6 +3,6 @@
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: '@serve.zone/remoteingress',
|
||||
version: '4.5.9',
|
||||
version: '4.9.0',
|
||||
description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.'
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ type TEdgeCommands = {
|
||||
hubPort: number;
|
||||
edgeId: string;
|
||||
secret: string;
|
||||
bindAddress?: string;
|
||||
};
|
||||
result: { started: boolean };
|
||||
};
|
||||
@@ -38,6 +39,7 @@ export interface IEdgeConfig {
|
||||
hubPort?: number;
|
||||
edgeId: string;
|
||||
secret: string;
|
||||
bindAddress?: string;
|
||||
}
|
||||
|
||||
const MAX_RESTART_ATTEMPTS = 10;
|
||||
@@ -81,8 +83,10 @@ export class RemoteIngressEdge extends EventEmitter {
|
||||
this.bridge.on('management:tunnelConnected', () => {
|
||||
this.emit('tunnelConnected');
|
||||
});
|
||||
this.bridge.on('management:tunnelDisconnected', () => {
|
||||
this.emit('tunnelDisconnected');
|
||||
this.bridge.on('management:tunnelDisconnected', (data: { reason?: string }) => {
|
||||
const reason = data?.reason ?? 'unknown';
|
||||
console.log(`[RemoteIngressEdge] Tunnel disconnected: ${reason}`);
|
||||
this.emit('tunnelDisconnected', data);
|
||||
});
|
||||
this.bridge.on('management:publicIpDiscovered', (data: { ip: string }) => {
|
||||
this.emit('publicIpDiscovered', data);
|
||||
@@ -132,6 +136,7 @@ export class RemoteIngressEdge extends EventEmitter {
|
||||
hubPort: edgeConfig.hubPort ?? 8443,
|
||||
edgeId: edgeConfig.edgeId,
|
||||
secret: edgeConfig.secret,
|
||||
...(edgeConfig.bindAddress ? { bindAddress: edgeConfig.bindAddress } : {}),
|
||||
});
|
||||
|
||||
this.started = true;
|
||||
@@ -227,6 +232,7 @@ export class RemoteIngressEdge extends EventEmitter {
|
||||
hubPort: this.savedConfig.hubPort ?? 8443,
|
||||
edgeId: this.savedConfig.edgeId,
|
||||
secret: this.savedConfig.secret,
|
||||
...(this.savedConfig.bindAddress ? { bindAddress: this.savedConfig.bindAddress } : {}),
|
||||
});
|
||||
|
||||
this.started = true;
|
||||
|
||||
@@ -93,7 +93,9 @@ export class RemoteIngressHub extends EventEmitter {
|
||||
this.bridge.on('management:edgeConnected', (data: { edgeId: string; peerAddr: string }) => {
|
||||
this.emit('edgeConnected', data);
|
||||
});
|
||||
this.bridge.on('management:edgeDisconnected', (data: { edgeId: string }) => {
|
||||
this.bridge.on('management:edgeDisconnected', (data: { edgeId: string; reason?: string }) => {
|
||||
const reason = data?.reason ?? 'unknown';
|
||||
console.log(`[RemoteIngressHub] Edge ${data.edgeId} disconnected: ${reason}`);
|
||||
this.emit('edgeDisconnected', data);
|
||||
});
|
||||
this.bridge.on('management:streamOpened', (data: { edgeId: string; streamId: number }) => {
|
||||
|
||||
Reference in New Issue
Block a user