Compare commits

...

4 Commits

Author SHA1 Message Date
e4807be00b v4.9.1
Some checks failed
Default (tags) / security (push) Failing after 1s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-18 00:30:03 +00:00
b649322e65 fix(readme): document QoS tiers, heartbeat frames, and adaptive flow control in the protocol overview 2026-03-18 00:30:03 +00:00
d89d1cfbbf v4.9.0
Some checks failed
Default (tags) / security (push) Failing after 1s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-18 00:13:14 +00:00
6cbe8bee5e feat(protocol): add sustained-stream tunnel scheduling to isolate high-throughput traffic 2026-03-18 00:13:14 +00:00
7 changed files with 232 additions and 34 deletions

View File

@@ -1,5 +1,19 @@
# Changelog # Changelog
## 2026-03-18 - 4.9.1 - fix(readme)
document QoS tiers, heartbeat frames, and adaptive flow control in the protocol overview
- Adds PING, PONG, WINDOW_UPDATE, and WINDOW_UPDATE_BACK frame types to the protocol documentation
- Describes the 3-tier priority queues for control, normal data, and sustained traffic
- Explains sustained stream classification and adaptive per-stream window sizing
## 2026-03-18 - 4.9.0 - feat(protocol)
add sustained-stream tunnel scheduling to isolate high-throughput traffic
- Introduce a third low-priority sustained queue in TunnelIo with a forced drain budget to prevent long-lived high-bandwidth streams from starving control and normal data frames.
- Classify upload and download streams as sustained after exceeding the throughput threshold for the minimum duration, and route their DATA and CLOSE frames through the sustained channel.
- Wire the new sustained channel through edge and hub stream handling so sustained traffic is scheduled consistently on both sides of the tunnel.
## 2026-03-18 - 4.8.19 - fix(remoteingress-protocol) ## 2026-03-18 - 4.8.19 - fix(remoteingress-protocol)
reduce per-stream flow control windows and increase control channel buffering reduce per-stream flow control windows and increase control channel buffering

View File

@@ -1,6 +1,6 @@
{ {
"name": "@serve.zone/remoteingress", "name": "@serve.zone/remoteingress",
"version": "4.8.19", "version": "4.9.1",
"private": false, "private": false,
"description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.", "description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.",
"main": "dist_ts/index.js", "main": "dist_ts/index.js",

View File

@@ -17,7 +17,7 @@ pnpm install @serve.zone/remoteingress
`@serve.zone/remoteingress` uses a **Hub/Edge** topology with a high-performance Rust core and a TypeScript API surface: `@serve.zone/remoteingress` uses a **Hub/Edge** topology with a high-performance Rust core and a TypeScript API surface:
``` ```
┌─────────────────────┐ TLS Tunnel ┌─────────────────────┐ ┌─────────────────────┐ TLS Tunnel ┌─────────────────────┐
│ Network Edge │ ◄══════════════════════════► │ Private Cluster │ │ Network Edge │ ◄══════════════════════════► │ Private Cluster │
│ │ (multiplexed frames + │ │ │ │ (multiplexed frames + │ │
│ RemoteIngressEdge │ shared-secret auth) │ RemoteIngressHub │ │ RemoteIngressEdge │ shared-secret auth) │ RemoteIngressHub │
@@ -48,6 +48,8 @@ pnpm install @serve.zone/remoteingress
- 🎛️ **Dynamic port configuration** — the hub assigns listen ports per edge and can hot-reload them at runtime via `FRAME_CONFIG` frames - 🎛️ **Dynamic port configuration** — the hub assigns listen ports per edge and can hot-reload them at runtime via `FRAME_CONFIG` frames
- 📣 **Event-driven** — both Hub and Edge extend `EventEmitter` for real-time monitoring - 📣 **Event-driven** — both Hub and Edge extend `EventEmitter` for real-time monitoring
-**Rust core** — all frame encoding, TLS, and TCP proxying happen in native code for maximum throughput -**Rust core** — all frame encoding, TLS, and TCP proxying happen in native code for maximum throughput
- 🎚️ **3-tier QoS** — control frames, normal data, and sustained (elephant flow) traffic each get their own priority queue
- 📊 **Adaptive flow control** — per-stream windows scale with active stream count to prevent memory overuse
## 🚀 Usage ## 🚀 Usage
@@ -280,6 +282,10 @@ The tunnel uses a custom binary frame protocol over TLS:
| `DATA_BACK` | `0x04` | Hub → Edge | Response data flowing downstream | | `DATA_BACK` | `0x04` | Hub → Edge | Response data flowing downstream |
| `CLOSE_BACK` | `0x05` | Hub → Edge | Upstream (SmartProxy) closed the connection | | `CLOSE_BACK` | `0x05` | Hub → Edge | Upstream (SmartProxy) closed the connection |
| `CONFIG` | `0x06` | Hub → Edge | Runtime configuration update (e.g. port changes); payload is JSON | | `CONFIG` | `0x06` | Hub → Edge | Runtime configuration update (e.g. port changes); payload is JSON |
| `PING` | `0x07` | Hub → Edge | Heartbeat probe (sent every 15s) |
| `PONG` | `0x08` | Edge → Hub | Heartbeat response |
| `WINDOW_UPDATE` | `0x09` | Edge → Hub | Per-stream flow control: edge consumed N bytes, hub can send more |
| `WINDOW_UPDATE_BACK` | `0x0A` | Hub → Edge | Per-stream flow control: hub consumed N bytes, edge can send more |
Max payload size per frame: **16 MB**. Stream IDs are 32-bit unsigned integers. Max payload size per frame: **16 MB**. Stream IDs are 32-bit unsigned integers.
@@ -292,6 +298,42 @@ Max payload size per frame: **16 MB**. Stream IDs are 32-bit unsigned integers.
5. Frame protocol begins — `OPEN`/`DATA`/`CLOSE` frames flow in both directions 5. Frame protocol begins — `OPEN`/`DATA`/`CLOSE` frames flow in both directions
6. Hub can push `CONFIG` frames at any time to update the edge's listen ports 6. Hub can push `CONFIG` frames at any time to update the edge's listen ports
## 🎚️ QoS & Flow Control
The tunnel multiplexer uses a **3-tier priority system** and **per-stream flow control** to ensure fair bandwidth sharing across thousands of concurrent streams.
### Priority Tiers
All outbound frames are queued into one of three priority levels:
| Tier | Queue | Frames | Behavior |
|------|-------|--------|----------|
| 🔴 **Control** (highest) | `ctrl_queue` | PING, PONG, WINDOW_UPDATE, OPEN, CLOSE, CONFIG | Always drained first. Never delayed. |
| 🟡 **Data** (normal) | `data_queue` | DATA, DATA_BACK from normal streams | Drained when ctrl is empty. Gated at 64 buffered items for backpressure. |
| 🟢 **Sustained** (lowest) | `sustained_queue` | DATA, DATA_BACK from elephant flows | Drained freely when ctrl+data are empty. Otherwise guaranteed **1 MB/s** via forced drain every second. |
This prevents large bulk transfers (e.g. git clones, file downloads) from starving interactive traffic and ensures `WINDOW_UPDATE` frames are never delayed — which would cause flow control deadlocks.
### Sustained Stream Classification
A stream is automatically classified as **sustained** (elephant flow) when:
- It has been active for **>10 seconds**, AND
- Its average throughput exceeds **20 Mbit/s** (2.5 MB/s)
Once classified, the stream's flow control window is locked to the **1 MB floor** and its data frames move to the lowest-priority queue. Classification is one-way — a stream never gets promoted back to normal.
### Adaptive Per-Stream Windows
Each stream has a send window that limits bytes-in-flight. The window size adapts to the number of active streams using a shared **200 MB memory budget**:
| Active Streams | Window per Stream |
|---|---|
| 150 | 4 MB (maximum) |
| 51100 | Scales down (4 MB → 2 MB) |
| 200+ | 1 MB (floor) |
The consumer sends `WINDOW_UPDATE` frames after processing data, allowing the producer to send more. This prevents any single stream from consuming unbounded memory and provides natural backpressure.
## 💡 Example Scenarios ## 💡 Example Scenarios
### 1. Expose a Private Kubernetes Cluster to the Internet ### 1. Expose a Private Kubernetes Cluster to the Internet

View File

@@ -293,6 +293,7 @@ async fn handle_edge_frame(
event_tx: &mpsc::Sender<EdgeEvent>, event_tx: &mpsc::Sender<EdgeEvent>,
tunnel_writer_tx: &mpsc::Sender<Bytes>, tunnel_writer_tx: &mpsc::Sender<Bytes>,
tunnel_data_tx: &mpsc::Sender<Bytes>, tunnel_data_tx: &mpsc::Sender<Bytes>,
tunnel_sustained_tx: &mpsc::Sender<Bytes>,
port_listeners: &mut HashMap<u16, JoinHandle<()>>, port_listeners: &mut HashMap<u16, JoinHandle<()>>,
active_streams: &Arc<AtomicU32>, active_streams: &Arc<AtomicU32>,
next_stream_id: &Arc<AtomicU32>, next_stream_id: &Arc<AtomicU32>,
@@ -343,6 +344,7 @@ async fn handle_edge_frame(
port_listeners, port_listeners,
tunnel_writer_tx, tunnel_writer_tx,
tunnel_data_tx, tunnel_data_tx,
tunnel_sustained_tx,
client_writers, client_writers,
active_streams, active_streams,
next_stream_id, next_stream_id,
@@ -499,6 +501,7 @@ async fn connect_to_hub_and_run(
// Stream handlers send through these channels → TunnelIo drains them. // Stream handlers send through these channels → TunnelIo drains them.
let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Bytes>(512); let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Bytes>(512);
let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Bytes>(4096); let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Bytes>(4096);
let (tunnel_sustained_tx, mut tunnel_sustained_rx) = mpsc::channel::<Bytes>(4096);
let tunnel_writer_tx = tunnel_ctrl_tx.clone(); let tunnel_writer_tx = tunnel_ctrl_tx.clone();
// Start TCP listeners for initial ports // Start TCP listeners for initial ports
@@ -509,6 +512,7 @@ async fn connect_to_hub_and_run(
&mut port_listeners, &mut port_listeners,
&tunnel_writer_tx, &tunnel_writer_tx,
&tunnel_data_tx, &tunnel_data_tx,
&tunnel_sustained_tx,
&client_writers, &client_writers,
active_streams, active_streams,
next_stream_id, next_stream_id,
@@ -540,7 +544,7 @@ async fn connect_to_hub_and_run(
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur); liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
if let EdgeFrameAction::Disconnect(reason) = handle_edge_frame( if let EdgeFrameAction::Disconnect(reason) = handle_edge_frame(
frame, &mut tunnel_io, &client_writers, listen_ports, event_tx, frame, &mut tunnel_io, &client_writers, listen_ports, event_tx,
&tunnel_writer_tx, &tunnel_data_tx, &mut port_listeners, &tunnel_writer_tx, &tunnel_data_tx, &tunnel_sustained_tx, &mut port_listeners,
active_streams, next_stream_id, &config.edge_id, connection_token, bind_address, active_streams, next_stream_id, &config.edge_id, connection_token, bind_address,
).await { ).await {
break 'io_loop EdgeLoopResult::Reconnect(reason); break 'io_loop EdgeLoopResult::Reconnect(reason);
@@ -549,7 +553,7 @@ async fn connect_to_hub_and_run(
// Poll I/O: write(ctrl→data), flush, read, channels, timers // Poll I/O: write(ctrl→data), flush, read, channels, timers
let event = std::future::poll_fn(|cx| { let event = std::future::poll_fn(|cx| {
tunnel_io.poll_step(cx, &mut tunnel_ctrl_rx, &mut tunnel_data_rx, &mut liveness_deadline, connection_token) tunnel_io.poll_step(cx, &mut tunnel_ctrl_rx, &mut tunnel_data_rx, &mut tunnel_sustained_rx, &mut liveness_deadline, connection_token)
}).await; }).await;
match event { match event {
@@ -558,7 +562,7 @@ async fn connect_to_hub_and_run(
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur); liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
if let EdgeFrameAction::Disconnect(reason) = handle_edge_frame( if let EdgeFrameAction::Disconnect(reason) = handle_edge_frame(
frame, &mut tunnel_io, &client_writers, listen_ports, event_tx, frame, &mut tunnel_io, &client_writers, listen_ports, event_tx,
&tunnel_writer_tx, &tunnel_data_tx, &mut port_listeners, &tunnel_writer_tx, &tunnel_data_tx, &tunnel_sustained_tx, &mut port_listeners,
active_streams, next_stream_id, &config.edge_id, connection_token, bind_address, active_streams, next_stream_id, &config.edge_id, connection_token, bind_address,
).await { ).await {
break EdgeLoopResult::Reconnect(reason); break EdgeLoopResult::Reconnect(reason);
@@ -615,6 +619,7 @@ fn apply_port_config(
port_listeners: &mut HashMap<u16, JoinHandle<()>>, port_listeners: &mut HashMap<u16, JoinHandle<()>>,
tunnel_ctrl_tx: &mpsc::Sender<Bytes>, tunnel_ctrl_tx: &mpsc::Sender<Bytes>,
tunnel_data_tx: &mpsc::Sender<Bytes>, tunnel_data_tx: &mpsc::Sender<Bytes>,
tunnel_sustained_tx: &mpsc::Sender<Bytes>,
client_writers: &Arc<Mutex<HashMap<u32, EdgeStreamState>>>, client_writers: &Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
active_streams: &Arc<AtomicU32>, active_streams: &Arc<AtomicU32>,
next_stream_id: &Arc<AtomicU32>, next_stream_id: &Arc<AtomicU32>,
@@ -637,6 +642,7 @@ fn apply_port_config(
for &port in new_set.difference(&old_set) { for &port in new_set.difference(&old_set) {
let tunnel_ctrl_tx = tunnel_ctrl_tx.clone(); let tunnel_ctrl_tx = tunnel_ctrl_tx.clone();
let tunnel_data_tx = tunnel_data_tx.clone(); let tunnel_data_tx = tunnel_data_tx.clone();
let tunnel_sustained_tx = tunnel_sustained_tx.clone();
let client_writers = client_writers.clone(); let client_writers = client_writers.clone();
let active_streams = active_streams.clone(); let active_streams = active_streams.clone();
let next_stream_id = next_stream_id.clone(); let next_stream_id = next_stream_id.clone();
@@ -671,6 +677,7 @@ fn apply_port_config(
let stream_id = next_stream_id.fetch_add(1, Ordering::Relaxed); let stream_id = next_stream_id.fetch_add(1, Ordering::Relaxed);
let tunnel_ctrl_tx = tunnel_ctrl_tx.clone(); let tunnel_ctrl_tx = tunnel_ctrl_tx.clone();
let tunnel_data_tx = tunnel_data_tx.clone(); let tunnel_data_tx = tunnel_data_tx.clone();
let tunnel_sustained_tx = tunnel_sustained_tx.clone();
let client_writers = client_writers.clone(); let client_writers = client_writers.clone();
let active_streams = active_streams.clone(); let active_streams = active_streams.clone();
let edge_id = edge_id.clone(); let edge_id = edge_id.clone();
@@ -687,6 +694,7 @@ fn apply_port_config(
&edge_id, &edge_id,
tunnel_ctrl_tx, tunnel_ctrl_tx,
tunnel_data_tx, tunnel_data_tx,
tunnel_sustained_tx,
client_writers, client_writers,
client_token, client_token,
Arc::clone(&active_streams), Arc::clone(&active_streams),
@@ -730,6 +738,7 @@ async fn handle_client_connection(
edge_id: &str, edge_id: &str,
tunnel_ctrl_tx: mpsc::Sender<Bytes>, tunnel_ctrl_tx: mpsc::Sender<Bytes>,
tunnel_data_tx: mpsc::Sender<Bytes>, tunnel_data_tx: mpsc::Sender<Bytes>,
tunnel_sustained_tx: mpsc::Sender<Bytes>,
client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>>, client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
client_token: CancellationToken, client_token: CancellationToken,
active_streams: Arc<AtomicU32>, active_streams: Arc<AtomicU32>,
@@ -833,6 +842,9 @@ async fn handle_client_connection(
// Task: client -> hub (upload direction) with per-stream flow control. // Task: client -> hub (upload direction) with per-stream flow control.
// Zero-copy: read payload directly after the header, then prepend header. // Zero-copy: read payload directly after the header, then prepend header.
let mut buf = vec![0u8; FRAME_HEADER_SIZE + 32768]; let mut buf = vec![0u8; FRAME_HEADER_SIZE + 32768];
let mut stream_bytes_sent: u64 = 0;
let stream_start = tokio::time::Instant::now();
let mut is_sustained = false;
loop { loop {
// Wait for send window to have capacity (with stall timeout). // Wait for send window to have capacity (with stall timeout).
// Safe pattern: register notified BEFORE checking the condition // Safe pattern: register notified BEFORE checking the condition
@@ -873,8 +885,21 @@ async fn handle_client_connection(
send_window.fetch_sub(n as u32, Ordering::Release); send_window.fetch_sub(n as u32, Ordering::Release);
encode_frame_header(&mut buf, stream_id, FRAME_DATA, n); encode_frame_header(&mut buf, stream_id, FRAME_DATA, n);
let data_frame = Bytes::copy_from_slice(&buf[..FRAME_HEADER_SIZE + n]); let data_frame = Bytes::copy_from_slice(&buf[..FRAME_HEADER_SIZE + n]);
// Sustained classification: >2.5 MB/s for >10 seconds
stream_bytes_sent += n as u64;
if !is_sustained {
let elapsed = stream_start.elapsed().as_secs();
if elapsed >= remoteingress_protocol::SUSTAINED_MIN_DURATION_SECS
&& stream_bytes_sent / elapsed >= remoteingress_protocol::SUSTAINED_THRESHOLD_BPS
{
is_sustained = true;
log::debug!("Stream {} classified as sustained (upload, {} bytes in {}s)",
stream_id, stream_bytes_sent, elapsed);
}
}
let tx = if is_sustained { &tunnel_sustained_tx } else { &tunnel_data_tx };
let sent = tokio::select! { let sent = tokio::select! {
result = tunnel_data_tx.send(data_frame) => result.is_ok(), result = tx.send(data_frame) => result.is_ok(),
_ = client_token.cancelled() => false, _ = client_token.cancelled() => false,
}; };
if !sent { break; } if !sent { break; }
@@ -901,8 +926,9 @@ async fn handle_client_connection(
// select! with cancellation guard prevents indefinite blocking if tunnel dies. // select! with cancellation guard prevents indefinite blocking if tunnel dies.
if !client_token.is_cancelled() { if !client_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE, &[]); let close_frame = encode_frame(stream_id, FRAME_CLOSE, &[]);
let tx = if is_sustained { &tunnel_sustained_tx } else { &tunnel_data_tx };
tokio::select! { tokio::select! {
_ = tunnel_data_tx.send(close_frame) => {} _ = tx.send(close_frame) => {}
_ = client_token.cancelled() => {} _ = client_token.cancelled() => {}
} }
} }

View File

@@ -310,6 +310,7 @@ async fn handle_hub_frame(
event_tx: &mpsc::Sender<HubEvent>, event_tx: &mpsc::Sender<HubEvent>,
ctrl_tx: &mpsc::Sender<Bytes>, ctrl_tx: &mpsc::Sender<Bytes>,
data_tx: &mpsc::Sender<Bytes>, data_tx: &mpsc::Sender<Bytes>,
sustained_tx: &mpsc::Sender<Bytes>,
target_host: &str, target_host: &str,
edge_token: &CancellationToken, edge_token: &CancellationToken,
cleanup_tx: &mpsc::Sender<u32>, cleanup_tx: &mpsc::Sender<u32>,
@@ -338,6 +339,7 @@ async fn handle_hub_frame(
let cleanup = cleanup_tx.clone(); let cleanup = cleanup_tx.clone();
let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK
let data_writer_tx = data_tx.clone(); // data: DATA_BACK let data_writer_tx = data_tx.clone(); // data: DATA_BACK
let sustained_writer_tx = sustained_tx.clone(); // sustained: DATA_BACK from elephant flows
let target = target_host.to_string(); let target = target_host.to_string();
let stream_token = edge_token.child_token(); let stream_token = edge_token.child_token();
@@ -458,6 +460,9 @@ async fn handle_hub_frame(
// with per-stream flow control (check send_window before reading). // with per-stream flow control (check send_window before reading).
// Zero-copy: read payload directly after the header, then prepend header. // Zero-copy: read payload directly after the header, then prepend header.
let mut buf = vec![0u8; FRAME_HEADER_SIZE + 32768]; let mut buf = vec![0u8; FRAME_HEADER_SIZE + 32768];
let mut dl_bytes_sent: u64 = 0;
let dl_start = tokio::time::Instant::now();
let mut is_sustained = false;
loop { loop {
// Wait for send window to have capacity (with stall timeout). // Wait for send window to have capacity (with stall timeout).
// Safe pattern: register notified BEFORE checking the condition // Safe pattern: register notified BEFORE checking the condition
@@ -498,8 +503,21 @@ async fn handle_hub_frame(
send_window.fetch_sub(n as u32, Ordering::Release); send_window.fetch_sub(n as u32, Ordering::Release);
encode_frame_header(&mut buf, stream_id, FRAME_DATA_BACK, n); encode_frame_header(&mut buf, stream_id, FRAME_DATA_BACK, n);
let frame = Bytes::copy_from_slice(&buf[..FRAME_HEADER_SIZE + n]); let frame = Bytes::copy_from_slice(&buf[..FRAME_HEADER_SIZE + n]);
// Sustained classification: >2.5 MB/s for >10 seconds
dl_bytes_sent += n as u64;
if !is_sustained {
let elapsed = dl_start.elapsed().as_secs();
if elapsed >= remoteingress_protocol::SUSTAINED_MIN_DURATION_SECS
&& dl_bytes_sent / elapsed >= remoteingress_protocol::SUSTAINED_THRESHOLD_BPS
{
is_sustained = true;
log::debug!("Stream {} classified as sustained (download, {} bytes in {}s)",
stream_id, dl_bytes_sent, elapsed);
}
}
let tx = if is_sustained { &sustained_writer_tx } else { &data_writer_tx };
let sent = tokio::select! { let sent = tokio::select! {
result = data_writer_tx.send(frame) => result.is_ok(), result = tx.send(frame) => result.is_ok(),
_ = stream_token.cancelled() => false, _ = stream_token.cancelled() => false,
}; };
if !sent { break; } if !sent { break; }
@@ -511,12 +529,13 @@ async fn handle_hub_frame(
} }
} }
// Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK). // Send CLOSE_BACK via same channel as DATA_BACK (must arrive AFTER last DATA_BACK).
// select! with cancellation guard prevents indefinite blocking if tunnel dies. // select! with cancellation guard prevents indefinite blocking if tunnel dies.
if !stream_token.is_cancelled() { if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]); let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let tx = if is_sustained { &sustained_writer_tx } else { &data_writer_tx };
tokio::select! { tokio::select! {
_ = data_writer_tx.send(close_frame) => {} _ = tx.send(close_frame) => {}
_ = stream_token.cancelled() => {} _ = stream_token.cancelled() => {}
} }
} }
@@ -528,7 +547,9 @@ async fn handle_hub_frame(
if let Err(e) = result { if let Err(e) = result {
log::error!("Stream {} error: {}", stream_id, e); log::error!("Stream {} error: {}", stream_id, e);
// Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK). // Send CLOSE_BACK on error (must arrive after any DATA_BACK).
// Error path: is_sustained not available here, use data channel (safe —
// if error occurs before classification, no sustained frames were sent).
if !stream_token.is_cancelled() { if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]); let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
tokio::select! { tokio::select! {
@@ -710,6 +731,7 @@ async fn handle_edge_connection(
// Stream handlers send through these channels -> TunnelIo drains them. // Stream handlers send through these channels -> TunnelIo drains them.
let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Bytes>(512); let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Bytes>(512);
let (data_tx, mut data_rx) = mpsc::channel::<Bytes>(4096); let (data_tx, mut data_rx) = mpsc::channel::<Bytes>(4096);
let (sustained_tx, mut sustained_rx) = mpsc::channel::<Bytes>(4096);
// Spawn task to forward config updates as FRAME_CONFIG frames // Spawn task to forward config updates as FRAME_CONFIG frames
let config_writer_tx = ctrl_tx.clone(); let config_writer_tx = ctrl_tx.clone();
@@ -783,7 +805,7 @@ async fn handle_edge_connection(
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur); liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
if let FrameAction::Disconnect(reason) = handle_hub_frame( if let FrameAction::Disconnect(reason) = handle_hub_frame(
frame, &mut tunnel_io, &mut streams, &stream_semaphore, &edge_stream_count, frame, &mut tunnel_io, &mut streams, &stream_semaphore, &edge_stream_count,
&edge_id, &event_tx, &ctrl_tx, &data_tx, &target_host, &edge_token, &edge_id, &event_tx, &ctrl_tx, &data_tx, &sustained_tx, &target_host, &edge_token,
&cleanup_tx, &cleanup_tx,
).await { ).await {
disconnect_reason = reason; disconnect_reason = reason;
@@ -797,7 +819,7 @@ async fn handle_edge_connection(
if ping_ticker.poll_tick(cx).is_ready() { if ping_ticker.poll_tick(cx).is_ready() {
tunnel_io.queue_ctrl(encode_frame(0, FRAME_PING, &[])); tunnel_io.queue_ctrl(encode_frame(0, FRAME_PING, &[]));
} }
tunnel_io.poll_step(cx, &mut ctrl_rx, &mut data_rx, &mut liveness_deadline, &edge_token) tunnel_io.poll_step(cx, &mut ctrl_rx, &mut data_rx, &mut sustained_rx, &mut liveness_deadline, &edge_token)
}).await; }).await;
match event { match event {
@@ -806,7 +828,7 @@ async fn handle_edge_connection(
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur); liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
if let FrameAction::Disconnect(reason) = handle_hub_frame( if let FrameAction::Disconnect(reason) = handle_hub_frame(
frame, &mut tunnel_io, &mut streams, &stream_semaphore, &edge_stream_count, frame, &mut tunnel_io, &mut streams, &stream_semaphore, &edge_stream_count,
&edge_id, &event_tx, &ctrl_tx, &data_tx, &target_host, &edge_token, &edge_id, &event_tx, &ctrl_tx, &data_tx, &sustained_tx, &target_host, &edge_token,
&cleanup_tx, &cleanup_tx,
).await { ).await {
disconnect_reason = reason; disconnect_reason = reason;

View File

@@ -2,8 +2,10 @@ use std::collections::VecDeque;
use std::future::Future; use std::future::Future;
use std::pin::Pin; use std::pin::Pin;
use std::task::{Context, Poll}; use std::task::{Context, Poll};
use std::time::Duration;
use bytes::{Bytes, BytesMut, BufMut}; use bytes::{Bytes, BytesMut, BufMut};
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf}; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf};
use tokio::time::Instant;
// Frame type constants // Frame type constants
pub const FRAME_OPEN: u8 = 0x01; pub const FRAME_OPEN: u8 = 0x01;
@@ -31,6 +33,16 @@ pub const WINDOW_UPDATE_THRESHOLD: u32 = INITIAL_STREAM_WINDOW / 2;
/// Maximum window size to prevent overflow. /// Maximum window size to prevent overflow.
pub const MAX_WINDOW_SIZE: u32 = 4 * 1024 * 1024; pub const MAX_WINDOW_SIZE: u32 = 4 * 1024 * 1024;
// Sustained stream classification constants
/// Throughput threshold for sustained classification (2.5 MB/s = 20 Mbit/s).
pub const SUSTAINED_THRESHOLD_BPS: u64 = 2_500_000;
/// Minimum duration before a stream can be classified as sustained.
pub const SUSTAINED_MIN_DURATION_SECS: u64 = 10;
/// Fixed window for sustained streams (1 MB — the floor).
pub const SUSTAINED_WINDOW: u32 = 1 * 1024 * 1024;
/// Maximum bytes written from sustained queue per forced drain (1 MB/s guarantee).
pub const SUSTAINED_FORCED_DRAIN_CAP: usize = 1_048_576;
/// Encode a WINDOW_UPDATE frame for a specific stream. /// Encode a WINDOW_UPDATE frame for a specific stream.
pub fn encode_window_update(stream_id: u32, frame_type: u8, increment: u32) -> Bytes { pub fn encode_window_update(stream_id: u32, frame_type: u8, increment: u32) -> Bytes {
encode_frame(stream_id, frame_type, &increment.to_be_bytes()) encode_frame(stream_id, frame_type, &increment.to_be_bytes())
@@ -185,24 +197,30 @@ pub enum TunnelEvent {
/// Write state extracted into a sub-struct so the borrow checker can see /// Write state extracted into a sub-struct so the borrow checker can see
/// disjoint field access between `self.write` and `self.stream`. /// disjoint field access between `self.write` and `self.stream`.
struct WriteState { struct WriteState {
ctrl_queue: VecDeque<Bytes>, // PONG, WINDOW_UPDATE, CLOSE, OPEN — always first ctrl_queue: VecDeque<Bytes>, // PONG, WINDOW_UPDATE, CLOSE, OPEN — always first
data_queue: VecDeque<Bytes>, // DATA, DATA_BACK — only when ctrl is empty data_queue: VecDeque<Bytes>, // DATA, DATA_BACK — only when ctrl is empty
offset: usize, // progress within current frame being written sustained_queue: VecDeque<Bytes>, // DATA, DATA_BACK from sustained streams — lowest priority
offset: usize, // progress within current frame being written
flush_needed: bool, flush_needed: bool,
// Sustained starvation prevention: guaranteed 1 MB/s drain
sustained_last_drain: Instant,
sustained_bytes_this_period: usize,
} }
impl WriteState { impl WriteState {
fn has_work(&self) -> bool { fn has_work(&self) -> bool {
!self.ctrl_queue.is_empty() || !self.data_queue.is_empty() !self.ctrl_queue.is_empty() || !self.data_queue.is_empty() || !self.sustained_queue.is_empty()
} }
} }
/// Single-owner I/O engine for the tunnel TLS connection. /// Single-owner I/O engine for the tunnel TLS connection.
/// ///
/// Owns the TLS stream directly — no `tokio::io::split()`, no mutex. /// Owns the TLS stream directly — no `tokio::io::split()`, no mutex.
/// Uses two priority write queues: ctrl frames (PONG, WINDOW_UPDATE, CLOSE, OPEN) /// Uses three priority write queues:
/// are ALWAYS written before data frames (DATA, DATA_BACK). This prevents /// 1. ctrl (PONG, WINDOW_UPDATE, CLOSE, OPEN) — always first
/// WINDOW_UPDATE starvation that causes flow control deadlocks. /// 2. data (DATA, DATA_BACK from normal streams) — when ctrl empty
/// 3. sustained (DATA, DATA_BACK from sustained streams) — lowest priority,
/// drained freely when ctrl+data empty, or forced 1MB/s when they're not
pub struct TunnelIo<S> { pub struct TunnelIo<S> {
stream: S, stream: S,
// Read state: accumulate bytes, parse frames incrementally // Read state: accumulate bytes, parse frames incrementally
@@ -228,8 +246,11 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
write: WriteState { write: WriteState {
ctrl_queue: VecDeque::new(), ctrl_queue: VecDeque::new(),
data_queue: VecDeque::new(), data_queue: VecDeque::new(),
sustained_queue: VecDeque::new(),
offset: 0, offset: 0,
flush_needed: false, flush_needed: false,
sustained_last_drain: Instant::now(),
sustained_bytes_this_period: 0,
}, },
} }
} }
@@ -244,6 +265,11 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
self.write.data_queue.push_back(frame); self.write.data_queue.push_back(frame);
} }
/// Queue a lowest-priority sustained data frame.
pub fn queue_sustained(&mut self, frame: Bytes) {
self.write.sustained_queue.push_back(frame);
}
/// Try to parse a complete frame from the read buffer. /// Try to parse a complete frame from the read buffer.
/// Uses a parse_pos cursor to avoid drain() on every frame. /// Uses a parse_pos cursor to avoid drain() on every frame.
pub fn try_parse_frame(&mut self) -> Option<Result<Frame, std::io::Error>> { pub fn try_parse_frame(&mut self) -> Option<Result<Frame, std::io::Error>> {
@@ -303,33 +329,42 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
/// Poll-based I/O step. Returns Ready on events, Pending when idle. /// Poll-based I/O step. Returns Ready on events, Pending when idle.
/// ///
/// Order: write(ctrl->data) -> flush -> read -> channels -> timers /// Order: write(ctrl->data->sustained) -> flush -> read -> channels -> timers
pub fn poll_step( pub fn poll_step(
&mut self, &mut self,
cx: &mut Context<'_>, cx: &mut Context<'_>,
ctrl_rx: &mut tokio::sync::mpsc::Receiver<Bytes>, ctrl_rx: &mut tokio::sync::mpsc::Receiver<Bytes>,
data_rx: &mut tokio::sync::mpsc::Receiver<Bytes>, data_rx: &mut tokio::sync::mpsc::Receiver<Bytes>,
sustained_rx: &mut tokio::sync::mpsc::Receiver<Bytes>,
liveness_deadline: &mut Pin<Box<tokio::time::Sleep>>, liveness_deadline: &mut Pin<Box<tokio::time::Sleep>>,
cancel_token: &tokio_util::sync::CancellationToken, cancel_token: &tokio_util::sync::CancellationToken,
) -> Poll<TunnelEvent> { ) -> Poll<TunnelEvent> {
// 1. WRITE: drain ctrl queue first, then data queue. // 1. WRITE: 3-tier priority — ctrl first, then data, then sustained.
// Sustained drains freely when ctrl+data are empty.
// Write one frame, set flush_needed, then flush must complete before // Write one frame, set flush_needed, then flush must complete before
// writing more. This prevents unbounded TLS session buffer growth. // writing more. This prevents unbounded TLS session buffer growth.
// Safe: `self.write` and `self.stream` are disjoint fields. // Safe: `self.write` and `self.stream` are disjoint fields.
let mut writes = 0; let mut writes = 0;
while self.write.has_work() && writes < 16 && !self.write.flush_needed { while self.write.has_work() && writes < 16 && !self.write.flush_needed {
let from_ctrl = !self.write.ctrl_queue.is_empty(); // Pick queue: ctrl > data > sustained
let frame = if from_ctrl { let queue_id = if !self.write.ctrl_queue.is_empty() {
self.write.ctrl_queue.front().unwrap() 0 // ctrl
} else if !self.write.data_queue.is_empty() {
1 // data
} else { } else {
self.write.data_queue.front().unwrap() 2 // sustained
};
let frame = match queue_id {
0 => self.write.ctrl_queue.front().unwrap(),
1 => self.write.data_queue.front().unwrap(),
_ => self.write.sustained_queue.front().unwrap(),
}; };
let remaining = &frame[self.write.offset..]; let remaining = &frame[self.write.offset..];
match Pin::new(&mut self.stream).poll_write(cx, remaining) { match Pin::new(&mut self.stream).poll_write(cx, remaining) {
Poll::Ready(Ok(0)) => { Poll::Ready(Ok(0)) => {
log::error!("TunnelIo: poll_write returned 0 (write zero), ctrl_q={} data_q={}", log::error!("TunnelIo: poll_write returned 0 (write zero), ctrl_q={} data_q={} sustained_q={}",
self.write.ctrl_queue.len(), self.write.data_queue.len()); self.write.ctrl_queue.len(), self.write.data_queue.len(), self.write.sustained_queue.len());
return Poll::Ready(TunnelEvent::WriteError( return Poll::Ready(TunnelEvent::WriteError(
std::io::Error::new(std::io::ErrorKind::WriteZero, "write zero"), std::io::Error::new(std::io::ErrorKind::WriteZero, "write zero"),
)); ));
@@ -338,21 +373,70 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
self.write.offset += n; self.write.offset += n;
self.write.flush_needed = true; self.write.flush_needed = true;
if self.write.offset >= frame.len() { if self.write.offset >= frame.len() {
if from_ctrl { self.write.ctrl_queue.pop_front(); } match queue_id {
else { self.write.data_queue.pop_front(); } 0 => { self.write.ctrl_queue.pop_front(); }
1 => { self.write.data_queue.pop_front(); }
_ => {
self.write.sustained_queue.pop_front();
self.write.sustained_last_drain = Instant::now();
self.write.sustained_bytes_this_period = 0;
}
}
self.write.offset = 0; self.write.offset = 0;
writes += 1; writes += 1;
} }
} }
Poll::Ready(Err(e)) => { Poll::Ready(Err(e)) => {
log::error!("TunnelIo: poll_write error: {} (ctrl_q={} data_q={})", log::error!("TunnelIo: poll_write error: {} (ctrl_q={} data_q={} sustained_q={})",
e, self.write.ctrl_queue.len(), self.write.data_queue.len()); e, self.write.ctrl_queue.len(), self.write.data_queue.len(), self.write.sustained_queue.len());
return Poll::Ready(TunnelEvent::WriteError(e)); return Poll::Ready(TunnelEvent::WriteError(e));
} }
Poll::Pending => break, Poll::Pending => break,
} }
} }
// 1b. FORCED SUSTAINED DRAIN: when ctrl/data have work but sustained is waiting,
// guarantee at least 1 MB/s by draining up to SUSTAINED_FORCED_DRAIN_CAP
// once per second.
if !self.write.sustained_queue.is_empty()
&& (!self.write.ctrl_queue.is_empty() || !self.write.data_queue.is_empty())
&& !self.write.flush_needed
{
let now = Instant::now();
if now.duration_since(self.write.sustained_last_drain) >= Duration::from_secs(1) {
self.write.sustained_bytes_this_period = 0;
self.write.sustained_last_drain = now;
while !self.write.sustained_queue.is_empty()
&& self.write.sustained_bytes_this_period < SUSTAINED_FORCED_DRAIN_CAP
&& !self.write.flush_needed
{
let frame = self.write.sustained_queue.front().unwrap();
let remaining = &frame[self.write.offset..];
match Pin::new(&mut self.stream).poll_write(cx, remaining) {
Poll::Ready(Ok(0)) => {
return Poll::Ready(TunnelEvent::WriteError(
std::io::Error::new(std::io::ErrorKind::WriteZero, "write zero"),
));
}
Poll::Ready(Ok(n)) => {
self.write.offset += n;
self.write.flush_needed = true;
self.write.sustained_bytes_this_period += n;
if self.write.offset >= frame.len() {
self.write.sustained_queue.pop_front();
self.write.offset = 0;
}
}
Poll::Ready(Err(e)) => {
return Poll::Ready(TunnelEvent::WriteError(e));
}
Poll::Pending => break,
}
}
}
}
// 2. FLUSH: push encrypted data from TLS session to TCP. // 2. FLUSH: push encrypted data from TLS session to TCP.
if self.write.flush_needed { if self.write.flush_needed {
match Pin::new(&mut self.stream).poll_flush(cx) { match Pin::new(&mut self.stream).poll_flush(cx) {
@@ -436,6 +520,16 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
} }
} }
} }
// Sustained channel: drain when sustained_queue is small (same backpressure pattern).
// Channel close is non-fatal — not all connections have sustained streams.
if self.write.sustained_queue.len() < 64 {
loop {
match sustained_rx.poll_recv(cx) {
Poll::Ready(Some(frame)) => { self.write.sustained_queue.push_back(frame); got_new = true; }
Poll::Ready(None) | Poll::Pending => break,
}
}
}
// 5. TIMERS // 5. TIMERS
if liveness_deadline.as_mut().poll(cx).is_ready() { if liveness_deadline.as_mut().poll(cx).is_ready() {

View File

@@ -3,6 +3,6 @@
*/ */
export const commitinfo = { export const commitinfo = {
name: '@serve.zone/remoteingress', name: '@serve.zone/remoteingress',
version: '4.8.19', version: '4.9.1',
description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.' description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.'
} }