v4.8.18

fix(rust-protocol): switch tunnel frame buffers from Vec<u8> to Bytes to reduce copying and memory overhead
v4.8.17
2026-03-17 23:29:02 +00:00 · 2026-03-17 23:29:02 +00:00 · 2026-03-17 22:46:55 +00:00 · 2026-03-17 22:46:55 +00:00 · 2026-03-17 19:13:30 +00:00 · 2026-03-17 19:13:30 +00:00
10 changed files with 192 additions and 170 deletions
--- a/changelog.md
+++ b/changelog.md
@@ -1,5 +1,31 @@
 # Changelog

+## 2026-03-17 - 4.8.18 - fix(rust-protocol)
+switch tunnel frame buffers from Vec<u8> to Bytes to reduce copying and memory overhead
+
+- Add the bytes crate to core and protocol crates
+- Update frame encoding, reader payloads, channel queues, and stream backchannels to use Bytes
+- Adjust edge and hub data/control paths to send framed payloads as Bytes
+
+## 2026-03-17 - 4.8.17 - fix(protocol)
+increase per-stream flow control windows and remove adaptive read caps
+
+- Raise the initial per-stream window from 4MB to 16MB and expand the adaptive window budget to 800MB with a 4MB floor
+- Stop limiting edge and hub reads by the adaptive per-stream target window, keeping reads capped only by the current window and 32KB chunk size
+- Update protocol tests to match the new adaptive window scaling and budget boundaries
+
+## 2026-03-17 - 4.8.16 - fix(release)
+bump package version to 4.8.15
+
+- Updates the package.json version field from 4.8.13 to 4.8.15.
+
+## 2026-03-17 - 4.8.13 - fix(remoteingress-protocol)
+require a flush after each written frame to bound TLS buffer growth
+
+- Remove the unflushed byte threshold and stop queueing additional writes while a flush is pending
+- Simplify write and flush error logging after dropping unflushed byte tracking
+- Update tunnel I/O comments to reflect the stricter flush behavior that avoids OOM and connection resets
+
 ## 2026-03-17 - 4.8.12 - fix(tunnel)
 prevent tunnel backpressure buffering from exhausting memory and cancel stream handlers before TLS shutdown

--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@serve.zone/remoteingress",
-  "version": "4.8.12",
+  "version": "4.8.18",
  "private": false,
  "description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.",
  "main": "dist_ts/index.js",
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -551,6 +551,7 @@ dependencies = [
 name = "remoteingress-core"
 version = "2.0.0"
 dependencies = [
+ "bytes",
 "log",
 "rcgen",
 "remoteingress-protocol",
@@ -568,6 +569,7 @@ dependencies = [
 name = "remoteingress-protocol"
 version = "2.0.0"
 dependencies = [
+ "bytes",
 "log",
 "tokio",
 "tokio-util",
--- a/rust/crates/remoteingress-core/Cargo.toml
+++ b/rust/crates/remoteingress-core/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
 remoteingress-protocol = { path = "../remoteingress-protocol" }
 tokio = { version = "1", features = ["full"] }
 tokio-rustls = "0.26"
+bytes = "1"
 rustls = { version = "0.23", default-features = false, features = ["ring", "logging", "std", "tls12"] }
 rcgen = "0.13"
 serde = { version = "1", features = ["derive"] }
--- a/rust/crates/remoteingress-core/src/edge.rs
+++ b/rust/crates/remoteingress-core/src/edge.rs
@@ -11,6 +11,7 @@ use tokio_rustls::TlsConnector;
 use tokio_util::sync::CancellationToken;
 use serde::{Deserialize, Serialize};

+use bytes::Bytes;
 use remoteingress_protocol::*;

 type EdgeTlsStream = tokio_rustls::client::TlsStream<TcpStream>;
@@ -26,7 +27,7 @@ enum EdgeFrameAction {
 struct EdgeStreamState {
    /// Unbounded channel to deliver FRAME_DATA_BACK payloads to the hub_to_client task.
    /// Unbounded because flow control (WINDOW_UPDATE) already limits bytes-in-flight.
-    back_tx: mpsc::UnboundedSender<Vec<u8>>,
+    back_tx: mpsc::UnboundedSender<Bytes>,
    /// Send window for FRAME_DATA (upload direction).
    /// Decremented by the client reader, incremented by FRAME_WINDOW_UPDATE_BACK from hub.
    send_window: Arc<AtomicU32>,
@@ -290,8 +291,8 @@ async fn handle_edge_frame(
    client_writers: &Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
    listen_ports: &Arc<RwLock<Vec<u16>>>,
    event_tx: &mpsc::Sender<EdgeEvent>,
-    tunnel_writer_tx: &mpsc::Sender<Vec<u8>>,
-    tunnel_data_tx: &mpsc::Sender<Vec<u8>>,
+    tunnel_writer_tx: &mpsc::Sender<Bytes>,
+    tunnel_data_tx: &mpsc::Sender<Bytes>,
    port_listeners: &mut HashMap<u16, JoinHandle<()>>,
    active_streams: &Arc<AtomicU32>,
    next_stream_id: &Arc<AtomicU32>,
@@ -496,8 +497,8 @@ async fn connect_to_hub_and_run(

    // QoS dual-channel: ctrl frames have priority over data frames.
    // Stream handlers send through these channels → TunnelIo drains them.
-    let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Vec<u8>>(256);
-    let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Vec<u8>>(4096);
+    let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Bytes>(256);
+    let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Bytes>(4096);
    let tunnel_writer_tx = tunnel_ctrl_tx.clone();

    // Start TCP listeners for initial ports
@@ -519,6 +520,7 @@ async fn connect_to_hub_and_run(
    // Single-owner I/O engine — no tokio::io::split, no mutex
    let mut tunnel_io = remoteingress_protocol::TunnelIo::new(tls_stream, Vec::new());

+
    let liveness_timeout_dur = Duration::from_secs(45);
    let mut last_activity = Instant::now();
    let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur));
@@ -611,8 +613,8 @@ async fn connect_to_hub_and_run(
 fn apply_port_config(
    new_ports: &[u16],
    port_listeners: &mut HashMap<u16, JoinHandle<()>>,
-    tunnel_ctrl_tx: &mpsc::Sender<Vec<u8>>,
-    tunnel_data_tx: &mpsc::Sender<Vec<u8>>,
+    tunnel_ctrl_tx: &mpsc::Sender<Bytes>,
+    tunnel_data_tx: &mpsc::Sender<Bytes>,
    client_writers: &Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
    active_streams: &Arc<AtomicU32>,
    next_stream_id: &Arc<AtomicU32>,
@@ -726,8 +728,8 @@ async fn handle_client_connection(
    stream_id: u32,
    dest_port: u16,
    edge_id: &str,
-    tunnel_ctrl_tx: mpsc::Sender<Vec<u8>>,
-    tunnel_data_tx: mpsc::Sender<Vec<u8>>,
+    tunnel_ctrl_tx: mpsc::Sender<Bytes>,
+    tunnel_data_tx: mpsc::Sender<Bytes>,
    client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
    client_token: CancellationToken,
    active_streams: Arc<AtomicU32>,
@@ -752,7 +754,7 @@ async fn handle_client_connection(
    // Per-stream unbounded back-channel. Flow control (WINDOW_UPDATE) limits
    // bytes-in-flight, so this won't grow unbounded. Unbounded avoids killing
    // streams due to channel overflow — backpressure slows streams, never kills them.
-    let (back_tx, mut back_rx) = mpsc::unbounded_channel::<Vec<u8>>();
+    let (back_tx, mut back_rx) = mpsc::unbounded_channel::<Bytes>();
    // Adaptive initial window: scale with current stream count to keep total in-flight
    // data within the 32MB budget. Prevents burst flooding when many streams open.
    let initial_window = remoteingress_protocol::compute_window_for_stream_count(
@@ -861,11 +863,7 @@ async fn handle_client_connection(
            log::warn!("Stream {} upload: window still 0 after stall timeout, closing", stream_id);
            break;
        }
-        // Adaptive: cap read to current per-stream target window
-        let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count(
-            active_streams.load(Ordering::Relaxed),
-        ) as usize;
-        let max_read = w.min(32768).min(adaptive_cap);
+        let max_read = w.min(32768);

        tokio::select! {
            read_result = client_read.read(&mut buf[FRAME_HEADER_SIZE..FRAME_HEADER_SIZE + max_read]) => {
@@ -874,7 +872,7 @@ async fn handle_client_connection(
                    Ok(n) => {
                        send_window.fetch_sub(n as u32, Ordering::Release);
                        encode_frame_header(&mut buf, stream_id, FRAME_DATA, n);
-                        let data_frame = buf[..FRAME_HEADER_SIZE + n].to_vec();
+                        let data_frame = Bytes::copy_from_slice(&buf[..FRAME_HEADER_SIZE + n]);
                        let sent = tokio::select! {
                            result = tunnel_data_tx.send(data_frame) => result.is_ok(),
                            _ = client_token.cancelled() => false,
--- a/rust/crates/remoteingress-core/src/hub.rs
+++ b/rust/crates/remoteingress-core/src/hub.rs
@@ -10,6 +10,7 @@ use tokio_rustls::TlsAcceptor;
 use tokio_util::sync::CancellationToken;
 use serde::{Deserialize, Serialize};

+use bytes::Bytes;
 use remoteingress_protocol::*;

 type HubTlsStream = tokio_rustls::server::TlsStream<TcpStream>;
@@ -26,7 +27,7 @@ struct HubStreamState {
    /// Unbounded channel to deliver FRAME_DATA payloads to the upstream writer task.
    /// Unbounded because flow control (WINDOW_UPDATE) already limits bytes-in-flight.
    /// A bounded channel would kill streams instead of applying backpressure.
-    data_tx: mpsc::UnboundedSender<Vec<u8>>,
+    data_tx: mpsc::UnboundedSender<Bytes>,
    /// Cancellation token for this stream.
    cancel_token: CancellationToken,
    /// Send window for FRAME_DATA_BACK (download direction).
@@ -307,8 +308,8 @@ async fn handle_hub_frame(
    edge_stream_count: &Arc<AtomicU32>,
    edge_id: &str,
    event_tx: &mpsc::Sender<HubEvent>,
-    ctrl_tx: &mpsc::Sender<Vec<u8>>,
-    data_tx: &mpsc::Sender<Vec<u8>>,
+    ctrl_tx: &mpsc::Sender<Bytes>,
+    data_tx: &mpsc::Sender<Bytes>,
    target_host: &str,
    edge_token: &CancellationToken,
    cleanup_tx: &mpsc::Sender<u32>,
@@ -346,7 +347,7 @@ async fn handle_hub_frame(
            });

            // Create channel for data from edge to this stream
-            let (stream_data_tx, mut stream_data_rx) = mpsc::unbounded_channel::<Vec<u8>>();
+            let (stream_data_tx, mut stream_data_rx) = mpsc::unbounded_channel::<Bytes>();
            // Adaptive initial window: scale with current stream count
            // to keep total in-flight data within the 32MB budget.
            let initial_window = compute_window_for_stream_count(
@@ -487,11 +488,7 @@ async fn handle_hub_frame(
                            log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id);
                            break;
                        }
-                        // Adaptive: cap read to current per-stream target window
-                        let adaptive_cap = remoteingress_protocol::compute_window_for_stream_count(
-                            stream_counter.load(Ordering::Relaxed),
-                        ) as usize;
-                        let max_read = w.min(32768).min(adaptive_cap);
+                        let max_read = w.min(32768);

                        tokio::select! {
                            read_result = up_read.read(&mut buf[FRAME_HEADER_SIZE..FRAME_HEADER_SIZE + max_read]) => {
@@ -500,7 +497,7 @@ async fn handle_hub_frame(
                                    Ok(n) => {
                                        send_window.fetch_sub(n as u32, Ordering::Release);
                                        encode_frame_header(&mut buf, stream_id, FRAME_DATA_BACK, n);
-                                        let frame = buf[..FRAME_HEADER_SIZE + n].to_vec();
+                                        let frame = Bytes::copy_from_slice(&buf[..FRAME_HEADER_SIZE + n]);
                                        let sent = tokio::select! {
                                            result = data_writer_tx.send(frame) => result.is_ok(),
                                            _ = stream_token.cancelled() => false,
@@ -711,8 +708,8 @@ async fn handle_edge_connection(

    // QoS dual-channel: ctrl frames have priority over data frames.
    // Stream handlers send through these channels -> TunnelIo drains them.
-    let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Vec<u8>>(256);
-    let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(4096);
+    let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Bytes>(256);
+    let (data_tx, mut data_rx) = mpsc::channel::<Bytes>(4096);

    // Spawn task to forward config updates as FRAME_CONFIG frames
    let config_writer_tx = ctrl_tx.clone();
@@ -755,6 +752,7 @@ async fn handle_edge_connection(
    // Single-owner I/O engine — no tokio::io::split, no mutex
    let mut tunnel_io = remoteingress_protocol::TunnelIo::new(tls_stream, Vec::new());

+
    // Assigned in every break path of the hub_loop before use at the end.
    #[allow(unused_assignments)]
    let mut disconnect_reason = String::new();
--- a/rust/crates/remoteingress-protocol/Cargo.toml
+++ b/rust/crates/remoteingress-protocol/Cargo.toml
@@ -6,6 +6,7 @@ edition = "2021"
 [dependencies]
 tokio = { version = "1", features = ["io-util", "sync", "time"] }
 tokio-util = "0.7"
+bytes = "1"
 log = "0.4"

 [dev-dependencies]
--- a/rust/crates/remoteingress-protocol/src/lib.rs
+++ b/rust/crates/remoteingress-protocol/src/lib.rs
@@ -2,6 +2,7 @@ use std::collections::VecDeque;
 use std::future::Future;
 use std::pin::Pin;
 use std::task::{Context, Poll};
+use bytes::{Bytes, BytesMut, BufMut};
 use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf};

 // Frame type constants
@@ -23,26 +24,24 @@ pub const FRAME_HEADER_SIZE: usize = 9;
 pub const MAX_PAYLOAD_SIZE: u32 = 16 * 1024 * 1024;

 // Per-stream flow control constants
-/// Initial per-stream window size (4 MB). Sized for full throughput at high RTT:
-/// at 100ms RTT, this sustains ~40 MB/s per stream.
-pub const INITIAL_STREAM_WINDOW: u32 = 4 * 1024 * 1024;
+/// Initial (and maximum) per-stream window size (16 MB).
+pub const INITIAL_STREAM_WINDOW: u32 = 16 * 1024 * 1024;
 /// Send WINDOW_UPDATE after consuming this many bytes (half the initial window).
 pub const WINDOW_UPDATE_THRESHOLD: u32 = INITIAL_STREAM_WINDOW / 2;
 /// Maximum window size to prevent overflow.
 pub const MAX_WINDOW_SIZE: u32 = 16 * 1024 * 1024;

 /// Encode a WINDOW_UPDATE frame for a specific stream.
-pub fn encode_window_update(stream_id: u32, frame_type: u8, increment: u32) -> Vec<u8> {
+pub fn encode_window_update(stream_id: u32, frame_type: u8, increment: u32) -> Bytes {
    encode_frame(stream_id, frame_type, &increment.to_be_bytes())
 }

 /// Compute the target per-stream window size based on the number of active streams.
-/// Total memory budget is ~32MB shared across all streams. As more streams are active,
-/// each gets a smaller window. This adapts to current demand — few streams get high
-/// throughput, many streams save memory and reduce control frame pressure.
+/// Total memory budget is ~800MB shared across all streams. Up to 50 streams get the
+/// full 16MB window; above that the window scales down to a 4MB floor at 200+ streams.
 pub fn compute_window_for_stream_count(active: u32) -> u32 {
-    let per_stream = (32 * 1024 * 1024u64) / (active.max(1) as u64);
-    per_stream.clamp(64 * 1024, INITIAL_STREAM_WINDOW as u64) as u32
+    let per_stream = (800 * 1024 * 1024u64) / (active.max(1) as u64);
+    per_stream.clamp(4 * 1024 * 1024, INITIAL_STREAM_WINDOW as u64) as u32
 }

 /// Decode a WINDOW_UPDATE payload into a byte increment. Returns None if payload is malformed.
@@ -58,18 +57,18 @@ pub fn decode_window_update(payload: &[u8]) -> Option<u32> {
 pub struct Frame {
    pub stream_id: u32,
    pub frame_type: u8,
-    pub payload: Vec<u8>,
+    pub payload: Bytes,
 }

 /// Encode a frame into bytes: [stream_id:4][type:1][length:4][payload]
-pub fn encode_frame(stream_id: u32, frame_type: u8, payload: &[u8]) -> Vec<u8> {
+pub fn encode_frame(stream_id: u32, frame_type: u8, payload: &[u8]) -> Bytes {
    let len = payload.len() as u32;
-    let mut buf = Vec::with_capacity(FRAME_HEADER_SIZE + payload.len());
-    buf.extend_from_slice(&stream_id.to_be_bytes());
-    buf.push(frame_type);
-    buf.extend_from_slice(&len.to_be_bytes());
-    buf.extend_from_slice(payload);
-    buf
+    let mut buf = BytesMut::with_capacity(FRAME_HEADER_SIZE + payload.len());
+    buf.put_slice(&stream_id.to_be_bytes());
+    buf.put_u8(frame_type);
+    buf.put_slice(&len.to_be_bytes());
+    buf.put_slice(payload);
+    buf.freeze()
 }

 /// Write a frame header into `buf[0..FRAME_HEADER_SIZE]`.
@@ -144,7 +143,7 @@ impl<R: AsyncRead + Unpin> FrameReader<R> {
            ));
        }

-        let mut payload = vec![0u8; length as usize];
+        let mut payload = BytesMut::zeroed(length as usize);
        if length > 0 {
            self.reader.read_exact(&mut payload).await?;
        }
@@ -152,7 +151,7 @@ impl<R: AsyncRead + Unpin> FrameReader<R> {
        Ok(Some(Frame {
            stream_id,
            frame_type,
-            payload,
+            payload: payload.freeze(),
        }))
    }

@@ -186,8 +185,8 @@ pub enum TunnelEvent {
 /// Write state extracted into a sub-struct so the borrow checker can see
 /// disjoint field access between `self.write` and `self.stream`.
 struct WriteState {
-    ctrl_queue: VecDeque<Vec<u8>>,   // PONG, WINDOW_UPDATE, CLOSE, OPEN — always first
-    data_queue: VecDeque<Vec<u8>>,   // DATA, DATA_BACK — only when ctrl is empty
+    ctrl_queue: VecDeque<Bytes>,   // PONG, WINDOW_UPDATE, CLOSE, OPEN — always first
+    data_queue: VecDeque<Bytes>,   // DATA, DATA_BACK — only when ctrl is empty
    offset: usize,                   // progress within current frame being written
    flush_needed: bool,
 }
@@ -236,12 +235,12 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
    }

    /// Queue a high-priority control frame (PONG, WINDOW_UPDATE, CLOSE, OPEN).
-    pub fn queue_ctrl(&mut self, frame: Vec<u8>) {
+    pub fn queue_ctrl(&mut self, frame: Bytes) {
        self.write.ctrl_queue.push_back(frame);
    }

    /// Queue a lower-priority data frame (DATA, DATA_BACK).
-    pub fn queue_data(&mut self, frame: Vec<u8>) {
+    pub fn queue_data(&mut self, frame: Bytes) {
        self.write.data_queue.push_back(frame);
    }

@@ -287,7 +286,9 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
            return None;
        }

-        let payload = self.read_buf[base + FRAME_HEADER_SIZE..base + total_frame_size].to_vec();
+        let payload = Bytes::copy_from_slice(
+            &self.read_buf[base + FRAME_HEADER_SIZE..base + total_frame_size],
+        );
        self.parse_pos += total_frame_size;

        // Compact when parse_pos > half the data to reclaim memory
@@ -302,19 +303,18 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {

    /// Poll-based I/O step. Returns Ready on events, Pending when idle.
    ///
-    /// Order: write(ctrl→data) → flush → read → channels → timers
+    /// Order: write(ctrl->data) -> flush -> read -> channels -> timers
    pub fn poll_step(
        &mut self,
        cx: &mut Context<'_>,
-        ctrl_rx: &mut tokio::sync::mpsc::Receiver<Vec<u8>>,
-        data_rx: &mut tokio::sync::mpsc::Receiver<Vec<u8>>,
+        ctrl_rx: &mut tokio::sync::mpsc::Receiver<Bytes>,
+        data_rx: &mut tokio::sync::mpsc::Receiver<Bytes>,
        liveness_deadline: &mut Pin<Box<tokio::time::Sleep>>,
        cancel_token: &tokio_util::sync::CancellationToken,
    ) -> Poll<TunnelEvent> {
        // 1. WRITE: drain ctrl queue first, then data queue.
-        //    Only write when flush is complete — otherwise the TLS session buffer
-        //    grows without bound (poll_write always returns Ready, buffering plaintext
-        //    in the TLS session even when TCP can't keep up).
+        //    Write one frame, set flush_needed, then flush must complete before
+        //    writing more. This prevents unbounded TLS session buffer growth.
        //    Safe: `self.write` and `self.stream` are disjoint fields.
        let mut writes = 0;
        while self.write.has_work() && writes < 16 && !self.write.flush_needed {
@@ -328,6 +328,8 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {

            match Pin::new(&mut self.stream).poll_write(cx, remaining) {
                Poll::Ready(Ok(0)) => {
+                    log::error!("TunnelIo: poll_write returned 0 (write zero), ctrl_q={} data_q={}",
+                        self.write.ctrl_queue.len(), self.write.data_queue.len());
                    return Poll::Ready(TunnelEvent::WriteError(
                        std::io::Error::new(std::io::ErrorKind::WriteZero, "write zero"),
                    ));
@@ -342,7 +344,11 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
                        writes += 1;
                    }
                }
-                Poll::Ready(Err(e)) => return Poll::Ready(TunnelEvent::WriteError(e)),
+                Poll::Ready(Err(e)) => {
+                    log::error!("TunnelIo: poll_write error: {} (ctrl_q={} data_q={})",
+                        e, self.write.ctrl_queue.len(), self.write.data_queue.len());
+                    return Poll::Ready(TunnelEvent::WriteError(e));
+                }
                Poll::Pending => break,
            }
        }
@@ -350,8 +356,13 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
        // 2. FLUSH: push encrypted data from TLS session to TCP.
        if self.write.flush_needed {
            match Pin::new(&mut self.stream).poll_flush(cx) {
-                Poll::Ready(Ok(())) => self.write.flush_needed = false,
-                Poll::Ready(Err(e)) => return Poll::Ready(TunnelEvent::WriteError(e)),
+                Poll::Ready(Ok(())) => {
+                    self.write.flush_needed = false;
+                }
+                Poll::Ready(Err(e)) => {
+                    log::error!("TunnelIo: poll_flush error: {}", e);
+                    return Poll::Ready(TunnelEvent::WriteError(e));
+                }
                Poll::Pending => {} // TCP waker will notify us
            }
        }
@@ -387,12 +398,19 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
                    // Partial data — loop to call poll_read again so the TCP
                    // waker is re-registered when it finally returns Pending.
                }
-                Poll::Ready(Err(e)) => return Poll::Ready(TunnelEvent::ReadError(e)),
+                Poll::Ready(Err(e)) => {
+                    log::error!("TunnelIo: poll_read error: {}", e);
+                    return Poll::Ready(TunnelEvent::ReadError(e));
+                }
                Poll::Pending => break,
            }
        }

-        // 4. CHANNELS: drain ctrl into ctrl_queue, data into data_queue.
+        // 4. CHANNELS: drain ctrl (always — priority), data (only if queue is small).
+        //    Ctrl frames must never be delayed — always drain fully.
+        //    Data frames are gated: keep data in the bounded channel for proper
+        //    backpressure when TLS writes are slow. Without this gate, the internal
+        //    data_queue (unbounded VecDeque) grows to hundreds of MB under throttle -> OOM.
        let mut got_new = false;
        loop {
            match ctrl_rx.poll_recv(cx) {
@@ -405,15 +423,17 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
                Poll::Pending => break,
            }
        }
-        loop {
-            match data_rx.poll_recv(cx) {
-                Poll::Ready(Some(frame)) => { self.write.data_queue.push_back(frame); got_new = true; }
-                Poll::Ready(None) => {
-                    return Poll::Ready(TunnelEvent::WriteError(
-                        std::io::Error::new(std::io::ErrorKind::BrokenPipe, "data channel closed"),
-                    ));
+        if self.write.data_queue.len() < 64 {
+            loop {
+                match data_rx.poll_recv(cx) {
+                    Poll::Ready(Some(frame)) => { self.write.data_queue.push_back(frame); got_new = true; }
+                    Poll::Ready(None) => {
+                        return Poll::Ready(TunnelEvent::WriteError(
+                            std::io::Error::new(std::io::ErrorKind::BrokenPipe, "data channel closed"),
+                        ));
+                    }
+                    Poll::Pending => break,
                }
-                Poll::Pending => break,
            }
        }

@@ -426,10 +446,10 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
        }

        // 6. SELF-WAKE: only when flush is complete AND we have work.
-        //    If flush is pending, the TCP write-readiness waker will notify us.
-        //    CRITICAL: do NOT self-wake when flush_needed — this causes unbounded
-        //    TLS session buffer growth (poll_write always accepts plaintext, but TCP
-        //    can't drain it fast enough → OOM → process killed → ECONNRESET).
+        //    When flush is Pending, the TCP write-readiness waker will notify us.
+        //    CRITICAL: do NOT self-wake when flush_needed — poll_write always returns
+        //    Ready (TLS buffers in-memory), so self-waking causes a tight spin loop
+        //    that fills the TLS session buffer unboundedly -> OOM -> ECONNRESET.
        if !self.write.flush_needed && (got_new || self.write.has_work()) {
            cx.waker().wake_by_ref();
        }
@@ -452,14 +472,14 @@ mod tests {
        let mut buf = vec![0u8; FRAME_HEADER_SIZE + payload.len()];
        buf[FRAME_HEADER_SIZE..].copy_from_slice(payload);
        encode_frame_header(&mut buf, 42, FRAME_DATA, payload.len());
-        assert_eq!(buf, encode_frame(42, FRAME_DATA, payload));
+        assert_eq!(buf, &encode_frame(42, FRAME_DATA, payload)[..]);
    }

    #[test]
    fn test_encode_frame_header_empty_payload() {
        let mut buf = vec![0u8; FRAME_HEADER_SIZE];
        encode_frame_header(&mut buf, 99, FRAME_CLOSE, 0);
-        assert_eq!(buf, encode_frame(99, FRAME_CLOSE, &[]));
+        assert_eq!(buf, &encode_frame(99, FRAME_CLOSE, &[])[..]);
    }

    #[test]
@@ -627,7 +647,7 @@ mod tests {
            let frame = reader.next_frame().await.unwrap().unwrap();
            assert_eq!(frame.stream_id, i as u32);
            assert_eq!(frame.frame_type, ft);
-            assert_eq!(frame.payload, format!("payload_{}", i).as_bytes());
+            assert_eq!(&frame.payload[..], format!("payload_{}", i).as_bytes());
        }

        assert!(reader.next_frame().await.unwrap().is_none());
@@ -636,7 +656,7 @@ mod tests {
    #[tokio::test]
    async fn test_frame_reader_zero_length_payload() {
        let data = encode_frame(42, FRAME_CLOSE, &[]);
-        let cursor = std::io::Cursor::new(data);
+        let cursor = std::io::Cursor::new(data.to_vec());
        let mut reader = FrameReader::new(cursor);

        let frame = reader.next_frame().await.unwrap().unwrap();
@@ -664,90 +684,57 @@ mod tests {

    #[test]
    fn test_adaptive_window_zero_streams() {
-        // 0 streams treated as 1: 32MB/1 = 32MB → clamped to 4MB max
+        // 0 streams treated as 1: 800MB/1 -> clamped to 16MB max
        assert_eq!(compute_window_for_stream_count(0), INITIAL_STREAM_WINDOW);
    }

    #[test]
    fn test_adaptive_window_one_stream() {
-        // 32MB/1 = 32MB → clamped to 4MB max
        assert_eq!(compute_window_for_stream_count(1), INITIAL_STREAM_WINDOW);
    }

    #[test]
-    fn test_adaptive_window_at_max_boundary() {
-        // 32MB/8 = 4MB = exactly INITIAL_STREAM_WINDOW
-        assert_eq!(compute_window_for_stream_count(8), INITIAL_STREAM_WINDOW);
+    fn test_adaptive_window_50_streams_full() {
+        // 800MB/50 = 16MB = exactly INITIAL_STREAM_WINDOW
+        assert_eq!(compute_window_for_stream_count(50), INITIAL_STREAM_WINDOW);
    }

    #[test]
-    fn test_adaptive_window_just_below_max() {
-        // 32MB/9 = 3,728,270 — first value below INITIAL_STREAM_WINDOW
-        let w = compute_window_for_stream_count(9);
+    fn test_adaptive_window_51_streams_starts_scaling() {
+        // 800MB/51 < 16MB — first value below max
+        let w = compute_window_for_stream_count(51);
        assert!(w < INITIAL_STREAM_WINDOW);
-        assert_eq!(w, (32 * 1024 * 1024u64 / 9) as u32);
-    }
-
-    #[test]
-    fn test_adaptive_window_16_streams() {
-        // 32MB/16 = 2MB
-        assert_eq!(compute_window_for_stream_count(16), 2 * 1024 * 1024);
+        assert_eq!(w, (800 * 1024 * 1024u64 / 51) as u32);
    }

    #[test]
    fn test_adaptive_window_100_streams() {
-        // 32MB/100 = 335,544 bytes (~327KB)
-        let w = compute_window_for_stream_count(100);
-        assert_eq!(w, (32 * 1024 * 1024u64 / 100) as u32);
-        assert!(w > 64 * 1024); // above floor
-        assert!(w < INITIAL_STREAM_WINDOW as u32); // below ceiling
+        // 800MB/100 = 8MB
+        assert_eq!(compute_window_for_stream_count(100), 8 * 1024 * 1024);
    }

    #[test]
-    fn test_adaptive_window_200_streams() {
-        // 32MB/200 = 167,772 bytes (~163KB), above 64KB floor
-        let w = compute_window_for_stream_count(200);
-        assert_eq!(w, (32 * 1024 * 1024u64 / 200) as u32);
-        assert!(w > 64 * 1024);
+    fn test_adaptive_window_200_streams_at_floor() {
+        // 800MB/200 = 4MB = exactly the floor
+        assert_eq!(compute_window_for_stream_count(200), 4 * 1024 * 1024);
    }

    #[test]
-    fn test_adaptive_window_500_streams() {
-        // 32MB/500 = 67,108 bytes (~65.5KB), just above 64KB floor
-        let w = compute_window_for_stream_count(500);
-        assert_eq!(w, (32 * 1024 * 1024u64 / 500) as u32);
-        assert!(w > 64 * 1024);
-    }
-
-    #[test]
-    fn test_adaptive_window_at_min_boundary() {
-        // 32MB/512 = 65,536 = exactly 64KB floor
-        assert_eq!(compute_window_for_stream_count(512), 64 * 1024);
-    }
-
-    #[test]
-    fn test_adaptive_window_below_min_clamped() {
-        // 32MB/513 = 65,408 → clamped up to 64KB
-        assert_eq!(compute_window_for_stream_count(513), 64 * 1024);
-    }
-
-    #[test]
-    fn test_adaptive_window_1000_streams() {
-        // 32MB/1000 = 33,554 → clamped to 64KB
-        assert_eq!(compute_window_for_stream_count(1000), 64 * 1024);
+    fn test_adaptive_window_500_streams_clamped() {
+        // 800MB/500 = 1.6MB -> clamped up to 4MB floor
+        assert_eq!(compute_window_for_stream_count(500), 4 * 1024 * 1024);
    }

    #[test]
    fn test_adaptive_window_max_u32() {
-        // Extreme: u32::MAX streams → tiny value → clamped to 64KB
-        assert_eq!(compute_window_for_stream_count(u32::MAX), 64 * 1024);
+        // Extreme: u32::MAX streams -> tiny value -> clamped to 4MB
+        assert_eq!(compute_window_for_stream_count(u32::MAX), 4 * 1024 * 1024);
    }

    #[test]
    fn test_adaptive_window_monotonically_decreasing() {
-        // Window should decrease (or stay same) as stream count increases
        let mut prev = compute_window_for_stream_count(1);
-        for n in [2, 5, 10, 50, 100, 200, 500, 512, 1000] {
+        for n in [2, 10, 50, 51, 100, 200, 500, 1000] {
            let w = compute_window_for_stream_count(n);
            assert!(w <= prev, "window increased from {} to {} at n={}", prev, w, n);
            prev = w;
@@ -756,11 +743,11 @@ mod tests {

    #[test]
    fn test_adaptive_window_total_budget_bounded() {
-        // active × per_stream_window should never exceed 32MB (+ clamp overhead for high N)
-        for n in [1, 10, 50, 100, 200, 500] {
+        // active x per_stream_window should never exceed 800MB (+ clamp overhead for high N)
+        for n in [1, 10, 50, 100, 200] {
            let w = compute_window_for_stream_count(n);
            let total = w as u64 * n as u64;
-            assert!(total <= 32 * 1024 * 1024, "total {}MB exceeds budget at n={}", total / (1024*1024), n);
+            assert!(total <= 800 * 1024 * 1024, "total {}MB exceeds budget at n={}", total / (1024*1024), n);
        }
    }

--- a/test/test.loadtest.node.ts
+++ b/test/test.loadtest.node.ts
@@ -142,7 +142,7 @@ class ThrottleTransform extends stream.Transform {
      this.bucket = 0;
      const delayMs = Math.min((deficit / this.bytesPerSec) * 1000, 1000);
      setTimeout(() => {
-        if (this.destroyed_) return;
+        if (this.destroyed_) { callback(); return; }
        this.lastRefill = Date.now();
        this.bucket = 0;
        callback(null, chunk);
@@ -179,7 +179,16 @@ async function startThrottleProxy(
    clientSock.pipe(throttleUp).pipe(upstream);
    upstream.pipe(throttleDown).pipe(clientSock);

-    const cleanup = () => {
+    let cleaned = false;
+    const cleanup = (source: string, err?: Error) => {
+      if (cleaned) return;
+      cleaned = true;
+      if (err) {
+        console.error(`[ThrottleProxy] cleanup triggered by ${source}: ${err.message}`);
+      } else {
+        console.error(`[ThrottleProxy] cleanup triggered by ${source} (no error)`);
+      }
+      console.error(`[ThrottleProxy] stack:`, new Error().stack);
      throttleUp.destroy();
      throttleDown.destroy();
      clientSock.destroy();
@@ -187,12 +196,12 @@ async function startThrottleProxy(
      connections.delete(clientSock);
      connections.delete(upstream);
    };
-    clientSock.on('error', cleanup);
-    upstream.on('error', cleanup);
-    throttleUp.on('error', cleanup);
-    throttleDown.on('error', cleanup);
-    clientSock.on('close', cleanup);
-    upstream.on('close', cleanup);
+    clientSock.on('error', (e) => cleanup('clientSock.error', e));
+    upstream.on('error', (e) => cleanup('upstream.error', e));
+    throttleUp.on('error', (e) => cleanup('throttleUp.error', e));
+    throttleDown.on('error', (e) => cleanup('throttleDown.error', e));
+    clientSock.on('close', () => cleanup('clientSock.close'));
+    upstream.on('close', () => cleanup('upstream.close'));
  });

  await new Promise<void>((resolve) => server.listen(listenPort, '127.0.0.1', resolve));
@@ -222,13 +231,13 @@ let edgePort: number;
 // Tests
 // ---------------------------------------------------------------------------

-tap.test('setup: start throttled tunnel (20 Mbit/s)', async () => {
+tap.test('setup: start throttled tunnel (100 Mbit/s)', async () => {
  [hubPort, proxyPort, edgePort] = await findFreePorts(3);

  echoServer = await startEchoServer(edgePort, '127.0.0.2');

-  // Throttle proxy: edge → proxy → hub at 20 Mbit/s (2.5 MB/s)
-  throttle = await startThrottleProxy(proxyPort, '127.0.0.1', hubPort, 2.5 * 1024 * 1024);
+  // Throttle proxy: edge → proxy → hub at 100 Mbit/s (12.5 MB/s)
+  throttle = await startThrottleProxy(proxyPort, '127.0.0.1', hubPort, 12.5 * 1024 * 1024);

  hub = new RemoteIngressHub();
  edge = new RemoteIngressEdge();
@@ -246,7 +255,7 @@ tap.test('setup: start throttled tunnel (20 Mbit/s)', async () => {
    });
  });

-  // Edge connects to proxy, not hub directly
+  // Edge connects through throttle proxy
  await edge.start({
    hubHost: '127.0.0.1',
    hubPort: proxyPort,
@@ -262,12 +271,12 @@ tap.test('setup: start throttled tunnel (20 Mbit/s)', async () => {
  expect(status.connected).toBeTrue();
 });

-tap.test('throttled: 10 streams x 50MB each through 10MB/s tunnel', async () => {
-  const streamCount = 10;
-  const payloadSize = 50 * 1024 * 1024; // 50MB per stream = 500MB total round-trip
+tap.test('throttled: 5 streams x 20MB each through 100Mbit tunnel', async () => {
+  const streamCount = 5;
+  const payloadSize = 20 * 1024 * 1024; // 20MB per stream = 100MB total round-trip

-  const promises = Array.from({ length: streamCount }, () => {
-    const data = crypto.randomBytes(payloadSize);
+  const payloads = Array.from({ length: streamCount }, () => crypto.randomBytes(payloadSize));
+  const promises = payloads.map((data) => {
    const hash = sha256(data);
    return sendAndReceive(edgePort, data, 300000).then((received) => ({
      sent: hash,
@@ -284,23 +293,23 @@ tap.test('throttled: 10 streams x 50MB each through 10MB/s tunnel', async () =>
  expect(status.connected).toBeTrue();
 });

-tap.test('throttled: slow consumer with 50MB does not kill other streams', async () => {
-  // Open a connection that creates massive download-direction backpressure:
-  // send 50MB but DON'T read the response — client TCP receive buffer fills
+tap.test('throttled: slow consumer with 20MB does not kill other streams', async () => {
+  // Open a connection that creates download-direction backpressure:
+  // send 20MB but DON'T read the response — client TCP receive buffer fills
  const slowSock = net.createConnection({ host: '127.0.0.1', port: edgePort });
  await new Promise<void>((resolve) => slowSock.on('connect', resolve));
-  const slowData = crypto.randomBytes(50 * 1024 * 1024);
+  const slowData = crypto.randomBytes(20 * 1024 * 1024);
  slowSock.write(slowData);
  slowSock.end();
  // Don't read — backpressure builds on the download path

  // Wait for backpressure to develop
-  await new Promise((r) => setTimeout(r, 3000));
+  await new Promise((r) => setTimeout(r, 2000));

-  // Meanwhile, 10 normal echo streams with 50MB each must complete
-  const payload = crypto.randomBytes(50 * 1024 * 1024);
+  // Meanwhile, 5 normal echo streams with 20MB each must complete
+  const payload = crypto.randomBytes(20 * 1024 * 1024);
  const hash = sha256(payload);
-  const promises = Array.from({ length: 10 }, () =>
+  const promises = Array.from({ length: 5 }, () =>
    sendAndReceive(edgePort, payload, 300000).then((r) => ({
      hash: sha256(r),
      sizeOk: r.length === payload.length,
@@ -317,11 +326,11 @@ tap.test('throttled: slow consumer with 50MB does not kill other streams', async
  slowSock.destroy();
 });

-tap.test('throttled: rapid churn — 5 x 50MB long + 200 x 1MB short streams', async () => {
-  // 5 long streams (50MB each) running alongside 200 short streams (1MB each)
-  const longPayload = crypto.randomBytes(50 * 1024 * 1024);
+tap.test('throttled: rapid churn — 3 x 20MB long + 50 x 1MB short streams', async () => {
+  // 3 long streams (20MB each) running alongside 50 short streams (1MB each)
+  const longPayload = crypto.randomBytes(20 * 1024 * 1024);
  const longHash = sha256(longPayload);
-  const longPromises = Array.from({ length: 5 }, () =>
+  const longPromises = Array.from({ length: 3 }, () =>
    sendAndReceive(edgePort, longPayload, 300000).then((r) => ({
      hash: sha256(r),
      sizeOk: r.length === longPayload.length,
@@ -330,7 +339,7 @@ tap.test('throttled: rapid churn — 5 x 50MB long + 200 x 1MB short streams', a

  const shortPayload = crypto.randomBytes(1024 * 1024);
  const shortHash = sha256(shortPayload);
-  const shortPromises = Array.from({ length: 200 }, () =>
+  const shortPromises = Array.from({ length: 50 }, () =>
    sendAndReceive(edgePort, shortPayload, 300000).then((r) => ({
      hash: sha256(r),
      sizeOk: r.length === shortPayload.length,
@@ -351,10 +360,10 @@ tap.test('throttled: rapid churn — 5 x 50MB long + 200 x 1MB short streams', a
  expect(status.connected).toBeTrue();
 });

-tap.test('throttled: 5 burst waves of 20 streams x 50MB each', async () => {
-  for (let wave = 0; wave < 5; wave++) {
-    const streamCount = 20;
-    const payloadSize = 50 * 1024 * 1024; // 50MB per stream = 1GB per wave
+tap.test('throttled: 3 burst waves of 5 streams x 20MB each', async () => {
+  for (let wave = 0; wave < 3; wave++) {
+    const streamCount = 5;
+    const payloadSize = 20 * 1024 * 1024; // 20MB per stream = 100MB per wave

    const promises = Array.from({ length: streamCount }, () => {
      const data = crypto.randomBytes(payloadSize);
--- a/ts/00_commitinfo_data.ts
+++ b/ts/00_commitinfo_data.ts
@@ -3,6 +3,6 @@
 */
 export const commitinfo = {
  name: '@serve.zone/remoteingress',
-  version: '4.8.12',
+  version: '4.8.18',
  description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.'
 }
Author	SHA1	Message	Date
Juergen Kunz	ce7ccd83dc	v4.8.18 Some checks failed Default (tags) / security (push) Failing after 0s Details Default (tags) / test (push) Failing after 0s Details Default (tags) / release (push) Has been skipped Details Default (tags) / metadata (push) Has been skipped Details	2026-03-17 23:29:02 +00:00
Juergen Kunz	93578d7034	fix(rust-protocol): switch tunnel frame buffers from Vec<u8> to Bytes to reduce copying and memory overhead	2026-03-17 23:29:02 +00:00
Juergen Kunz	4cfc518301	v4.8.17 Some checks failed Default (tags) / security (push) Failing after 1s Details Default (tags) / test (push) Failing after 0s Details Default (tags) / release (push) Has been skipped Details Default (tags) / metadata (push) Has been skipped Details	2026-03-17 22:46:55 +00:00
Juergen Kunz	124df129ec	fix(protocol): increase per-stream flow control windows and remove adaptive read caps	2026-03-17 22:46:55 +00:00
Juergen Kunz	0b8420aac9	v4.8.16 Some checks failed Default (tags) / security (push) Failing after 1s Details Default (tags) / test (push) Failing after 0s Details Default (tags) / release (push) Has been skipped Details Default (tags) / metadata (push) Has been skipped Details	2026-03-17 19:13:30 +00:00
Juergen Kunz	afd193336a	fix(release): bump package version to 4.8.15	2026-03-17 19:13:30 +00:00
Juergen Kunz	e8d429f117	v4.8.13 Some checks failed Default (tags) / security (push) Failing after 0s Details Default (tags) / test (push) Failing after 0s Details Default (tags) / release (push) Has been skipped Details Default (tags) / metadata (push) Has been skipped Details	2026-03-17 15:50:47 +00:00
Juergen Kunz	3c2299430a	fix(remoteingress-protocol): require a flush after each written frame to bound TLS buffer growth	2026-03-17 15:50:47 +00:00
Juergen Kunz	8b5df9a0b7	update	2026-03-17 15:36:23 +00:00