feat(protocol): add sustained-stream tunnel scheduling to isolate high-throughput traffic
This commit is contained in:
@@ -2,8 +2,10 @@ use std::collections::VecDeque;
|
||||
use std::future::Future;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use std::time::Duration;
|
||||
use bytes::{Bytes, BytesMut, BufMut};
|
||||
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf};
|
||||
use tokio::time::Instant;
|
||||
|
||||
// Frame type constants
|
||||
pub const FRAME_OPEN: u8 = 0x01;
|
||||
@@ -31,6 +33,16 @@ pub const WINDOW_UPDATE_THRESHOLD: u32 = INITIAL_STREAM_WINDOW / 2;
|
||||
/// Maximum window size to prevent overflow.
|
||||
pub const MAX_WINDOW_SIZE: u32 = 4 * 1024 * 1024;
|
||||
|
||||
// Sustained stream classification constants
|
||||
/// Throughput threshold for sustained classification (2.5 MB/s = 20 Mbit/s).
|
||||
pub const SUSTAINED_THRESHOLD_BPS: u64 = 2_500_000;
|
||||
/// Minimum duration before a stream can be classified as sustained.
|
||||
pub const SUSTAINED_MIN_DURATION_SECS: u64 = 10;
|
||||
/// Fixed window for sustained streams (1 MB — the floor).
|
||||
pub const SUSTAINED_WINDOW: u32 = 1 * 1024 * 1024;
|
||||
/// Maximum bytes written from the sustained queue per forced drain (1 MiB; the forced drain runs at most once per second, hence the intended 1 MiB/s floor).
|
||||
pub const SUSTAINED_FORCED_DRAIN_CAP: usize = 1_048_576;
|
||||
|
||||
/// Encode a WINDOW_UPDATE frame for a specific stream.
|
||||
pub fn encode_window_update(stream_id: u32, frame_type: u8, increment: u32) -> Bytes {
|
||||
encode_frame(stream_id, frame_type, &increment.to_be_bytes())
|
||||
@@ -185,24 +197,30 @@ pub enum TunnelEvent {
|
||||
/// Write state extracted into a sub-struct so the borrow checker can see
|
||||
/// disjoint field access between `self.write` and `self.stream`.
|
||||
/// Holds the priority write queues, the cursor into the frame currently being
/// written, the flush flag, and the bookkeeping for the forced sustained drain.
struct WriteState {
|
||||
ctrl_queue: VecDeque<Bytes>, // PONG, WINDOW_UPDATE, CLOSE, OPEN — always first
|
||||
data_queue: VecDeque<Bytes>, // DATA, DATA_BACK — only when ctrl is empty
|
||||
offset: usize, // progress within current frame being written
|
||||
ctrl_queue: VecDeque<Bytes>, // PONG, WINDOW_UPDATE, CLOSE, OPEN — always first
|
||||
data_queue: VecDeque<Bytes>, // DATA, DATA_BACK — only when ctrl is empty
|
||||
sustained_queue: VecDeque<Bytes>, // DATA, DATA_BACK from sustained streams — lowest priority
|
||||
// A single cursor: it is applied to the front frame of whichever queue is being written.
offset: usize, // progress within current frame being written
|
||||
// Set after every successful poll_write; the write loops in poll_step stop while it is true.
flush_needed: bool,
|
||||
// Sustained starvation prevention: guaranteed 1 MB/s drain
|
||||
// Updated when a sustained frame completes in the priority write loop and when a
// forced drain period starts.
sustained_last_drain: Instant,
|
||||
// Bytes written by the forced drain in the current period; compared against
// SUSTAINED_FORCED_DRAIN_CAP.
sustained_bytes_this_period: usize,
|
||||
}
|
||||
|
||||
impl WriteState {
|
||||
/// Returns `true` when at least one write queue still holds a frame,
/// i.e. when the write loop in `poll_step` has something to send.
fn has_work(&self) -> bool {
|
||||
!self.ctrl_queue.is_empty() || !self.data_queue.is_empty()
|
||||
!self.ctrl_queue.is_empty() || !self.data_queue.is_empty() || !self.sustained_queue.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// Single-owner I/O engine for the tunnel TLS connection.
|
||||
///
|
||||
/// Owns the TLS stream directly — no `tokio::io::split()`, no mutex.
|
||||
/// Uses two priority write queues: ctrl frames (PONG, WINDOW_UPDATE, CLOSE, OPEN)
|
||||
/// are ALWAYS written before data frames (DATA, DATA_BACK). This prevents
|
||||
/// WINDOW_UPDATE starvation that causes flow control deadlocks.
|
||||
/// Uses three priority write queues:
|
||||
/// 1. ctrl (PONG, WINDOW_UPDATE, CLOSE, OPEN) — always first
|
||||
/// 2. data (DATA, DATA_BACK from normal streams) — when ctrl empty
|
||||
/// 3. sustained (DATA, DATA_BACK from sustained streams) — lowest priority,
|
||||
/// drained freely when ctrl+data empty, or forced 1MB/s when they're not
|
||||
pub struct TunnelIo<S> {
|
||||
stream: S,
|
||||
// Read state: accumulate bytes, parse frames incrementally
|
||||
@@ -228,8 +246,11 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
|
||||
write: WriteState {
|
||||
ctrl_queue: VecDeque::new(),
|
||||
data_queue: VecDeque::new(),
|
||||
sustained_queue: VecDeque::new(),
|
||||
offset: 0,
|
||||
flush_needed: false,
|
||||
sustained_last_drain: Instant::now(),
|
||||
sustained_bytes_this_period: 0,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -244,6 +265,11 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
|
||||
self.write.data_queue.push_back(frame);
|
||||
}
|
||||
|
||||
/// Queue a lowest-priority sustained data frame.
///
/// Appends `frame` to the back of the sustained write queue. `poll_step`
/// writes from this queue only when both the ctrl and data queues are empty,
/// apart from the forced drain it attempts at most once per second.
|
||||
pub fn queue_sustained(&mut self, frame: Bytes) {
|
||||
self.write.sustained_queue.push_back(frame);
|
||||
}
|
||||
|
||||
/// Try to parse a complete frame from the read buffer.
|
||||
/// Uses a parse_pos cursor to avoid drain() on every frame.
|
||||
pub fn try_parse_frame(&mut self) -> Option<Result<Frame, std::io::Error>> {
|
||||
@@ -303,33 +329,42 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
|
||||
|
||||
/// Poll-based I/O step. Returns Ready on events, Pending when idle.
|
||||
///
|
||||
/// Order: write(ctrl->data) -> flush -> read -> channels -> timers
|
||||
/// Order: write(ctrl->data->sustained) -> flush -> read -> channels -> timers
|
||||
pub fn poll_step(
|
||||
&mut self,
|
||||
cx: &mut Context<'_>,
|
||||
ctrl_rx: &mut tokio::sync::mpsc::Receiver<Bytes>,
|
||||
data_rx: &mut tokio::sync::mpsc::Receiver<Bytes>,
|
||||
sustained_rx: &mut tokio::sync::mpsc::Receiver<Bytes>,
|
||||
liveness_deadline: &mut Pin<Box<tokio::time::Sleep>>,
|
||||
cancel_token: &tokio_util::sync::CancellationToken,
|
||||
) -> Poll<TunnelEvent> {
|
||||
// 1. WRITE: drain ctrl queue first, then data queue.
|
||||
// 1. WRITE: 3-tier priority — ctrl first, then data, then sustained.
|
||||
// Sustained drains freely when ctrl+data are empty.
|
||||
// Write one frame, set flush_needed, then flush must complete before
|
||||
// writing more. This prevents unbounded TLS session buffer growth.
|
||||
// Safe: `self.write` and `self.stream` are disjoint fields.
|
||||
let mut writes = 0;
|
||||
while self.write.has_work() && writes < 16 && !self.write.flush_needed {
|
||||
let from_ctrl = !self.write.ctrl_queue.is_empty();
|
||||
let frame = if from_ctrl {
|
||||
self.write.ctrl_queue.front().unwrap()
|
||||
// Pick queue: ctrl > data > sustained
|
||||
let queue_id = if !self.write.ctrl_queue.is_empty() {
|
||||
0 // ctrl
|
||||
} else if !self.write.data_queue.is_empty() {
|
||||
1 // data
|
||||
} else {
|
||||
self.write.data_queue.front().unwrap()
|
||||
2 // sustained
|
||||
};
|
||||
let frame = match queue_id {
|
||||
0 => self.write.ctrl_queue.front().unwrap(),
|
||||
1 => self.write.data_queue.front().unwrap(),
|
||||
_ => self.write.sustained_queue.front().unwrap(),
|
||||
};
|
||||
let remaining = &frame[self.write.offset..];
|
||||
|
||||
match Pin::new(&mut self.stream).poll_write(cx, remaining) {
|
||||
Poll::Ready(Ok(0)) => {
|
||||
log::error!("TunnelIo: poll_write returned 0 (write zero), ctrl_q={} data_q={}",
|
||||
self.write.ctrl_queue.len(), self.write.data_queue.len());
|
||||
log::error!("TunnelIo: poll_write returned 0 (write zero), ctrl_q={} data_q={} sustained_q={}",
|
||||
self.write.ctrl_queue.len(), self.write.data_queue.len(), self.write.sustained_queue.len());
|
||||
return Poll::Ready(TunnelEvent::WriteError(
|
||||
std::io::Error::new(std::io::ErrorKind::WriteZero, "write zero"),
|
||||
));
|
||||
@@ -338,21 +373,70 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
|
||||
self.write.offset += n;
|
||||
self.write.flush_needed = true;
|
||||
if self.write.offset >= frame.len() {
|
||||
if from_ctrl { self.write.ctrl_queue.pop_front(); }
|
||||
else { self.write.data_queue.pop_front(); }
|
||||
match queue_id {
|
||||
0 => { self.write.ctrl_queue.pop_front(); }
|
||||
1 => { self.write.data_queue.pop_front(); }
|
||||
_ => {
|
||||
self.write.sustained_queue.pop_front();
|
||||
self.write.sustained_last_drain = Instant::now();
|
||||
self.write.sustained_bytes_this_period = 0;
|
||||
}
|
||||
}
|
||||
self.write.offset = 0;
|
||||
writes += 1;
|
||||
}
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
log::error!("TunnelIo: poll_write error: {} (ctrl_q={} data_q={})",
|
||||
e, self.write.ctrl_queue.len(), self.write.data_queue.len());
|
||||
log::error!("TunnelIo: poll_write error: {} (ctrl_q={} data_q={} sustained_q={})",
|
||||
e, self.write.ctrl_queue.len(), self.write.data_queue.len(), self.write.sustained_queue.len());
|
||||
return Poll::Ready(TunnelEvent::WriteError(e));
|
||||
}
|
||||
Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
|
||||
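// 1b. FORCED SUSTAINED DRAIN: when ctrl/data still have work and sustained frames are waiting,
|
||||
// aim for at least 1 MiB/s by writing up to SUSTAINED_FORCED_DRAIN_CAP bytes from the
|
||||
// sustained queue, at most once per second.
// NOTE(review): this path slices the sustained frame with `self.write.offset`, the same
// cursor the ctrl/data path uses. If a ctrl/data frame is partially written when this
// runs, the slice can start mid-frame or panic. Confirm the offset is 0 here.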
|
||||
if !self.write.sustained_queue.is_empty()
|
||||
&& (!self.write.ctrl_queue.is_empty() || !self.write.data_queue.is_empty())
|
||||
&& !self.write.flush_needed
|
||||
{
|
||||
let now = Instant::now();
|
||||
if now.duration_since(self.write.sustained_last_drain) >= Duration::from_secs(1) {
|
||||
self.write.sustained_bytes_this_period = 0;
|
||||
self.write.sustained_last_drain = now;
|
||||
|
||||
while !self.write.sustained_queue.is_empty()
|
||||
&& self.write.sustained_bytes_this_period < SUSTAINED_FORCED_DRAIN_CAP
|
||||
&& !self.write.flush_needed
|
||||
{
|
||||
let frame = self.write.sustained_queue.front().unwrap();
|
||||
let remaining = &frame[self.write.offset..];
|
||||
match Pin::new(&mut self.stream).poll_write(cx, remaining) {
|
||||
Poll::Ready(Ok(0)) => {
|
||||
return Poll::Ready(TunnelEvent::WriteError(
|
||||
std::io::Error::new(std::io::ErrorKind::WriteZero, "write zero"),
|
||||
));
|
||||
}
|
||||
Poll::Ready(Ok(n)) => {
|
||||
self.write.offset += n;
|
||||
self.write.flush_needed = true;
|
||||
self.write.sustained_bytes_this_period += n;
|
||||
if self.write.offset >= frame.len() {
|
||||
self.write.sustained_queue.pop_front();
|
||||
self.write.offset = 0;
|
||||
}
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
return Poll::Ready(TunnelEvent::WriteError(e));
|
||||
}
|
||||
Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. FLUSH: push encrypted data from TLS session to TCP.
|
||||
if self.write.flush_needed {
|
||||
match Pin::new(&mut self.stream).poll_flush(cx) {
|
||||
@@ -436,6 +520,16 @@ impl<S: AsyncRead + AsyncWrite + Unpin> TunnelIo<S> {
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sustained channel: drain when sustained_queue is small (same backpressure pattern).
|
||||
// Channel close is non-fatal — not all connections have sustained streams.
|
||||
if self.write.sustained_queue.len() < 64 {
|
||||
loop {
|
||||
match sustained_rx.poll_recv(cx) {
|
||||
Poll::Ready(Some(frame)) => { self.write.sustained_queue.push_back(frame); got_new = true; }
|
||||
Poll::Ready(None) | Poll::Pending => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 5. TIMERS
|
||||
if liveness_deadline.as_mut().poll(cx).is_ready() {
|
||||
|
||||
Reference in New Issue
Block a user