fix(remoteingress-core): prevent stream stalls by guaranteeing flow-control updates and avoiding bounded per-stream channel overflows
This commit is contained in:
@@ -24,8 +24,9 @@ enum EdgeFrameAction {
|
||||
|
||||
/// Per-stream state tracked in the edge's client_writers map.
|
||||
struct EdgeStreamState {
|
||||
/// Channel to deliver FRAME_DATA_BACK payloads to the hub_to_client task.
|
||||
back_tx: mpsc::Sender<Vec<u8>>,
|
||||
/// Unbounded channel to deliver FRAME_DATA_BACK payloads to the hub_to_client task.
|
||||
/// Unbounded because flow control (WINDOW_UPDATE) already limits bytes-in-flight.
|
||||
back_tx: mpsc::UnboundedSender<Vec<u8>>,
|
||||
/// Send window for FRAME_DATA (upload direction).
|
||||
/// Decremented by the client reader, incremented by FRAME_WINDOW_UPDATE_BACK from hub.
|
||||
send_window: Arc<AtomicU32>,
|
||||
@@ -300,10 +301,13 @@ async fn handle_edge_frame(
|
||||
) -> EdgeFrameAction {
|
||||
match frame.frame_type {
|
||||
FRAME_DATA_BACK => {
|
||||
// Dispatch to per-stream unbounded channel. Flow control (WINDOW_UPDATE)
|
||||
// limits bytes-in-flight, so the channel won't grow unbounded. send() only
|
||||
// fails if the receiver is dropped (hub_to_client task already exited).
|
||||
let mut writers = client_writers.lock().await;
|
||||
if let Some(state) = writers.get(&frame.stream_id) {
|
||||
if state.back_tx.try_send(frame.payload).is_err() {
|
||||
log::warn!("Stream {} back-channel full, closing", frame.stream_id);
|
||||
if state.back_tx.send(frame.payload).is_err() {
|
||||
// Receiver dropped — hub_to_client task already exited, clean up
|
||||
writers.remove(&frame.stream_id);
|
||||
}
|
||||
}
|
||||
@@ -731,8 +735,10 @@ async fn handle_client_connection(
|
||||
return;
|
||||
}
|
||||
|
||||
// Set up channel for data coming back from hub (capacity 16 is sufficient with flow control)
|
||||
let (back_tx, mut back_rx) = mpsc::channel::<Vec<u8>>(1024);
|
||||
// Per-stream unbounded back-channel. Flow control (WINDOW_UPDATE) limits
|
||||
// bytes-in-flight, so this won't grow unbounded. Unbounded avoids killing
|
||||
// streams due to channel overflow — backpressure slows streams, never kills them.
|
||||
let (back_tx, mut back_rx) = mpsc::unbounded_channel::<Vec<u8>>();
|
||||
// Adaptive initial window: scale with current stream count to keep total in-flight
|
||||
// data within the 32MB budget. Prevents burst flooding when many streams open.
|
||||
let initial_window = remoteingress_protocol::compute_window_for_stream_count(
|
||||
@@ -779,10 +785,16 @@ async fn handle_client_connection(
|
||||
if consumed_since_update >= threshold {
|
||||
let increment = consumed_since_update.min(adaptive_window);
|
||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, increment);
|
||||
if wu_tx.try_send(frame).is_ok() {
|
||||
consumed_since_update -= increment;
|
||||
// Use send().await for guaranteed delivery — dropping WINDOW_UPDATEs
|
||||
// causes permanent flow stalls. Safe: runs in per-stream task, not main loop.
|
||||
tokio::select! {
|
||||
result = wu_tx.send(frame) => {
|
||||
if result.is_ok() {
|
||||
consumed_since_update -= increment;
|
||||
}
|
||||
}
|
||||
_ = hub_to_client_token.cancelled() => break,
|
||||
}
|
||||
// If try_send fails, keep accumulating — retry on next threshold
|
||||
}
|
||||
}
|
||||
None => break,
|
||||
@@ -794,7 +806,7 @@ async fn handle_client_connection(
|
||||
// Send final window update for any remaining consumed bytes
|
||||
if consumed_since_update > 0 {
|
||||
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, consumed_since_update);
|
||||
let _ = wu_tx.try_send(frame);
|
||||
let _ = wu_tx.send(frame).await;
|
||||
}
|
||||
let _ = client_write.shutdown().await;
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user