Compare commits

..

34 Commits

Author SHA1 Message Date
b6e66a7fa6 v4.5.10 2026-03-16 13:48:35 +00:00
1391b39601 fix(remoteingress-core): guard zero-window reads to avoid false EOF handling on stalled streams 2026-03-16 13:48:35 +00:00
e813c2f044 v4.5.9 2026-03-16 11:29:38 +00:00
0b8c1f0b57 fix(remoteingress-core): delay stream close until downstream response draining finishes to prevent truncated transfers 2026-03-16 11:29:38 +00:00
a63dbf2502 v4.5.8 2026-03-16 10:51:59 +00:00
4b95a3c999 fix(remoteingress-core): ensure upstream writes cancel promptly and reliably deliver CLOSE_BACK frames 2026-03-16 10:51:59 +00:00
51ab32f6c3 v4.5.7 2026-03-16 09:44:31 +00:00
ed52520d50 fix(remoteingress-core): improve tunnel reconnect and frame write efficiency 2026-03-16 09:44:31 +00:00
a08011d2da v4.5.6 2026-03-16 09:36:03 +00:00
679b247c8a fix(remoteingress-core): disable Nagle's algorithm on edge, hub, and upstream TCP sockets to reduce control-frame latency 2026-03-16 09:36:03 +00:00
32f9845495 v4.5.5 2026-03-16 09:02:02 +00:00
c0e1daa0e4 fix(remoteingress-core): wait for hub-to-client draining before cleanup and reliably send close frames 2026-03-16 09:02:02 +00:00
fd511c8a5c v4.5.4 2026-03-15 21:06:44 +00:00
c490e35a8f fix(remoteingress-core): preserve stream close ordering and add flow-control stall timeouts 2026-03-15 21:06:44 +00:00
579e553da0 v4.5.3 2026-03-15 19:26:39 +00:00
a8ee0b33d7 fix(remoteingress-core): prioritize control frames over data in edge and hub tunnel writers 2026-03-15 19:26:39 +00:00
43e320a36d v4.5.2 2026-03-15 18:16:10 +00:00
6ac4b37532 fix(remoteingress-core): improve stream flow control retries and increase channel buffer capacity 2026-03-15 18:16:10 +00:00
f456b0ba4f v4.5.1 2026-03-15 17:52:45 +00:00
69530f73aa fix(protocol): increase per-stream flow control window and channel buffers to improve high-RTT throughput 2026-03-15 17:52:45 +00:00
207b4a5cec v4.5.0 2026-03-15 17:33:59 +00:00
761551596b feat(remoteingress-core): add per-stream flow control for edge and hub tunnel data transfer 2026-03-15 17:33:59 +00:00
cf2d32bfe7 v4.4.1 2026-03-15 17:01:27 +00:00
4e9041c6a7 fix(remoteingress-core): prevent stream data loss by applying backpressure and closing saturated channels 2026-03-15 17:01:27 +00:00
86d4e9889a v4.4.0 2026-03-03 11:47:50 +00:00
45a2811f3e feat(remoteingress): add heartbeat PING/PONG and liveness timeouts; implement fast-reconnect/backoff reset and JS crash-recovery auto-restart 2026-03-03 11:47:50 +00:00
d6a07c28a0 v4.3.0 2026-02-26 23:47:16 +00:00
56a14aa7c5 feat(hub): add optional TLS certificate/key support to hub start config and bridge 2026-02-26 23:47:16 +00:00
417f62e646 v4.2.0 2026-02-26 23:02:23 +00:00
bda82f32ca feat(core): expose edge peer address in hub events and migrate writers to channel-based, non-blocking framing with stream limits and timeouts 2026-02-26 23:02:23 +00:00
4b06cb1b24 v4.1.0 2026-02-26 17:39:40 +00:00
1aae4b8c8e feat(remoteingress-bin): use mimalloc as the global allocator to reduce memory overhead and improve allocation performance 2026-02-26 17:39:40 +00:00
3474e8c310 v4.0.1 2026-02-26 12:37:40 +00:00
3df20df2a1 fix(hub): cancel per-stream tokens on stream close and avoid duplicate StreamClosed events; bump @types/node devDependency to ^25.3.0 2026-02-26 12:37:39 +00:00
12 changed files with 856 additions and 127 deletions

View File

@@ -1,5 +1,125 @@
# Changelog # Changelog
## 2026-03-16 - 4.5.10 - fix(remoteingress-core)
guard zero-window reads to avoid false EOF handling on stalled streams
- Prevent upload and download loops from calling read on an empty buffer when flow-control window remains at 0 after stall timeout
- Log a warning and close the affected stream instead of misinterpreting Ok(0) as end-of-file
## 2026-03-16 - 4.5.9 - fix(remoteingress-core)
delay stream close until downstream response draining finishes to prevent truncated transfers
- Waits for the hub-to-client download task to finish before sending the stream CLOSE frame
- Prevents upstream reads from being cancelled mid-response during asymmetric transfers such as git fetch
- Retains the existing timeout so stalled downloads still clean up safely
## 2026-03-16 - 4.5.8 - fix(remoteingress-core)
ensure upstream writes cancel promptly and reliably deliver CLOSE_BACK frames
- listen for stream cancellation while waiting on upstream write timeouts so FRAME_CLOSE does not block for up to 60 seconds
- replace try_send with send().await when emitting CLOSE_BACK frames to avoid silently dropping close notifications when the data channel is full
## 2026-03-16 - 4.5.7 - fix(remoteingress-core)
improve tunnel reconnect and frame write efficiency
- Reuse the TLS connector across edge reconnections to preserve session resumption state and reduce reconnect latency.
- Buffer hub and edge frame writes to coalesce small control and data frames into fewer TLS records and syscalls while still flushing each frame promptly.
## 2026-03-16 - 4.5.6 - fix(remoteingress-core)
disable Nagle's algorithm on edge, hub, and upstream TCP sockets to reduce control-frame latency
- Enable TCP_NODELAY on the edge connection to the hub for faster PING/PONG and WINDOW_UPDATE delivery
- Apply TCP_NODELAY on accepted hub streams before TLS handling
- Enable TCP_NODELAY on SmartProxy upstream connections before sending the PROXY header
## 2026-03-16 - 4.5.5 - fix(remoteingress-core)
wait for hub-to-client draining before cleanup and reliably send close frames
- switch CLOSE frame delivery on the data channel from try_send to send().await to avoid dropping it when the channel is full
- delay stream cleanup until the hub-to-client task finishes or times out so large downstream responses continue after upload EOF
- add a bounded 5-minute wait for download draining to prevent premature termination of asymmetric transfers such as git fetch
## 2026-03-15 - 4.5.4 - fix(remoteingress-core)
preserve stream close ordering and add flow-control stall timeouts
- Send CLOSE and CLOSE_BACK frames on the data channel so they arrive after the final stream data frames.
- Log and abort stalled upload and download paths when flow-control windows stay empty for 120 seconds.
- Apply a 60-second timeout when writing buffered stream data to the upstream connection to prevent hung streams.
## 2026-03-15 - 4.5.3 - fix(remoteingress-core)
prioritize control frames over data in edge and hub tunnel writers
- Split tunnel/frame writers into separate control and data channels in edge and hub
- Use biased select loops so PING, PONG, WINDOW_UPDATE, OPEN, and CLOSE frames are sent before data frames
- Route stream data through dedicated data channels while keeping OPEN, CLOSE, and flow-control updates on control channels to prevent keepalive starvation under load
## 2026-03-15 - 4.5.2 - fix(remoteingress-core)
improve stream flow control retries and increase channel buffer capacity
- increase per-stream mpsc channel capacity from 128 to 256 on both edge and hub paths
- only reset accumulated window update bytes after a successful try_send to avoid dropping flow-control credits when the update channel is busy
## 2026-03-15 - 4.5.1 - fix(protocol)
increase per-stream flow control window and channel buffers to improve high-RTT throughput
- raise the initial stream window from 256 KB to 4 MB to allow more in-flight data per stream
- increase edge and hub mpsc channel capacities from 16 to 128 to better absorb throughput under flow control
## 2026-03-15 - 4.5.0 - feat(remoteingress-core)
add per-stream flow control for edge and hub tunnel data transfer
- introduce WINDOW_UPDATE frame types and protocol helpers for per-stream flow control
- track per-stream send windows on both edge and hub to limit reads based on available capacity
- send window updates after downstream writes to reduce channel pressure during large transfers
## 2026-03-15 - 4.4.1 - fix(remoteingress-core)
prevent stream data loss by applying backpressure and closing saturated channels
- replace non-blocking frame writes with awaited sends in per-stream tasks so large transfers respect backpressure instead of dropping data
- close and remove streams when back-channel or data channels fill up to avoid TCP stream corruption from silently dropped frames
## 2026-03-03 - 4.4.0 - feat(remoteingress)
add heartbeat PING/PONG and liveness timeouts; implement fast-reconnect/backoff reset and JS crash-recovery auto-restart
- protocol: add FRAME_PING and FRAME_PONG and unit tests for ping/pong frames
- edge (Rust): reset backoff after successful connection, respond to PING with PONG, track liveness via deadline and reconnect on timeout, use Duration/Instant helpers
- hub (Rust): send periodic PING to edges, handle PONGs, enforce liveness timeout and disconnect inactive edges, use tokio interval and time utilities
- ts: RemoteIngressEdge and RemoteIngressHub: add crash-recovery auto-restart with exponential backoff and max attempts, save/restore config and allowed edges, register/remove exit handlers, ensure stop() marks stopping and cleans up listeners
- minor API/typing: introduce TAllowedEdge alias and persist allowed edges for restart recovery
## 2026-02-26 - 4.3.0 - feat(hub)
add optional TLS certificate/key support to hub start config and bridge
- TypeScript: add tls.certPem and tls.keyPem to IHubConfig and include tlsCertPem/tlsKeyPem in startHub bridge command when both are provided
- TypeScript: extend startHub params with tlsCertPem and tlsKeyPem and conditionally send them
- Rust: change HubConfig serde attributes for tls_cert_pem and tls_key_pem from skip to default so absent PEM fields deserialize as None
- Enables optional provisioning of TLS certificate and key to the hub when provided from the JS side
## 2026-02-26 - 4.2.0 - feat(core)
expose edge peer address in hub events and migrate writers to channel-based, non-blocking framing with stream limits and timeouts
- Add peerAddr to ConnectedEdgeStatus and HubEvent::EdgeConnected and surface it to the TS frontend event (management:edgeConnected).
- Replace Arc<Mutex<WriteHalf>> writers with dedicated mpsc channel writer tasks in both hub and edge crates to serialize writes off the main tasks.
- Use non-blocking try_send for data frames to avoid head-of-line blocking; when a channel is full, the frame is dropped and a warning is logged.
- Introduce MAX_STREAMS_PER_EDGE semaphore to limit concurrent streams per edge and reject excess opens with a CLOSE_BACK frame.
- Add a 10s timeout when connecting to SmartProxy to avoid hanging connections.
- Ensure writer tasks are aborted on shutdown/cleanup and propagate cancellation tokens appropriately.
## 2026-02-26 - 4.1.0 - feat(remoteingress-bin)
use mimalloc as the global allocator to reduce memory overhead and improve allocation performance
- added mimalloc = "0.1" dependency to rust/crates/remoteingress-bin/Cargo.toml
- registered mimalloc as the #[global_allocator] in rust/crates/remoteingress-bin/src/main.rs
- updated Cargo.lock with libmimalloc-sys and mimalloc package entries
## 2026-02-26 - 4.0.1 - fix(hub)
cancel per-stream tokens on stream close and avoid duplicate StreamClosed events; bump @types/node devDependency to ^25.3.0
- Add CancellationToken to per-stream entries so each stream can be cancelled independently.
- Ensure StreamClosed event is only emitted when a stream was actually present (guards against duplicate events).
- Cancel the stream-specific token on FRAME_CLOSE to stop associated tasks and free resources.
- DevDependency bump: @types/node updated from ^25.2.3 to ^25.3.0.
## 2026-02-19 - 4.0.0 - BREAKING CHANGE(remoteingress-core) ## 2026-02-19 - 4.0.0 - BREAKING CHANGE(remoteingress-core)
add cancellation tokens and cooperative shutdown; switch event channels to bounded mpsc and improve cleanup add cancellation tokens and cooperative shutdown; switch event channels to bounded mpsc and improve cleanup

View File

@@ -1,6 +1,6 @@
{ {
"name": "@serve.zone/remoteingress", "name": "@serve.zone/remoteingress",
"version": "4.0.0", "version": "4.5.10",
"private": false, "private": false,
"description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.", "description": "Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.",
"main": "dist_ts/index.js", "main": "dist_ts/index.js",
@@ -20,7 +20,7 @@
"@git.zone/tsrust": "^1.3.0", "@git.zone/tsrust": "^1.3.0",
"@git.zone/tstest": "^3.1.8", "@git.zone/tstest": "^3.1.8",
"@push.rocks/tapbundle": "^6.0.3", "@push.rocks/tapbundle": "^6.0.3",
"@types/node": "^25.2.3" "@types/node": "^25.3.0"
}, },
"dependencies": { "dependencies": {
"@push.rocks/qenv": "^6.1.3", "@push.rocks/qenv": "^6.1.3",

66
pnpm-lock.yaml generated
View File

@@ -34,8 +34,8 @@ importers:
specifier: ^6.0.3 specifier: ^6.0.3
version: 6.0.3(socks@2.8.7) version: 6.0.3(socks@2.8.7)
'@types/node': '@types/node':
specifier: ^25.2.3 specifier: ^25.3.0
version: 25.2.3 version: 25.3.0
packages: packages:
@@ -1501,8 +1501,8 @@ packages:
'@types/node@22.19.11': '@types/node@22.19.11':
resolution: {integrity: sha512-BH7YwL6rA93ReqeQS1c4bsPpcfOmJasG+Fkr6Y59q83f9M1WcBRHR2vM+P9eOisYRcN3ujQoiZY8uk5W+1WL8w==} resolution: {integrity: sha512-BH7YwL6rA93ReqeQS1c4bsPpcfOmJasG+Fkr6Y59q83f9M1WcBRHR2vM+P9eOisYRcN3ujQoiZY8uk5W+1WL8w==}
'@types/node@25.2.3': '@types/node@25.3.0':
resolution: {integrity: sha512-m0jEgYlYz+mDJZ2+F4v8D1AyQb+QzsNqRuI7xg1VQX/KlKS0qT9r1Mo16yo5F/MtifXFgaofIFsdFMox2SxIbQ==} resolution: {integrity: sha512-4K3bqJpXpqfg2XKGK9bpDTc6xO/xoUP/RBWS7AtRMug6zZFaRekiLzjVtAoZMquxoAbzBvy5nxQ7veS5eYzf8A==}
'@types/parse5@6.0.3': '@types/parse5@6.0.3':
resolution: {integrity: sha512-SuT16Q1K51EAVPz1K29DJ/sXjhSQ0zjvsypYJ6tlwVsRV9jwW5Adq2ch8Dq8kDBCkYnELS7N7VNCSB5nC56t/g==} resolution: {integrity: sha512-SuT16Q1K51EAVPz1K29DJ/sXjhSQ0zjvsypYJ6tlwVsRV9jwW5Adq2ch8Dq8kDBCkYnELS7N7VNCSB5nC56t/g==}
@@ -3986,8 +3986,8 @@ packages:
undici-types@6.21.0: undici-types@6.21.0:
resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==} resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==}
undici-types@7.16.0: undici-types@7.18.2:
resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==} resolution: {integrity: sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==}
unified@11.0.5: unified@11.0.5:
resolution: {integrity: sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==} resolution: {integrity: sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==}
@@ -5178,7 +5178,7 @@ snapshots:
'@jest/schemas': 29.6.3 '@jest/schemas': 29.6.3
'@types/istanbul-lib-coverage': 2.0.6 '@types/istanbul-lib-coverage': 2.0.6
'@types/istanbul-reports': 3.0.4 '@types/istanbul-reports': 3.0.4
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/yargs': 17.0.35 '@types/yargs': 17.0.35
chalk: 4.1.2 chalk: 4.1.2
@@ -6736,14 +6736,14 @@ snapshots:
'@types/accepts@1.3.7': '@types/accepts@1.3.7':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/babel__code-frame@7.27.0': {} '@types/babel__code-frame@7.27.0': {}
'@types/body-parser@1.19.6': '@types/body-parser@1.19.6':
dependencies: dependencies:
'@types/connect': 3.4.38 '@types/connect': 3.4.38
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/buffer-json@2.0.3': {} '@types/buffer-json@2.0.3': {}
@@ -6760,17 +6760,17 @@ snapshots:
'@types/clean-css@4.2.11': '@types/clean-css@4.2.11':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
source-map: 0.6.1 source-map: 0.6.1
'@types/co-body@6.1.3': '@types/co-body@6.1.3':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/qs': 6.14.0 '@types/qs': 6.14.0
'@types/connect@3.4.38': '@types/connect@3.4.38':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/content-disposition@0.5.9': {} '@types/content-disposition@0.5.9': {}
@@ -6781,11 +6781,11 @@ snapshots:
'@types/connect': 3.4.38 '@types/connect': 3.4.38
'@types/express': 5.0.6 '@types/express': 5.0.6
'@types/keygrip': 1.0.6 '@types/keygrip': 1.0.6
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/cors@2.8.19': '@types/cors@2.8.19':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/debounce@1.2.4': {} '@types/debounce@1.2.4': {}
@@ -6797,7 +6797,7 @@ snapshots:
'@types/express-serve-static-core@5.1.1': '@types/express-serve-static-core@5.1.1':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/qs': 6.14.0 '@types/qs': 6.14.0
'@types/range-parser': 1.2.7 '@types/range-parser': 1.2.7
'@types/send': 1.2.1 '@types/send': 1.2.1
@@ -6811,7 +6811,7 @@ snapshots:
'@types/fs-extra@11.0.4': '@types/fs-extra@11.0.4':
dependencies: dependencies:
'@types/jsonfile': 6.1.4 '@types/jsonfile': 6.1.4
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/hast@3.0.4': '@types/hast@3.0.4':
dependencies: dependencies:
@@ -6845,7 +6845,7 @@ snapshots:
'@types/jsonfile@6.1.4': '@types/jsonfile@6.1.4':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/keygrip@1.0.6': {} '@types/keygrip@1.0.6': {}
@@ -6862,7 +6862,7 @@ snapshots:
'@types/http-errors': 2.0.5 '@types/http-errors': 2.0.5
'@types/keygrip': 1.0.6 '@types/keygrip': 1.0.6
'@types/koa-compose': 3.2.9 '@types/koa-compose': 3.2.9
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/mdast@4.0.4': '@types/mdast@4.0.4':
dependencies: dependencies:
@@ -6876,19 +6876,19 @@ snapshots:
'@types/mute-stream@0.0.4': '@types/mute-stream@0.0.4':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/node-forge@1.3.14': '@types/node-forge@1.3.14':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/node@22.19.11': '@types/node@22.19.11':
dependencies: dependencies:
undici-types: 6.21.0 undici-types: 6.21.0
'@types/node@25.2.3': '@types/node@25.3.0':
dependencies: dependencies:
undici-types: 7.16.0 undici-types: 7.18.2
'@types/parse5@6.0.3': {} '@types/parse5@6.0.3': {}
@@ -6904,18 +6904,18 @@ snapshots:
'@types/s3rver@3.7.4': '@types/s3rver@3.7.4':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/semver@7.7.1': {} '@types/semver@7.7.1': {}
'@types/send@1.2.1': '@types/send@1.2.1':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/serve-static@2.2.0': '@types/serve-static@2.2.0':
dependencies: dependencies:
'@types/http-errors': 2.0.5 '@types/http-errors': 2.0.5
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/sinon-chai@3.2.12': '@types/sinon-chai@3.2.12':
dependencies: dependencies:
@@ -6934,11 +6934,11 @@ snapshots:
'@types/tar-stream@3.1.4': '@types/tar-stream@3.1.4':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/through2@2.0.41': '@types/through2@2.0.41':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/triple-beam@1.3.5': {} '@types/triple-beam@1.3.5': {}
@@ -6966,11 +6966,11 @@ snapshots:
'@types/ws@7.4.7': '@types/ws@7.4.7':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/ws@8.18.1': '@types/ws@8.18.1':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
'@types/yargs-parser@21.0.3': {} '@types/yargs-parser@21.0.3': {}
@@ -6980,7 +6980,7 @@ snapshots:
'@types/yauzl@2.10.3': '@types/yauzl@2.10.3':
dependencies: dependencies:
'@types/node': 25.2.3 '@types/node': 25.3.0
optional: true optional: true
'@ungap/structured-clone@1.3.0': {} '@ungap/structured-clone@1.3.0': {}
@@ -7585,7 +7585,7 @@ snapshots:
engine.io@6.6.4: engine.io@6.6.4:
dependencies: dependencies:
'@types/cors': 2.8.19 '@types/cors': 2.8.19
'@types/node': 25.2.3 '@types/node': 25.3.0
accepts: 1.3.8 accepts: 1.3.8
base64id: 2.0.0 base64id: 2.0.0
cookie: 0.7.2 cookie: 0.7.2
@@ -8299,7 +8299,7 @@ snapshots:
jest-util@29.7.0: jest-util@29.7.0:
dependencies: dependencies:
'@jest/types': 29.6.3 '@jest/types': 29.6.3
'@types/node': 25.2.3 '@types/node': 25.3.0
chalk: 4.1.2 chalk: 4.1.2
ci-info: 3.9.0 ci-info: 3.9.0
graceful-fs: 4.2.11 graceful-fs: 4.2.11
@@ -9807,7 +9807,7 @@ snapshots:
undici-types@6.21.0: {} undici-types@6.21.0: {}
undici-types@7.16.0: {} undici-types@7.18.2: {}
unified@11.0.5: unified@11.0.5:
dependencies: dependencies:

20
rust/Cargo.lock generated
View File

@@ -327,6 +327,16 @@ version = "0.2.182"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
[[package]]
name = "libmimalloc-sys"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "667f4fec20f29dfc6bc7357c582d91796c169ad7e2fce709468aefeb2c099870"
dependencies = [
"cc",
"libc",
]
[[package]] [[package]]
name = "lock_api" name = "lock_api"
version = "0.4.14" version = "0.4.14"
@@ -348,6 +358,15 @@ version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "mimalloc"
version = "0.1.48"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1ee66a4b64c74f4ef288bcbb9192ad9c3feaad75193129ac8509af543894fd8"
dependencies = [
"libmimalloc-sys",
]
[[package]] [[package]]
name = "mio" name = "mio"
version = "1.1.1" version = "1.1.1"
@@ -519,6 +538,7 @@ dependencies = [
"clap", "clap",
"env_logger", "env_logger",
"log", "log",
"mimalloc",
"remoteingress-core", "remoteingress-core",
"remoteingress-protocol", "remoteingress-protocol",
"rustls", "rustls",

View File

@@ -17,3 +17,4 @@ serde_json = "1"
log = "0.4" log = "0.4"
env_logger = "0.11" env_logger = "0.11"
rustls = { version = "0.23", default-features = false, features = ["ring"] } rustls = { version = "0.23", default-features = false, features = ["ring"] }
mimalloc = "0.1"

View File

@@ -1,3 +1,6 @@
// Register mimalloc as the allocator for the whole binary: the
// `#[global_allocator]` attribute routes every heap allocation made by this
// program through `MiMalloc` instead of the default system allocator.
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
use clap::Parser; use clap::Parser;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::sync::Arc; use std::sync::Arc;
@@ -164,10 +167,10 @@ async fn handle_request(
tokio::spawn(async move { tokio::spawn(async move {
while let Some(event) = event_rx.recv().await { while let Some(event) = event_rx.recv().await {
match &event { match &event {
HubEvent::EdgeConnected { edge_id } => { HubEvent::EdgeConnected { edge_id, peer_addr } => {
send_event( send_event(
"edgeConnected", "edgeConnected",
serde_json::json!({ "edgeId": edge_id }), serde_json::json!({ "edgeId": edge_id, "peerAddr": peer_addr }),
); );
} }
HubEvent::EdgeDisconnected { edge_id } => { HubEvent::EdgeDisconnected { edge_id } => {

View File

@@ -1,16 +1,29 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration;
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader}; use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
use tokio::net::{TcpListener, TcpStream}; use tokio::net::{TcpListener, TcpStream};
use tokio::sync::{mpsc, Mutex, RwLock}; use tokio::sync::{mpsc, Mutex, Notify, RwLock};
use tokio::task::JoinHandle; use tokio::task::JoinHandle;
use tokio::time::{Instant, sleep_until};
use tokio_rustls::TlsConnector; use tokio_rustls::TlsConnector;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use remoteingress_protocol::*; use remoteingress_protocol::*;
/// Per-stream state tracked in the edge's client_writers map (keyed by stream id).
///
/// Groups the back channel toward the client with the upload-direction
/// flow-control state, so the hub frame-dispatch loop reaches both through a
/// single map lookup.
struct EdgeStreamState {
/// Channel to deliver FRAME_DATA_BACK payloads to the hub_to_client task.
/// The dispatch loop hands payloads over with `try_send`; if that fails it logs a
/// warning and removes this stream's entry from the map.
back_tx: mpsc::Sender<Vec<u8>>,
/// Send window for FRAME_DATA (upload direction).
/// Decremented by the client reader, incremented by FRAME_WINDOW_UPDATE_BACK from hub.
/// The dispatch loop resets the value to MAX_WINDOW_SIZE when an increment pushes it past that limit.
send_window: Arc<AtomicU32>,
/// Notifier to wake the client reader when the window opens.
/// Signalled via `notify_one()` after each applied window increment.
window_notify: Arc<Notify>,
}
/// Edge configuration (hub-host + credentials only; ports come from hub). /// Edge configuration (hub-host + credentials only; ports come from hub).
#[derive(Debug, Clone, Deserialize, Serialize)] #[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
@@ -181,6 +194,14 @@ async fn edge_main_loop(
let mut backoff_ms: u64 = 1000; let mut backoff_ms: u64 = 1000;
let max_backoff_ms: u64 = 30000; let max_backoff_ms: u64 = 30000;
// Build TLS config ONCE outside the reconnect loop — preserves session
// cache across reconnections for TLS session resumption (saves 1 RTT).
let tls_config = rustls::ClientConfig::builder()
.dangerous()
.with_custom_certificate_verifier(Arc::new(NoCertVerifier))
.with_no_client_auth();
let connector = TlsConnector::from(Arc::new(tls_config));
loop { loop {
// Create a per-connection child token // Create a per-connection child token
let connection_token = cancel_token.child_token(); let connection_token = cancel_token.child_token();
@@ -196,12 +217,20 @@ async fn edge_main_loop(
&listen_ports, &listen_ports,
&mut shutdown_rx, &mut shutdown_rx,
&connection_token, &connection_token,
&connector,
) )
.await; .await;
// Cancel connection token to kill all orphaned tasks from this cycle // Cancel connection token to kill all orphaned tasks from this cycle
connection_token.cancel(); connection_token.cancel();
// Reset backoff after a successful connection for fast reconnect
let was_connected = *connected.read().await;
if was_connected {
backoff_ms = 1000;
log::info!("Was connected; resetting backoff to {}ms for fast reconnect", backoff_ms);
}
*connected.write().await = false; *connected.write().await = false;
let _ = event_tx.try_send(EdgeEvent::TunnelDisconnected); let _ = event_tx.try_send(EdgeEvent::TunnelDisconnected);
active_streams.store(0, Ordering::Relaxed); active_streams.store(0, Ordering::Relaxed);
@@ -214,7 +243,7 @@ async fn edge_main_loop(
EdgeLoopResult::Reconnect => { EdgeLoopResult::Reconnect => {
log::info!("Reconnecting in {}ms...", backoff_ms); log::info!("Reconnecting in {}ms...", backoff_ms);
tokio::select! { tokio::select! {
_ = tokio::time::sleep(std::time::Duration::from_millis(backoff_ms)) => {} _ = tokio::time::sleep(Duration::from_millis(backoff_ms)) => {}
_ = cancel_token.cancelled() => break, _ = cancel_token.cancelled() => break,
_ = shutdown_rx.recv() => break, _ = shutdown_rx.recv() => break,
} }
@@ -239,18 +268,16 @@ async fn connect_to_hub_and_run(
listen_ports: &Arc<RwLock<Vec<u16>>>, listen_ports: &Arc<RwLock<Vec<u16>>>,
shutdown_rx: &mut mpsc::Receiver<()>, shutdown_rx: &mut mpsc::Receiver<()>,
connection_token: &CancellationToken, connection_token: &CancellationToken,
connector: &TlsConnector,
) -> EdgeLoopResult { ) -> EdgeLoopResult {
// Build TLS connector that skips cert verification (auth is via secret)
let tls_config = rustls::ClientConfig::builder()
.dangerous()
.with_custom_certificate_verifier(Arc::new(NoCertVerifier))
.with_no_client_auth();
let connector = TlsConnector::from(Arc::new(tls_config));
let addr = format!("{}:{}", config.hub_host, config.hub_port); let addr = format!("{}:{}", config.hub_host, config.hub_port);
let tcp = match TcpStream::connect(&addr).await { let tcp = match TcpStream::connect(&addr).await {
Ok(s) => s, Ok(s) => {
// Disable Nagle's algorithm for low-latency control frames (PING/PONG, WINDOW_UPDATE)
let _ = s.set_nodelay(true);
s
}
Err(e) => { Err(e) => {
log::error!("Failed to connect to hub at {}: {}", addr, e); log::error!("Failed to connect to hub at {}: {}", addr, e);
return EdgeLoopResult::Reconnect; return EdgeLoopResult::Reconnect;
@@ -336,25 +363,60 @@ async fn connect_to_hub_and_run(
_ = stun_token.cancelled() => break, _ = stun_token.cancelled() => break,
} }
tokio::select! { tokio::select! {
_ = tokio::time::sleep(std::time::Duration::from_secs(stun_interval)) => {} _ = tokio::time::sleep(Duration::from_secs(stun_interval)) => {}
_ = stun_token.cancelled() => break, _ = stun_token.cancelled() => break,
} }
} }
}); });
// Client socket map: stream_id -> sender for writing data back to client // Client socket map: stream_id -> per-stream state (back channel + flow control)
let client_writers: Arc<Mutex<HashMap<u32, mpsc::Sender<Vec<u8>>>>> = let client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>> =
Arc::new(Mutex::new(HashMap::new())); Arc::new(Mutex::new(HashMap::new()));
// Shared tunnel writer // QoS dual-channel tunnel writer: control frames (PONG/WINDOW_UPDATE/CLOSE/OPEN)
let tunnel_writer = Arc::new(Mutex::new(write_half)); // have priority over data frames (DATA). Prevents PING starvation under load.
let (tunnel_ctrl_tx, mut tunnel_ctrl_rx) = mpsc::channel::<Vec<u8>>(64);
let (tunnel_data_tx, mut tunnel_data_rx) = mpsc::channel::<Vec<u8>>(4096);
// Legacy alias — control channel for PONG, CLOSE, WINDOW_UPDATE, OPEN
let tunnel_writer_tx = tunnel_ctrl_tx.clone();
let tw_token = connection_token.clone();
let tunnel_writer_handle = tokio::spawn(async move {
// BufWriter coalesces small writes (frame headers, control frames) into fewer
// TLS records and syscalls. Flushed after each frame to avoid holding data.
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
loop {
tokio::select! {
biased; // control frames always take priority over data
ctrl = tunnel_ctrl_rx.recv() => {
match ctrl {
Some(frame_data) => {
if writer.write_all(&frame_data).await.is_err() { break; }
if writer.flush().await.is_err() { break; }
}
None => break,
}
}
data = tunnel_data_rx.recv() => {
match data {
Some(frame_data) => {
if writer.write_all(&frame_data).await.is_err() { break; }
if writer.flush().await.is_err() { break; }
}
None => break,
}
}
_ = tw_token.cancelled() => break,
}
}
});
// Start TCP listeners for initial ports (hot-reloadable) // Start TCP listeners for initial ports (hot-reloadable)
let mut port_listeners: HashMap<u16, JoinHandle<()>> = HashMap::new(); let mut port_listeners: HashMap<u16, JoinHandle<()>> = HashMap::new();
apply_port_config( apply_port_config(
&handshake.listen_ports, &handshake.listen_ports,
&mut port_listeners, &mut port_listeners,
&tunnel_writer, &tunnel_writer_tx,
&tunnel_data_tx,
&client_writers, &client_writers,
active_streams, active_streams,
next_stream_id, next_stream_id,
@@ -362,6 +424,11 @@ async fn connect_to_hub_and_run(
connection_token, connection_token,
); );
// Heartbeat: liveness timeout detects silent hub failures
let liveness_timeout_dur = Duration::from_secs(45);
let mut last_activity = Instant::now();
let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur));
// Read frames from hub // Read frames from hub
let mut frame_reader = FrameReader::new(buf_reader); let mut frame_reader = FrameReader::new(buf_reader);
let result = loop { let result = loop {
@@ -369,11 +436,35 @@ async fn connect_to_hub_and_run(
frame_result = frame_reader.next_frame() => { frame_result = frame_reader.next_frame() => {
match frame_result { match frame_result {
Ok(Some(frame)) => { Ok(Some(frame)) => {
// Reset liveness on any received frame
last_activity = Instant::now();
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
match frame.frame_type { match frame.frame_type {
FRAME_DATA_BACK => { FRAME_DATA_BACK => {
let writers = client_writers.lock().await; // Non-blocking dispatch to per-stream channel.
if let Some(tx) = writers.get(&frame.stream_id) { // With flow control, the sender should rarely exceed the channel capacity.
let _ = tx.send(frame.payload).await; let mut writers = client_writers.lock().await;
if let Some(state) = writers.get(&frame.stream_id) {
if state.back_tx.try_send(frame.payload).is_err() {
log::warn!("Stream {} back-channel full, closing stream", frame.stream_id);
writers.remove(&frame.stream_id);
}
}
}
FRAME_WINDOW_UPDATE_BACK => {
// Hub consumed data — increase our send window for this stream (upload direction)
if let Some(increment) = decode_window_update(&frame.payload) {
if increment > 0 {
let writers = client_writers.lock().await;
if let Some(state) = writers.get(&frame.stream_id) {
let prev = state.send_window.fetch_add(increment, Ordering::Release);
if prev + increment > MAX_WINDOW_SIZE {
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
}
state.window_notify.notify_one();
}
}
} }
} }
FRAME_CLOSE_BACK => { FRAME_CLOSE_BACK => {
@@ -390,7 +481,8 @@ async fn connect_to_hub_and_run(
apply_port_config( apply_port_config(
&update.listen_ports, &update.listen_ports,
&mut port_listeners, &mut port_listeners,
&tunnel_writer, &tunnel_writer_tx,
&tunnel_data_tx,
&client_writers, &client_writers,
active_streams, active_streams,
next_stream_id, next_stream_id,
@@ -399,6 +491,14 @@ async fn connect_to_hub_and_run(
); );
} }
} }
FRAME_PING => {
let pong_frame = encode_frame(0, FRAME_PONG, &[]);
if tunnel_writer_tx.try_send(pong_frame).is_err() {
log::warn!("Failed to send PONG, writer channel full/closed");
break EdgeLoopResult::Reconnect;
}
log::trace!("Received PING from hub, sent PONG");
}
_ => { _ => {
log::warn!("Unexpected frame type {} from hub", frame.frame_type); log::warn!("Unexpected frame type {} from hub", frame.frame_type);
} }
@@ -414,6 +514,11 @@ async fn connect_to_hub_and_run(
} }
} }
} }
_ = &mut liveness_deadline => {
log::warn!("Hub liveness timeout (no frames for {}s), reconnecting",
liveness_timeout_dur.as_secs());
break EdgeLoopResult::Reconnect;
}
_ = connection_token.cancelled() => { _ = connection_token.cancelled() => {
log::info!("Connection cancelled"); log::info!("Connection cancelled");
break EdgeLoopResult::Shutdown; break EdgeLoopResult::Shutdown;
@@ -427,6 +532,7 @@ async fn connect_to_hub_and_run(
// Cancel connection token to propagate to all child tasks BEFORE aborting // Cancel connection token to propagate to all child tasks BEFORE aborting
connection_token.cancel(); connection_token.cancel();
stun_handle.abort(); stun_handle.abort();
tunnel_writer_handle.abort();
for (_, h) in port_listeners.drain() { for (_, h) in port_listeners.drain() {
h.abort(); h.abort();
} }
@@ -438,8 +544,9 @@ async fn connect_to_hub_and_run(
fn apply_port_config( fn apply_port_config(
new_ports: &[u16], new_ports: &[u16],
port_listeners: &mut HashMap<u16, JoinHandle<()>>, port_listeners: &mut HashMap<u16, JoinHandle<()>>,
tunnel_writer: &Arc<Mutex<tokio::io::WriteHalf<tokio_rustls::client::TlsStream<TcpStream>>>>, tunnel_ctrl_tx: &mpsc::Sender<Vec<u8>>,
client_writers: &Arc<Mutex<HashMap<u32, mpsc::Sender<Vec<u8>>>>>, tunnel_data_tx: &mpsc::Sender<Vec<u8>>,
client_writers: &Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
active_streams: &Arc<AtomicU32>, active_streams: &Arc<AtomicU32>,
next_stream_id: &Arc<AtomicU32>, next_stream_id: &Arc<AtomicU32>,
edge_id: &str, edge_id: &str,
@@ -458,7 +565,8 @@ fn apply_port_config(
// Add new ports // Add new ports
for &port in new_set.difference(&old_set) { for &port in new_set.difference(&old_set) {
let tunnel_writer = tunnel_writer.clone(); let tunnel_ctrl_tx = tunnel_ctrl_tx.clone();
let tunnel_data_tx = tunnel_data_tx.clone();
let client_writers = client_writers.clone(); let client_writers = client_writers.clone();
let active_streams = active_streams.clone(); let active_streams = active_streams.clone();
let next_stream_id = next_stream_id.clone(); let next_stream_id = next_stream_id.clone();
@@ -481,7 +589,8 @@ fn apply_port_config(
match accept_result { match accept_result {
Ok((client_stream, client_addr)) => { Ok((client_stream, client_addr)) => {
let stream_id = next_stream_id.fetch_add(1, Ordering::Relaxed); let stream_id = next_stream_id.fetch_add(1, Ordering::Relaxed);
let tunnel_writer = tunnel_writer.clone(); let tunnel_ctrl_tx = tunnel_ctrl_tx.clone();
let tunnel_data_tx = tunnel_data_tx.clone();
let client_writers = client_writers.clone(); let client_writers = client_writers.clone();
let active_streams = active_streams.clone(); let active_streams = active_streams.clone();
let edge_id = edge_id.clone(); let edge_id = edge_id.clone();
@@ -496,7 +605,8 @@ fn apply_port_config(
stream_id, stream_id,
port, port,
&edge_id, &edge_id,
tunnel_writer, tunnel_ctrl_tx,
tunnel_data_tx,
client_writers, client_writers,
client_token, client_token,
) )
@@ -526,8 +636,9 @@ async fn handle_client_connection(
stream_id: u32, stream_id: u32,
dest_port: u16, dest_port: u16,
edge_id: &str, edge_id: &str,
tunnel_writer: Arc<Mutex<tokio::io::WriteHalf<tokio_rustls::client::TlsStream<TcpStream>>>>, tunnel_ctrl_tx: mpsc::Sender<Vec<u8>>,
client_writers: Arc<Mutex<HashMap<u32, mpsc::Sender<Vec<u8>>>>>, tunnel_data_tx: mpsc::Sender<Vec<u8>>,
client_writers: Arc<Mutex<HashMap<u32, EdgeStreamState>>>,
client_token: CancellationToken, client_token: CancellationToken,
) { ) {
let client_ip = client_addr.ip().to_string(); let client_ip = client_addr.ip().to_string();
@@ -536,36 +647,52 @@ async fn handle_client_connection(
// Determine edge IP (use 0.0.0.0 as placeholder — hub doesn't use it for routing) // Determine edge IP (use 0.0.0.0 as placeholder — hub doesn't use it for routing)
let edge_ip = "0.0.0.0"; let edge_ip = "0.0.0.0";
// Send OPEN frame with PROXY v1 header // Send OPEN frame with PROXY v1 header via control channel
let proxy_header = build_proxy_v1_header(&client_ip, edge_ip, client_port, dest_port); let proxy_header = build_proxy_v1_header(&client_ip, edge_ip, client_port, dest_port);
let open_frame = encode_frame(stream_id, FRAME_OPEN, proxy_header.as_bytes()); let open_frame = encode_frame(stream_id, FRAME_OPEN, proxy_header.as_bytes());
{ if tunnel_ctrl_tx.send(open_frame).await.is_err() {
let mut w = tunnel_writer.lock().await; return;
if w.write_all(&open_frame).await.is_err() {
return;
}
} }
// Set up channel for data coming back from hub // Set up channel for data coming back from hub (capacity 256 is sufficient with flow control)
let (back_tx, mut back_rx) = mpsc::channel::<Vec<u8>>(256); let (back_tx, mut back_rx) = mpsc::channel::<Vec<u8>>(256);
let send_window = Arc::new(AtomicU32::new(INITIAL_STREAM_WINDOW));
let window_notify = Arc::new(Notify::new());
{ {
let mut writers = client_writers.lock().await; let mut writers = client_writers.lock().await;
writers.insert(stream_id, back_tx); writers.insert(stream_id, EdgeStreamState {
back_tx,
send_window: Arc::clone(&send_window),
window_notify: Arc::clone(&window_notify),
});
} }
let (mut client_read, mut client_write) = client_stream.into_split(); let (mut client_read, mut client_write) = client_stream.into_split();
// Task: hub -> client // Task: hub -> client (download direction)
// After writing to client TCP, send WINDOW_UPDATE to hub so it can send more
let hub_to_client_token = client_token.clone(); let hub_to_client_token = client_token.clone();
let hub_to_client = tokio::spawn(async move { let wu_tx = tunnel_ctrl_tx.clone();
let mut hub_to_client = tokio::spawn(async move {
let mut consumed_since_update: u32 = 0;
loop { loop {
tokio::select! { tokio::select! {
data = back_rx.recv() => { data = back_rx.recv() => {
match data { match data {
Some(data) => { Some(data) => {
let len = data.len() as u32;
if client_write.write_all(&data).await.is_err() { if client_write.write_all(&data).await.is_err() {
break; break;
} }
// Track consumption for flow control
consumed_since_update += len;
if consumed_since_update >= WINDOW_UPDATE_THRESHOLD {
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, consumed_since_update);
if wu_tx.try_send(frame).is_ok() {
consumed_since_update = 0;
}
// If try_send fails, keep accumulating — retry on next threshold
}
} }
None => break, None => break,
} }
@@ -573,20 +700,52 @@ async fn handle_client_connection(
_ = hub_to_client_token.cancelled() => break, _ = hub_to_client_token.cancelled() => break,
} }
} }
// Send final window update for any remaining consumed bytes
if consumed_since_update > 0 {
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE, consumed_since_update);
let _ = wu_tx.try_send(frame);
}
let _ = client_write.shutdown().await; let _ = client_write.shutdown().await;
}); });
// Task: client -> hub // Task: client -> hub (upload direction) with per-stream flow control
let mut buf = vec![0u8; 32768]; let mut buf = vec![0u8; 32768];
loop { loop {
// Wait for send window to have capacity (with stall timeout)
loop {
let w = send_window.load(Ordering::Acquire);
if w > 0 { break; }
tokio::select! {
_ = window_notify.notified() => continue,
_ = client_token.cancelled() => break,
_ = tokio::time::sleep(Duration::from_secs(120)) => {
log::warn!("Stream {} upload stalled (window empty for 120s)", stream_id);
break;
}
}
}
if client_token.is_cancelled() { break; }
// Limit read size to available window.
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
// which would be falsely interpreted as EOF.
let w = send_window.load(Ordering::Acquire) as usize;
if w == 0 {
log::warn!("Stream {} upload: window still 0 after stall timeout, closing", stream_id);
break;
}
let max_read = w.min(buf.len());
tokio::select! { tokio::select! {
read_result = client_read.read(&mut buf) => { read_result = client_read.read(&mut buf[..max_read]) => {
match read_result { match read_result {
Ok(0) => break, Ok(0) => break,
Ok(n) => { Ok(n) => {
send_window.fetch_sub(n as u32, Ordering::Release);
let data_frame = encode_frame(stream_id, FRAME_DATA, &buf[..n]); let data_frame = encode_frame(stream_id, FRAME_DATA, &buf[..n]);
let mut w = tunnel_writer.lock().await; if tunnel_data_tx.send(data_frame).await.is_err() {
if w.write_all(&data_frame).await.is_err() { log::warn!("Stream {} data channel closed, closing", stream_id);
break; break;
} }
} }
@@ -597,19 +756,29 @@ async fn handle_client_connection(
} }
} }
// Send CLOSE frame (only if not cancelled) // Wait for the download task (hub → client) to finish BEFORE sending CLOSE.
// Upload EOF (client done sending) does NOT mean the response is done.
// For asymmetric transfers like git fetch (small request, large response),
// the response is still streaming when the upload finishes.
// Sending CLOSE before the response finishes would cause the hub to cancel
// the upstream reader mid-response, truncating the data.
let _ = tokio::time::timeout(
Duration::from_secs(300), // 5 min max wait for download to finish
&mut hub_to_client,
).await;
// NOW send CLOSE — the response has been fully delivered (or timed out).
if !client_token.is_cancelled() { if !client_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE, &[]); let close_frame = encode_frame(stream_id, FRAME_CLOSE, &[]);
let mut w = tunnel_writer.lock().await; let _ = tunnel_data_tx.send(close_frame).await;
let _ = w.write_all(&close_frame).await;
} }
// Cleanup // Clean up
{ {
let mut writers = client_writers.lock().await; let mut writers = client_writers.lock().await;
writers.remove(&stream_id); writers.remove(&stream_id);
} }
hub_to_client.abort(); hub_to_client.abort(); // No-op if already finished; safety net if timeout fired
let _ = edge_id; // used for logging context let _ = edge_id; // used for logging context
} }

View File

@@ -1,23 +1,39 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use std::sync::atomic::{AtomicU32, Ordering};
use std::time::Duration;
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader}; use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
use tokio::net::{TcpListener, TcpStream}; use tokio::net::{TcpListener, TcpStream};
use tokio::sync::{mpsc, Mutex, RwLock}; use tokio::sync::{mpsc, Mutex, Notify, RwLock, Semaphore};
use tokio::time::{interval, sleep_until, Instant};
use tokio_rustls::TlsAcceptor; use tokio_rustls::TlsAcceptor;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use remoteingress_protocol::*; use remoteingress_protocol::*;
/// Per-stream state tracked in the hub's stream map.
///
/// One entry is inserted per accepted FRAME_OPEN, keyed by stream id, and is
/// removed either by the stream task when it finishes or by the frame loop
/// when it handles FRAME_CLOSE for that stream.
struct HubStreamState {
/// Channel to deliver FRAME_DATA payloads to the upstream writer task.
/// The frame loop uses a non-blocking `try_send`; if that fails, the stream
/// is removed from the map and its `cancel_token` is cancelled.
data_tx: mpsc::Sender<Vec<u8>>,
/// Cancellation token for this stream (a child of the edge token).
/// Cancelled when FRAME_CLOSE arrives or when delivery on `data_tx` fails.
cancel_token: CancellationToken,
/// Send window for FRAME_DATA_BACK (download direction).
/// Starts at INITIAL_STREAM_WINDOW. Decremented by the upstream reader by the
/// number of bytes it reads, incremented by FRAME_WINDOW_UPDATE from edge and
/// capped at MAX_WINDOW_SIZE.
send_window: Arc<AtomicU32>,
/// Notifier to wake the upstream reader when the window opens.
/// Signalled with `notify_one` after each applied FRAME_WINDOW_UPDATE.
window_notify: Arc<Notify>,
}
/// Hub configuration. /// Hub configuration.
#[derive(Debug, Clone, Deserialize, Serialize)] #[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct HubConfig { pub struct HubConfig {
pub tunnel_port: u16, pub tunnel_port: u16,
pub target_host: Option<String>, pub target_host: Option<String>,
#[serde(skip)] #[serde(default)]
pub tls_cert_pem: Option<String>, pub tls_cert_pem: Option<String>,
#[serde(skip)] #[serde(default)]
pub tls_key_pem: Option<String>, pub tls_key_pem: Option<String>,
} }
@@ -65,6 +81,7 @@ pub struct ConnectedEdgeStatus {
pub edge_id: String, pub edge_id: String,
pub connected_at: u64, pub connected_at: u64,
pub active_streams: usize, pub active_streams: usize,
pub peer_addr: String,
} }
/// Events emitted by the hub. /// Events emitted by the hub.
@@ -73,7 +90,7 @@ pub struct ConnectedEdgeStatus {
#[serde(tag = "type")] #[serde(tag = "type")]
pub enum HubEvent { pub enum HubEvent {
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
EdgeConnected { edge_id: String }, EdgeConnected { edge_id: String, peer_addr: String },
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
EdgeDisconnected { edge_id: String }, EdgeDisconnected { edge_id: String },
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
@@ -105,7 +122,8 @@ pub struct TunnelHub {
struct ConnectedEdgeInfo { struct ConnectedEdgeInfo {
connected_at: u64, connected_at: u64,
active_streams: Arc<Mutex<HashMap<u32, mpsc::Sender<Vec<u8>>>>>, peer_addr: String,
active_streams: Arc<Mutex<HashMap<u32, HubStreamState>>>,
config_tx: mpsc::Sender<EdgeConfigUpdate>, config_tx: mpsc::Sender<EdgeConfigUpdate>,
#[allow(dead_code)] // kept alive for Drop — cancels child tokens when edge is removed #[allow(dead_code)] // kept alive for Drop — cancels child tokens when edge is removed
cancel_token: CancellationToken, cancel_token: CancellationToken,
@@ -176,6 +194,7 @@ impl TunnelHub {
edge_id: id.clone(), edge_id: id.clone(),
connected_at: info.connected_at, connected_at: info.connected_at,
active_streams: streams.len(), active_streams: streams.len(),
peer_addr: info.peer_addr.clone(),
}); });
} }
@@ -218,9 +237,10 @@ impl TunnelHub {
let event_tx = event_tx.clone(); let event_tx = event_tx.clone();
let target = target_host.clone(); let target = target_host.clone();
let edge_token = hub_token.child_token(); let edge_token = hub_token.child_token();
let peer_addr = addr.ip().to_string();
tokio::spawn(async move { tokio::spawn(async move {
if let Err(e) = handle_edge_connection( if let Err(e) = handle_edge_connection(
stream, acceptor, allowed, connected, event_tx, target, edge_token, stream, acceptor, allowed, connected, event_tx, target, edge_token, peer_addr,
).await { ).await {
log::error!("Edge connection error: {}", e); log::error!("Edge connection error: {}", e);
} }
@@ -264,6 +284,9 @@ impl Drop for TunnelHub {
} }
} }
/// Maximum concurrent streams per edge connection.
/// Sizes the per-edge stream semaphore: a FRAME_OPEN that cannot obtain a
/// permit is rejected with FRAME_CLOSE_BACK instead of opening a stream.
const MAX_STREAMS_PER_EDGE: usize = 1024;
/// Handle a single edge connection: authenticate, then enter frame loop. /// Handle a single edge connection: authenticate, then enter frame loop.
async fn handle_edge_connection( async fn handle_edge_connection(
stream: TcpStream, stream: TcpStream,
@@ -273,7 +296,10 @@ async fn handle_edge_connection(
event_tx: mpsc::Sender<HubEvent>, event_tx: mpsc::Sender<HubEvent>,
target_host: String, target_host: String,
edge_token: CancellationToken, edge_token: CancellationToken,
peer_addr: String,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
// Disable Nagle's algorithm for low-latency control frames (PING/PONG, WINDOW_UPDATE)
stream.set_nodelay(true)?;
let tls_stream = acceptor.accept(stream).await?; let tls_stream = acceptor.accept(stream).await?;
let (read_half, mut write_half) = tokio::io::split(tls_stream); let (read_half, mut write_half) = tokio::io::split(tls_stream);
let mut buf_reader = BufReader::new(read_half); let mut buf_reader = BufReader::new(read_half);
@@ -307,9 +333,10 @@ async fn handle_edge_connection(
} }
}; };
log::info!("Edge {} authenticated", edge_id); log::info!("Edge {} authenticated from {}", edge_id, peer_addr);
let _ = event_tx.try_send(HubEvent::EdgeConnected { let _ = event_tx.try_send(HubEvent::EdgeConnected {
edge_id: edge_id.clone(), edge_id: edge_id.clone(),
peer_addr: peer_addr.clone(),
}); });
// Send handshake response with initial config before frame protocol begins // Send handshake response with initial config before frame protocol begins
@@ -322,7 +349,7 @@ async fn handle_edge_connection(
write_half.write_all(handshake_json.as_bytes()).await?; write_half.write_all(handshake_json.as_bytes()).await?;
// Track this edge // Track this edge
let streams: Arc<Mutex<HashMap<u32, mpsc::Sender<Vec<u8>>>>> = let streams: Arc<Mutex<HashMap<u32, HubStreamState>>> =
Arc::new(Mutex::new(HashMap::new())); Arc::new(Mutex::new(HashMap::new()));
let now = std::time::SystemTime::now() let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH) .duration_since(std::time::UNIX_EPOCH)
@@ -338,6 +365,7 @@ async fn handle_edge_connection(
edge_id.clone(), edge_id.clone(),
ConnectedEdgeInfo { ConnectedEdgeInfo {
connected_at: now, connected_at: now,
peer_addr,
active_streams: streams.clone(), active_streams: streams.clone(),
config_tx, config_tx,
cancel_token: edge_token.clone(), cancel_token: edge_token.clone(),
@@ -345,11 +373,45 @@ async fn handle_edge_connection(
); );
} }
// Shared writer for sending frames back to edge // QoS dual-channel tunnel writer: control frames (PING/PONG/WINDOW_UPDATE/CLOSE)
let write_half = Arc::new(Mutex::new(write_half)); // have priority over data frames (DATA_BACK). This prevents PING starvation under load.
let (ctrl_tx, mut ctrl_rx) = mpsc::channel::<Vec<u8>>(64);
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(4096);
// Legacy alias for code that sends both control and data (will be migrated)
let frame_writer_tx = ctrl_tx.clone();
let writer_token = edge_token.clone();
let writer_handle = tokio::spawn(async move {
// BufWriter coalesces small writes (frame headers, control frames) into fewer
// TLS records and syscalls. Flushed after each frame to avoid holding data.
let mut writer = tokio::io::BufWriter::with_capacity(65536, write_half);
loop {
tokio::select! {
biased; // control frames always take priority over data
ctrl = ctrl_rx.recv() => {
match ctrl {
Some(frame_data) => {
if writer.write_all(&frame_data).await.is_err() { break; }
if writer.flush().await.is_err() { break; }
}
None => break,
}
}
data = data_rx.recv() => {
match data {
Some(frame_data) => {
if writer.write_all(&frame_data).await.is_err() { break; }
if writer.flush().await.is_err() { break; }
}
None => break,
}
}
_ = writer_token.cancelled() => break,
}
}
});
// Spawn task to forward config updates as FRAME_CONFIG frames // Spawn task to forward config updates as FRAME_CONFIG frames
let config_writer = write_half.clone(); let config_writer_tx = frame_writer_tx.clone();
let config_edge_id = edge_id.clone(); let config_edge_id = edge_id.clone();
let config_token = edge_token.clone(); let config_token = edge_token.clone();
let config_handle = tokio::spawn(async move { let config_handle = tokio::spawn(async move {
@@ -360,8 +422,7 @@ async fn handle_edge_connection(
Some(update) => { Some(update) => {
if let Ok(payload) = serde_json::to_vec(&update) { if let Ok(payload) = serde_json::to_vec(&update) {
let frame = encode_frame(0, FRAME_CONFIG, &payload); let frame = encode_frame(0, FRAME_CONFIG, &payload);
let mut w = config_writer.lock().await; if config_writer_tx.send(frame).await.is_err() {
if w.write_all(&frame).await.is_err() {
log::error!("Failed to send config update to edge {}", config_edge_id); log::error!("Failed to send config update to edge {}", config_edge_id);
break; break;
} }
@@ -376,6 +437,17 @@ async fn handle_edge_connection(
} }
}); });
// A4: Semaphore to limit concurrent streams per edge
let stream_semaphore = Arc::new(Semaphore::new(MAX_STREAMS_PER_EDGE));
// Heartbeat: periodic PING and liveness timeout
let ping_interval_dur = Duration::from_secs(15);
let liveness_timeout_dur = Duration::from_secs(45);
let mut ping_ticker = interval(ping_interval_dur);
ping_ticker.tick().await; // consume the immediate first tick
let mut last_activity = Instant::now();
let mut liveness_deadline = Box::pin(sleep_until(last_activity + liveness_timeout_dur));
// Frame reading loop // Frame reading loop
let mut frame_reader = FrameReader::new(buf_reader); let mut frame_reader = FrameReader::new(buf_reader);
@@ -384,8 +456,24 @@ async fn handle_edge_connection(
frame_result = frame_reader.next_frame() => { frame_result = frame_reader.next_frame() => {
match frame_result { match frame_result {
Ok(Some(frame)) => { Ok(Some(frame)) => {
// Reset liveness on any received frame
last_activity = Instant::now();
liveness_deadline.as_mut().reset(last_activity + liveness_timeout_dur);
match frame.frame_type { match frame.frame_type {
FRAME_OPEN => { FRAME_OPEN => {
// A4: Check stream limit before processing
let permit = match stream_semaphore.clone().try_acquire_owned() {
Ok(p) => p,
Err(_) => {
log::warn!("Edge {} exceeded max streams ({}), rejecting stream {}",
edge_id, MAX_STREAMS_PER_EDGE, frame.stream_id);
let close_frame = encode_frame(frame.stream_id, FRAME_CLOSE_BACK, &[]);
let _ = frame_writer_tx.try_send(close_frame);
continue;
}
};
// Payload is PROXY v1 header line // Payload is PROXY v1 header line
let proxy_header = String::from_utf8_lossy(&frame.payload).to_string(); let proxy_header = String::from_utf8_lossy(&frame.payload).to_string();
@@ -396,7 +484,8 @@ async fn handle_edge_connection(
let edge_id_clone = edge_id.clone(); let edge_id_clone = edge_id.clone();
let event_tx_clone = event_tx.clone(); let event_tx_clone = event_tx.clone();
let streams_clone = streams.clone(); let streams_clone = streams.clone();
let writer_clone = write_half.clone(); let writer_tx = ctrl_tx.clone(); // control: CLOSE_BACK, WINDOW_UPDATE_BACK
let data_writer_tx = data_tx.clone(); // data: DATA_BACK
let target = target_host.clone(); let target = target_host.clone();
let stream_token = edge_token.child_token(); let stream_token = edge_token.child_token();
@@ -405,33 +494,78 @@ async fn handle_edge_connection(
stream_id, stream_id,
}); });
// Create channel for data from edge to this stream // Create channel for data from edge to this stream (capacity 256 is sufficient with flow control)
let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(256); let (data_tx, mut data_rx) = mpsc::channel::<Vec<u8>>(256);
let send_window = Arc::new(AtomicU32::new(INITIAL_STREAM_WINDOW));
let window_notify = Arc::new(Notify::new());
{ {
let mut s = streams.lock().await; let mut s = streams.lock().await;
s.insert(stream_id, data_tx); s.insert(stream_id, HubStreamState {
data_tx,
cancel_token: stream_token.clone(),
send_window: Arc::clone(&send_window),
window_notify: Arc::clone(&window_notify),
});
} }
// Spawn task: connect to SmartProxy, send PROXY header, pipe data // Spawn task: connect to SmartProxy, send PROXY header, pipe data
tokio::spawn(async move { tokio::spawn(async move {
let _permit = permit; // hold semaphore permit until stream completes
let result = async { let result = async {
let mut upstream = // A2: Connect to SmartProxy with timeout
TcpStream::connect((target.as_str(), dest_port)).await?; let mut upstream = tokio::time::timeout(
Duration::from_secs(10),
TcpStream::connect((target.as_str(), dest_port)),
)
.await
.map_err(|_| -> Box<dyn std::error::Error + Send + Sync> {
format!("connect to SmartProxy {}:{} timed out (10s)", target, dest_port).into()
})??;
upstream.set_nodelay(true)?;
upstream.write_all(proxy_header.as_bytes()).await?; upstream.write_all(proxy_header.as_bytes()).await?;
let (mut up_read, mut up_write) = let (mut up_read, mut up_write) =
upstream.into_split(); upstream.into_split();
// Forward data from edge (via channel) to SmartProxy // Forward data from edge (via channel) to SmartProxy
// After writing to upstream, send WINDOW_UPDATE_BACK to edge
let writer_token = stream_token.clone(); let writer_token = stream_token.clone();
let wub_tx = writer_tx.clone();
let writer_for_edge_data = tokio::spawn(async move { let writer_for_edge_data = tokio::spawn(async move {
let mut consumed_since_update: u32 = 0;
loop { loop {
tokio::select! { tokio::select! {
data = data_rx.recv() => { data = data_rx.recv() => {
match data { match data {
Some(data) => { Some(data) => {
if up_write.write_all(&data).await.is_err() { let len = data.len() as u32;
break; // Check cancellation alongside the write so we respond
// promptly to FRAME_CLOSE instead of blocking up to 60s.
let write_result = tokio::select! {
r = tokio::time::timeout(
Duration::from_secs(60),
up_write.write_all(&data),
) => r,
_ = writer_token.cancelled() => break,
};
match write_result {
Ok(Ok(())) => {}
Ok(Err(_)) => break,
Err(_) => {
log::warn!("Stream {} write to upstream timed out (60s)", stream_id);
break;
}
}
// Track consumption for flow control
consumed_since_update += len;
if consumed_since_update >= WINDOW_UPDATE_THRESHOLD {
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update);
if wub_tx.try_send(frame).is_ok() {
consumed_since_update = 0;
}
// If try_send fails, keep accumulating — retry on next threshold
} }
} }
None => break, None => break,
@@ -440,21 +574,54 @@ async fn handle_edge_connection(
_ = writer_token.cancelled() => break, _ = writer_token.cancelled() => break,
} }
} }
// Send final window update for remaining consumed bytes
if consumed_since_update > 0 {
let frame = encode_window_update(stream_id, FRAME_WINDOW_UPDATE_BACK, consumed_since_update);
let _ = wub_tx.try_send(frame);
}
let _ = up_write.shutdown().await; let _ = up_write.shutdown().await;
}); });
// Forward data from SmartProxy back to edge // Forward data from SmartProxy back to edge via writer channel
// with per-stream flow control (check send_window before reading)
let mut buf = vec![0u8; 32768]; let mut buf = vec![0u8; 32768];
loop { loop {
// Wait for send window to have capacity (with stall timeout)
loop {
let w = send_window.load(Ordering::Acquire);
if w > 0 { break; }
tokio::select! {
_ = window_notify.notified() => continue,
_ = stream_token.cancelled() => break,
_ = tokio::time::sleep(Duration::from_secs(120)) => {
log::warn!("Stream {} download stalled (window empty for 120s)", stream_id);
break;
}
}
}
if stream_token.is_cancelled() { break; }
// Limit read size to available window.
// IMPORTANT: if window is 0 (stall timeout fired), we must NOT
// read into an empty buffer — read(&mut buf[..0]) returns Ok(0)
// which would be falsely interpreted as EOF.
let w = send_window.load(Ordering::Acquire) as usize;
if w == 0 {
log::warn!("Stream {} download: window still 0 after stall timeout, closing", stream_id);
break;
}
let max_read = w.min(buf.len());
tokio::select! { tokio::select! {
read_result = up_read.read(&mut buf) => { read_result = up_read.read(&mut buf[..max_read]) => {
match read_result { match read_result {
Ok(0) => break, Ok(0) => break,
Ok(n) => { Ok(n) => {
send_window.fetch_sub(n as u32, Ordering::Release);
let frame = let frame =
encode_frame(stream_id, FRAME_DATA_BACK, &buf[..n]); encode_frame(stream_id, FRAME_DATA_BACK, &buf[..n]);
let mut w = writer_clone.lock().await; if data_writer_tx.send(frame).await.is_err() {
if w.write_all(&frame).await.is_err() { log::warn!("Stream {} data channel closed, closing", stream_id);
break; break;
} }
} }
@@ -465,11 +632,11 @@ async fn handle_edge_connection(
} }
} }
// Send CLOSE_BACK to edge (only if not cancelled) // Send CLOSE_BACK via DATA channel (must arrive AFTER last DATA_BACK).
// Use send().await to guarantee delivery (try_send silently drops if full).
if !stream_token.is_cancelled() { if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]); let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let mut w = writer_clone.lock().await; let _ = data_writer_tx.send(close_frame).await;
let _ = w.write_all(&close_frame).await;
} }
writer_for_edge_data.abort(); writer_for_edge_data.abort();
@@ -479,34 +646,67 @@ async fn handle_edge_connection(
if let Err(e) = result { if let Err(e) = result {
log::error!("Stream {} error: {}", stream_id, e); log::error!("Stream {} error: {}", stream_id, e);
// Send CLOSE_BACK on error (only if not cancelled) // Send CLOSE_BACK via DATA channel on error (must arrive after any DATA_BACK).
// Use send().await to guarantee delivery.
if !stream_token.is_cancelled() { if !stream_token.is_cancelled() {
let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]); let close_frame = encode_frame(stream_id, FRAME_CLOSE_BACK, &[]);
let mut w = writer_clone.lock().await; let _ = data_writer_tx.send(close_frame).await;
let _ = w.write_all(&close_frame).await;
} }
} }
// Clean up stream // Clean up stream (guard against duplicate if FRAME_CLOSE already removed it)
{ let was_present = {
let mut s = streams_clone.lock().await; let mut s = streams_clone.lock().await;
s.remove(&stream_id); s.remove(&stream_id).is_some()
};
if was_present {
let _ = event_tx_clone.try_send(HubEvent::StreamClosed {
edge_id: edge_id_clone,
stream_id,
});
} }
let _ = event_tx_clone.try_send(HubEvent::StreamClosed {
edge_id: edge_id_clone,
stream_id,
});
}); });
} }
FRAME_DATA => { FRAME_DATA => {
let s = streams.lock().await; // Non-blocking dispatch to per-stream channel.
if let Some(tx) = s.get(&frame.stream_id) { // With flow control, the sender should rarely exceed the channel capacity.
let _ = tx.send(frame.payload).await; let mut s = streams.lock().await;
if let Some(state) = s.get(&frame.stream_id) {
if state.data_tx.try_send(frame.payload).is_err() {
log::warn!("Stream {} data channel full, closing stream", frame.stream_id);
if let Some(state) = s.remove(&frame.stream_id) {
state.cancel_token.cancel();
}
}
}
}
FRAME_WINDOW_UPDATE => {
// Edge consumed data — increase our send window for this stream
if let Some(increment) = decode_window_update(&frame.payload) {
if increment > 0 {
let s = streams.lock().await;
if let Some(state) = s.get(&frame.stream_id) {
let prev = state.send_window.fetch_add(increment, Ordering::Release);
if prev + increment > MAX_WINDOW_SIZE {
state.send_window.store(MAX_WINDOW_SIZE, Ordering::Release);
}
state.window_notify.notify_one();
}
}
} }
} }
FRAME_CLOSE => { FRAME_CLOSE => {
let mut s = streams.lock().await; let mut s = streams.lock().await;
s.remove(&frame.stream_id); if let Some(state) = s.remove(&frame.stream_id) {
state.cancel_token.cancel();
let _ = event_tx.try_send(HubEvent::StreamClosed {
edge_id: edge_id.clone(),
stream_id: frame.stream_id,
});
}
}
FRAME_PONG => {
log::debug!("Received PONG from edge {}", edge_id);
} }
_ => { _ => {
log::warn!("Unexpected frame type {} from edge", frame.frame_type); log::warn!("Unexpected frame type {} from edge", frame.frame_type);
@@ -523,6 +723,19 @@ async fn handle_edge_connection(
} }
} }
} }
_ = ping_ticker.tick() => {
let ping_frame = encode_frame(0, FRAME_PING, &[]);
if frame_writer_tx.try_send(ping_frame).is_err() {
log::warn!("Failed to send PING to edge {}, writer channel full/closed", edge_id);
break;
}
log::trace!("Sent PING to edge {}", edge_id);
}
_ = &mut liveness_deadline => {
log::warn!("Edge {} liveness timeout (no frames for {}s), disconnecting",
edge_id, liveness_timeout_dur.as_secs());
break;
}
_ = edge_token.cancelled() => { _ = edge_token.cancelled() => {
log::info!("Edge {} cancelled by hub", edge_id); log::info!("Edge {} cancelled by hub", edge_id);
break; break;
@@ -533,6 +746,7 @@ async fn handle_edge_connection(
// Cleanup: cancel edge token to propagate to all child tasks // Cleanup: cancel edge token to propagate to all child tasks
edge_token.cancel(); edge_token.cancel();
config_handle.abort(); config_handle.abort();
writer_handle.abort();
{ {
let mut edges = connected.lock().await; let mut edges = connected.lock().await;
edges.remove(&edge_id); edges.remove(&edge_id);
@@ -749,10 +963,12 @@ mod tests {
fn test_hub_event_edge_connected_serialize() { fn test_hub_event_edge_connected_serialize() {
let event = HubEvent::EdgeConnected { let event = HubEvent::EdgeConnected {
edge_id: "edge-1".to_string(), edge_id: "edge-1".to_string(),
peer_addr: "203.0.113.5".to_string(),
}; };
let json = serde_json::to_value(&event).unwrap(); let json = serde_json::to_value(&event).unwrap();
assert_eq!(json["type"], "edgeConnected"); assert_eq!(json["type"], "edgeConnected");
assert_eq!(json["edgeId"], "edge-1"); assert_eq!(json["edgeId"], "edge-1");
assert_eq!(json["peerAddr"], "203.0.113.5");
} }
#[test] #[test]

View File

@@ -7,6 +7,10 @@ pub const FRAME_CLOSE: u8 = 0x03;
pub const FRAME_DATA_BACK: u8 = 0x04; pub const FRAME_DATA_BACK: u8 = 0x04;
pub const FRAME_CLOSE_BACK: u8 = 0x05; pub const FRAME_CLOSE_BACK: u8 = 0x05;
pub const FRAME_CONFIG: u8 = 0x06; // Hub -> Edge: configuration update pub const FRAME_CONFIG: u8 = 0x06; // Hub -> Edge: configuration update
pub const FRAME_PING: u8 = 0x07; // Hub -> Edge: heartbeat probe
pub const FRAME_PONG: u8 = 0x08; // Edge -> Hub: heartbeat response
pub const FRAME_WINDOW_UPDATE: u8 = 0x09; // Edge -> Hub: per-stream flow control
pub const FRAME_WINDOW_UPDATE_BACK: u8 = 0x0A; // Hub -> Edge: per-stream flow control
// Frame header size: 4 (stream_id) + 1 (type) + 4 (length) = 9 bytes // Frame header size: 4 (stream_id) + 1 (type) + 4 (length) = 9 bytes
pub const FRAME_HEADER_SIZE: usize = 9; pub const FRAME_HEADER_SIZE: usize = 9;
@@ -14,6 +18,28 @@ pub const FRAME_HEADER_SIZE: usize = 9;
// Maximum payload size (16 MB) // Maximum payload size (16 MB)
pub const MAX_PAYLOAD_SIZE: u32 = 16 * 1024 * 1024; pub const MAX_PAYLOAD_SIZE: u32 = 16 * 1024 * 1024;
// Per-stream flow control constants
/// Initial per-stream window size (4 MB). Sized for full throughput at high RTT:
/// at 100ms RTT, this sustains ~40 MB/s per stream.
pub const INITIAL_STREAM_WINDOW: u32 = 4 * 1024 * 1024;
/// Send WINDOW_UPDATE after consuming this many bytes (half the initial window).
pub const WINDOW_UPDATE_THRESHOLD: u32 = INITIAL_STREAM_WINDOW / 2;
/// Maximum window size to prevent overflow.
pub const MAX_WINDOW_SIZE: u32 = 16 * 1024 * 1024;
/// Build a complete WINDOW_UPDATE frame for one stream.
///
/// `frame_type` is supplied by the caller so the same helper serves both directions
/// (`FRAME_WINDOW_UPDATE` and `FRAME_WINDOW_UPDATE_BACK`). The payload is the
/// `increment` serialized as 4 big-endian bytes.
pub fn encode_window_update(stream_id: u32, frame_type: u8, increment: u32) -> Vec<u8> {
    let payload = increment.to_be_bytes();
    encode_frame(stream_id, frame_type, &payload)
}
/// Parse the payload of a WINDOW_UPDATE frame into its byte increment.
///
/// The payload must be exactly 4 bytes, interpreted as a big-endian `u32`.
/// Any other length is treated as malformed and yields `None`.
pub fn decode_window_update(payload: &[u8]) -> Option<u32> {
    match *payload {
        [b0, b1, b2, b3] => Some(u32::from_be_bytes([b0, b1, b2, b3])),
        _ => None,
    }
}
/// A single multiplexed frame. /// A single multiplexed frame.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Frame { pub struct Frame {
@@ -261,6 +287,8 @@ mod tests {
FRAME_DATA_BACK, FRAME_DATA_BACK,
FRAME_CLOSE_BACK, FRAME_CLOSE_BACK,
FRAME_CONFIG, FRAME_CONFIG,
FRAME_PING,
FRAME_PONG,
]; ];
let mut data = Vec::new(); let mut data = Vec::new();
@@ -293,4 +321,19 @@ mod tests {
assert_eq!(frame.frame_type, FRAME_CLOSE); assert_eq!(frame.frame_type, FRAME_CLOSE);
assert!(frame.payload.is_empty()); assert!(frame.payload.is_empty());
} }
#[test]
fn test_encode_frame_ping_pong() {
    // PING and PONG are both control frames: stream_id = 0 and an empty payload,
    // so the encoded frame consists of the header only.
    for frame_type in [FRAME_PING, FRAME_PONG] {
        let encoded = encode_frame(0, frame_type, &[]);
        assert_eq!(encoded[4], frame_type);
        assert_eq!(&encoded[0..4], &0u32.to_be_bytes());
        assert_eq!(encoded.len(), FRAME_HEADER_SIZE);
    }
}
} }

View File

@@ -3,6 +3,6 @@
*/ */
export const commitinfo = { export const commitinfo = {
name: '@serve.zone/remoteingress', name: '@serve.zone/remoteingress',
version: '4.0.0', version: '4.5.10',
description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.' description: 'Edge ingress tunnel for DcRouter - accepts incoming TCP connections at network edge and tunnels them to DcRouter SmartProxy preserving client IP via PROXY protocol v1.'
} }

View File

@@ -40,9 +40,16 @@ export interface IEdgeConfig {
secret: string; secret: string;
} }
const MAX_RESTART_ATTEMPTS = 10;
const MAX_RESTART_BACKOFF_MS = 30_000;
export class RemoteIngressEdge extends EventEmitter { export class RemoteIngressEdge extends EventEmitter {
private bridge: InstanceType<typeof plugins.smartrust.RustBridge<TEdgeCommands>>; private bridge: InstanceType<typeof plugins.smartrust.RustBridge<TEdgeCommands>>;
private started = false; private started = false;
private stopping = false;
private savedConfig: IEdgeConfig | null = null;
private restartBackoffMs = 1000;
private restartAttempts = 0;
private statusInterval: ReturnType<typeof setInterval> | undefined; private statusInterval: ReturnType<typeof setInterval> | undefined;
constructor() { constructor() {
@@ -109,11 +116,17 @@ export class RemoteIngressEdge extends EventEmitter {
edgeConfig = config; edgeConfig = config;
} }
this.savedConfig = edgeConfig;
this.stopping = false;
const spawned = await this.bridge.spawn(); const spawned = await this.bridge.spawn();
if (!spawned) { if (!spawned) {
throw new Error('Failed to spawn remoteingress-bin'); throw new Error('Failed to spawn remoteingress-bin');
} }
// Register crash recovery handler
this.bridge.on('exit', this.handleCrashRecovery);
await this.bridge.sendCommand('startEdge', { await this.bridge.sendCommand('startEdge', {
hubHost: edgeConfig.hubHost, hubHost: edgeConfig.hubHost,
hubPort: edgeConfig.hubPort ?? 8443, hubPort: edgeConfig.hubPort ?? 8443,
@@ -122,6 +135,8 @@ export class RemoteIngressEdge extends EventEmitter {
}); });
this.started = true; this.started = true;
this.restartAttempts = 0;
this.restartBackoffMs = 1000;
// Start periodic status logging // Start periodic status logging
this.statusInterval = setInterval(async () => { this.statusInterval = setInterval(async () => {
@@ -142,6 +157,7 @@ export class RemoteIngressEdge extends EventEmitter {
* Stop the edge and kill the Rust process. * Stop the edge and kill the Rust process.
*/ */
public async stop(): Promise<void> { public async stop(): Promise<void> {
this.stopping = true;
if (this.statusInterval) { if (this.statusInterval) {
clearInterval(this.statusInterval); clearInterval(this.statusInterval);
this.statusInterval = undefined; this.statusInterval = undefined;
@@ -152,6 +168,7 @@ export class RemoteIngressEdge extends EventEmitter {
} catch { } catch {
// Process may already be dead // Process may already be dead
} }
this.bridge.removeListener('exit', this.handleCrashRecovery);
this.bridge.kill(); this.bridge.kill();
this.started = false; this.started = false;
} }
@@ -170,4 +187,55 @@ export class RemoteIngressEdge extends EventEmitter {
public get running(): boolean { public get running(): boolean {
return this.bridge.running; return this.bridge.running;
} }
/**
 * Handle unexpected Rust binary crash — auto-restart with backoff.
 *
 * Registered as the bridge's 'exit' listener. The exit is ignored when a stop was
 * requested, when the edge is not marked as started (which is also the case while a
 * previous recovery is still in flight), or when no config has been saved.
 *
 * Otherwise the handler waits `restartBackoffMs`, respawns the binary and re-sends
 * `startEdge` with the saved config. On success it resets the backoff state and emits
 * 'crashRecovered'; on any failure it emits 'crashRecoveryFailed' so consumers learn
 * that the edge is down instead of the failure being swallowed.
 *
 * NOTE(review): a failed respawn is not retried, and the attempt counter is reset on
 * every success, so the MAX_RESTART_ATTEMPTS branch looks unreachable as written —
 * confirm the intended retry policy.
 *
 * @param code   exit code passed along with the bridge's 'exit' event
 * @param signal signal passed along with the bridge's 'exit' event
 */
private handleCrashRecovery = async (code: number | null, signal: string | null) => {
  if (this.stopping || !this.started || !this.savedConfig) {
    return;
  }
  // Capture the config now so a later mutation of savedConfig cannot change this attempt.
  const config = this.savedConfig;

  console.error(
    `[RemoteIngressEdge] Rust binary crashed (code=${code}, signal=${signal}), ` +
      `attempt ${this.restartAttempts + 1}/${MAX_RESTART_ATTEMPTS}`
  );

  // Marks the recovery as in flight: further 'exit' events are ignored by the guard above.
  this.started = false;

  if (this.restartAttempts >= MAX_RESTART_ATTEMPTS) {
    console.error('[RemoteIngressEdge] Max restart attempts reached, giving up');
    this.emit('crashRecoveryFailed');
    return;
  }

  await new Promise(resolve => setTimeout(resolve, this.restartBackoffMs));

  // stop() may have been called while we were sleeping; do not resurrect the process.
  if (this.stopping) {
    return;
  }

  this.restartBackoffMs = Math.min(this.restartBackoffMs * 2, MAX_RESTART_BACKOFF_MS);
  this.restartAttempts++;

  try {
    const spawned = await this.bridge.spawn();
    if (!spawned) {
      console.error('[RemoteIngressEdge] Failed to respawn binary');
      if (!this.stopping) {
        this.emit('crashRecoveryFailed');
      }
      return;
    }

    // Re-arm the handler without stacking duplicates: EventEmitter.on() does not
    // de-duplicate, so drop any existing registration first.
    this.bridge.removeListener('exit', this.handleCrashRecovery);
    this.bridge.on('exit', this.handleCrashRecovery);

    await this.bridge.sendCommand('startEdge', {
      hubHost: config.hubHost,
      hubPort: config.hubPort ?? 8443,
      edgeId: config.edgeId,
      secret: config.secret,
    });

    // stop() raced with the respawn: tear the fresh process down instead of
    // reporting a successful recovery.
    if (this.stopping) {
      this.bridge.removeListener('exit', this.handleCrashRecovery);
      this.bridge.kill();
      return;
    }

    this.started = true;
    this.restartAttempts = 0;
    this.restartBackoffMs = 1000;
    console.log('[RemoteIngressEdge] Successfully recovered from crash');
    this.emit('crashRecovered');
  } catch (err) {
    console.error(`[RemoteIngressEdge] Crash recovery failed: ${err}`);
    if (!this.stopping) {
      this.emit('crashRecoveryFailed');
    }
  }
};
} }

View File

@@ -11,6 +11,8 @@ type THubCommands = {
params: { params: {
tunnelPort: number; tunnelPort: number;
targetHost?: string; targetHost?: string;
tlsCertPem?: string;
tlsKeyPem?: string;
}; };
result: { started: boolean }; result: { started: boolean };
}; };
@@ -33,6 +35,7 @@ type THubCommands = {
edgeId: string; edgeId: string;
connectedAt: number; connectedAt: number;
activeStreams: number; activeStreams: number;
peerAddr: string;
}>; }>;
}; };
}; };
@@ -41,11 +44,25 @@ type THubCommands = {
export interface IHubConfig { export interface IHubConfig {
tunnelPort?: number; tunnelPort?: number;
targetHost?: string; targetHost?: string;
tls?: {
certPem?: string;
keyPem?: string;
};
} }
type TAllowedEdge = { id: string; secret: string; listenPorts?: number[]; stunIntervalSecs?: number };
const MAX_RESTART_ATTEMPTS = 10;
const MAX_RESTART_BACKOFF_MS = 30_000;
export class RemoteIngressHub extends EventEmitter { export class RemoteIngressHub extends EventEmitter {
private bridge: InstanceType<typeof plugins.smartrust.RustBridge<THubCommands>>; private bridge: InstanceType<typeof plugins.smartrust.RustBridge<THubCommands>>;
private started = false; private started = false;
private stopping = false;
private savedConfig: IHubConfig | null = null;
private savedEdges: TAllowedEdge[] = [];
private restartBackoffMs = 1000;
private restartAttempts = 0;
constructor() { constructor() {
super(); super();
@@ -73,7 +90,7 @@ export class RemoteIngressHub extends EventEmitter {
}); });
// Forward events from Rust binary // Forward events from Rust binary
this.bridge.on('management:edgeConnected', (data: { edgeId: string }) => { this.bridge.on('management:edgeConnected', (data: { edgeId: string; peerAddr: string }) => {
this.emit('edgeConnected', data); this.emit('edgeConnected', data);
}); });
this.bridge.on('management:edgeDisconnected', (data: { edgeId: string }) => { this.bridge.on('management:edgeDisconnected', (data: { edgeId: string }) => {
@@ -91,29 +108,42 @@ export class RemoteIngressHub extends EventEmitter {
* Start the hub — spawns the Rust binary and starts the tunnel server. * Start the hub — spawns the Rust binary and starts the tunnel server.
*/ */
public async start(config: IHubConfig = {}): Promise<void> { public async start(config: IHubConfig = {}): Promise<void> {
this.savedConfig = config;
this.stopping = false;
const spawned = await this.bridge.spawn(); const spawned = await this.bridge.spawn();
if (!spawned) { if (!spawned) {
throw new Error('Failed to spawn remoteingress-bin'); throw new Error('Failed to spawn remoteingress-bin');
} }
// Register crash recovery handler
this.bridge.on('exit', this.handleCrashRecovery);
await this.bridge.sendCommand('startHub', { await this.bridge.sendCommand('startHub', {
tunnelPort: config.tunnelPort ?? 8443, tunnelPort: config.tunnelPort ?? 8443,
targetHost: config.targetHost ?? '127.0.0.1', targetHost: config.targetHost ?? '127.0.0.1',
...(config.tls?.certPem && config.tls?.keyPem
? { tlsCertPem: config.tls.certPem, tlsKeyPem: config.tls.keyPem }
: {}),
}); });
this.started = true; this.started = true;
this.restartAttempts = 0;
this.restartBackoffMs = 1000;
} }
/** /**
* Stop the hub and kill the Rust process. * Stop the hub and kill the Rust process.
*/ */
public async stop(): Promise<void> { public async stop(): Promise<void> {
this.stopping = true;
if (this.started) { if (this.started) {
try { try {
await this.bridge.sendCommand('stopHub', {} as Record<string, never>); await this.bridge.sendCommand('stopHub', {} as Record<string, never>);
} catch { } catch {
// Process may already be dead // Process may already be dead
} }
this.bridge.removeListener('exit', this.handleCrashRecovery);
this.bridge.kill(); this.bridge.kill();
this.started = false; this.started = false;
} }
@@ -122,7 +152,8 @@ export class RemoteIngressHub extends EventEmitter {
/** /**
* Update the list of allowed edges that can connect to this hub. * Update the list of allowed edges that can connect to this hub.
*/ */
public async updateAllowedEdges(edges: Array<{ id: string; secret: string; listenPorts?: number[]; stunIntervalSecs?: number }>): Promise<void> { public async updateAllowedEdges(edges: TAllowedEdge[]): Promise<void> {
this.savedEdges = edges;
await this.bridge.sendCommand('updateAllowedEdges', { edges }); await this.bridge.sendCommand('updateAllowedEdges', { edges });
} }
@@ -139,4 +170,62 @@ export class RemoteIngressHub extends EventEmitter {
public get running(): boolean { public get running(): boolean {
return this.bridge.running; return this.bridge.running;
} }
/**
 * Handle unexpected Rust binary crash — auto-restart with backoff.
 *
 * Registered as the bridge's 'exit' listener. The exit is ignored when a stop was
 * requested, when the hub is not marked as started (which is also the case while a
 * previous recovery is still in flight), or when no config has been saved.
 *
 * Otherwise the handler waits `restartBackoffMs`, respawns the binary, re-sends
 * `startHub` with the saved config and restores the saved allowed-edge list. On
 * success it resets the backoff state and emits 'crashRecovered'; on any failure it
 * emits 'crashRecoveryFailed' so consumers learn that the hub is down instead of the
 * failure being swallowed.
 *
 * NOTE(review): a failed respawn is not retried, and the attempt counter is reset on
 * every success, so the MAX_RESTART_ATTEMPTS branch looks unreachable as written —
 * confirm the intended retry policy.
 *
 * @param code   exit code passed along with the bridge's 'exit' event
 * @param signal signal passed along with the bridge's 'exit' event
 */
private handleCrashRecovery = async (code: number | null, signal: string | null) => {
  if (this.stopping || !this.started || !this.savedConfig) {
    return;
  }
  // Capture the config now so a later mutation of savedConfig cannot change this attempt.
  const config = this.savedConfig;

  console.error(
    `[RemoteIngressHub] Rust binary crashed (code=${code}, signal=${signal}), ` +
      `attempt ${this.restartAttempts + 1}/${MAX_RESTART_ATTEMPTS}`
  );

  // Marks the recovery as in flight: further 'exit' events are ignored by the guard above.
  this.started = false;

  if (this.restartAttempts >= MAX_RESTART_ATTEMPTS) {
    console.error('[RemoteIngressHub] Max restart attempts reached, giving up');
    this.emit('crashRecoveryFailed');
    return;
  }

  await new Promise(resolve => setTimeout(resolve, this.restartBackoffMs));

  // stop() skips its kill/removeListener path while `started` is false, so a stop
  // requested during the backoff must be honoured here or the hub would come back.
  if (this.stopping) {
    return;
  }

  this.restartBackoffMs = Math.min(this.restartBackoffMs * 2, MAX_RESTART_BACKOFF_MS);
  this.restartAttempts++;

  try {
    const spawned = await this.bridge.spawn();
    if (!spawned) {
      console.error('[RemoteIngressHub] Failed to respawn binary');
      if (!this.stopping) {
        this.emit('crashRecoveryFailed');
      }
      return;
    }

    // Re-arm the handler without stacking duplicates: EventEmitter.on() does not
    // de-duplicate, so drop any existing registration first.
    this.bridge.removeListener('exit', this.handleCrashRecovery);
    this.bridge.on('exit', this.handleCrashRecovery);

    await this.bridge.sendCommand('startHub', {
      tunnelPort: config.tunnelPort ?? 8443,
      targetHost: config.targetHost ?? '127.0.0.1',
      ...(config.tls?.certPem && config.tls?.keyPem
        ? { tlsCertPem: config.tls.certPem, tlsKeyPem: config.tls.keyPem }
        : {}),
    });

    // Restore allowed edges
    if (this.savedEdges.length > 0) {
      await this.bridge.sendCommand('updateAllowedEdges', { edges: this.savedEdges });
    }

    // stop() raced with the respawn: tear the fresh process down instead of
    // reporting a successful recovery.
    if (this.stopping) {
      this.bridge.removeListener('exit', this.handleCrashRecovery);
      this.bridge.kill();
      return;
    }

    this.started = true;
    this.restartAttempts = 0;
    this.restartBackoffMs = 1000;
    console.log('[RemoteIngressHub] Successfully recovered from crash');
    this.emit('crashRecovered');
  } catch (err) {
    console.error(`[RemoteIngressHub] Crash recovery failed: ${err}`);
    if (!this.stopping) {
      this.emit('crashRecoveryFailed');
    }
  }
};
} }