BREAKING CHANGE(core): replace the TypeScript database engine with a Rust-backed embedded server and bridge
This commit is contained in:
18
rust/crates/rustdb-wire/Cargo.toml
Normal file
18
rust/crates/rustdb-wire/Cargo.toml
Normal file
@@ -0,0 +1,18 @@
|
||||
[package]
name = "rustdb-wire"
version.workspace = true
edition.workspace = true
license.workspace = true
authors.workspace = true
description = "MongoDB-compatible wire protocol parser and encoder for RustDb"

[dependencies]
# BSON (de)serialization of command and response documents.
bson = { workspace = true }
# Byte-buffer primitives used by the stream codec.
bytes = { workspace = true }
# `Decoder`/`Encoder` traits for framing messages on a TCP stream.
tokio-util = { workspace = true }
# Derive macro for the `WireError` enum.
thiserror = { workspace = true }
# NOTE(review): tracing and crc32fast are not referenced in the sources
# shown here — confirm they are used (e.g. OP_MSG checksum validation)
# before keeping them as dependencies.
tracing = { workspace = true }
crc32fast = { workspace = true }

[dev-dependencies]
tokio = { workspace = true }
|
||||
49
rust/crates/rustdb-wire/src/codec.rs
Normal file
49
rust/crates/rustdb-wire/src/codec.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
use bytes::{Buf, BytesMut};
|
||||
use tokio_util::codec::{Decoder, Encoder};
|
||||
|
||||
use crate::error::WireError;
|
||||
use crate::parser::{parse_message, ParsedCommand};
|
||||
|
||||
/// Tokio codec for framing wire protocol messages on a TCP stream.
|
||||
///
|
||||
/// The wire protocol is naturally length-prefixed:
|
||||
/// the first 4 bytes of each message contain the total message length.
|
||||
pub struct WireCodec;
|
||||
|
||||
impl Decoder for WireCodec {
|
||||
type Item = ParsedCommand;
|
||||
type Error = WireError;
|
||||
|
||||
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
|
||||
if src.len() < 4 {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// Peek at message length
|
||||
let msg_len = i32::from_le_bytes([src[0], src[1], src[2], src[3]]) as usize;
|
||||
|
||||
if src.len() < msg_len {
|
||||
// Reserve space for the rest of the message
|
||||
src.reserve(msg_len - src.len());
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
match parse_message(src)? {
|
||||
Some((cmd, bytes_consumed)) => {
|
||||
src.advance(bytes_consumed);
|
||||
Ok(Some(cmd))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Encoder for raw byte responses (already serialized by the command handlers).
|
||||
impl Encoder<Vec<u8>> for WireCodec {
|
||||
type Error = WireError;
|
||||
|
||||
fn encode(&mut self, item: Vec<u8>, dst: &mut BytesMut) -> Result<(), Self::Error> {
|
||||
dst.extend_from_slice(&item);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
142
rust/crates/rustdb-wire/src/encoder.rs
Normal file
142
rust/crates/rustdb-wire/src/encoder.rs
Normal file
@@ -0,0 +1,142 @@
|
||||
use bson::Document;
|
||||
|
||||
use crate::opcodes::*;
|
||||
|
||||
/// Encode an OP_MSG response.
|
||||
pub fn encode_op_msg_response(
|
||||
response_to: i32,
|
||||
response: &Document,
|
||||
request_id: i32,
|
||||
) -> Vec<u8> {
|
||||
let body_bson = bson::to_vec(response).expect("failed to serialize BSON response");
|
||||
|
||||
// Header (16) + flagBits (4) + section type (1) + body BSON
|
||||
let message_length = 16 + 4 + 1 + body_bson.len();
|
||||
|
||||
let mut buf = Vec::with_capacity(message_length);
|
||||
|
||||
// Header
|
||||
buf.extend_from_slice(&(message_length as i32).to_le_bytes());
|
||||
buf.extend_from_slice(&request_id.to_le_bytes());
|
||||
buf.extend_from_slice(&response_to.to_le_bytes());
|
||||
buf.extend_from_slice(&OP_MSG.to_le_bytes());
|
||||
|
||||
// Flag bits (0 = no flags)
|
||||
buf.extend_from_slice(&0u32.to_le_bytes());
|
||||
|
||||
// Section type 0 (body)
|
||||
buf.push(SECTION_BODY);
|
||||
|
||||
// Body BSON
|
||||
buf.extend_from_slice(&body_bson);
|
||||
|
||||
buf
|
||||
}
|
||||
|
||||
/// Encode an OP_REPLY response (legacy, for OP_QUERY responses).
|
||||
pub fn encode_op_reply_response(
|
||||
response_to: i32,
|
||||
documents: &[Document],
|
||||
request_id: i32,
|
||||
cursor_id: i64,
|
||||
) -> Vec<u8> {
|
||||
let doc_buffers: Vec<Vec<u8>> = documents
|
||||
.iter()
|
||||
.map(|doc| bson::to_vec(doc).expect("failed to serialize BSON document"))
|
||||
.collect();
|
||||
let total_docs_size: usize = doc_buffers.iter().map(|b| b.len()).sum();
|
||||
|
||||
// Header (16) + responseFlags (4) + cursorID (8) + startingFrom (4) + numberReturned (4) + docs
|
||||
let message_length = 16 + 4 + 8 + 4 + 4 + total_docs_size;
|
||||
|
||||
let mut buf = Vec::with_capacity(message_length);
|
||||
|
||||
// Header
|
||||
buf.extend_from_slice(&(message_length as i32).to_le_bytes());
|
||||
buf.extend_from_slice(&request_id.to_le_bytes());
|
||||
buf.extend_from_slice(&response_to.to_le_bytes());
|
||||
buf.extend_from_slice(&OP_REPLY.to_le_bytes());
|
||||
|
||||
// OP_REPLY fields
|
||||
buf.extend_from_slice(&0i32.to_le_bytes()); // responseFlags
|
||||
buf.extend_from_slice(&cursor_id.to_le_bytes()); // cursorID
|
||||
buf.extend_from_slice(&0i32.to_le_bytes()); // startingFrom
|
||||
buf.extend_from_slice(&(documents.len() as i32).to_le_bytes()); // numberReturned
|
||||
|
||||
// Documents
|
||||
for doc_buf in &doc_buffers {
|
||||
buf.extend_from_slice(doc_buf);
|
||||
}
|
||||
|
||||
buf
|
||||
}
|
||||
|
||||
/// Encode an error response as OP_MSG.
|
||||
pub fn encode_error_response(
|
||||
response_to: i32,
|
||||
error_code: i32,
|
||||
error_message: &str,
|
||||
request_id: i32,
|
||||
) -> Vec<u8> {
|
||||
let response = bson::doc! {
|
||||
"ok": 0,
|
||||
"errmsg": error_message,
|
||||
"code": error_code,
|
||||
"codeName": error_code_name(error_code),
|
||||
};
|
||||
encode_op_msg_response(response_to, &response, request_id)
|
||||
}
|
||||
|
||||
/// Map error codes to their code names.
///
/// Unrecognized codes map to `"UnknownError"`.
pub fn error_code_name(code: i32) -> &'static str {
    // Flat (code, name) table; the list is small enough that a linear
    // scan is effectively free.
    const CODE_NAMES: &[(i32, &str)] = &[
        (0, "OK"),
        (1, "InternalError"),
        (2, "BadValue"),
        (13, "Unauthorized"),
        (26, "NamespaceNotFound"),
        (27, "IndexNotFound"),
        (48, "NamespaceExists"),
        (59, "CommandNotFound"),
        (66, "ImmutableField"),
        (73, "InvalidNamespace"),
        (85, "IndexOptionsConflict"),
        (112, "WriteConflict"),
        (121, "DocumentValidationFailure"),
        (211, "KeyNotFound"),
        (251, "NoSuchTransaction"),
        (11000, "DuplicateKey"),
        (11001, "DuplicateKeyValue"),
    ];

    CODE_NAMES
        .iter()
        .find(|&&(c, _)| c == code)
        .map_or("UnknownError", |&(_, name)| name)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // An encoded OP_MSG must carry its own total length in bytes 0..4 and
    // the OP_MSG opCode in header bytes 12..16.
    #[test]
    fn test_encode_op_msg_roundtrip() {
        let doc = bson::doc! { "ok": 1 };
        let encoded = encode_op_msg_response(1, &doc, 2);

        // Verify header: the length prefix must equal the full buffer size.
        let msg_len = i32::from_le_bytes([encoded[0], encoded[1], encoded[2], encoded[3]]);
        assert_eq!(msg_len as usize, encoded.len());

        // opCode occupies the last 4 bytes of the 16-byte header.
        let op_code = i32::from_le_bytes([encoded[12], encoded[13], encoded[14], encoded[15]]);
        assert_eq!(op_code, OP_MSG);
    }

    // Same header invariants for the legacy OP_REPLY format.
    #[test]
    fn test_encode_op_reply() {
        let docs = vec![bson::doc! { "ok": 1 }];
        let encoded = encode_op_reply_response(1, &docs, 2, 0);

        // Length prefix must cover the whole message.
        let msg_len = i32::from_le_bytes([encoded[0], encoded[1], encoded[2], encoded[3]]);
        assert_eq!(msg_len as usize, encoded.len());

        // opCode must be OP_REPLY for this encoder.
        let op_code = i32::from_le_bytes([encoded[12], encoded[13], encoded[14], encoded[15]]);
        assert_eq!(op_code, OP_REPLY);
    }
}
|
||||
27
rust/crates/rustdb-wire/src/error.rs
Normal file
27
rust/crates/rustdb-wire/src/error.rs
Normal file
@@ -0,0 +1,27 @@
|
||||
/// Errors from wire protocol parsing/encoding.
#[derive(Debug, thiserror::Error)]
pub enum WireError {
    /// The buffer does not yet hold a complete message or section.
    #[error("Incomplete message: need {needed} bytes, have {have}")]
    Incomplete { needed: usize, have: usize },

    /// The header's opCode is not one this parser handles.
    #[error("Unsupported opCode: {0}")]
    UnsupportedOpCode(i32),

    /// An OP_MSG arrived without the mandatory type-0 body section.
    #[error("Missing command body section in OP_MSG")]
    MissingBody,

    /// An OP_MSG section kind byte was neither 0 (body) nor 1 (document
    /// sequence).
    #[error("Unknown section type: {0}")]
    UnknownSectionType(u8),

    /// A BSON document in an incoming message failed to deserialize.
    #[error("BSON deserialization error: {0}")]
    BsonError(#[from] bson::de::Error),

    /// A response document failed to serialize to BSON.
    #[error("BSON serialization error: {0}")]
    BsonSerError(#[from] bson::ser::Error),

    /// Underlying socket/stream failure.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),

    /// An OP_MSG checksum did not match the computed value.
    #[error("Checksum mismatch: expected {expected}, got {actual}")]
    ChecksumMismatch { expected: u32, actual: u32 },
}
|
||||
11
rust/crates/rustdb-wire/src/lib.rs
Normal file
11
rust/crates/rustdb-wire/src/lib.rs
Normal file
@@ -0,0 +1,11 @@
|
||||
//! MongoDB-compatible wire protocol support for RustDb: message parsing
//! (`parser`), response encoding (`encoder`), opcode constants
//! (`opcodes`), and a Tokio stream codec (`codec`).

mod codec;
mod error;
mod opcodes;
mod parser;
mod encoder;

// Public surface: the codec and error type by name, plus the opcode
// constants and parser/encoder items wholesale.
pub use codec::WireCodec;
pub use error::WireError;
pub use opcodes::*;
pub use parser::*;
pub use encoder::*;
|
||||
19
rust/crates/rustdb-wire/src/opcodes.rs
Normal file
19
rust/crates/rustdb-wire/src/opcodes.rs
Normal file
@@ -0,0 +1,19 @@
|
||||
/// Wire protocol op codes
// On-wire `opCode` values carried in the 16-byte message header.
// NOTE(review): only OP_MSG and OP_QUERY are handled by `parser`; the
// remaining codes are currently only recognized, not parsed.
pub const OP_REPLY: i32 = 1; // legacy reply format (used for OP_QUERY responses)
pub const OP_UPDATE: i32 = 2001;
pub const OP_INSERT: i32 = 2002;
pub const OP_QUERY: i32 = 2004; // legacy; still seen in initial driver handshakes
pub const OP_GET_MORE: i32 = 2005;
pub const OP_DELETE: i32 = 2006;
pub const OP_KILL_CURSORS: i32 = 2007;
pub const OP_COMPRESSED: i32 = 2012;
pub const OP_MSG: i32 = 2013; // modern extensible message format

/// OP_MSG section types
pub const SECTION_BODY: u8 = 0; // single BSON document: the command body
pub const SECTION_DOCUMENT_SEQUENCE: u8 = 1; // identifier + packed documents

/// OP_MSG flag bits
pub const MSG_FLAG_CHECKSUM_PRESENT: u32 = 1 << 0; // message ends with a 4-byte checksum
pub const MSG_FLAG_MORE_TO_COME: u32 = 1 << 1;
pub const MSG_FLAG_EXHAUST_ALLOWED: u32 = 1 << 16;
|
||||
236
rust/crates/rustdb-wire/src/parser.rs
Normal file
236
rust/crates/rustdb-wire/src/parser.rs
Normal file
@@ -0,0 +1,236 @@
|
||||
use bson::Document;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::error::WireError;
|
||||
use crate::opcodes::*;
|
||||
|
||||
/// Parsed wire protocol message header (16 bytes).
///
/// Field order matches the on-wire layout; every field is a little-endian
/// i32.
#[derive(Debug, Clone)]
pub struct MessageHeader {
    /// Total message size in bytes, including this header.
    pub message_length: i32,
    /// Client-assigned identifier for this message.
    pub request_id: i32,
    /// The `request_id` of the message this one is a response to.
    pub response_to: i32,
    /// Operation type; see the constants in `opcodes`.
    pub op_code: i32,
}
|
||||
|
||||
/// A parsed OP_MSG section.
///
/// NOTE(review): the parsing routines in this file track sections inline
/// rather than building this enum — confirm it is consumed elsewhere
/// before relying on it.
#[derive(Debug, Clone)]
pub enum OpMsgSection {
    /// Section type 0: single BSON document body.
    Body(Document),
    /// Section type 1: named document sequence for bulk operations.
    DocumentSequence {
        /// Sequence name (e.g. "documents" for an insert).
        identifier: String,
        /// The BSON documents packed into the sequence.
        documents: Vec<Document>,
    },
}
|
||||
|
||||
/// A fully parsed command extracted from any message type.
#[derive(Debug, Clone)]
pub struct ParsedCommand {
    /// First key of the command document (e.g. "find", "hello").
    pub command_name: String,
    /// The complete command document as received.
    pub command: Document,
    /// Target database: the `$db` field for OP_MSG, or the namespace
    /// prefix for OP_QUERY; defaults to "admin".
    pub database: String,
    /// Client-assigned request id from the message header.
    pub request_id: i32,
    /// opCode of the originating message (OP_MSG or OP_QUERY).
    pub op_code: i32,
    /// Document sequences from OP_MSG section type 1 (e.g., "documents" for insert).
    pub document_sequences: Option<HashMap<String, Vec<Document>>>,
}
|
||||
|
||||
/// Parse a message header from a byte slice (must be >= 16 bytes).
|
||||
pub fn parse_header(buf: &[u8]) -> MessageHeader {
|
||||
MessageHeader {
|
||||
message_length: i32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]),
|
||||
request_id: i32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]),
|
||||
response_to: i32::from_le_bytes([buf[8], buf[9], buf[10], buf[11]]),
|
||||
op_code: i32::from_le_bytes([buf[12], buf[13], buf[14], buf[15]]),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a complete message from a buffer.
|
||||
/// Returns the parsed command and bytes consumed, or None if not enough data.
|
||||
pub fn parse_message(buf: &[u8]) -> Result<Option<(ParsedCommand, usize)>, WireError> {
|
||||
if buf.len() < 16 {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let header = parse_header(buf);
|
||||
let msg_len = header.message_length as usize;
|
||||
|
||||
if buf.len() < msg_len {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let message_buf = &buf[..msg_len];
|
||||
|
||||
match header.op_code {
|
||||
OP_MSG => parse_op_msg(message_buf, &header).map(|cmd| Some((cmd, msg_len))),
|
||||
OP_QUERY => parse_op_query(message_buf, &header).map(|cmd| Some((cmd, msg_len))),
|
||||
other => Err(WireError::UnsupportedOpCode(other)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse an OP_MSG message.
|
||||
fn parse_op_msg(buf: &[u8], header: &MessageHeader) -> Result<ParsedCommand, WireError> {
|
||||
let mut offset = 16; // skip header
|
||||
|
||||
let flag_bits = u32::from_le_bytes([buf[offset], buf[offset + 1], buf[offset + 2], buf[offset + 3]]);
|
||||
offset += 4;
|
||||
|
||||
let mut body: Option<Document> = None;
|
||||
let mut document_sequences: HashMap<String, Vec<Document>> = HashMap::new();
|
||||
|
||||
// Parse sections until end (or checksum)
|
||||
let message_end = if flag_bits & MSG_FLAG_CHECKSUM_PRESENT != 0 {
|
||||
header.message_length as usize - 4
|
||||
} else {
|
||||
header.message_length as usize
|
||||
};
|
||||
|
||||
while offset < message_end {
|
||||
let section_type = buf[offset];
|
||||
offset += 1;
|
||||
|
||||
match section_type {
|
||||
SECTION_BODY => {
|
||||
let doc_size = i32::from_le_bytes([
|
||||
buf[offset], buf[offset + 1], buf[offset + 2], buf[offset + 3],
|
||||
]) as usize;
|
||||
let doc = bson::from_slice(&buf[offset..offset + doc_size])?;
|
||||
body = Some(doc);
|
||||
offset += doc_size;
|
||||
}
|
||||
SECTION_DOCUMENT_SEQUENCE => {
|
||||
let section_size = i32::from_le_bytes([
|
||||
buf[offset], buf[offset + 1], buf[offset + 2], buf[offset + 3],
|
||||
]) as usize;
|
||||
let section_end = offset + section_size;
|
||||
offset += 4;
|
||||
|
||||
// Read identifier (C string, null-terminated)
|
||||
let id_start = offset;
|
||||
while offset < section_end && buf[offset] != 0 {
|
||||
offset += 1;
|
||||
}
|
||||
let identifier = std::str::from_utf8(&buf[id_start..offset])
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
offset += 1; // skip null terminator
|
||||
|
||||
// Read documents
|
||||
let mut documents = Vec::new();
|
||||
while offset < section_end {
|
||||
let doc_size = i32::from_le_bytes([
|
||||
buf[offset], buf[offset + 1], buf[offset + 2], buf[offset + 3],
|
||||
]) as usize;
|
||||
let doc = bson::from_slice(&buf[offset..offset + doc_size])?;
|
||||
documents.push(doc);
|
||||
offset += doc_size;
|
||||
}
|
||||
|
||||
document_sequences.insert(identifier, documents);
|
||||
}
|
||||
other => return Err(WireError::UnknownSectionType(other)),
|
||||
}
|
||||
}
|
||||
|
||||
let command = body.ok_or(WireError::MissingBody)?;
|
||||
let command_name = command
|
||||
.keys()
|
||||
.next()
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_default();
|
||||
let database = command
|
||||
.get_str("$db")
|
||||
.unwrap_or("admin")
|
||||
.to_string();
|
||||
|
||||
Ok(ParsedCommand {
|
||||
command_name,
|
||||
command,
|
||||
database,
|
||||
request_id: header.request_id,
|
||||
op_code: header.op_code,
|
||||
document_sequences: if document_sequences.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(document_sequences)
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse an OP_QUERY message (legacy, used for initial driver handshake).
///
/// Layout after the 16-byte header: flags (i32), fullCollectionName
/// (NUL-terminated C string, "db.collection"), numberToSkip (i32),
/// numberToReturn (i32), then the query BSON document.
///
/// NOTE(review): the offsets and `doc_size` below are trusted — a
/// truncated or malformed message can panic on the slice indexing;
/// confirm callers only pass complete messages (as `parse_message` does).
fn parse_op_query(buf: &[u8], header: &MessageHeader) -> Result<ParsedCommand, WireError> {
    let mut offset = 16; // skip header

    // Flags are not used by this server; skip over them.
    let _flags = i32::from_le_bytes([buf[offset], buf[offset + 1], buf[offset + 2], buf[offset + 3]]);
    offset += 4;

    // Read full collection name (C string)
    let name_start = offset;
    while offset < buf.len() && buf[offset] != 0 {
        offset += 1;
    }
    let full_collection_name = std::str::from_utf8(&buf[name_start..offset])
        .unwrap_or("")
        .to_string();
    offset += 1; // skip null terminator

    // Skip/limit are parsed only to advance past them; they are not used.
    let _number_to_skip = i32::from_le_bytes([buf[offset], buf[offset + 1], buf[offset + 2], buf[offset + 3]]);
    offset += 4;

    let _number_to_return = i32::from_le_bytes([buf[offset], buf[offset + 1], buf[offset + 2], buf[offset + 3]]);
    offset += 4;

    // Read query document
    let doc_size = i32::from_le_bytes([buf[offset], buf[offset + 1], buf[offset + 2], buf[offset + 3]]) as usize;
    let query: Document = bson::from_slice(&buf[offset..offset + doc_size])?;

    // Extract database from collection name (format: "dbname.$cmd")
    let parts: Vec<&str> = full_collection_name.splitn(2, '.').collect();
    let database = parts.first().unwrap_or(&"admin").to_string();

    // Default the command name to the query's first key.
    let mut command_name = query
        .keys()
        .next()
        .map(|s| s.to_string())
        .unwrap_or_else(|| "find".to_string());

    // Map legacy isMaster/ismaster to hello
    // A "$cmd" collection means the query document IS a command; any other
    // collection is treated as a plain find.
    if parts.get(1) == Some(&"$cmd") {
        if command_name == "isMaster" || command_name == "ismaster" {
            command_name = "hello".to_string();
        }
    } else {
        command_name = "find".to_string();
    }

    Ok(ParsedCommand {
        command_name,
        command: query,
        database,
        request_id: header.request_id,
        op_code: header.op_code,
        document_sequences: None,
    })
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Round-trip: build a raw 16-byte header and check each field lands in
    // the right struct member with little-endian decoding.
    #[test]
    fn test_parse_header() {
        let mut buf = [0u8; 16];
        buf[0..4].copy_from_slice(&100i32.to_le_bytes()); // messageLength
        buf[4..8].copy_from_slice(&42i32.to_le_bytes()); // requestID
        buf[8..12].copy_from_slice(&0i32.to_le_bytes()); // responseTo
        buf[12..16].copy_from_slice(&OP_MSG.to_le_bytes()); // opCode

        let header = parse_header(&buf);
        assert_eq!(header.message_length, 100);
        assert_eq!(header.request_id, 42);
        assert_eq!(header.response_to, 0);
        assert_eq!(header.op_code, OP_MSG);
    }
}
|
||||
Reference in New Issue
Block a user