BREAKING CHANGE(core): replace the TypeScript database engine with a Rust-backed embedded server and bridge

This commit is contained in:
2026-03-26 19:48:27 +00:00
parent 8ec2046908
commit e23a951dbe
106 changed files with 11567 additions and 10678 deletions

View File

@@ -0,0 +1,18 @@
# Manifest for the wire-protocol crate of RustDb.
[package]
name = "rustdb-wire"
# Shared metadata is inherited from the workspace root manifest.
version.workspace = true
edition.workspace = true
license.workspace = true
authors.workspace = true
description = "MongoDB-compatible wire protocol parser and encoder for RustDb"
[dependencies]
# BSON (de)serialization for command/response documents.
bson = { workspace = true }
# `BytesMut` buffers shared with the tokio codec machinery.
bytes = { workspace = true }
# `Decoder`/`Encoder` codec traits implemented by `WireCodec`.
tokio-util = { workspace = true }
# Derive-based error enum (`WireError`).
thiserror = { workspace = true }
# Structured logging/diagnostics.
tracing = { workspace = true }
# CRC-32 — presumably for the OP_MSG checksum trailer; not referenced in the
# sources shown here, so confirm it is actually used before keeping it.
crc32fast = { workspace = true }
[dev-dependencies]
# Async runtime for tests.
tokio = { workspace = true }

View File

@@ -0,0 +1,49 @@
use bytes::{Buf, BytesMut};
use tokio_util::codec::{Decoder, Encoder};
use crate::error::WireError;
use crate::parser::{parse_message, ParsedCommand};
/// Tokio codec for framing wire protocol messages on a TCP stream.
///
/// The wire protocol is naturally length-prefixed:
/// the first 4 bytes of each message contain the total message length.
///
/// A unit struct: the codec itself is stateless, all framing state lives in
/// the `BytesMut` buffer owned by the framed transport.
pub struct WireCodec;
impl Decoder for WireCodec {
type Item = ParsedCommand;
type Error = WireError;
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
if src.len() < 4 {
return Ok(None);
}
// Peek at message length
let msg_len = i32::from_le_bytes([src[0], src[1], src[2], src[3]]) as usize;
if src.len() < msg_len {
// Reserve space for the rest of the message
src.reserve(msg_len - src.len());
return Ok(None);
}
match parse_message(src)? {
Some((cmd, bytes_consumed)) => {
src.advance(bytes_consumed);
Ok(Some(cmd))
}
None => Ok(None),
}
}
}
/// Encoder for raw byte responses (already serialized by the command handlers).
impl Encoder<Vec<u8>> for WireCodec {
    type Error = WireError;

    /// Append pre-framed response bytes to the outbound buffer verbatim.
    fn encode(&mut self, item: Vec<u8>, dst: &mut BytesMut) -> Result<(), Self::Error> {
        // The response is already a complete wire message; just copy it out.
        dst.reserve(item.len());
        dst.extend_from_slice(item.as_slice());
        Ok(())
    }
}

View File

@@ -0,0 +1,142 @@
use bson::Document;
use crate::opcodes::*;
/// Encode an OP_MSG response.
///
/// Layout: 16-byte header, 4-byte flagBits (always 0), one section-type-0
/// byte, then the body document as raw BSON.
pub fn encode_op_msg_response(
    response_to: i32,
    response: &Document,
    request_id: i32,
) -> Vec<u8> {
    let body = bson::to_vec(response).expect("failed to serialize BSON response");
    // header (16) + flagBits (4) + section kind byte (1) + body BSON
    let total_len = body.len() + 21;
    let mut out = Vec::with_capacity(total_len);
    // Header words: messageLength, requestID, responseTo, opCode.
    for word in [total_len as i32, request_id, response_to, OP_MSG] {
        out.extend_from_slice(&word.to_le_bytes());
    }
    out.extend_from_slice(&0u32.to_le_bytes()); // flagBits: none set
    out.push(SECTION_BODY); // section type 0 (body)
    out.extend_from_slice(&body);
    out
}
/// Encode an OP_REPLY response (legacy, for OP_QUERY responses).
///
/// Layout: 16-byte header, responseFlags, cursorID, startingFrom,
/// numberReturned, then the documents back-to-back as raw BSON.
pub fn encode_op_reply_response(
    response_to: i32,
    documents: &[Document],
    request_id: i32,
    cursor_id: i64,
) -> Vec<u8> {
    // Serialize every document up front so the total length is known before
    // the header is written.
    let mut payload: Vec<u8> = Vec::new();
    for doc in documents {
        let bytes = bson::to_vec(doc).expect("failed to serialize BSON document");
        payload.extend_from_slice(&bytes);
    }
    // header (16) + responseFlags (4) + cursorID (8) + startingFrom (4) + numberReturned (4)
    let message_length = 36 + payload.len();
    let mut out = Vec::with_capacity(message_length);
    // Header words: messageLength, requestID, responseTo, opCode.
    for word in [message_length as i32, request_id, response_to, OP_REPLY] {
        out.extend_from_slice(&word.to_le_bytes());
    }
    // OP_REPLY fields
    out.extend_from_slice(&0i32.to_le_bytes()); // responseFlags
    out.extend_from_slice(&cursor_id.to_le_bytes()); // cursorID
    out.extend_from_slice(&0i32.to_le_bytes()); // startingFrom
    out.extend_from_slice(&(documents.len() as i32).to_le_bytes()); // numberReturned
    out.extend_from_slice(&payload);
    out
}
/// Encode an error response as OP_MSG.
pub fn encode_error_response(
response_to: i32,
error_code: i32,
error_message: &str,
request_id: i32,
) -> Vec<u8> {
let response = bson::doc! {
"ok": 0,
"errmsg": error_message,
"code": error_code,
"codeName": error_code_name(error_code),
};
encode_op_msg_response(response_to, &response, request_id)
}
/// Map error codes to their code names.
///
/// Unrecognized codes map to `"UnknownError"`.
pub fn error_code_name(code: i32) -> &'static str {
    // Table of the (code, codeName) pairs this server emits.
    const NAMES: &[(i32, &str)] = &[
        (0, "OK"),
        (1, "InternalError"),
        (2, "BadValue"),
        (13, "Unauthorized"),
        (26, "NamespaceNotFound"),
        (27, "IndexNotFound"),
        (48, "NamespaceExists"),
        (59, "CommandNotFound"),
        (66, "ImmutableField"),
        (73, "InvalidNamespace"),
        (85, "IndexOptionsConflict"),
        (112, "WriteConflict"),
        (121, "DocumentValidationFailure"),
        (211, "KeyNotFound"),
        (251, "NoSuchTransaction"),
        (11000, "DuplicateKey"),
        (11001, "DuplicateKeyValue"),
    ];
    NAMES
        .iter()
        .find(|&&(c, _)| c == code)
        .map(|&(_, name)| name)
        .unwrap_or("UnknownError")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The encoded OP_MSG must carry its own total length in the first four
    /// bytes and the OP_MSG opcode at header offset 12.
    #[test]
    fn test_encode_op_msg_roundtrip() {
        let doc = bson::doc! { "ok": 1 };
        let encoded = encode_op_msg_response(1, &doc, 2);
        // Verify header
        let msg_len = i32::from_le_bytes([encoded[0], encoded[1], encoded[2], encoded[3]]);
        assert_eq!(msg_len as usize, encoded.len());
        let op_code = i32::from_le_bytes([encoded[12], encoded[13], encoded[14], encoded[15]]);
        assert_eq!(op_code, OP_MSG);
    }

    /// Same framing invariants for the legacy OP_REPLY encoder.
    #[test]
    fn test_encode_op_reply() {
        let docs = vec![bson::doc! { "ok": 1 }];
        let encoded = encode_op_reply_response(1, &docs, 2, 0);
        let msg_len = i32::from_le_bytes([encoded[0], encoded[1], encoded[2], encoded[3]]);
        assert_eq!(msg_len as usize, encoded.len());
        let op_code = i32::from_le_bytes([encoded[12], encoded[13], encoded[14], encoded[15]]);
        assert_eq!(op_code, OP_REPLY);
    }
}

View File

@@ -0,0 +1,27 @@
/// Errors from wire protocol parsing/encoding.
#[derive(Debug, thiserror::Error)]
pub enum WireError {
    /// A buffer or section is shorter than its declared size.
    #[error("Incomplete message: need {needed} bytes, have {have}")]
    Incomplete { needed: usize, have: usize },
    /// The header's opCode is not one this parser supports.
    #[error("Unsupported opCode: {0}")]
    UnsupportedOpCode(i32),
    /// An OP_MSG contained no section-type-0 body document.
    #[error("Missing command body section in OP_MSG")]
    MissingBody,
    /// An OP_MSG section kind byte was neither 0 (body) nor 1 (sequence).
    #[error("Unknown section type: {0}")]
    UnknownSectionType(u8),
    /// A message payload failed to decode as BSON.
    #[error("BSON deserialization error: {0}")]
    BsonError(#[from] bson::de::Error),
    /// A response document failed to encode as BSON.
    #[error("BSON serialization error: {0}")]
    BsonSerError(#[from] bson::ser::Error),
    /// Underlying transport I/O failure.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
    /// An OP_MSG checksum trailer did not match the payload.
    #[error("Checksum mismatch: expected {expected}, got {actual}")]
    ChecksumMismatch { expected: u32, actual: u32 },
}

View File

@@ -0,0 +1,11 @@
//! MongoDB-compatible wire protocol support: message framing (`WireCodec`),
//! parsing (`parser`), and response encoding (`encoder`).

// Internal modules; selected items are re-exported below.
mod codec;
mod error;
mod opcodes;
mod parser;
mod encoder;

// Flat public API: consumers import everything from the crate root.
pub use codec::WireCodec;
pub use error::WireError;
pub use opcodes::*;
pub use parser::*;
pub use encoder::*;

View File

@@ -0,0 +1,19 @@
/// Wire protocol op codes
pub const OP_REPLY: i32 = 1; // legacy server reply to OP_QUERY
pub const OP_UPDATE: i32 = 2001;
pub const OP_INSERT: i32 = 2002;
pub const OP_QUERY: i32 = 2004; // legacy query; still used by drivers for the initial handshake
pub const OP_GET_MORE: i32 = 2005;
pub const OP_DELETE: i32 = 2006;
pub const OP_KILL_CURSORS: i32 = 2007;
pub const OP_COMPRESSED: i32 = 2012;
pub const OP_MSG: i32 = 2013; // modern command message; the opcode the parser primarily handles
/// OP_MSG section types
pub const SECTION_BODY: u8 = 0; // single BSON document holding the command
pub const SECTION_DOCUMENT_SEQUENCE: u8 = 1; // named sequence of BSON documents
/// OP_MSG flag bits
pub const MSG_FLAG_CHECKSUM_PRESENT: u32 = 1 << 0; // message ends with a 4-byte checksum trailer
pub const MSG_FLAG_MORE_TO_COME: u32 = 1 << 1; // defined by the protocol; not interpreted in this crate
pub const MSG_FLAG_EXHAUST_ALLOWED: u32 = 1 << 16; // defined by the protocol; not interpreted in this crate

View File

@@ -0,0 +1,236 @@
use bson::Document;
use std::collections::HashMap;
use crate::error::WireError;
use crate::opcodes::*;
/// Parsed wire protocol message header (16 bytes).
#[derive(Debug, Clone)]
pub struct MessageHeader {
    /// Total message length in bytes, including this 16-byte header.
    pub message_length: i32,
    /// Sender-assigned identifier for this message.
    pub request_id: i32,
    /// `request_id` of the message this one responds to.
    pub response_to: i32,
    /// Operation code (see the `opcodes` module), e.g. `OP_MSG` or `OP_QUERY`.
    pub op_code: i32,
}
/// A parsed OP_MSG section.
///
/// NOTE(review): this type is public but is not constructed by the parser in
/// this module (sections are folded directly into `ParsedCommand`); confirm
/// it has external consumers or consider removing it.
#[derive(Debug, Clone)]
pub enum OpMsgSection {
    /// Section type 0: single BSON document body.
    Body(Document),
    /// Section type 1: named document sequence for bulk operations.
    DocumentSequence {
        // Name of the command field the sequence supplies (e.g. "documents").
        identifier: String,
        // The BSON documents in the sequence.
        documents: Vec<Document>,
    },
}
/// A fully parsed command extracted from any message type.
#[derive(Debug, Clone)]
pub struct ParsedCommand {
    /// The command name (the first key of the command document).
    pub command_name: String,
    /// The full command document as received.
    pub command: Document,
    /// Target database, taken from `$db` (OP_MSG) or the collection
    /// namespace (OP_QUERY); defaults to "admin" when absent.
    pub database: String,
    /// Request identifier from the message header, echoed in the response.
    pub request_id: i32,
    /// Opcode of the originating message (OP_MSG or OP_QUERY).
    pub op_code: i32,
    /// Document sequences from OP_MSG section type 1 (e.g., "documents" for insert).
    pub document_sequences: Option<HashMap<String, Vec<Document>>>,
}
/// Parse a message header from a byte slice (must be >= 16 bytes).
pub fn parse_header(buf: &[u8]) -> MessageHeader {
    // Each header field is a little-endian i32 at a fixed 4-byte offset.
    let word = |at: usize| i32::from_le_bytes([buf[at], buf[at + 1], buf[at + 2], buf[at + 3]]);
    MessageHeader {
        message_length: word(0),
        request_id: word(4),
        response_to: word(8),
        op_code: word(12),
    }
}
/// Parse a complete message from a buffer.
/// Returns the parsed command and bytes consumed, or None if not enough data.
pub fn parse_message(buf: &[u8]) -> Result<Option<(ParsedCommand, usize)>, WireError> {
if buf.len() < 16 {
return Ok(None);
}
let header = parse_header(buf);
let msg_len = header.message_length as usize;
if buf.len() < msg_len {
return Ok(None);
}
let message_buf = &buf[..msg_len];
match header.op_code {
OP_MSG => parse_op_msg(message_buf, &header).map(|cmd| Some((cmd, msg_len))),
OP_QUERY => parse_op_query(message_buf, &header).map(|cmd| Some((cmd, msg_len))),
other => Err(WireError::UnsupportedOpCode(other)),
}
}
/// Parse an OP_MSG message.
///
/// `buf` must contain the complete message, header included. All size fields
/// inside the message come from the network, so every read is bounds-checked:
/// a malformed size becomes a `WireError` instead of a slice-index panic.
fn parse_op_msg(buf: &[u8], header: &MessageHeader) -> Result<ParsedCommand, WireError> {
    // Bounds-checked little-endian u32 read.
    fn read_le_u32(buf: &[u8], at: usize) -> Result<u32, WireError> {
        match buf.get(at..at + 4) {
            Some(b) => Ok(u32::from_le_bytes([b[0], b[1], b[2], b[3]])),
            None => Err(WireError::Incomplete { needed: at + 4, have: buf.len() }),
        }
    }

    let mut offset = 16; // skip the 16-byte header
    let flag_bits = read_le_u32(buf, offset)?;
    offset += 4;
    let mut body: Option<Document> = None;
    let mut document_sequences: HashMap<String, Vec<Document>> = HashMap::new();
    // When the checksum flag is set, the final 4 bytes are a checksum
    // trailer, not section data. NOTE(review): the checksum value itself is
    // not verified here — confirm whether that is intentional.
    let message_end = if flag_bits & MSG_FLAG_CHECKSUM_PRESENT != 0 {
        (header.message_length as usize).saturating_sub(4)
    } else {
        header.message_length as usize
    };
    while offset < message_end {
        let section_type = buf[offset];
        offset += 1;
        match section_type {
            SECTION_BODY => {
                // BSON documents are self-length-prefixed; the minimum valid
                // document (length word + terminator) is 5 bytes.
                let doc_size = read_le_u32(buf, offset)? as usize;
                let doc_end = offset.saturating_add(doc_size);
                if doc_size < 5 || doc_end > message_end {
                    return Err(WireError::Incomplete { needed: doc_end, have: message_end });
                }
                body = Some(bson::from_slice(&buf[offset..doc_end])?);
                offset = doc_end;
            }
            SECTION_DOCUMENT_SEQUENCE => {
                let section_size = read_le_u32(buf, offset)? as usize;
                let section_end = offset.saturating_add(section_size);
                // The size includes its own 4 bytes; it must also fit in the
                // message.
                if section_size < 5 || section_end > message_end {
                    return Err(WireError::Incomplete { needed: section_end, have: message_end });
                }
                offset += 4;
                // Identifier is a NUL-terminated C string naming the command
                // field the sequence supplies (e.g. "documents" for insert).
                let id_start = offset;
                while offset < section_end && buf[offset] != 0 {
                    offset += 1;
                }
                let identifier = std::str::from_utf8(&buf[id_start..offset])
                    .unwrap_or("")
                    .to_string();
                offset += 1; // skip NUL terminator
                // The remainder of the section is back-to-back BSON documents.
                let mut documents = Vec::new();
                while offset < section_end {
                    let doc_size = read_le_u32(buf, offset)? as usize;
                    let doc_end = offset.saturating_add(doc_size);
                    // The minimum-size check also prevents an infinite loop:
                    // a zero-length document would never advance `offset`.
                    if doc_size < 5 || doc_end > section_end {
                        return Err(WireError::Incomplete { needed: doc_end, have: section_end });
                    }
                    documents.push(bson::from_slice(&buf[offset..doc_end])?);
                    offset = doc_end;
                }
                document_sequences.insert(identifier, documents);
            }
            other => return Err(WireError::UnknownSectionType(other)),
        }
    }
    let command = body.ok_or(WireError::MissingBody)?;
    // By convention the first key of the body document is the command name.
    let command_name = command
        .keys()
        .next()
        .map(|s| s.to_string())
        .unwrap_or_default();
    let database = command
        .get_str("$db")
        .unwrap_or("admin")
        .to_string();
    Ok(ParsedCommand {
        command_name,
        command,
        database,
        request_id: header.request_id,
        op_code: header.op_code,
        document_sequences: if document_sequences.is_empty() {
            None
        } else {
            Some(document_sequences)
        },
    })
}
/// Parse an OP_QUERY message (legacy, used for initial driver handshake).
fn parse_op_query(buf: &[u8], header: &MessageHeader) -> Result<ParsedCommand, WireError> {
let mut offset = 16; // skip header
let _flags = i32::from_le_bytes([buf[offset], buf[offset + 1], buf[offset + 2], buf[offset + 3]]);
offset += 4;
// Read full collection name (C string)
let name_start = offset;
while offset < buf.len() && buf[offset] != 0 {
offset += 1;
}
let full_collection_name = std::str::from_utf8(&buf[name_start..offset])
.unwrap_or("")
.to_string();
offset += 1; // skip null terminator
let _number_to_skip = i32::from_le_bytes([buf[offset], buf[offset + 1], buf[offset + 2], buf[offset + 3]]);
offset += 4;
let _number_to_return = i32::from_le_bytes([buf[offset], buf[offset + 1], buf[offset + 2], buf[offset + 3]]);
offset += 4;
// Read query document
let doc_size = i32::from_le_bytes([buf[offset], buf[offset + 1], buf[offset + 2], buf[offset + 3]]) as usize;
let query: Document = bson::from_slice(&buf[offset..offset + doc_size])?;
// Extract database from collection name (format: "dbname.$cmd")
let parts: Vec<&str> = full_collection_name.splitn(2, '.').collect();
let database = parts.first().unwrap_or(&"admin").to_string();
let mut command_name = query
.keys()
.next()
.map(|s| s.to_string())
.unwrap_or_else(|| "find".to_string());
// Map legacy isMaster/ismaster to hello
if parts.get(1) == Some(&"$cmd") {
if command_name == "isMaster" || command_name == "ismaster" {
command_name = "hello".to_string();
}
} else {
command_name = "find".to_string();
}
Ok(ParsedCommand {
command_name,
command: query,
database,
request_id: header.request_id,
op_code: header.op_code,
document_sequences: None,
})
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trip check: fields written little-endian into a 16-byte buffer
    /// must come back out of `parse_header` unchanged.
    #[test]
    fn test_parse_header() {
        let mut buf = [0u8; 16];
        buf[0..4].copy_from_slice(&100i32.to_le_bytes()); // messageLength
        buf[4..8].copy_from_slice(&42i32.to_le_bytes()); // requestID
        buf[8..12].copy_from_slice(&0i32.to_le_bytes()); // responseTo
        buf[12..16].copy_from_slice(&OP_MSG.to_le_bytes()); // opCode
        let header = parse_header(&buf);
        assert_eq!(header.message_length, 100);
        assert_eq!(header.request_id, 42);
        assert_eq!(header.response_to, 0);
        assert_eq!(header.op_code, OP_MSG);
    }
}