Files
smartmta/rust/crates/mailer-smtp/src/data.rs

290 lines
10 KiB
Rust

//! Email DATA phase processor.
//!
//! Handles dot-unstuffing, end-of-data detection, size enforcement,
//! and streaming accumulation of email data.
/// Result of processing a chunk of DATA input.
#[derive(Debug, Clone, PartialEq)]
pub enum DataAction {
/// More data needed — continue accumulating.
Continue,
/// End-of-data detected. The complete message body is ready.
Complete,
/// Message size limit exceeded.
SizeExceeded,
}
/// Streaming email data accumulator.
///
/// Processes incoming bytes from the DATA phase, handling:
/// - CRLF line ending normalization
/// - Dot-unstuffing (RFC 5321 §4.5.2)
/// - End-of-data marker detection (`<CRLF>.<CRLF>`)
/// - Size enforcement
pub struct DataAccumulator {
/// Accumulated message bytes.
buffer: Vec<u8>,
/// Maximum allowed size in bytes. 0 = unlimited.
max_size: u64,
/// Whether we've detected end-of-data.
complete: bool,
/// Whether the current position is at the start of a line.
at_line_start: bool,
/// Partial state for cross-chunk boundary handling.
partial: PartialState,
}
/// Tracks partial sequences that span chunk boundaries.
#[derive(Debug, Clone, Copy, PartialEq)]
enum PartialState {
/// No partial sequence.
None,
/// Saw `\r`, waiting for `\n`.
Cr,
/// At line start, saw `.`, waiting to determine dot-stuffing vs end-of-data.
Dot,
/// At line start, saw `.\r`, waiting for `\n` (end-of-data) or other.
DotCr,
}
impl DataAccumulator {
/// Create a new accumulator with the given size limit.
pub fn new(max_size: u64) -> Self {
Self {
buffer: Vec::with_capacity(8192),
max_size,
complete: false,
at_line_start: true, // First byte is at start of first line
partial: PartialState::None,
}
}
/// Process a chunk of incoming data.
///
/// Returns the action to take: continue, complete, or size exceeded.
pub fn process_chunk(&mut self, chunk: &[u8]) -> DataAction {
if self.complete {
return DataAction::Complete;
}
for &byte in chunk {
match self.partial {
PartialState::None => {
if self.at_line_start && byte == b'.' {
self.partial = PartialState::Dot;
} else if byte == b'\r' {
self.partial = PartialState::Cr;
} else {
self.buffer.push(byte);
self.at_line_start = false;
}
}
PartialState::Cr => {
if byte == b'\n' {
self.buffer.extend_from_slice(b"\r\n");
self.at_line_start = true;
self.partial = PartialState::None;
} else {
// Bare CR — emit it and process current byte
self.buffer.push(b'\r');
self.at_line_start = false;
self.partial = PartialState::None;
// Re-process current byte
if byte == b'\r' {
self.partial = PartialState::Cr;
} else {
self.buffer.push(byte);
}
}
}
PartialState::Dot => {
if byte == b'\r' {
self.partial = PartialState::DotCr;
} else if byte == b'.' {
// Dot-unstuffing: \r\n.. → \r\n.
// Emit one dot, consume the other
self.buffer.push(b'.');
self.at_line_start = false;
self.partial = PartialState::None;
} else {
// Dot at line start but not stuffing or end-of-data
self.buffer.push(b'.');
self.buffer.push(byte);
self.at_line_start = false;
self.partial = PartialState::None;
}
}
PartialState::DotCr => {
if byte == b'\n' {
// End-of-data: <CRLF>.<CRLF>
// Remove the trailing \r\n from the buffer
// (it was part of the terminator, not the message)
if self.buffer.ends_with(b"\r\n") {
let new_len = self.buffer.len() - 2;
self.buffer.truncate(new_len);
}
self.complete = true;
return DataAction::Complete;
} else {
// Not end-of-data — emit .\r and process current byte
self.buffer.push(b'.');
self.buffer.push(b'\r');
self.at_line_start = false;
self.partial = PartialState::None;
// Re-process current byte
if byte == b'\r' {
self.partial = PartialState::Cr;
} else {
self.buffer.push(byte);
}
}
}
}
// Check size limit
if self.max_size > 0 && self.buffer.len() as u64 > self.max_size {
return DataAction::SizeExceeded;
}
}
DataAction::Continue
}
/// Consume the accumulator and return the complete message data.
///
/// Returns `None` if end-of-data has not been detected.
pub fn into_message(self) -> Option<Vec<u8>> {
if !self.complete {
return None;
}
Some(self.buffer)
}
/// Get a reference to the accumulated data so far.
pub fn data(&self) -> &[u8] {
&self.buffer
}
/// Get the current accumulated size.
pub fn size(&self) -> usize {
self.buffer.len()
}
/// Whether end-of-data has been detected.
pub fn is_complete(&self) -> bool {
self.complete
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_simple_message() {
let mut acc = DataAccumulator::new(0);
let data = b"Subject: Test\r\n\r\nHello world\r\n.\r\n";
let action = acc.process_chunk(data);
assert_eq!(action, DataAction::Complete);
let msg = acc.into_message().unwrap();
assert_eq!(msg, b"Subject: Test\r\n\r\nHello world");
}
#[test]
fn test_dot_unstuffing() {
let mut acc = DataAccumulator::new(0);
// A line starting with ".." should become "."
let data = b"Line 1\r\n..dot-stuffed\r\n.\r\n";
let action = acc.process_chunk(data);
assert_eq!(action, DataAction::Complete);
let msg = acc.into_message().unwrap();
assert_eq!(msg, b"Line 1\r\n.dot-stuffed");
}
#[test]
fn test_multiple_chunks() {
let mut acc = DataAccumulator::new(0);
assert_eq!(acc.process_chunk(b"Subject: Test\r\n"), DataAction::Continue);
assert_eq!(acc.process_chunk(b"\r\nBody line 1\r\n"), DataAction::Continue);
assert_eq!(acc.process_chunk(b"Body line 2\r\n.\r\n"), DataAction::Complete);
let msg = acc.into_message().unwrap();
assert_eq!(msg, b"Subject: Test\r\n\r\nBody line 1\r\nBody line 2");
}
#[test]
fn test_end_of_data_spanning_chunks() {
let mut acc = DataAccumulator::new(0);
assert_eq!(acc.process_chunk(b"Body\r\n"), DataAction::Continue);
assert_eq!(acc.process_chunk(b".\r"), DataAction::Continue);
assert_eq!(acc.process_chunk(b"\n"), DataAction::Complete);
let msg = acc.into_message().unwrap();
assert_eq!(msg, b"Body");
}
#[test]
fn test_size_limit() {
let mut acc = DataAccumulator::new(10);
let data = b"This is definitely more than 10 bytes\r\n.\r\n";
let action = acc.process_chunk(data);
assert_eq!(action, DataAction::SizeExceeded);
}
#[test]
fn test_not_complete() {
let mut acc = DataAccumulator::new(0);
acc.process_chunk(b"partial data");
assert!(!acc.is_complete());
assert!(acc.into_message().is_none());
}
#[test]
fn test_empty_message() {
let mut acc = DataAccumulator::new(0);
let action = acc.process_chunk(b".\r\n");
assert_eq!(action, DataAction::Complete);
let msg = acc.into_message().unwrap();
assert!(msg.is_empty());
}
#[test]
fn test_dot_not_at_line_start() {
let mut acc = DataAccumulator::new(0);
let data = b"Hello.World\r\n.\r\n";
let action = acc.process_chunk(data);
assert_eq!(action, DataAction::Complete);
let msg = acc.into_message().unwrap();
assert_eq!(msg, b"Hello.World");
}
#[test]
fn test_multiple_dots_in_line() {
let mut acc = DataAccumulator::new(0);
let data = b"...\r\n.\r\n";
let action = acc.process_chunk(data);
assert_eq!(action, DataAction::Complete);
// First dot at line start is dot-unstuffed, leaving ".."
let msg = acc.into_message().unwrap();
assert_eq!(msg, b"..");
}
#[test]
fn test_crlf_dot_spanning_three_chunks() {
let mut acc = DataAccumulator::new(0);
assert_eq!(acc.process_chunk(b"Body\r"), DataAction::Continue);
assert_eq!(acc.process_chunk(b"\n."), DataAction::Continue);
assert_eq!(acc.process_chunk(b"\r\n"), DataAction::Complete);
let msg = acc.into_message().unwrap();
assert_eq!(msg, b"Body");
}
#[test]
fn test_bare_cr() {
let mut acc = DataAccumulator::new(0);
let data = b"Hello\rWorld\r\n.\r\n";
let action = acc.process_chunk(data);
assert_eq!(action, DataAction::Complete);
let msg = acc.into_message().unwrap();
assert_eq!(msg, b"Hello\rWorld");
}
}