// Source file: smartmta/rust/crates/mailer-core/src/bounce.rs

use regex::Regex;
use serde::{Deserialize, Serialize};
use std::sync::LazyLock;
/// Specific reason a message bounced or drew an automated reply.
///
/// Serde serializes the variant names in `snake_case` (for example
/// `InvalidRecipient` is written as `"invalid_recipient"`). Every variant
/// belongs to exactly one [`BounceCategory`]; see [`BounceType::category`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BounceType {
// Hard bounces: `category()` returns `BounceCategory::Hard`.
/// The recipient address is not known to the receiving system.
InvalidRecipient,
/// The recipient's domain or host could not be found.
DomainNotFound,
/// The mailbox is full or over quota.
MailboxFull,
/// The mailbox or account is disabled, inactive, or suspended.
MailboxInactive,
/// The message was blocked, rejected, denied, or refused.
Blocked,
/// The message was rejected as spam or bulk mail.
SpamRelated,
/// Policy-related permanent rejection; also the catch-all for class-5 DSN
/// status codes that have no more specific mapping.
PolicyRelated,
// Soft bounces: `category()` returns `BounceCategory::Soft`.
/// The receiving server or service is temporarily unavailable.
ServerUnavailable,
/// A generic temporary failure.
TemporaryFailure,
/// The mailbox quota is temporarily exceeded.
QuotaExceeded,
/// A network, connection, or routing error occurred.
NetworkError,
/// The delivery attempt timed out.
Timeout,
// Special: automated replies and the "could not classify" marker.
/// An automatic reply such as an out-of-office or vacation notice.
AutoResponse,
/// A challenge-response or email-verification request.
ChallengeResponse,
/// No classification could be determined.
Unknown,
}
/// Broad category of a bounce, derived from a [`BounceType`] by
/// [`BounceType::category`].
///
/// Serde serializes the variant names in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BounceCategory {
/// Category of the hard-bounce types (invalid recipient, blocked, ...).
Hard,
/// Category of the soft-bounce types (server unavailable, timeout, ...).
Soft,
/// Category of automated replies (`AutoResponse`, `ChallengeResponse`).
AutoResponse,
/// Category of `BounceType::Unknown`.
Unknown,
}
impl BounceType {
    /// Map this bounce type onto its broad [`BounceCategory`].
    ///
    /// The match is exhaustive on purpose: adding a new variant to
    /// `BounceType` fails to compile until it is assigned a category here.
    pub fn category(&self) -> BounceCategory {
        match *self {
            // Hard failures.
            Self::InvalidRecipient
            | Self::DomainNotFound
            | Self::MailboxFull
            | Self::MailboxInactive
            | Self::Blocked
            | Self::SpamRelated
            | Self::PolicyRelated => BounceCategory::Hard,

            // Soft failures.
            Self::ServerUnavailable
            | Self::TemporaryFailure
            | Self::QuotaExceeded
            | Self::NetworkError
            | Self::Timeout => BounceCategory::Soft,

            // Automated replies rather than delivery failures.
            Self::AutoResponse | Self::ChallengeResponse => BounceCategory::AutoResponse,

            Self::Unknown => BounceCategory::Unknown,
        }
    }
}
/// Result of bounce detection: the specific bounce type plus its category.
///
/// Serializable with serde so it can be passed across process boundaries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BounceDetection {
/// The specific bounce reason that was detected.
pub bounce_type: BounceType,
/// Broad category; the detection functions in this module always set it to
/// `bounce_type.category()`.
pub category: BounceCategory,
}
/// Pattern set for a bounce type: compiled regexes for matching against SMTP responses.
///
/// A text is attributed to `bounce_type` as soon as any one of `patterns`
/// matches it.
struct BouncePatterns {
/// Bounce type reported when one of `patterns` matches.
bounce_type: BounceType,
/// Compiled regular expressions, any of which identifies `bounce_type`.
patterns: Vec<Regex>,
}
/// All bounce detection patterns, compiled once.
static BOUNCE_PATTERNS: LazyLock<Vec<BouncePatterns>> = LazyLock::new(|| {
vec![
BouncePatterns {
bounce_type: BounceType::InvalidRecipient,
patterns: compile_patterns(&[
r"(?i)no such user",
r"(?i)user unknown",
r"(?i)does not exist",
r"(?i)invalid recipient",
r"(?i)unknown recipient",
r"(?i)no mailbox",
r"(?i)user not found",
r"(?i)recipient address rejected",
r"(?i)550 5\.1\.1",
]),
},
BouncePatterns {
bounce_type: BounceType::DomainNotFound,
patterns: compile_patterns(&[
r"(?i)domain not found",
r"(?i)unknown domain",
r"(?i)no such domain",
r"(?i)host not found",
r"(?i)domain invalid",
r"(?i)550 5\.1\.2",
]),
},
BouncePatterns {
bounce_type: BounceType::MailboxFull,
patterns: compile_patterns(&[
r"(?i)mailbox full",
r"(?i)over quota",
r"(?i)quota exceeded",
r"(?i)552 5\.2\.2",
]),
},
BouncePatterns {
bounce_type: BounceType::MailboxInactive,
patterns: compile_patterns(&[
r"(?i)mailbox disabled",
r"(?i)mailbox inactive",
r"(?i)account disabled",
r"(?i)mailbox not active",
r"(?i)account suspended",
]),
},
BouncePatterns {
bounce_type: BounceType::Blocked,
patterns: compile_patterns(&[
r"(?i)blocked",
r"(?i)rejected",
r"(?i)denied",
r"(?i)blacklisted",
r"(?i)prohibited",
r"(?i)refused",
r"(?i)550 5\.7\.",
]),
},
BouncePatterns {
bounce_type: BounceType::SpamRelated,
patterns: compile_patterns(&[
r"(?i)spam",
r"(?i)bulk mail",
r"(?i)content rejected",
r"(?i)message rejected",
r"(?i)550 5\.7\.1",
]),
},
BouncePatterns {
bounce_type: BounceType::ServerUnavailable,
patterns: compile_patterns(&[
r"(?i)server unavailable",
r"(?i)service unavailable",
r"(?i)try again later",
r"(?i)try later",
r"(?i)451 4\.3\.",
r"(?i)421 4\.3\.",
]),
},
BouncePatterns {
bounce_type: BounceType::TemporaryFailure,
patterns: compile_patterns(&[
r"(?i)temporary failure",
r"(?i)temporary error",
r"(?i)temporary problem",
r"(?i)try again",
r"(?i)451 4\.",
]),
},
BouncePatterns {
bounce_type: BounceType::QuotaExceeded,
patterns: compile_patterns(&[
r"(?i)quota temporarily exceeded",
r"(?i)mailbox temporarily full",
r"(?i)452 4\.2\.2",
]),
},
BouncePatterns {
bounce_type: BounceType::NetworkError,
patterns: compile_patterns(&[
r"(?i)network error",
r"(?i)connection error",
r"(?i)connection timed out",
r"(?i)routing error",
r"(?i)421 4\.4\.",
]),
},
BouncePatterns {
bounce_type: BounceType::Timeout,
patterns: compile_patterns(&[
r"(?i)timed out",
r"(?i)timeout",
r"(?i)450 4\.4\.2",
]),
},
BouncePatterns {
bounce_type: BounceType::AutoResponse,
patterns: compile_patterns(&[
r"(?i)auto[- ]reply",
r"(?i)auto[- ]response",
r"(?i)vacation",
r"(?i)out of office",
r"(?i)away from office",
r"(?i)on vacation",
r"(?i)automatic reply",
]),
},
BouncePatterns {
bounce_type: BounceType::ChallengeResponse,
patterns: compile_patterns(&[
r"(?i)challenge[- ]response",
r"(?i)verify your email",
r"(?i)confirm your email",
r"(?i)email verification",
]),
},
]
});
/// Case-insensitive matcher for subject lines that typically announce a
/// bounce ("Mail Delivery ...", "Delivery Status Notification",
/// "Undeliverable", ...). Compiled lazily on first use.
static BOUNCE_SUBJECT_RE: LazyLock<Regex> = LazyLock::new(|| {
    const SUBJECT_PATTERN: &str = r"(?i)mail delivery|delivery (?:failed|status|notification)|failure notice|returned mail|undeliverable|delivery problem";
    // The pattern is a compile-time constant, so a failure here is a programming error.
    Regex::new(SUBJECT_PATTERN).expect("invalid bounce subject regex")
});
/// Case-insensitive matcher that pulls the failed recipient out of free-form
/// bounce text.
///
/// It looks for one of the labels `failed recipient`, `to:` / `to=`,
/// `recipient:` or `delivery failed:`, followed by an email address that may
/// be wrapped in angle brackets. Capture group 1 is the bare address.
static BOUNCE_RECIPIENT_RE: LazyLock<Regex> = LazyLock::new(|| {
    const RECIPIENT_PATTERN: &str = r"(?i)(?:failed recipient|to[:=]\s*|recipient:|delivery failed:)\s*<?([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})>?";
    // The pattern is a compile-time constant, so a failure here is a programming error.
    Regex::new(RECIPIENT_PATTERN).expect("invalid bounce recipient regex")
});
/// Case-insensitive matcher for a `Diagnostic-Code:` (or `Diagnostic code:`)
/// label. Capture group 1 is the rest of that line, since `.` does not match
/// a newline.
static DIAGNOSTIC_CODE_RE: LazyLock<Regex> = LazyLock::new(|| {
    const DIAGNOSTIC_PATTERN: &str = r"(?i)diagnostic(?:-|\s+)code:\s*(.+)";
    // The pattern is a compile-time constant, so a failure here is a programming error.
    Regex::new(DIAGNOSTIC_PATTERN).expect("invalid diagnostic code regex")
});
/// Case-insensitive matcher for a labelled delivery status code.
///
/// Accepts the standard DSN per-recipient field `Status: 5.1.1` (RFC 3464) as
/// well as the `Status-Code:` / `Status code:` spellings. The `code` suffix is
/// optional because the standard field is just `Status:`; requiring the
/// suffix made the pattern miss every standards-conforming DSN.
///
/// Capture group 1 is the run of digits and dots that follows the label.
static STATUS_CODE_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"(?i)status(?:(?:-|\s+)code)?:\s*([0-9.]+)")
        .expect("invalid status code regex")
});
/// Case-insensitive matcher for the DSN `Original-Recipient:` field.
///
/// The lazy `.*?` skips whatever precedes the address on that line (such as
/// an `rfc822;` address-type prefix); capture group 1 is the email address.
static DSN_ORIGINAL_RECIPIENT_RE: LazyLock<Regex> = LazyLock::new(|| {
    const ORIGINAL_RECIPIENT_PATTERN: &str = r"(?i)original-recipient:.*?([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})";
    // The pattern is a compile-time constant, so a failure here is a programming error.
    Regex::new(ORIGINAL_RECIPIENT_PATTERN).expect("invalid DSN original-recipient regex")
});
/// Case-insensitive matcher for the DSN `Final-Recipient:` field.
///
/// The lazy `.*?` skips whatever precedes the address on that line (such as
/// an `rfc822;` address-type prefix); capture group 1 is the email address.
static DSN_FINAL_RECIPIENT_RE: LazyLock<Regex> = LazyLock::new(|| {
    const FINAL_RECIPIENT_PATTERN: &str = r"(?i)final-recipient:.*?([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})";
    // The pattern is a compile-time constant, so a failure here is a programming error.
    Regex::new(FINAL_RECIPIENT_PATTERN).expect("invalid DSN final-recipient regex")
});
/// Compile every regex source in `patterns`, preserving order.
///
/// # Panics
///
/// Panics with "invalid bounce pattern regex" if any source fails to compile.
/// All callers pass hard-coded literals, so that would be a programming error.
fn compile_patterns(patterns: &[&str]) -> Vec<Regex> {
    let mut compiled = Vec::with_capacity(patterns.len());
    for source in patterns {
        let regex = Regex::new(source).expect("invalid bounce pattern regex");
        compiled.push(regex);
    }
    compiled
}
/// Detect bounce type from an SMTP response, diagnostic code, or status code.
pub fn detect_bounce_type(
smtp_response: Option<&str>,
diagnostic_code: Option<&str>,
status_code: Option<&str>,
) -> BounceDetection {
// Check all text sources against patterns
let texts: Vec<&str> = [smtp_response, diagnostic_code, status_code]
.into_iter()
.flatten()
.collect();
for bp in BOUNCE_PATTERNS.iter() {
for text in &texts {
for pattern in &bp.patterns {
if pattern.is_match(text) {
return BounceDetection {
bounce_type: bp.bounce_type,
category: bp.bounce_type.category(),
};
}
}
}
}
// Fallback: parse DSN status code (class.subject.detail)
if let Some(code) = status_code {
if let Some(detection) = parse_dsn_status(code) {
return detection;
}
}
// Try to find DSN code in SMTP response
if let Some(resp) = smtp_response {
if let Some(code) = STATUS_CODE_RE.captures(resp).and_then(|c| c.get(1)) {
if let Some(detection) = parse_dsn_status(code.as_str()) {
return detection;
}
}
}
BounceDetection {
bounce_type: BounceType::Unknown,
category: BounceCategory::Unknown,
}
}
/// Parse a DSN enhanced status code like "5.1.1" or "4.2.2".
///
/// The code has the form `class.subject[.detail]`. Surrounding whitespace is
/// ignored. The class and subject are required; the detail is optional and is
/// only used to refine the result, so an unparsable detail does not reject
/// the code.
///
/// Mapping:
///
/// * `5.1.2` -> `DomainNotFound` (bad destination system address; matches the
///   `550 5.1.2` entry in the pattern table)
/// * `5.1.x` -> `InvalidRecipient`
/// * `5.2.x` -> `MailboxFull`
/// * `5.7.x` -> `Blocked`
/// * other `5.x.x` -> `PolicyRelated`
/// * `4.2.x` -> `QuotaExceeded`
/// * `4.3.x` -> `ServerUnavailable`
/// * `4.4.x` -> `NetworkError`
/// * other `4.x.x` -> `TemporaryFailure`
///
/// Returns `None` for any other class (including success class `2`), for
/// fewer than two fields, or when class/subject are not numbers.
fn parse_dsn_status(code: &str) -> Option<BounceDetection> {
    let mut fields = code.trim().split('.').map(str::trim);

    // RFC 3463 allows up to three digits for subject and detail, which does
    // not fit in a u8.
    let class: u16 = fields.next()?.parse().ok()?;
    let subject: u16 = fields.next()?.parse().ok()?;
    let detail: Option<u16> = fields.next().and_then(|field| field.parse().ok());

    let bounce_type = match (class, subject, detail) {
        (5, 1, Some(2)) => BounceType::DomainNotFound,
        (5, 1, _) => BounceType::InvalidRecipient,
        (5, 2, _) => BounceType::MailboxFull,
        (5, 7, _) => BounceType::Blocked,
        (5, _, _) => BounceType::PolicyRelated,
        (4, 2, _) => BounceType::QuotaExceeded,
        (4, 3, _) => BounceType::ServerUnavailable,
        (4, 4, _) => BounceType::NetworkError,
        (4, _, _) => BounceType::TemporaryFailure,
        _ => return None,
    };

    Some(BounceDetection {
        category: bounce_type.category(),
        bounce_type,
    })
}
/// Report whether `subject` reads like a bounce / delivery-failure
/// notification.
///
/// The check is a case-insensitive search for typical phrases anywhere in the
/// subject (see `BOUNCE_SUBJECT_RE`); it is a heuristic, not a guarantee.
pub fn is_bounce_subject(subject: &str) -> bool {
    let looks_like_bounce = BOUNCE_SUBJECT_RE.is_match(subject);
    looks_like_bounce
}
/// Extract the bounced recipient email from a bounce message body.
///
/// Three matchers are tried in order and the first address found is returned:
/// free-form labels (`BOUNCE_RECIPIENT_RE`), then the DSN `Final-Recipient:`
/// field, then the DSN `Original-Recipient:` field. Returns `None` when none
/// of them finds an address.
pub fn extract_bounce_recipient(body: &str) -> Option<String> {
    // Kept as references to the lazy cells so that a later regex is only
    // compiled if the earlier ones failed to match.
    let matchers = [
        &BOUNCE_RECIPIENT_RE,
        &DSN_FINAL_RECIPIENT_RE,
        &DSN_ORIGINAL_RECIPIENT_RE,
    ];

    for matcher in matchers {
        let address = matcher.captures(body).and_then(|caps| caps.get(1));
        if let Some(found) = address {
            return Some(found.as_str().to_string());
        }
    }
    None
}
/// Extract the diagnostic code from a bounce message body.
///
/// Returns the text following the first `Diagnostic-Code:` label up to the
/// end of that line, with surrounding whitespace trimmed, or `None` if the
/// label is absent.
pub fn extract_diagnostic_code(body: &str) -> Option<String> {
    let captures = DIAGNOSTIC_CODE_RE.captures(body)?;
    let value = captures.get(1)?;
    Some(value.as_str().trim().to_string())
}
/// Extract the status code from a bounce message body.
///
/// Returns the digits-and-dots value captured by `STATUS_CODE_RE` at its
/// first match, trimmed, or `None` if the body contains no such match.
pub fn extract_status_code(body: &str) -> Option<String> {
    let captures = STATUS_CODE_RE.captures(body)?;
    let value = captures.get(1)?;
    Some(value.as_str().trim().to_string())
}
/// Calculate retry delay using exponential backoff.
///
/// The result is `initial_delay_ms * backoff_factor^retry_count`, truncated
/// to whole milliseconds and capped at `max_delay_ms`.
///
/// * `retry_count` - Number of retries so far (0-based)
/// * `initial_delay_ms` - Delay for the first retry, in milliseconds
///   (`default_retry_delay_ms` uses 15 min = 900_000)
/// * `max_delay_ms` - Upper bound for the returned delay, in milliseconds
///   (`default_retry_delay_ms` uses 24h = 86_400_000)
/// * `backoff_factor` - Multiplier applied per retry
///   (`default_retry_delay_ms` uses 2.0)
///
/// The float-to-integer conversion saturates: an infinite product yields
/// `max_delay_ms`, while a negative or NaN product yields 0.
pub fn retry_delay_ms(
    retry_count: u32,
    initial_delay_ms: u64,
    max_delay_ms: u64,
    backoff_factor: f64,
) -> u64 {
    // `powi` takes an i32. A plain `as i32` cast would wrap counts above
    // i32::MAX to a negative exponent and shrink the delay instead of
    // growing it, so clamp before converting.
    let exponent = retry_count.min(i32::MAX as u32) as i32;
    let delay = (initial_delay_ms as f64) * backoff_factor.powi(exponent);
    (delay as u64).min(max_delay_ms)
}
/// Retry delay for the given 0-based `retry_count` using the standard
/// schedule: start at 15 minutes, double on every retry, never exceed
/// 24 hours.
pub fn default_retry_delay_ms(retry_count: u32) -> u64 {
    const INITIAL_DELAY_MS: u64 = 15 * 60 * 1000; // 15 minutes
    const MAX_DELAY_MS: u64 = 24 * 60 * 60 * 1000; // 24 hours
    const BACKOFF_FACTOR: f64 = 2.0;

    retry_delay_ms(retry_count, INITIAL_DELAY_MS, MAX_DELAY_MS, BACKOFF_FACTOR)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Classify a lone SMTP response with no diagnostic or status code.
    fn classify(smtp_response: &str) -> BounceDetection {
        detect_bounce_type(Some(smtp_response), None, None)
    }

    #[test]
    fn test_detect_invalid_recipient() {
        let detection = classify("550 5.1.1 User unknown");
        assert_eq!(detection.bounce_type, BounceType::InvalidRecipient);
        assert_eq!(detection.category, BounceCategory::Hard);
    }

    #[test]
    fn test_detect_mailbox_full() {
        let detection = classify("552 5.2.2 Mailbox full");
        assert_eq!(detection.bounce_type, BounceType::MailboxFull);
        assert_eq!(detection.category, BounceCategory::Hard);
    }

    #[test]
    fn test_detect_temporary_failure() {
        // "try again later" belongs to the server-unavailable group, which is
        // consulted before the generic temporary-failure group.
        let detection = classify("451 4.3.0 Try again later");
        assert_eq!(detection.bounce_type, BounceType::ServerUnavailable);
        assert_eq!(detection.category, BounceCategory::Soft);
    }

    #[test]
    fn test_detect_auto_response() {
        let detection = classify("Auto-reply: Out of office");
        assert_eq!(detection.bounce_type, BounceType::AutoResponse);
        assert_eq!(detection.category, BounceCategory::AutoResponse);
    }

    #[test]
    fn test_detect_from_dsn_status() {
        let cases = [
            ("5.1.1", BounceType::InvalidRecipient),
            ("4.4.1", BounceType::NetworkError),
        ];
        for (status, expected) in cases {
            let detection = detect_bounce_type(None, None, Some(status));
            assert_eq!(detection.bounce_type, expected);
        }
    }

    #[test]
    fn test_detect_unknown() {
        let detection = classify("Something weird happened");
        assert_eq!(detection.bounce_type, BounceType::Unknown);
    }

    #[test]
    fn test_is_bounce_subject() {
        let bounce_subjects = [
            "Mail Delivery Failure",
            "Delivery Status Notification",
            "Returned mail: see transcript for details",
            "Undeliverable: Your message",
        ];
        for subject in bounce_subjects {
            assert!(is_bounce_subject(subject));
        }

        let ordinary_subjects = ["Hello World", "Meeting tomorrow"];
        for subject in ordinary_subjects {
            assert!(!is_bounce_subject(subject));
        }
    }

    #[test]
    fn test_extract_bounce_recipient() {
        // Free-form "recipient:" label.
        let labelled = "Delivery to the following recipient failed:\n recipient: user@example.com";
        assert_eq!(
            extract_bounce_recipient(labelled),
            Some("user@example.com".to_string())
        );

        // DSN field with an address-type prefix.
        let dsn_field = "Final-Recipient: rfc822;bounce@test.org";
        assert_eq!(
            extract_bounce_recipient(dsn_field),
            Some("bounce@test.org".to_string())
        );
    }

    #[test]
    fn test_retry_delay() {
        let schedule = [
            (0, 900_000),     // 15 min
            (1, 1_800_000),   // 30 min
            (2, 3_600_000),   // 1 hour
            (20, 86_400_000), // capped at 24h
        ];
        for (retry_count, expected_ms) in schedule {
            assert_eq!(default_retry_delay_ms(retry_count), expected_ms);
        }
    }
}