feat(security): migrate content scanning and bounce detection to Rust security bridge; add scanContent IPC command and Rust content scanner with tests; update TS RustSecurityBridge and callers, and adjust CI package references

This commit is contained in:
2026-02-10 21:19:13 +00:00
parent b82468ab1e
commit 15a45089aa
21 changed files with 844 additions and 1530 deletions

View File

@@ -17,3 +17,4 @@ hickory-resolver.workspace = true
ipnet.workspace = true
rustls-pki-types.workspace = true
psl.workspace = true
regex.workspace = true

View File

@@ -0,0 +1,515 @@
//! Content scanning for email threat detection.
//!
//! Provides pattern-based scanning of email subjects, text bodies, HTML bodies,
//! and attachment filenames for phishing, spam, malware, suspicious links,
//! script injection, and sensitive data patterns.
use regex::Regex;
use serde::Serialize;
use std::sync::LazyLock;
// ---------------------------------------------------------------------------
// Result types
// ---------------------------------------------------------------------------
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ContentScanResult {
pub threat_score: u32,
pub threat_type: Option<String>,
pub threat_details: Option<String>,
pub scanned_elements: Vec<String>,
}
// ---------------------------------------------------------------------------
// Pattern definitions (compiled once via LazyLock)
// ---------------------------------------------------------------------------
static PHISHING_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
vec![
Regex::new(r"(?i)(?:verify|confirm|update|login).*(?:account|password|details)").unwrap(),
Regex::new(r"(?i)urgent.*(?:action|attention|required)").unwrap(),
Regex::new(r"(?i)(?:paypal|apple|microsoft|amazon|google|bank).*(?:verify|confirm|suspend)").unwrap(),
Regex::new(r"(?i)your.*(?:account).*(?:suspended|compromised|locked)").unwrap(),
Regex::new(r"(?i)\b(?:password reset|security alert|security notice)\b").unwrap(),
]
});
static SPAM_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
vec![
Regex::new(r"(?i)\b(?:viagra|cialis|enlargement|diet pill|lose weight fast|cheap meds)\b").unwrap(),
Regex::new(r"(?i)\b(?:million dollars|lottery winner|prize claim|inheritance|rich widow)\b").unwrap(),
Regex::new(r"(?i)\b(?:earn from home|make money fast|earn \$\d{3,}/day)\b").unwrap(),
Regex::new(r"(?i)\b(?:limited time offer|act now|exclusive deal|only \d+ left)\b").unwrap(),
Regex::new(r"(?i)\b(?:forex|stock tip|investment opportunity|cryptocurrency|bitcoin)\b").unwrap(),
]
});
static MALWARE_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
vec![
Regex::new(r"(?i)(?:attached file|see attachment).*(?:invoice|receipt|statement|document)").unwrap(),
Regex::new(r"(?i)open.*(?:the attached|this attachment)").unwrap(),
Regex::new(r"(?i)(?:enable|allow).*(?:macros|content|editing)").unwrap(),
Regex::new(r"(?i)download.*(?:attachment|file|document)").unwrap(),
Regex::new(r"(?i)\b(?:ransomware protection|virus alert|malware detected)\b").unwrap(),
]
});
static SUSPICIOUS_LINK_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
vec![
Regex::new(r"(?i)https?://bit\.ly/").unwrap(),
Regex::new(r"(?i)https?://goo\.gl/").unwrap(),
Regex::new(r"(?i)https?://t\.co/").unwrap(),
Regex::new(r"(?i)https?://tinyurl\.com/").unwrap(),
Regex::new(r"(?i)https?://(?:\d{1,3}\.){3}\d{1,3}").unwrap(),
Regex::new(r"(?i)https?://.*\.(?:xyz|top|club|gq|cf)/").unwrap(),
Regex::new(r"(?i)(?:login|account|signin|auth).*\.(?:xyz|top|club|gq|cf|tk|ml|ga|pw|ws|buzz)\b").unwrap(),
]
});
static SCRIPT_INJECTION_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
vec![
Regex::new(r"(?is)<script.*>.*</script>").unwrap(),
Regex::new(r"(?i)javascript:").unwrap(),
Regex::new(r#"(?i)on(?:click|load|mouse|error|focus|blur)=".*""#).unwrap(),
Regex::new(r"(?i)document\.(?:cookie|write|location)").unwrap(),
Regex::new(r"(?i)eval\s*\(").unwrap(),
]
});
static SENSITIVE_DATA_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
vec![
Regex::new(r"\b(?:\d{3}-\d{2}-\d{4}|\d{9})\b").unwrap(),
Regex::new(r"\b\d{13,16}\b").unwrap(),
Regex::new(r"\b(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{4})\b").unwrap(),
]
});
/// Link extraction from HTML href attributes.
static HREF_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"(?i)href=["'](https?://[^"']+)["']"#).unwrap()
});
/// Executable file extensions that are considered dangerous.
static EXECUTABLE_EXTENSIONS: LazyLock<Vec<&'static str>> = LazyLock::new(|| {
vec![
".exe", ".dll", ".bat", ".cmd", ".msi", ".vbs", ".ps1",
".sh", ".jar", ".py", ".com", ".scr", ".pif", ".hta", ".cpl",
".reg", ".vba", ".lnk", ".wsf", ".msp", ".mst",
]
});
/// Document extensions that may contain macros.
static MACRO_DOCUMENT_EXTENSIONS: LazyLock<Vec<&'static str>> = LazyLock::new(|| {
vec![
".doc", ".docm", ".xls", ".xlsm", ".ppt", ".pptm",
".dotm", ".xlsb", ".ppam", ".potm",
]
});
// ---------------------------------------------------------------------------
// HTML helpers
// ---------------------------------------------------------------------------
/// Strip HTML tags and decode common entities to produce plain text.
fn extract_text_from_html(html: &str) -> String {
// Remove style and script blocks first
let no_style = Regex::new(r"(?is)<style[^>]*>.*?</style>").unwrap();
let no_script = Regex::new(r"(?is)<script[^>]*>.*?</script>").unwrap();
let no_tags = Regex::new(r"<[^>]+>").unwrap();
let text = no_style.replace_all(html, " ");
let text = no_script.replace_all(&text, " ");
let text = no_tags.replace_all(&text, " ");
text.replace("&nbsp;", " ")
.replace("&lt;", "<")
.replace("&gt;", ">")
.replace("&amp;", "&")
.replace("&quot;", "\"")
.replace("&apos;", "'")
.split_whitespace()
.collect::<Vec<_>>()
.join(" ")
}
/// Extract all href links from HTML.
fn extract_links_from_html(html: &str) -> Vec<String> {
HREF_PATTERN
.captures_iter(html)
.filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
.collect()
}
// ---------------------------------------------------------------------------
// Scoring helpers
// ---------------------------------------------------------------------------
fn matches_any(text: &str, patterns: &[Regex]) -> bool {
patterns.iter().any(|p| p.is_match(text))
}
// ---------------------------------------------------------------------------
// Main scan entry point
// ---------------------------------------------------------------------------
/// Scan email content for threats.
///
/// This mirrors the TypeScript ContentScanner logic — scanning the subject,
/// text body, HTML body, and attachment filenames against predefined patterns.
/// Returns an aggregate threat score and the highest-severity threat type.
pub fn scan_content(
subject: Option<&str>,
text_body: Option<&str>,
html_body: Option<&str>,
attachment_names: &[String],
) -> ContentScanResult {
let mut score: u32 = 0;
let mut threat_type: Option<String> = None;
let mut threat_details: Option<String> = None;
let mut scanned: Vec<String> = Vec::new();
// Helper: upgrade threat info only if the new finding is more severe.
macro_rules! record {
($new_score:expr, $ttype:expr, $details:expr) => {
score += $new_score;
// Always adopt the threat type from the highest-scoring match.
threat_type = Some($ttype.to_string());
threat_details = Some($details.to_string());
};
}
// ── Subject scanning ──────────────────────────────────────────────
if let Some(subj) = subject {
scanned.push("subject".into());
if matches_any(subj, &PHISHING_PATTERNS) {
record!(25, "phishing", format!("Subject contains potential phishing indicators: {}", subj));
} else if matches_any(subj, &SPAM_PATTERNS) {
record!(15, "spam", format!("Subject contains potential spam indicators: {}", subj));
}
}
// ── Text body scanning ────────────────────────────────────────────
if let Some(text) = text_body {
scanned.push("text".into());
// Check each category and accumulate score (same order as TS)
for pat in SUSPICIOUS_LINK_PATTERNS.iter() {
if pat.is_match(text) {
score += 20;
if threat_type.as_deref() != Some("suspicious_link") {
threat_type = Some("suspicious_link".into());
threat_details = Some("Text contains suspicious links".into());
}
}
}
for pat in PHISHING_PATTERNS.iter() {
if pat.is_match(text) {
score += 25;
threat_type = Some("phishing".into());
threat_details = Some("Text contains potential phishing indicators".into());
}
}
for pat in SPAM_PATTERNS.iter() {
if pat.is_match(text) {
score += 15;
if threat_type.is_none() {
threat_type = Some("spam".into());
threat_details = Some("Text contains potential spam indicators".into());
}
}
}
for pat in MALWARE_PATTERNS.iter() {
if pat.is_match(text) {
score += 30;
threat_type = Some("malware".into());
threat_details = Some("Text contains potential malware indicators".into());
}
}
for pat in SENSITIVE_DATA_PATTERNS.iter() {
if pat.is_match(text) {
score += 25;
if threat_type.is_none() {
threat_type = Some("sensitive_data".into());
threat_details = Some("Text contains potentially sensitive data patterns".into());
}
}
}
}
// ── HTML body scanning ────────────────────────────────────────────
if let Some(html) = html_body {
scanned.push("html".into());
// Script injection check
for pat in SCRIPT_INJECTION_PATTERNS.iter() {
if pat.is_match(html) {
score += 40;
if threat_type.as_deref() != Some("xss") {
threat_type = Some("xss".into());
threat_details = Some("HTML contains potentially malicious script content".into());
}
}
}
// Extract text from HTML and scan (half score to avoid double counting)
let text_content = extract_text_from_html(html);
if !text_content.is_empty() {
let mut html_text_score: u32 = 0;
let mut html_text_type: Option<String> = None;
let mut html_text_details: Option<String> = None;
// Re-run text patterns on extracted HTML text
for pat in SUSPICIOUS_LINK_PATTERNS.iter() {
if pat.is_match(&text_content) {
html_text_score += 20;
html_text_type = Some("suspicious_link".into());
html_text_details = Some("Text contains suspicious links".into());
}
}
for pat in PHISHING_PATTERNS.iter() {
if pat.is_match(&text_content) {
html_text_score += 25;
html_text_type = Some("phishing".into());
html_text_details = Some("Text contains potential phishing indicators".into());
}
}
for pat in SPAM_PATTERNS.iter() {
if pat.is_match(&text_content) {
html_text_score += 15;
if html_text_type.is_none() {
html_text_type = Some("spam".into());
html_text_details = Some("Text contains potential spam indicators".into());
}
}
}
for pat in MALWARE_PATTERNS.iter() {
if pat.is_match(&text_content) {
html_text_score += 30;
html_text_type = Some("malware".into());
html_text_details = Some("Text contains potential malware indicators".into());
}
}
for pat in SENSITIVE_DATA_PATTERNS.iter() {
if pat.is_match(&text_content) {
html_text_score += 25;
if html_text_type.is_none() {
html_text_type = Some("sensitive_data".into());
html_text_details = Some("Text contains potentially sensitive data patterns".into());
}
}
}
if html_text_score > 0 {
// Add half of the text content score to avoid double counting
score += html_text_score / 2;
if let Some(t) = html_text_type {
if threat_type.is_none() || html_text_score > score {
threat_type = Some(t);
threat_details = html_text_details;
}
}
}
}
// Extract and check links from HTML
let links = extract_links_from_html(html);
if !links.is_empty() {
let mut suspicious_count = 0u32;
for link in &links {
if matches_any(link, &SUSPICIOUS_LINK_PATTERNS) {
suspicious_count += 1;
}
}
if suspicious_count > 0 {
let pct = (suspicious_count as f64 / links.len() as f64) * 100.0;
let additional = std::cmp::min(40, (pct / 2.5) as u32);
score += additional;
if additional > 20 || threat_type.is_none() {
threat_type = Some("suspicious_link".into());
threat_details = Some(format!(
"HTML contains {} suspicious links out of {} total links",
suspicious_count,
links.len()
));
}
}
}
}
// ── Attachment filename scanning ──────────────────────────────────
for name in attachment_names {
let lower = name.to_lowercase();
scanned.push(format!("attachment:{}", lower));
// Check executable extensions
for ext in EXECUTABLE_EXTENSIONS.iter() {
if lower.ends_with(ext) {
score += 70;
threat_type = Some("executable".into());
threat_details = Some(format!(
"Attachment has a potentially dangerous extension: {}",
name
));
break;
}
}
// Check macro document extensions
for ext in MACRO_DOCUMENT_EXTENSIONS.iter() {
if lower.ends_with(ext) {
// Flag macro-capable documents (lower score than executables)
score += 20;
if threat_type.is_none() {
threat_type = Some("malicious_macro".into());
threat_details = Some(format!(
"Attachment is a macro-capable document: {}",
name
));
}
break;
}
}
}
ContentScanResult {
threat_score: score,
threat_type,
threat_details,
scanned_elements: scanned,
}
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_clean_content() {
let result = scan_content(
Some("Project Update"),
Some("The project is on track."),
None,
&[],
);
assert_eq!(result.threat_score, 0);
assert!(result.threat_type.is_none());
}
#[test]
fn test_phishing_subject() {
let result = scan_content(
Some("URGENT: Verify your bank account details immediately"),
None,
None,
&[],
);
assert!(result.threat_score >= 25);
assert_eq!(result.threat_type.as_deref(), Some("phishing"));
}
#[test]
fn test_spam_body() {
let result = scan_content(
None,
Some("Win a million dollars in the lottery winner contest!"),
None,
&[],
);
assert!(result.threat_score >= 15);
assert_eq!(result.threat_type.as_deref(), Some("spam"));
}
#[test]
fn test_suspicious_links() {
let result = scan_content(
None,
Some("Check out https://bit.ly/2x3F5 for more info"),
None,
&[],
);
assert!(result.threat_score >= 20);
assert_eq!(result.threat_type.as_deref(), Some("suspicious_link"));
}
#[test]
fn test_script_injection() {
let result = scan_content(
None,
None,
Some("<p>Hello</p><script>document.cookie='steal';</script>"),
&[],
);
assert!(result.threat_score >= 40);
assert_eq!(result.threat_type.as_deref(), Some("xss"));
}
#[test]
fn test_executable_attachment() {
let result = scan_content(
None,
None,
None,
&["update.exe".into()],
);
assert!(result.threat_score >= 70);
assert_eq!(result.threat_type.as_deref(), Some("executable"));
}
#[test]
fn test_macro_document() {
let result = scan_content(
None,
None,
None,
&["report.docm".into()],
);
assert!(result.threat_score >= 20);
assert_eq!(result.threat_type.as_deref(), Some("malicious_macro"));
}
#[test]
fn test_malware_indicators() {
let result = scan_content(
None,
Some("Please enable macros to view this document properly."),
None,
&[],
);
assert!(result.threat_score >= 30);
assert_eq!(result.threat_type.as_deref(), Some("malware"));
}
#[test]
fn test_html_link_extraction() {
let result = scan_content(
None,
None,
Some(r#"<a href="https://bit.ly/abc">click</a> and <a href="https://t.co/xyz">here</a>"#),
&[],
);
assert!(result.threat_score > 0);
}
#[test]
fn test_compound_threats() {
let result = scan_content(
Some("URGENT: Verify your account details immediately"),
Some("Your account will be suspended unless you verify at https://bit.ly/2x3F5"),
Some(r#"<a href="https://bit.ly/2x3F5">verify</a>"#),
&["verification.exe".into()],
);
assert!(result.threat_score > 70);
}
}

View File

@@ -1,5 +1,6 @@
//! mailer-security: DKIM, SPF, DMARC verification, and IP reputation checking.
pub mod content_scanner;
pub mod dkim;
pub mod dmarc;
pub mod error;