import * as plugins from '../plugins.js';
import * as paths from '../paths.js';
import { logger } from '../logger.js';
import { Email } from '../mta/classes.email.js';
import type { IAttachment } from '../mta/classes.email.js';
import { SecurityLogger, SecurityLogLevel, SecurityEventType } from './classes.securitylogger.js';
import { LRUCache } from 'lru-cache';

/**
 * Scan result information
 */
export interface IScanResult {
  isClean: boolean; // Whether the content is clean (no threats detected)
  threatType?: string; // Type of threat if detected
  threatDetails?: string; // Details about the detected threat
  threatScore: number; // 0 (clean) to 100 (definitely malicious)
  scannedElements: string[]; // What was scanned (subject, body, attachments, etc.)
  timestamp: number; // When this scan was performed
}

/**
 * Options for content scanner configuration
 */
export interface IContentScannerOptions {
  maxCacheSize?: number; // Maximum number of entries to cache
  cacheTTL?: number; // TTL for cache entries in ms
  scanSubject?: boolean; // Whether to scan email subjects
  scanBody?: boolean; // Whether to scan email bodies
  scanAttachments?: boolean; // Whether to scan attachments
  maxAttachmentSizeToScan?: number; // Max size of attachments whose content is scanned, in bytes
  // NOTE(review): scanAttachmentNames is accepted and defaulted but not consulted by the
  // scanner code in this file; filename checks are governed by blockExecutables/blockMacros.
  scanAttachmentNames?: boolean; // Whether to scan attachment filenames
  blockExecutables?: boolean; // Whether to block executable attachments
  blockMacros?: boolean; // Whether to block documents with macros
  customRules?: Array<{ // Custom scanning rules
    pattern: string | RegExp; // Pattern to match (strings are compiled case-insensitively)
    type: string; // Type of threat
    score: number; // Threat score
    description: string; // Description of the threat
  }>;
  minThreatScore?: number; // Minimum score to consider content as a threat
  highThreatScore?: number; // Score above which content is considered high threat
}

/**
 * Threat categories
 */
export enum ThreatCategory {
  SPAM = 'spam',
  PHISHING = 'phishing',
  MALWARE = 'malware',
  EXECUTABLE = 'executable',
  SUSPICIOUS_LINK = 'suspicious_link',
  MALICIOUS_MACRO = 'malicious_macro',
  XSS = 'xss',
  SENSITIVE_DATA = 'sensitive_data',
  BLACKLISTED_CONTENT = 'blacklisted_content',
  CUSTOM_RULE = 'custom_rule'
}

/**
 * Content Scanner for detecting malicious email content.
 *
 * Scoring is heuristic: each matching pattern adds a fixed number of points to a
 * running threat score, and the email is considered clean while the score stays
 * below `minThreatScore`.
 */
export class ContentScanner {
  private static instance: ContentScanner;

  /** Upper bound of the score reported in {@link IScanResult.threatScore}. */
  private static readonly MAX_THREAT_SCORE = 100;

  private scanCache: LRUCache<string, IScanResult>;
  private options: Required<IContentScannerOptions>;

  /** Custom rules with their patterns compiled once at construction time. */
  private compiledCustomRules: Array<{
    pattern: RegExp;
    type: string;
    score: number;
    description: string;
  }>;

  // Predefined patterns for common threats
  private static readonly MALICIOUS_PATTERNS = {
    // Phishing patterns
    phishing: [
      /(?:verify|confirm|update|login).*(?:account|password|details)/i,
      /urgent.*(?:action|attention|required)/i,
      /(?:paypal|apple|microsoft|amazon|google|bank).*(?:verify|confirm|suspend)/i,
      /your.*(?:account).*(?:suspended|compromised|locked)/i,
      /\b(?:password reset|security alert|security notice)\b/i
    ],
    // Spam indicators
    spam: [
      /\b(?:viagra|cialis|enlargement|diet pill|lose weight fast|cheap meds)\b/i,
      /\b(?:million dollars|lottery winner|prize claim|inheritance|rich widow)\b/i,
      /\b(?:earn from home|make money fast|earn \$\d{3,}\/day)\b/i,
      /\b(?:limited time offer|act now|exclusive deal|only \d+ left)\b/i,
      /\b(?:forex|stock tip|investment opportunity|cryptocurrency|bitcoin)\b/i
    ],
    // Malware indicators in text
    malware: [
      /(?:attached file|see attachment).*(?:invoice|receipt|statement|document)/i,
      /open.*(?:the attached|this attachment)/i,
      /(?:enable|allow).*(?:macros|content|editing)/i,
      /download.*(?:attachment|file|document)/i,
      /\b(?:ransomware protection|virus alert|malware detected)\b/i
    ],
    // Suspicious links
    suspiciousLinks: [
      /https?:\/\/bit\.ly\//i,
      /https?:\/\/goo\.gl\//i,
      /https?:\/\/t\.co\//i,
      /https?:\/\/tinyurl\.com\//i,
      /https?:\/\/(?:\d{1,3}\.){3}\d{1,3}/i, // IP address URLs
      /https?:\/\/.*\.(?:xyz|top|club|gq|cf)\//i, // Suspicious TLDs
      /(?:login|account|signin|auth).*\.(?!gov|edu|com|org|net)\w+\.\w+/i // Login pages on unusual domains
    ],
    // XSS and script injection
    scriptInjection: [
      /<script\b[^>]*>.*?<\/script>/is,
      /javascript:/i,
      /on(?:click|load|mouse|error|focus|blur)=".*"/i,
      /document\.(?:cookie|write|location)/i,
      /eval\s*\(/i
    ],
    // Sensitive data patterns
    sensitiveData: [
      /\b(?:\d{3}-\d{2}-\d{4}|\d{9})\b/, // SSN
      /\b\d{13,16}\b/, // Credit card numbers
      // NOTE(review): this also matches any standalone 4-character alphanumeric word
      // (e.g. "this"), so most prose triggers it; consider requiring a minimum length.
      /\b(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{4})\b/ // Possible Base64
    ]
  };

  /**
   * Checks applied to plain text, in evaluation order. Every matching pattern adds
   * `score` points, so several patterns of one category accumulate.
   */
  private static readonly TEXT_CHECKS: ReadonlyArray<{
    patterns: RegExp[];
    score: number;
    type: ThreatCategory;
    details: string;
  }> = [
    {
      patterns: ContentScanner.MALICIOUS_PATTERNS.suspiciousLinks,
      score: 20,
      type: ThreatCategory.SUSPICIOUS_LINK,
      details: `Text contains suspicious links`
    },
    {
      patterns: ContentScanner.MALICIOUS_PATTERNS.phishing,
      score: 25,
      type: ThreatCategory.PHISHING,
      details: `Text contains potential phishing indicators`
    },
    {
      patterns: ContentScanner.MALICIOUS_PATTERNS.spam,
      score: 15,
      type: ThreatCategory.SPAM,
      details: `Text contains potential spam indicators`
    },
    {
      patterns: ContentScanner.MALICIOUS_PATTERNS.malware,
      score: 30,
      type: ThreatCategory.MALWARE,
      details: `Text contains potential malware indicators`
    },
    {
      patterns: ContentScanner.MALICIOUS_PATTERNS.sensitiveData,
      score: 25,
      type: ThreatCategory.SENSITIVE_DATA,
      details: `Text contains potentially sensitive data patterns`
    }
  ];

  /** Replacement text for the HTML entities decoded by extractTextFromHtml. */
  private static readonly HTML_ENTITIES: Readonly<Record<string, string>> = {
    nbsp: ' ',
    lt: '<',
    gt: '>',
    amp: '&',
    quot: '"',
    '#39': "'"
  };

  // Common executable extensions
  private static readonly EXECUTABLE_EXTENSIONS = [
    '.exe', '.dll', '.bat', '.cmd', '.msi', '.js', '.vbs', '.ps1', '.sh', '.jar', '.py',
    '.com', '.scr', '.pif', '.hta', '.cpl', '.reg', '.vba', '.lnk', '.wsf', '.msp', '.mst'
  ];

  // Document formats that may contain macros
  private static readonly MACRO_DOCUMENT_EXTENSIONS = [
    '.doc', '.docm', '.xls', '.xlsm', '.ppt', '.pptm', '.dotm', '.xlsb', '.ppam', '.potm'
  ];

  /**
   * Default options for the content scanner
   */
  private static readonly DEFAULT_OPTIONS: Required<IContentScannerOptions> = {
    maxCacheSize: 10000,
    cacheTTL: 24 * 60 * 60 * 1000, // 24 hours
    scanSubject: true,
    scanBody: true,
    scanAttachments: true,
    maxAttachmentSizeToScan: 10 * 1024 * 1024, // 10MB
    scanAttachmentNames: true,
    blockExecutables: true,
    blockMacros: true,
    customRules: [],
    minThreatScore: 30, // Minimum score to consider content as a threat
    highThreatScore: 70 // Score above which content is considered high threat
  };

  /**
   * Constructor for the ContentScanner
   * @param options Configuration options; unspecified fields fall back to the defaults
   */
  constructor(options: IContentScannerOptions = {}) {
    // Merge with default options
    this.options = {
      ...ContentScanner.DEFAULT_OPTIONS,
      ...options
    };

    // Compile custom rules once. An invalid string pattern is skipped (and logged)
    // instead of making every later scan throw.
    this.compiledCustomRules = [];
    for (const rule of this.options.customRules) {
      try {
        const pattern = rule.pattern instanceof RegExp ? rule.pattern : new RegExp(rule.pattern, 'i');
        this.compiledCustomRules.push({
          pattern,
          type: rule.type,
          score: rule.score,
          description: rule.description
        });
      } catch (error: unknown) {
        const { message } = ContentScanner.describeError(error);
        logger.log('error', `Ignoring invalid custom content rule "${rule.type}": ${message}`);
      }
    }

    // Initialize cache
    this.scanCache = new LRUCache<string, IScanResult>({
      max: this.options.maxCacheSize,
      ttl: this.options.cacheTTL,
    });

    logger.log('info', 'ContentScanner initialized');
  }

  /**
   * Get the singleton instance of the scanner.
   *
   * The options are only used when the instance is first created; on later calls
   * the existing instance is returned and `options` is ignored.
   * @param options Configuration options
   * @returns Singleton scanner instance
   */
  public static getInstance(options: IContentScannerOptions = {}): ContentScanner {
    if (!ContentScanner.instance) {
      ContentScanner.instance = new ContentScanner(options);
    }
    return ContentScanner.instance;
  }

  /**
   * Scan an email for malicious content.
   *
   * Results are cached per email. If scanning throws, the error is logged and a
   * "clean" result tagged `scan_error` is returned (fail-open).
   * @param email The email to scan
   * @returns Scan result
   */
  public async scanEmail(email: Email): Promise<IScanResult> {
    try {
      // Generate a cache key from the email
      const cacheKey = this.generateCacheKey(email);

      // Check cache first
      const cachedResult = this.scanCache.get(cacheKey);
      if (cachedResult) {
        logger.log('info', `Using cached scan result for email ${email.getMessageId()}`);
        return cachedResult;
      }

      // Initialize scan result
      const result: IScanResult = {
        isClean: true,
        threatScore: 0,
        scannedElements: [],
        timestamp: Date.now()
      };

      // List of scan promises
      const scanPromises: Array<Promise<void>> = [];

      // Scan subject
      if (this.options.scanSubject && email.subject) {
        scanPromises.push(this.scanSubject(email.subject, result));
      }

      // Scan body content
      if (this.options.scanBody) {
        if (email.text) {
          scanPromises.push(this.scanTextContent(email.text, result));
        }
        if (email.html) {
          scanPromises.push(this.scanHtmlContent(email.html, result));
        }
      }

      // Scan attachments
      if (this.options.scanAttachments && email.attachments && email.attachments.length > 0) {
        for (const attachment of email.attachments) {
          scanPromises.push(this.scanAttachment(attachment, result));
        }
      }

      // Run all scans in parallel
      await Promise.all(scanPromises);

      // Decisions use the raw accumulated score so configured thresholds keep their
      // meaning; the reported score is clamped to the documented 0..100 range.
      const rawScore = result.threatScore;
      result.isClean = rawScore < this.options.minThreatScore;
      result.threatScore = Math.max(0, Math.min(ContentScanner.MAX_THREAT_SCORE, rawScore));

      // Save to cache
      this.scanCache.set(cacheKey, result);

      // Log findings; high threats are logged at a higher severity
      if (rawScore >= this.options.highThreatScore) {
        this.reportThreat(email, result, true);
      } else if (!result.isClean) {
        this.reportThreat(email, result, false);
      }

      return result;
    } catch (error: unknown) {
      const { message, stack } = ContentScanner.describeError(error);
      logger.log('error', `Error scanning email: ${message}`, {
        messageId: email.getMessageId(),
        error: stack
      });

      // Return a safe default with error indication
      return {
        isClean: true, // Let it pass if scanner fails (configure as desired)
        threatScore: 0,
        scannedElements: ['error'],
        timestamp: Date.now(),
        threatType: 'scan_error',
        threatDetails: `Scan error: ${message}`
      };
    }
  }

  /**
   * Normalize a caught value into a message and optional stack trace.
   * @param error The value caught by a `catch` clause
   * @returns The error message and, for Error instances, the stack
   */
  private static describeError(error: unknown): { message: string; stack?: string } {
    if (error instanceof Error) {
      return { message: error.message, stack: error.stack };
    }
    return { message: String(error) };
  }

  /**
   * Test a pattern against some input without depending on regex state.
   *
   * Regexes carrying the `g` or `y` flag remember `lastIndex` between `test()`
   * calls; resetting it keeps repeated scans with the same pattern consistent.
   * @param pattern The pattern to test
   * @param input The text to test against
   * @returns Whether the pattern matches anywhere in the input
   */
  private static matches(pattern: RegExp, input: string): boolean {
    pattern.lastIndex = 0;
    return pattern.test(input);
  }

  /**
   * Generate a cache key from an email
   * @param email The email to generate a key for
   * @returns Cache key
   */
  private generateCacheKey(email: Email): string {
    // Use message ID if available
    // NOTE(review): the Message-ID is sender-controlled, so a reused ID returns the
    // cached verdict of an earlier email - confirm this is acceptable.
    const messageId = email.getMessageId();
    if (messageId) {
      return `email:${messageId}`;
    }

    // Fallback to a hash of key content
    const contentToHash = [
      email.from,
      email.subject || '',
      email.text?.substring(0, 1000) || '',
      email.html?.substring(0, 1000) || '',
      email.attachments?.length || 0
    ].join(':');

    return `email:${plugins.crypto.createHash('sha256').update(contentToHash).digest('hex')}`;
  }

  /**
   * Scan email subject for threats. Stops at the first matching pattern, checking
   * phishing patterns first, then spam patterns, then custom rules.
   * @param subject The subject to scan
   * @param result The scan result to update
   */
  private async scanSubject(subject: string, result: IScanResult): Promise<void> {
    result.scannedElements.push('subject');

    // Check against phishing patterns
    if (ContentScanner.MALICIOUS_PATTERNS.phishing.some((pattern) => ContentScanner.matches(pattern, subject))) {
      result.threatScore += 25;
      result.threatType = ThreatCategory.PHISHING;
      result.threatDetails = `Subject contains potential phishing indicators: ${subject}`;
      return;
    }

    // Check against spam patterns
    if (ContentScanner.MALICIOUS_PATTERNS.spam.some((pattern) => ContentScanner.matches(pattern, subject))) {
      result.threatScore += 15;
      result.threatType = ThreatCategory.SPAM;
      result.threatDetails = `Subject contains potential spam indicators: ${subject}`;
      return;
    }

    // Check custom rules
    for (const rule of this.compiledCustomRules) {
      if (ContentScanner.matches(rule.pattern, subject)) {
        result.threatScore += rule.score;
        result.threatType = rule.type;
        result.threatDetails = rule.description;
        return;
      }
    }
  }

  /**
   * Scan plain text content for threats. Unlike the subject scan, every matching
   * pattern contributes to the score.
   * @param text The text content to scan
   * @param result The scan result to update
   */
  private async scanTextContent(text: string, result: IScanResult): Promise<void> {
    result.scannedElements.push('text');

    for (const check of ContentScanner.TEXT_CHECKS) {
      for (const pattern of check.patterns) {
        if (!ContentScanner.matches(pattern, text)) {
          continue;
        }
        result.threatScore += check.score;
        // Take over the reported type unless this hit is the only contribution so
        // far and a different type has already been recorded.
        const takeoverThreshold = result.threatType === check.type ? 0 : check.score;
        if (!result.threatType || result.threatScore > takeoverThreshold) {
          result.threatType = check.type;
          result.threatDetails = check.details;
        }
      }
    }

    // Check custom rules
    for (const rule of this.compiledCustomRules) {
      if (ContentScanner.matches(rule.pattern, text)) {
        result.threatScore += rule.score;
        if (!result.threatType || result.threatScore > 20) {
          result.threatType = rule.type;
          result.threatDetails = rule.description;
        }
      }
    }
  }

  /**
   * Scan HTML content for threats: script injection, the visible text, and links.
   * @param html The HTML content to scan
   * @param result The scan result to update
   */
  private async scanHtmlContent(html: string, result: IScanResult): Promise<void> {
    result.scannedElements.push('html');

    // Check for script injection
    for (const pattern of ContentScanner.MALICIOUS_PATTERNS.scriptInjection) {
      if (ContentScanner.matches(pattern, html)) {
        result.threatScore += 40;
        if (result.threatType !== ThreatCategory.XSS) {
          result.threatType = ThreatCategory.XSS;
          result.threatDetails = `HTML contains potentially malicious script content`;
        }
      }
    }

    // Extract text content from HTML for further scanning
    const textContent = this.extractTextFromHtml(html);
    if (textContent) {
      // Scan into a scratch result so the text score can be weighted separately
      const tempResult: IScanResult = {
        isClean: true,
        threatScore: 0,
        scannedElements: [],
        timestamp: Date.now()
      };
      await this.scanTextContent(textContent, tempResult);

      if (tempResult.threatType && tempResult.threatScore > 0) {
        // Add half of the text content score to avoid double counting
        result.threatScore += Math.floor(tempResult.threatScore / 2);

        // Adopt the threat type if more severe or no existing type
        if (!result.threatType || tempResult.threatScore > result.threatScore) {
          result.threatType = tempResult.threatType;
          result.threatDetails = tempResult.threatDetails;
        }
      }
    }

    // Extract and check links from HTML
    const links = this.extractLinksFromHtml(html);
    if (links.length > 0) {
      const suspiciousLinks = links.filter((link) =>
        ContentScanner.MALICIOUS_PATTERNS.suspiciousLinks.some((pattern) => ContentScanner.matches(pattern, link))
      ).length;

      if (suspiciousLinks > 0) {
        // Add score based on percentage of suspicious links
        const suspiciousPercentage = (suspiciousLinks / links.length) * 100;
        const additionalScore = Math.min(40, Math.floor(suspiciousPercentage / 2.5));
        result.threatScore += additionalScore;

        if (!result.threatType || additionalScore > 20) {
          result.threatType = ThreatCategory.SUSPICIOUS_LINK;
          result.threatDetails = `HTML contains ${suspiciousLinks} suspicious links out of ${links.length} total links`;
        }
      }
    }
  }

  /**
   * Scan an attachment for threats.
   *
   * The filename extension check runs before the size limit is applied, so an
   * oversized attachment cannot bypass `blockExecutables`; only content-based
   * checks are skipped for attachments above `maxAttachmentSizeToScan`.
   * @param attachment The attachment to scan
   * @param result The scan result to update
   */
  private async scanAttachment(attachment: IAttachment, result: IScanResult): Promise<void> {
    // Tolerate a missing filename rather than throwing and failing the whole scan
    const filename = (attachment.filename ?? '').toLowerCase();
    result.scannedElements.push(`attachment:${filename}`);

    // Check filename for executable extensions
    if (
      this.options.blockExecutables &&
      ContentScanner.EXECUTABLE_EXTENSIONS.some((ext) => filename.endsWith(ext))
    ) {
      result.threatScore += 70; // High score for executable attachments
      result.threatType = ThreatCategory.EXECUTABLE;
      result.threatDetails = `Attachment has a potentially dangerous extension: ${filename}`;
      return; // No need to scan contents if filename already flagged
    }

    const content = attachment.content;
    if (!content) {
      return; // Nothing further can be checked without content
    }

    // Skip content checks for large attachments if configured
    if (content.length > this.options.maxAttachmentSizeToScan) {
      logger.log('info', `Skipping scan of large attachment: ${filename} (${content.length} bytes)`);
      return;
    }

    // Check for Office documents with macros.
    // This is a simplified check - a real implementation would use specialized
    // libraries to detect macros in Office documents.
    if (
      this.options.blockMacros &&
      ContentScanner.MACRO_DOCUMENT_EXTENSIONS.some((ext) => filename.endsWith(ext)) &&
      this.likelyContainsMacros(attachment)
    ) {
      result.threatScore += 60;
      result.threatType = ThreatCategory.MALICIOUS_MACRO;
      result.threatDetails = `Attachment appears to contain macros: ${filename}`;
      return;
    }

    // Convert to string for scanning, with a limit to prevent memory issues
    const textContent = this.extractTextFromBuffer(content);
    if (textContent) {
      // Each pattern category contributes at most once per attachment
      for (const [category, patterns] of Object.entries(ContentScanner.MALICIOUS_PATTERNS)) {
        if (patterns.some((pattern) => ContentScanner.matches(pattern, textContent))) {
          result.threatScore += 30;
          if (!result.threatType) {
            result.threatType = this.mapCategoryToThreatType(category);
            result.threatDetails = `Attachment content contains suspicious patterns: ${filename}`;
          }
        }
      }
    }

    // Check for PE headers (Windows executables): 'MZ' magic bytes
    if (content.length > 64 && content[0] === 0x4D && content[1] === 0x5A) {
      result.threatScore += 80;
      result.threatType = ThreatCategory.EXECUTABLE;
      result.threatDetails = `Attachment contains executable code: ${filename}`;
    }
  }

  /**
   * Extract absolute http(s) links from `href` attributes in HTML content.
   * Simple regex-based extraction - a real implementation might use a proper HTML parser.
   * @param html HTML content
   * @returns Array of extracted links
   */
  private extractLinksFromHtml(html: string): string[] {
    const links: string[] = [];
    for (const match of html.matchAll(/href=["'](https?:\/\/[^"']+)["']/gi)) {
      const url = match[1];
      if (url) {
        links.push(url);
      }
    }
    return links;
  }

  /**
   * Extract plain text from HTML by dropping style/script elements and tags and
   * decoding a small set of entities - simplified version.
   * @param html HTML content
   * @returns Extracted text with whitespace collapsed
   */
  private extractTextFromHtml(html: string): string {
    return html
      .replace(/<style[^>]*>.*?<\/style>/gs, '')
      .replace(/<script[^>]*>.*?<\/script>/gs, '')
      .replace(/<[^>]+>/g, ' ')
      // Decode in a single pass so an escaped ampersand followed by entity text is
      // not decoded a second time.
      .replace(/&(nbsp|lt|gt|amp|quot|#39);/g, (entity: string, name: string) =>
        ContentScanner.HTML_ENTITIES[name] ?? entity
      )
      .replace(/\s+/g, ' ')
      .trim();
  }

  /**
   * Extract text from a binary buffer for scanning
   * @param buffer Binary content
   * @returns Extracted text (may be partial), or an empty string on failure
   */
  private extractTextFromBuffer(buffer: Buffer): string {
    try {
      // Limit the amount we convert to avoid memory issues
      const sampleSize = Math.min(buffer.length, 100 * 1024); // 100KB max sample
      const sample = buffer.subarray(0, sampleSize);

      // Try to convert to string, filtering out non-printable chars
      return sample.toString('utf8')
        .replace(/[\x00-\x09\x0B-\x1F\x7F-\x9F]/g, '') // Remove control chars
        .replace(/\uFFFD/g, ''); // Remove replacement char
    } catch (error: unknown) {
      const { message } = ContentScanner.describeError(error);
      logger.log('warn', `Error extracting text from buffer: ${message}`);
      return '';
    }
  }

  /**
   * Check if an Office document likely contains macros.
   * This is a simplified check - real implementation would use specialized libraries.
   * @param attachment The attachment to check
   * @returns Whether the file likely contains macros
   */
  private likelyContainsMacros(attachment: IAttachment): boolean {
    // Simple heuristic: look for VBA/macro related strings
    // This is a simplified approach and not comprehensive
    const content = this.extractTextFromBuffer(attachment.content);
    const macroIndicators = [
      /vbaProject\.bin/i,
      /Microsoft VBA/i,
      /\bVBA\b/,
      /Auto_Open/i,
      /AutoExec/i,
      /DocumentOpen/i,
      /AutoOpen/i,
      /\bExecute\(/i,
      /\bShell\(/i,
      /\bCreateObject\(/i
    ];

    return macroIndicators.some((indicator) => indicator.test(content));
  }

  /**
   * Map a pattern category to a threat type
   * @param category The pattern category (a key of MALICIOUS_PATTERNS)
   * @returns The corresponding threat type; unknown categories map to blacklisted content
   */
  private mapCategoryToThreatType(category: string): string {
    switch (category) {
      case 'phishing': return ThreatCategory.PHISHING;
      case 'spam': return ThreatCategory.SPAM;
      case 'malware': return ThreatCategory.MALWARE;
      case 'suspiciousLinks': return ThreatCategory.SUSPICIOUS_LINK;
      case 'scriptInjection': return ThreatCategory.XSS;
      case 'sensitiveData': return ThreatCategory.SENSITIVE_DATA;
      default: return ThreatCategory.BLACKLISTED_CONTENT;
    }
  }

  /**
   * Log a threat finding to the security logger
   * @param email The email containing the threat
   * @param result The scan result
   * @param isHighThreat Whether the finding reached the high-threat threshold
   */
  private reportThreat(email: Email, result: IScanResult, isHighThreat: boolean): void {
    const recipients = email.to.join(', ');
    SecurityLogger.getInstance().logEvent({
      level: isHighThreat ? SecurityLogLevel.ERROR : SecurityLogLevel.WARN,
      type: isHighThreat ? SecurityEventType.MALWARE : SecurityEventType.SPAM,
      message: isHighThreat
        ? `High threat content detected in email from ${email.from} to ${recipients}`
        : `Suspicious content detected in email from ${email.from} to ${recipients}`,
      details: {
        messageId: email.getMessageId(),
        threatType: result.threatType,
        threatDetails: result.threatDetails,
        threatScore: result.threatScore,
        scannedElements: result.scannedElements,
        subject: email.subject
      },
      success: false,
      domain: email.getFromDomain()
    });
  }

  /**
   * Get threat level description based on score
   * @param score Threat score
   * @returns 'none' below 20, 'low' below 40, 'medium' below 70, otherwise 'high'
   */
  public static getThreatLevel(score: number): 'none' | 'low' | 'medium' | 'high' {
    if (score < 20) {
      return 'none';
    } else if (score < 40) {
      return 'low';
    } else if (score < 70) {
      return 'medium';
    } else {
      return 'high';
    }
  }
}