Update 2026-05-13 16:43:53
This commit is contained in:
@@ -0,0 +1,552 @@
|
||||
"""Email channel implementation using IMAP polling + SMTP replies."""
|
||||
|
||||
import asyncio
|
||||
import html
|
||||
import imaplib
|
||||
import re
|
||||
import smtplib
|
||||
import ssl
|
||||
from datetime import date
|
||||
from email import policy
|
||||
from email.header import decode_header, make_header
|
||||
from email.message import EmailMessage
|
||||
from email.parser import BytesParser
|
||||
from email.utils import parseaddr
|
||||
from typing import Any
|
||||
|
||||
from loguru import logger
|
||||
from pydantic import Field
|
||||
|
||||
from nanobot.bus.events import OutboundMessage
|
||||
from nanobot.bus.queue import MessageBus
|
||||
from nanobot.channels.base import BaseChannel
|
||||
from nanobot.config.schema import Base
|
||||
|
||||
|
||||
class EmailConfig(Base):
|
||||
"""Email channel configuration (IMAP inbound + SMTP outbound)."""
|
||||
|
||||
enabled: bool = False
|
||||
consent_granted: bool = False
|
||||
|
||||
imap_host: str = ""
|
||||
imap_port: int = 993
|
||||
imap_username: str = ""
|
||||
imap_password: str = ""
|
||||
imap_mailbox: str = "INBOX"
|
||||
imap_use_ssl: bool = True
|
||||
|
||||
smtp_host: str = ""
|
||||
smtp_port: int = 587
|
||||
smtp_username: str = ""
|
||||
smtp_password: str = ""
|
||||
smtp_use_tls: bool = True
|
||||
smtp_use_ssl: bool = False
|
||||
from_address: str = ""
|
||||
|
||||
auto_reply_enabled: bool = True
|
||||
poll_interval_seconds: int = 30
|
||||
mark_seen: bool = True
|
||||
max_body_chars: int = 12000
|
||||
subject_prefix: str = "Re: "
|
||||
allow_from: list[str] = Field(default_factory=list)
|
||||
|
||||
# Email authentication verification (anti-spoofing)
|
||||
verify_dkim: bool = True # Require Authentication-Results with dkim=pass
|
||||
verify_spf: bool = True # Require Authentication-Results with spf=pass
|
||||
|
||||
|
||||
class EmailChannel(BaseChannel):
|
||||
"""
|
||||
Email channel.
|
||||
|
||||
Inbound:
|
||||
- Poll IMAP mailbox for unread messages.
|
||||
- Convert each message into an inbound event.
|
||||
|
||||
Outbound:
|
||||
- Send responses via SMTP back to the sender address.
|
||||
"""
|
||||
|
||||
name = "email"
|
||||
display_name = "Email"
|
||||
_IMAP_MONTHS = (
|
||||
"Jan",
|
||||
"Feb",
|
||||
"Mar",
|
||||
"Apr",
|
||||
"May",
|
||||
"Jun",
|
||||
"Jul",
|
||||
"Aug",
|
||||
"Sep",
|
||||
"Oct",
|
||||
"Nov",
|
||||
"Dec",
|
||||
)
|
||||
_IMAP_RECONNECT_MARKERS = (
|
||||
"disconnected for inactivity",
|
||||
"eof occurred in violation of protocol",
|
||||
"socket error",
|
||||
"connection reset",
|
||||
"broken pipe",
|
||||
"bye",
|
||||
)
|
||||
_IMAP_MISSING_MAILBOX_MARKERS = (
|
||||
"mailbox doesn't exist",
|
||||
"select failed",
|
||||
"no such mailbox",
|
||||
"can't open mailbox",
|
||||
"does not exist",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def default_config(cls) -> dict[str, Any]:
|
||||
return EmailConfig().model_dump(by_alias=True)
|
||||
|
||||
def __init__(self, config: Any, bus: MessageBus):
|
||||
if isinstance(config, dict):
|
||||
config = EmailConfig.model_validate(config)
|
||||
super().__init__(config, bus)
|
||||
self.config: EmailConfig = config
|
||||
self._last_subject_by_chat: dict[str, str] = {}
|
||||
self._last_message_id_by_chat: dict[str, str] = {}
|
||||
self._processed_uids: set[str] = set() # Capped to prevent unbounded growth
|
||||
self._MAX_PROCESSED_UIDS = 100000
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Start polling IMAP for inbound emails."""
|
||||
if not self.config.consent_granted:
|
||||
logger.warning(
|
||||
"Email channel disabled: consent_granted is false. "
|
||||
"Set channels.email.consentGranted=true after explicit user permission."
|
||||
)
|
||||
return
|
||||
|
||||
if not self._validate_config():
|
||||
return
|
||||
|
||||
self._running = True
|
||||
if not self.config.verify_dkim and not self.config.verify_spf:
|
||||
logger.warning(
|
||||
"Email channel: DKIM and SPF verification are both DISABLED. "
|
||||
"Emails with spoofed From headers will be accepted. "
|
||||
"Set verify_dkim=true and verify_spf=true for anti-spoofing protection."
|
||||
)
|
||||
logger.info("Starting Email channel (IMAP polling mode)...")
|
||||
|
||||
poll_seconds = max(5, int(self.config.poll_interval_seconds))
|
||||
while self._running:
|
||||
try:
|
||||
inbound_items = await asyncio.to_thread(self._fetch_new_messages)
|
||||
for item in inbound_items:
|
||||
sender = item["sender"]
|
||||
subject = item.get("subject", "")
|
||||
message_id = item.get("message_id", "")
|
||||
|
||||
if subject:
|
||||
self._last_subject_by_chat[sender] = subject
|
||||
if message_id:
|
||||
self._last_message_id_by_chat[sender] = message_id
|
||||
|
||||
await self._handle_message(
|
||||
sender_id=sender,
|
||||
chat_id=sender,
|
||||
content=item["content"],
|
||||
metadata=item.get("metadata", {}),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Email polling error: {}", e)
|
||||
|
||||
await asyncio.sleep(poll_seconds)
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Stop polling loop."""
|
||||
self._running = False
|
||||
|
||||
async def send(self, msg: OutboundMessage) -> None:
|
||||
"""Send email via SMTP."""
|
||||
if not self.config.consent_granted:
|
||||
logger.warning("Skip email send: consent_granted is false")
|
||||
return
|
||||
|
||||
if not self.config.smtp_host:
|
||||
logger.warning("Email channel SMTP host not configured")
|
||||
return
|
||||
|
||||
to_addr = msg.chat_id.strip()
|
||||
if not to_addr:
|
||||
logger.warning("Email channel missing recipient address")
|
||||
return
|
||||
|
||||
# Determine if this is a reply (recipient has sent us an email before)
|
||||
is_reply = to_addr in self._last_subject_by_chat
|
||||
force_send = bool((msg.metadata or {}).get("force_send"))
|
||||
|
||||
# autoReplyEnabled only controls automatic replies, not proactive sends
|
||||
if is_reply and not self.config.auto_reply_enabled and not force_send:
|
||||
logger.info("Skip automatic email reply to {}: auto_reply_enabled is false", to_addr)
|
||||
return
|
||||
|
||||
base_subject = self._last_subject_by_chat.get(to_addr, "nanobot reply")
|
||||
subject = self._reply_subject(base_subject)
|
||||
if msg.metadata and isinstance(msg.metadata.get("subject"), str):
|
||||
override = msg.metadata["subject"].strip()
|
||||
if override:
|
||||
subject = override
|
||||
|
||||
email_msg = EmailMessage()
|
||||
email_msg["From"] = self.config.from_address or self.config.smtp_username or self.config.imap_username
|
||||
email_msg["To"] = to_addr
|
||||
email_msg["Subject"] = subject
|
||||
email_msg.set_content(msg.content or "")
|
||||
|
||||
in_reply_to = self._last_message_id_by_chat.get(to_addr)
|
||||
if in_reply_to:
|
||||
email_msg["In-Reply-To"] = in_reply_to
|
||||
email_msg["References"] = in_reply_to
|
||||
|
||||
try:
|
||||
await asyncio.to_thread(self._smtp_send, email_msg)
|
||||
except Exception as e:
|
||||
logger.error("Error sending email to {}: {}", to_addr, e)
|
||||
raise
|
||||
|
||||
def _validate_config(self) -> bool:
|
||||
missing = []
|
||||
if not self.config.imap_host:
|
||||
missing.append("imap_host")
|
||||
if not self.config.imap_username:
|
||||
missing.append("imap_username")
|
||||
if not self.config.imap_password:
|
||||
missing.append("imap_password")
|
||||
if not self.config.smtp_host:
|
||||
missing.append("smtp_host")
|
||||
if not self.config.smtp_username:
|
||||
missing.append("smtp_username")
|
||||
if not self.config.smtp_password:
|
||||
missing.append("smtp_password")
|
||||
|
||||
if missing:
|
||||
logger.error("Email channel not configured, missing: {}", ', '.join(missing))
|
||||
return False
|
||||
return True
|
||||
|
||||
def _smtp_send(self, msg: EmailMessage) -> None:
|
||||
timeout = 30
|
||||
if self.config.smtp_use_ssl:
|
||||
with smtplib.SMTP_SSL(
|
||||
self.config.smtp_host,
|
||||
self.config.smtp_port,
|
||||
timeout=timeout,
|
||||
) as smtp:
|
||||
smtp.login(self.config.smtp_username, self.config.smtp_password)
|
||||
smtp.send_message(msg)
|
||||
return
|
||||
|
||||
with smtplib.SMTP(self.config.smtp_host, self.config.smtp_port, timeout=timeout) as smtp:
|
||||
if self.config.smtp_use_tls:
|
||||
smtp.starttls(context=ssl.create_default_context())
|
||||
smtp.login(self.config.smtp_username, self.config.smtp_password)
|
||||
smtp.send_message(msg)
|
||||
|
||||
def _fetch_new_messages(self) -> list[dict[str, Any]]:
|
||||
"""Poll IMAP and return parsed unread messages."""
|
||||
return self._fetch_messages(
|
||||
search_criteria=("UNSEEN",),
|
||||
mark_seen=self.config.mark_seen,
|
||||
dedupe=True,
|
||||
limit=0,
|
||||
)
|
||||
|
||||
def fetch_messages_between_dates(
|
||||
self,
|
||||
start_date: date,
|
||||
end_date: date,
|
||||
limit: int = 20,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Fetch messages in [start_date, end_date) by IMAP date search.
|
||||
|
||||
This is used for historical summarization tasks (e.g. "yesterday").
|
||||
"""
|
||||
if end_date <= start_date:
|
||||
return []
|
||||
|
||||
return self._fetch_messages(
|
||||
search_criteria=(
|
||||
"SINCE",
|
||||
self._format_imap_date(start_date),
|
||||
"BEFORE",
|
||||
self._format_imap_date(end_date),
|
||||
),
|
||||
mark_seen=False,
|
||||
dedupe=False,
|
||||
limit=max(1, int(limit)),
|
||||
)
|
||||
|
||||
def _fetch_messages(
|
||||
self,
|
||||
search_criteria: tuple[str, ...],
|
||||
mark_seen: bool,
|
||||
dedupe: bool,
|
||||
limit: int,
|
||||
) -> list[dict[str, Any]]:
|
||||
messages: list[dict[str, Any]] = []
|
||||
cycle_uids: set[str] = set()
|
||||
|
||||
for attempt in range(2):
|
||||
try:
|
||||
self._fetch_messages_once(
|
||||
search_criteria,
|
||||
mark_seen,
|
||||
dedupe,
|
||||
limit,
|
||||
messages,
|
||||
cycle_uids,
|
||||
)
|
||||
return messages
|
||||
except Exception as exc:
|
||||
if attempt == 1 or not self._is_stale_imap_error(exc):
|
||||
raise
|
||||
logger.warning("Email IMAP connection went stale, retrying once: {}", exc)
|
||||
|
||||
return messages
|
||||
|
||||
def _fetch_messages_once(
|
||||
self,
|
||||
search_criteria: tuple[str, ...],
|
||||
mark_seen: bool,
|
||||
dedupe: bool,
|
||||
limit: int,
|
||||
messages: list[dict[str, Any]],
|
||||
cycle_uids: set[str],
|
||||
) -> None:
|
||||
"""Fetch messages by arbitrary IMAP search criteria."""
|
||||
mailbox = self.config.imap_mailbox or "INBOX"
|
||||
|
||||
if self.config.imap_use_ssl:
|
||||
client = imaplib.IMAP4_SSL(self.config.imap_host, self.config.imap_port)
|
||||
else:
|
||||
client = imaplib.IMAP4(self.config.imap_host, self.config.imap_port)
|
||||
|
||||
try:
|
||||
client.login(self.config.imap_username, self.config.imap_password)
|
||||
try:
|
||||
status, _ = client.select(mailbox)
|
||||
except Exception as exc:
|
||||
if self._is_missing_mailbox_error(exc):
|
||||
logger.warning("Email mailbox unavailable, skipping poll for {}: {}", mailbox, exc)
|
||||
return messages
|
||||
raise
|
||||
if status != "OK":
|
||||
logger.warning("Email mailbox select returned {}, skipping poll for {}", status, mailbox)
|
||||
return messages
|
||||
|
||||
status, data = client.search(None, *search_criteria)
|
||||
if status != "OK" or not data:
|
||||
return messages
|
||||
|
||||
ids = data[0].split()
|
||||
if limit > 0 and len(ids) > limit:
|
||||
ids = ids[-limit:]
|
||||
for imap_id in ids:
|
||||
status, fetched = client.fetch(imap_id, "(BODY.PEEK[] UID)")
|
||||
if status != "OK" or not fetched:
|
||||
continue
|
||||
|
||||
raw_bytes = self._extract_message_bytes(fetched)
|
||||
if raw_bytes is None:
|
||||
continue
|
||||
|
||||
uid = self._extract_uid(fetched)
|
||||
if uid and uid in cycle_uids:
|
||||
continue
|
||||
if dedupe and uid and uid in self._processed_uids:
|
||||
continue
|
||||
|
||||
parsed = BytesParser(policy=policy.default).parsebytes(raw_bytes)
|
||||
sender = parseaddr(parsed.get("From", ""))[1].strip().lower()
|
||||
if not sender:
|
||||
continue
|
||||
|
||||
# --- Anti-spoofing: verify Authentication-Results ---
|
||||
spf_pass, dkim_pass = self._check_authentication_results(parsed)
|
||||
if self.config.verify_spf and not spf_pass:
|
||||
logger.warning(
|
||||
"Email from {} rejected: SPF verification failed "
|
||||
"(no 'spf=pass' in Authentication-Results header)",
|
||||
sender,
|
||||
)
|
||||
continue
|
||||
if self.config.verify_dkim and not dkim_pass:
|
||||
logger.warning(
|
||||
"Email from {} rejected: DKIM verification failed "
|
||||
"(no 'dkim=pass' in Authentication-Results header)",
|
||||
sender,
|
||||
)
|
||||
continue
|
||||
|
||||
subject = self._decode_header_value(parsed.get("Subject", ""))
|
||||
date_value = parsed.get("Date", "")
|
||||
message_id = parsed.get("Message-ID", "").strip()
|
||||
body = self._extract_text_body(parsed)
|
||||
|
||||
if not body:
|
||||
body = "(empty email body)"
|
||||
|
||||
body = body[: self.config.max_body_chars]
|
||||
content = (
|
||||
f"[EMAIL-CONTEXT] Email received.\n"
|
||||
f"From: {sender}\n"
|
||||
f"Subject: {subject}\n"
|
||||
f"Date: {date_value}\n\n"
|
||||
f"{body}"
|
||||
)
|
||||
|
||||
metadata = {
|
||||
"message_id": message_id,
|
||||
"subject": subject,
|
||||
"date": date_value,
|
||||
"sender_email": sender,
|
||||
"uid": uid,
|
||||
}
|
||||
messages.append(
|
||||
{
|
||||
"sender": sender,
|
||||
"subject": subject,
|
||||
"message_id": message_id,
|
||||
"content": content,
|
||||
"metadata": metadata,
|
||||
}
|
||||
)
|
||||
|
||||
if uid:
|
||||
cycle_uids.add(uid)
|
||||
if dedupe and uid:
|
||||
self._processed_uids.add(uid)
|
||||
# mark_seen is the primary dedup; this set is a safety net
|
||||
if len(self._processed_uids) > self._MAX_PROCESSED_UIDS:
|
||||
# Evict a random half to cap memory; mark_seen is the primary dedup
|
||||
self._processed_uids = set(list(self._processed_uids)[len(self._processed_uids) // 2:])
|
||||
|
||||
if mark_seen:
|
||||
client.store(imap_id, "+FLAGS", "\\Seen")
|
||||
finally:
|
||||
try:
|
||||
client.logout()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def _is_stale_imap_error(cls, exc: Exception) -> bool:
|
||||
message = str(exc).lower()
|
||||
return any(marker in message for marker in cls._IMAP_RECONNECT_MARKERS)
|
||||
|
||||
@classmethod
|
||||
def _is_missing_mailbox_error(cls, exc: Exception) -> bool:
|
||||
message = str(exc).lower()
|
||||
return any(marker in message for marker in cls._IMAP_MISSING_MAILBOX_MARKERS)
|
||||
|
||||
@classmethod
|
||||
def _format_imap_date(cls, value: date) -> str:
|
||||
"""Format date for IMAP search (always English month abbreviations)."""
|
||||
month = cls._IMAP_MONTHS[value.month - 1]
|
||||
return f"{value.day:02d}-{month}-{value.year}"
|
||||
|
||||
@staticmethod
|
||||
def _extract_message_bytes(fetched: list[Any]) -> bytes | None:
|
||||
for item in fetched:
|
||||
if isinstance(item, tuple) and len(item) >= 2 and isinstance(item[1], (bytes, bytearray)):
|
||||
return bytes(item[1])
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _extract_uid(fetched: list[Any]) -> str:
|
||||
for item in fetched:
|
||||
if isinstance(item, tuple) and item and isinstance(item[0], (bytes, bytearray)):
|
||||
head = bytes(item[0]).decode("utf-8", errors="ignore")
|
||||
m = re.search(r"UID\s+(\d+)", head)
|
||||
if m:
|
||||
return m.group(1)
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _decode_header_value(value: str) -> str:
|
||||
if not value:
|
||||
return ""
|
||||
try:
|
||||
return str(make_header(decode_header(value)))
|
||||
except Exception:
|
||||
return value
|
||||
|
||||
@classmethod
|
||||
def _extract_text_body(cls, msg: Any) -> str:
|
||||
"""Best-effort extraction of readable body text."""
|
||||
if msg.is_multipart():
|
||||
plain_parts: list[str] = []
|
||||
html_parts: list[str] = []
|
||||
for part in msg.walk():
|
||||
if part.get_content_disposition() == "attachment":
|
||||
continue
|
||||
content_type = part.get_content_type()
|
||||
try:
|
||||
payload = part.get_content()
|
||||
except Exception:
|
||||
payload_bytes = part.get_payload(decode=True) or b""
|
||||
charset = part.get_content_charset() or "utf-8"
|
||||
payload = payload_bytes.decode(charset, errors="replace")
|
||||
if not isinstance(payload, str):
|
||||
continue
|
||||
if content_type == "text/plain":
|
||||
plain_parts.append(payload)
|
||||
elif content_type == "text/html":
|
||||
html_parts.append(payload)
|
||||
if plain_parts:
|
||||
return "\n\n".join(plain_parts).strip()
|
||||
if html_parts:
|
||||
return cls._html_to_text("\n\n".join(html_parts)).strip()
|
||||
return ""
|
||||
|
||||
try:
|
||||
payload = msg.get_content()
|
||||
except Exception:
|
||||
payload_bytes = msg.get_payload(decode=True) or b""
|
||||
charset = msg.get_content_charset() or "utf-8"
|
||||
payload = payload_bytes.decode(charset, errors="replace")
|
||||
if not isinstance(payload, str):
|
||||
return ""
|
||||
if msg.get_content_type() == "text/html":
|
||||
return cls._html_to_text(payload).strip()
|
||||
return payload.strip()
|
||||
|
||||
@staticmethod
|
||||
def _check_authentication_results(parsed_msg: Any) -> tuple[bool, bool]:
|
||||
"""Parse Authentication-Results headers for SPF and DKIM verdicts.
|
||||
|
||||
Returns:
|
||||
A tuple of (spf_pass, dkim_pass) booleans.
|
||||
"""
|
||||
spf_pass = False
|
||||
dkim_pass = False
|
||||
for ar_header in parsed_msg.get_all("Authentication-Results") or []:
|
||||
ar_lower = ar_header.lower()
|
||||
if re.search(r"\bspf\s*=\s*pass\b", ar_lower):
|
||||
spf_pass = True
|
||||
if re.search(r"\bdkim\s*=\s*pass\b", ar_lower):
|
||||
dkim_pass = True
|
||||
return spf_pass, dkim_pass
|
||||
|
||||
@staticmethod
|
||||
def _html_to_text(raw_html: str) -> str:
|
||||
text = re.sub(r"<\s*br\s*/?>", "\n", raw_html, flags=re.IGNORECASE)
|
||||
text = re.sub(r"<\s*/\s*p\s*>", "\n", text, flags=re.IGNORECASE)
|
||||
text = re.sub(r"<[^>]+>", "", text)
|
||||
return html.unescape(text)
|
||||
|
||||
def _reply_subject(self, base_subject: str) -> str:
|
||||
subject = (base_subject or "").strip() or "nanobot reply"
|
||||
prefix = self.config.subject_prefix or "Re: "
|
||||
if subject.lower().startswith("re:"):
|
||||
return subject
|
||||
return f"{prefix}{subject}"
|
||||
Reference in New Issue
Block a user