382 lines
15 KiB
Python
382 lines
15 KiB
Python
import os
|
|
import socket
|
|
import logging
|
|
|
|
from functools import wraps
|
|
from base64 import b64encode
|
|
from time import sleep
|
|
from datetime import datetime
|
|
from typing import List, Dict, Any, Union, TypeVar, Tuple
|
|
|
|
from email.message import EmailMessage
|
|
from email.policy import default as policy
|
|
from email.headerregistry import UniqueDateHeader, UniqueAddressHeader, UniqueUnstructuredHeader
|
|
from email.parser import BytesParser
|
|
from imaplib import IMAP4_SSL, IMAP4
|
|
|
|
from .config import EmailConfig, MailReaderMainConfig
|
|
|
|
|
|
# Module-level logger used by the retry decorator and EmailReaderService below.
logger = logging.getLogger('Email Reader Service')
|
|
|
|
# Generic type variable; nothing in the visible portion of this module references it.
T = TypeVar('T')
|
|
|
|
|
|
def retry_on_connection_error(max_retries: int = 3, delay: int = 5, backoff: int = 2, exceptions=(Exception,)):
    """
    Build a decorator that re-invokes a function after selected exceptions

    Every guarded attempt that raises one of ``exceptions`` is logged as a
    warning and followed by a pause; the pause is multiplied by ``backoff``
    after each failure. Once ``max_retries`` guarded attempts have failed, one
    last unguarded call is made so that its exception reaches the caller.

    Args:
        max_retries: Number of guarded attempts made before the final call
        delay: Pause in seconds after the first failure
        backoff: Factor applied to the pause after each failure
        exceptions: Exception types that trigger another attempt

    Returns: Decorator wrapping a callable with the retry loop
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            pause = delay
            for _ in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except exceptions as exc:
                    logger.warning(f"Connection error in {func.__name__}: {str(exc)}, retrying in {pause}s...")
                    sleep(pause)
                    pause *= backoff
            # The last attempt is deliberately unguarded: its failure propagates.
            return func(*args, **kwargs)

        return wrapper

    return decorator
|
|
|
|
|
|
class Mails:
    """Class representing an email with attachments and metadata"""

    def __init__(self, mail_id: bytes, mail_data: bytes):
        """
        Initialize a mail object

        Parses the raw message, caches the common headers, extracts the
        plain-text body and collects every named attachment.

        Args:
            mail_id: Unique identifier for the email
            mail_data: Raw email data
        """
        self.id: bytes = mail_id
        self.raw_data: bytes = mail_data
        self.attachments: List[Dict[str, Union[str, bytes]]] = []
        self.message: EmailMessage = BytesParser(policy=policy).parsebytes(mail_data)
        # A missing header is stored as '' instead of a header object, so the
        # rest of the class must not assume `.addresses` / `.datetime` exist.
        self.subject: Union[UniqueUnstructuredHeader, str] = self.message.get('Subject', '') or ''
        self.from_: Union[UniqueAddressHeader, str] = self.message.get('From', '') or ''
        self.to: Union[UniqueAddressHeader, str] = self.message.get('To', '') or ''
        self.date: Union[UniqueDateHeader, str] = self.message.get('Date', '') or ''
        self.body_text: str = self._get_body_text()
        self._extract_attachments()

    @staticmethod
    def _address_to_dict(address: Any = None) -> Dict[str, str]:
        """Serialize one parsed address; an absent address yields empty fields."""
        if address is None:
            return {"display_name": "", "username": "", "domain": "", "mail": ""}
        return {
            "display_name": address.display_name,
            "username": address.username,
            "domain": address.domain,
            "mail": f"{address.username}@{address.domain}",
        }

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert mail object to dictionary representation

        Missing From/To headers produce an empty sender record and an empty
        recipient list instead of raising. A missing or unparseable Date is
        rendered as the string 'None'.

        Returns: Dictionary representation of mail
        """
        # getattr() covers the '' placeholder stored for absent headers.
        senders = getattr(self.from_, 'addresses', ())
        recipients = getattr(self.to, 'addresses', ())
        return {
            'id': self.id.decode('utf-8'),
            'attachments': [{
                'filename': attachment['filename'],
                'content_type': attachment['content_type'],
                'charset': attachment['charset'],
                # Base64 output is pure ASCII regardless of the attachment's
                # own charset, so it is decoded as ASCII.
                'data': b64encode(attachment['data']).decode('ascii'),
            } for attachment in self.attachments],
            'subject': str(self.subject),
            'from_': self._address_to_dict(senders[0] if senders else None),
            'to': [self._address_to_dict(address) for address in recipients],
            'date': str(getattr(self.date, 'datetime', None)),
            'body_text': str(self.body_text),
        }

    @staticmethod
    def _decode_text_part(part: Any) -> str:
        """Return the text of a part, falling back to lenient manual decoding."""
        try:
            return part.get_content() or ''
        except Exception:
            # Best effort: get_content() fails e.g. on an unknown charset.
            payload = part.get_payload(decode=True) or b''
            try:
                return payload.decode(part.get_content_charset() or 'utf-8', errors='replace')
            except LookupError:
                # The declared charset is not known to Python; use UTF-8.
                return payload.decode('utf-8', errors='replace')

    def _get_body_text(self) -> str:
        """
        Extract plain text body from email

        Returns: Plain text body of email, or '' when none is found
        """
        body = self.message.get_body(preferencelist=('plain',))
        if body is not None:
            return self._decode_text_part(body)
        if self.message.is_multipart():
            for part in self.message.walk():
                if part.get_content_type() == 'text/plain' and (part.get_content_disposition() or '') != 'attachment':
                    return self._decode_text_part(part)
        elif self.message.get_content_type() == 'text/plain':
            return self._decode_text_part(self.message)
        return ''

    def _extract_attachments(self) -> None:
        """Extract attachments from email; parts without a filename are skipped"""
        for part in self.message.walk():
            if part.get_content_disposition() != 'attachment':
                continue
            filename = part.get_filename()
            if not filename:
                continue
            data = part.get_payload(decode=True) or b''
            # get_content_charset() returns the Content-Type charset as a
            # string (or None), unlike the legacy get_charset().
            charset = part.get_content_charset() or 'utf-8'
            self.attachments.append({
                'filename': filename,
                'content_type': part.get_content_type(),
                'data': data,
                'charset': charset,
            })

    def save_attachments(self, folder: str) -> None:
        """
        Save attachments to folder

        Attachment names come from the email and are therefore untrusted:
        only the final path component is used, so a name cannot point
        outside ``folder``. Names that reduce to nothing usable are replaced
        by ``attachment_<position>``.

        Args: folder: Folder to save attachments to
        """
        os.makedirs(folder, exist_ok=True)
        for index, att in enumerate(self.attachments, start=1):
            # Normalize Windows separators first so basename() strips them too.
            safe_name = os.path.basename(str(att['filename']).replace('\\', '/'))
            if safe_name in ('', '.', '..'):
                safe_name = f"attachment_{index}"
            with open(os.path.join(folder, safe_name), 'wb') as f:
                f.write(att['data'])
|
|
|
|
|
|
class EmailReaderService:
    """Service for reading emails from mailbox with improved connection resilience"""

    def __init__(self, config: MailReaderMainConfig):
        """
        Initialize email reader service

        Opens the IMAP connection immediately; the outcome is recorded in
        ``is_connected`` rather than raised.

        Args: config: Application configuration
        """
        self.email_config = EmailConfig()
        self.config = config
        self.mail = None
        self.data: List[Mails] = []
        self.mail_count = 0
        self.is_connected = False
        self.connect_imap()

    @staticmethod
    def _release_connection(connection) -> None:
        """Close the selected mailbox and log out, ignoring a failure of either step."""
        # close() raises when no mailbox is selected; running each step in
        # its own try ensures logout() still gets a chance to run.
        for step in (connection.close, connection.logout):
            try:
                step()
            except Exception as e:
                logger.debug(f"Ignoring error while releasing IMAP connection: {str(e)}")

    def connect_imap(self) -> bool:
        """
        Establish the IMAP connection, releasing any previous one first

        Returns: True if connection successful, False otherwise
        """
        try:
            if self.mail:
                self._release_connection(self.mail)
            logger.info(f"Connecting to IMAP server {self.email_config.HOST}:{self.email_config.PORT}")
            self.mail = IMAP4_SSL(self.email_config.HOST, self.email_config.PORT)
            self.is_connected = True
            return True
        except (socket.error, IMAP4.error) as e:
            logger.error(f"Failed to connect to IMAP server: {str(e)}")
            self.is_connected = False
            return False

    @retry_on_connection_error(max_retries=3, delay=5, exceptions=(socket.error, IMAP4.error, OSError))
    def login_and_connect(self) -> bool:
        """
        Login to IMAP server and connect to inbox with retry mechanism

        Returns: True if login successful

        Raises:
            ConnectionError: If connection cannot be established
            socket.error, IMAP4.error: If login or INBOX selection fails
        """
        if not self.is_connected:
            if not self.connect_imap():
                raise ConnectionError("Cannot establish connection to IMAP server")

        try:
            logger.info(f"Logging in as {self.email_config.USERNAME}")
            self.mail.login(self.email_config.USERNAME, self.email_config.PASSWORD)
            self._connect_inbox()
            logger.info("Successfully logged in and connected to inbox")
            return True
        except (socket.error, IMAP4.error) as e:
            logger.error(f"Login failed: {str(e)}")
            # Force a fresh connection on the next attempt.
            self.is_connected = False
            raise

    @retry_on_connection_error(max_retries=2, delay=3, exceptions=(socket.error, IMAP4.error, OSError))
    def refresh(self) -> Tuple[List[Mails], int, int]:
        """
        Refresh mail data with connection retry

        On a connection-level failure the service reconnects, logs in again
        and repeats the fetch once before letting the error propagate.

        Returns: Tuple of (mail data, mail count, data length)
        """
        try:
            self.mail_count = self._fetch_count()
            self.data = self._fetch_all()
            return self.data, self.mail_count, len(self.data)
        except (socket.error, IMAP4.error) as e:
            logger.error(f"Refresh failed, attempting to reconnect: {str(e)}")
            self.connect_imap()
            self.login_and_connect()
            self.mail_count = self._fetch_count()
            self.data = self._fetch_all()
            return self.data, self.mail_count, len(self.data)

    @retry_on_connection_error(max_retries=2, delay=2, exceptions=(socket.error, IMAP4.error))
    def _connect_inbox(self) -> None:
        """
        Connect to INBOX with retry mechanism

        Raises: IMAP4.error: If connection to INBOX fails
        """
        logger.info("Selecting INBOX folder")
        status, _ = self.mail.select("INBOX")
        if status != 'OK':
            error_msg = "Failed to connect to INBOX"
            logger.error(error_msg)
            raise IMAP4.error(error_msg)

    @retry_on_connection_error(max_retries=2, delay=2, exceptions=(socket.error, IMAP4.error))
    def _fetch_count(self) -> int:
        """
        Fetch mail count with retry mechanism

        Counts the UIDs returned by a SORT restricted to messages whose FROM
        matches ``config.MAILBOX``.

        Returns: Number of emails

        Raises: IMAP4.error: If fetching mail count fails
        """
        try:
            status, uids = self.mail.uid('SORT', '(REVERSE DATE)', 'UTF-8', 'ALL', 'FROM', f'"{self.config.MAILBOX}"')
            if status != 'OK':
                raise IMAP4.error("Failed to get mail count")
            count = len(uids[0].split()) if uids[0] else 0
            logger.info(f"Found {count} emails from {self.config.MAILBOX}")
            return count
        except (socket.error, IMAP4.error) as e:
            logger.error(f"Error fetching mail count: {str(e)}")
            raise

    @retry_on_connection_error(max_retries=2, delay=2, exceptions=(socket.error, IMAP4.error))
    def _fetch_all(self) -> List[Mails]:
        """
        Fetch all mails with retry mechanism

        A message that cannot be fetched or parsed is logged and skipped so
        one bad email does not abort the whole batch.

        Returns: List of mail objects

        Raises: IMAP4.error: If fetching mails fails
        """
        self.data = []
        try:
            status, uids = self.mail.uid('SORT', '(REVERSE DATE)', 'UTF-8', 'ALL', 'FROM', f'"{self.config.MAILBOX}"')
            if status != 'OK':
                raise IMAP4.error("Mail search failed")
            if not uids[0]:
                logger.info("No emails found matching criteria")
                return self.data
            uid_list = uids[0].split()
            logger.info(f"Processing {len(uid_list)} emails")
            for uid in uid_list:
                try:
                    status, msg_data = self.mail.uid('fetch', uid, '(RFC822)')
                    if status == 'OK' and msg_data[0] is not None:
                        self.data.append(Mails(uid, msg_data[0][1]))
                except Exception as e:
                    logger.warning(f"Failed to fetch email with UID {uid}: {str(e)}")
                    continue
            logger.info(f"Successfully fetched {len(self.data)} emails")
            return self.data
        except (socket.error, IMAP4.error) as e:
            logger.error(f"Error fetching emails: {str(e)}")
            raise

    def _run_folder_command(self, command: str, action: str, gerund: str,
                            uid: Union[str, bytes], folder: str) -> bool:
        """Run a UID MOVE/COPY, then expunge; return False on any failure."""
        try:
            log_uid = uid
            if isinstance(uid, bytes):
                log_uid = uid.decode('utf-8', errors='replace')
            elif isinstance(uid, str):
                uid = uid.encode('utf-8')
            logger.info(f"{gerund} email {log_uid} to {folder} folder")
            status, response = self.mail.uid(command, uid, folder)
            # A 'NO' reply comes back as a status, not an exception, so it
            # must be checked explicitly before reporting success.
            if status != 'OK':
                logger.error(f"Failed to {action} email to folder: server answered {status} {response}")
                return False
            self.commit()
            return True
        except Exception as e:
            logger.error(f"Failed to {action} email to folder: {str(e)}")
            return False

    @retry_on_connection_error(max_retries=2, delay=1, exceptions=(socket.error, IMAP4.error))
    def move_to_folder(self, uid: Union[str, bytes], folder: str):
        """
        Move message to folder

        NOTE: every failure is converted into a False return value, so the
        retry decorator never sees an exception and performs no retries.

        Args:
            uid: Email UID
            folder: Destination folder

        Returns: True when the server accepted the move and the expunge succeeded
        """
        return self._run_folder_command('MOVE', 'move', 'Moving', uid, folder)

    @retry_on_connection_error(max_retries=2, delay=1, exceptions=(socket.error, IMAP4.error))
    def copy_to_folder(self, uid: Union[str, bytes], folder: str):
        """
        Copy message to folder

        NOTE: every failure is converted into a False return value, so the
        retry decorator never sees an exception and performs no retries.

        Args:
            uid: Email UID
            folder: Destination folder

        Returns: True when the server accepted the copy and the expunge succeeded
        """
        return self._run_folder_command('COPY', 'copy', 'Copying', uid, folder)

    @retry_on_connection_error(max_retries=2, delay=1, exceptions=(socket.error, IMAP4.error))
    def mark_no_attachment(self, uid: Union[str, bytes]):
        """
        Move message to no attachment folder

        Args: uid: Email UID

        Returns: Result of move_to_folder (True on success, False otherwise)
        """
        return self.move_to_folder(uid, self.config.NO_ATTACHMENT_FOLDER)

    @retry_on_connection_error(max_retries=2, delay=1, exceptions=(socket.error, IMAP4.error))
    def mark_completed(self, uid: Union[str, bytes]):
        """
        Move message to completed folder

        Args: uid: Email UID

        Returns: Result of move_to_folder (True on success, False otherwise)
        """
        return self.move_to_folder(uid, self.config.COMPLETED_FOLDER)

    @retry_on_connection_error(max_retries=2, delay=1, exceptions=(socket.error, IMAP4.error))
    def delete(self, uid):
        """
        Flag message as deleted with retry mechanism

        The message is only flagged here; commit() performs the expunge.

        Args: uid: Email UID

        Raises: Exception: If the STORE command fails
        """
        try:
            log_uid = uid
            if isinstance(uid, bytes):
                log_uid = uid.decode('utf-8', errors='replace')
            logger.info(f"Marking email {log_uid} for deletion")
            self.mail.uid('STORE', uid, '+FLAGS', r'(\Deleted)')
        except Exception as e:
            logger.error(f"Failed to delete email: {str(e)}")
            raise

    @retry_on_connection_error(max_retries=2, delay=1, exceptions=(socket.error, IMAP4.error))
    def commit(self):
        """
        Commit pending operations (expunge) with retry mechanism

        Raises: Exception: If commit fails
        """
        try:
            logger.info("Committing changes (expunge)")
            self.mail.expunge()
        except Exception as e:
            logger.error(f"Failed to commit changes: {str(e)}")
            raise

    def logout(self):
        """Logout from IMAP server and mark the service as disconnected"""
        if self.mail and self.is_connected:
            logger.info("Logging out from IMAP server")
            try:
                self.mail.close()
            except Exception as e:
                # close() raises when no mailbox is selected; that must not
                # prevent the logout below.
                logger.debug(f"Mailbox close skipped: {str(e)}")
            try:
                self.mail.logout()
            except Exception as e:
                logger.warning(f"Logout failed: {str(e)}")
            finally:
                # Whatever happened, this session is finished; the next
                # login_and_connect() must open a new connection.
                self.is_connected = False

    @property
    def count(self):
        """Get count of emails currently held in ``data``"""
        return len(self.data)
|