"""Incremental, line-oriented parser for a single e-mail message."""

import datetime
import enum
import re
from email.errors import HeaderParseError
from email.header import decode_header
from email.utils import parsedate_to_datetime
from typing import Dict, Iterator, Optional

try:
    from dateparser.search import search_dates
except ImportError:
    # dateparser is a third-party package. Without it the module still loads
    # and Message.date() uses the standard-library RFC date parser instead.
    search_dates = None


def _decode_chunk(raw, charset) -> str:
    """Turn one ``(value, charset)`` pair from ``decode_header`` into text."""
    if isinstance(raw, str):
        # Headers without any encoded-word come back as plain ``str``.
        return raw
    if charset is None:
        # Unencoded fragments next to encoded-words are returned as bytes;
        # CPython produces them with the raw-unicode-escape codec, so try to
        # reverse that first and fall back to a codec that cannot fail.
        try:
            return raw.decode('raw-unicode-escape')
        except UnicodeDecodeError:
            return raw.decode('latin-1')
    try:
        return raw.decode(charset)
    except UnicodeDecodeError:
        # Known charset, invalid bytes: keep what can be decoded.
        return raw.decode(charset, errors='replace')
    except (LookupError, ValueError):
        # Unknown or malformed charset name.
        return raw.decode('ascii', errors='replace')


def decode(text: str) -> str:
    """Decode RFC 2047 encoded-words in a header value.

    Every chunk reported by ``decode_header`` is decoded and the results are
    concatenated, so values mixing encoded and plain text are preserved in
    full.

    Args:
        text: A raw header value (or one physical line of it).

    Returns:
        The decoded text. If the value contains a malformed encoded-word that
        ``decode_header`` rejects, ``text`` is returned unchanged.
    """
    try:
        chunks = decode_header(text)
    except HeaderParseError:
        # Best effort: a broken header must not abort parsing the message.
        return text
    return ''.join(_decode_chunk(raw, charset) for raw, charset in chunks)


class MessageState(enum.Enum):
    """Parsing phase of a :class:`Message`."""

    EMPTY = 0   # no line has been added yet
    HEADER = 1  # reading header lines
    BODY = 2    # the blank separator line has been seen


class Message:
    """A single message that is built up one line at a time via :meth:`add`.

    The most recently added line is held in ``line`` and is only appended to
    ``content`` (and, in the body phase, to ``body``) when the *next* line
    arrives. Consequently ``content`` and ``body`` never include the last
    line that was added.
    """

    __slots__ = 'state', 'headers', 'line', 'content', 'body', 'key',

    RE_HEADER = re.compile(r'^([A-Za-z0-9\-]+): (.*)$')
    RE_MESSAGE_ID = re.compile(r'^<([^<>]+)>$')

    def __init__(self):
        self.state = MessageState.EMPTY
        # Decoded header values keyed by lower-cased header name.
        self.headers: Dict[str, str] = dict()
        # Pending line: added last, not yet flushed into content/body.
        self.line: Optional[str] = None
        # All lines except the pending one, concatenated.
        self.content = ''
        # Body lines except the pending one; None until the body phase
        # has received its first line.
        self.body: Optional[str] = None
        # Lower-cased name of the header most recently parsed, used to
        # attach folded continuation lines.
        self.key: Optional[str] = None

    def add(self, line: str) -> None:
        """Feed the next line (including its line terminator) to the parser.

        Args:
            line: One line of the message as produced by :meth:`each_line`.
        """
        # Flush the previously pending line into the raw content.
        if self.line is not None:
            self.content += self.line

        if self.state is MessageState.EMPTY:
            self.state = MessageState.HEADER

        if self.state is MessageState.HEADER:
            self._add_header_line(line)
        elif self.state is MessageState.BODY:
            if self.body is None:
                # First call in the body phase: the pending line is the blank
                # separator, which is not part of the body.
                self.body = ''
            else:
                self.body += self.line

        self.line = line

    def _add_header_line(self, line: str) -> None:
        """Interpret ``line`` as a separator, continuation, or new header."""
        if line in ('\n', '\r\n'):
            # Blank line terminates the header section.
            self.state = MessageState.BODY
        elif line.startswith((' ', '\t')):
            # Folded continuation of the previous header. A continuation that
            # appears before any header has nothing to attach to; ignore it.
            if self.key is not None:
                self.headers[self.key] += ' ' + decode(line.strip())
        else:
            match = self.RE_HEADER.match(line)
            if match:
                self.key = match[1].lower()
                self.headers[self.key] = decode(match[2].rstrip())

    def header(self, key: str) -> Optional[str]:
        """Return the value of header ``key`` (case-insensitive) or None."""
        return self.headers.get(key.lower())

    def date(self) -> datetime.datetime:
        """Return the parsed ``Date`` header.

        Returns:
            The first date found by ``dateparser`` when that package is
            installed, otherwise the result of the standard-library
            ``parsedate_to_datetime``. If the header is missing or cannot be
            parsed, the naive local-time datetime for timestamp 0.
        """
        fallback = datetime.datetime.fromtimestamp(0)
        raw = self.headers.get('date')
        if raw is None:
            return fallback
        try:
            if search_dates is not None:
                # search_dates returns None when nothing is found; the
                # resulting TypeError is handled below.
                return search_dates(raw)[0][1]
            return parsedate_to_datetime(raw)
        except Exception:
            # Deliberately best-effort: any parser failure maps to the epoch.
            return fallback

    def sender(self) -> str:
        """Return the ``From`` header, or ``'Unknown'`` if absent."""
        return self.headers.get('from', 'Unknown')

    def subject(self) -> str:
        """Return the ``Subject`` header, or ``'(no subject)'`` if absent."""
        return self.headers.get('subject', '(no subject)')

    def is_first_line(self) -> bool:
        """Return True when exactly one header is stored and the body is empty."""
        return len(self.headers) == 1 and (self.body == '' or self.body is None)

    @staticmethod
    def each_line(text: str) -> Iterator[str]:
        """Yield the lines of ``text`` split on ``'\\n'``, terminators kept.

        A final line without a trailing newline is yielded as is. Empty text
        yields nothing.
        """
        start = 0
        end = len(text)
        while start < end:
            index = text.find('\n', start)
            if index == -1:
                yield text[start:]
                return
            yield text[start:index + 1]
            start = index + 1

    @classmethod
    def parse(cls, text: str) -> 'Message':
        """Build a message by feeding every line of ``text`` to :meth:`add`."""
        message = cls()
        for line in cls.each_line(text):
            message.add(line)
        return message