import re
import enum
import datetime
from dateparser.search import search_dates
from email.header import decode_header
from nntp.tiny.db import DatabaseTable


def decode(text: str) -> str:
    """Decode an RFC 2047 encoded header value into plain text.

    Every chunk returned by ``decode_header`` is decoded and the results are
    concatenated, so values that mix plain text and encoded words (or that
    contain several encoded words) are returned in full.

    Errors raised by ``decode_header`` itself are not caught here.
    """
    pieces = []
    for payload, charset in decode_header(text):
        if isinstance(payload, str):
            # No encoded word was present: the text comes back untouched.
            pieces.append(payload)
            continue
        try:
            pieces.append(payload.decode(charset or 'ascii'))
        except (LookupError, UnicodeError):
            # Unknown charset name or undecodable bytes: latin-1 maps every
            # byte to a character, so this fallback cannot fail.
            pieces.append(payload.decode('latin-1'))
    return ''.join(pieces)


def each_line(text: str):
    """Yield the lines of *text*, each keeping its trailing ``'\\n'``.

    Only ``'\\n'`` is treated as a line separator. A final line without a
    newline is yielded as-is, and an empty *text* yields a single empty
    string.
    """
    start = 0
    end = len(text)
    while True:
        index = text.find('\n', start)
        if index == -1:
            # No further newline: emit the remainder (possibly '') and stop.
            yield text[start:end]
            return
        yield text[start:index + 1]
        start = index + 1
        if start == end:
            # The text ended exactly on a newline; nothing is left to emit.
            return


class MessageState(enum.Enum):
    """Parsing state of a :class:`Message` fed through ``read_line``."""

    EMPTY = 0   # nothing has been read yet
    HEADER = 1  # reading header lines
    BODY = 2    # the blank separator line has been seen


class Message(DatabaseTable):
    """A message parsed line by line into headers and a body.

    Lines are fed through :meth:`read_line`. Each call stores the new line in
    ``self.line`` and commits the *previous* line to ``content`` (and to
    ``body`` once in the body), so the most recently read line is always
    pending rather than committed.
    """

    __slots__ = (
        '_cache',
        'id',
        'newsgroup_id',
        'state',
        'headers',
        'line',
        'content',
        'body',
        '_key'
    )

    name = 'newsgroup_message'
    key = 'id'
    columns = (
        'id',
        'newsgroup_id',
        'created_on',
        'message_id',
        'parent_id',
        'sender',
        'subject',
        'content'
    )

    RE_HEADER = re.compile(r'^([A-Za-z0-9\-]+): (.*)$')

    def __init__(self):
        self._cache = dict()
        self.id = None
        self.newsgroup_id = None
        self.state = MessageState.EMPTY
        self.headers = None   # dict of lower-cased header name -> value
        self.line = None      # last line read, not yet committed
        self.content = ''
        self.body = None
        self._key = None      # name of the header most recently parsed

    @staticmethod
    def __from_row__(row):
        """Build a message from a mapping holding the table's columns."""
        message = Message()
        message.headers = dict()
        message.id = row['id']
        message.newsgroup_id = row['newsgroup_id']
        message.created_on = row['created_on']
        message.message_id = row['message_id']
        message.parent_id = row['parent_id']
        message.sender = row['sender']
        # read_line() replaces self.headers with a fresh dict on the first
        # line it processes, so for non-empty content the header values
        # assigned above are superseded by the ones parsed from the content
        # (the created_on cache entry is kept). Empty content is skipped so
        # the row values survive instead.
        content = row['content']
        if content:
            message.read(content)
        return message

    def __values__(self) -> tuple:
        """Return the column values in ``columns`` order, without ``id``."""
        return (
            self.newsgroup_id,
            self.created_on,
            self.message_id,
            self.parent_id,
            self.sender,
            self.subject,
            self.content
        )

    def _parsed_headers(self) -> dict:
        """Return the header dict, parsing ``content`` first if needed."""
        if self.headers is None:
            self.read(self.content)
        return self.headers

    def header(self, key: str):
        """Return the value of header *key* (case-insensitive) or ``None``."""
        return self._parsed_headers().get(key.lower())

    @property
    def created_on(self):
        """Creation time as a ``datetime``.

        The cached value is used when present; otherwise the ``Date`` header
        is parsed with ``search_dates`` and the result cached. Whenever no
        date can be determined, ``datetime.fromtimestamp(0)`` is returned.
        """
        try:
            cached = self._cache.get('created_on')
            if cached is not None:
                return datetime.datetime.fromisoformat(cached)
            raw = self.header('Date')
            if raw is not None:
                found = search_dates(raw)
                if found:
                    parsed = found[0][1]
                    self._cache['created_on'] = str(parsed)
                    return parsed
        except Exception:
            # Deliberately best-effort: an unparsable cache entry or header
            # must not break callers, so fall through to the epoch below.
            pass
        return datetime.datetime.fromtimestamp(0)

    @created_on.setter
    def created_on(self, value):
        self.headers['date'] = str(value)
        self._cache['created_on'] = str(value)

    @property
    def message_id(self) -> str:
        """Value of the ``Message-ID`` header (``None`` when absent)."""
        return self.header('Message-ID')

    @message_id.setter
    def message_id(self, value):
        self.headers['message-id'] = value

    @property
    def parent_id(self) -> str:
        """Value of the ``References`` header (``None`` when absent)."""
        return self.header('References')

    @parent_id.setter
    def parent_id(self, value):
        self.headers['references'] = value

    @property
    def sender(self) -> str:
        """Value of the ``From`` header, or ``'Unknown'`` when absent."""
        return self._parsed_headers().get('from', 'Unknown')

    @sender.setter
    def sender(self, value):
        self.headers['from'] = value

    @property
    def subject(self) -> str:
        """Value of the ``Subject`` header, or ``'(no subject)'``."""
        return self._parsed_headers().get('subject', '(no subject)')

    @subject.setter
    def subject(self, value):
        self.headers['subject'] = value

    def is_first_line(self):
        """Return True when exactly one header and no body text is stored."""
        return (
            self.headers is not None
            and len(self.headers) == 1
            and not self.body
        )

    def read_line(self, line: str):
        """Feed one raw line (including its line ending) into the parser.

        The previously read line is committed to ``content`` (and to ``body``
        when in the body) before *line* is examined; *line* itself is then
        stored as the pending line.
        """
        if self.line is not None:
            self.content += self.line
        if self.state is MessageState.EMPTY:
            self.state = MessageState.HEADER
            self.headers = dict()
        if self.state is MessageState.HEADER:
            if line in ('\n', '\r\n'):
                # Blank line: end of the header section.
                self.state = MessageState.BODY
            elif line[:1] in (' ', '\t'):
                # Folded continuation of the previous header. It is ignored
                # when no header has been parsed yet.
                if self._key in self.headers:
                    self.headers[self._key] += ' ' + decode(line.strip())
            else:
                # Slicing above keeps an empty line from raising; such a
                # line simply fails to match here and is skipped.
                match = self.RE_HEADER.match(line)
                if match:
                    self._key = match[1].lower()
                    self.headers[self._key] = decode(match[2].rstrip())
        elif self.state is MessageState.BODY:
            if self.body is None:
                # First body line: the pending line is the blank separator,
                # which is not part of the body.
                self.body = ''
            else:
                self.body += self.line
        self.line = line

    def read(self, text: str):
        """Feed every line of *text* through :meth:`read_line`."""
        for line in each_line(text):
            self.read_line(line)

    @staticmethod
    def from_text(text: str):
        """Create a new message by parsing *text* line by line."""
        message = Message()
        message.read(text)
        return message