xenu_nntp/lib/nntp/tiny/message.py

146 lines
3.5 KiB
Python
Raw Normal View History

2024-11-08 15:47:21 -05:00
import re
import enum
import datetime
from dateparser.search import search_dates
from email.header import decode_header
2024-11-08 23:11:09 -05:00
from nntp.tiny.db import DatabaseTable
2024-11-08 15:47:21 -05:00
def decode(text: str):
    """Decode an RFC 2047 encoded header value into a plain string.

    Every chunk returned by ``decode_header`` is decoded and the results
    are concatenated, so values that mix plain text with one or more
    encoded words (e.g. ``Re: =?utf-8?q?...?=``) are preserved in full.

    Args:
        text: Raw header value, possibly containing encoded words.

    Returns:
        The decoded header value.
    """
    pieces = []

    for chunk, charset in decode_header(text):
        # decode_header yields str only when the value had no encoded
        # words at all; otherwise every chunk (even plain ones) is bytes.
        if isinstance(chunk, str):
            pieces.append(chunk)
            continue

        try:
            pieces.append(chunk.decode(charset or 'ascii'))
        except (LookupError, UnicodeDecodeError):
            # Unknown charset name or bytes that do not match it: keep as
            # much readable text as possible instead of a bytes repr.
            pieces.append(chunk.decode('utf-8', errors='replace'))

    return ''.join(pieces)
@enum.unique
class MessageState(enum.Enum):
    """Parsing phase of a message that is being fed one line at a time."""

    EMPTY = 0   # no line has been added yet
    HEADER = 1  # currently reading header lines
    BODY = 2    # the blank separator line was seen; reading body lines
2024-11-08 23:11:09 -05:00
class Message(DatabaseTable):
    """A message assembled incrementally from raw header and body lines.

    Lines are supplied through :meth:`add`. Header lines are collected
    into ``headers`` (keys lower-cased, values RFC 2047 decoded); lines
    after the blank separator are accumulated into ``body``.

    Each line is buffered in ``line`` and only committed to ``content``
    and ``body`` by the *next* call to :meth:`add`, so the last line
    passed in is never part of ``content`` or ``body``.
    NOTE(review): as a consequence, re-parsing stored ``content`` drops
    one more trailing line -- confirm this is intended by the callers.
    """

    __slots__ = 'newsgroup_id', 'state', 'headers', 'line', 'content', 'body', '_key',

    # Table description; presumably consumed by DatabaseTable -- confirm
    # against that class.
    name = 'newsgroup_message'
    key = 'id'
    columns = (
        'newsgroup_id',
        'message_id',
        'created_on',
        'sender',
        'subject',
        'content'
    )

    # "Name: value" header line; group 1 is the name, group 2 the value.
    RE_HEADER = re.compile(r'^([A-Za-z0-9\-]+): (.*)$')
    # Message-ID wrapped in angle brackets; group 1 is the bare id.
    RE_MESSAGE_ID = re.compile(r'^<([^<>]+)>$')

    def __init__(self):
        self.newsgroup_id = None
        self.state = MessageState.EMPTY
        self.headers = dict()
        self.line = None     # most recently added line, not yet committed
        self.content = ''    # every committed line, headers and body
        self.body = None     # committed body lines; None until body starts
        self._key = None     # name of the header a folded line continues

    @staticmethod
    def __from_row__(row):
        """Rebuild a message from a row by parsing its ``content`` value.

        ``parse`` is static and returns a new object, so its result is
        what must be returned here.
        """
        return Message.parse(row['content'])

    def __values__(self):
        """Return this message's values in the same order as ``columns``."""
        return (
            self.newsgroup_id,
            self.id(),
            self.date(),
            self.sender(),
            self.subject(),
            self.content
        )

    def add(self, line: str):
        """Feed the next raw line (including its line ending, if any).

        The previously added line is committed to ``content`` (and to
        ``body`` once in the body), then ``line`` is interpreted according
        to the current state and buffered as the new pending line.
        """
        if self.line is not None:
            self.content += self.line

        if self.state is MessageState.EMPTY:
            self.state = MessageState.HEADER

        if self.state is MessageState.HEADER:
            if line in ('\n', '\r\n'):
                # Blank line separates the headers from the body.
                self.state = MessageState.BODY
            elif line[:1] in (' ', '\t'):
                # Folded continuation of the previous header. Slicing
                # avoids an IndexError on an empty line, and the key check
                # ignores a continuation that precedes any header.
                if self._key in self.headers:
                    self.headers[self._key] += ' ' + decode(line.strip())
            else:
                match = self.RE_HEADER.match(line)

                if match:
                    self._key = match[1].lower()
                    self.headers[self._key] = decode(match[2].rstrip())
        elif self.state is MessageState.BODY:
            if self.body is None:
                # The pending line here is the blank separator itself,
                # which is not part of the body.
                self.body = ''
            else:
                self.body += self.line

        self.line = line

    def header(self, key: str):
        """Return the value of header ``key`` (case-insensitive) or None."""
        return self.headers.get(key.lower())

    def id(self) -> 'str | None':
        """Return the Message-ID without its surrounding angle brackets.

        Returns None when the header is absent, and the stripped raw
        header value when it is not of the form ``<...>``.
        """
        value = self.header('Message-ID')

        if value is None:
            return None

        value = value.strip()
        match = self.RE_MESSAGE_ID.match(value)

        return match[1] if match else value

    def date(self):
        """Return the first date found in the Date header.

        Falls back to the Unix epoch (local time) when the header is
        missing or no date can be extracted from it.
        """
        try:
            return search_dates(self.headers['date'])[0][1]
        except Exception:
            # Missing header (KeyError), no date found (search_dates
            # returned None) or a parser failure: use the fallback.
            return datetime.datetime.fromtimestamp(0)

    def sender(self):
        """Return the From header, or 'Unknown' when absent."""
        return self.headers.get('from', 'Unknown')

    def subject(self):
        """Return the Subject header, or '(no subject)' when absent."""
        return self.headers.get('subject', '(no subject)')

    def is_first_line(self):
        """Return True when exactly one header is stored and no body text."""
        return len(self.headers) == 1 and not self.body

    @staticmethod
    def each_line(text: str):
        """Yield ``text`` split after every '\\n', keeping the line endings.

        Only '\\n' is treated as a separator (unlike ``str.splitlines``).
        A trailing piece without a newline is yielded as-is, and an empty
        ``text`` yields a single empty string.
        """
        start = 0
        end = len(text)

        while True:
            index = text.find('\n', start)

            if index < 0:
                yield text[start:]
                return

            yield text[start:index + 1]
            start = index + 1

            if start == end:
                return

    @staticmethod
    def parse(text: str):
        """Build a Message by feeding every line of ``text`` to ``add``."""
        message = Message()

        for line in Message.each_line(text):
            message.add(line)

        return message