From 5e427890439e1ea24ee6aa4beafcc3221c6a2d18 Mon Sep 17 00:00:00 2001
From: XANTRONIX Development
Date: Fri, 8 Nov 2024 15:47:21 -0500
Subject: [PATCH] Split Message out of mbox; add SQLite Database wrapper

Move the decode() helper, the parser state enum and the message class
out of nntp.tiny.mbox into a new nntp.tiny.message module, renaming
MBoxMessageState/MBoxMessage to MessageState/Message.  The header
pattern becomes the class constant Message.HEADER_REGEX; it is used by
MBoxReaderBuffer.is_header_line() and now also by Message.add(), in
place of the looser '^([^:]+): (.*)$' pattern add() used before.

Add db/newsgroup.sql, which creates the newsgroup_message table, and
nntp.tiny.db.Database, a small sqlite3 wrapper whose message_add()
inserts a Message's date, sender, subject and content.

Message.parse() refers to Message, not MBoxMessage: the old name is
removed by this change and is not defined in nntp.tiny.message.
---
 db/newsgroup.sql         |  11 ++++
 lib/nntp/tiny/db.py      |  26 +++++++++
 lib/nntp/tiny/mbox.py    | 111 ++-------------------------------------
 lib/nntp/tiny/message.py | 108 +++++++++++++++++++++++++++++++++++++
 4 files changed, 149 insertions(+), 107 deletions(-)
 create mode 100644 db/newsgroup.sql
 create mode 100644 lib/nntp/tiny/db.py
 create mode 100644 lib/nntp/tiny/message.py

diff --git a/db/newsgroup.sql b/db/newsgroup.sql
new file mode 100644
index 0000000..4b83777
--- /dev/null
+++ b/db/newsgroup.sql
@@ -0,0 +1,11 @@
+begin transaction;
+
+create table newsgroup_message (
+    id INTEGER PRIMARY KEY NOT NULL,
+    posted_on DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    sender TEXT NOT NULL,
+    subject TEXT NOT NULL,
+    content TEXT NOT NULL
+);
+
+commit;
diff --git a/lib/nntp/tiny/db.py b/lib/nntp/tiny/db.py
new file mode 100644
index 0000000..5784e3b
--- /dev/null
+++ b/lib/nntp/tiny/db.py
@@ -0,0 +1,26 @@
+import sqlite3
+
+from nntp.tiny.message import Message
+
+class Database():
+    __slots__ = 'db',
+
+    def __init__(self, path: str):
+        self.db = sqlite3.connect(path)
+
+    def message_add(self, message: Message):
+        sql = """
+            insert into
+                newsgroup_message
+                (posted_on, sender, subject, content) values (?, ?, ?, ?)
+        """
+
+        self.db.execute(sql, (
+            message.date().isoformat(),
+            message.sender(),
+            message.subject(),
+            message.content
+        ))
+
+    def commit(self):
+        self.db.commit()
diff --git a/lib/nntp/tiny/mbox.py b/lib/nntp/tiny/mbox.py
index 3dd66c0..ed8a0e1 100644
--- a/lib/nntp/tiny/mbox.py
+++ b/lib/nntp/tiny/mbox.py
@@ -1,9 +1,6 @@
 import re
-import enum
-import datetime
 
-from dateparser.search import search_dates
-from email.header import decode_header
+from nntp.tiny.message import Message
 
 class MBoxReaderError(Exception):
     pass
@@ -45,7 +42,7 @@ class MBoxReaderBuffer():
         return self.lines[line][0:5] == 'From '
 
     def is_header_line(self, line):
-        return re.match('^([A-Za-z0-9\\-]+): (.*)$', self.lines[line]) is not None
+        return re.match(Message.HEADER_REGEX, self.lines[line]) is not None
 
     def is_start(self):
         if self.lines[0] is None or self.lines[1] is None:
@@ -59,106 +56,6 @@ class MBoxReaderBuffer():
           and self.is_header_line(3):
             return 3
 
-class MBoxMessageState(enum.Enum):
-    EMPTY = 0
-    HEADER = 1
-    BODY = 2
-
-def decode(text: str):
-    decoded = decode_header(text)[0]
-
-    if decoded[1] is None:
-        return str(decoded[0])
-
-    try:
-        return str(decoded[0], decoded[1])
-    except:
-        return str(decoded[0])
-
-class MBoxMessage():
-    __slots__ = 'state', 'headers', 'line', 'content', 'body', 'key',
-
-    def __init__(self):
-        self.state = MBoxMessageState.EMPTY
-        self.headers = dict()
-        self.line = None
-        self.content = ''
-        self.body = None
-        self.key = None
-
-    def add(self, line: str):
-        if self.line is not None:
-            self.content += self.line
-
-        if self.state is MBoxMessageState.EMPTY:
-            self.state = MBoxMessageState.HEADER
-
-        if self.state is MBoxMessageState.HEADER:
-            if line == '\n' or line == '\r\n':
-                self.state = MBoxMessageState.BODY
-            elif line[0] == ' ' or line[0] == '\t':
-                self.headers[self.key] += ' ' + decode(line.strip())
-            else:
-                match = re.match('^([^:]+): (.*)$', line)
-
-                if match:
-                    self.key = match[1].lower()
-
-                    self.headers[self.key] = decode(match[2].rstrip())
-        elif self.state is MBoxMessageState.BODY:
-            if self.body is None:
-                self.body = ''
-            else:
-                self.body += self.line
-
-        self.line = line
-
-    def header(self, key: str):
-        return self.headers.get(key.lower())
-
-    def date(self):
-        try:
-            return search_dates(self.headers['date'])[0][1]
-        except:
-            return datetime.datetime.fromtimestamp(0)
-
-    def sender(self):
-        return self.headers.get('from', 'Unknown')
-
-    def subject(self):
-        return self.headers.get('subject', '(no subject)')
-
-    def is_first_line(self):
-        return len(self.headers) == 1 and (self.body == '' or self.body is None)
-
-    @staticmethod
-    def each_line(text: str):
-        start = 0
-        end = len(text)
-
-        while True:
-            try:
-                index = text.index('\n', start, end)
-
-                yield text[start:index+1]
-
-                start = index + 1
-
-                if start == end:
-                    break
-            except ValueError:
-                yield text[start:end]
-                break
-
-    @staticmethod
-    def parse(text: str):
-        message = MBoxMessage()
-
-        for line in MBoxMessage.each_line(text):
-            message.add(line)
-
-        return message
-
 class MBoxReader():
     __slots__ = 'path', 'fh', 'line', 'buf', 'message',
 
@@ -186,11 +83,11 @@ class MBoxReader():
 
             if self.buf.is_start():
                 if self.message is None:
-                    self.message = MBoxMessage()
+                    self.message = Message()
                 else:
                     ret = self.message
 
-                    self.message = MBoxMessage()
+                    self.message = Message()
                     self.message.add(line)
 
                     return ret
diff --git a/lib/nntp/tiny/message.py b/lib/nntp/tiny/message.py
new file mode 100644
index 0000000..67e4936
--- /dev/null
+++ b/lib/nntp/tiny/message.py
@@ -0,0 +1,108 @@
+import re
+import enum
+import datetime
+
+from dateparser.search import search_dates
+from email.header import decode_header
+
+def decode(text: str):
+    decoded = decode_header(text)[0]
+
+    if decoded[1] is None:
+        return str(decoded[0])
+
+    try:
+        return str(decoded[0], decoded[1])
+    except:
+        return str(decoded[0])
+
+class MessageState(enum.Enum):
+    EMPTY = 0
+    HEADER = 1
+    BODY = 2
+
+class Message():
+    __slots__ = 'state', 'headers', 'line', 'content', 'body', 'key',
+
+    HEADER_REGEX = '^([A-Za-z0-9\\-]+): (.*)$'
+
+    def __init__(self):
+        self.state = MessageState.EMPTY
+        self.headers = dict()
+        self.line = None
+        self.content = ''
+        self.body = None
+        self.key = None
+
+    def add(self, line: str):
+        if self.line is not None:
+            self.content += self.line
+
+        if self.state is MessageState.EMPTY:
+            self.state = MessageState.HEADER
+
+        if self.state is MessageState.HEADER:
+            if line == '\n' or line == '\r\n':
+                self.state = MessageState.BODY
+            elif line[0] == ' ' or line[0] == '\t':
+                self.headers[self.key] += ' ' + decode(line.strip())
+            else:
+                match = re.match(self.HEADER_REGEX, line)
+
+                if match:
+                    self.key = match[1].lower()
+
+                    self.headers[self.key] = decode(match[2].rstrip())
+        elif self.state is MessageState.BODY:
+            if self.body is None:
+                self.body = ''
+            else:
+                self.body += self.line
+
+        self.line = line
+
+    def header(self, key: str):
+        return self.headers.get(key.lower())
+
+    def date(self):
+        try:
+            return search_dates(self.headers['date'])[0][1]
+        except:
+            return datetime.datetime.fromtimestamp(0)
+
+    def sender(self):
+        return self.headers.get('from', 'Unknown')
+
+    def subject(self):
+        return self.headers.get('subject', '(no subject)')
+
+    def is_first_line(self):
+        return len(self.headers) == 1 and (self.body == '' or self.body is None)
+
+    @staticmethod
+    def each_line(text: str):
+        start = 0
+        end = len(text)
+
+        while True:
+            try:
+                index = text.index('\n', start, end)
+
+                yield text[start:index+1]
+
+                start = index + 1
+
+                if start == end:
+                    break
+            except ValueError:
+                yield text[start:end]
+                break
+
+    @staticmethod
+    def parse(text: str):
+        message = Message()
+
+        for line in Message.each_line(text):
+            message.add(line)
+
+        return message