Commit stuff

This commit is contained in:
XANTRONIX Development 2024-11-08 15:47:21 -05:00
parent d19c02228e
commit 5e42789043
4 changed files with 149 additions and 107 deletions

11
db/newsgroup.sql Normal file
View file

@ -0,0 +1,11 @@
-- Schema for stored newsgroup messages.
-- The table is created inside a single transaction, so either the whole
-- script takes effect or none of it does.
-- NOTE(review): presumably loaded into the SQLite database that
-- lib/nntp/tiny/db.py opens -- confirm against the deployment scripts.
begin transaction;
-- One row per message; every column is mandatory (NOT NULL).
create table newsgroup_message (
-- Integer primary key identifying the row.
id INTEGER PRIMARY KEY NOT NULL,
-- When the message was posted; defaults to the insertion time when the
-- insert statement does not supply a value.
posted_on DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- Sender of the message, stored as text.
sender TEXT NOT NULL,
-- Subject line of the message, stored as text.
subject TEXT NOT NULL,
-- Message content, stored as text.
content TEXT NOT NULL
);
commit;

26
lib/nntp/tiny/db.py Normal file
View file

@ -0,0 +1,26 @@
import sqlite3
from nntp.tiny.message import Message
class Database():
    """Thin wrapper around a SQLite connection that stores newsgroup messages.

    The connection is opened by the constructor and kept in ``db``. Rows
    written through :meth:`message_add` are only persisted once
    :meth:`commit` is called. Release the connection with :meth:`close`, or
    use the instance as a context manager so it is closed automatically::

        with Database(path) as db:
            db.message_add(message)
            db.commit()
    """

    __slots__ = ('db',)

    def __init__(self, path: str):
        """Open a SQLite connection to the database file at ``path``."""
        self.db = sqlite3.connect(path)

    def message_add(self, message: 'Message'):
        """Insert one message into the ``newsgroup_message`` table.

        Args:
            message: Object providing ``date()`` (returning something with
                ``isoformat()``), ``sender()``, ``subject()`` and a
                ``content`` attribute.

        The insert is not committed here; call :meth:`commit` afterwards.
        """
        # Values are bound as parameters, never interpolated into the SQL.
        sql = """
            insert into
                newsgroup_message
                (posted_on, sender, subject, content) values (?, ?, ?, ?)
        """

        self.db.execute(sql, (
            message.date().isoformat(),
            message.sender(),
            message.subject(),
            message.content,
        ))

    def commit(self):
        """Commit the pending transaction on the underlying connection."""
        self.db.commit()

    def close(self):
        """Close the underlying connection.

        Changes that were not committed beforehand are not committed here.
        """
        self.db.close()

    def __enter__(self):
        """Return this instance for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection on leaving the ``with`` block.

        Returns ``False`` so an exception raised inside the block propagates.
        """
        self.close()
        return False

View file

@ -1,9 +1,6 @@
import re
import enum
import datetime
from dateparser.search import search_dates
from email.header import decode_header
from nntp.tiny.message import Message
class MBoxReaderError(Exception):
    """Exception subclass for mbox reader errors.

    Adds no behaviour of its own on top of ``Exception``; the places that
    raise it are not visible in this excerpt.
    """
@ -45,7 +42,7 @@ class MBoxReaderBuffer():
return self.lines[line][0:5] == 'From '
def is_header_line(self, line):
    """Report whether the buffered line at index ``line`` looks like a header.

    Args:
        line: Index into ``self.lines`` of the line to test.

    Returns:
        ``True`` when the line matches ``Message.HEADER_REGEX`` (a
        ``Name: value`` shape), ``False`` otherwise.
    """
    # Single source of truth for the header pattern lives on Message; the
    # previously duplicated literal pattern (and its dead second return) is
    # gone.
    return re.match(Message.HEADER_REGEX, self.lines[line]) is not None
def is_start(self):
if self.lines[0] is None or self.lines[1] is None:
@ -59,106 +56,6 @@ class MBoxReaderBuffer():
and self.is_header_line(3):
return 3
@enum.unique
class MBoxMessageState(enum.Enum):
    """Parsing phase of an ``MBoxMessage`` as lines are added to it."""

    # No line has been added yet.
    EMPTY = 0
    # Lines are being read as headers, until a blank line is seen.
    HEADER = 1
    # The blank separator has been seen; lines now belong to the body.
    BODY = 2
def decode(text: str) -> str:
    """Decode an RFC 2047 encoded header value into a plain string.

    Every chunk returned by ``email.header.decode_header`` is decoded and the
    results are concatenated, so plain text surrounding an encoded word is
    kept rather than dropped.

    Args:
        text: Raw header value, possibly containing ``=?charset?q?...?=`` or
            ``=?charset?b?...?=`` encoded words.

    Returns:
        The decoded header value. Chunks whose charset is unknown or whose
        bytes do not decode in the declared charset fall back to latin-1.
    """
    pieces = []

    for value, charset in decode_header(text):
        if isinstance(value, str):
            # No encoded words at all: decode_header hands the text back as-is.
            pieces.append(value)
            continue

        try:
            # Chunks without a charset are the unencoded parts, which
            # decode_header turned into bytes via raw-unicode-escape.
            pieces.append(value.decode(charset or 'raw-unicode-escape'))
        except (LookupError, ValueError):
            # Unknown charset name or undecodable bytes; latin-1 maps every
            # byte to a character, so this fallback cannot fail.
            pieces.append(value.decode('latin-1'))

    return ''.join(pieces)
class MBoxMessage():
    """Incrementally parsed message: decoded headers, body and raw content.

    Lines are fed one at a time through :meth:`add`. The most recently added
    line is held back in ``line`` and only appended to ``content`` (and, once
    in the body, to ``body``) when the next line arrives, so the last line
    added is never part of ``content`` or ``body``.
    """

    __slots__ = 'state', 'headers', 'line', 'content', 'body', 'key',

    def __init__(self):
        """Create an empty message that has not received any line yet."""
        self.state = MBoxMessageState.EMPTY
        self.headers = dict()   # lower-cased header name -> decoded value
        self.line = None        # last line added, not yet flushed
        self.content = ''       # every flushed line, headers included
        self.body = None        # flushed body lines; None until the body starts
        self.key = None         # name of the header parsed most recently

    def add(self, line: str):
        """Feed the next raw line of the message into the parser.

        Args:
            line: One line of the message, normally including its line ending.
        """
        # Flush the line held back by the previous call.
        if self.line is not None:
            self.content += self.line

        if self.state is MBoxMessageState.EMPTY:
            self.state = MBoxMessageState.HEADER

        if self.state is MBoxMessageState.HEADER:
            if line == '\n' or line == '\r\n':
                # Blank line: headers are finished.
                self.state = MBoxMessageState.BODY
            elif line.startswith((' ', '\t')):
                # Folded continuation of the previous header. With no header
                # seen yet there is nothing to extend, so the line is ignored
                # instead of raising KeyError.
                if self.key is not None:
                    self.headers[self.key] += ' ' + decode(line.strip())
            else:
                # startswith() above also keeps an empty line from raising
                # IndexError; it simply fails to match here.
                match = re.match('^([^:]+): (.*)$', line)

                if match:
                    self.key = match[1].lower()
                    self.headers[self.key] = decode(match[2].rstrip())
        elif self.state is MBoxMessageState.BODY:
            if self.body is None:
                # First body line: the held-back line is the blank separator,
                # which is not part of the body.
                self.body = ''
            else:
                self.body += self.line

        self.line = line

    def header(self, key: str):
        """Return the value of header ``key`` (case-insensitive), or None."""
        return self.headers.get(key.lower())

    def date(self):
        """Return the first date found in the ``Date`` header.

        Falls back to ``datetime.datetime.fromtimestamp(0)`` when the header
        is missing or no date can be extracted from it.
        """
        try:
            return search_dates(self.headers['date'])[0][1]
        except Exception:
            # Best-effort: a missing header, an empty search result or a
            # parser failure all yield the epoch, without also swallowing
            # KeyboardInterrupt/SystemExit as a bare except would.
            return datetime.datetime.fromtimestamp(0)

    def sender(self):
        """Return the ``From`` header, or ``'Unknown'`` when absent."""
        return self.headers.get('from', 'Unknown')

    def subject(self):
        """Return the ``Subject`` header, or ``'(no subject)'`` when absent."""
        return self.headers.get('subject', '(no subject)')

    def is_first_line(self):
        """Report whether exactly one header and no body text is stored."""
        return len(self.headers) == 1 and (self.body == '' or self.body is None)

    @staticmethod
    def each_line(text: str):
        """Yield the lines of ``text`` split on ``'\\n'``, endings included.

        A final segment without a trailing newline is yielded as-is; empty
        ``text`` yields a single empty string.
        """
        start = 0
        end = len(text)

        while True:
            index = text.find('\n', start, end)

            if index < 0:
                yield text[start:end]
                return

            yield text[start:index + 1]
            start = index + 1

            if start == end:
                return

    @staticmethod
    def parse(text: str):
        """Build a message by feeding every line of ``text`` to :meth:`add`."""
        message = MBoxMessage()

        for line in MBoxMessage.each_line(text):
            message.add(line)

        return message
class MBoxReader():
__slots__ = 'path', 'fh', 'line', 'buf', 'message',
@ -186,11 +83,11 @@ class MBoxReader():
if self.buf.is_start():
if self.message is None:
self.message = MBoxMessage()
self.message = Message()
else:
ret = self.message
self.message = MBoxMessage()
self.message = Message()
self.message.add(line)
return ret

108
lib/nntp/tiny/message.py Normal file
View file

@ -0,0 +1,108 @@
import re
import enum
import datetime
from dateparser.search import search_dates
from email.header import decode_header
def decode(text: str) -> str:
    """Decode an RFC 2047 encoded header value into a plain string.

    Every chunk returned by ``email.header.decode_header`` is decoded and the
    results are concatenated, so plain text surrounding an encoded word is
    kept rather than dropped.

    Args:
        text: Raw header value, possibly containing ``=?charset?q?...?=`` or
            ``=?charset?b?...?=`` encoded words.

    Returns:
        The decoded header value. Chunks whose charset is unknown or whose
        bytes do not decode in the declared charset fall back to latin-1.
    """
    pieces = []

    for value, charset in decode_header(text):
        if isinstance(value, str):
            # No encoded words at all: decode_header hands the text back as-is.
            pieces.append(value)
            continue

        try:
            # Chunks without a charset are the unencoded parts, which
            # decode_header turned into bytes via raw-unicode-escape.
            pieces.append(value.decode(charset or 'raw-unicode-escape'))
        except (LookupError, ValueError):
            # Unknown charset name or undecodable bytes; latin-1 maps every
            # byte to a character, so this fallback cannot fail.
            pieces.append(value.decode('latin-1'))

    return ''.join(pieces)
@enum.unique
class MessageState(enum.Enum):
    """Parsing phase of a ``Message`` as lines are added to it."""

    # No line has been added yet.
    EMPTY = 0
    # Lines are being read as headers, until a blank line is seen.
    HEADER = 1
    # The blank separator has been seen; lines now belong to the body.
    BODY = 2
class Message():
    """Incrementally parsed message: decoded headers, body and raw content.

    Lines are fed one at a time through :meth:`add`. The most recently added
    line is held back in ``line`` and only appended to ``content`` (and, once
    in the body, to ``body``) when the next line arrives, so the last line
    added is never part of ``content`` or ``body``.
    """

    __slots__ = 'state', 'headers', 'line', 'content', 'body', 'key',

    # Shape of a "Name: value" header line: group 1 is the header name,
    # group 2 the value.
    HEADER_REGEX = '^([A-Za-z0-9\\-]+): (.*)$'

    def __init__(self):
        """Create an empty message that has not received any line yet."""
        self.state = MessageState.EMPTY
        self.headers = dict()   # lower-cased header name -> decoded value
        self.line = None        # last line added, not yet flushed
        self.content = ''       # every flushed line, headers included
        self.body = None        # flushed body lines; None until the body starts
        self.key = None         # name of the header parsed most recently

    def add(self, line: str):
        """Feed the next raw line of the message into the parser.

        Args:
            line: One line of the message, normally including its line ending.
        """
        # Flush the line held back by the previous call.
        if self.line is not None:
            self.content += self.line

        if self.state is MessageState.EMPTY:
            self.state = MessageState.HEADER

        if self.state is MessageState.HEADER:
            if line == '\n' or line == '\r\n':
                # Blank line: headers are finished.
                self.state = MessageState.BODY
            elif line.startswith((' ', '\t')):
                # Folded continuation of the previous header. With no header
                # seen yet there is nothing to extend, so the line is ignored
                # instead of raising KeyError.
                if self.key is not None:
                    self.headers[self.key] += ' ' + decode(line.strip())
            else:
                # startswith() above also keeps an empty line from raising
                # IndexError; it simply fails to match here.
                match = re.match(self.HEADER_REGEX, line)

                if match:
                    self.key = match[1].lower()
                    self.headers[self.key] = decode(match[2].rstrip())
        elif self.state is MessageState.BODY:
            if self.body is None:
                # First body line: the held-back line is the blank separator,
                # which is not part of the body.
                self.body = ''
            else:
                self.body += self.line

        self.line = line

    def header(self, key: str):
        """Return the value of header ``key`` (case-insensitive), or None."""
        return self.headers.get(key.lower())

    def date(self):
        """Return the first date found in the ``Date`` header.

        Falls back to ``datetime.datetime.fromtimestamp(0)`` when the header
        is missing or no date can be extracted from it.
        """
        try:
            return search_dates(self.headers['date'])[0][1]
        except Exception:
            # Best-effort: a missing header, an empty search result or a
            # parser failure all yield the epoch, without also swallowing
            # KeyboardInterrupt/SystemExit as a bare except would.
            return datetime.datetime.fromtimestamp(0)

    def sender(self):
        """Return the ``From`` header, or ``'Unknown'`` when absent."""
        return self.headers.get('from', 'Unknown')

    def subject(self):
        """Return the ``Subject`` header, or ``'(no subject)'`` when absent."""
        return self.headers.get('subject', '(no subject)')

    def is_first_line(self):
        """Report whether exactly one header and no body text is stored."""
        return len(self.headers) == 1 and (self.body == '' or self.body is None)

    @staticmethod
    def each_line(text: str):
        """Yield the lines of ``text`` split on ``'\\n'``, endings included.

        A final segment without a trailing newline is yielded as-is; empty
        ``text`` yields a single empty string.
        """
        start = 0
        end = len(text)

        while True:
            index = text.find('\n', start, end)

            if index < 0:
                yield text[start:end]
                return

            yield text[start:index + 1]
            start = index + 1

            if start == end:
                return

    @staticmethod
    def parse(text: str):
        """Build a message by feeding every line of ``text`` to :meth:`add`."""
        # The class was renamed from MBoxMessage when it moved to this
        # module; the old name is not defined here and raised NameError.
        message = Message()

        for line in Message.each_line(text):
            message.add(line)

        return message