xenu_nntp/lib/nntp/tiny/message.py

402 lines
10 KiB
Python
Raw Normal View History

2024-11-08 15:47:21 -05:00
import re
import enum
import datetime
2024-11-30 17:38:54 -05:00
import uuid
2024-11-08 15:47:21 -05:00
2024-11-11 15:00:19 -05:00
from email.utils import parsedate_to_datetime
2024-11-11 17:20:53 -05:00
from email.header import decode_header, Header
2024-11-08 15:47:21 -05:00
2024-11-08 23:11:09 -05:00
from nntp.tiny.db import DatabaseTable
2024-11-08 15:47:21 -05:00
def decode(text: str):
    """Decode an RFC 2047 encoded header value into plain text.

    Every chunk reported by ``email.header.decode_header`` is decoded and the
    results are joined, so values mixing encoded words with plain text (for
    example ``=?utf-8?q?Hello?= World``) are returned in full rather than
    truncated to their first chunk.

    Decoding is best effort: if the value cannot be parsed, names an unknown
    charset, or contains bytes that are invalid for its charset, the original
    *text* is returned unchanged.
    """
    # Local import: the module-level imports do not bring this name in.
    from email.errors import HeaderParseError

    try:
        parts = decode_header(text)
    except HeaderParseError:
        return text

    chunks = []
    for payload, charset in parts:
        # A value without any encoded word comes back as a single str chunk.
        if isinstance(payload, str):
            chunks.append(payload)
            continue
        try:
            # Unencoded chunks next to encoded words arrive as bytes with no
            # charset; treat them as ASCII.
            chunks.append(payload.decode(charset or 'ascii'))
        except (LookupError, UnicodeDecodeError):
            return text
    return ''.join(chunks)
2024-11-08 15:47:21 -05:00
2024-11-09 22:25:25 -05:00
def each_line(text: str):
    """Yield *text* one line at a time, keeping each trailing ``'\\n'``.

    Only ``'\\n'`` is treated as a line break. A final line without a newline
    is yielded as-is; when *text* ends with a newline no extra empty string
    follows it. An empty *text* yields a single empty string.
    """
    length = len(text)
    cursor = 0
    while True:
        newline_at = text.find('\n', cursor)
        if newline_at == -1:
            # No further newline: whatever is left is the last line.
            yield text[cursor:]
            return
        following = newline_at + 1
        yield text[cursor:following]
        if following == length:
            return
        cursor = following
2024-11-11 12:37:48 -05:00
def parse_timestamp(timestamp: str):
    """Convert an RFC 2822 style date string into a ``datetime``.

    A missing value (``None`` or the empty string) maps to
    ``datetime.datetime.fromtimestamp(0)``; anything else is handed to
    ``email.utils.parsedate_to_datetime``, whose exceptions propagate.
    """
    missing = timestamp is None or timestamp == ''
    if not missing:
        return parsedate_to_datetime(timestamp)
    return datetime.datetime.fromtimestamp(0)
2024-11-11 12:37:48 -05:00
2024-11-27 19:29:54 -05:00
class MessagePart(enum.Enum):
    """Enumerates three parts of a message: HEAD, BODY and WHOLE."""

    HEAD = 1
    BODY = 2
    WHOLE = 3
2024-11-27 19:28:28 -05:00
class MessageRange():
    """A single message number or an open/closed range of message numbers.

    Exactly one of these shapes is produced by parse():

    * ``"N"``   -> ``id`` is set
    * ``"A-B"`` -> ``min`` and ``max`` are set
    * ``"A-"``  -> only ``min`` is set
    * ``"-B"``  -> only ``max`` is set
    """

    __slots__ = 'id', 'min', 'max',

    RE_NUM = re.compile(r'^(\d+)$')
    RE_RANGE = re.compile(r'^(\d+)-(\d+)$')
    # The end-of-string anchor belongs after the dash; inside the group it
    # made an open-ended "A-" range impossible to match.
    RE_RANGE_LOWER = re.compile(r'^(\d+)-$')
    RE_RANGE_UPPER = re.compile(r'^-(\d+)$')

    def __init__(self):
        self.id: int = None
        self.min: int = None
        self.max: int = None

    def __str__(self):
        """Return the textual form accepted by parse(), or ``"?"`` if unset."""
        if self.id is not None:
            return str(self.id)

        if self.min is not None and self.max is None:
            return "%d-" % (self.min)
        elif self.min is not None and self.max is not None:
            return "%d-%d" % (self.min, self.max)
        elif self.min is None and self.max is not None:
            return "-%d" % (self.max)

        return "?"

    def where(self, table=None):
        """Return an SQL condition on the ``id`` column matching this range.

        *table*, when given, is used as a qualifier (``"table.id"``). It is
        interpolated verbatim, so it must be a trusted identifier. The numeric
        bounds are formatted with ``%d``. Returns ``None`` when no bound is
        set.
        """
        prefix = '' if table is None else f"{table}."
        column = prefix + 'id'

        if self.id is not None:
            return "%s = %d" % (column, self.id)

        if self.min is not None and self.max is None:
            return "%s >= %d" % (column, self.min)
        elif self.min is not None and self.max is not None:
            return "%s >= %d and %s <= %d" % (column, self.min, column, self.max)
        elif self.min is None and self.max is not None:
            return "%s <= %d" % (column, self.max)

        return None

    @classmethod
    def parse(cls, r: str):
        """Parse ``"N"``, ``"A-B"``, ``"A-"`` or ``"-B"`` into a MessageRange.

        Returns ``None`` when *r* matches none of these forms.
        """
        # Each pattern is paired with the attributes its groups populate.
        shapes = (
            (cls.RE_NUM, ('id',)),
            (cls.RE_RANGE, ('min', 'max')),
            (cls.RE_RANGE_LOWER, ('min',)),
            (cls.RE_RANGE_UPPER, ('max',)),
        )

        for pattern, fields in shapes:
            match = pattern.match(r)
            if match:
                obj = cls()
                for field, digits in zip(fields, match.groups()):
                    setattr(obj, field, int(digits))
                return obj

        return None
2024-11-08 15:47:21 -05:00
class MessageState(enum.Enum):
    """Parser position while a message is read line by line."""

    # No line has been read yet.
    EMPTY = 0
    # Header lines are being read; a blank line ends this state.
    HEADER = 1
    # The blank separator has been seen; remaining lines are body text.
    BODY = 2
2024-11-08 23:11:09 -05:00
class Message(DatabaseTable):
    """A news article built from a database row or from raw article text.

    Raw text is consumed one line at a time by readline(), which fills the
    header dictionaries and the body and accumulates the text in ``content``.
    A message created by __from_row__() keeps the indexed columns in a small
    cache and only parses ``content`` when a header or the body is requested.

    ``name``, ``key`` and ``columns`` are presumably consumed by
    DatabaseTable -- confirm against that class.
    """

    __slots__ = (
        '_cache',
        '_headers',
        '_headers_lc',
        '_body',
        '_key',
        'id',
        'state',
        'line',
        'content',
    )

    name = 'message'
    key = 'id'
    columns = (
        'id',
        'created_on',
        'message_id',
        'reference_ids',
        'sender',
        'subject',
        'content'
    )

    RE_HEADER = re.compile(r'^([A-Za-z0-9\-]+): (.*)$')

    def __init__(self):
        self._cache = dict()
        # Header name -> raw value, as spelled in the message.
        self._headers = None
        # Lower-cased header name -> raw value, for case-insensitive lookup.
        self._headers_lc = None
        self._body = None
        # Name of the most recently read header, for continuation lines.
        self._key = None
        self.id = None
        self.state = MessageState.EMPTY
        # Most recent line given to readline(); it is committed to content
        # (and the body) by the next readline() call or by finish().
        self.line = None
        self.content = ''

    @staticmethod
    def __from_row__(row):
        """Build a Message from a mapping keyed by the names in ``columns``."""
        message = Message()

        #
        # Defer parsing the message content until a specific header not already
        # assigned to a dedicated property, or the message body, is required.
        #
        message.content = row['content']

        message.id = row['id']
        message.created_on = row['created_on']
        message.message_id = row['message_id']
        message.reference_ids = row['reference_ids']
        message.sender = row['sender']
        message.subject = row['subject']

        return message

    def __values__(self) -> tuple:
        """Return the column values in ``columns`` order, without ``id``."""
        return (
            self.created_on,
            self.message_id,
            self.reference_ids,
            self.sender,
            self.subject,
            self.content
        )

    def _ensure_parsed(self):
        """Parse ``content`` if no line of this message has been read yet."""
        if self._headers is not None:
            return

        # read() re-appends every line it consumes to ``content``. Start from
        # an empty buffer so the stored text is rebuilt instead of duplicated.
        text = self.content
        self.content = ''
        self.read(text)

    @property
    def headers(self):
        """Header name -> raw value, parsing ``content`` on first use."""
        self._ensure_parsed()

        return self._headers

    @property
    def body(self):
        """Body text; an empty string when the message has no body lines."""
        self._ensure_parsed()

        return '' if self._body is None else self._body

    def _header_set(self, key: str, value: str):
        """Store *value* under *key*, replacing any previous value."""
        self._headers[key] = value
        self._headers_lc[key.lower()] = value

    def _header_append(self, key: str, value: str):
        """Store *value* under *key*, or concatenate it to an existing value."""
        if key not in self._headers:
            self._headers[key] = value
            self._headers_lc[key.lower()] = value
        else:
            self._headers[key] += value
            self._headers_lc[key.lower()] += value

    def header(self, key: str, default=None):
        """Return the decoded value of header *key*, looked up ignoring case.

        When the header is absent, *default* is decoded and returned instead;
        ``None`` is returned if that is ``None`` as well.
        """
        self._ensure_parsed()

        value = self._headers_lc.get(key.lower(), default)

        if value is not None:
            return decode(value)

        return None

    @property
    def created_on(self):
        """Creation time: the cached value if any, else the ``Date`` header."""
        value = self._cache.get('created_on')

        if value is not None:
            return datetime.datetime.fromisoformat(value)

        timestamp = self.header('Date')
        ret = parse_timestamp(timestamp)

        self._cache['created_on'] = str(ret)

        return ret

    @created_on.setter
    def created_on(self, value):
        if value is None:
            # Never cache the string 'None'; before parsing, forget any cached
            # value so the getter falls back to the Date header.
            if self._headers is None:
                self._cache.pop('created_on', None)
            return

        if self._headers is not None:
            # Local import: the module-level imports do not bring this name in.
            from email.utils import format_datetime

            # A Date header has to be in the format parse_timestamp() reads
            # back; str(datetime) is not.
            if isinstance(value, datetime.datetime):
                text = format_datetime(value)
            else:
                text = str(value)

            self._header_set('Date', Header(text).encode())

        # The getter consults the cache first, so keep it current either way.
        self._cache['created_on'] = str(value)

    @property
    def message_id(self) -> str:
        """Cached value before parsing, otherwise the ``Message-ID`` header."""
        if self._headers is None:
            return self._cache.get('message_id')

        return self.header('Message-ID')

    @message_id.setter
    def message_id(self, value):
        if self._headers is None:
            self._cache['message_id'] = value
        elif value is not None:
            self._header_set('Message-ID', Header(value).encode())

    @property
    def reference_ids(self) -> str:
        """Cached value before parsing, otherwise the ``References`` header."""
        if self._headers is None:
            return self._cache.get('reference_ids')

        return self.header('References')

    @reference_ids.setter
    def reference_ids(self, value):
        if self._headers is None:
            self._cache['reference_ids'] = value
        elif value is not None:
            self._header_set('References', Header(value).encode())

    @property
    def sender(self) -> str:
        """Cached value before parsing, otherwise the ``From`` header."""
        if self._headers is None:
            return self._cache.get('sender')

        return self.header('From', 'Unknown')

    @sender.setter
    def sender(self, value):
        if self._headers is None:
            self._cache['sender'] = value
        elif value is not None:
            self._header_set('From', Header(value).encode())

    @property
    def subject(self) -> str:
        """Cached value before parsing, otherwise the ``Subject`` header."""
        if self._headers is None:
            return self._cache.get('subject', '(no subject)')

        return self.header('subject', '(no subject)')

    @subject.setter
    def subject(self, value):
        if self._headers is None:
            self._cache['subject'] = value
        elif value is not None:
            self._header_set('Subject', Header(value).encode())

    def is_first_line(self):
        """Return True while exactly one header and no body text is stored."""
        return len(self.headers) == 1 and (self._body == '' or self._body is None)

    def readline(self, line: str):
        """Feed the next raw line of the message to the parser.

        The line is only buffered in ``self.line``; it is added to ``content``
        (and to the body, when applicable) by the following readline() call or
        by finish().
        """
        if self.line is not None:
            self.content += self.line

        if self.state is MessageState.EMPTY:
            self.state = MessageState.HEADER
            self._headers = dict()
            self._headers_lc = dict()

        if self.state is MessageState.HEADER:
            if line == '\n' or line == '\r\n':
                self.state = MessageState.BODY
            elif line[:1] in (' ', '\t'):
                # Folded continuation of the previous header. Slicing keeps an
                # empty line from raising, and a continuation that precedes
                # any header is ignored because there is nothing to extend.
                if self._key is not None:
                    self._header_append(self._key, ' ' + line.strip())
            else:
                match = self.RE_HEADER.match(line)

                if match:
                    self._key = match[1]
                    self._header_append(self._key, match[2].rstrip())
        elif self.state is MessageState.BODY:
            if self._body is None:
                # First body line: the buffered line is the blank separator,
                # which is not part of the body.
                self._body = ''
            elif self.line is not None:
                self._body += self.line

        self.line = line

    def finish(self):
        """Commit the buffered last line; safe to call more than once."""
        if self.line:
            self.content += self.line

            # The body only exists once a body line has been read. Otherwise
            # the buffered line is a header or the blank separator and must
            # not be added to it.
            if self._body is not None:
                self._body += self.line

        self.line = None

    def read(self, text: str):
        """Feed every line of *text* to readline(), then finish()."""
        for line in each_line(text):
            self.readline(line)

        self.finish()

    def message_id_assign(self):
        """Generate a Message-ID when the message has a sender but no ID.

        The ID is ``<uuid4@host>`` where *host* is the domain of the sender's
        address, or ``unknown.host`` when no domain can be extracted.
        """
        # Local import: the module-level imports do not bring this name in.
        from email.utils import parseaddr

        sender = self.sender

        if sender is None:
            return

        if self.message_id is not None:
            return

        # parseaddr() strips any display name and angle brackets, so forms
        # such as "Name <user@host>" yield a clean host part.
        address = parseaddr(sender)[1]
        host = address.rpartition('@')[2] if '@' in address else ''
        remote = host or 'unknown.host'

        self.message_id = '<%s@%s>' % (
            str(uuid.uuid4()),
            remote
        )

    def validate(self):
        """Return True when the required fields and headers are present.

        Requires a parseable date, a sender, a subject, a Message-ID wrapped
        in angle brackets, and ``Newsgroups`` and ``Path`` headers.
        """
        try:
            if self.created_on is None:
                return False
        except (TypeError, ValueError):
            # A malformed date makes the message invalid, not the caller crash.
            return False

        if self.sender is None:
            return False

        message_id = self.message_id

        if message_id is None:
            return False
        elif not (message_id.startswith('<') and message_id.endswith('>')):
            return False

        if self.subject is None:
            return False

        if self.header('Newsgroups') is None:
            return False

        if self.header('Path') is None:
            return False

        return True

    @staticmethod
    def from_text(text: str):
        """Build a Message by parsing the complete article *text*."""
        message = Message()

        # read() also calls finish(), so the final line is committed to
        # content and body instead of being left in the line buffer.
        message.read(text)

        return message