# Date-header parsing helpers for lib/nntp/tiny/message.py.  They take over
# from the former dateparser.search.search_dates dependency and rely on that
# module's existing top-level ``import re`` and ``import datetime``.

# Lower-case month names and three-letter abbreviations -> month number.
DATE_MONTHS = {
    'jan': 1, 'january': 1,
    'feb': 2, 'february': 2,
    'mar': 3, 'march': 3,
    'apr': 4, 'april': 4,
    'may': 5,
    'jun': 6, 'june': 6,
    'jul': 7, 'july': 7,
    'aug': 8, 'august': 8,
    'sep': 9, 'september': 9,
    'oct': 10, 'october': 10,
    'nov': 11, 'november': 11,
    'dec': 12, 'december': 12,
}

# Regex fragments shared by every accepted layout.  Each fragment after the
# date carries its own leading space so the layouts below simply concatenate.
_WEEKDAY = r'(?:[A-Za-z]+),\s+'
_DATE = r'(?P<dd>\d{1,2}) (?P<month>[A-Za-z]+) (?P<yyyy>\d{4})'
_HM = r' (?P<hh>\d{2}):(?P<MM>\d{2})'
_HMS = _HM + r':(?P<ss>\d{2})'
_OFFSET = r' (?P<offset_sign>[+\-])(?P<offset_hour>\d{2})(?P<offset_minute>\d{2})'
# Trailing zone comment such as "(EST)"; the same character class is used for
# every layout (one layout previously accepted upper-case letters only).
_COMMENT = r' \((?:[A-Za-z ]+)\)'
_UTC = r' (?:GMT|UTC)'


def _date_layout(*fragments):
    """Compile the given fragments into one fully anchored pattern."""
    return re.compile('^' + ''.join(fragments) + '$')


# Accepted Date layouts.  They are mutually exclusive, so order is irrelevant.
DATE_RE = [
    _date_layout(_WEEKDAY, _DATE, _HMS, _OFFSET),
    _date_layout(_WEEKDAY, _DATE, _HMS, _OFFSET, _COMMENT),
    _date_layout(_WEEKDAY, _DATE, _HMS, _UTC),
    _date_layout(_WEEKDAY, _DATE, _HM, _OFFSET),
    _date_layout(_WEEKDAY, _DATE, _HM, _OFFSET, _COMMENT),
    _date_layout(_WEEKDAY, _DATE, _HM, _UTC),
    _date_layout(_DATE, _HMS, _OFFSET),
    _date_layout(_DATE, _HMS, _UTC),
]


def parse_timestamp(timestamp: str):
    """Parse a message ``Date`` header value into an aware ``datetime``.

    Args:
        timestamp: Raw header text, e.g. ``'Tue, 01 Jan 2019 12:34:56 +0100'``.
            Leading/trailing whitespace is ignored.  ``None`` and blank
            strings are accepted.

    Returns:
        A timezone-aware ``datetime.datetime`` carrying the offset found in
        the text (UTC for the ``GMT``/``UTC`` layouts).  ``None`` or blank
        input yields the Unix epoch in UTC.  Text that matches none of the
        ``DATE_RE`` layouts yields ``None``.

    Raises:
        KeyError: the month name is not a key of ``DATE_MONTHS``.
        ValueError: the fields do not form a valid date, time or offset.

    NOTE(review): the created-on caching in ``Message`` stores ``str()`` of
    this result, so a ``None`` return would be cached as the text ``'None'``;
    confirm that call site handles the no-match case.
    """
    text = timestamp.strip() if timestamp is not None else ''

    if not text:
        # Aware epoch: consistent with every other return value and not
        # dependent on the local timezone of the machine.
        return datetime.datetime.fromtimestamp(0, datetime.timezone.utc)

    # NOTE: the loop variable must not be called ``re`` -- that would shadow
    # the regex module for the whole function.
    for pattern in DATE_RE:
        match = pattern.match(text)

        if match is None:
            continue

        fields = match.groupdict()

        # Layouts without a numeric offset (GMT/UTC) have no offset_* groups,
        # so the defaults below resolve to a zero offset.
        sign = -1 if fields.get('offset_sign') == '-' else 1
        tz = datetime.timezone(datetime.timedelta(
            hours=sign * int(fields.get('offset_hour') or 0),
            minutes=sign * int(fields.get('offset_minute') or 0),
        ))

        # Every current layout names the month; a numeric ``mm`` group is
        # still honoured should such a layout be added to DATE_RE.
        if fields.get('month') is not None:
            month = DATE_MONTHS[fields['month'].lower()]
        else:
            month = int(fields['mm'])

        return datetime.datetime(
            int(fields['yyyy']),
            month,
            int(fields['dd']),
            int(fields.get('hh') or 0),
            int(fields.get('MM') or 0),
            int(fields.get('ss') or 0),
            0,
            tz,
        )

    # No layout matched.
    return None