From 54e5e7d0e4c9753b448c332d633643b0c4738241 Mon Sep 17 00:00:00 2001
From: XANTRONIX Development
Date: Thu, 7 Nov 2024 17:30:13 -0500
Subject: [PATCH] Implement MIME header decoding

---
 lib/nntp/tiny/mbox.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/lib/nntp/tiny/mbox.py b/lib/nntp/tiny/mbox.py
index d94e495..851512a 100644
--- a/lib/nntp/tiny/mbox.py
+++ b/lib/nntp/tiny/mbox.py
@@ -2,6 +2,7 @@ import re
 import enum
 
 from dateparser.search import search_dates
+from email.header import decode_header
 
 class MBoxReaderError(Exception):
     pass
@@ -62,6 +63,28 @@ class MBoxMessageState(enum.Enum):
     HEADER = 1
     BODY = 2
 
+def decode(text: str) -> str:
+    """
+    Decode RFC 2047 encoded words in a header value into plain text.
+
+    Every chunk reported by decode_header() is decoded and joined, so
+    values mixing plain text with encoded words, or using more than one
+    charset, come back whole, and always as str.
+    """
+    parts = []
+
+    for chunk, charset in decode_header(text):
+        if isinstance(chunk, bytes):
+            try:
+                chunk = chunk.decode(charset or 'latin-1', 'replace')
+            except LookupError:
+                # Unknown charset label; latin-1 accepts any byte
+                chunk = chunk.decode('latin-1')
+
+        parts.append(chunk)
+
+    return ''.join(parts)
+
 class MBoxMessage():
     __slots__ = 'state', 'headers', 'line', 'content', 'body', 'key',
 
@@ -84,14 +107,14 @@ class MBoxMessage():
             if line == '\n' or line == '\r\n':
                 self.state = MBoxMessageState.BODY
             elif line[0] == ' ' or line[0] == '\t':
-                self.headers[self.key] += ' ' + line.strip()
+                self.headers[self.key] += ' ' + decode(line.strip())
             else:
                 match = re.match('^([^:]+): (.*)$', line)
 
                 if match:
                     self.key = match[1].lower()
-                    self.headers[self.key] = match[2].rstrip()
+                    self.headers[self.key] = decode(match[2].rstrip())
 
         elif self.state is MBoxMessageState.BODY:
             if self.body is None:
                 self.body = ''