Implement MIME header decoding

This commit is contained in:
XANTRONIX Development 2024-11-07 17:30:13 -05:00
parent 23b854fbc4
commit 54e5e7d0e4

View file

@ -2,6 +2,7 @@ import re
import enum
from dateparser.search import search_dates
from email.header import decode_header
class MBoxReaderError(Exception):
    """Exception type declared by this module for mbox reader errors."""
@ -62,6 +63,14 @@ class MBoxMessageState(enum.Enum):
HEADER = 1
BODY = 2
def decode(text: str) -> str:
    """Decode a header value that may contain RFC 2047 encoded words.

    Every chunk reported by ``decode_header`` is converted to ``str`` and
    the chunks are concatenated, so headers that mix plain text with
    encoded words (or use several charsets) are returned in full.

    Args:
        text: The raw header value, without the ``Name:`` prefix.

    Returns:
        The decoded header value as a ``str``.
    """
    pieces = []

    for payload, charset in decode_header(text):
        # decode_header() hands back the input unchanged as ``str`` when it
        # contains no encoded words at all.
        if isinstance(payload, str):
            pieces.append(payload)
            continue

        # Unencoded chunks sitting next to encoded words arrive as bytes
        # with no charset; latin-1 maps every byte to a character, so it
        # can never fail.
        try:
            pieces.append(payload.decode(charset or 'latin-1'))
        except LookupError:
            # Charset label unknown to Python: keep the text readable
            # instead of aborting the whole message.
            pieces.append(payload.decode('latin-1'))
        except UnicodeDecodeError:
            # Bytes do not match the declared charset: substitute the
            # offending bytes rather than raising.
            pieces.append(payload.decode(charset, errors='replace'))

    return ''.join(pieces)
class MBoxMessage():
__slots__ = 'state', 'headers', 'line', 'content', 'body', 'key',
@ -84,14 +93,14 @@ class MBoxMessage():
if line == '\n' or line == '\r\n':
self.state = MBoxMessageState.BODY
elif line[0] == ' ' or line[0] == '\t':
self.headers[self.key] += ' ' + line.strip()
self.headers[self.key] += ' ' + decode(line.strip())
else:
match = re.match('^([^:]+): (.*)$', line)
if match:
self.key = match[1].lower()
self.headers[self.key] = match[2].rstrip()
self.headers[self.key] = decode(match[2].rstrip())
elif self.state is MBoxMessageState.BODY:
if self.body is None:
self.body = ''