Massive email parsing performance boost

Massively speed up email parsing by replacing the hand-rolled, line-by-line
reader with Python's own standard-library email parser
(email.message_from_string).
This commit is contained in:
XANTRONIX Development 2025-01-05 00:16:02 -05:00
parent 467dd77de3
commit 00b0347fe2

View file

@ -3,6 +3,8 @@ import enum
import datetime
import uuid
import email.message
from email.utils import parsedate_to_datetime
from email.header import decode_header, Header
@ -343,10 +345,16 @@ class Message(DatabaseTable):
self.line = None
def read(self, text: str):
    """
    Parse a complete message from text and load it into this object.

    The text is handed to the standard library's email parser in one
    call instead of being fed through readline() one line at a time.
    Any headers already stored on this object are discarded first.

    :param text: The full message source, headers followed by body.
    """
    obj = email.message_from_string(text)

    # Discard previously stored headers before loading the parsed ones.
    self._headers = dict()
    self._headers_lc = dict()

    # items() yields one (name, value) pair per header line, so repeated
    # headers such as "Received" each contribute their own value.  Looking
    # values up with obj.get(name) would hand back the same single value
    # for every occurrence of a repeated name.
    for key, value in obj.items():
        self._header_set(key, value)

    # NOTE(review): get_payload() returns a list of sub-messages rather
    # than a string when the input is multipart -- confirm that consumers
    # of _body can cope with that.
    self._body = obj.get_payload()
    self.line = None
def message_id_assign(self):
sender = self.sender
@ -394,7 +402,11 @@ class Message(DatabaseTable):
def from_text(text: str):
    """
    Build a new Message from the full source text of an email.

    The text is parsed in one call by the standard library's email
    parser; each parsed header is stored through _header_set() and the
    payload becomes the message body.

    :param text: The full message source, headers followed by body.
    :return: The newly populated Message.
    """
    message = Message()

    obj = email.message_from_string(text)

    # items() yields one (name, value) pair per header line, so repeated
    # headers such as "Received" each contribute their own value.  Looking
    # values up with obj.get(name) would hand back the same single value
    # for every occurrence of a repeated name.
    for key, value in obj.items():
        message._header_set(key, value)

    # NOTE(review): get_payload() returns a list of sub-messages rather
    # than a string when the input is multipart -- confirm that consumers
    # of _body can cope with that.
    message._body = obj.get_payload()

    return message