Massive email parsing performance boost

Massive email parsing performance boost by simply using Python's
own native email parser (email.message_from_string).
This commit is contained in:
XANTRONIX Development 2025-01-05 00:16:02 -05:00
parent 467dd77de3
commit 00b0347fe2

View file

@ -3,6 +3,8 @@ import enum
import datetime import datetime
import uuid import uuid
import email.message
from email.utils import parsedate_to_datetime from email.utils import parsedate_to_datetime
from email.header import decode_header, Header from email.header import decode_header, Header
@ -343,10 +345,16 @@ class Message(DatabaseTable):
self.line = None self.line = None
def read(self, text: str): def read(self, text: str):
for line in each_line(text): obj = email.message_from_string(text)
self.readline(line)
self.finish() self._headers = dict()
self._headers_lc = dict()
for key in obj.keys():
self._header_set(key, obj.get(key))
self._body = obj.get_payload()
self.line = None
def message_id_assign(self): def message_id_assign(self):
sender = self.sender sender = self.sender
@ -394,7 +402,11 @@ class Message(DatabaseTable):
def from_text(text: str): def from_text(text: str):
message = Message() message = Message()
for line in each_line(text): obj = email.message_from_string(text)
message.readline(line)
for key in obj.keys():
message._header_set(key, obj.get(key))
message._body = obj.get_payload()
return message return message