Massive email parsing performance boost
Massive email parsing performance boost by simply using Python's own standard-library email parser (email.message_from_string) instead of the hand-written line-by-line reader
This commit is contained in:
parent
467dd77de3
commit
00b0347fe2
1 changed file with 17 additions and 5 deletions
|
@@ -3,6 +3,8 @@ import enum
|
||||||
import datetime
|
import datetime
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
import email.message
|
||||||
|
|
||||||
from email.utils import parsedate_to_datetime
|
from email.utils import parsedate_to_datetime
|
||||||
from email.header import decode_header, Header
|
from email.header import decode_header, Header
|
||||||
|
|
||||||
|
@@ -343,10 +345,16 @@ class Message(DatabaseTable):
|
||||||
self.line = None
|
self.line = None
|
||||||
|
|
||||||
def read(self, text: str):
|
def read(self, text: str):
|
||||||
for line in each_line(text):
|
obj = email.message_from_string(text)
|
||||||
self.readline(line)
|
|
||||||
|
|
||||||
self.finish()
|
self._headers = dict()
|
||||||
|
self._headers_lc = dict()
|
||||||
|
|
||||||
|
for key in obj.keys():
|
||||||
|
self._header_set(key, obj.get(key))
|
||||||
|
|
||||||
|
self._body = obj.get_payload()
|
||||||
|
self.line = None
|
||||||
|
|
||||||
def message_id_assign(self):
|
def message_id_assign(self):
|
||||||
sender = self.sender
|
sender = self.sender
|
||||||
|
@@ -394,7 +402,11 @@ class Message(DatabaseTable):
|
||||||
def from_text(text: str):
|
def from_text(text: str):
|
||||||
message = Message()
|
message = Message()
|
||||||
|
|
||||||
for line in each_line(text):
|
obj = email.message_from_string(text)
|
||||||
message.readline(line)
|
|
||||||
|
for key in obj.keys():
|
||||||
|
message._header_set(key, obj.get(key))
|
||||||
|
|
||||||
|
message._body = obj.get_payload()
|
||||||
|
|
||||||
return message
|
return message
|
||||||
|
|
Loading…
Add table
Reference in a new issue