Significantly improve message parsing performance
This commit is contained in:
parent
6038e09670
commit
8aa8ae4dec
1 changed files with 21 additions and 54 deletions
|
@ -4,6 +4,9 @@ import datetime
|
|||
import uuid
|
||||
|
||||
import email.message
|
||||
import email.policy
|
||||
import email.parser
|
||||
import email.errors
|
||||
|
||||
from email.utils import parsedate_to_datetime
|
||||
from email.header import decode_header, Header
|
||||
|
@ -117,14 +120,10 @@ class MessageRange():
|
|||
obj.max = int(match[1])
|
||||
return obj
|
||||
|
||||
class MessageState(enum.Enum):
|
||||
EMPTY = 0
|
||||
HEADER = 1
|
||||
BODY = 2
|
||||
|
||||
class Message(DatabaseTable):
|
||||
__slots__ = (
|
||||
'_cache',
|
||||
'_parser',
|
||||
'_headers',
|
||||
'_headers_lc',
|
||||
'_body',
|
||||
|
@ -153,12 +152,12 @@ class Message(DatabaseTable):
|
|||
super().__init__()
|
||||
|
||||
self._cache = dict()
|
||||
self._parser = None
|
||||
self._headers = None
|
||||
self._headers_lc = None
|
||||
self._body = None
|
||||
self._key = None
|
||||
self.id = None
|
||||
self.state = MessageState.EMPTY
|
||||
self.line = None
|
||||
self.content = ''
|
||||
|
||||
|
@ -304,49 +303,16 @@ class Message(DatabaseTable):
|
|||
elif value is not None:
|
||||
self._header_set('Subject', Header(value).encode())
|
||||
|
||||
def is_first_line(self):
|
||||
return len(self.headers) == 1 and (self._body == '' or self._body is None)
|
||||
|
||||
def readline(self, line: str):
|
||||
if self.line is not None:
|
||||
self.content += self.line
|
||||
|
||||
if self.state is MessageState.EMPTY:
|
||||
self.state = MessageState.HEADER
|
||||
self._headers = dict()
|
||||
self._headers_lc = dict()
|
||||
|
||||
if self.state is MessageState.HEADER:
|
||||
if line == '\n' or line == '\r\n':
|
||||
self.state = MessageState.BODY
|
||||
elif line[0] == ' ' or line[0] == '\t':
|
||||
self._header_append(self._key, ' ' + line.strip())
|
||||
if self._parser is None:
|
||||
self.content = line
|
||||
self._parser = email.parser.FeedParser()
|
||||
else:
|
||||
match = self.RE_HEADER.match(line)
|
||||
self.content += line
|
||||
|
||||
if match:
|
||||
self._key = match[1]
|
||||
|
||||
self._header_append(self._key, match[2].rstrip())
|
||||
|
||||
elif self.state is MessageState.BODY:
|
||||
if self._body is None:
|
||||
self._body = ''
|
||||
else:
|
||||
self._body += self.line
|
||||
|
||||
self.line = line
|
||||
|
||||
def finish(self):
|
||||
if self.line:
|
||||
self.content += self.line
|
||||
self._body += self.line
|
||||
|
||||
self.line = None
|
||||
|
||||
def read(self, text: str):
|
||||
obj = email.message_from_string(text)
|
||||
self._parser.feed(line)
|
||||
|
||||
def read_email_obj(self, obj: email.message.Message):
|
||||
self._headers = dict()
|
||||
self._headers_lc = dict()
|
||||
|
||||
|
@ -354,7 +320,14 @@ class Message(DatabaseTable):
|
|||
self._header_set(key, obj.get(key))
|
||||
|
||||
self._body = obj.get_payload()
|
||||
self.line = None
|
||||
|
||||
def finish(self):
|
||||
if self._parser:
|
||||
self.read_email_obj(self._parser.close())
|
||||
self._parser = None
|
||||
|
||||
def read(self, text: str):
|
||||
self.read_email_obj(email.message_from_string(text))
|
||||
|
||||
def message_id_assign(self):
|
||||
sender = self.sender
|
||||
|
@ -401,12 +374,6 @@ class Message(DatabaseTable):
|
|||
@staticmethod
|
||||
def from_text(text: str):
|
||||
message = Message()
|
||||
|
||||
obj = email.message_from_string(text)
|
||||
|
||||
for key in obj.keys():
|
||||
message._header_set(key, obj.get(key))
|
||||
|
||||
message._body = obj.get_payload()
|
||||
message.read(text)
|
||||
|
||||
return message
|
||||
|
|
Loading…
Add table
Reference in a new issue