commit fdac039c2a4d15a449e06deb198a64c5bd5cb31d
Author: XANTRONIX Development <dev@xantronix.com>
Date:   Wed Nov 6 23:34:51 2024 -0500

    initial commit

diff --git a/ingest.py b/ingest.py
new file mode 100644
index 0000000..0fa271d
--- /dev/null
+++ b/ingest.py
@@ -0,0 +1,147 @@
+#! /usr/bin/env python3
+
+import sys
+import re
+import sqlite3
+
+class MBoxReaderError(Exception):
+    """Error type declared for the mbox reader (no code shown here raises it)."""
+
+class MBoxReaderBuffer():
+    """
+    Sliding window over the four most recently added lines, used to spot the
+    start of a message; `lines` is oldest first, None marks an unfilled slot.
+    """
+
+    def __init__(self):
+        self.lines = [None, None, None, None]
+        self.index = None
+
+    def add(self, line: str):
+        """Append `line` to the window, discarding the oldest line once full."""
+        if None in self.lines:
+            self.lines[self.lines.index(None)] = line
+        else:
+            del self.lines[0]
+            self.lines.append(line)
+        self.index = min(4 - self.lines.count(None), 3)  # lines held, capped at 3
+
+    def is_empty_line(self, line):
+        """Return True if window slot `line` holds a blank line (LF or CRLF)."""
+        return self.lines[line] in ('\n', '\r\n')
+
+    def is_from_line(self, line):
+        """Return True if slot `line` starts with 'From '; False if unfilled."""
+        text = self.lines[line]
+        return text is not None and text[0:5] == 'From '
+
+    def is_header_line(self, line):
+        """Return True if slot `line` looks like 'Name: value'; False if unfilled."""
+        text = self.lines[line]
+        return text is not None and re.match('^([^:]+): (.*)$', text) is not None
+
+    def is_start(self):
+        """
+        Return the slot of a new message's first header line (truthy), or None.
+
+        Recognised starts: a 'From ' line opening the file (returns 1), or two
+        blank lines followed by a 'From ' line (returns 3).  Either way the
+        line after the 'From ' line must be a header line.
+        """
+        if self.lines[2] is None:
+            # Exactly-two-lines case only: later, an already reported 'From '
+            # line slides through slot 0 and must not be reported again.
+            return 1 if self.is_from_line(0) and self.is_header_line(1) else None
+
+        if self.is_empty_line(0) and self.is_empty_line(1) \
+         and self.is_from_line(2) and self.is_header_line(3):
+            return 3
+        return None
+
+class MBoxMessage():
+    """A parsed message; `headers` maps each name to its unfolded 'Name: value' line."""
+    __slots__ = 'headers', 'body', 'key',
+    def __init__(self):
+        self.headers = dict()
+        self.body    = None  # None until the blank header/body separator is seen
+        self.key     = None  # name of the header parsed most recently
+
+    def add(self, line: str):
+        """Feed the next raw line: headers until a blank line (LF or CRLF), then body."""
+        if self.body is not None:
+            self.body += line
+        elif line in ('\n', '\r\n'):
+            self.body = ''
+        elif line[:1] not in (' ', '\t'):
+            match = re.match('^([^:]+): (.*)$', line)
+            if match:
+                self.key = match[1]
+                self.headers[self.key] = line.rstrip()
+        elif self.key is not None:  # a stray continuation has no header to extend
+            self.headers[self.key] += ' ' + line.strip()
+
+    def is_empty(self):
+        """Return True if no header is stored and the body has not begun."""
+        return len(self.headers) == 0 and self.body is None
+
+    def is_first_line(self):
+        """Return True if exactly one header is stored and the body has not begun."""
+        return len(self.headers) == 1 and self.body is None
+
+class MBoxReader():
+    """Reads an mbox file line by line and splits it into MBoxMessage objects."""
+    __slots__ = 'path', 'fh', 'line', 'buf', 'message',
+
+    def __init__(self, path: str):
+        self.path    = path
+        self.fh      = open(path, 'r', newline='')
+        self.line    = 0     # count of lines read so far
+        self.buf     = MBoxReaderBuffer()
+        self.message = None  # message still being assembled, if any
+
+    def close(self):
+        """Close the underlying file; get_message() returns None afterwards."""
+        self.fh.close()
+
+    def get_message(self):
+        """
+        Return the next complete MBoxMessage, or None when none are left.
+
+        The file is closed when its end is reached; later calls return None.
+        """
+        while not self.fh.closed:
+            line = self.fh.readline()
+            if line == '':
+                # readline() reports EOF as '' (never None): release the file
+                # and hand back the message that was still open, if any.
+                self.fh.close()
+                ret, self.message = self.message, None
+                return ret
+
+            self.line += 1
+            self.buf.add(line)
+            if not self.buf.is_start():
+                if self.message:
+                    self.message.add(line)
+                continue
+
+            # NOTE(review): the 'From ' line before `line` was already fed to the old message.
+            ret, self.message = self.message, MBoxMessage()
+            self.message.add(line)
+            if ret is not None:
+                return ret
+        return None
+
+    def messages(self):
+        """Yield every remaining message, in file order, until get_message() gives None."""
+        yield from iter(self.get_message, None)
+
+if len(sys.argv) < 3:
+    # Fail with a usage hint rather than an IndexError traceback.
+    sys.exit(f"usage: {sys.argv[0]} <sqlite-db> <mbox-file>")
+
+db     = sqlite3.connect(sys.argv[1])
+reader = MBoxReader(sys.argv[2])
+count  = sum(1 for _ in reader.messages())  # `db` is not written to yet
+
+print(f"Found {count} messages")