From a8aa3ea771dc59683483d2a55b45c98b8341e3dd Mon Sep 17 00:00:00 2001
From: XANTRONIX Industrial <xan@xantronix.com>
Date: Fri, 28 Feb 2025 13:07:17 -0500
Subject: [PATCH] Initial commit of rawins.py

---
 lib/xmet/rawins.py | 104 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100644 lib/xmet/rawins.py

diff --git a/lib/xmet/rawins.py b/lib/xmet/rawins.py
new file mode 100644
index 0000000..954aa9c
--- /dev/null
+++ b/lib/xmet/rawins.py
@@ -0,0 +1,104 @@
+import datetime
+import io
+import re
+from collections.abc import Iterator
+from xmet.afos     import RE_ID, RE_ISSUANCE, RE_PRODUCT
+from xmet.sounding import Sounding, SoundingSample
+from xmet.util     import each_chunk
+
+CHUNK_SEP         = "\x01"
+CHUNK_STRIP_CHARS = "\x01\x03\x0a\x20"
+
+class RawinsReaderException(Exception):
+    """Raised when a chunk of the rawins feed fails header validation."""
+
+class RawinsChunk():
+    """
+    A single parsed chunk of the rawins feed: the header metadata found
+    for it plus the whitespace-delimited data tokens that followed.
+
+    `product` and `station` are None when the chunk did not supply them.
+    """
+    def __init__(self, wfo: str, product: str, station: str, values: list[str]):
+        self.wfo, self.product    = wfo, product
+        self.station, self.values = station, values
+
+class RawinsReader():
+    """
+    A reader for the global `Current.rawins` file provided by UCAR:
+
+        https://weather.rap.ucar.edu/data/upper/Current.rawins
+    """
+    def __init__(self, fh: io.TextIOBase):
+        """
+        Keep a reference to the text stream `fh` for later chunking; no
+        input is consumed by the constructor itself.
+        """
+        self.fh        = fh
+        self.soundings = dict()
+        self.current   = Sounding()
+
+    def parse_chunk(self, text: str) -> RawinsChunk:
+        """
+        Parse one text chunk into a `RawinsChunk`.
+
+        The first line must match `RE_ID` and the second `RE_ISSUANCE`,
+        from which the `wfo` group is kept.  An optional third line
+        matching `RE_PRODUCT` supplies the product code.  All remaining
+        lines are split on whitespace into a single flat list of tokens.
+
+        Raises `RawinsReaderException` when either mandatory header line
+        is missing or does not match.
+        """
+        #
+        # Split the chunk into stripped lines.  Not all chunks will have
+        # the same amount of metadata, so parse accordingly.
+        #
+        lines = [line.strip() for line in text.split("\n")]
+
+        #
+        # The first line is a sort of sequence number which has no public
+        # significance, but its presence is validated.
+        #
+        if RE_ID.match(lines[0]) is None:
+            raise RawinsReaderException(f"First chunk line not 3-digit identifier ({lines[0]})")
+
+        #
+        # The second line should be a product issuance code indicating the
+        # WFO and validity time.  A chunk too short to have one is reported
+        # the same way as a malformed one rather than as an `IndexError`.
+        #
+        match = RE_ISSUANCE.match(lines[1]) if len(lines) > 1 else None
+
+        if match is None:
+            raise RawinsReaderException('Second chunk line not product issuance')
+
+        wfo        = match['wfo']
+        product    = None
+        line_index = 2
+
+        #
+        # Sometimes a third header line gives the product code followed by
+        # the three-character WFO code.  Capture the product code purely
+        # for posterity, and only then skip the line.
+        #
+        match = RE_PRODUCT.match(lines[2]) if len(lines) > 2 else None
+
+        if match is not None:
+            product     = match['product']
+            line_index += 1
+
+        #
+        # Flatten the whitespace-delimited columns of the remaining lines
+        # into one list of values.  `str.split()` yields nothing for a
+        # blank line, so no empty-string tokens are emitted.
+        #
+        values = [v for line in lines[line_index:] for v in line.split()]
+
+        # The WMO station ID is not parsed from the header at this stage.
+        return RawinsChunk(wfo, product, None, values)
+
+    def each_chunk(self) -> Iterator[RawinsChunk]:
+        """Yield a `RawinsChunk` per raw chunk that `each_chunk()` gives for `fh`."""
+        for text in each_chunk(self.fh, CHUNK_SEP, CHUNK_STRIP_CHARS):
+            yield self.parse_chunk(text)