Initial commit of rawins.py

2025-02-28 13:07:17 -05:00 · 2025-02-28 13:07:17 -05:00 · a8aa3ea771
commit a8aa3ea771
parent 2afcf59ba0
1 changed files with 104 additions and 0 deletions
--- a/lib/xmet/rawins.py
+++ b/lib/xmet/rawins.py
@ -0,0 +1,104 @@
 import datetime
 import io
 import re
 from collections.abc import Iterator

 from xmet.util     import each_chunk
 from xmet.afos     import RE_ID, RE_ISSUANCE, RE_PRODUCT
 from xmet.sounding import Sounding, SoundingSample
 #
 # Separator handed to `each_chunk()` when the feed is divided into text
 # chunks: the ASCII SOH (start of heading, 0x01) control character.
 #
 CHUNK_SEP         = "\x01"
 #
 # Character set handed to `each_chunk()` alongside the separator: SOH (0x01),
 # ETX (0x03), line feed (0x0a) and space (0x20).
 # NOTE(review): presumably these are trimmed from the ends of every chunk --
 # confirm against `xmet.util.each_chunk`.
 #
 CHUNK_STRIP_CHARS = "\x01\x03\x0a\x20"
 class RawinsReaderException(Exception):
     """
     Raised by `RawinsReader` when a text chunk does not start with the
     header lines the reader expects.
     """
 class RawinsChunk:
     """
     Plain container for the pieces extracted from one text chunk of the
     feed.  The constructor stores its arguments as given; nothing is
     validated, converted or copied.
     """
     def __init__(self, wfo: str, product: str, station: str, values: list[str]):
         # Whitespace-delimited tokens that followed the chunk's header lines.
         self.values  = values
         # WMO rawinsonde station ID.
         self.station = station
         # NWS product code.
         self.product = product
         # NWS forecast office.
         self.wfo     = wfo
 class RawinsReader():
    """
    A reader for the global `Current.rawins` file provided by UCAR:
        https://weather.rap.ucar.edu/data/upper/Current.rawins
    """
    def __init__(self, fh: io.TextIOBase):
        self.fh        = fh
        self.soundings = dict()
        self.current   = Sounding()
    def parse_chunk(self, text: str) -> RawinsChunk:
        meta = {
            'wfo':     None, # NWS forecast office
            'product': None, # NWS product code
            'station': None  # WMO rawinsonde station ID
        }
        line_index = 0
        #
        # Split each line in the text chunk.  Not all chunks will have the
        # same amount of metadata, so parse accordingly.
        #
        lines = list(map(lambda s: s.strip(), text.split("\n")))
        #
        # The `Current.rawins` feed from UCAR includes basic AFOS header
        # information in the first two lines.  Validate this.  Note the first
        # line is a sort of sequence number which has no public significance.
        #
        match = RE_ID.match(lines[0])
        if match is None:
            raise RawinsReaderException(f"First chunk line not 3-digit identifier ({lines[0]})")
        else:
            line_index += 1
        #
        # The `Current.rawins` feed from UCAR should also include a product
        # issuance code indicating the WFO and validity time.  This can also
        # be validated.
        #
        match = RE_ISSUANCE.match(lines[1])
        if match is None:
            raise RawinsReaderException('Second chunk line not product issuance')
        else:
            meta['wfo'] = match['wfo']
            line_index += 1
        #
        # Finally, sometimes, the `Current.rawins` feed has an AFOS header
        # which indicates the product code followed by the three-character
        # WFO code.  Capture the product code purely for posterity.
        #
        match = RE_PRODUCT.match(lines[2])
        if match is not None:
            meta['product'] = match['product']
            line_index += 1
        #
        # Split each whitespace-delimited column of each line into one big
        # list of lines for the remainder of the current text chunk.
        #
        values = list()
        for line in lines[line_index:]:
            values.extend(re.split(r'\s+', line))
        return RawinsChunk(meta['wfo'],
                           meta['product'],
                           meta['station'],
                           values)
    def each_chunk(self) -> list[Sounding]:
        for text in each_chunk(self.fh, CHUNK_SEP, CHUNK_STRIP_CHARS):
            yield self.parse_chunk(text)