"""
Reader for the global `Current.rawins` upper-air feed provided by UCAR.

The feed is a sequence of text chunks delimited by control characters.
Each chunk starts with AFOS-style header lines, followed by whitespace-
delimited data values.
"""
import io
import re
import datetime

from collections.abc import Iterator

from xmet.util import each_chunk
from xmet.afos import RE_ID, RE_ISSUANCE, RE_PRODUCT
from xmet.sounding import Sounding, SoundingSample

# Character separating consecutive chunks in the feed.
CHUNK_SEP = "\x01"

# Characters stripped from the edges of each chunk.
CHUNK_STRIP_CHARS = "\x01\x03\x0a\x20"


class RawinsReaderException(Exception):
    """Raised when a chunk does not carry the expected header lines."""


class RawinsChunk:
    """
    The parsed header fields and flattened data values of one feed chunk.
    """
    def __init__(self,
                 wfo: str,
                 product: str,
                 station: str,
                 values: list[str]):
        self.wfo = wfo          # NWS forecast office
        self.product = product  # NWS product code (None when absent)
        self.station = station  # WMO rawinsonde station ID
        self.values = values    # Whitespace-delimited tokens after the header


class RawinsReader:
    """
    A reader for the global `Current.rawins` file provided by UCAR:

    https://weather.rap.ucar.edu/data/upper/Current.rawins
    """
    def __init__(self, fh: io.TextIOBase):
        self.fh = fh

        #
        # NOTE(review): neither attribute is touched by the methods in this
        # class as written; presumably they are used by code added later.
        #
        self.soundings = dict()
        self.current = Sounding()

    def parse_chunk(self, text: str) -> RawinsChunk:
        """
        Parse one text chunk into a `RawinsChunk`.

        The first line must match `RE_ID` and the second must match
        `RE_ISSUANCE`; an optional third line matching `RE_PRODUCT` supplies
        the product code.  Every remaining line is split on whitespace and
        the tokens are collected, in order, into a single list.

        The station ID is not extracted here and is always `None` in the
        returned chunk.

        Raises `RawinsReaderException` if either mandatory header line is
        missing or malformed.
        """
        #
        # Split each line in the text chunk.  Not all chunks will have the
        # same amount of metadata, so parse accordingly.
        #
        lines = [line.strip() for line in text.split("\n")]

        #
        # The `Current.rawins` feed from UCAR includes basic AFOS header
        # information in the first two lines.  Validate this.  Note the first
        # line is a sort of sequence number which has no public significance.
        # `str.split()` always yields at least one element, so `lines[0]` is
        # safe even for an empty chunk.
        #
        if RE_ID.match(lines[0]) is None:
            raise RawinsReaderException(f"First chunk line not 3-digit identifier ({lines[0]})")

        #
        # The `Current.rawins` feed from UCAR should also include a product
        # issuance code indicating the WFO and validity time.  A truncated
        # chunk is reported as a reader error rather than an IndexError.
        #
        if len(lines) < 2:
            raise RawinsReaderException('Chunk ends before product issuance line')

        match = RE_ISSUANCE.match(lines[1])

        if match is None:
            raise RawinsReaderException('Second chunk line not product issuance')

        wfo = match['wfo']
        line_index = 2

        #
        # Finally, sometimes, the `Current.rawins` feed has an AFOS header
        # which indicates the product code followed by the three-character
        # WFO code.  Capture the product code purely for posterity.  The
        # header is optional, so a chunk with no third line is still valid.
        #
        product = None

        if len(lines) > 2:
            match = RE_PRODUCT.match(lines[2])

            if match is not None:
                product = match['product']
                line_index = 3

        #
        # Split each whitespace-delimited column of each remaining line into
        # one big list of values.  `str.split()` with no arguments yields
        # nothing for a blank line, so empty tokens never reach the list.
        #
        values = []

        for line in lines[line_index:]:
            values.extend(line.split())

        return RawinsChunk(wfo, product, None, values)

    def each_chunk(self) -> Iterator[RawinsChunk]:
        """
        Lazily yield a parsed `RawinsChunk` for each chunk read from the
        underlying file handle.
        """
        for text in each_chunk(self.fh, CHUNK_SEP, CHUNK_STRIP_CHARS):
            yield self.parse_chunk(text)