diff --git a/lib/nexrad/vtec.py b/lib/nexrad/vtec.py index e53f689..3558505 100644 --- a/lib/nexrad/vtec.py +++ b/lib/nexrad/vtec.py @@ -1,45 +1,93 @@ +import re import enum import datetime import json import shapely -import shapefile from nexrad.db import DatabaseTable from nexrad.coord import COORD_SYSTEM -def parse_timestamp(text: str): - size = len(text) +RE_ID = re.compile(r'^(\d+)$') - if size == 8: - fmt = '%Y%m%d' - elif size == 10: - fmt = '%Y%m%d%H' - elif size == 12: - fmt = '%Y%m%d%H%M' - elif size == 14: - fmt = '%Y%m%d%H%M%S' +RE_ISSUANCE = re.compile(r''' + ^ + (WF[A-Z]{2}\d{2}) + [ ]{1} + (?P[A-Z]{4}) + [ ]{1} + (?P\d{2}) + (?P\d{2}) + (?P\d{2}) + $ +''', re.X) - return datetime.datetime.strptime(text, fmt).astimezone(datetime.UTC) +RE_PHENOM = re.compile(r''' + ^/ + (?P[OTEX]) + \. + (?P[A-Z]{3}) + \. + (?P[A-Z]{4}) + \. + (?P[A-Z]{2}) + \. + (?P[A-Z]) + \. + (?P\d{4}) + \. + (?P\d{6}T\d{4}Z) + - + (?P\d{6}T\d{4}Z) + /$ +''', re.X) -def shape_to_geojson(shape: shapefile.Shape): - return json.dumps(shape.__geo_interface__) +RE_HYDRO = re.compile(r''' + ^/ + (?P[0N1]) + \. + (?P[A-Z]{2}) + \. + (?P\d{6}T\d{4}Z) + - + (?P\d{6}T\d{4}Z) + \. + (?P[A-Z]{2}) + /$ +''', re.X) -class VTECType(enum.StrEnum): +RE_POLY = re.compile(r'^LAT\.\.\.LON(?P \d{4})+') + +def parse_timestamp(text: str, post_2016_05_11: bool): + return datetime.datetime.strptime( + text, '%y%m%dT%H%M%SZ' + ).astimezone(datetime.UTC) + +class VTECEventType(enum.StrEnum): OPERATIONAL = 'O' TEST = 'T' EXPERIMENTAL = 'E' EXPERIMENTAL_VTEC = 'X' -class VTEC(DatabaseTable): +class VTECEventParserState(enum.Enum): + NONE = 1 + HEADER = enum.auto() + ISSUANCE = enum.auto() + META = enum.auto() + TYPEOFFICE = enum.auto() + VTEC = enum.auto() + BODY_SEP = enum.auto() + BODY = enum.auto() + POLY = enum.auto() + FOOTER = enum.auto() + +class VTECEvent(DatabaseTable): __table__ = 'nexrad_vtec_event' __key__ = 'id' __columns__ = ( - 'id', 'timestamp_issued', 'timestamp_expired', - 'timestamp_init_iss', 'timestamp_init_exp', - 'timestamp_updated', 'timestamp_poly_start', - 'timestamp_poly_end', 'event_id', 'wfo', 'sig', 'phenom', - 'status', 'hail_size', 'tornado_tag', 'damage_tag', 'poly' + 'id', 'timestamp_issued', 'timestamp_start', 'timestamp_end', + 'typeof', 'etn', 'actions', 'wfo', 'phenom', 'sig', 'body', + 'forecaster', 'poly', ) __columns_read__ = { @@ -51,52 +99,99 @@ class VTEC(DatabaseTable): } __columns_write__ = { - 'poly': 'SetSRID(GeomFromGeoJSON(:poly), {crs})'.format(crs=COORD_SYSTEM) + 'poly': 'ST_GeomFromText(:poly, {crs})'.format(crs=COORD_SYSTEM) } __values_write__ = { - 'poly': lambda v: {'poly': shape_to_geojson(v)} + 'poly': lambda v: {'poly': shapely.to_wkt(v)} } - id: int - poly: shapefile.Shape - event_id: int + id: int + timestamp_issued: datetime.datetime + timestamp_start: datetime.datetime + timestamp_end: datetime.datetime + typeof: str + actions: str + wfo: str + phenom: str + sig: str + etn: int + body: str + forecaster: str + poly: shapely.Geometry + + def parse_shape(self, coords: str): + pass @staticmethod - def from_shapefile_record(record, shape): - vtec = VTEC() - vtec.id = None + def parse(text: str): + event = VTECEvent() + state = VTECEventParserState.NONE - vtec.timestamp_issued = parse_timestamp(record['ISSUED']) - vtec.timestamp_expired = parse_timestamp(record['EXPIRED']) - vtec.timestamp_init_iss = parse_timestamp(record['INIT_ISS']) - vtec.timestamp_init_exp = parse_timestamp(record['INIT_EXP']) - vtec.timestamp_updated = parse_timestamp(record['UPDATED']) - vtec.timestamp_poly_start = parse_timestamp(record['POLY_BEG']) - vtec.timestamp_poly_end = parse_timestamp(record['POLY_END']) + # + # A timestamp post 11 May 2016 can be detected based on the + # presence of lowercase letters in bulletin text, as per: + # + # https://www.noaa.gov/media-release/national-weather-service-will-stop-using-all-caps-in-its-forecasts + # + post_2016_05_11 = any(c for c in text if c.islower()) - vtec.event_id = int(record['ETN']) if (record['ETN'] is not None and record['ETN'] != '') else None - vtec.hail_size = float(record['HAILTAG']) if record['HAILTAG'] is not None else None - vtec.wind_speed = float(record['WINDTAG']) if record['WINDTAG'] is not None else None + issuance = None - vtec.status = record['STATUS'] - vtec.wfo = record['WFO'] - vtec.phenom = record['PHENOM'] - vtec.sig = record['SIG'] - vtec.tornado_tag = record['TORNTAG'] - vtec.damage_tag = record['DAMAGTAG'] - vtec.poly = shape + for line in text.split('\n'): + line = line.rstrip() - return vtec + if state == VTECEventParserState.NONE: + match = RE_ID.match(line) - @staticmethod - def each_from_shapefile(path: str): - sf = shapefile.Reader(path) + if match is not None: + event.id = int(match[1]) + state = VTECEventParserState.HEADER + elif state == VTECEventParserState.HEADER: + match = RE_ISSUANCE.match(line) - for i in range(0, sf.numRecords): - record = sf.record(i) + if match is not None: + issuance = match + state = VTECEventParserState.ISSUANCE + elif state == VTECEventParserState.ISSUANCE: + state = VTECEventParserState.META + elif state == VTECEventParserState.META: + match = RE_PHENOM.match(line) - if record['GTYPE'] != 'P': - continue + if match is not None: + event.timestamp_start = parse_timestamp(match['time_start'], post_2016_05_11) + event.timestamp_end = parse_timestamp(match['time_end'], post_2016_05_11) - yield VTEC.from_shapefile_record(record, sf.shape(i)) + event.typeof = match['typeof'] + event.actions = match['actions'] + event.wfo = match['wfo'] + event.phenom = match['phenom'] + event.sig = match['sig'] + event.etn = int(match['etn']) + + state = VTECEventParserState.VTEC + elif state == VTECEventParserState.VTEC: + if line == '': + state = VTECEventParserState.BODY_SEP + elif state == VTECEventParserState.BODY_SEP: + event.body = line + state = VTECEventParserState.BODY + elif state == VTECEventParserState.BODY: + if line == '&&': + state = VTECEventParserState.POLY + else: + event.body += '\n' + line + elif state == VTECEventParserState.POLY: + match = RE_POLY.match(line) + + if match is not None: + event.parse_shape(match['coords']) + elif line == '$$': + state = VTECEventParserState.FOOTER + else: + pass + elif state == VTECEventParserState.FOOTER: + if line != '': + event.forecaster = line + + return event diff --git a/requirements.txt b/requirements.txt index c1d2ebb..245b632 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ boto3>=1.36 shapely>=2.0 -pyshp>=2.3.0