From a9bdab632d9179a0813c2fd2a1f7f66a0f81c4d3 Mon Sep 17 00:00:00 2001
From: XANTRONIX Industrial
Date: Thu, 20 Feb 2025 00:58:16 -0500
Subject: [PATCH] Become more aggressive in parsing

Loosen the AFOS header regexes so that any run of whitespace separates
fields and any four-letter product prefix is accepted on the issuance
line.  Parse VTEC lines in every parser state instead of only in the
message body.  Derive timestamp_issued from the inline date line when
one is present, otherwise from the issuance header.  Rename the stored
message text to text_raw (SQL column, column list and attribute) and
convert `location` to and from WKT the same way as `poly`.
---
Review notes (ignored by git-am):

* Regex group names.  This patch reached review with every
  "(?P<name>" reduced to "(?P".  Names are reinstated only in the added
  RE_ISSUANCE and RE_DATE lines, where the subscripts used in parse()
  establish them (day/hour/minute; tz/year/month/day/hour/minute).
  "wfo" and "weekday" are labels chosen here and are not read by any
  code visible in this diff; "ampm" is new and is read by parse().
  RE_PRODUCT, RE_MOTION and all removed lines are left exactly as
  received, because the code that reads their groups is outside this
  diff.  Restore those names from the base file before applying.
* Runs of whitespace were collapsed as well; indentation and alignment
  below are reconstructed, and the blob ids on the "index" lines still
  describe the original revision.
* Fixes folded in: the ' EST' key typo; AM/PM is now honoured;
  unknown zone or month tokens no longer raise KeyError; the inline
  date no longer overwrites a VTEC-supplied start/end; the issuance
  fallback no longer dereferences None; duplicate 'MAY' key dropped.
* NOTE(review): AFOSMessage assigns __values_write__ twice (context
  lines, hunk 3), so the shapely.from_wkt mapping is overwritten by the
  to_wkt one.  The first looks like a read-side mapping; confirm the
  intended attribute name against DatabaseTable.

 db/nexrad.sql      |   2 +-
 lib/nexrad/afos.py | 154 ++++++++++++++++++++++++++++++++++++----------
 lib/nexrad/vtec.py |   3 +-
 3 files changed, 123 insertions(+), 36 deletions(-)

diff --git a/db/nexrad.sql b/db/nexrad.sql
index a161384..b338651 100644
--- a/db/nexrad.sql
+++ b/db/nexrad.sql
@@ -56,7 +56,7 @@ create table nexrad_afos_message (
     phenom TEXT NOT NULL,
     sig TEXT NOT NULL,
     etn INTEGER NOT NULL,
-    body TEXT NOT NULL,
+    text_raw TEXT NOT NULL,
     azimuth FLOAT NOT NULL,
     speed FLOAT NOT NULL,
     forecaster TEXT NOT NULL
diff --git a/lib/nexrad/afos.py b/lib/nexrad/afos.py
index da69d85..4263b6b 100644
--- a/lib/nexrad/afos.py
+++ b/lib/nexrad/afos.py
@@ -12,11 +12,20 @@ from nexrad.vtec import VTECEvent
 RE_ID = re.compile(r'^(\d+)$')
 
 RE_ISSUANCE = re.compile(r'''
-    ^ (W[A-Z]{3}\d{2})
-    [ ]{1} (?P[A-Z]{4})
-    [ ]{1} (?P\d{2})
-    (?P\d{2}) (?P\d{2})
-    $
+    ^   ([A-Z]{4}\d+)
+    \s+ (?P<wfo>[A-Z]{4})
+    \s+ (?P<day>\d{2}) (?P<hour>\d{2}) (?P<minute>\d{2})
+''', re.X)
+
+RE_DATE = re.compile(r'''
+    ^   (?P<hour>\d{1,2})
+        (?P<minute>\d{2})
+    \s+ (?P<ampm>AM|PM)
+    \s+ (?P<tz>[A-Z]{3})
+    \s+ (?P<weekday>[A-Za-z]+)
+    \s+ (?P<month>[A-Za-z]+)
+    \s+ (?P<day>\d{1,2})
+    \s+ (?P<year>\d{4})
 ''', re.X)
 
 RE_PRODUCT = re.compile(r'^(?P[A-Z]{3})(?P[A-Z]{3})$')
@@ -27,14 +36,28 @@ RE_MOTION = re.compile(r'''
     ^ TIME
     \.\.\. MOT
     \.\.\. LOC
-    [ ]{1} (?P\d{2})(?P\d{2})Z
-    [ ]{1} (?P\d+)DEG
-    [ ]{1} (?P\d+)KT
-    [ ]{1} (?P\d+)
-    [ ]{1} (?P\d+)
+    \s+ (?P\d{2})(?P\d{2})Z
+    \s+ (?P\d+)DEG
+    \s+ (?P\d+)KT
+    \s+ (?P\d+)
+    \s+ (?P\d+)
     $
 ''', re.X)
 
+MONTHS = {
+    'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
+    'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12,
+
+    'JANUARY': 1, 'FEBRUARY': 2, 'MARCH': 3, 'APRIL': 4,
+    'JUNE': 6, 'JULY': 7, 'AUGUST': 8,
+    'SEPTEMBER': 9, 'OCTOBER': 10, 'NOVEMBER': 11, 'DECEMBER': 12
+}
+
+TIMEZONES = {
+    'HST': -10, 'PST': -8, 'PDT': -7, 'MST': -7, 'MDT': -6, 'CST': -6,
+    'CDT': -5, 'EST': -5, 'EDT': -4, 'GMT': 0
+}
+
 def parse_lon(text: str):
     size = len(text)
     return 0 - float(text[0:size-2]) + (float(text[size-2:size]) / 100)
@@ -76,24 +99,28 @@ class AFOSMessage(DatabaseTable):
     __columns__ = (
         'id', 'timestamp_issued', 'timestamp_start', 'timestamp_end',
         'serial', 'product', 'vtec_type', 'etn', 'actions', 'wfo',
-        'phenom', 'sig', 'text', 'azimuth', 'speed', 'location',
+        'phenom', 'sig', 'text_raw', 'azimuth', 'speed', 'location',
         'forecaster', 'poly',
     )
 
     __columns_read__ = {
-        'poly': 'ST_AsText(poly) as poly'
+        'poly': 'ST_AsText(poly) as poly',
+        'location': 'ST_AsText(location) as location'
     }
 
     __values_write__ = {
-        'poly': shapely.from_wkt
+        'poly': shapely.from_wkt,
+        'location': shapely.from_wkt
     }
 
     __columns_write__ = {
-        'poly': 'ST_GeomFromText(:poly, {crs})'.format(crs=COORD_SYSTEM)
+        'poly': 'ST_GeomFromText(:poly, {crs})'.format(crs=COORD_SYSTEM),
+        'location': 'ST_GeomFromText(:location, {crs})'.format(crs=COORD_SYSTEM)
     }
 
     __values_write__ = {
-        'poly': lambda v: {'poly': shapely.to_wkt(v)}
+        'poly': lambda v: {'poly': shapely.to_wkt(v)},
+        'location': lambda v: {'location': shapely.to_wkt(v)}
     }
 
     id: int
@@ -110,7 +137,7 @@ class AFOSMessage(DatabaseTable):
     phenom: str
     sig: str
     etn: int
-    body: str
+    text_raw: str
     azimuth: int
     speed: int
     location: shapely.Point
@@ -133,7 +160,7 @@ class AFOSMessage(DatabaseTable):
         self.phenom = None
         self.sig = None
         self.etn = None
-        self.body = None
+        self.text_raw = None
         self.azimuth = None
         self.speed = None
         self.location = None
@@ -143,13 +170,37 @@ class AFOSMessage(DatabaseTable):
     @staticmethod
     def parse(text: str) -> Self:
         message = AFOSMessage()
-        message.text = text
+        message.text_raw = text
 
         state = AFOSMessageParserState.SERIAL
 
+        issuance = None
+        timestamp_inline = None
+
         for line in text.split('\n'):
             line = line.rstrip()
 
+            if line == '':
+                continue
+            elif line[0] == '/' and line[-1] == '/':
+                #
+                # The VTEC line can appear anywhere in the message
+                # text, therefore, parsing must be able to occur in
+                # all states.
+                #
+                vtec = VTECEvent.parse(line)
+
+                if vtec is not None:
+                    message.timestamp_start = vtec.timestamp_start
+                    message.timestamp_end = vtec.timestamp_end
+
+                    message.vtec_type = vtec.typeof
+                    message.actions = vtec.actions
+                    message.wfo = vtec.wfo
+                    message.phenom = vtec.phenom
+                    message.sig = vtec.sig
+                    message.etn = vtec.etn
+
             if state == AFOSMessageParserState.SERIAL:
                 match = RE_ID.match(line)
 
@@ -160,7 +211,8 @@ class AFOSMessage(DatabaseTable):
                 match = RE_ISSUANCE.match(line)
 
                 if match is not None:
-                    state = AFOSMessageParserState.PRODUCT
+                    state    = AFOSMessageParserState.PRODUCT
+                    issuance = match
             elif state == AFOSMessageParserState.PRODUCT:
                 match = RE_PRODUCT.match(line)
 
@@ -169,22 +221,34 @@ class AFOSMessage(DatabaseTable):
 
                     state = AFOSMessageParserState.BODY
             elif state == AFOSMessageParserState.BODY:
-                if line == '':
-                    continue
-                elif line[0] == '/':
-                    vtec = VTECEvent.parse(line)
+                if timestamp_inline is None:
+                    match = RE_DATE.match(line)
 
-                    if vtec is not None:
-                        message.timestamp_start = vtec.timestamp_start
-                        message.timestamp_end = vtec.timestamp_end
+                    if match is not None:
+                        offset = TIMEZONES.get(match['tz'].upper())
+                        month  = MONTHS.get(match['month'].upper())
+
+                        if offset is not None and month is not None:
+                            #
+                            # The date line uses a 12-hour clock; fold
+                            # AM/PM into a 24-hour value (12 AM -> 0).
+                            #
+                            hour = int(match['hour']) % 12
+
+                            if match['ampm'] == 'PM':
+                                hour += 12
+
+                            timestamp_inline = datetime.datetime(
+                                year   = int(match['year']),
+                                month  = month,
+                                day    = int(match['day']),
+                                hour   = hour,
+                                minute = int(match['minute']),
+                                second = 0,
+                                tzinfo = datetime.timezone(datetime.timedelta(hours=offset))
+                            ).astimezone(datetime.UTC)
 
-                        message.vtec_type = vtec.typeof
-                        message.actions = vtec.actions
-                        message.wfo = vtec.wfo
-                        message.phenom = vtec.phenom
-                        message.sig = vtec.sig
-                        message.etn = vtec.etn
-                elif line == '&&':
+                if line == '&&':
                     state = AFOSMessageParserState.TAGS
             elif state == AFOSMessageParserState.TAGS:
                 if line == '$$':
@@ -205,6 +269,30 @@ class AFOSMessage(DatabaseTable):
                 if line != '':
                     message.forecaster = line
 
+        if message.timestamp_issued is None:
+            if timestamp_inline is not None:
+                message.timestamp_issued = timestamp_inline
+
+                #
+                # Only fall back to the inline date when no VTEC line
+                # supplied the event window; never overwrite it.
+                #
+                if message.timestamp_start is None:
+                    message.timestamp_start = timestamp_inline
+
+                if message.timestamp_end is None:
+                    message.timestamp_end = timestamp_inline + datetime.timedelta(hours=1)
+            elif issuance is not None and message.timestamp_start is not None:
+                message.timestamp_issued = datetime.datetime(
+                    year   = message.timestamp_start.year,
+                    month  = message.timestamp_start.month,
+                    day    = int(issuance['day']),
+                    hour   = int(issuance['hour']),
+                    minute = int(issuance['minute']),
+                    second = 0,
+                    tzinfo = datetime.UTC
+                )
+
         return message
 
     def is_watch(self):
diff --git a/lib/nexrad/vtec.py b/lib/nexrad/vtec.py
index 4b59324..c136a6b 100644
--- a/lib/nexrad/vtec.py
+++ b/lib/nexrad/vtec.py
@@ -91,8 +91,7 @@ class VTECEvent():
 
 class VTECHydroEvent():
     __slots__ = (
-        'severity', 'cause', 'record',
-        'timestamp_start', 'timestamp_end'
+        'severity', 'cause', 'record', 'timestamp_start', 'timestamp_end'
     )
 
     severity: str