Become more aggressive in parsing

This commit is contained in:
XANTRONIX Industrial 2025-02-20 00:58:16 -05:00
parent 51009d942c
commit a9bdab632d
3 changed files with 103 additions and 36 deletions

View file

@ -56,7 +56,7 @@ create table nexrad_afos_message (
phenom TEXT NOT NULL, phenom TEXT NOT NULL,
sig TEXT NOT NULL, sig TEXT NOT NULL,
etn INTEGER NOT NULL, etn INTEGER NOT NULL,
body TEXT NOT NULL, text_raw TEXT NOT NULL,
azimuth FLOAT NOT NULL, azimuth FLOAT NOT NULL,
speed FLOAT NOT NULL, speed FLOAT NOT NULL,
forecaster TEXT NOT NULL forecaster TEXT NOT NULL

View file

@ -12,11 +12,20 @@ from nexrad.vtec import VTECEvent
RE_ID = re.compile(r'^(\d+)$') RE_ID = re.compile(r'^(\d+)$')
RE_ISSUANCE = re.compile(r''' RE_ISSUANCE = re.compile(r'''
^ (W[A-Z]{3}\d{2}) ^ ([A-Z]{4}\d+)
[ ]{1} (?P<wfo>[A-Z]{4}) \s+ (?P<wfo>[A-Z]{4})
[ ]{1} (?P<day>\d{2}) \s+ (?P<day>\d{2}) (?P<hour>\d{2}) (?P<minute>\d{2})
(?P<hour>\d{2}) (?P<minute>\d{2}) ''', re.X)
$
RE_DATE = re.compile(r'''
^ (?P<hour>\d{1,2})
(?P<minute>\d{2})
\s+ (AM|PM)
\s+ (?P<tz>[A-Z]{3})
\s+ (?P<weekday>[A-Za-z]+)
\s+ (?P<month>[A-Za-z]+)
\s+ (?P<day>\d{1,2})
\s+ (?P<year>\d{4})
''', re.X) ''', re.X)
RE_PRODUCT = re.compile(r'^(?P<product>[A-Z]{3})(?P<wfo>[A-Z]{3})$') RE_PRODUCT = re.compile(r'^(?P<product>[A-Z]{3})(?P<wfo>[A-Z]{3})$')
@ -27,14 +36,28 @@ RE_MOTION = re.compile(r'''
^ TIME ^ TIME
\.\.\. MOT \.\.\. MOT
\.\.\. LOC \.\.\. LOC
[ ]{1} (?P<hour>\d{2})(?P<minute>\d{2})Z \s+ (?P<hour>\d{2})(?P<minute>\d{2})Z
[ ]{1} (?P<azimuth>\d+)DEG \s+ (?P<azimuth>\d+)DEG
[ ]{1} (?P<speed>\d+)KT \s+ (?P<speed>\d+)KT
[ ]{1} (?P<lat>\d+) \s+ (?P<lat>\d+)
[ ]{1} (?P<lon>\d+) \s+ (?P<lon>\d+)
$ $
''', re.X) ''', re.X)
# Maps an upper-cased month name -- either the three-letter abbreviation or
# the full name -- to its 1-based month number.  'MAY' is both its own
# abbreviation and its full name, so it is listed exactly once.
MONTHS = {
    'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
    'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12,
    'JANUARY': 1, 'FEBRUARY': 2, 'MARCH': 3, 'APRIL': 4,
    'JUNE': 6, 'JULY': 7, 'AUGUST': 8,
    'SEPTEMBER': 9, 'OCTOBER': 10, 'NOVEMBER': 11, 'DECEMBER': 12,
}
# Maps an upper-cased three-letter time zone abbreviation to its offset from
# UTC in whole hours.  Keys must contain no surrounding whitespace: they are
# looked up using a regex group that only ever yields three capital letters,
# so a padded key such as ' EST' could never be found and would raise
# KeyError for every Eastern Standard Time timestamp.
TIMEZONES = {
    'HST': -10, 'PST': -8, 'PDT': -7, 'MST': -7, 'MDT': -6, 'CST': -6,
    'CDT': -5, 'EST': -5, 'EDT': -4, 'GMT': 0,
}
def parse_lon(text: str): def parse_lon(text: str):
size = len(text) size = len(text)
return 0 - float(text[0:size-2]) + (float(text[size-2:size]) / 100) return 0 - float(text[0:size-2]) + (float(text[size-2:size]) / 100)
@ -76,24 +99,28 @@ class AFOSMessage(DatabaseTable):
__columns__ = ( __columns__ = (
'id', 'timestamp_issued', 'timestamp_start', 'timestamp_end', 'id', 'timestamp_issued', 'timestamp_start', 'timestamp_end',
'serial', 'product', 'vtec_type', 'etn', 'actions', 'wfo', 'serial', 'product', 'vtec_type', 'etn', 'actions', 'wfo',
'phenom', 'sig', 'text', 'azimuth', 'speed', 'location', 'phenom', 'sig', 'text_raw', 'azimuth', 'speed', 'location',
'forecaster', 'poly', 'forecaster', 'poly',
) )
__columns_read__ = { __columns_read__ = {
'poly': 'ST_AsText(poly) as poly' 'poly': 'ST_AsText(poly) as poly',
'location': 'ST_AsText(location) as location'
} }
__values_write__ = { __values_write__ = {
'poly': shapely.from_wkt 'poly': shapely.from_wkt,
'location': shapely.from_wkt
} }
__columns_write__ = { __columns_write__ = {
'poly': 'ST_GeomFromText(:poly, {crs})'.format(crs=COORD_SYSTEM) 'poly': 'ST_GeomFromText(:poly, {crs})'.format(crs=COORD_SYSTEM),
'location': 'ST_GeomFromText(:location, {crs})'.format(crs=COORD_SYSTEM)
} }
__values_write__ = { __values_write__ = {
'poly': lambda v: {'poly': shapely.to_wkt(v)} 'poly': lambda v: {'poly': shapely.to_wkt(v)},
'location': lambda v: {'location': shapely.to_wkt(v)}
} }
id: int id: int
@ -110,7 +137,7 @@ class AFOSMessage(DatabaseTable):
phenom: str phenom: str
sig: str sig: str
etn: int etn: int
body: str text_raw: str
azimuth: int azimuth: int
speed: int speed: int
location: shapely.Point location: shapely.Point
@ -133,7 +160,7 @@ class AFOSMessage(DatabaseTable):
self.phenom = None self.phenom = None
self.sig = None self.sig = None
self.etn = None self.etn = None
self.body = None self.text_raw = None
self.azimuth = None self.azimuth = None
self.speed = None self.speed = None
self.location = None self.location = None
@ -143,35 +170,24 @@ class AFOSMessage(DatabaseTable):
@staticmethod @staticmethod
def parse(text: str) -> Self: def parse(text: str) -> Self:
message = AFOSMessage() message = AFOSMessage()
message.text = text message.text_raw = text
state = AFOSMessageParserState.SERIAL state = AFOSMessageParserState.SERIAL
issuance = None
timestamp_inline = None
for line in text.split('\n'): for line in text.split('\n'):
line = line.rstrip() line = line.rstrip()
if state == AFOSMessageParserState.SERIAL:
match = RE_ID.match(line)
if match is not None:
message.serial = int(match[1])
state = AFOSMessageParserState.ISSUANCE
elif state == AFOSMessageParserState.ISSUANCE:
match = RE_ISSUANCE.match(line)
if match is not None:
state = AFOSMessageParserState.PRODUCT
elif state == AFOSMessageParserState.PRODUCT:
match = RE_PRODUCT.match(line)
if match is not None:
message.product = match['product']
state = AFOSMessageParserState.BODY
elif state == AFOSMessageParserState.BODY:
if line == '': if line == '':
continue continue
elif line[0] == '/': elif line[0] == '/' and line[-1] == '/':
#
# The VTEC line can appear anywhere in the message
# text, therefore, parsing must be able to occur in
# all states.
#
vtec = VTECEvent.parse(line) vtec = VTECEvent.parse(line)
if vtec is not None: if vtec is not None:
@ -184,7 +200,43 @@ class AFOSMessage(DatabaseTable):
message.phenom = vtec.phenom message.phenom = vtec.phenom
message.sig = vtec.sig message.sig = vtec.sig
message.etn = vtec.etn message.etn = vtec.etn
elif line == '&&':
if state == AFOSMessageParserState.SERIAL:
match = RE_ID.match(line)
if match is not None:
message.serial = int(match[1])
state = AFOSMessageParserState.ISSUANCE
elif state == AFOSMessageParserState.ISSUANCE:
match = RE_ISSUANCE.match(line)
if match is not None:
state = AFOSMessageParserState.PRODUCT
issuance = match
elif state == AFOSMessageParserState.PRODUCT:
match = RE_PRODUCT.match(line)
if match is not None:
message.product = match['product']
state = AFOSMessageParserState.BODY
elif state == AFOSMessageParserState.BODY:
if timestamp_inline is None:
match = RE_DATE.match(line)
if match is not None:
offset = TIMEZONES[match['tz'].upper()]
timestamp_inline = datetime.datetime(
year = int(match['year']),
month = MONTHS[match['month'].upper()],
day = int(match['day']),
hour = int(match['hour']),
minute = int(match['minute']),
second = 0,
tzinfo = datetime.timezone(datetime.timedelta(hours=offset))
).astimezone(datetime.UTC)
if line == '&&':
state = AFOSMessageParserState.TAGS state = AFOSMessageParserState.TAGS
elif state == AFOSMessageParserState.TAGS: elif state == AFOSMessageParserState.TAGS:
if line == '$$': if line == '$$':
@ -205,6 +257,22 @@ class AFOSMessage(DatabaseTable):
if line != '': if line != '':
message.forecaster = line message.forecaster = line
if message.timestamp_issued is None:
if timestamp_inline is not None:
message.timestamp_issued = timestamp_inline
message.timestamp_start = timestamp_inline
message.timestamp_end = timestamp_inline + datetime.timedelta(hours=1)
else:
message.timestamp_issued = datetime.datetime(
year = message.timestamp_start.year,
month = message.timestamp_start.month,
day = int(issuance['day']),
hour = int(issuance['hour']),
minute = int(issuance['minute']),
second = 0,
tzinfo = datetime.UTC
)
return message return message
def is_watch(self): def is_watch(self):

View file

@ -91,8 +91,7 @@ class VTECEvent():
class VTECHydroEvent(): class VTECHydroEvent():
__slots__ = ( __slots__ = (
'severity', 'cause', 'record', 'severity', 'cause', 'record', 'timestamp_start', 'timestamp_end'
'timestamp_start', 'timestamp_end'
) )
severity: str severity: str