Become more aggressive in parsing

This commit is contained in:
XANTRONIX Industrial 2025-02-20 00:58:16 -05:00
parent 51009d942c
commit a9bdab632d
3 changed files with 103 additions and 36 deletions

View file

@ -56,7 +56,7 @@ create table nexrad_afos_message (
phenom TEXT NOT NULL,
sig TEXT NOT NULL,
etn INTEGER NOT NULL,
body TEXT NOT NULL,
text_raw TEXT NOT NULL,
azimuth FLOAT NOT NULL,
speed FLOAT NOT NULL,
forecaster TEXT NOT NULL

View file

@ -12,11 +12,20 @@ from nexrad.vtec import VTECEvent
# Matches a line consisting solely of one or more decimal digits; the digits
# are captured as group 1.  The parser tries this pattern while it is in the
# SERIAL state.
RE_ID = re.compile(r'^(\d+)$')
RE_ISSUANCE = re.compile(r'''
^ (W[A-Z]{3}\d{2})
[ ]{1} (?P<wfo>[A-Z]{4})
[ ]{1} (?P<day>\d{2})
(?P<hour>\d{2}) (?P<minute>\d{2})
$
^ ([A-Z]{4}\d+)
\s+ (?P<wfo>[A-Z]{4})
\s+ (?P<day>\d{2}) (?P<hour>\d{2}) (?P<minute>\d{2})
''', re.X)
# Matches the human-readable local date line found in the message body, of
# the shape "<h|hh><mm> AM|PM <TZ> <weekday> <month> <day> <year>", anchored
# at the start of the line only (no trailing `$`, so extra text after the
# year is tolerated).
#
# Named groups: hour, minute, tz, weekday, month, day, year.  The AM/PM
# marker is captured as an unnamed group (group 3).
#
# NOTE(review): the visible consumer of this pattern reads `hour` directly
# and does not appear to consult the AM/PM group -- confirm whether a
# 12-hour to 24-hour adjustment is intended.
# NOTE(review): `tz` accepts exactly three uppercase letters, so zone
# abbreviations of any other length will not match -- confirm that is
# acceptable for the products being parsed.
RE_DATE = re.compile(r'''
^ (?P<hour>\d{1,2})
(?P<minute>\d{2})
\s+ (AM|PM)
\s+ (?P<tz>[A-Z]{3})
\s+ (?P<weekday>[A-Za-z]+)
\s+ (?P<month>[A-Za-z]+)
\s+ (?P<day>\d{1,2})
\s+ (?P<year>\d{4})
''', re.X)
# Matches a line of exactly six uppercase letters: the first three are
# captured as `product` and the last three as `wfo`.  The parser tries this
# pattern while it is in the PRODUCT state.
RE_PRODUCT = re.compile(r'^(?P<product>[A-Z]{3})(?P<wfo>[A-Z]{3})$')
@ -27,14 +36,28 @@ RE_MOTION = re.compile(r'''
^ TIME
\.\.\. MOT
\.\.\. LOC
[ ]{1} (?P<hour>\d{2})(?P<minute>\d{2})Z
[ ]{1} (?P<azimuth>\d+)DEG
[ ]{1} (?P<speed>\d+)KT
[ ]{1} (?P<lat>\d+)
[ ]{1} (?P<lon>\d+)
\s+ (?P<hour>\d{2})(?P<minute>\d{2})Z
\s+ (?P<azimuth>\d+)DEG
\s+ (?P<speed>\d+)KT
\s+ (?P<lat>\d+)
\s+ (?P<lon>\d+)
$
''', re.X)
# Upper-case English month names in calendar order; the position in this
# tuple (starting from 1) is the month number.
_MONTH_NAMES = (
    'JANUARY', 'FEBRUARY', 'MARCH', 'APRIL', 'MAY', 'JUNE',
    'JULY', 'AUGUST', 'SEPTEMBER', 'OCTOBER', 'NOVEMBER', 'DECEMBER',
)

# Maps an upper-case month name to its month number (1-12).  Both the
# three-letter abbreviation and the full name are accepted; the abbreviations
# are inserted first, followed by the full names ('MAY' is both at once).
MONTHS = {
    **{name[:3]: number for number, name in enumerate(_MONTH_NAMES, start=1)},
    **{name: number for number, name in enumerate(_MONTH_NAMES, start=1)},
}
# Maps an upper-case, three-letter time zone abbreviation to its offset from
# UTC in whole hours.  Keys must be the bare abbreviation with no surrounding
# whitespace: lookups use the text captured by a `[A-Z]{3}` regex group, so a
# key containing a space can never be hit and the zone would raise KeyError.
TIMEZONES = {
    'HST': -10,
    'PST': -8, 'PDT': -7,
    'MST': -7, 'MDT': -6,
    'CST': -6, 'CDT': -5,
    'EST': -5, 'EDT': -4,
    'GMT': 0,
}
def parse_lon(text: str) -> float:
    """Convert a digit string in hundredths of a degree to a negative longitude.

    The last two characters of *text* are the fractional part (hundredths)
    and everything before them is the whole-degree part, so ``"8712"`` is
    87.12 degrees.  The magnitude is returned negated, i.e. ``-87.12``.

    Raises:
        ValueError: if either part of *text* cannot be converted by
            ``float()`` (for example when *text* has fewer than three
            characters, leaving the whole-degree part empty).
    """
    degrees = float(text[:-2])
    hundredths = float(text[-2:])

    # Negate the *whole* magnitude.  Writing this as `0 - degrees + frac`
    # would add the fraction back toward zero (87.12 -> -86.88).
    return -(degrees + hundredths / 100)
@ -76,24 +99,28 @@ class AFOSMessage(DatabaseTable):
__columns__ = (
'id', 'timestamp_issued', 'timestamp_start', 'timestamp_end',
'serial', 'product', 'vtec_type', 'etn', 'actions', 'wfo',
'phenom', 'sig', 'text', 'azimuth', 'speed', 'location',
'phenom', 'sig', 'text_raw', 'azimuth', 'speed', 'location',
'forecaster', 'poly',
)
__columns_read__ = {
'poly': 'ST_AsText(poly) as poly'
'poly': 'ST_AsText(poly) as poly',
'location': 'ST_AsText(location) as location'
}
__values_write__ = {
'poly': shapely.from_wkt
'poly': shapely.from_wkt,
'location': shapely.from_wkt
}
__columns_write__ = {
'poly': 'ST_GeomFromText(:poly, {crs})'.format(crs=COORD_SYSTEM)
'poly': 'ST_GeomFromText(:poly, {crs})'.format(crs=COORD_SYSTEM),
'location': 'ST_GeomFromText(:location, {crs})'.format(crs=COORD_SYSTEM)
}
__values_write__ = {
'poly': lambda v: {'poly': shapely.to_wkt(v)}
'poly': lambda v: {'poly': shapely.to_wkt(v)},
'location': lambda v: {'location': shapely.to_wkt(v)}
}
id: int
@ -110,7 +137,7 @@ class AFOSMessage(DatabaseTable):
phenom: str
sig: str
etn: int
body: str
text_raw: str
azimuth: int
speed: int
location: shapely.Point
@ -133,7 +160,7 @@ class AFOSMessage(DatabaseTable):
self.phenom = None
self.sig = None
self.etn = None
self.body = None
self.text_raw = None
self.azimuth = None
self.speed = None
self.location = None
@ -143,13 +170,37 @@ class AFOSMessage(DatabaseTable):
@staticmethod
def parse(text: str) -> Self:
message = AFOSMessage()
message.text = text
message.text_raw = text
state = AFOSMessageParserState.SERIAL
issuance = None
timestamp_inline = None
for line in text.split('\n'):
line = line.rstrip()
if line == '':
continue
elif line[0] == '/' and line[-1] == '/':
#
# The VTEC line can appear anywhere in the message
# text, therefore, parsing must be able to occur in
# all states.
#
vtec = VTECEvent.parse(line)
if vtec is not None:
message.timestamp_start = vtec.timestamp_start
message.timestamp_end = vtec.timestamp_end
message.vtec_type = vtec.typeof
message.actions = vtec.actions
message.wfo = vtec.wfo
message.phenom = vtec.phenom
message.sig = vtec.sig
message.etn = vtec.etn
if state == AFOSMessageParserState.SERIAL:
match = RE_ID.match(line)
@ -160,7 +211,8 @@ class AFOSMessage(DatabaseTable):
match = RE_ISSUANCE.match(line)
if match is not None:
state = AFOSMessageParserState.PRODUCT
state = AFOSMessageParserState.PRODUCT
issuance = match
elif state == AFOSMessageParserState.PRODUCT:
match = RE_PRODUCT.match(line)
@ -169,22 +221,22 @@ class AFOSMessage(DatabaseTable):
state = AFOSMessageParserState.BODY
elif state == AFOSMessageParserState.BODY:
if line == '':
continue
elif line[0] == '/':
vtec = VTECEvent.parse(line)
if timestamp_inline is None:
match = RE_DATE.match(line)
if vtec is not None:
message.timestamp_start = vtec.timestamp_start
message.timestamp_end = vtec.timestamp_end
if match is not None:
offset = TIMEZONES[match['tz'].upper()]
timestamp_inline = datetime.datetime(
year = int(match['year']),
month = MONTHS[match['month'].upper()],
day = int(match['day']),
hour = int(match['hour']),
minute = int(match['minute']),
second = 0,
tzinfo = datetime.timezone(datetime.timedelta(hours=offset))
).astimezone(datetime.UTC)
message.vtec_type = vtec.typeof
message.actions = vtec.actions
message.wfo = vtec.wfo
message.phenom = vtec.phenom
message.sig = vtec.sig
message.etn = vtec.etn
elif line == '&&':
if line == '&&':
state = AFOSMessageParserState.TAGS
elif state == AFOSMessageParserState.TAGS:
if line == '$$':
@ -205,6 +257,22 @@ class AFOSMessage(DatabaseTable):
if line != '':
message.forecaster = line
if message.timestamp_issued is None:
if timestamp_inline is not None:
message.timestamp_issued = timestamp_inline
message.timestamp_start = timestamp_inline
message.timestamp_end = timestamp_inline + datetime.timedelta(hours=1)
else:
message.timestamp_issued = datetime.datetime(
year = message.timestamp_start.year,
month = message.timestamp_start.month,
day = int(issuance['day']),
hour = int(issuance['hour']),
minute = int(issuance['minute']),
second = 0,
tzinfo = datetime.UTC
)
return message
def is_watch(self):

View file

@ -91,8 +91,7 @@ class VTECEvent():
class VTECHydroEvent():
__slots__ = (
'severity', 'cause', 'record',
'timestamp_start', 'timestamp_end'
'severity', 'cause', 'record', 'timestamp_start', 'timestamp_end'
)
severity: str