Become more aggressive in parsing
This commit is contained in:
parent
51009d942c
commit
a9bdab632d
3 changed files with 103 additions and 36 deletions
|
@ -56,7 +56,7 @@ create table nexrad_afos_message (
|
|||
phenom TEXT NOT NULL,
|
||||
sig TEXT NOT NULL,
|
||||
etn INTEGER NOT NULL,
|
||||
body TEXT NOT NULL,
|
||||
text_raw TEXT NOT NULL,
|
||||
azimuth FLOAT NOT NULL,
|
||||
speed FLOAT NOT NULL,
|
||||
forecaster TEXT NOT NULL
|
||||
|
|
|
@ -12,11 +12,20 @@ from nexrad.vtec import VTECEvent
|
|||
# A line made up solely of one or more digits; the digits are captured as
# group 1.
RE_ID = re.compile(r'^(\d+)$')
|
||||
|
||||
RE_ISSUANCE = re.compile(r'''
|
||||
^ (W[A-Z]{3}\d{2})
|
||||
[ ]{1} (?P<wfo>[A-Z]{4})
|
||||
[ ]{1} (?P<day>\d{2})
|
||||
(?P<hour>\d{2}) (?P<minute>\d{2})
|
||||
$
|
||||
^ ([A-Z]{4}\d+)
|
||||
\s+ (?P<wfo>[A-Z]{4})
|
||||
\s+ (?P<day>\d{2}) (?P<hour>\d{2}) (?P<minute>\d{2})
|
||||
''', re.X)
|
||||
|
||||
# Matches a human-readable local date/time line of the form
# "1015 PM CDT Mon May 6 2024" and exposes every field as a named group.
#
# The AM/PM marker is captured as `meridiem`; it is still capture group 3,
# so positional access keeps working.
#
# NOTE(review): `hour` appears to be a 12-hour clock value (it is followed
# by AM/PM), so any consumer that builds a 24-hour time from it presumably
# needs to take `meridiem` into account -- confirm against callers.
RE_DATE = re.compile(r'''
    ^   (?P<hour>\d{1,2})           # one or two digits, e.g. "9" in "934"
        (?P<minute>\d{2})           # always the last two digits of the time
    \s+ (?P<meridiem>AM|PM)
    \s+ (?P<tz>[A-Z]{3})            # three-letter zone abbreviation
    \s+ (?P<weekday>[A-Za-z]+)
    \s+ (?P<month>[A-Za-z]+)
    \s+ (?P<day>\d{1,2})
    \s+ (?P<year>\d{4})
''', re.X)
|
||||
|
||||
# A line made up of exactly six uppercase letters: the first three are
# captured as `product`, the last three as `wfo`.
RE_PRODUCT = re.compile(r'^(?P<product>[A-Z]{3})(?P<wfo>[A-Z]{3})$')
|
||||
|
@ -27,14 +36,28 @@ RE_MOTION = re.compile(r'''
|
|||
^ TIME
|
||||
\.\.\. MOT
|
||||
\.\.\. LOC
|
||||
[ ]{1} (?P<hour>\d{2})(?P<minute>\d{2})Z
|
||||
[ ]{1} (?P<azimuth>\d+)DEG
|
||||
[ ]{1} (?P<speed>\d+)KT
|
||||
[ ]{1} (?P<lat>\d+)
|
||||
[ ]{1} (?P<lon>\d+)
|
||||
\s+ (?P<hour>\d{2})(?P<minute>\d{2})Z
|
||||
\s+ (?P<azimuth>\d+)DEG
|
||||
\s+ (?P<speed>\d+)KT
|
||||
\s+ (?P<lat>\d+)
|
||||
\s+ (?P<lon>\d+)
|
||||
$
|
||||
''', re.X)
|
||||
|
||||
# Month number (1-12) keyed by upper-case month name, in both the
# three-letter abbreviated form and the full form.  'MAY' is identical in
# both forms, so it is listed only once (a repeated key in a dict literal is
# silently overwritten and is flagged by linters).
MONTHS = {
    'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
    'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12,

    'JANUARY': 1, 'FEBRUARY': 2, 'MARCH': 3, 'APRIL': 4,
    'JUNE': 6, 'JULY': 7, 'AUGUST': 8,
    'SEPTEMBER': 9, 'OCTOBER': 10, 'NOVEMBER': 11, 'DECEMBER': 12,
}
|
||||
|
||||
# Whole-hour offset from UTC keyed by upper-case three-letter time zone
# abbreviation.  Keys must not contain stray whitespace: lookups are done
# with the bare abbreviation, so a key such as ' EST' would never be found.
TIMEZONES = {
    'HST': -10, 'PST': -8, 'PDT': -7, 'MST': -7, 'MDT': -6, 'CST': -6,
    'CDT': -5, 'EST': -5, 'EDT': -4, 'GMT': 0,
}
|
||||
|
||||
def parse_lon(text: str) -> float:
    """Convert a digit string in hundredths of a degree to a negative longitude.

    The last two characters of *text* are the hundredths and everything
    before them is the whole-degree part, so ``"9750"`` becomes ``-97.5``.

    The whole magnitude is negated.  Negating only the whole degrees and
    then adding the fraction (``0 - deg + frac``) would move the result
    the wrong way, e.g. ``-96.5`` for ``"9750"``.

    NOTE(review): the unconditional negation presumably reflects input that
    is always given as positive degrees west -- confirm against the data.

    Raises:
        ValueError: if either part is not a valid number (including input
            shorter than three characters, where the whole-degree part is
            empty).
    """
    whole_degrees = float(text[:-2])
    hundredths = float(text[-2:])

    return -(whole_degrees + hundredths / 100)
|
||||
|
@ -76,24 +99,28 @@ class AFOSMessage(DatabaseTable):
|
|||
__columns__ = (
|
||||
'id', 'timestamp_issued', 'timestamp_start', 'timestamp_end',
|
||||
'serial', 'product', 'vtec_type', 'etn', 'actions', 'wfo',
|
||||
'phenom', 'sig', 'text', 'azimuth', 'speed', 'location',
|
||||
'phenom', 'sig', 'text_raw', 'azimuth', 'speed', 'location',
|
||||
'forecaster', 'poly',
|
||||
)
|
||||
|
||||
__columns_read__ = {
|
||||
'poly': 'ST_AsText(poly) as poly'
|
||||
'poly': 'ST_AsText(poly) as poly',
|
||||
'location': 'ST_AsText(location) as location'
|
||||
}
|
||||
|
||||
__values_write__ = {
|
||||
'poly': shapely.from_wkt
|
||||
'poly': shapely.from_wkt,
|
||||
'location': shapely.from_wkt
|
||||
}
|
||||
|
||||
__columns_write__ = {
|
||||
'poly': 'ST_GeomFromText(:poly, {crs})'.format(crs=COORD_SYSTEM)
|
||||
'poly': 'ST_GeomFromText(:poly, {crs})'.format(crs=COORD_SYSTEM),
|
||||
'location': 'ST_GeomFromText(:location, {crs})'.format(crs=COORD_SYSTEM)
|
||||
}
|
||||
|
||||
__values_write__ = {
|
||||
'poly': lambda v: {'poly': shapely.to_wkt(v)}
|
||||
'poly': lambda v: {'poly': shapely.to_wkt(v)},
|
||||
'location': lambda v: {'location': shapely.to_wkt(v)}
|
||||
}
|
||||
|
||||
id: int
|
||||
|
@ -110,7 +137,7 @@ class AFOSMessage(DatabaseTable):
|
|||
phenom: str
|
||||
sig: str
|
||||
etn: int
|
||||
body: str
|
||||
text_raw: str
|
||||
azimuth: int
|
||||
speed: int
|
||||
location: shapely.Point
|
||||
|
@ -133,7 +160,7 @@ class AFOSMessage(DatabaseTable):
|
|||
self.phenom = None
|
||||
self.sig = None
|
||||
self.etn = None
|
||||
self.body = None
|
||||
self.text_raw = None
|
||||
self.azimuth = None
|
||||
self.speed = None
|
||||
self.location = None
|
||||
|
@ -143,35 +170,24 @@ class AFOSMessage(DatabaseTable):
|
|||
@staticmethod
|
||||
def parse(text: str) -> Self:
|
||||
message = AFOSMessage()
|
||||
message.text = text
|
||||
message.text_raw = text
|
||||
|
||||
state = AFOSMessageParserState.SERIAL
|
||||
|
||||
issuance = None
|
||||
timestamp_inline = None
|
||||
|
||||
for line in text.split('\n'):
|
||||
line = line.rstrip()
|
||||
|
||||
if state == AFOSMessageParserState.SERIAL:
|
||||
match = RE_ID.match(line)
|
||||
|
||||
if match is not None:
|
||||
message.serial = int(match[1])
|
||||
state = AFOSMessageParserState.ISSUANCE
|
||||
elif state == AFOSMessageParserState.ISSUANCE:
|
||||
match = RE_ISSUANCE.match(line)
|
||||
|
||||
if match is not None:
|
||||
state = AFOSMessageParserState.PRODUCT
|
||||
elif state == AFOSMessageParserState.PRODUCT:
|
||||
match = RE_PRODUCT.match(line)
|
||||
|
||||
if match is not None:
|
||||
message.product = match['product']
|
||||
|
||||
state = AFOSMessageParserState.BODY
|
||||
elif state == AFOSMessageParserState.BODY:
|
||||
if line == '':
|
||||
continue
|
||||
elif line[0] == '/':
|
||||
elif line[0] == '/' and line[-1] == '/':
|
||||
#
|
||||
# The VTEC line can appear anywhere in the message
|
||||
# text, therefore, parsing must be able to occur in
|
||||
# all states.
|
||||
#
|
||||
vtec = VTECEvent.parse(line)
|
||||
|
||||
if vtec is not None:
|
||||
|
@ -184,7 +200,43 @@ class AFOSMessage(DatabaseTable):
|
|||
message.phenom = vtec.phenom
|
||||
message.sig = vtec.sig
|
||||
message.etn = vtec.etn
|
||||
elif line == '&&':
|
||||
|
||||
if state == AFOSMessageParserState.SERIAL:
|
||||
match = RE_ID.match(line)
|
||||
|
||||
if match is not None:
|
||||
message.serial = int(match[1])
|
||||
state = AFOSMessageParserState.ISSUANCE
|
||||
elif state == AFOSMessageParserState.ISSUANCE:
|
||||
match = RE_ISSUANCE.match(line)
|
||||
|
||||
if match is not None:
|
||||
state = AFOSMessageParserState.PRODUCT
|
||||
issuance = match
|
||||
elif state == AFOSMessageParserState.PRODUCT:
|
||||
match = RE_PRODUCT.match(line)
|
||||
|
||||
if match is not None:
|
||||
message.product = match['product']
|
||||
|
||||
state = AFOSMessageParserState.BODY
|
||||
elif state == AFOSMessageParserState.BODY:
|
||||
if timestamp_inline is None:
|
||||
match = RE_DATE.match(line)
|
||||
|
||||
if match is not None:
|
||||
offset = TIMEZONES[match['tz'].upper()]
|
||||
timestamp_inline = datetime.datetime(
|
||||
year = int(match['year']),
|
||||
month = MONTHS[match['month'].upper()],
|
||||
day = int(match['day']),
|
||||
hour = int(match['hour']),
|
||||
minute = int(match['minute']),
|
||||
second = 0,
|
||||
tzinfo = datetime.timezone(datetime.timedelta(hours=offset))
|
||||
).astimezone(datetime.UTC)
|
||||
|
||||
if line == '&&':
|
||||
state = AFOSMessageParserState.TAGS
|
||||
elif state == AFOSMessageParserState.TAGS:
|
||||
if line == '$$':
|
||||
|
@ -205,6 +257,22 @@ class AFOSMessage(DatabaseTable):
|
|||
if line != '':
|
||||
message.forecaster = line
|
||||
|
||||
if message.timestamp_issued is None:
|
||||
if timestamp_inline is not None:
|
||||
message.timestamp_issued = timestamp_inline
|
||||
message.timestamp_start = timestamp_inline
|
||||
message.timestamp_end = timestamp_inline + datetime.timedelta(hours=1)
|
||||
else:
|
||||
message.timestamp_issued = datetime.datetime(
|
||||
year = message.timestamp_start.year,
|
||||
month = message.timestamp_start.month,
|
||||
day = int(issuance['day']),
|
||||
hour = int(issuance['hour']),
|
||||
minute = int(issuance['minute']),
|
||||
second = 0,
|
||||
tzinfo = datetime.UTC
|
||||
)
|
||||
|
||||
return message
|
||||
|
||||
def is_watch(self):
|
||||
|
|
|
@ -91,8 +91,7 @@ class VTECEvent():
|
|||
|
||||
class VTECHydroEvent():
|
||||
__slots__ = (
|
||||
'severity', 'cause', 'record',
|
||||
'timestamp_start', 'timestamp_end'
|
||||
'severity', 'cause', 'record', 'timestamp_start', 'timestamp_end'
|
||||
)
|
||||
|
||||
severity: str
|
||||
|
|
Loading…
Add table
Reference in a new issue