405 lines
10 KiB
Python
405 lines
10 KiB
Python
import re
|
|
import enum
|
|
import shapely
|
|
import datetime
|
|
|
|
from xmet.db import DatabaseTable
|
|
from xmet.coord import COORD_SYSTEM
|
|
from xmet.geo import PointSequence
|
|
from xmet.afos import MONTHS, TIMEZONES
|
|
|
|
#
# Product title line: "DAY <n> CONVECTIVE OUTLOOK ...".  Captures the
# outlook day number as 'day'.
#
RE_HEADER = re.compile(r'''
    ^DAY
    \s+ (?P<day>\d+)
    \s+ CONVECTIVE
    \s+ OUTLOOK
''', re.VERBOSE)

#
# Issuing office line; any line containing " STORM PREDICTION CENTER "
# surrounded by other text.
#
RE_OFFICE = re.compile(r'.* STORM PREDICTION CENTER .*')

#
# Issuance time line in 12-hour local time, for example
# "0759 AM CDT SUN MAY 19 2024".  Hour and minute are two digits each
# with no separator between them.
#
RE_ISSUANCE = re.compile(r'''
    ^(?P<hour>\d{2})
    (?P<minute>\d{2})
    \s+ (?P<ampm>AM|PM)
    \s+ (?P<tz>[A-Z]{3})
    \s+ (?P<weekday>[A-Z]{3})
    \s+ (?P<month>[A-Z]{3})
    \s+ (?P<day>\d{2})
    \s+ (?P<year>\d{4})$
''', re.VERBOSE)

#
# Validity line, for example "VALID TIME 191300Z - 201200Z".  Each
# timestamp is day-of-month, hour and minute (two digits each) followed
# by a literal 'Z'.
#
RE_VALIDITY = re.compile(r'''
    ^VALID \s+ TIME
    \s+ (?P<day_start>\d{2})
    (?P<hour_start>\d{2})
    (?P<minute_start>\d{2})Z
    \s+ -
    \s+ (?P<day_end>\d{2})
    (?P<hour_end>\d{2})
    (?P<minute_end>\d{2})Z$
''', re.VERBOSE)

#
# Section heading announcing the kind of areas which follow, for example
# "PROBABILISTIC OUTLOOK POINTS DAY 1".  Captures the leading word as
# 'type'.
#
RE_AREA_TYPE = re.compile(r'^(?P<type>[A-Z]+) OUTLOOK POINTS DAY .*')

#
# Hazard heading: a single upper-case word between two "..." markers, for
# example "... TORNADO ...".  Captures the word as 'type'.
#
RE_HAZARD = re.compile(r'''
    ^(?:\.\.\.)
    \s+ (?P<type>[A-Z]+)
    \s+ (?:\.\.\.)$
''', re.VERBOSE)

#
# First line of an area: a category label starting in the first column,
# followed by one to six whitespace-separated 8-digit coordinate tokens
# (captured together, including leading whitespace, as 'rest').
#
RE_POINTS_START = re.compile(r'''
    ^(?P<category>[A-Z0-9\.]+)
    (?P<rest>(?:\s+\d{8}){1,6})
''', re.VERBOSE)

#
# Continuation line of an area: one to six 8-digit coordinate tokens, each
# preceded by whitespace, and nothing else.
#
RE_POINTS = re.compile(r'^(?:\s+\d{8}){1,6}$')
|
|
|
|
class SPCOutlookParserException(Exception):
    """Raised when outlook text does not have the structure the parser expects."""
|
|
|
|
def parse_coord(coord: str) -> 'shapely.Point':
    """
    Convert one 8-digit coordinate token into a shapely point.

    The first four digits and the last four digits are each read as an
    integer in hundredths.  The last four digits (named ``lon`` here) become
    the negated X ordinate, and the first four become the Y ordinate.

    :param coord: Token consisting of exactly eight decimal digits.
    :returns: ``shapely.Point(x, y)`` built from the two halves.
    :raises SPCOutlookParserException: If the token is not all decimal
        digits, or is not exactly eight characters long.
    """
    if not coord.isdecimal():
        raise SPCOutlookParserException('Coordinate pair is not decimal')

    if len(coord) != 8:
        raise SPCOutlookParserException('Coordinate pair is incorrect length string')

    lon = int(coord[4:8])

    #
    # Four digits cannot hold a value of 100.00 or more, so values at or
    # below 61.00 are taken to have lost a leading "1".
    # NOTE(review): the 6100 cut-off is presumably chosen for the product's
    # geographic coverage -- confirm against the product specification.
    #
    if lon <= 6100:
        lon += 10000

    return shapely.Point(0.01 * -lon,
                         0.01 * int(coord[0:4]))
|
|
|
|
def each_point_sequence(parts: list[str]):
    """
    Yield the point sequence described by a list of coordinate tokens.

    Every token other than the marker ``'99999999'`` is converted with
    parse_coord(); marker tokens are dropped.  A single PointSequence is
    yielded when at least two points remain, otherwise nothing is yielded.

    NOTE(review): the marker is skipped rather than used to split the
    tokens into separate sequences -- confirm that is the intent.
    """
    coords = [parse_coord(token) for token in parts if token != '99999999']

    if len(coords) > 1:
        yield PointSequence(coords)
|
|
|
|
def each_poly(parts: list[str]):
    """
    Yield the ``poly`` attribute of every point sequence produced by
    each_point_sequence() for the given tokens, after calling ``close()``
    on that sequence.
    """
    for ring in each_point_sequence(parts):
        ring.close()

        yield ring.poly
|
|
|
|
class SPCOutlookArea(DatabaseTable):
    """
    Common base for outlook area records: a record identifier, the
    identifier of the outlook the area belongs to, and a geometry.

    The ``__columns_*__`` and ``__values_*__`` mappings below describe how
    the ``poly`` field is translated between WKT text and shapely geometry.
    NOTE(review): these mappings are presumably consumed by DatabaseTable,
    which is not visible here -- confirm against xmet.db.
    """
    __slots__ = ('id', 'outlook_id', 'poly')

    # SQL expression used in place of the bare column when reading 'poly'.
    __columns_read__ = {
        'poly': 'ST_AsText(poly) as poly'
    }

    # Converts the WKT text read for 'poly' into a shapely geometry.
    __values_read__ = {
        'poly': shapely.from_wkt
    }

    # SQL expression used when writing 'poly'; takes WKT text via the
    # ':poly' placeholder and the module's COORD_SYSTEM as second argument.
    __columns_write__ = {
        'poly': 'ST_GeomFromText(:poly, {crs})'.format(crs=COORD_SYSTEM)
    }

    # Produces the value for the ':poly' placeholder: the geometry as WKT.
    __values_write__ = {
        'poly': lambda v: {'poly': shapely.to_wkt(v)}
    }

    def __init__(self):
        super().__init__()
        self.id = None
        self.outlook_id = None
        self.poly = None
|
|
|
|
class SPCOutlookProbabilityArea(SPCOutlookArea):
    """
    Outlook area carrying a hazard name, a probability value and a
    significance flag in addition to the fields of SPCOutlookArea.

    NOTE(review): __init__ is inherited and does not assign ``hazard``,
    ``probability`` or ``sig``; they are unset until the caller assigns
    them.
    """
    __slots__ = (
        'hazard', 'probability', 'sig',
    )

    # Table name and key column for this record type.
    __table__ = 'xmet_spc_outlook_probability_area'
    __key__ = 'id'

    # Column names of the table, including those inherited from the base.
    __columns__ = (
        'id', 'outlook_id', 'hazard', 'probability', 'sig', 'poly'
    )
|
|
|
|
class SPCOutlookCategoryArea(SPCOutlookArea):
    """
    Outlook area carrying a category label in addition to the fields of
    SPCOutlookArea.

    NOTE(review): __init__ is inherited and does not assign ``category``;
    it is unset until the caller assigns it.
    """
    #
    # The trailing comma matters: without it the parentheses enclose a
    # plain string rather than a one-element tuple, and anything iterating
    # over __slots__ would see individual characters.
    #
    __slots__ = (
        'category',
    )

    # Table name and key column for this record type.
    __table__ = 'xmet_spc_outlook_category_area'
    __key__ = 'id'

    # Column names of the table, including those inherited from the base.
    __columns__ = (
        'id', 'outlook_id', 'category', 'poly'
    )
|
|
|
|
class SPCOutlook:
    """
    A parsed outlook: issuance and validity timestamps, outlook day number,
    the raw product text, free-form body text, and the lists of
    probability and category areas collected while parsing.

    NOTE(review): this class declares __table__/__key__/__columns__ but,
    unlike the area classes, does not derive from DatabaseTable -- confirm
    whether that is intentional.
    """
    __slots__ = (
        'id', 'timestamp_issued', 'timestamp_start', 'timestamp_end', 'day',
        'text_raw', 'body', 'poly', 'probabilities', 'categories'
    )

    __table__ = 'xmet_spc_outlook'
    __key__ = 'id'

    __columns__ = (
        'id', 'timestamp_issued', 'timestamp_start', 'timestamp_end',
        'day', 'text_raw', 'body'
    )

    def __init__(self):
        # Identity and timing are unknown until parsed or loaded.
        self.id = None
        self.timestamp_issued = self.timestamp_start = self.timestamp_end = None
        self.day = None

        # Text content; the body starts empty so it can be appended to.
        self.text_raw = None
        self.body = ''
        self.poly = None

        # Each instance gets its own, initially empty, area lists.
        self.probabilities = []
        self.categories = []
|
|
|
|
class SPCOutlookParserState(enum.Enum):
    """
    Stages of SPCOutlookParser, listed in the order the parser moves
    through them.  Values are assigned automatically, counting up from 1.
    """
    HEADER      = enum.auto()
    OFFICE      = enum.auto()
    ISSUANCE    = enum.auto()
    VALIDITY    = enum.auto()
    AREA_THREAT = enum.auto()
    BODY        = enum.auto()
|
|
|
|
class SPCOutlookParser():
    """
    Line-oriented state machine which turns outlook product text into an
    SPCOutlook.

    Each input line is handed to the method matching the current
    SPCOutlookParserState.  The states advance in order: header, office,
    issuance time, validity period, area/hazard sections, then free-form
    body text.
    """
    outlook: 'SPCOutlook'
    state: 'SPCOutlookParserState'

    # Section context for the area currently being collected; each is None
    # until the corresponding line has been seen.
    area_type: 'str | None'
    hazard: 'str | None'
    category: 'str | None'

    # Coordinate tokens gathered so far for the current area.
    points: list[str]

    def reset(self):
        """Discard all parse state and start over with a fresh SPCOutlook."""
        self.outlook = SPCOutlook()
        self.state = SPCOutlookParserState.HEADER

        self.area_type = None
        self.hazard = None
        self.category = None
        self.points = list()

    def __init__(self):
        self.reset()

    def parse_header(self, line: str):
        """
        Read the product title line and record the outlook day number.

        Blank lines before the title are skipped.

        :raises SPCOutlookParserException: If the first non-blank line is
            not a title line.
        """
        if line == '':
            return

        match = RE_HEADER.match(line)

        if match is None:
            raise SPCOutlookParserException(f"Unexpected header value, got '{line}'")

        self.outlook.day = int(match['day'])

        self.state = SPCOutlookParserState.OFFICE

    def parse_office(self, line: str):
        """
        Wait for the issuing office line; any other line is ignored.
        """
        if RE_OFFICE.match(line) is not None:
            self.state = SPCOutlookParserState.ISSUANCE

    def parse_issuance(self, line: str):
        """
        Read the issuance time line and store it, converted to UTC, as the
        outlook's ``timestamp_issued``.

        :raises SPCOutlookParserException: If the line is not an issuance
            time line.
        :raises KeyError: If the timezone or month abbreviation is not
            present in TIMEZONES / MONTHS.
        """
        match = RE_ISSUANCE.match(line)

        if match is None:
            raise SPCOutlookParserException(f"Invalid issuance time, got '{line}'")

        #
        # Convert the 12-hour clock to 24-hour: 12 AM is hour 0, and PM
        # hours other than 12 are shifted by twelve.
        #
        hour = int(match['hour'])

        if match['ampm'] == 'AM':
            if hour == 12:
                hour = 0
        elif match['ampm'] == 'PM':
            if hour < 12:
                hour += 12

        #
        # The pattern only admits upper-case letters for the timezone, so
        # it can be used as a lookup key as-is.
        #
        tzoffset = TIMEZONES[match['tz']]
        tzinfo = datetime.timezone(datetime.timedelta(hours=tzoffset))

        timestamp = datetime.datetime(
            year = int(match['year']),
            month = MONTHS[match['month']],
            day = int(match['day']),
            hour = hour,
            minute = int(match['minute']),
            second = 0,
            tzinfo = tzinfo
        ).astimezone(datetime.timezone.utc)

        self.outlook.timestamp_issued = timestamp

        self.state = SPCOutlookParserState.VALIDITY

    @staticmethod
    def _validity_window(issued, outlook_day, start, end):
        """
        Resolve day-of-month validity times into full UTC timestamps.

        The validity line carries only day, hour and minute, so year and
        month are taken from the issuance date advanced by
        ``outlook_day - 1`` days.  When the end day-of-month is smaller
        than the start day-of-month the period runs into the following
        month (and into the following year when starting in December).

        :param issued: Issuance timestamp.
        :param outlook_day: Outlook day number (1 for the day of issuance).
        :param start: ``(day, hour, minute)`` of the start of validity.
        :param end: ``(day, hour, minute)`` of the end of validity.
        :returns: ``(timestamp_start, timestamp_end)``, both in UTC.
        :raises ValueError: If a resulting date does not exist.
        """
        base = issued.date() + datetime.timedelta(days=outlook_day - 1)

        day_start, hour_start, minute_start = start
        day_end, hour_end, minute_end = end

        year_end = base.year
        month_end = base.month

        if day_start > day_end:
            if month_end == 12:
                year_end += 1
                month_end = 1
            else:
                month_end += 1

        timestamp_start = datetime.datetime(
            year = base.year,
            month = base.month,
            day = day_start,
            hour = hour_start,
            minute = minute_start,
            second = 0,
            tzinfo = datetime.timezone.utc
        )

        timestamp_end = datetime.datetime(
            year = year_end,
            month = month_end,
            day = day_end,
            hour = hour_end,
            minute = minute_end,
            second = 0,
            tzinfo = datetime.timezone.utc
        )

        return timestamp_start, timestamp_end

    def parse_validity(self, line: str):
        """
        Read the validity line and store the outlook's ``timestamp_start``
        and ``timestamp_end``.

        Blank lines before the validity line are skipped.

        :raises SPCOutlookParserException: If the first non-blank line is
            not a validity line.
        :raises ValueError: If a resulting date does not exist.
        """
        if line == '':
            return

        match = RE_VALIDITY.match(line)

        if match is None:
            raise SPCOutlookParserException(f"Invalid validity time, got '{line}'")

        start, end = self._validity_window(
            self.outlook.timestamp_issued,
            self.outlook.day,
            (int(match['day_start']),
             int(match['hour_start']),
             int(match['minute_start'])),
            (int(match['day_end']),
             int(match['hour_end']),
             int(match['minute_end']))
        )

        self.outlook.timestamp_start = start
        self.outlook.timestamp_end = end

        self.state = SPCOutlookParserState.AREA_THREAT

    def handle_area(self):
        """
        Turn the coordinate tokens collected so far into area objects on
        the outlook, then clear the pending category and tokens.

        In a 'PROBABILISTIC' section the category is either 'SIGN', stored
        as ``sig = True`` with no probability, or a number stored as the
        probability.  In a 'CATEGORICAL' section the category label is
        stored unchanged.  Polygons seen under any other area type are
        dropped.

        :raises ValueError: If a probabilistic category other than 'SIGN'
            is not a number.
        """
        for poly in each_poly(self.points):
            if self.area_type == 'PROBABILISTIC':
                area = SPCOutlookProbabilityArea()
                area.hazard = self.hazard
                area.poly = poly

                if self.category == 'SIGN':
                    area.probability = None
                    area.sig = True
                else:
                    area.probability = float(self.category)
                    area.sig = False

                self.outlook.probabilities.append(area)
            elif self.area_type == 'CATEGORICAL':
                area = SPCOutlookCategoryArea()
                area.category = self.category
                area.poly = poly

                self.outlook.categories.append(area)

        self.category = None
        self.points = list()

    def parse_area_hazard(self, line: str):
        """
        Handle one line of the area sections.

        Blank lines are skipped and '&&' flushes the pending area.  The
        first line which is none of an area type heading, hazard heading,
        area start or area continuation becomes the first line of the
        body, and the parser switches to the body state.
        """
        if line == '':
            return
        elif line == '&&':
            self.handle_area()
            return

        #
        # Check for an area type.
        #
        match = RE_AREA_TYPE.match(line)

        if match is not None:
            self.area_type = match['type']
            return

        #
        # Check for an area hazard.
        #
        match = RE_HAZARD.match(line)

        if match is not None:
            self.hazard = match['type']
            return

        #
        # Check for first line of polygon.  A new category label also ends
        # the area which was being collected, if any.
        #
        match = RE_POINTS_START.match(line)

        if match is not None:
            if len(self.points) > 0:
                self.handle_area()

            self.category = match['category']
            self.points = match['rest'].split()

            return

        #
        # Check for polygon line continuation.
        #
        match = RE_POINTS.match(line)

        if match is not None:
            self.points.extend(line.split())
            return

        #
        # If none of the previous expressions match, then treat all
        # following text as body.
        #
        self.outlook.body = line

        self.state = SPCOutlookParserState.BODY

    def parse_body(self, line: str):
        """Append one more line to the outlook body."""
        self.outlook.body += '\n' + line

    def parse(self, text: str) -> 'SPCOutlook':
        """
        Parse a complete outlook product.

        Any state left over from a previous call is discarded first.
        Trailing whitespace is stripped from every line before it is
        handled.

        :param text: Full product text, with lines separated by newlines.
        :returns: The populated SPCOutlook, with ``text_raw`` set to
            ``text``.
        :raises SPCOutlookParserException: If the text does not have the
            expected structure.
        """
        self.reset()

        self.outlook.text_raw = text

        handlers = {
            SPCOutlookParserState.HEADER:      self.parse_header,
            SPCOutlookParserState.OFFICE:      self.parse_office,
            SPCOutlookParserState.ISSUANCE:    self.parse_issuance,
            SPCOutlookParserState.VALIDITY:    self.parse_validity,
            SPCOutlookParserState.AREA_THREAT: self.parse_area_hazard,
            SPCOutlookParserState.BODY:        self.parse_body,
        }

        for line in text.split('\n'):
            handlers[self.state](line.rstrip())

        return self.outlook
|