xmet/lib/xmet/spc.py

import re
import enum
import shapely
import datetime

from xmet.db    import DatabaseTable
from xmet.coord import COORD_SYSTEM
from xmet.geo   import PointSequence
from xmet.afos  import MONTHS, TIMEZONES

# Product title line, e.g. "DAY 1 CONVECTIVE OUTLOOK".  Captures the outlook
# day number.  The pattern has no end anchor, so trailing text is permitted.
RE_HEADER = re.compile(r'''
    ^DAY
    \s+ (?P<day>\d+)
    \s+ CONVECTIVE
    \s+ OUTLOOK
''', re.X)

# Issuing office line: any line containing " STORM PREDICTION CENTER " with
# a space on both sides of the phrase.
RE_OFFICE = re.compile(r'.* STORM PREDICTION CENTER .*')

# Local issuance time line, e.g. "1128 AM CDT WED MAY 15 2024": four digit
# HHMM, AM/PM marker, three letter time zone, weekday, month, two digit day
# of month and four digit year.
RE_ISSUANCE = re.compile(r'''
       ^(?P<hour>\d{2})
        (?P<minute>\d{2})
    \s+ (?P<ampm>AM|PM)
    \s+ (?P<tz>[A-Z]{3})
    \s+ (?P<weekday>[A-Z]{3})
    \s+ (?P<month>[A-Z]{3})
    \s+ (?P<day>\d{2})
    \s+ (?P<year>\d{4})$
''', re.X)

# Validity period line, e.g. "VALID TIME 151630Z - 161200Z".  Each of the two
# timestamps is a DDHHMM group followed by a literal "Z"; neither carries a
# month or a year.
RE_VALIDITY = re.compile(r'''
    ^VALID \s+ TIME
    \s+ (?P<day_start>\d{2})
        (?P<hour_start>\d{2})
        (?P<minute_start>\d{2})Z
    \s+ -
    \s+ (?P<day_end>\d{2})
        (?P<hour_end>\d{2})
        (?P<minute_end>\d{2})Z$
''', re.X)

# Section heading that announces the kind of areas which follow, e.g.
# "PROBABILISTIC OUTLOOK POINTS DAY 1".  The leading word is captured.
RE_AREA_TYPE = re.compile(r'^(?P<type>[A-Z]+) OUTLOOK POINTS DAY .*')

# Hazard heading: an upper case word between two "..." markers, e.g.
# "... TORNADO ...".
RE_HAZARD = re.compile(r'''
    ^(?:\.\.\.)
    \s+ (?P<type>[A-Z]+)
    \s+ (?:\.\.\.)$
''', re.X)

# First line of an area outline: a label made of upper case letters, digits
# and dots starting in the first column, followed by one to six eight digit
# coordinate groups.  The pattern has no end anchor.
RE_POINTS_START = re.compile(r'''
    ^(?P<category>[A-Z0-9\.]+)
     (?P<rest>(?:\s+\d{8}){1,6})
''', re.X)

# Continuation line of an area outline: nothing but one to six eight digit
# coordinate groups, each preceded by whitespace.
RE_POINTS = re.compile(r'^(?:\s+\d{8}){1,6}$')

class SPCOutlookParserException(Exception):
    """Signals text that cannot be interpreted as part of an SPC outlook."""

def parse_coord(coord: str) -> 'shapely.Point':
    """
    Convert one eight digit coordinate group into a point.

    The first four digits are read as the latitude and the last four as the
    longitude, both in hundredths of a degree.  The longitude is negated
    before the point is built, so the resulting X value is never positive.

    Args:
        coord: Exactly eight decimal digits, e.g. ``'35129750'``.

    Returns:
        A ``shapely.Point`` whose X is the (negated) longitude and whose Y is
        the latitude, in degrees.

    Raises:
        SPCOutlookParserException: If ``coord`` contains anything other than
            decimal digits, or is not exactly eight characters long.
    """
    if not coord.isdecimal():
        raise SPCOutlookParserException('Coordinate pair is not decimal')

    if len(coord) != 8:
        raise SPCOutlookParserException('Coordinate pair is incorrect length string')

    lat = int(coord[0:4])
    lon = int(coord[4:8])

    # Values of 61.00 or less are bumped by 100 degrees.
    # NOTE(review): presumably longitudes of 100 degrees or more are written
    # without their leading "1" in the product -- confirm against the
    # product specification.
    if lon <= 6100:
        lon += 10000

    return shapely.Point(0.01 * -lon,
                         0.01 * lat)

def each_point_sequence(parts: list[str]):
    """
    Yield the point sequence described by a list of coordinate groups.

    Every group equal to ``'99999999'`` is dropped; all remaining groups are
    parsed with ``parse_coord`` and collected, in order, into one sequence.
    At most one ``PointSequence`` is yielded, and only when at least two
    points were collected.

    NOTE(review): if ``'99999999'`` is meant to separate independent line
    segments, those segments are currently merged into a single sequence --
    confirm that this is intended.
    """
    coords = [parse_coord(token) for token in parts if token != '99999999']

    if len(coords) >= 2:
        yield PointSequence(coords)

def each_poly(parts: list[str]):
    """
    Yield the ``poly`` attribute of every point sequence built from ``parts``.

    Each sequence produced by ``each_point_sequence`` has its ``close()``
    method called before its ``poly`` attribute is read.
    """
    for ring in each_point_sequence(parts):
        ring.close()
        polygon = ring.poly

        yield polygon

class SPCOutlookArea(DatabaseTable):
    """
    Shared base of the outlook area records: an identifier, the identifier
    of the owning outlook and a geometry held in ``poly``.

    The read/write mappings below pair an SQL expression with a Python side
    conversion for the ``poly`` column, using WKT text as the interchange
    form.  How the mappings are consumed is defined by ``DatabaseTable``.
    """
    __slots__ = ('id', 'outlook_id', 'poly')

    # Reading: select the geometry as WKT text, then revive it with shapely.
    __columns_read__ = {'poly': 'ST_AsText(poly) as poly'}
    __values_read__  = {'poly': shapely.from_wkt}

    # Writing: serialise the geometry to WKT and bind it as ":poly".
    __columns_write__ = {
        'poly': 'ST_GeomFromText(:poly, {crs})'.format(crs=COORD_SYSTEM)
    }
    __values_write__ = {
        'poly': lambda v: {'poly': shapely.to_wkt(v)}
    }

    def __init__(self):
        super().__init__()

        self.id = self.outlook_id = self.poly = None

class SPCOutlookProbabilityArea(SPCOutlookArea):
    """
    Outlook area carrying a hazard name together with either a numeric
    probability or a "significant" flag.
    """
    __slots__ = ('hazard', 'probability', 'sig',)

    __table__ = 'xmet_spc_outlook_probability_area'
    __key__   = 'id'

    __columns__ = (
        'id',
        'outlook_id',
        'hazard',
        'probability',
        'sig',
        'poly',
    )

class SPCOutlookCategoryArea(SPCOutlookArea):
    """
    Outlook area carrying a category label.
    """
    # The trailing comma matters: without it the parentheses are only
    # grouping and __slots__ would be the plain string 'category', which
    # yields single characters to any code that iterates over it.
    __slots__ = (
        'category',
    )

    __table__ = 'xmet_spc_outlook_category_area'
    __key__   = 'id'

    __columns__ = (
        'id', 'outlook_id', 'category', 'poly'
    )

class SPCOutlook:
    """
    One parsed outlook: issuance and validity timestamps, the outlook day
    number, the raw and body text, and the lists of probability and
    category areas found in it.

    NOTE(review): the class declares table metadata but, unlike the area
    classes in this module, does not derive from ``DatabaseTable`` --
    confirm that this is intended.
    """
    __slots__ = (
        'id',
        'timestamp_issued',
        'timestamp_start',
        'timestamp_end',
        'day',
        'text_raw',
        'body',
        'poly',
        'probabilities',
        'categories',
    )

    __table__ = 'xmet_spc_outlook'
    __key__   = 'id'

    __columns__ = (
        'id',
        'timestamp_issued',
        'timestamp_start',
        'timestamp_end',
        'day',
        'text_raw',
        'body',
    )

    def __init__(self):
        # Everything except the body text and the two area lists starts
        # out unset.
        for field in ('id', 'timestamp_issued', 'timestamp_start',
                      'timestamp_end', 'day', 'text_raw', 'poly'):
            setattr(self, field, None)

        self.body = ''

        self.probabilities, self.categories = [], []

class SPCOutlookParserState(enum.Enum):
    """Stages the outlook parser moves through, in order of appearance."""
    HEADER      = 1
    OFFICE      = 2
    ISSUANCE    = 3
    VALIDITY    = 4
    AREA_THREAT = 5
    BODY        = 6

class SPCOutlookParser:
    """
    Line oriented state machine that turns the text of an outlook product
    into an ``SPCOutlook``.

    ``parse`` feeds the text to the parser one line at a time; the handler
    that receives a line is selected by ``state``.  The states are visited
    in this order: HEADER, OFFICE, ISSUANCE, VALIDITY, AREA_THREAT, BODY.
    """
    outlook: SPCOutlook
    state:   SPCOutlookParserState

    # Context of the area outline currently being collected.  The three
    # labels are None until the corresponding line has been seen.
    area_type: str | None
    hazard:    str | None
    category:  str | None
    points:    list[str]

    def reset(self):
        """Discard all progress and start over with an empty outlook."""
        self.outlook = SPCOutlook()
        self.state   = SPCOutlookParserState.HEADER

        self.area_type = None
        self.hazard    = None
        self.category  = None
        self.points    = list()

    def __init__(self):
        self.reset()

    def parse_header(self, line: str):
        """
        Read the outlook day number from the product title line.

        Empty lines are skipped.

        Raises:
            SPCOutlookParserException: If a non-empty line is not a title
                line.
        """
        if line == '':
            return

        match = RE_HEADER.match(line)

        if match is None:
            raise SPCOutlookParserException(f"Unexpected header value, got '{line}'")

        self.outlook.day = int(match['day'])

        self.state = SPCOutlookParserState.OFFICE

    def parse_office(self, line: str):
        """Skip lines until the issuing office line has been seen."""
        if RE_OFFICE.match(line) is not None:
            self.state = SPCOutlookParserState.ISSUANCE

    def parse_issuance(self, line: str):
        """
        Read the local issuance time and store it, converted to UTC.

        Raises:
            SPCOutlookParserException: If the line is not an issuance time.
            KeyError: If the time zone or month abbreviation is not present
                in ``TIMEZONES`` or ``MONTHS`` respectively.
        """
        match = RE_ISSUANCE.match(line)

        if match is None:
            raise SPCOutlookParserException(f"Invalid issuance time, got '{line}'")

        hour = int(match['hour'])

        #
        # Convert the 12 hour clock reading to a 24 hour one.
        #
        if match['ampm'] == 'AM':
            if hour == 12:
                hour = 0
        elif match['ampm'] == 'PM':
            if hour < 12:
                hour += 12

        tzoffset = TIMEZONES[match['tz'].upper()]
        tzinfo   = datetime.timezone(datetime.timedelta(hours=tzoffset))

        timestamp = datetime.datetime(
            year   = int(match['year']),
            month  = MONTHS[match['month']],
            day    = int(match['day']),
            hour   = hour,
            minute = int(match['minute']),
            second = 0,
            tzinfo = tzinfo
        ).astimezone(datetime.UTC)

        self.outlook.timestamp_issued = timestamp

        self.state = SPCOutlookParserState.VALIDITY

    def parse_validity(self, line: str):
        """
        Read the validity period and store its start and end timestamps.

        The validity line only carries day of month, hour and minute.  The
        month and year of the start are taken from the issuance date moved
        forward by ``outlook.day - 1`` days; the end falls in the following
        month when its day of month is lower than that of the start.

        Empty lines are skipped.

        Raises:
            SPCOutlookParserException: If a non-empty line is not a
                validity line.
        """
        if line == '':
            return

        match = RE_VALIDITY.match(line)

        if match is None:
            raise SPCOutlookParserException(f"Invalid validity time, got '{line}'")

        issued = self.outlook.timestamp_issued

        date = datetime.datetime(
            year   = issued.year,
            month  = issued.month,
            day    = issued.day,
            tzinfo = issued.tzinfo
        ) + datetime.timedelta(days=self.outlook.day-1)

        month_end = date.month
        year_end  = date.year
        day_start = int(match['day_start'])
        day_end   = int(match['day_end'])

        #
        # The period runs past the end of the month.  Months are numbered
        # 1 to 12, so wrap December to January without ever producing 0.
        #
        if day_start > day_end:
            month_end = month_end % 12 + 1

        #
        # A wrap to an earlier month number means the year changed too.
        #
        if date.month > month_end:
            year_end += 1

        self.outlook.timestamp_start = datetime.datetime(
            year   = date.year,
            month  = date.month,
            day    = day_start,
            hour   = int(match['hour_start']),
            minute = int(match['minute_start']),
            second = 0,
            tzinfo = datetime.UTC
        )

        self.outlook.timestamp_end = datetime.datetime(
            year   = year_end,
            month  = month_end,
            day    = day_end,
            hour   = int(match['hour_end']),
            minute = int(match['minute_end']),
            second = 0,
            tzinfo = datetime.UTC
        )

        self.state = SPCOutlookParserState.AREA_THREAT

    def handle_area(self):
        """
        Turn the collected coordinate groups into area records.

        Every polygon produced by ``each_poly`` becomes a probability area
        or a category area, depending on the current area type, and is
        appended to the outlook.  Polygons are dropped when the area type
        is neither 'PROBABILISTIC' nor 'CATEGORICAL'.  The current label
        and the collected coordinate groups are cleared afterwards.
        """
        for poly in each_poly(self.points):
            if self.area_type == 'PROBABILISTIC':
                area = SPCOutlookProbabilityArea()
                area.hazard = self.hazard
                area.poly   = poly

                #
                # The label is either the literal 'SIGN' or a number.
                #
                if self.category == 'SIGN':
                    area.probability = None
                    area.sig         = True
                else:
                    area.probability = float(self.category)
                    area.sig         = False

                self.outlook.probabilities.append(area)
            elif self.area_type == 'CATEGORICAL':
                area = SPCOutlookCategoryArea()
                area.category = self.category
                area.poly     = poly

                self.outlook.categories.append(area)

        self.category = None
        self.points   = list()

    def parse_area_hazard(self, line: str):
        """
        Interpret one line of the area section.

        Empty lines are skipped and '&&' completes the outline collected so
        far.  Otherwise the line is tried, in order, as an area type
        heading, a hazard heading, the first line of an outline and an
        outline continuation.  The first line matching none of these
        becomes the start of the body text.
        """
        if line == '':
            return
        elif line == '&&':
            self.handle_area()
            return

        #
        # Check for an area type.
        #
        match = RE_AREA_TYPE.match(line)

        if match is not None:
            self.area_type = match['type']
            return

        #
        # Check for an area hazard.
        #
        match = RE_HAZARD.match(line)

        if match is not None:
            self.hazard = match['type']
            return

        #
        # Check for first line of polygon.  A new label completes the
        # outline that was being collected under the previous one.
        #
        match = RE_POINTS_START.match(line)

        if match is not None:
            if len(self.points) > 0:
                self.handle_area()

            #
            # The captured text starts with whitespace, so the first item
            # of the split is an empty string and is discarded.
            #
            self.category = match['category']
            self.points   = re.split(r'\s+', match['rest'])[1:]

            return

        #
        # Check for polygon line continuation.
        #
        match = RE_POINTS.match(line)

        if match is not None:
            self.points.extend(re.split(r'\s+', line.rstrip())[1:])
            return

        #
        # If none of the previous expressions match, then treat all
        # following text as body.
        #
        self.outlook.body = line

        self.state = SPCOutlookParserState.BODY

    def parse_body(self, line: str):
        """Append one line to the body text."""
        self.outlook.body += '\n' + line

    def parse(self, text: str) -> SPCOutlook:
        """
        Parse the text of one outlook product.

        The parser is reset first, so an instance may be reused.  The text
        is split on newlines and trailing whitespace is removed from every
        line before it is handed to the handler of the current state.

        Args:
            text: Text of the product, beginning at (or with only empty
                lines before) the title line.

        Returns:
            The populated ``SPCOutlook``; ``text`` is kept unchanged in its
            ``text_raw`` attribute.

        Raises:
            SPCOutlookParserException: If the title, issuance or validity
                line is malformed, or a coordinate group is invalid.
        """
        self.reset()

        self.outlook.text_raw = text

        handlers = {
            SPCOutlookParserState.HEADER:      self.parse_header,
            SPCOutlookParserState.OFFICE:      self.parse_office,
            SPCOutlookParserState.ISSUANCE:    self.parse_issuance,
            SPCOutlookParserState.VALIDITY:    self.parse_validity,
            SPCOutlookParserState.AREA_THREAT: self.parse_area_hazard,
            SPCOutlookParserState.BODY:        self.parse_body,
        }

        for line in text.split('\n'):
            #
            # The state may change after any line, so look the handler up
            # anew each time.
            #
            handlers[self.state](line.rstrip())

        return self.outlook