nexrad-archive/lib/nexrad/archive.py

import os
import re
import enum
import datetime

from nexrad.db    import Database
from nexrad.s3    import S3Bucket, S3_KEY_RE
from nexrad.radar import RADAR_RANGE

class ArchiveDateError(Exception):
    """Raised when a date component is given without the coarser one it needs.

    For example, a day was supplied without a month, or a month without a
    year.

    Attributes:
        supplied: name of the date component that was provided.
        missing:  name of the required component that was not provided.
    """

    def __init__(self, supplied: str, missing: str):
        super().__init__(supplied, missing)

        self.supplied = supplied
        self.missing  = missing

    def __str__(self):
        # Must be an f-string; a plain literal would emit the placeholders
        # verbatim instead of the component names.
        return f"Archive {self.supplied} was supplied, but required {self.missing} is missing"

class ArchiveProductType(enum.Enum):
    """Naming variants of an archive product file.

    The variant decides which version tag, if any, is appended to a
    product's file name ahead of the ``.gz`` extension.
    """

    # No version tag in the file name.
    DEFAULT = 1

    # File name carries a "_V03" tag.
    V03 = 3

    # File name carries a "_V04" tag.
    V04 = 4

class ArchiveProduct():
    """One archived radar product, identified by radar call sign and time.

    Attributes:
        typeof:    naming variant, which selects the version tag in the name.
        radar:     radar call sign, used as a directory and file-name prefix.
        timestamp: product time; ``from_s3_key`` creates it as UTC.
    """
    __slots__ = 'typeof', 'radar', 'timestamp',

    typeof:    ArchiveProductType
    radar:     str
    timestamp: datetime.datetime

    # Version tag placed before ".gz" for each variant that carries one.
    # Shared by the formatting and parsing code so the two cannot drift apart.
    _VERSION_TAGS = {
        ArchiveProductType.V03: "_V03",
        ArchiveProductType.V04: "_V04",
    }

    def __parts__(self):
        """Return ``[year, month, day, radar, file stem]`` as strings.

        The file stem is ``<radar><YYYYMMDD>_<HHMMSS>`` without the version
        tag or extension.
        """
        ts = self.timestamp

        return [
            f"{ts.year:04d}",
            f"{ts.month:02d}",
            f"{ts.day:02d}",
            self.radar,
            # ">4" right-justifies the call sign to four columns, matching
            # the "%4s" conversion this replaces.
            f"{self.radar:>4}"
            f"{ts.year:04d}{ts.month:02d}{ts.day:02d}"
            f"_{ts.hour:02d}{ts.minute:02d}{ts.second:02d}",
        ]

    def _suffix(self):
        """Return the version tag (if any) followed by ``.gz``."""
        return self._VERSION_TAGS.get(self.typeof, "") + ".gz"

    def __str__(self):
        """Return the product's key, with components joined by ``/``."""
        return '/'.join(self.__parts__()) + self._suffix()

    def path(self):
        """Return the product's relative path using the OS path separator."""
        return os.path.join(*self.__parts__()) + self._suffix()

    def key(self):
        """Return the product's key; identical to ``str(self)``."""
        return str(self)

    @staticmethod
    def from_s3_key(key: str):
        """Build a product by parsing ``key`` with ``S3_KEY_RE``.

        Match group 4 supplies the radar call sign and groups 6-11 supply
        year, month, day, hour, minute and second, interpreted as UTC.
        Keys ending in ``_V03.gz`` or ``_V04.gz`` get the matching variant;
        everything else is ``ArchiveProductType.DEFAULT``.

        Raises:
            ValueError: if ``key`` does not match ``S3_KEY_RE``.
        """
        match = S3_KEY_RE.match(key)

        if match is None:
            raise ValueError(f"Not a recognised archive product key: {key!r}")

        product = ArchiveProduct()

        product.timestamp = datetime.datetime(
            year   = int(match[6]),
            month  = int(match[7]),
            day    = int(match[8]),
            hour   = int(match[9]),
            minute = int(match[10]),
            second = int(match[11]),
            tzinfo = datetime.UTC
        )

        product.radar  = match[4]
        product.typeof = ArchiveProductType.DEFAULT

        for typeof, tag in ArchiveProduct._VERSION_TAGS.items():
            if key.endswith(tag + ".gz"):
                product.typeof = typeof
                break

        return product

    def is_downloaded(self, path: str):
        """Return True if this product exists as a regular file under ``path``."""
        return os.path.isfile(os.path.join(path, self.path()))

    def is_reported(self, db: Database):
        """Return whether the storm-report query yields a count of exactly 1.

        The query measures the distance between each storm report's
        start-to-end line and this product's radar, keeping reports within
        ``RADAR_RANGE`` whose time span contains ``self.timestamp``.

        NOTE(review): the outer ``count((subquery))`` counts a single scalar
        value, so ``num`` looks like it can only be 0 or 1 (i.e. "at least
        one report matched") - confirm against the database in use.
        """
        sql = """select count((
                     select ST_Distance(MakeLine(report.coord_start, report.coord_end),
                                        radar.coord,
                                        true) as distance
                     from
                         nexrad_storm_report as report,
                         nexrad_radar as radar
                     where
                         distance <= :radius
                         and :timestamp between report.timestamp_start and report.timestamp_end
                         and radar.call = :call)) as num
        """

        st = db.execute(sql, {
            'radius':    RADAR_RANGE,
            'timestamp': self.timestamp,
            'call':      self.radar
        })

        result = st.fetchone()

        return result['num'] == 1

class Archive():
    """Local directory holding files downloaded from an S3 bucket.

    Each object is stored beneath ``path`` at the relative location given by
    its key. The walking methods expect the layout
    ``<year>/<month>/<day>/<call>/<file>``.
    """
    path:   str
    bucket: S3Bucket

    def __init__(self, path: str, bucket: S3Bucket):
        self.path   = path
        self.bucket = bucket

    def is_downloaded(self, key: str):
        """Return True if something exists at ``key``'s location under ``path``."""
        return os.path.exists(os.path.join(self.path, key))

    def download(self, key: str):
        """Fetch ``key`` from the bucket into its location under ``path``.

        Data is written to a hidden temporary file next to the destination
        and renamed into place only once the transfer has finished. A failed
        or interrupted download therefore never leaves a truncated file at
        the final path, where ``is_downloaded`` would report it as present.
        Any exception from the transfer is re-raised after the temporary
        file has been removed.
        """
        path   = os.path.join(self.path, key)
        parent = os.path.dirname(path)

        # The leading dot keeps the temporary name from matching RE_FILE, so
        # the walker below never reports an in-progress download.
        partial = os.path.join(parent, '.' + os.path.basename(path) + '.part')

        os.makedirs(parent, exist_ok=True)

        try:
            with open(partial, 'wb') as fh:
                self.bucket.s3.download_fileobj(self.bucket.name, key, fh)

            os.replace(partial, path)
        except BaseException:
            # Best-effort cleanup; the original error is the one that matters.
            try:
                os.remove(partial)
            except OSError:
                pass

            raise

    RE_YEAR      = re.compile(r'^\d{4}$')
    RE_MONTH_DAY = re.compile(r'^\d{2}$')
    RE_CALL      = re.compile(r'^[A-Z]{4}$')
    RE_FILE      = re.compile(r'^([A-Z]{4})(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})')

    @staticmethod
    def _scan_names(path: str, pattern, dirs: bool = True, number: int | None = None):
        """Return the sorted names of matching entries directly under ``path``.

        An entry is kept when it is a directory (or a regular file if
        ``dirs`` is false), its name matches ``pattern`` and, when ``number``
        is given, ``int(name)`` equals ``number``. The directory handle is
        closed before returning.
        """
        names = []

        with os.scandir(path) as entries:
            for entry in entries:
                right_kind = entry.is_dir() if dirs else entry.is_file()

                if not (right_kind and pattern.match(entry.name)):
                    continue

                if number is not None and int(entry.name) != number:
                    continue

                names.append(entry.name)

        names.sort()

        return names

    def each_downloaded_key(self,
                            year:  int | None = None,
                            month: int | None = None,
                            day:   int | None = None):
        """Yield the key of every downloaded file, optionally filtered by date.

        Keys have the form ``year/month/day/call/file`` and are produced in
        sorted order. Entries whose names do not match the expected pattern
        for their level are ignored.

        Args:
            year:  only yield keys from this year.
            month: only yield keys from this month; requires ``year``.
            day:   only yield keys from this day; requires ``month``.

        Raises:
            ArchiveDateError: if ``day`` is given without ``month`` or
                ``month`` without ``year``. As this is a generator, the error
                surfaces on first iteration.
        """
        if day is not None and month is None:
            raise ArchiveDateError('day', 'month')

        if month is not None and year is None:
            raise ArchiveDateError('month', 'year')

        scan = self._scan_names

        for cur_year in scan(self.path, self.RE_YEAR, number=year):
            year_dir = os.path.join(self.path, cur_year)

            for cur_month in scan(year_dir, self.RE_MONTH_DAY, number=month):
                month_dir = os.path.join(year_dir, cur_month)

                for cur_day in scan(month_dir, self.RE_MONTH_DAY, number=day):
                    day_dir = os.path.join(month_dir, cur_day)

                    for call in scan(day_dir, self.RE_CALL):
                        call_dir = os.path.join(day_dir, call)

                        for name in scan(call_dir, self.RE_FILE, dirs=False):
                            yield '/'.join((cur_year, cur_month, cur_day, call, name))

    def each_downloaded_product(self,
                                year:  int | None = None,
                                month: int | None = None,
                                day:   int | None = None):
        """Yield an ``ArchiveProduct`` for each key from ``each_downloaded_key``.

        Accepts the same filters and raises the same ``ArchiveDateError``.
        """
        for key in self.each_downloaded_key(year, month, day):
            yield ArchiveProduct.from_s3_key(key)
Initial commit of lib/nexrad/archive.py 2025-02-11 21:11:43 -05:00			`import os`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00			`import re`
Implement ArchiveProduct class 2025-02-15 17:10:42 -05:00			`import enum`
			`import datetime`
Initial commit of lib/nexrad/archive.py 2025-02-11 21:11:43 -05:00
Implement ArchiveProduct class 2025-02-15 17:10:42 -05:00			`from nexrad.db import Database`
			`from nexrad.s3 import S3Bucket, S3_KEY_RE`
			`from nexrad.radar import RADAR_RANGE`
Initial commit of lib/nexrad/archive.py 2025-02-11 21:11:43 -05:00
Allow for searching downloads for specific years, months, days 2025-02-15 14:28:29 -05:00			`class ArchiveDateError(Exception):`
			`def __init__(self, supplied: str, missing: str):`
			`self.supplied = supplied`
			`self.missing = missing`

			`def __str__(self):`
Implement ArchiveProduct class 2025-02-15 17:10:42 -05:00			`return "Archive {self.supplied} was supplied, but required {self.missing} is missing"`

			`class ArchiveProductType(enum.Enum):`
			`DEFAULT = 1`
			`V03 = 3`
Add support for _V04 products 2025-02-16 21:31:34 -05:00			`V04 = 4`
Implement ArchiveProduct class 2025-02-15 17:10:42 -05:00
			`class ArchiveProduct():`
			`__slots__ = 'typeof', 'radar', 'timestamp',`

			`typeof: ArchiveProductType`
			`radar: str`
			`timestamp: datetime.datetime`

			`def __parts__(self):`
			`return [`
			`"%04d" % (self.timestamp.year),`
			`"%02d" % (self.timestamp.month),`
			`"%02d" % (self.timestamp.day),`
			`self.radar,`
			`"%4s%04d%02d%02d_%02d%02d%02d" % (`
			`self.radar,`
			`self.timestamp.year, self.timestamp.month, self.timestamp.day,`
			`self.timestamp.hour, self.timestamp.minute, self.timestamp.second`
			`)`
			`]`

			`def __str__(self):`
			`ret = '/'.join(self.__parts__())`

			`if self.typeof == ArchiveProductType.V03:`
			`ret += "_V03"`
Add support for _V04 products 2025-02-16 21:31:34 -05:00			`elif self.typeof == ArchiveProductType.V04:`
			`ret += "_V04"`
Implement ArchiveProduct class 2025-02-15 17:10:42 -05:00
			`ret += ".gz"`

			`return ret`

Don't call internal methods on ArchiveProduct 2025-02-15 17:25:44 -05:00			`def path(self):`
Implement ArchiveProduct class 2025-02-15 17:10:42 -05:00			`parts = self.__parts__()`
			`ret = os.path.join(*parts)`

			`if self.typeof == ArchiveProductType.V03:`
			`ret += "_V03"`
Add support for _V04 products 2025-02-16 21:31:34 -05:00			`elif self.typeof == ArchiveProductType.V04:`
			`ret += "_V04"`
Implement ArchiveProduct class 2025-02-15 17:10:42 -05:00
			`ret += ".gz"`

			`return ret`

Don't call internal methods on ArchiveProduct 2025-02-15 17:25:44 -05:00			`def key(self):`
			`return str(self)`

Implement ArchiveProduct class 2025-02-15 17:10:42 -05:00			`@staticmethod`
			`def from_s3_key(key: str):`
			`product = ArchiveProduct()`
			`match = S3_KEY_RE.match(key)`

			`product.timestamp = datetime.datetime(`
			`year = int(match[6]),`
			`month = int(match[7]),`
			`day = int(match[8]),`
			`hour = int(match[9]),`
			`minute = int(match[10]),`
			`second = int(match[11]),`
			`tzinfo = datetime.UTC`
			`)`

			`product.radar = match[4]`
			`product.typeof = ArchiveProductType.V03 \`
			`if key[-7:] == '_V03.gz' else ArchiveProductType.DEFAULT`

			`return product`

			`def is_downloaded(self, path: str):`
Don't call internal methods on ArchiveProduct 2025-02-15 17:25:44 -05:00			`return os.path.isfile(os.path.join(path, self.path()))`
Implement ArchiveProduct class 2025-02-15 17:10:42 -05:00
			`def is_reported(self, db: Database):`
			`sql = """select count((`
			`select ST_Distance(MakeLine(report.coord_start, report.coord_end),`
			`radar.coord,`
			`true) as distance`
			`from`
			`nexrad_storm_report as report,`
			`nexrad_radar as radar`
			`where`
			`distance <= :radius`
			`and :timestamp between report.timestamp_start and report.timestamp_end`
			`and radar.call = :call)) as num`
			`"""`

			`st = db.execute(sql, {`
			`'radius': RADAR_RANGE,`
			`'timestamp': self.timestamp,`
			`'call': self.radar`
			`})`

			`result = st.fetchone()`

			`return result['num'] == 1`
Allow for searching downloads for specific years, months, days 2025-02-15 14:28:29 -05:00
Initial commit of lib/nexrad/archive.py 2025-02-11 21:11:43 -05:00			`class Archive():`
			`path: str`
			`bucket: S3Bucket`

			`def __init__(self, path: str, bucket: S3Bucket):`
			`self.path = path`
			`self.bucket = bucket`

Allow multiple CSV files; rename nexrad.archive.Archive methods 2025-02-13 12:19:06 -05:00			`def is_downloaded(self, key: str):`
Use os.path.join() where possible 2025-02-12 15:37:14 -05:00			`return os.path.exists(os.path.join(self.path, key))`
Initial commit of lib/nexrad/archive.py 2025-02-11 21:11:43 -05:00
Allow multiple CSV files; rename nexrad.archive.Archive methods 2025-02-13 12:19:06 -05:00			`def download(self, key: str):`
Use os.path.join() where possible 2025-02-12 15:37:14 -05:00			`path = os.path.join(self.path, key)`
Initial commit of lib/nexrad/archive.py 2025-02-11 21:11:43 -05:00			`parent = os.path.dirname(path)`

			`os.makedirs(parent, exist_ok=True)`

			`with open(path, 'wb') as fh:`
			`self.bucket.s3.download_fileobj(self.bucket.name, key, fh)`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00
			`RE_YEAR = re.compile(r'^\d{4}$')`
			`RE_MONTH_DAY = re.compile(r'^\d{2}$')`
			`RE_CALL = re.compile(r'^[A-Z]{4}$')`
Rename RE_PRODUCT to RE_FILE for accuracy 2025-02-15 20:34:06 -05:00			`RE_FILE = re.compile(r'^([A-Z]{4})(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})')`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00
Allow for searching downloads for specific years, months, days 2025-02-15 14:28:29 -05:00			`def each_downloaded_key(self,`
			`year: int=None,`
			`month: int=None,`
			`day: int=None):`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00			`parts = [self.path]`

Allow for searching downloads for specific years, months, days 2025-02-15 14:28:29 -05:00			`if day is not None and month is None:`
			`raise ArchiveDateError('day', 'month')`

			`if month is not None and year is None:`
			`raise ArchiveDateError('month', 'year')`

			`for cur_year in os.scandir(os.path.join(*parts)):`
			`if not (cur_year.is_dir() and self.RE_YEAR.match(cur_year.name)):`
			`continue`

			`if year is not None and int(cur_year.name) != year:`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00			`continue`

Allow for searching downloads for specific years, months, days 2025-02-15 14:28:29 -05:00			`parts.append(cur_year.name)`

			`for cur_month in os.scandir(os.path.join(*parts)):`
			`if not (cur_month.is_dir() and self.RE_MONTH_DAY.match(cur_month.name)):`
			`continue`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00
Allow for searching downloads for specific years, months, days 2025-02-15 14:28:29 -05:00			`if month is not None and int(cur_month.name) != month:`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00			`continue`

Allow for searching downloads for specific years, months, days 2025-02-15 14:28:29 -05:00			`parts.append(cur_month.name)`

			`for cur_day in os.scandir(os.path.join(*parts)):`
			`if not (cur_day.is_dir() and self.RE_MONTH_DAY.match(cur_day.name)):`
			`continue`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00
Allow for searching downloads for specific years, months, days 2025-02-15 14:28:29 -05:00			`if day is not None and int(cur_day.name) != day:`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00			`continue`

Allow for searching downloads for specific years, months, days 2025-02-15 14:28:29 -05:00			`parts.append(cur_day.name)`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00
			`for call in os.scandir(os.path.join(*parts)):`
			`if not (call.is_dir() and self.RE_CALL.match(call.name)):`
			`continue`

			`parts.append(call.name)`

			`for item in os.scandir(os.path.join(*parts)):`
Rename RE_PRODUCT to RE_FILE for accuracy 2025-02-15 20:34:06 -05:00			`if not (item.is_file() and self.RE_FILE.match(item.name)):`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00			`continue`

			`yield '/'.join([*parts[1:], item.name])`

			`parts.pop()`

			`parts.pop()`

			`parts.pop()`

			`parts.pop()`
Implement ArchiveProduct class 2025-02-15 17:10:42 -05:00
			`def each_downloaded_product(self,`
			`year: int=None,`
			`month: int=None,`
			`day: int=None):`
			`for key in self.each_downloaded_key(year, month, day):`
			`yield ArchiveProduct.from_s3_key(key)`