nexrad-archive/lib/nexrad/archive.py

import os
import re

from nexrad.s3 import S3Bucket

class Archive():
    """Local directory holding copies of objects downloaded from an S3 bucket.

    Every object is stored beneath ``path`` at the relative location given by
    its key, i.e. the on-disk copy of ``key`` is ``os.path.join(path, key)``.
    """

    path:   str
    # Quoted (forward reference) so the annotation is not evaluated when the
    # class body runs.
    bucket: 'S3Bucket'

    # Name patterns for the directory layout walked by each_downloaded_key():
    #   <year>/<month>/<day>/<call>/<product>
    # RE_PRODUCT is deliberately not anchored at the end, so any suffix after
    # the matched prefix is accepted.
    # NOTE(review): the product prefix looks like a four-letter call sign
    # followed by YYYYMMDD_HHMMSS -- confirm against the bucket's key scheme.
    RE_YEAR      = re.compile(r'^\d{4}$')
    RE_MONTH_DAY = re.compile(r'^\d{2}$')
    RE_CALL      = re.compile(r'^[A-Z]{4}$')
    RE_PRODUCT   = re.compile(r'^([A-Z]{4})(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})')

    def __init__(self, path: str, bucket: 'S3Bucket'):
        """Create an archive rooted at ``path`` and backed by ``bucket``.

        Args:
            path: Root directory of the local archive.
            bucket: Object exposing ``name`` and ``s3.download_fileobj``.
        """
        self.path   = path
        self.bucket = bucket

    def is_downloaded(self, key: str) -> bool:
        """Return True if a regular file for ``key`` exists in the archive.

        Directories (for example a key prefix such as ``2025/02/11``) are not
        reported as downloaded objects.
        """
        return os.path.isfile(os.path.join(self.path, key))

    def download(self, key: str) -> None:
        """Download ``key`` from the bucket into the archive.

        The data is first written to a hidden ``.<name>.part`` file next to
        the destination and only moved into place once the transfer has
        finished.  A failed transfer therefore never leaves a truncated file
        that is_downloaded() would report as present, and never clobbers a
        previously downloaded copy.

        Raises:
            Whatever ``download_fileobj`` or the file system operations raise;
            the partial file is removed before the exception propagates.
        """
        path   = os.path.join(self.path, key)
        parent = os.path.dirname(path)

        # os.makedirs('') raises, so only create a parent when there is one.
        if parent:
            os.makedirs(parent, exist_ok=True)

        # The leading dot keeps the partial file from matching RE_PRODUCT, so
        # each_downloaded_key() never yields an in-progress download.
        partial = os.path.join(parent, '.' + os.path.basename(path) + '.part')

        try:
            with open(partial, 'wb') as fh:
                # NOTE(review): presumably a boto3 S3 client -- confirm.
                self.bucket.s3.download_fileobj(self.bucket.name, key, fh)

            os.replace(partial, path)
        finally:
            # After a successful replace the partial file is already gone;
            # this only fires when the download or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)

    def each_downloaded_key(self):
        """Yield the key of every product file found in the archive.

        Walks ``path/<year>/<month>/<day>/<call>/`` and yields one
        ``'/'``-joined key (relative to ``path``) per regular file whose name
        starts with the RE_PRODUCT pattern.  Entries whose names do not match
        the expected pattern for their level are skipped.  Names are visited
        in sorted order at every level, so the output is deterministic.

        Raises:
            OSError: If ``path`` (or a directory below it) cannot be scanned;
                raised lazily, when the generator is iterated.
        """
        levels = (
            self.RE_YEAR,
            self.RE_MONTH_DAY,
            self.RE_MONTH_DAY,
            self.RE_CALL,
        )

        yield from self._walk(self.path, [], levels)

    def _walk(self, directory, parts, levels):
        """Yield keys below ``directory``; ``levels`` are the remaining directory patterns."""
        if not levels:
            for name in self._entry_names(directory, self.RE_PRODUCT, False):
                yield '/'.join([*parts, name])
            return

        for name in self._entry_names(directory, levels[0], True):
            yield from self._walk(
                os.path.join(directory, name),
                [*parts, name],
                levels[1:],
            )

    @staticmethod
    def _entry_names(directory, pattern, want_dirs):
        """Return the sorted names in ``directory`` that match ``pattern``.

        With ``want_dirs`` true only directories whose whole name matches are
        returned; otherwise only regular files whose name starts with a match.
        """
        # The context manager closes the directory handle as soon as the
        # listing has been read, even if the caller abandons the generator.
        with os.scandir(directory) as entries:
            if want_dirs:
                # fullmatch: '$' alone would also accept a trailing newline.
                names = [
                    entry.name for entry in entries
                    if entry.is_dir() and pattern.fullmatch(entry.name)
                ]
            else:
                names = [
                    entry.name for entry in entries
                    if entry.is_file() and pattern.match(entry.name)
                ]

        return sorted(names)
Initial commit of lib/nexrad/archive.py 2025-02-11 21:11:43 -05:00			`import os`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00			`import re`
Initial commit of lib/nexrad/archive.py 2025-02-11 21:11:43 -05:00
			`from nexrad.s3 import S3Bucket`

			`class Archive():`
			`path: str`
			`bucket: S3Bucket`

			`def __init__(self, path: str, bucket: S3Bucket):`
			`self.path = path`
			`self.bucket = bucket`

Allow multiple CSV files; rename nexrad.archive.Archive methods 2025-02-13 12:19:06 -05:00			`def is_downloaded(self, key: str):`
Use os.path.join() where possible 2025-02-12 15:37:14 -05:00			`return os.path.exists(os.path.join(self.path, key))`
Initial commit of lib/nexrad/archive.py 2025-02-11 21:11:43 -05:00
Allow multiple CSV files; rename nexrad.archive.Archive methods 2025-02-13 12:19:06 -05:00			`def download(self, key: str):`
Use os.path.join() where possible 2025-02-12 15:37:14 -05:00			`path = os.path.join(self.path, key)`
Initial commit of lib/nexrad/archive.py 2025-02-11 21:11:43 -05:00			`parent = os.path.dirname(path)`

			`os.makedirs(parent, exist_ok=True)`

			`with open(path, 'wb') as fh:`
			`self.bucket.s3.download_fileobj(self.bucket.name, key, fh)`
Implement method for walking downloaded products 2025-02-15 14:09:54 -05:00
			`RE_YEAR = re.compile(r'^\d{4}$')`
			`RE_MONTH_DAY = re.compile(r'^\d{2}$')`
			`RE_CALL = re.compile(r'^[A-Z]{4}$')`
			`RE_PRODUCT = re.compile(r'^([A-Z]{4})(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})')`

			`def each_downloaded_key(self):`
			`parts = [self.path]`

			`for year in os.scandir(os.path.join(*parts)):`
			`if not (year.is_dir() and self.RE_YEAR.match(year.name)):`
			`continue`

			`parts.append(year.name)`

			`for month in os.scandir(os.path.join(*parts)):`
			`if not (month.is_dir() and self.RE_MONTH_DAY.match(month.name)):`
			`continue`

			`parts.append(month.name)`

			`for day in os.scandir(os.path.join(*parts)):`
			`if not (day.is_dir() and self.RE_MONTH_DAY.match(day.name)):`
			`continue`

			`parts.append(day.name)`

			`for call in os.scandir(os.path.join(*parts)):`
			`if not (call.is_dir() and self.RE_CALL.match(call.name)):`
			`continue`

			`parts.append(call.name)`

			`for item in os.scandir(os.path.join(*parts)):`
			`if not (item.is_file() and self.RE_PRODUCT.match(item.name)):`
			`continue`

			`yield '/'.join([*parts[1:], item.name])`

			`parts.pop()`

			`parts.pop()`

			`parts.pop()`

			`parts.pop()`