2025-02-11 21:11:43 -05:00
|
|
|
import os
import re
from typing import Optional

from nexrad.s3 import S3Bucket
|
|
|
|
|
2025-02-15 14:28:29 -05:00
|
|
|
class ArchiveDateError(Exception):
    """Raised when a date component is given without the one it depends on.

    Attributes:
        supplied: Name of the date component that was provided
            (for example ``'day'``).
        missing: Name of the required component that was not provided
            (for example ``'month'``).
    """

    def __init__(self, supplied: str, missing: str):
        # Forward both fields so ``args`` (and therefore repr/pickling) carry
        # the same information as the attributes.
        super().__init__(supplied, missing)
        self.supplied = supplied
        self.missing = missing

    def __str__(self) -> str:
        # Must be an f-string: a plain literal would print the placeholders
        # verbatim instead of the component names.
        return f"Date {self.supplied} was supplied, but required {self.missing} is missing"
|
|
|
|
|
2025-02-11 21:11:43 -05:00
|
|
|
class Archive:
    """Local on-disk copy of objects from an S3 bucket.

    Objects are stored under ``path`` using their S3 key as the relative
    file path.  ``each_downloaded_key`` walks the layout
    ``<path>/<year>/<month>/<day>/<call>/<file>``, where the directory and
    file names must match the ``RE_*`` patterns below.
    """

    # Root directory of the local archive.
    path: str
    # Bucket wrapper; this class reads its ``name`` and ``s3`` attributes.
    # NOTE(review): ``s3`` is presumably a boto3 S3 client -- confirm.
    bucket: "S3Bucket"

    # Directory-name patterns for each level of the archive tree.
    RE_YEAR = re.compile(r'^\d{4}$')
    RE_MONTH_DAY = re.compile(r'^\d{2}$')
    RE_CALL = re.compile(r'^[A-Z]{4}$')
    # File-name prefix: four capitals, eight digits, underscore, six digits.
    # NOTE(review): groups look like call sign, date and time -- confirm.
    RE_PRODUCT = re.compile(r'^([A-Z]{4})(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})')

    def __init__(self, path: str, bucket: "S3Bucket"):
        self.path = path
        self.bucket = bucket

    def is_downloaded(self, key: str) -> bool:
        """Return True if a file for ``key`` exists under the archive root."""
        return os.path.exists(os.path.join(self.path, key))

    def download(self, key: str) -> None:
        """Download ``key`` from the bucket into the archive.

        The object is first written to a hidden sibling file and moved into
        place only after the transfer finished, so a failed download never
        leaves a truncated file that ``is_downloaded`` would report as
        present, and never clobbers a previously downloaded copy.

        Raises:
            Whatever ``bucket.s3.download_fileobj`` or the file-system calls
            raise; the staging file is removed before re-raising.
        """
        path = os.path.join(self.path, key)
        parent, name = os.path.split(path)

        # ``os.makedirs('')`` raises, so only create a parent that is named.
        if parent:
            os.makedirs(parent, exist_ok=True)

        # The leading dot keeps the staging file from matching RE_PRODUCT,
        # so ``each_downloaded_key`` never lists an in-progress download.
        staging = os.path.join(parent, f'.{name}.part')

        try:
            with open(staging, 'wb') as fh:
                self.bucket.s3.download_fileobj(self.bucket.name, key, fh)
            os.replace(staging, path)
        except BaseException:
            try:
                os.remove(staging)
            except OSError:
                # Nothing to clean up (or cleanup impossible); the original
                # error below is the one the caller needs to see.
                pass
            raise

    @staticmethod
    def _matching_names(directory: str, pattern, *, want_dir: bool):
        """Return the sorted names in ``directory`` that match ``pattern``.

        Only directories are considered when ``want_dir`` is true, only
        regular files otherwise.  The scandir handle is closed before
        returning.
        """
        with os.scandir(directory) as entries:
            return sorted(
                entry.name
                for entry in entries
                if (entry.is_dir() if want_dir else entry.is_file())
                and pattern.match(entry.name)
            )

    def _each_dir(self, parent: str, pattern, wanted: Optional[int] = None):
        """Yield ``(name, path)`` for matching subdirectories of ``parent``.

        When ``wanted`` is given, only the directory whose name equals it
        numerically is yielded.
        """
        for name in self._matching_names(parent, pattern, want_dir=True):
            if wanted is not None and int(name) != wanted:
                continue
            yield name, os.path.join(parent, name)

    def each_downloaded_key(self,
                            year: Optional[int] = None,
                            month: Optional[int] = None,
                            day: Optional[int] = None):
        """Yield the key of every product file present in the archive.

        Keys are ``/``-joined paths relative to the archive root
        (``year/month/day/call/file``) and are produced in sorted order at
        every directory level.

        Args:
            year: Restrict the walk to this year.
            month: Restrict the walk to this month; requires ``year``.
            day: Restrict the walk to this day; requires ``month``.

        Raises:
            ArchiveDateError: If ``day`` is given without ``month`` or
                ``month`` without ``year``.  Because this is a generator the
                error surfaces when iteration starts, not at call time.
        """
        if day is not None and month is None:
            raise ArchiveDateError('day', 'month')

        if month is not None and year is None:
            raise ArchiveDateError('month', 'year')

        for y, y_dir in self._each_dir(self.path, self.RE_YEAR, year):
            for m, m_dir in self._each_dir(y_dir, self.RE_MONTH_DAY, month):
                for d, d_dir in self._each_dir(m_dir, self.RE_MONTH_DAY, day):
                    for call, call_dir in self._each_dir(d_dir, self.RE_CALL):
                        for name in self._matching_names(
                                call_dir, self.RE_PRODUCT, want_dir=False):
                            yield '/'.join((y, m, d, call, name))
|