import os
import re
from typing import Iterator, Optional

from nexrad.s3 import S3Bucket


class ArchiveDateError(Exception):
    """Raised when a date component is given without the coarser one it needs.

    Attributes:
        supplied: Name of the component that was provided (e.g. ``'day'``).
        missing: Name of the component that is required but absent
            (e.g. ``'month'``).
    """

    def __init__(self, supplied: str, missing: str):
        super().__init__(supplied, missing)
        self.supplied = supplied
        self.missing = missing

    def __str__(self):
        # Must be an f-string; a plain literal would print the braces verbatim.
        return f"Date {self.supplied} was supplied, but required {self.missing} is missing"


class Archive:
    """Local directory that mirrors objects from an S3 bucket.

    Bucket keys are used verbatim as paths relative to ``path``. The
    directory walk in ``each_downloaded_key`` expects the layout
    ``<year>/<month>/<day>/<call>/<product file>``.
    """

    path: str
    bucket: S3Bucket

    # Directory / file name patterns for each level of the archive tree.
    RE_YEAR = re.compile(r'^\d{4}$')
    RE_MONTH_DAY = re.compile(r'^\d{2}$')
    RE_CALL = re.compile(r'^[A-Z]{4}$')
    RE_PRODUCT = re.compile(r'^([A-Z]{4})(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})')

    def __init__(self, path: str, bucket: S3Bucket):
        """Store the archive root directory and the bucket to download from."""
        self.path = path
        self.bucket = bucket

    def is_downloaded(self, key: str) -> bool:
        """Return True if a filesystem entry for ``key`` exists under the archive root."""
        return os.path.exists(os.path.join(self.path, key))

    def download(self, key: str) -> None:
        """Download ``key`` from the bucket into the archive directory.

        Missing parent directories are created. If the transfer raises, the
        partially written file is removed so that ``is_downloaded`` does not
        report an incomplete object as present; the original exception is
        re-raised unchanged.
        """
        path = os.path.join(self.path, key)
        parent = os.path.dirname(path)
        if parent:  # os.makedirs('') would raise
            os.makedirs(parent, exist_ok=True)

        with open(path, 'wb') as fh:
            try:
                # NOTE(review): bucket.s3 looks like a boto3 S3 client - confirm.
                self.bucket.s3.download_fileobj(self.bucket.name, key, fh)
            except BaseException:
                # Close first so the removal also works on platforms that
                # refuse to delete open files.
                fh.close()
                try:
                    os.remove(path)
                except OSError:
                    pass
                raise

    @staticmethod
    def _matching_dirs(parent: str, pattern: "re.Pattern",
                       wanted: Optional[int] = None) -> Iterator[str]:
        """Yield names of subdirectories of ``parent`` that match ``pattern``.

        When ``wanted`` is given, only names whose integer value equals it
        are yielded (so ``'03'`` matches ``3``).
        """
        with os.scandir(parent) as entries:
            for entry in entries:
                if not (entry.is_dir() and pattern.match(entry.name)):
                    continue
                if wanted is not None and int(entry.name) != wanted:
                    continue
                yield entry.name

    def each_downloaded_key(self, year: Optional[int] = None,
                            month: Optional[int] = None,
                            day: Optional[int] = None) -> Iterator[str]:
        """Yield the key of every product file present in the archive.

        Walks ``<year>/<month>/<day>/<call>/`` below the archive root and
        yields ``'/'``-joined keys relative to that root, in the order the
        operating system lists the directories. Entries whose names do not
        match the expected patterns are skipped.

        Args:
            year: Only yield keys from this year.
            month: Only yield keys from this month; requires ``year``.
            day: Only yield keys from this day; requires ``month``.

        Raises:
            ArchiveDateError: If ``day`` is given without ``month``, or
                ``month`` without ``year``. Because this is a generator,
                the error surfaces on first iteration, not at call time.
        """
        if day is not None and month is None:
            raise ArchiveDateError('day', 'month')
        if month is not None and year is None:
            raise ArchiveDateError('month', 'year')

        root = self.path
        for year_name in self._matching_dirs(root, self.RE_YEAR, year):
            year_dir = os.path.join(root, year_name)
            for month_name in self._matching_dirs(year_dir, self.RE_MONTH_DAY, month):
                month_dir = os.path.join(year_dir, month_name)
                for day_name in self._matching_dirs(month_dir, self.RE_MONTH_DAY, day):
                    day_dir = os.path.join(month_dir, day_name)
                    for call_name in self._matching_dirs(day_dir, self.RE_CALL):
                        call_dir = os.path.join(day_dir, call_name)
                        with os.scandir(call_dir) as entries:
                            for item in entries:
                                if item.is_file() and self.RE_PRODUCT.match(item.name):
                                    yield '/'.join(
                                        (year_name, month_name, day_name,
                                         call_name, item.name)
                                    )