"""Helpers for listing objects in an S3 bucket filtered by radar and time."""

import datetime
import re
from collections.abc import Iterator

import boto3
from botocore import UNSIGNED
from botocore.config import Config

# Bucket used when S3Bucket is constructed without an explicit name.
S3_BUCKET = 'noaa-nexrad-level2'

# Matches keys shaped like ``YYYY/MM/DD/SSSS/SSSSYYYYMMDD_HHMMSS`` followed by
# ``_`` or ``.``.  Groups 6-11 are the timestamp embedded in the file name.
S3_KEY_RE = re.compile(r'^(\d{4})/(\d{2})/(\d{2})/([A-Z]{4})/([A-Z]{4})(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})[_\.]')


def key_matches(key: str, start: datetime.datetime, end: datetime.datetime) -> bool:
    """Return True if the timestamp embedded in *key* lies in [start, end].

    The timestamp is read from the file-name portion of the key and
    interpreted as UTC, so *start* and *end* must be timezone-aware.

    Args:
        key: Object key to test.
        start: Inclusive lower bound (timezone-aware).
        end: Inclusive upper bound (timezone-aware).

    Returns:
        False when the key does not match ``S3_KEY_RE`` or its digits do not
        form a valid date/time; otherwise whether the timestamp is in range.
    """
    match = S3_KEY_RE.match(key)
    if match is None:
        return False

    try:
        year, month, day, hour, minute, second = (
            int(match[group]) for group in range(6, 12)
        )
        date = datetime.datetime(
            year, month, day, hour, minute, second, tzinfo=datetime.UTC
        )
    except ValueError:
        # The regex only checks digit counts; something like month "13"
        # cannot fall inside any time window, so treat it as a non-match.
        return False

    return start <= date <= end


class S3Bucket():
    """Anonymous (unsigned) S3 client for one bucket with a per-prefix key cache."""

    name: str
    # Maps a listing prefix to the complete list of keys found under it.
    cache: dict

    def __init__(self, name: str=S3_BUCKET):
        """Create an unsigned S3 client bound to bucket *name*."""
        self.s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
        self.name = name
        self.cache = dict()

    def _list_keys(self, prefix: str) -> list:
        """Return every key under *prefix*, following continuation tokens."""
        keys = []
        request = {'Bucket': self.name, 'Prefix': prefix}

        while True:
            result = self.s3.list_objects_v2(**request)
            if not result:
                break

            keys.extend(item['Key'] for item in result.get('Contents', ()))

            # A single response is capped in size; keep requesting pages
            # until the service reports that the listing is complete.
            token = result.get('NextContinuationToken')
            if not result.get('IsTruncated') or not token:
                break
            request['ContinuationToken'] = token

        return keys

    def each_key_by_prefix(self, prefix: str) -> Iterator[str]:
        """Yield every key in the bucket that starts with *prefix*.

        The first call for a prefix lists it from S3; later calls replay the
        cached result without touching the network.
        """
        keys = self.cache.get(prefix)
        if keys is None:
            # Only publish the listing to the cache once it is complete, so a
            # consumer that stops iterating early cannot leave a partial
            # entry behind for later callers.
            keys = self._list_keys(prefix)
            self.cache[prefix] = keys

        yield from keys

    def each_matching_key(self, radars: list, start: datetime.datetime, end: datetime.datetime) -> Iterator[str]:
        """Yield keys for the given radars whose timestamp is in [start, end].

        Args:
            radars: Sequence of radar records; element ``[1]`` of each record
                is used as the site directory in the key prefix.
                NOTE(review): presumably the four-letter call sign of a
                (id, call sign, ...) row -- confirm against callers.
            start: Inclusive lower bound (timezone-aware).
            end: Inclusive upper bound (timezone-aware).
        """
        # Walk calendar days in UTC, the zone key_matches() uses for key
        # timestamps.  Counting whole days of (end - start) instead would
        # skip the final day whenever a window shorter than 24 hours crosses
        # midnight.
        first_day = start.astimezone(datetime.UTC).date()
        last_day = end.astimezone(datetime.UTC).date()
        day_count = (last_day - first_day).days + 1

        for radar in radars:
            for offset in range(day_count):
                day = first_day + datetime.timedelta(days=offset)
                prefix = '%04d/%02d/%02d/%s/' % (
                    day.year, day.month, day.day, radar[1]
                )

                for key in self.each_key_by_prefix(prefix):
                    if key_matches(key, start, end):
                        yield key