import os
import re


class Archive():
    # NOTE(review): excerpt only. This reproduces the members added by the
    # change under review; the rest of the class (constructor, download
    # helpers) and the S3 bucket import live in the full module and must be
    # kept when merging.

    # Directory-name patterns for the on-disk layout
    # <path>/<year>/<month>/<day>/<call>/<product file>.
    RE_YEAR = re.compile(r'^\d{4}$')
    RE_MONTH_DAY = re.compile(r'^\d{2}$')
    RE_CALL = re.compile(r'^[A-Z]{4}$')
    # Product file names only need to *start* with call sign + date + time;
    # any suffix after the timestamp is accepted.
    RE_PRODUCT = re.compile(r'^([A-Z]{4})(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})')

    @staticmethod
    def _matching_names(directory, pattern, want_dir):
        """Return the sorted entry names in *directory* that match *pattern*.

        Only directories are considered when *want_dir* is true, only
        regular files otherwise.  The scandir handle is closed before
        returning, so no directory descriptor stays open while the caller
        iterates over the result.
        """
        with os.scandir(directory) as entries:
            names = [
                entry.name
                for entry in entries
                if (entry.is_dir() if want_dir else entry.is_file())
                and pattern.match(entry.name)
            ]

        names.sort()
        return names

    def _walk_downloaded(self, levels, parts):
        """Yield keys below ``self.path/<parts...>``.

        *levels* holds the directory patterns still to be descended through;
        once it is empty, *parts* names a call-sign directory and its
        product files are emitted.
        """
        directory = os.path.join(self.path, *parts)

        if not levels:
            for name in self._matching_names(directory, self.RE_PRODUCT, False):
                yield '/'.join((*parts, name))
            return

        for name in self._matching_names(directory, levels[0], True):
            yield from self._walk_downloaded(levels[1:], (*parts, name))

    def each_downloaded_key(self):
        """Generate the key of every product file found under ``self.path``.

        The tree is expected to be laid out as
        ``<path>/<YYYY>/<MM>/<DD>/<CALL>/<product file>``.  Entries whose
        name or type does not fit the level they appear at are skipped
        silently.  Each yielded key is the path relative to ``self.path``
        joined with ``/`` (``'YYYY/MM/DD/CALL/<product file>'``), in
        lexicographic order.

        Directory handles are closed as soon as each listing has been read,
        so abandoning the generator early does not leak descriptors.

        Raises:
            OSError: propagated from ``os.scandir`` if ``self.path`` (or a
                directory below it) cannot be listed.
        """
        levels = (self.RE_YEAR, self.RE_MONTH_DAY, self.RE_MONTH_DAY, self.RE_CALL)

        yield from self._walk_downloaded(levels, ())