nexrad-archive/lib/nexrad/archive.py

71 lines
2.1 KiB
Python
Raw Normal View History

import os
import re
from nexrad.s3 import S3Bucket
class Archive:
    """Local on-disk mirror of objects held in an S3 bucket.

    Every object is stored beneath ``path`` at the location named by its
    bucket key, so a key doubles as a path relative to the archive root.
    """

    path: str
    bucket: S3Bucket

    # Directory and file name patterns for the layout walked by
    # each_downloaded_key(): <4 digits>/<2 digits>/<2 digits>/<4 capitals>/<file>.
    RE_YEAR = re.compile(r'^\d{4}$')
    RE_MONTH_DAY = re.compile(r'^\d{2}$')
    RE_CALL = re.compile(r'^[A-Z]{4}$')
    # Only the start of the file name is constrained (four capitals followed
    # by 8 digits, '_' and 6 digits); any suffix is accepted.
    RE_PRODUCT = re.compile(r'^([A-Z]{4})(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})')

    def __init__(self, path: str, bucket: S3Bucket):
        """Remember the archive root directory and the bucket to fetch from."""
        self.path = path
        self.bucket = bucket

    def is_downloaded(self, key: str) -> bool:
        """Return True if a file for ``key`` exists beneath the archive root."""
        return os.path.exists(os.path.join(self.path, key))

    def download(self, key: str) -> None:
        """Fetch ``key`` from the bucket into the archive.

        The object is written to a hidden temporary file next to its final
        location and renamed into place only once the transfer has finished.
        A failed or interrupted transfer therefore never leaves a truncated
        file that is_downloaded() would report as present.

        Any exception raised by the transfer or by the filesystem propagates
        to the caller after the temporary file has been removed.
        """
        path = os.path.join(self.path, key)
        parent, name = os.path.split(path)
        if parent:
            # os.makedirs('') raises, so skip it when the target lives in the
            # current directory.
            os.makedirs(parent, exist_ok=True)
        # The leading dot keeps the partial file from matching RE_PRODUCT;
        # the pid keeps concurrent processes from sharing a temp file.
        partial = os.path.join(parent, f'.{name}.{os.getpid()}.part')
        try:
            with open(partial, 'wb') as fh:
                # NOTE(review): presumably a boto3 S3 client - confirm.
                self.bucket.s3.download_fileobj(self.bucket.name, key, fh)
            os.replace(partial, path)
        except BaseException:
            # Clean up on every kind of failure (including KeyboardInterrupt)
            # and then re-raise unchanged.
            try:
                os.unlink(partial)
            except FileNotFoundError:
                pass
            raise

    @staticmethod
    def _scan(directory, pattern, want_dir):
        """Yield names in ``directory`` matching ``pattern`` of the wanted kind.

        ``want_dir`` selects directories when true and regular files
        otherwise. The scandir handle is closed as soon as iteration ends or
        the generator is closed.
        """
        with os.scandir(directory) as entries:
            for entry in entries:
                right_kind = entry.is_dir() if want_dir else entry.is_file()
                if right_kind and pattern.match(entry.name):
                    yield entry.name

    def each_downloaded_key(self):
        """Yield the key of every product file found in the archive.

        Walks the year/month/day/call directory levels beneath the archive
        root, skipping any entry whose name or kind does not fit the layout.
        Keys are always joined with '/', regardless of the OS path separator.
        Entries are yielded in the order the OS lists them (unsorted).
        """
        scan = self._scan
        join = os.path.join
        for year in scan(self.path, self.RE_YEAR, True):
            year_dir = join(self.path, year)
            for month in scan(year_dir, self.RE_MONTH_DAY, True):
                month_dir = join(year_dir, month)
                for day in scan(month_dir, self.RE_MONTH_DAY, True):
                    day_dir = join(month_dir, day)
                    for call in scan(day_dir, self.RE_CALL, True):
                        call_dir = join(day_dir, call)
                        for product in scan(call_dir, self.RE_PRODUCT, False):
                            yield '/'.join([year, month, day, call, product])