diff --git a/README.md b/README.md index 0fa590c..372bdb1 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ in conjunction with the [NCEI Storm Events Database](https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/) to archive only NEXRAD Level II data for which storms were recorded by the National Weather Service. Using the start and end times and -coordinates of a given storm report, this tool is able to query and +coordinates of a given storm event, this tool is able to query and filter objects in the [NEXRAD Level II Amazon bucket](https://registry.opendata.aws/noaa-nexrad/), allowing one to only archive volume scans for which storms were noted, diff --git a/bin/nexrad-archive b/bin/nexrad-archive index b1240c6..310998f 100755 --- a/bin/nexrad-archive +++ b/bin/nexrad-archive @@ -4,7 +4,7 @@ import argparse from nexrad.db import Database from nexrad.s3 import S3Bucket -from nexrad.storm import StormReport +from nexrad.storm import StormEvent from nexrad.archive import Archive parser = argparse.ArgumentParser( @@ -15,12 +15,12 @@ parser.add_argument('--quiet', action='store_true', help='Suppress output') parser.add_argument('--dry-run', action='store_true', help='Do not actually archive data') group = parser.add_mutually_exclusive_group() -group.add_argument('--exclude', action='append', type=str, help='Exclude types of reports from ingest') -group.add_argument('--type', action='append', type=str, help='Specify only given types of reports to ingest') +group.add_argument('--exclude', action='append', type=str, help='Exclude types of events from ingest') +group.add_argument('--type', action='append', type=str, help='Specify only given types of events to ingest') -parser.add_argument('db', help='SQLite3 NEXRAD radar site database') -parser.add_argument('csv-report-details', nargs='+', help='Compressed storm report details CSV file') -parser.add_argument('archive-dir', help='Target archive directory') +parser.add_argument('db', help='SQLite3 NEXRAD 
radar site database') +parser.add_argument('csv-event-details', nargs='+', help='Compressed storm event details CSV file') +parser.add_argument('archive-dir', help='Target archive directory') args = parser.parse_args() @@ -36,32 +36,32 @@ if args.exclude is not None: if args.type is not None: types = {s: True for s in args.type} -for path in getattr(args, 'csv-report-details'): - for report in StormReport.each_from_csv_file(path): - if args.exclude is not None and report.event_type in exclude: +for path in getattr(args, 'csv-event-details'): + for event in StormEvent.each_from_csv_file(path): + if args.exclude is not None and event.event_type in exclude: continue - if args.type is not None and report.event_type not in types: + if args.type is not None and event.event_type not in types: continue - if report.coord_start is None or report.coord_end is None: + if event.coord_start is None or event.coord_end is None: continue - if not report.is_radar_significant(): + if not event.is_radar_significant(): continue - radars = report.nearby_radars(db) + radars = event.nearby_radars(db) - for key in bucket.each_matching_key(radars, report.timestamp_start, report.timestamp_end): + for key in bucket.each_matching_key(radars, event.timestamp_start, event.timestamp_end): if archive.is_downloaded(key): if not args.quiet: - print(f"event {report.id} key {key} type {report.event_type} already archived") + print(f"event {event.id} key {key} type {event.event_type} already archived") else: if not args.quiet: if args.dry_run: - print(f"event {report.id} key {key} type {report.event_type} would archive") + print(f"event {event.id} key {key} type {event.event_type} would archive") else: - print(f"event {report.id} key {key} type {report.event_type} archiving") + print(f"event {event.id} key {key} type {event.event_type} archiving") if not args.dry_run: archive.download(key) diff --git a/bin/nexrad-archive-event-ingest b/bin/nexrad-archive-event-ingest new file mode 100755 index 
0000000..2bf3828 --- /dev/null +++ b/bin/nexrad-archive-event-ingest @@ -0,0 +1,33 @@ +#! /usr/bin/env python3 + +import argparse + +from nexrad.db import Database +from nexrad.storm import StormEvent + +parser = argparse.ArgumentParser( + description = 'Ingest events from StormEvents_details_*.csv.gz files' +) + +parser.add_argument('--quiet', action='store_true', help='Suppress output') +parser.add_argument('--dry-run', action='store_true', help='Do not actually ingest events') +parser.add_argument('db', help='SQLite3 NEXRAD radar site database') +parser.add_argument('csv-event-details', nargs='+', help='Compressed storm event details CSV file') + +args = parser.parse_args() + +db = Database.connect(args.db) + +if not args.dry_run: + db.execute('begin transaction') + +for path in getattr(args, 'csv-event-details'): + for event in StormEvent.each_from_csv_file(path): + if not args.dry_run: + db.add(event) + + if not args.quiet: + print(f"Finished ingesting file {path}") + +if not args.dry_run: + db.commit() diff --git a/bin/nexrad-archive-report-ingest b/bin/nexrad-archive-report-ingest deleted file mode 100755 index e0f09df..0000000 --- a/bin/nexrad-archive-report-ingest +++ /dev/null @@ -1,33 +0,0 @@ -#! 
/usr/bin/env python3 - -import argparse - -from nexrad.db import Database -from nexrad.storm import StormReport - -parser = argparse.ArgumentParser( - description = 'Ingest reports from StormEvent_details_*.csv.gz files' -) - -parser.add_argument('--quiet', action='store_true', help='Suppress output') -parser.add_argument('--dry-run', action='store_true', help='Do not actually ingest reports') -parser.add_argument('db', help='SQLite3 NEXRAD radar site database') -parser.add_argument('csv-report-details', nargs='+', help='Compressed storm report details CSV file') - -args = parser.parse_args() - -db = Database.connect(args.db) - -if not args.dry_run: - db.execute('begin transaction') - -for path in getattr(args, 'csv-report-details'): - for report in StormReport.each_from_csv_file(path): - if not args.dry_run: - db.add(report) - - if not args.quiet: - print(f"Finished ingesting file {path}") - -if not args.dry_run: - db.commit() diff --git a/db/nexrad.sql b/db/nexrad.sql index 9ebccce..8a29a38 100644 --- a/db/nexrad.sql +++ b/db/nexrad.sql @@ -16,7 +16,7 @@ select AddGeometryColumn('nexrad_radar', 'coord', 4326, 'POINT', 'XY'), CreateSpatialIndex('nexrad_radar', 'coord'); -create table nexrad_storm_report ( +create table nexrad_storm_event ( id INTEGER PRIMARY KEY NOT NULL, episode_id INTEGER, timestamp_start TIMESTAMP NOT NULL, @@ -29,18 +29,18 @@ create table nexrad_storm_report ( tornado_f_rating TEXT ); -create index nexrad_storm_report_episode_id_idx on nexrad_storm_report (episode_id); -create index nexrad_storm_report_event_type_idx on nexrad_storm_report (event_type); -create index nexrad_storm_report_wfo_idx on nexrad_storm_report (wfo); -create index nexrad_storm_report_timestamp_start_idx on nexrad_storm_report (timestamp_start); -create index nexrad_storm_report_timestamp_end_idx on nexrad_storm_report (timestamp_end); +create index nexrad_storm_event_episode_id_idx on nexrad_storm_event (episode_id); +create index nexrad_storm_event_event_type_idx on 
nexrad_storm_event (event_type); +create index nexrad_storm_event_wfo_idx on nexrad_storm_event (wfo); +create index nexrad_storm_event_timestamp_start_idx on nexrad_storm_event (timestamp_start); +create index nexrad_storm_event_timestamp_end_idx on nexrad_storm_event (timestamp_end); select - AddGeometryColumn('nexrad_storm_report', 'coord_start', 4326, 'POINT', 'XY', 0), - CreateSpatialIndex('nexrad_storm_report', 'coord_start'); + AddGeometryColumn('nexrad_storm_event', 'coord_start', 4326, 'POINT', 'XY', 0), + CreateSpatialIndex('nexrad_storm_event', 'coord_start'); select - AddGeometryColumn('nexrad_storm_report', 'coord_end', 4326, 'POINT', 'XY', 0), - CreateSpatialIndex('nexrad_storm_report', 'coord_end'); + AddGeometryColumn('nexrad_storm_event', 'coord_end', 4326, 'POINT', 'XY', 0), + CreateSpatialIndex('nexrad_storm_event', 'coord_end'); commit; diff --git a/lib/nexrad/archive.py b/lib/nexrad/archive.py index a4ac784..52627ef 100644 --- a/lib/nexrad/archive.py +++ b/lib/nexrad/archive.py @@ -94,15 +94,15 @@ class ArchiveProduct(): def is_reported(self, db: Database): sql = """select count(( - select ST_Distance(MakeLine(report.coord_start, report.coord_end), + select ST_Distance(MakeLine(event.coord_start, event.coord_end), radar.coord, true) as distance from - nexrad_storm_report as report, + nexrad_storm_event as event, nexrad_radar as radar where distance <= :radius - and :timestamp between report.timestamp_start and report.timestamp_end + and :timestamp between event.timestamp_start and event.timestamp_end and radar.call = :call)) as num """ diff --git a/lib/nexrad/storm.py b/lib/nexrad/storm.py index 19550bd..a8d9de8 100644 --- a/lib/nexrad/storm.py +++ b/lib/nexrad/storm.py @@ -80,14 +80,14 @@ def coord_from_str(text_lon: str, text_lat: str): return Coord(float(text_lon), float(text_lat)) -class StormReport(DatabaseTable): +class StormEvent(DatabaseTable): __slots__ = ( 'id', 'timestamp_start', 'timestamp_end', 'episode_id', 'state', 'event_type', 
'wfo', 'locale_start', 'locale_end', 'tornado_f_rating', 'coord_start', 'coord_end' ) - __table__ = 'nexrad_storm_report' + __table__ = 'nexrad_storm_event' __key__ = 'id' __columns__ = ( @@ -133,32 +133,32 @@ class StormReport(DatabaseTable): @staticmethod def from_csv_row(row: dict): - report = StormReport() + event = StormEvent() tz = timezone_from_str(row['CZ_TIMEZONE']) - report.timestamp_start = timestamp_from_parts(tz, row['BEGIN_YEARMONTH'], row['BEGIN_DAY'], row['BEGIN_TIME']) - report.timestamp_end = timestamp_from_parts(tz, row['END_YEARMONTH'], row['END_DAY'], row['END_TIME']) - report.state = row['STATE'] - report.event_type = row['EVENT_TYPE'] - report.wfo = row['WFO'] - report.locale_start = row['BEGIN_LOCATION'] - report.locale_end = row['END_LOCATION'] - report.tornado_f_rating = row['TOR_F_SCALE'] - report.coord_start = coord_from_str(row['BEGIN_LON'], row['BEGIN_LAT']) - report.coord_end = coord_from_str(row['END_LON'], row['END_LAT']) + event.timestamp_start = timestamp_from_parts(tz, row['BEGIN_YEARMONTH'], row['BEGIN_DAY'], row['BEGIN_TIME']) + event.timestamp_end = timestamp_from_parts(tz, row['END_YEARMONTH'], row['END_DAY'], row['END_TIME']) + event.state = row['STATE'] + event.event_type = row['EVENT_TYPE'] + event.wfo = row['WFO'] + event.locale_start = row['BEGIN_LOCATION'] + event.locale_end = row['END_LOCATION'] + event.tornado_f_rating = row['TOR_F_SCALE'] + event.coord_start = coord_from_str(row['BEGIN_LON'], row['BEGIN_LAT']) + event.coord_end = coord_from_str(row['END_LON'], row['END_LAT']) try: - report.episode_id = int(row['EPISODE_ID']) + event.episode_id = int(row['EPISODE_ID']) except ValueError: - report.episode_id = None + event.episode_id = None try: - report.id = int(row['EVENT_ID']) + event.id = int(row['EVENT_ID']) except ValueError: - report.id = None + event.id = None - return report + return event @staticmethod def each_from_csv_file(file: str): @@ -167,7 +167,7 @@ class StormReport(DatabaseTable): for row in reader: 
try: - yield StormReport.from_csv_row(row) + yield StormEvent.from_csv_row(row) except: pass @@ -176,7 +176,7 @@ class StormReport(DatabaseTable): coord: Coord=None, radius: float=RADAR_RANGE, timestamp: datetime.datetime=None): - columns = StormReport.__format_columns_select__(StormReport) + columns = StormEvent.__format_columns_select__(StormEvent) clauses = list() values = dict() @@ -205,12 +205,12 @@ class StormReport(DatabaseTable): 'timestamp': str(timestamp) }) - sql = "select " + ", ".join(columns) + " from nexrad_storm_report" + sql = "select " + ", ".join(columns) + " from nexrad_storm_event" if len(clauses) > 0: sql += " where " + " and ".join(clauses) - st = db.query_sql(StormReport, sql, values) + st = db.query_sql(StormEvent, sql, values) while True: obj = st.fetchone() diff --git a/run.sh b/run.sh index eb505e7..72f6c48 100644 --- a/run.sh +++ b/run.sh @@ -1,11 +1,11 @@ #! /bin/sh -reports_csv_gz="$(realpath "$1")" +events_csv_gz="$(realpath "$1")" dest="$(realpath "$2")" shift 2 docker run --rm -it \ - --volume "$reports_csv_gz:/tmp/reports.csv.gz" \ + --volume "$events_csv_gz:/tmp/events.csv.gz" \ --volume "$dest:/dest" \ - nexrad-archive:latest /tmp/reports.csv.gz /dest "$@" + nexrad-archive:latest /tmp/events.csv.gz /dest "$@"