awips2/deltaScripts/goesr_deploy/convertSatURIs.py

157 lines
6.2 KiB
Python
Raw Normal View History

2022-05-05 12:34:50 -05:00
#!/usr/bin/python
#
"""
Append the coverageid to the satellite datauris, and create matching
satellite hdf5 groups whose names have the coverageid appended. The new
groups are added as aliases (soft links) to the existing datasets.
Date Ticket# Engineer Description
------------ ---------- ----------- --------------------------
20120711 798 jkorman Initial Development
20120822 798 jkorman Added ability to account for files containing hdfFileId.
"""
from subprocess import Popen, PIPE
import sys
from time import time
import h5py
# psql command prefix; the SQL statement to run is appended after "-c".
POSTGRES_CMD = "psql -U awips -d metadata -t -q -A -c "
# Directory prefix that the per-file hdf5 keys are appended to.
HDF5_LOC = "/awips2/edex/data/hdf5"

# Column positions within a row returned by the select issued in
# get_satellite_rows(): id, dataURI, coverage_gid, sectorid,
# physicalelement, reftime, hdffileid (in that order).
(ID_IDX,
 DATAURI_IDX,
 COVERAGE_IDX,
 SECTORID_IDX,
 PHYSICAL_ELEMENT_IDX,
 REFTIME_IDX,
 HDFFILEID_IDX) = range(7)
def update_satellite_table():
    """
    Make sure the satellite table has an interpolationlevels column.

    The column is looked up in information_schema.columns; when the
    reported count is '0' an alter table statement adds it as an
    integer column and a notice is printed.
    """
    column_count = queryPostgres("select count(*) from information_schema.columns where table_name='satellite' and column_name='interpolationlevels';")[0][0]
    if column_count != '0':
        # Column is already there; nothing to do.
        return
    queryPostgres("alter table satellite add column interpolationlevels integer;")
    print("Adding interpolationlevels column to satellite table")
def formatFileTime(refTime):
    """
    Build the YYYY-MM-DD-HH stamp used in satellite hdf5 file names.

    The four fields are sliced from fixed character positions of
    refTime (0-4 year, 5-7 month, 8-10 day, 11-13 hour) and joined
    with dashes.
    """
    year, month, day, hour = refTime[0:4], refTime[5:7], refTime[8:10], refTime[11:13]
    return "-".join((year, month, day, hour))
def getFilename(refTime, h5FileId):
    """
    Return the satellite hdf5 file name for a reference time.

    The name is "satellite-<YYYY-MM-DD-HH>.h5"; when h5FileId is not
    empty it is inserted, preceded by a dash, ahead of the ".h5"
    extension.
    """
    id_suffix = "-" + h5FileId if len(h5FileId) > 0 else ""
    return "satellite-" + formatFileTime(refTime) + id_suffix + ".h5"
def queryPostgres(sql):
    """
    Run one SQL statement through psql and return its output rows.

    The statement is handed to the psql command described by
    POSTGRES_CMD as a single argument. Every line psql writes to stdout
    is stripped and split on "|", the column separator of psql's
    unaligned (-A) output.

    sql -- the SQL statement to execute.

    Returns a list with one entry per output line; each entry is the
    list of column strings of that line. The list is empty when psql
    wrote nothing to stdout.

    Raises OSError if the psql executable cannot be started.
    """
    import shlex

    # Pass the statement as its own argv entry instead of pasting it
    # into a shell command line, so double quotes, '$', backticks or
    # backslashes inside the SQL reach psql unchanged.
    command = shlex.split(POSTGRES_CMD) + [sql]
    # universal_newlines makes stdout yield text lines, which keeps the
    # split("|") below valid on Python 3 as well as Python 2.
    proc = Popen(command, stdout=PIPE, universal_newlines=True)
    try:
        retVal = [line.strip().split("|") for line in proc.stdout]
    finally:
        proc.stdout.close()
        # Reap the child so repeated queries do not pile up zombies.
        proc.wait()
    return retVal
def get_sectorids():
    """
    Fetch the distinct sectorid values present in the satellite table.

    Returns the rows produced by queryPostgres for the query, each row
    being the list of column values of one output line (here just the
    sectorid).
    """
    sector_query = "select distinct sectorid from satellite;"
    return queryPostgres(sector_query)
def get_satellite_rows(sectorid):
    """
    Fetch the satellite rows of one sector, grouped by hdf5 file.

    sectorid -- sector identifier whose rows are selected.

    Returns a dict that maps an hdf5 path key of the form
    "/satellite/<sectorid>/<physicalelement>/<filename>" to the list of
    rows sharing that key. Each row is a list of column strings indexed
    by the *_IDX constants.
    """
    # Quote the value as a SQL string literal, doubling any embedded
    # single quote. repr() only coincides with SQL quoting for plain
    # ids: an id containing a quote comes out double-quoted (an
    # identifier to SQL) and backslashes get doubled.
    sector_literal = "'" + sectorid.replace("'", "''") + "'"
    rows = queryPostgres("select id, dataURI, coverage_gid, sectorid, physicalelement, reftime, hdffileid from satellite where sectorid=" + sector_literal + ";")
    keys = {}
    for row in rows:
        # create the key for this entry.
        key = "/satellite/" + row[SECTORID_IDX] + "/" + row[PHYSICAL_ELEMENT_IDX] + "/" + getFilename(row[REFTIME_IDX], row[HDFFILEID_IDX])
        # start a new row list the first time a key is seen, then add
        # the row to the list for its key
        keys.setdefault(key, []).append(row)
    return keys
def _link_coverage_groups(h5file, dataURI, newGroupName):
    """
    Create newGroupName in h5file as an alias of the data under dataURI.

    A "Data" soft link and a "Data-interpolated" group are created
    under newGroupName. Every entry of the existing
    dataURI + "/Data-interpolated" group gets a soft link of the same
    name in the new "Data-interpolated" group.

    Returns the number of entries linked, not counting the '0' entry.
    """
    h5file.create_group(newGroupName)
    h5file.create_group(newGroupName + "/Data-interpolated")
    # Link to the old data set
    h5file[newGroupName + "/Data"] = h5py.SoftLink(dataURI + "/Data")

    numLevels = 0
    for n in h5file[dataURI + "/Data-interpolated"].keys():
        if n == '0':
            # special case for this link.
            # dataset /Data-interpolated/0 points to /Data
            oldds = dataURI + "/Data"
        else:
            numLevels += 1
            oldds = dataURI + "/Data-interpolated/" + n
        h5file[newGroupName + "/Data-interpolated/" + n] = h5py.SoftLink(oldds)
    return numLevels


def process_all_satellite():
    """
    Process all entries in the satellite table.
    Do one sector id at a time.

    For every hdf5 file key of a sector the file is opened read/write.
    For each of its rows an alias group named <datauri>/<coverage> is
    created, and the row's datauri and interpolationlevels columns are
    updated in the satellite table. An error while processing a file is
    printed and processing continues with the next file.
    """
    sectorids = get_sectorids()
    if not sectorids:
        print("No sector identifiers found in the satellite table")
        return

    for sectorid in sectorids:
        print("Processing sector " + sectorid[0])
        keys = get_satellite_rows(sectorid[0])
        if not keys:
            print("No keys found for the sector id " + sectorid[0])
            continue

        for key in keys:
            print("==========================================================")
            print(" Processing key = " + key)
            fname = HDF5_LOC + key
            try:
                f = h5py.File(fname, 'r+')
                try:
                    for row in keys[key]:
                        dataURI = row[DATAURI_IDX]
                        newGroupName = dataURI + "/" + row[COVERAGE_IDX]
                        numLevels = _link_coverage_groups(f, dataURI, newGroupName)
                        updateSql = "update satellite set datauri='" + newGroupName + "'"
                        updateSql += ", interpolationlevels=" + repr(numLevels)
                        updateSql += " where id=" + row[ID_IDX] + ";"
                        queryPostgres(updateSql)
                finally:
                    # Close the file even when a row fails, so the
                    # handle is not leaked for the rest of the run.
                    f.close()
            except Exception as e:
                # Best effort: report the failure and move on to the
                # next file.
                print(e)
                print("Error occurred processing file " + fname)
if __name__ == '__main__':
    # Time the whole conversion: the table change first, then the
    # per-sector datauri / hdf5 pass.
    start = time()
    update_satellite_table()
    process_all_satellite()
    elapsed = time() - start
    print("Total Conversion time %ds" % elapsed)