157 lines
6.2 KiB
Python
157 lines
6.2 KiB
Python
|
#!/usr/bin/python
|
||
|
#
|
||
|
"""
|
||
|
Conversion script: append the coverageid to the satellite datauris, and
|
||
|
modify the associated satellite hdf5 group names to append the
|
||
|
coverageid. The new groups are added as an alias to the existing
|
||
|
datasets.
|
||
|
Date Ticket# Engineer Description
|
||
|
------------ ---------- ----------- --------------------------
|
||
|
20120711 798 jkorman Initial Development
|
||
|
20120822 798 jkorman Added ability to account for files containing hdfFileId.
|
||
|
"""
|
||
|
from subprocess import Popen, PIPE
|
||
|
import sys
|
||
|
from time import time
|
||
|
import h5py
|
||
|
|
||
|
# psql invocation prefix; the SQL statement is appended as the -c argument.
# The -t -q -A switches ask psql for bare, unaligned rows, which is what
# lets the output be split on "|" into fields.
POSTGRES_CMD = "psql -U awips -d metadata -t -q -A -c "

# Root directory under which the satellite hdf5 files are looked up.
HDF5_LOC = "/awips2/edex/data/hdf5"

# Positions of the fields within a row returned by get_satellite_rows().
# They follow the column order of its select statement:
# id, dataURI, coverage_gid, sectorid, physicalelement, reftime, hdffileid
(ID_IDX,
 DATAURI_IDX,
 COVERAGE_IDX,
 SECTORID_IDX,
 PHYSICAL_ELEMENT_IDX,
 REFTIME_IDX,
 HDFFILEID_IDX) = range(7)
|
||
|
|
||
|
def update_satellite_table():
    """
    Make sure the satellite table has an interpolationlevels column.

    Asks information_schema.columns how many columns named
    interpolationlevels the satellite table has; when the answer is '0'
    the column is added as an integer column and a notice is printed.
    """
    column_check = ("select count(*) from information_schema.columns "
                    "where table_name='satellite' and column_name='interpolationlevels';")
    found = queryPostgres(column_check)
    if found[0][0] != '0':
        # The column already exists, so there is nothing to alter.
        return
    queryPostgres("alter table satellite add column interpolationlevels integer;")
    print("Adding interpolationlevels column to satellite table")
|
||
|
|
||
|
def formatFileTime(refTime):
    """
    Build the YYYY-MM-DD-HH stamp for a reference time.

    The year, month, day and hour are read from fixed character positions
    of refTime (0-3, 5-6, 8-9 and 11-12); whatever separator characters sit
    between those positions in the input are dropped.
    """
    year = refTime[0:4]
    month = refTime[5:7]
    day = refTime[8:10]
    hour = refTime[11:13]
    return "-".join((year, month, day, hour))
|
||
|
|
||
|
def getFilename(refTime, h5FileId):
    """
    Return the satellite hdf5 file name for a reference time.

    refTime  -- reference time, formatted through formatFileTime().
    h5FileId -- optional hdf5 file identifier; when it is non-empty it is
                inserted, preceded by "-", before the ".h5" extension.
    """
    ending = ".h5"
    if len(h5FileId) > 0:
        ending = "-" + h5FileId + ending
    return "satellite-" + formatFileTime(refTime) + ending
|
||
|
|
||
|
def queryPostgres(sql):
    """
    Run one SQL statement through psql and return its output as rows.

    sql -- the complete SQL statement to execute.

    Returns a list with one entry per line psql wrote to stdout. Each entry
    is the list of fields obtained by stripping the line and splitting it
    on "|". A statement that prints nothing yields an empty list.
    """
    # Hand the statement to psql as a single argv entry instead of pasting
    # it into a shell command line: quotes, "$", backticks or backslashes
    # inside the SQL (for example in a datauri) are then passed through
    # untouched rather than being interpreted by the shell.
    command = POSTGRES_CMD.split() + [sql]
    # universal_newlines keeps stdout as text so the "|" split works the
    # same way on Python 2 and Python 3.
    proc = Popen(command, stdout=PIPE, universal_newlines=True)
    try:
        return [line.strip().split("|") for line in proc.stdout]
    finally:
        # Release the pipe and reap the child so repeated calls do not
        # accumulate open descriptors or defunct psql processes.
        proc.stdout.close()
        proc.wait()
|
||
|
|
||
|
def get_sectorids():
    """
    Fetch the distinct sector identifiers found in the satellite table.

    Returns the rows produced by queryPostgres() for the query, i.e. a list
    whose entries are field lists with the sectorid as the first field.
    """
    distinct_sql = "select distinct sectorid from satellite;"
    sector_rows = queryPostgres(distinct_sql)
    return sector_rows
|
||
|
|
||
|
def get_satellite_rows(sectorid):
    """
    Fetch the satellite rows of one sector, grouped by hdf5 file.

    sectorid -- the sector identifier to select on.

    Returns a dict that maps a file key of the form
    /satellite/<sectorid>/<physicalelement>/<file name> to the list of rows
    that belong to that file. Each row is a list of fields laid out in the
    order given by the *_IDX constants.
    """
    # Build a proper SQL string literal. repr() applies Python quoting
    # rules: a value containing a single quote would come back wrapped in
    # double quotes, which SQL reads as an identifier, not a string.
    literal = "'" + str(sectorid).replace("'", "''") + "'"
    rows = queryPostgres("select id, dataURI, coverage_gid, sectorid, physicalelement, reftime, hdffileid from satellite where sectorid=" + literal + ";")

    keys = {}
    for row in rows:
        # Rows that share sector, physical element and file name live in
        # the same hdf5 file, so they share a key.
        key = "/satellite/" + row[SECTORID_IDX] + "/" + row[PHYSICAL_ELEMENT_IDX] + "/" + getFilename(row[REFTIME_IDX], row[HDFFILEID_IDX])
        keys.setdefault(key, []).append(row)
    return keys
|
||
|
|
||
|
def _sql_string_literal(value):
    """Return value as a single-quoted SQL literal with embedded quotes doubled."""
    return "'" + value.replace("'", "''") + "'"


def _alias_row_groups(f, row):
    """
    Create the coverage-qualified alias groups for one satellite row.

    f   -- the open, writable hdf5 file that holds the row's datasets.
    row -- one row from get_satellite_rows(), indexed by the *_IDX constants.

    Returns a (newGroupName, numLevels) tuple: the new group name
    (<dataURI>/<coverage>) and the number of entries found under the old
    Data-interpolated group other than '0'.
    """
    dataURI = row[DATAURI_IDX]
    newGroupName = dataURI + "/" + row[COVERAGE_IDX]

    f.create_group(newGroupName)
    f.create_group(newGroupName + "/Data-interpolated")

    # Link to the old data set
    f[newGroupName + "/Data"] = h5py.SoftLink(dataURI + "/Data")

    numLevels = 0
    for n in f[dataURI + "/Data-interpolated"].keys():
        if n == '0':
            # special case for this link.
            # dataset /Data-interpolated/0 points to /Data
            oldds = dataURI + "/Data"
        else:
            numLevels += 1
            oldds = dataURI + "/Data-interpolated/" + n
        f[newGroupName + "/Data-interpolated/" + n] = h5py.SoftLink(oldds)
    return newGroupName, numLevels


def _convert_file(fname, rows):
    """
    Alias the groups of every row stored in one hdf5 file and update the
    matching satellite table entries with the new datauri and level count.

    fname -- full path of the hdf5 file.
    rows  -- the rows from get_satellite_rows() that belong to that file.
    """
    f = h5py.File(fname, 'r+')
    try:
        for row in rows:
            newGroupName, numLevels = _alias_row_groups(f, row)
            # The new datauri is the new group name; quote it as an SQL
            # literal so a single quote inside it cannot break the statement.
            updateSql = "update satellite set datauri=" + _sql_string_literal(newGroupName)
            updateSql += ", interpolationlevels=" + str(numLevels)
            updateSql += " where id=" + row[ID_IDX] + ";"
            queryPostgres(updateSql)
    finally:
        # Close the file even when a row fails, so the handle is not leaked
        # while the remaining files are processed.
        f.close()


def process_all_satellite():
    """
    Process all entries in the satellite table.

    Do one sector id at a time. The rows of a sector are grouped by hdf5
    file; each file is opened once and all of its rows are converted. An
    error while handling a file is printed and the remaining rows of that
    file are skipped; processing then continues with the next file.
    """
    sectorids = get_sectorids()
    if not sectorids:
        print("No sector identifiers found in the satellite table")
        return

    for sectorid in sectorids:
        print("Processing sector " + sectorid[0])
        keys = get_satellite_rows(sectorid[0])
        if not keys:
            print("No keys found for the sector id " + sectorid[0])
            continue

        for key in keys:
            print("==========================================================")
            print(" Processing key = " + key)
            fname = HDF5_LOC + key
            try:
                _convert_file(fname, keys[key])
            except Exception as e:
                # Best effort: report the failure and carry on with the
                # other files instead of aborting the whole conversion.
                print(e)
                print("Error occurred processing file " + fname)
|
||
|
|
||
|
if __name__ == '__main__':
    # Run the schema change first, then the per-sector hdf5/database
    # conversion, and report the elapsed time for the whole run.
    started = time()
    update_satellite_table()
    process_all_satellite()
    elapsed = time() - started
    print("Total Conversion time %ds" % elapsed)
|