awips2/edexOsgi/com.raytheon.uf.edex.plugin.qc/utility/edex_static/base/python/qcNetCDF.py
root 06a8b51d6d Initial revision of AWIPS2 11.9.0-7p5
Former-commit-id: 64fa9254b946eae7e61bbc3f513b7c3696c4f54f
2012-01-06 08:55:05 -06:00

530 lines
20 KiB
Python

##
# This software was developed and / or modified by Raytheon Company,
# pursuant to Contract DG133W-05-CQ-1067 with the US Government.
#
# U.S. EXPORT CONTROLLED TECHNICAL DATA
# This software product contains export-restricted data whose
# export/transfer/disclosure is restricted by U.S. law. Dissemination
# to non-U.S. persons whether in the United States or abroad requires
# an export license or other authorization.
#
# Contractor Name: Raytheon Company
# Contractor Address: 6825 Pine Street, Suite 340
# Mail Stop B8
# Omaha, NE 68106
# 402.291.0100
#
# See the AWIPS II Master Rights File ("Master Rights File.pdf") for
# further licensing information.
##
import sys, time, string, numpy
import pupynere as netcdf
import LogStream,JUtil
from java.util import ArrayList
from java.lang import String
import jep
from com.raytheon.uf.common.time.util import TimeUtil
from com.raytheon.uf.common.datastorage.records import IDataRecord
from com.raytheon.uf.common.datastorage.records import FloatDataRecord
from com.raytheon.uf.common.datastorage.records import StringDataRecord
from com.raytheon.uf.common.datastorage.records import ShortDataRecord
from com.raytheon.uf.common.datastorage.records import IntegerDataRecord
from com.raytheon.uf.common.datastorage.records import LongDataRecord
from com.raytheon.uf.common.datastorage.records import ByteDataRecord
from com.raytheon.uf.common.pointdata import PointDataContainer
#
# Accesses the netCDF QC mesonet data
#
#
# SOFTWARE HISTORY
#
# Date Ticket# Engineer Description
# ------------ ---------- ----------- --------------------------
# 12/04/2009 #3408 bphillip Initial Creation
#
class NetCDFFile():
    ##
    # Read-only accessor for a netCDF QC mesonet file, backed by the pupynere
    # netcdf_file object.
    ##

    ##
    # Initializes the NetCDFFile object
    # @param fileName: The name of the file to open (opened in "r" mode)
    ##
    def __init__(self, fileName):
        self.__fileName = fileName
        self.__file = netcdf.netcdf_file(self.__fileName, "r")
        self.__vars = self.__file.variables
        # Built lazily on the first call to getInventory()
        self.__inventory = None
        self.__globalAttributes = None

    ##
    # Closes the file
    ##
    def close(self):
        self.__file.close()

    ##
    # Gets a variable from the file
    # @param varName: The name of the variable to retrieve
    # @return: The pupynere netcdf variable object
    ##
    def getVariable(self, varName):
        return self.__vars[varName]

    ##
    # Gets an attribute for a variable from the file
    # @param varName: The variable for which to retrieve the attribute
    # @param attributeName: The attribute to retrieve
    # @return: The attribute value (AttributeError propagates if it is absent)
    ##
    def getAttribute(self, varName, attributeName):
        return getattr(self.getVariable(varName), attributeName)

    ##
    # Gets a data element for the specified variable at the specified index
    # @param varName: The variable from which to retrieve the data
    # @param index: The index of the data to retrieve; converted with int()
    # @return: The value stored at that index
    ##
    def getData(self, varName, index):
        return self.__vars[varName][int(index)]

    ##
    # Private method to build the inventory of the netCDF file. The inventory
    # is a dictionary keyed by "<dataProvider> <providerId>". Each value is a
    # dictionary with two parallel lists:
    #   'indices'  - the record indices belonging to that station
    #   'obsTimes' - the observationTime value of each of those records
    #
    # The inventory is stored in the self.__inventory variable
    # @see: self.__inventory
    ##
    def __getInventoryInternal(self):
        inventory = {}
        dataProviders = self.getVariable('dataProvider')
        providerIds = self.getVariable('providerId')
        obsTimes = self.getVariable('observationTime')
        for index, provider in enumerate(dataProviders):
            providerName = (self.charArrayToString(provider) + ' '
                            + self.charArrayToString(providerIds[index]))
            entry = inventory.get(providerName)
            if entry is None:
                entry = {'indices': [], 'obsTimes': []}
                inventory[providerName] = entry
            # Every record is recorded, including the first one seen for a
            # station
            entry['indices'].append(index)
            entry['obsTimes'].append(obsTimes[index])
        self.__inventory = inventory

    ##
    # Gets the variables contained in the file
    # @return: The file's variables collection, looked up by variable name
    ##
    def getVars(self):
        return self.__vars

    ##
    # Gets the list of variable names contained in the file
    # @return: A list of strings containing the variable names from the file
    ##
    def getVarList(self):
        return list(self.__vars.keys())

    ##
    # Gets the inventory of the file, building it on first use
    # @return: The inventory of the file
    # @see: __getInventoryInternal
    ##
    def getInventory(self):
        if self.__inventory is None:
            self.__getInventoryInternal()
        return self.__inventory

    ##
    # Gets a globally stored attribute
    # @param attribute: The attribute name to retrieve
    # @return: The attribute value
    ##
    def getGlobalAttribute(self, attribute):
        return getattr(self.__file, attribute)

    ##
    # Utility method to convert a char array to a string
    # @param arr: The char array to convert (an iterable of characters)
    # @return: The characters joined together, with empty elements skipped
    ##
    def charArrayToString(self, arr):
        return ''.join([letter for letter in arr if letter != ''])
##
# Gets the variable names from the file
# @param fileName: The name of the file to examine
# @return: List of strings containing the variable names contained in the file
##
def getVars(fileName):
    ncFile = NetCDFFile(str(fileName))
    try:
        return ncFile.getVarList()
    finally:
        # Release the file handle even if listing the variables fails
        ncFile.close()
##
# Copies the retrieved values into a numpy array of the requested type.
# @param values: The list of values retrieved for one attribute
# @param shape: (count,) for scalar values or (count, rowLength) for arrays
# @param numpyType: The numpy type of the array handed to the Java record
# @return: A numpy array of shape (count,), or of shape
#          (1, count * rowLength) when the values are themselves arrays
##
def _packValues(values, shape, numpyType):
    packed = numpy.zeros(shape, numpyType)
    for i, value in enumerate(values):
        packed[i] = value
    if len(shape) == 2:
        packed = numpy.resize(packed, (1, shape[0] * shape[1]))
    return packed

##
# Builds the IDataRecord holding every value retrieved for one attribute.
# The record type is chosen from the numpy type of the first value (or of the
# first element of the first value when the values are numpy arrays).
# @param ncFile: The open NetCDFFile; used to convert char arrays to strings
# @param attr: The attribute name, used as the name of the record
# @param values: The non-empty list of values retrieved for the attribute
# @return: The populated record, or None (after logging a problem) when the
#          data type is not supported
##
def _buildDataRecord(ncFile, attr, values):
    count = len(values)
    firstValue = values[0]
    isArray = type(firstValue) == numpy.ndarray
    if isArray:
        shape = (count, len(firstValue))
        elementType = type(firstValue[0])
    else:
        shape = (count,)
        elementType = type(firstValue)

    if elementType == numpy.string_:
        javaStrings = jep.jarray(count, String)
        for i in range(count):
            if isArray:
                javaStrings[i] = String(ncFile.charArrayToString(values[i]))
            else:
                javaStrings[i] = String(values[i])
        # The name and group are passed to the StringDataRecord constructor,
        # so none of the setters below are called for it
        return StringDataRecord(attr, "", javaStrings)

    if elementType in (numpy.float32, numpy.float64):
        rec = FloatDataRecord()
        rec.setFloatData(_packValues(values, shape, numpy.float32))
    elif elementType in (numpy.int16, numpy.int8, numpy.int32):
        rec = IntegerDataRecord()
        rec.setIntData(_packValues(values, shape, numpy.int32))
    else:
        LogStream.logProblem("Unsupported data type detected: " + str(elementType))
        return None

    # Sets the required data on the IDataRecord
    rec.setName(attr)
    rec.setDimension(len(shape))
    rec.setIntSizes(numpy.array(shape, numpy.int32))
    rec.setGroup("")
    return rec

##
# Formats the retrieved data into a PointDataContainer.
# @param ncFile: The open NetCDFFile the data was read from
# @param attributes: The requested attribute names
# @param dataSets: Dictionary mapping each attribute to its retrieved values
# @param ptDataDescription: The description of the parameters in the file
# @return: An empty PointDataContainer if nothing was retrieved, None if an
#          unsupported data type was found, else the populated container
##
def _buildPointDataContainer(ncFile, attributes, dataSets, ptDataDescription):
    # Every attribute list receives one entry per selected record, so
    # "no list has any data" also covers the case of no attributes at all
    if not any(dataSets.values()):
        return PointDataContainer()
    recs = ArrayList()
    for attr in attributes:
        rec = _buildDataRecord(ncFile, attr, dataSets[attr])
        if rec is None:
            return None
        recs.add(rec)
    return PointDataContainer.build(ptDataDescription, recs)

##
# Gets the data for the specified attributes from the netCDF file filtered by
# the query parameters provided
# @param fileName: The file from which to retrieve the data
# @param attributes: java.util.List of the attribute names to retrieve
# @param queryParameters: java.util.List of the parameters used to filter the
#                         data, each of the form "attribute value operator"
# @param ptDataDescription: The description of the parameters in the file
# @return: A PointDataContainer containing the requested attributes (empty if
#          no record matched), or None if an unsupported data type was found
##
def getDataSets(fileName, attributes, queryParameters, ptDataDescription):
    # Converts the attributes and query parameters from java.util.List to
    # python lists
    attributes = JUtil.javaStringListToPylist(attributes)
    queryParameters = JUtil.javaStringListToPylist(queryParameters)
    ncFile = NetCDFFile(str(fileName))
    try:
        # Parses the query parameters into a more easily managed form
        queryParameters = parseQueryParams(ncFile, queryParameters)
        dataSets = {}
        for attr in attributes:
            dataSets[attr] = []
        # Gets the data from the netCDF. Every record is tested against the
        # query parameters; this is essentially an inefficient query procedure
        recordCount = ncFile.getVariable('observationTime').shape[0]
        for recordIndex in range(0, recordCount):
            if _checkConditions(ncFile, recordIndex, queryParameters):
                for attr in attributes:
                    dataSets[attr].append(ncFile.getData(attr, recordIndex))
        return _buildPointDataContainer(ncFile, attributes, dataSets, ptDataDescription)
    finally:
        # The file is closed on every path, including early returns and errors
        ncFile.close()

##
# Gets the point data described by a request object
# @param args: Request object providing getFileName(), getAttributes(),
#              getIndexes() and getPdd()
# @return: The result of getPointData2 for those values
##
def getPointData(args):
    return getPointData2(args.getFileName(), args.getAttributes(), args.getIndexes(), args.getPdd())

##
# Gets the data for the specified attributes from the netCDF file at the
# record indexes provided
# @param fileName: The file from which to retrieve the data
# @param attributes: The attribute names to retrieve
# @param indexes: The indexes of the records to retrieve
# @param ptDataDescription: The description of the parameters in the file
# @return: A PointDataContainer containing the requested attributes (empty if
#          nothing was requested), or None if an unsupported data type was found
##
def getPointData2(fileName, attributes, indexes, ptDataDescription):
    ncFile = NetCDFFile(str(fileName))
    try:
        dataSets = {}
        for attr in attributes:
            dataSets[attr] = []
        for recordIndex in indexes:
            for attr in attributes:
                dataSets[attr].append(ncFile.getData(attr, recordIndex))
        return _buildPointDataContainer(ncFile, attributes, dataSets, ptDataDescription)
    finally:
        # The file is closed on every path, including early returns and errors
        ncFile.close()
# Comparison to apply for each supported query operator
_COMPARISONS = {
    '=': lambda actual, expected: actual == expected,
    '<': lambda actual, expected: actual < expected,
    '<=': lambda actual, expected: actual <= expected,
    '>': lambda actual, expected: actual > expected,
    '>=': lambda actual, expected: actual >= expected,
}

##
# Checks the data retrieved against the query parameters provided to see if
# the data should be included in the returned data set
# @param file: The netCDF file containing the data
# @param recordIndex: The index of the record in the netCDF file
# @param queryParameters: The parameters to use to filter the data; each one
#                         is a sequence of [attribute, value, operator]
# @return: True if the data at the specified index passed every query
#          criterion, else False
##
def _checkConditions(file, recordIndex, queryParameters):
    for param in queryParameters:
        attribute = param[0]
        # Gets the fill value for the attribute from the file, if it has one
        try:
            fillValue = file.getAttribute(attribute, "_FillValue")
        except AttributeError:
            fillValue = None
        # Gets the missing_value value for the attribute, if it has one
        try:
            missingValue = file.getAttribute(attribute, "missing_value")
        except AttributeError:
            missingValue = None

        currentData = file.getData(attribute, recordIndex)
        # Fail the test if the data is the fill value or missing
        if (currentData == fillValue) or (currentData == missingValue):
            return False

        # Operators are matched by equality, not identity: strings produced
        # by splitting the query text are not guaranteed to be the same
        # object as a literal such as '<='
        compare = _COMPARISONS.get(param[2])
        if compare is None:
            # An unrecognised operator does not filter anything out
            continue
        expected = _stringCast(param[1], type(currentData))
        if not compare(currentData, expected):
            return False
    return True
##
# Casts a string object to the desired type
# @param value: The value to convert
# @param desiredType: The type to convert the value to. Floating point types
#                     use float(), integer types use int() and string types
#                     return the value unchanged
# @return: The converted value
# @raise KeyError: If desiredType is not a float, integer or string type
##
def _stringCast(value, desiredType):
    try:
        # Subclass checks accept every numpy float/int width and numpy string
        # types, not only the few exact types named in a lookup table
        if issubclass(desiredType, (numpy.floating, float)):
            return float(value)
        if issubclass(desiredType, (numpy.integer, int)):
            return int(value)
        if issubclass(desiredType, str):
            return value
    except TypeError:
        # desiredType is not a class; reported as unsupported below
        pass
    raise KeyError(desiredType)
##
# Breaks the query parameters into a more manageable form
# Each query parameter is passed in as a string of the form:
# attribute value operator
# @param file: The file containing the data; its 'timeVariables' global
#              attribute is a comma separated list of the time field names
# @param queryParams: The list of query params in the aforementioned form
# @return: A list with one entry per query param, each entry being the list
#          of space separated tokens of that param. For time fields the value
#          token is replaced by TimeUtil.formattedDateToLong(value) / 1000
#          (presumably milliseconds converted to seconds - confirm against
#          TimeUtil)
##
def parseQueryParams(file, queryParams):
    timeFieldNames = file.getGlobalAttribute('timeVariables').split(',')

    def tokenize(rawParam):
        tokens = str(rawParam).split(" ")
        # Correct the time if it is a time field
        if tokens[0] in timeFieldNames:
            tokens[1] = TimeUtil.formattedDateToLong(tokens[1]) / 1000
        return tokens

    return [tokenize(rawParam) for rawParam in queryParams]