Issue #1332: Update pypies to create/use condensed group structure

Change-Id: I2e7311b063df1a217a2fe37c0e470272e4e85e92

Former-commit-id: 768ed3b2ed [formerly c72c0b9f98] [formerly 768ed3b2ed [formerly c72c0b9f98] [formerly dbe4290f45 [formerly 90f3cac62473c66dcdef73a6a605becf08868316]]]
Former-commit-id: dbe4290f45
Former-commit-id: 9e51ff1b8a [formerly 2b3f0a2938]
Former-commit-id: a0827f476e
Richard Peter 2012-11-21 15:23:23 -06:00
parent 64efa971f6
commit 9fbabfde61


@@ -122,18 +122,19 @@ class H5pyDataStore(IDataStore.IDataStore):
                 raise StorageException('Data must be chunked to be compressed')

         data = record.retrieveDataObject()
-        group = self.__getGroup(f, record.getGroup(), create=True)
+        rootNode=f['/']
+        group = self.__getNode(rootNode, record.getGroup(), None, create=True)
         if record.getMinIndex() is not None and len(record.getMinIndex()):
-            ss = self.__writePartialHDFDataset(f, data, record.getDimension(), record.getSizes(), record.getName(),
-                                               group, props, record.getMinIndex())
+            ss = self.__writePartialHDFDataset(f, data, record.getDimension(), record.getSizes(),
+                                               group[record.getName()], props, record.getMinIndex())
         else:
             ss = self.__writeHDFDataset(f, data, record.getDimension(), record.getSizes(), record.getName(),
                                         group, props, self.__getHdf5Datatype(record), storeOp, record)

         if props and props.getDownscaled():
             intName = record.getGroup() + '/' + record.getName() + '-interpolated'
-            self.__getGroup(f, intName, True)
-            self.__link(group, intName + '/0', group[record.getName()])
+            intGroup = self.__getNode(rootNode, intName, None, create=True)
+            self.__link(intGroup, '0', group[record.getName()])

         f.flush()

         if logger.isEnabledFor(logging.DEBUG):
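For reference, a minimal h5py sketch (not project code) of the layout this hunk writes: the '<dataset>-interpolated' group now lives beside the full-resolution dataset inside the condensed group, and downscale level '0' is a hard link back to that dataset. All file, group, and dataset names below are illustrative.

import h5py
import numpy as np

with h5py.File('/tmp/sketch_store.h5', 'w') as f:
    group = f.create_group('satellite::east::ch4')       # condensed group
    full = group.create_dataset('Data', data=np.zeros((4, 4)))
    interp = group.create_group('Data-interpolated')
    interp['0'] = full                # hard link to the full-resolution data
    print(list(group.keys()))         # ['Data', 'Data-interpolated']
    print(list(interp.keys()))        # ['0']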
@@ -287,7 +288,7 @@ class H5pyDataStore(IDataStore.IDataStore):

         return ss

-    def __writePartialHDFDataset(self, f, data, dims, szDims, datasetName, group, props,
+    def __writePartialHDFDataset(self, f, data, dims, szDims, ds, props,
                                  minIndex):
         # reverse sizes for hdf5
         szDims1 = [None, ] * len(szDims)
@@ -297,8 +298,6 @@ class H5pyDataStore(IDataStore.IDataStore):
         for i in range(len(minIndex)):
             offset[i] = minIndex[len(minIndex) - i - 1]

-        ds = group[datasetName]
-
         # process chunking
         # chunkSize = None
         # if data.dtype != numpy._string and data.dtype != numpy._object:
@@ -323,21 +322,25 @@ class H5pyDataStore(IDataStore.IDataStore):
         try:
             locs = request.getLocations()
             for dataset in locs:
-                ds = self.__getGroup(f, dataset)
+                ds = self.__getNode(f, None, dataset)
                 grp = ds.parent
                 grp.id.unlink(ds.name)
         finally:
             # check if file has any remaining data sets
             # if no data sets, flag file for deletion
             deleteFile = False
             try:
                 f.flush()
                 deleteFile = not self.__hasDataSet(f)
-            finally:
-                f.close()
+            except Exception, e:
+                logger.error('Error occurred checking for dataSets in file [' + str(fn) + ']: ' + IDataStore._exc())
+
+            t0=time.time()
+            f.close()
+            t1=time.time()
+            timeMap['closeFile']=t1-t0
+
             if deleteFile:
                 try:
                     os.remove(fn)
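A standalone sketch (not project code, names made up) of the delete path above: unlink the requested dataset, flag the file for deletion if no datasets remain, and remove it only after the close. The real store uses the low-level grp.id.unlink() call plus its own locking, timing, and logging.

import os
import h5py
import numpy as np

def has_dataset(h5file):
    # recursive stand-in for the store's __hasDataSet check
    found = []
    h5file.visititems(lambda name, obj:
        found.append(name) if isinstance(obj, h5py.Dataset) else None)
    return bool(found)

fn = '/tmp/sketch_delete.h5'
with h5py.File(fn, 'w') as f:
    f.create_dataset('satellite::east::ch4/Data', data=np.arange(3))

f = h5py.File(fn, 'a')
try:
    ds = f['satellite::east::ch4/Data']
    del ds.parent[ds.name.split('/')[-1]]   # same effect as grp.id.unlink(...)
    deleteFile = not has_dataset(f)
finally:
    f.close()                               # always close before deciding
if deleteFile:
    os.remove(fn)                           # file held no other datasets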
@@ -365,11 +368,13 @@ class H5pyDataStore(IDataStore.IDataStore):
         try:
             group = request.getGroup()
             req = request.getRequest()
+            rootNode=f['/']
             if req:
-                grp = self.__getGroup(f, group)
-                result = [self.__retrieveInternal(grp, request.getDataset(), req)]
+                ds = self.__getNode(rootNode, group, request.getDataset())
+                result = [self.__retrieveInternal(ds, req)]
             else:
-                result = self.__retrieve(f, group, request.getIncludeInterpolated())
+                groupNode = self.__getNode(rootNode, group)
+                result = self.__retrieve(groupNode, request.getIncludeInterpolated())
             resp = RetrieveResponse()
             resp.setRecords(result)
             return resp
@@ -382,25 +387,22 @@ class H5pyDataStore(IDataStore.IDataStore):

-    def __retrieve(self, f, group, includeInterpolated=False):
+    def __retrieve(self, group, includeInterpolated=False):
         records = []
-        if type(group) is str:
-            group = self.__getGroup(f, group)
-
         datasets = group.keys()
         for ds in datasets:
             interpDs = ds.endswith('-interpolated')
             if includeInterpolated and interpDs:
-                subresults = self.__retrieve(f, group.name + '/' + ds, False)
+                subresults = self.__retrieve(group[ds], False)
                 if subresults:
                     records += subresults
             elif not interpDs:
-                rec = self.__retrieveInternal(group, ds, REQUEST_ALL)
+                rec = self.__retrieveInternal(group[ds], REQUEST_ALL)
                 records.append(rec)

         return records

-    def __retrieveInternal(self, grp, dsName, req):
-        ds = grp[dsName]
+    def __retrieveInternal(self, ds, req):
         rawData = HDF5OpManager.read(ds, req)
         rec = DataStoreFactory.createStorageRecord(rawData, ds)
         return rec
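The retrieval walk after this change, as a hedged standalone sketch (record creation faked, h5py only): every child of the node is read as a dataset, and '-interpolated' subgroups are only descended into when interpolated levels were requested. This assumes, as in the condensed layout, that the non-interpolated children of a group node are datasets.

import h5py

def retrieve(group, include_interpolated=False):
    records = []
    for name in group.keys():
        is_interp = name.endswith('-interpolated')
        if include_interpolated and is_interp:
            records += retrieve(group[name], False)   # one level of recursion
        elif not is_interp:
            records.append(group[name][()])           # read whole dataset
    return records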
@@ -420,8 +422,9 @@ class H5pyDataStore(IDataStore.IDataStore):
         names = request.getDatasetGroupPath()
         req = request.getRequest()
         result = []
+        rootNode=f['/']
         for dsName in names:
-            ds = self.__getGroup(f, dsName)
+            ds = self.__getNode(rootNode, None, dsName)
             t2 = time.time()
             rawData = HDF5OpManager.read(ds, req)
             t3 = time.time()
@@ -455,12 +458,14 @@ class H5pyDataStore(IDataStore.IDataStore):
         groups = request.getGroups()
         req = request.getRequest()
         recs = []
+        rootNode=f['/']
         for group in groups:
-            grp = self.__getGroup(f, group)
+            grp = self.__getNode(rootNode, group)
             datasets = grp.keys()
             for ds in datasets:
-                rawData = HDF5OpManager.read(grp[ds], req)
-                rec = DataStoreFactory.createStorageRecord(rawData, grp[ds])
+                dsNode=grp[ds]
+                rawData = HDF5OpManager.read(dsNode, req)
+                rec = DataStoreFactory.createStorageRecord(rawData, dsNode)
                 recs.append(rec)
         resp = RetrieveResponse()
         resp.setRecords(recs)
@@ -477,7 +482,7 @@ class H5pyDataStore(IDataStore.IDataStore):
         f, lock = self.__openFile(fn, 'r')
         try:
             grpName = request.getGroup()
-            grp = self.__getGroup(f, grpName)
+            grp = self.__getNode(f['/'], grpName)
             ds = grp.keys()
             return ds
         finally:
@@ -510,7 +515,7 @@ class H5pyDataStore(IDataStore.IDataStore):
         if props and not props.getChunked() and props.getCompression != 'NONE':
             raise StorageException("Data must be chunked to be compressed")
         grp = rec.getGroup()
-        group = self.__getGroup(f, grp, create=True)
+        group = self.__getNode(f['/'], grp, None, create=True)

         # reverse sizes for hdf5
         szDims = rec.getSizes()
@@ -637,37 +642,87 @@ class H5pyDataStore(IDataStore.IDataStore):
         return f, fd

-    def __getGroup(self, f, name, create=False):
+    def __getNode(self, rootNode, groupName, dsName=None, create=False):
         t0=time.time()
+
+        # expected output to be node for /group1::group2::group3/dataSet
+        # expected output of /group1::group2::group3/dataSet-interpolated/1
+        if groupName:
+            if dsName:
+                toNormalize=groupName + '/' + dsName
+            else:
+                toNormalize=groupName
+        elif dsName:
+            toNormalize=dsName
+        else:
+            # both None, return root node as default
+            return rootNode
+
+        tokens=toNormalize.split('/')
+        # remove any empty tokens
+        tokens = filter(None, tokens)
+        dsNameToken=None
+        if dsName:
+            # data set name was given, keep last token for ds name
+            dsNameToken = tokens.pop()
+
+        # need to check final token for -interpolated
+        isInterpToken = None
+        if tokens:
+            isInterpToken = tokens[-1]
+            if isInterpToken.endswith('-interpolated'):
+                del tokens[-1]
+                if dsNameToken:
+                    dsNameToken = isInterpToken + '/' + dsNameToken
+                else:
+                    dsNameToken = isInterpToken
+
+        if tokens:
+            basePath='::'.join(tokens)
+        else:
+            basePath=None
+
+        node = None
         if create:
-            parts = name.split('/')
-            grp = None
-            for s in parts:
-                if not grp:
-                    if not s:
-                        s = '/'
-                    if s == '/' or s in f.keys():
-                        grp = f[s]
-                    else:
-                        grp = f.create_group(s)
-                else:
-                    if s:
-                        if s in grp.keys():
-                            grp = grp[s]
-                        else:
-                            grp = grp.create_group(s)
+            if basePath is None:
+                node = rootNode
+            elif basePath in rootNode.keys():
+                node = rootNode[basePath]
+            else:
+                node = rootNode.create_group(basePath)
+            if dsNameToken:
+                for token in dsNameToken.split('/'):
+                    if token in node.keys():
+                        node = node[token]
+                    else:
+                        node = node.create_group(token)
         else:
-            if name is None or len(name.strip()) == 0:
-                # if no group is specific default to base group
-                grp = f['/']
-            else:
-                try:
-                    group=name
-                    if not group.startswith('/'):
-                        group = '/' + group
-                    grp = f[group]
-                except:
-                    raise StorageException("No group " + name + " found")
+            if dsNameToken:
+                if basePath:
+                    basePath += '/' + dsNameToken
+                else:
+                    basePath = dsNameToken
+            try:
+                if basePath:
+                    node = rootNode[basePath]
+                else:
+                    node = rootNode
+            except:
+                group = None
+                if groupName:
+                    group = groupName
+                    if dsName:
+                        group += '/' + dsName
+                elif dsName:
+                    group = dsName
+                # check old group structure
+                node = self.__getGroup(rootNode, group)

         t1=time.time()
         if timeMap.has_key('getGroup'):
@@ -675,6 +730,22 @@ class H5pyDataStore(IDataStore.IDataStore):
             timeMap['getGroup']+=t1-t0
         else:
             timeMap['getGroup']=t1-t0

-        return grp
+        return node
+
+    # deprecated, should only be called in transition period
+    def __getGroup(self, rootNode, name):
+        if name is None or len(name.strip()) == 0:
+            # if no group is specific default to base group
+            grp = rootNode
+        else:
+            try:
+                group=name
+                if group.startswith('/'):
+                    group = group[1:]
+                grp = rootNode[group]
+            except:
+                raise StorageException("No group " + name + " found")
+        return grp

     def __link(self, group, linkName, dataset):
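The normalization that __getNode performs can be summarized in a standalone sketch (not project code): a legacy slash path collapses into a single '::'-joined group name, while the dataset, and any '-interpolated' level, stay as real child nodes. The two prints mirror the 'expected output' comments in the method.

def condense(group_name, ds_name=None):
    parts = (group_name or '') + ('/' + ds_name if ds_name else '')
    tokens = [t for t in parts.split('/') if t]       # drop empty tokens
    ds_token = tokens.pop() if ds_name else None      # keep last token for ds
    if tokens and tokens[-1].endswith('-interpolated'):
        interp = tokens.pop()
        ds_token = interp + '/' + ds_token if ds_token else interp
    base = '::'.join(tokens) if tokens else None
    return base, ds_token

print(condense('/group1/group2/group3', 'dataSet'))
# ('group1::group2::group3', 'dataSet')
print(condense('/group1/group2/group3/dataSet-interpolated', '1'))
# ('group1::group2::group3', 'dataSet-interpolated/1')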
@@ -793,5 +864,3 @@ class H5pyDataStore(IDataStore.IDataStore):
         finally:
             if lock:
                 LockManager.releaseLock(lock)
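Finally, the transition behavior in one sketch (not project code, reusing the hypothetical condense() helper from the previous sketch): reads first try the condensed path and fall back to the legacy slash hierarchy, which is what keeps files written before this commit readable through the deprecated __getGroup lookup.

import h5py

def get_node(root, group_name, ds_name=None):
    base, ds_token = condense(group_name, ds_name)    # helper sketched above
    path = '/'.join(p for p in (base, ds_token) if p)
    try:
        return root[path] if path else root           # condensed structure
    except KeyError:
        legacy = (group_name or '').strip('/')        # old group structure
        if ds_name:
            legacy = legacy + '/' + ds_name if legacy else ds_name
        if not legacy:
            raise
        return root[legacy]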