Issue #1332: Update pypies to create/use condensed group structure

Change-Id: I2e7311b063df1a217a2fe37c0e470272e4e85e92
Former-commit-id: 768ed3b2ed [formerly c72c0b9f98] [formerly 768ed3b2ed [formerly c72c0b9f98] [formerly dbe4290f45 [formerly 90f3cac62473c66dcdef73a6a605becf08868316]]]
Former-commit-id: dbe4290f45
Former-commit-id: 9e51ff1b8a [formerly 2b3f0a2938]
Former-commit-id: a0827f476e

This commit is contained in:
parent 64efa971f6
commit 9fbabfde61

1 changed file with 265 additions and 196 deletions
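
As a rough guide to the diff below: the condensed structure collapses a nested group path into a single group whose name joins the path components with '::', keeping the dataset (and any '-interpolated' child) as real nodes under it. A minimal standalone sketch of that normalization follows; the helper name condense_path is illustrative only, since in the patch itself this logic lives in the new __getNode method.

# Sketch of the path normalization performed by the new __getNode (helper name is hypothetical).
def condense_path(group_name, ds_name=None):
    """Map a legacy nested path onto (basePath, dsNameToken) for the condensed layout."""
    if group_name and ds_name:
        to_normalize = group_name + '/' + ds_name
    elif group_name:
        to_normalize = group_name
    elif ds_name:
        to_normalize = ds_name
    else:
        return None, None                      # both empty: caller falls back to the root node

    tokens = [t for t in to_normalize.split('/') if t]

    # when a dataset name was given, the last token names the dataset node
    ds_token = tokens.pop() if ds_name else None

    # an '-interpolated' group stays a real child group rather than being condensed
    if tokens and tokens[-1].endswith('-interpolated'):
        interp = tokens.pop()
        ds_token = interp + '/' + ds_token if ds_token else interp

    base_path = '::'.join(tokens) if tokens else None
    return base_path, ds_token

# e.g. condense_path('/group1/group2/group3', 'dataSet') -> ('group1::group2::group3', 'dataSet')
# e.g. condense_path('/group1/group2/dataSet-interpolated', '1') -> ('group1::group2', 'dataSet-interpolated/1')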
@@ -122,18 +122,19 @@ class H5pyDataStore(IDataStore.IDataStore):
                 raise StorageException('Data must be chunked to be compressed')
 
         data = record.retrieveDataObject()
-        group = self.__getGroup(f, record.getGroup(), create=True)
+        rootNode=f['/']
+        group = self.__getNode(rootNode, record.getGroup(), None, create=True)
         if record.getMinIndex() is not None and len(record.getMinIndex()):
-            ss = self.__writePartialHDFDataset(f, data, record.getDimension(), record.getSizes(), record.getName(),
-                                               group, props, record.getMinIndex())
+            ss = self.__writePartialHDFDataset(f, data, record.getDimension(), record.getSizes(),
+                                               group[record.getName()], props, record.getMinIndex())
         else:
             ss = self.__writeHDFDataset(f, data, record.getDimension(), record.getSizes(), record.getName(),
                                         group, props, self.__getHdf5Datatype(record), storeOp, record)
 
         if props and props.getDownscaled():
             intName = record.getGroup() + '/' + record.getName() + '-interpolated'
-            self.__getGroup(f, intName, True)
-            self.__link(group, intName + '/0', group[record.getName()])
+            intGroup = self.__getNode(rootNode, intName, None, create=True)
+            self.__link(intGroup, '0', group[record.getName()])
 
         f.flush()
         if logger.isEnabledFor(logging.DEBUG):
@@ -287,7 +288,7 @@ class H5pyDataStore(IDataStore.IDataStore):
 
         return ss
 
-    def __writePartialHDFDataset(self, f, data, dims, szDims, datasetName, group, props,
+    def __writePartialHDFDataset(self, f, data, dims, szDims, ds, props,
                                  minIndex):
         # reverse sizes for hdf5
         szDims1 = [None, ] * len(szDims)
@@ -297,8 +298,6 @@ class H5pyDataStore(IDataStore.IDataStore):
         for i in range(len(minIndex)):
             offset[i] = minIndex[len(minIndex) - i - 1]
 
-        ds = group[datasetName]
-
         # process chunking
         # chunkSize = None
         # if data.dtype != numpy._string and data.dtype != numpy._object:
@@ -323,21 +322,25 @@ class H5pyDataStore(IDataStore.IDataStore):
         try:
             locs = request.getLocations()
             for dataset in locs:
-                ds = self.__getGroup(f, dataset)
+                ds = self.__getNode(f, None, dataset)
                 grp = ds.parent
                 grp.id.unlink(ds.name)
 
         finally:
             # check if file has any remaining data sets
             # if no data sets, flag file for deletion
             deleteFile = False
             try:
                 f.flush()
                 deleteFile = not self.__hasDataSet(f)
-            finally:
-                f.close()
+            except Exception, e:
+                logger.error('Error occurred checking for dataSets in file [' + str(fn) + ']: ' + IDataStore._exc())
+
+            t0=time.time()
+            f.close()
+            t1=time.time()
+            timeMap['closeFile']=t1-t0
 
             if deleteFile:
                 try:
                     os.remove(fn)
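
For the deletion path in the hunk above: each requested dataset is looked up as a node, unlinked from its parent group, and the file is flagged for removal once no datasets remain. A minimal h5py sketch of that flow, with an assumed file name and condensed path; the patch uses the low-level grp.id.unlink call, for which del parent[name] is the high-level equivalent.

import os
import h5py

fn = 'example.h5'                                   # hypothetical file name
with h5py.File(fn, 'a') as f:
    ds = f['group1::group2/someDataset']            # hypothetical condensed path
    parent = ds.parent
    del parent[ds.name.split('/')[-1]]              # high-level equivalent of grp.id.unlink(...)

    # if no datasets remain anywhere in the file, the caller deletes the file itself
    remaining = []
    f.visititems(lambda name, obj: remaining.append(name)
                 if isinstance(obj, h5py.Dataset) else None)
    deleteFile = not remaining

if deleteFile:
    os.remove(fn)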
@@ -365,11 +368,13 @@ class H5pyDataStore(IDataStore.IDataStore):
         try:
             group = request.getGroup()
             req = request.getRequest()
+            rootNode=f['/']
             if req:
-                grp = self.__getGroup(f, group)
-                result = [self.__retrieveInternal(grp, request.getDataset(), req)]
+                ds = self.__getNode(rootNode, group, request.getDataset())
+                result = [self.__retrieveInternal(ds, req)]
             else:
-                result = self.__retrieve(f, group, request.getIncludeInterpolated())
+                groupNode = self.__getNode(rootNode, group)
+                result = self.__retrieve(groupNode, request.getIncludeInterpolated())
             resp = RetrieveResponse()
             resp.setRecords(result)
             return resp
@@ -382,25 +387,22 @@ class H5pyDataStore(IDataStore.IDataStore):
 
 
-    def __retrieve(self, f, group, includeInterpolated=False):
+    def __retrieve(self, group, includeInterpolated=False):
         records = []
-        if type(group) is str:
-            group = self.__getGroup(f, group)
         datasets = group.keys()
         for ds in datasets:
             interpDs = ds.endswith('-interpolated')
             if includeInterpolated and interpDs:
-                subresults = self.__retrieve(f, group.name + '/' + ds, False)
+                subresults = self.__retrieve(group[ds], False)
                 if subresults:
                     records += subresults
             elif not interpDs:
-                rec = self.__retrieveInternal(group, ds, REQUEST_ALL)
+                rec = self.__retrieveInternal(group[ds], REQUEST_ALL)
                 records.append(rec)
 
         return records
 
-    def __retrieveInternal(self, grp, dsName, req):
-        ds = grp[dsName]
+    def __retrieveInternal(self, ds, req):
         rawData = HDF5OpManager.read(ds, req)
         rec = DataStoreFactory.createStorageRecord(rawData, ds)
         return rec
@@ -420,8 +422,9 @@ class H5pyDataStore(IDataStore.IDataStore):
         names = request.getDatasetGroupPath()
         req = request.getRequest()
         result = []
+        rootNode=f['/']
         for dsName in names:
-            ds = self.__getGroup(f, dsName)
+            ds = self.__getNode(rootNode, None, dsName)
             t2 = time.time()
             rawData = HDF5OpManager.read(ds, req)
             t3 = time.time()
@@ -455,12 +458,14 @@ class H5pyDataStore(IDataStore.IDataStore):
         groups = request.getGroups()
         req = request.getRequest()
         recs = []
+        rootNode=f['/']
         for group in groups:
-            grp = self.__getGroup(f, group)
+            grp = self.__getNode(rootNode, group)
             datasets = grp.keys()
             for ds in datasets:
-                rawData = HDF5OpManager.read(grp[ds], req)
-                rec = DataStoreFactory.createStorageRecord(rawData, grp[ds])
+                dsNode=grp[ds]
+                rawData = HDF5OpManager.read(dsNode, req)
+                rec = DataStoreFactory.createStorageRecord(rawData, dsNode)
                 recs.append(rec)
         resp = RetrieveResponse()
         resp.setRecords(recs)
@@ -477,7 +482,7 @@ class H5pyDataStore(IDataStore.IDataStore):
         f, lock = self.__openFile(fn, 'r')
         try:
             grpName = request.getGroup()
-            grp = self.__getGroup(f, grpName)
+            grp = self.__getNode(f['/'], grpName)
             ds = grp.keys()
             return ds
         finally:
@@ -510,7 +515,7 @@ class H5pyDataStore(IDataStore.IDataStore):
         if props and not props.getChunked() and props.getCompression != 'NONE':
             raise StorageException("Data must be chunked to be compressed")
         grp = rec.getGroup()
-        group = self.__getGroup(f, grp, create=True)
+        group = self.__getNode(f['/'], grp, None, create=True)
 
         # reverse sizes for hdf5
         szDims = rec.getSizes()
@@ -637,37 +642,87 @@ class H5pyDataStore(IDataStore.IDataStore):
 
         return f, fd
 
-    def __getGroup(self, f, name, create=False):
+    def __getNode(self, rootNode, groupName, dsName=None, create=False):
         t0=time.time()
+
+        # expected output to be node for /group1::group2::group3/dataSet
+        # expected output of /group1::group2::group3/dataSet-interpolated/1
+        if groupName:
+            if dsName:
+                toNormalize=groupName + '/' + dsName
+            else:
+                toNormalize=groupName
+        elif dsName:
+            toNormalize=dsName
+        else:
+            # both None, return root node as default
+            return rootNode
+
+        tokens=toNormalize.split('/')
+
+        # remove any empty tokens
+        tokens = filter(None, tokens)
+
+        dsNameToken=None
+        if dsName:
+            # data set name was given, keep last token for ds name
+            dsNameToken = tokens.pop()
+
+        # need to check final token for -interpolated
+        isInterpToken = None
+        if tokens:
+            isInterpToken = tokens[-1]
+            if isInterpToken.endswith('-interpolated'):
+                del tokens[-1]
+                if dsNameToken:
+                    dsNameToken = isInterpToken + '/' + dsNameToken
+                else:
+                    dsNameToken = isInterpToken
+
+        if tokens:
+            basePath='::'.join(tokens)
+        else:
+            basePath=None
+
+        node = None
         if create:
-            parts = name.split('/')
-            grp = None
-            for s in parts:
-                if not grp:
-                    if not s:
-                        s = '/'
-                    if s == '/' or s in f.keys():
-                        grp = f[s]
-                    else:
-                        grp = f.create_group(s)
-                else:
-                    if s:
-                        if s in grp.keys():
-                            grp = grp[s]
-                        else:
-                            grp = grp.create_group(s)
+            if basePath is None:
+                node = rootNode
+            elif basePath in rootNode.keys():
+                node = rootNode[basePath]
+            else:
+                node = rootNode.create_group(basePath)
+
+            if dsNameToken:
+                for token in dsNameToken.split('/'):
+                    if token in node.keys():
+                        node = node[token]
+                    else:
+                        node = node.create_group(token)
         else:
-            if name is None or len(name.strip()) == 0:
-                # if no group is specific default to base group
-                grp = f['/']
-            else:
-                try:
-                    group=name
-                    if not group.startswith('/'):
-                        group = '/' + group
-                    grp = f[group]
-                except:
-                    raise StorageException("No group " + name + " found")
+            if dsNameToken:
+                if basePath:
+                    basePath += '/' + dsNameToken
+                else:
+                    basePath = dsNameToken
+
+            try:
+                if basePath:
+                    node = rootNode[basePath]
+                else:
+                    node = rootNode
+            except:
+                group = None
+                if groupName:
+                    group = groupName
+                    if dsName:
+                        group += '/' + dsName
+                elif dsName:
+                    group = dsName
+
+                # check old group structure
+                node = self.__getGroup(rootNode, group)
 
         t1=time.time()
         if timeMap.has_key('getGroup'):
@@ -675,6 +730,22 @@ class H5pyDataStore(IDataStore.IDataStore):
         else:
             timeMap['getGroup']=t1-t0
 
-        return grp
+        return node
+
+    # deprecated, should only be called in transition period
+    def __getGroup(self, rootNode, name):
+        if name is None or len(name.strip()) == 0:
+            # if no group is specific default to base group
+            grp = rootNode
+        else:
+            try:
+                group=name
+                if group.startswith('/'):
+                    group = group[1:]
+                grp = rootNode[group]
+            except:
+                raise StorageException("No group " + name + " found")
+
+        return grp
 
     def __link(self, group, linkName, dataset):
@@ -793,5 +864,3 @@ class H5pyDataStore(IDataStore.IDataStore):
         finally:
             if lock:
                 LockManager.releaseLock(lock)
-
-
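
The net effect on file layout, as a minimal h5py sketch (file and group names here are illustrative, not taken from the patch):

import h5py

# old structure: one HDF5 group object per path component
with h5py.File('old_layout.h5', 'w') as f:
    f.create_group('group1/group2/group3')       # creates /group1, /group1/group2, /group1/group2/group3
    print(list(f.keys()))                        # ['group1']

# condensed structure written by this commit: a single flat group at the root
with h5py.File('new_layout.h5', 'w') as f:
    f.create_group('group1::group2::group3')     # exactly one group; '::' is just part of the name
    print(list(f.keys()))                        # ['group1::group2::group3']

A flat '::' name means one group object and one lookup where the old layout needed several, which appears to be the motivation for the condensed structure.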