awips2/deltaScripts/archived/future/combineH5Files.py
Richard Peter 55708f1d55 Issue #2854: Reorganize deltaScripts
Former-commit-id: 09f41a6b31 [formerly 56745c942e] [formerly 09f41a6b31 [formerly 56745c942e] [formerly c8e373c098 [formerly 469c2c597b80fd725f474e0d645656e4054fd69a]]]
Former-commit-id: c8e373c098
Former-commit-id: a526600e3e [formerly e1721cad00]
Former-commit-id: 18b4d133c7
2014-04-30 13:03:44 -05:00

82 lines
No EOL
2.7 KiB
Python

import os, subprocess, re, stat, sys, time
import h5py
# Recognises paths that end in a timestamped HDF5 name such as
# "<prefix>-2014043013.h5" or "<prefix>--20140430.h5".  Group 1 captures the
# one-or-two dashes plus the run of six or more digits, i.e. the portion that
# is cut out to build the combined file name.
# The dot is escaped and the pattern is anchored at the end so that only a
# genuine ".h5" extension qualifies (not "Xh5", and not ".h5.bak").
matchRe = re.compile(r'.*?(-{1,2}\d{6,})\.h5$')
def findEachGroup(group, datasetList):
    """Recursively collect the names of all datasets found under ``group``.

    Args:
        group: an h5py object to inspect.  Groups are descended into,
            datasets are recorded, anything else is ignored.
        datasetList: list that is appended to in place with the ``name``
            attribute of every dataset encountered.

    Returns:
        None; results are delivered through ``datasetList``.
    """
    # h5py.Group / h5py.Dataset are the package-level names for these classes;
    # the older h5py.highlevel namespace is deprecated and absent from
    # h5py 3.x.  isinstance() also accepts subclasses (e.g. an open File).
    if isinstance(group, h5py.Group):
        for childName in group.keys():
            findEachGroup(group[childName], datasetList)
    elif isinstance(group, h5py.Dataset):
        datasetList.append(group.name)
def processFile(filename, match):
    """Fold one timestamped HDF5 file into its un-timestamped counterpart.

    The text captured by group 1 of ``match`` is removed from ``filename`` to
    obtain the target name.  If the target does not exist yet, the file is
    simply renamed to it.  Otherwise every dataset in the file is copied into
    the existing target and the source file is then deleted.

    Args:
        filename: path of the timestamped file.  Must be the exact string
            that ``match`` was produced from, because the match offsets are
            used to slice it.
        match: regular-expression match object whose group 1 spans the
            suffix to strip.

    Returns:
        None.
    """
    # Slice using the match's own offsets.  Searching the path for the
    # captured text (or for '.h5') would hit the FIRST occurrence, which is
    # wrong whenever a parent directory carries the same timestamp or '.h5'.
    reducedFilename = filename[:match.start(1)] + filename[match.end(1):]

    if not os.path.exists(reducedFilename):
        # this is the first one, just rename it
        try:
            os.rename(filename, reducedFilename)
        except OSError as e:
            # Best effort: report and move on to the next file.
            print(e)
        return

    # open the file, find the datasets
    datasetList = []
    hfile = None
    try:
        hfile = h5py.File(filename, 'r')
        findEachGroup(hfile['/'], datasetList)
    finally:
        if hfile is not None:
            hfile.close()

    # for each dataset in the file, copy it into the output file;
    # copy() reports its own failures.
    for dataset in datasetList:
        copy(filename, dataset, reducedFilename)

    # remove original file
    # NOTE(review): the source is removed even when one or more dataset copies
    # failed -- the success check was deliberately disabled upstream.  Confirm
    # this is still wanted before relying on it for irreplaceable data.
    os.remove(filename)
def fileWalk(pth):
    """Walk ``pth`` depth-first and process every file matching ``matchRe``.

    A directory is expanded in ``os.listdir`` order, recursing into each
    entry.  Anything that is not a directory is tested against ``matchRe``
    and, on a match, handed to ``processFile`` together with the match.

    Args:
        pth: path of a directory or file.
    """
    if not os.path.isdir(pth):
        found = matchRe.match(pth)
        if found:
            processFile(pth, found)
        return

    for entry in os.listdir(pth):
        fileWalk(pth + '/' + entry)
def copy(filename, dataset, reducedFilename):
    """Copy one dataset between HDF5 files by invoking the ``h5copy`` tool.

    Args:
        filename: path of the input file (passed as ``-i``).
        dataset: object path used for both source (``-s``) and
            destination (``-d``).
        reducedFilename: path of the output file (passed as ``-o``).

    Returns:
        True if ``h5copy`` exited with status 0, otherwise False after
        printing a failure message.
    """
    # note that this copies links as if they were real datasets, increasing
    # the size of the output file
    exitCode = subprocess.call([
        'h5copy', '-p',
        '-i', filename,
        '-o', reducedFilename,
        '-s', dataset,
        '-d', dataset,
    ])

    if exitCode != 0:
        # One pre-formatted string keeps the output text the same whether
        # print is a statement or a function.
        print("Failed to copy filename: %s dataset: %s" % (filename, dataset))
        return False

    # Read/write/execute for user and group, read/execute for others.
    outputMode = (stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR
                  | stat.S_IRGRP | stat.S_IWGRP | stat.S_IXGRP
                  | stat.S_IROTH | stat.S_IXOTH)
    os.chmod(reducedFilename, outputMode)
    return True
def main():
    """Command-line entry point: combine HDF5 files under ``sys.argv[1]``.

    Prints a usage hint when no directory argument is supplied; otherwise
    walks the given path with ``fileWalk`` and prints the elapsed wall-clock
    time in seconds.
    """
    if len(sys.argv) < 2:
        print("Please provide full path to input directory")
        return

    inputDir = sys.argv[1]
    started = time.time()
    fileWalk(inputDir)
    elapsed = time.time() - started
    print("Total copy time for directory %s %s seconds" % (inputDir, elapsed))


if __name__ == '__main__':
    main()