Issue #1861 - re-defined modelsounding hdf5 path

Amend - forecast hours are now encoded in the name of the file instead of the hdf5 path. Updates
        ensure that a file is generated for every forecast hour; the updates do not affect
        ingest time, but they may have a slight effect on latency
Amend - changed the hdf5 file naming convention
Amend - create hard links to the original hdf5 files instead of duplicating them

Change-Id: I364bcfaabe0a2254e558e5b66588e4a26d2a2e68

Former-commit-id: 3bf459dc65ace7aa10f88f168278b16f3c37aa5f
Bryan Kowal 2013-04-26 10:11:48 -05:00
parent b8f8f0e7b1
commit a64a65e976
12 changed files with 1128 additions and 846 deletions
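For orientation, the sketch below is an editor's illustration and not part of the commit: it shows where a single sounding record lands under the new scheme. The model name "ETA", the reference time, and forecast hour 6 are made-up example values; the directory and file-name patterns follow the update script and ModelSoundingPathProvider further down.

# Illustration only -- not part of this commit. "ETA", the reference time, and
# forecast hour 6 are hypothetical example values.
import datetime

plugin = "modelsounding"
model = "ETA"                                  # reportType
reftime = datetime.datetime(2013, 4, 26, 12)   # dataTime.refTime
forecastHour = 6                               # fcstSeconds / 3600

# hdf5 path: <plugin>/<reportType>/<refTime directory> (the forecast hour is NOT part of the path)
directory = plugin + "/" + model + "/" + reftime.strftime("%Y-%m-%d_%H:00:00.0")
# hdf5 file name: <plugin>-<reportType>-yyyy-MM-dd-HH-FH-<forecastHour>.h5
fileName = plugin + "-" + model + reftime.strftime("-%Y-%m-%d-%H") + "-FH-" + str(forecastHour) + ".h5"

print(directory + "/" + fileName)
# -> modelsounding/ETA/2013-04-26_12:00:00.0/modelsounding-ETA-2013-04-26-12-FH-6.h5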

View file

@ -0,0 +1,20 @@
#!/awips2/python/bin/python
import re
import sys
# we expect the filename as a command-line argument.
hdf5file = sys.argv[1]
matches = re.search(r'modelsounding-([0-9]+-[0-9]+-[0-9]+)-([0-9]+).h5', hdf5file, re.M|re.I)
if matches:
    # extract the date
    # extract the hour
    date = matches.group(1)
    hour = matches.group(2)

    reftimeDirName = date + "_" + hour + ":00:00.0"
    print reftimeDirName
else:
    print "ERROR: unrecognized file - " + hdf5file + "!"
    sys.exit(-1)
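As a quick check of the mapping above (editor's illustration with a hypothetical file name, not part of the commit), an old-style file name is turned into a reference-time directory name like this:

import re
# e.g. an old-style file produced before this change (hypothetical name)
m = re.search(r'modelsounding-([0-9]+-[0-9]+-[0-9]+)-([0-9]+).h5', 'modelsounding-2013-04-26-12.h5')
print(m.group(1) + "_" + m.group(2) + ":00:00.0")   # -> 2013-04-26_12:00:00.0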

View file

@ -0,0 +1,20 @@
#!/awips2/python/bin/python
import re
import sys
# we expect the filename, model name, and forecast hour as arguments
hdf5file = sys.argv[1]
model = sys.argv[2]
forecastHour = sys.argv[3]
matches = re.search(r'modelsounding-([0-9]+-[0-9]+-[0-9]+-[0-9]+).h5', hdf5file, re.M|re.I)
if matches:
    # extract the reftime
    reftime = matches.group(1)

    newFileName = "modelsounding-" + model + "-" + reftime + "-FH-" + str(forecastHour) + ".h5"
    print newFileName
else:
    print "ERROR: unrecognized file - " + hdf5file + "!"
    sys.exit(-1)
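And the corresponding renaming (again an editor's illustration with hypothetical inputs: model "ETA", forecast hour 6) produces the new forecast-hour-specific file name:

import re
m = re.search(r'modelsounding-([0-9]+-[0-9]+-[0-9]+-[0-9]+).h5', 'modelsounding-2013-04-26-12.h5')
print("modelsounding-ETA-" + m.group(1) + "-FH-" + str(6) + ".h5")
# -> modelsounding-ETA-2013-04-26-12-FH-6.h5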

View file

@ -0,0 +1,130 @@
#!/bin/bash
# DR #1846 - this update script re-arranges the existing modelsounding hdf5 files to divide them by
# model name and reference time. Every existing file is hard-linked into every potential path at which
# it could be accessed; any new files written after the upgrade is complete will contain only the
# minimum amount of required data.

# ensure that we actually have modelsounding data to re-arrange
DATA_DIRECTORY="/awips2/edex/data/hdf5/modelsounding"
if [ ! -d ${DATA_DIRECTORY} ]; then
    echo "INFO: No Model Sounding Data Was Found On The System!"
    echo "INFO: Update Terminated ..."
    exit 0
fi

# determine where we are
path_to_script=`readlink -f $0`
dir=$(dirname $path_to_script)

# first, retrieve all possible models
PSQL="/awips2/psql/bin/psql"
SQL="SELECT DISTINCT reporttype FROM awips.modelsounding ORDER BY reporttype;"
_modelslist=modelslist.txt

echo "INFO: update started."
pushd . > /dev/null 2>&1
cd ${DATA_DIRECTORY}

# retrieve the models
${PSQL} -U awips -d metadata -c "${SQL}" -t -o ${_modelslist}
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to retrieve the list of models."
    echo "FATAL: The update has failed."
    exit 1
fi

PYTHON="/awips2/python/bin/python"
_python_script="${dir}/determineRefTimeDirectory.py"
_python_script2="${dir}/modelsoundingFileName.py"
_fcsthourslist=fcsthourslist.txt

# now loop through the models
for model in `cat ${_modelslist}`; do
    # create a directory for the model.
    mkdir -p ${DATA_DIRECTORY}/${model}
    if [ $? -ne 0 ]; then
        echo "ERROR: Failed to create directory - ${DATA_DIRECTORY}/${model}!"
        echo "FATAL: The update has failed."
        exit 1
    fi

    # retrieve the potential forecast hours for the model that we are
    # currently processing.
    SQL_FCST_HOUR="SELECT DISTINCT (fcstSeconds / 3600) AS forecastHour FROM modelsounding WHERE reporttype = '${model}' ORDER BY forecastHour;"
    ${PSQL} -U awips -d metadata -c "${SQL_FCST_HOUR}" -t -o ${_fcsthourslist}

    # loop through the hdf5 files
    for file in `ls -1 *.h5`; do
        # determine which reftime directory would be associated with the file
        reftimeDirectory=`${PYTHON} ${_python_script} "${file}"`
        if [ $? -ne 0 ]; then
            echo "FATAL: The update has failed."
            exit 1
        fi

        # create the reftime directory
        mkdir -p "${DATA_DIRECTORY}/${model}/${reftimeDirectory}"
        if [ $? -ne 0 ]; then
            echo "ERROR: Failed to create directory - ${DATA_DIRECTORY}/${model}/${reftimeDirectory}!"
            echo "FATAL: The update has failed."
            exit 1
        fi

        # loop through the possible forecast hours
        for fcstHour in `cat ${_fcsthourslist}`; do
            # determine the new name of the file
            destinationFile=`${PYTHON} ${_python_script2} "${file}" "${model}" ${fcstHour}`
            if [ $? -ne 0 ]; then
                echo "ERROR: Failed to determine the adjusted name of file - ${file}!"
                echo "FATAL: The update has failed."
                exit 1
            fi

            # create a hard link between the files
            ln ${file} ${DATA_DIRECTORY}/${model}/${reftimeDirectory}/${destinationFile}
            if [ $? -ne 0 ]; then
                echo "ERROR: Failed to create a link for ${file} to ${DATA_DIRECTORY}/${model}/${reftimeDirectory}/${destinationFile}!"
                echo "FATAL: The update has failed."
                exit 1
            fi
        done
    done

    rm -f ${_fcsthourslist}
    if [ $? -ne 0 ]; then
        echo "WARNING: Failed to remove temporary file - ${_fcsthourslist}."
    fi
done

# remove the models list text file
rm -f ${_modelslist}
if [ $? -ne 0 ]; then
    echo "WARNING: Failed to remove temporary file - ${_modelslist}."
fi

# remove the hdf5 files
rm -f *.h5
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to remove the obsolete hdf5 files!"
    echo "WARNING: Removing the files manually is recommended."
fi

popd > /dev/null 2>&1
echo "INFO: the update has completed successfully!"
exit 0
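A note on the "ln" step above: because the script creates hard links rather than copies, the closing "rm -f *.h5" only removes the old directory entries; the data itself survives under the new names. A minimal sketch of the same idea (editor's illustration with hypothetical paths, assuming the source file and destination directory already exist on the same filesystem):

import os

original = "modelsounding-2013-04-26-12.h5"
destination = "ETA/2013-04-26_12:00:00.0/modelsounding-ETA-2013-04-26-12-FH-6.h5"

os.link(original, destination)          # equivalent of: ln ${file} ${destination}
print(os.stat(destination).st_nlink)    # typically 2 -- both names point at the same inode
os.remove(original)                     # like the script's final "rm -f *.h5"
print(os.stat(destination).st_nlink)    # back to 1; the data is still fully intact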

View file

@ -12,7 +12,8 @@ Require-Bundle: com.raytheon.edex.common,
org.apache.commons.logging,
javax.persistence,
com.raytheon.uf.common.site;bundle-version="1.12.1174",
com.raytheon.uf.common.status;bundle-version="1.12.1174"
com.raytheon.uf.common.status;bundle-version="1.12.1174",
org.apache.commons.lang;bundle-version="2.3.0"
Export-Package: com.raytheon.edex.plugin.modelsounding,
com.raytheon.edex.plugin.modelsounding.common,
com.raytheon.edex.plugin.modelsounding.dao,

View file

@ -60,6 +60,9 @@ import com.raytheon.uf.edex.wmo.message.WMOHeader;
* 20080303 1026 jkorman Initial implementation.
* 20080408 1039 jkorman Added traceId for tracing data.
* 11/25/08 #1684 chammack Camel Refactor
* 04/29/13 #1861 bkowal Create a separate Point Data Container for
* every record so each forecast hour will
* receive a unique hdf5 file.
*
* </pre>
*
@ -148,9 +151,13 @@ public class ModelSoundingDecoder extends AbstractDecoder implements
Iterator<BUFRDataDocument> iterator = document.iterator();
List<SoundingSite> pdoList = new ArrayList<SoundingSite>();
while (iterator.hasNext()) {
/*
* Would it be better to cache the Point Data Container
* based on reftime and forecast hour?
*/
PointDataContainer container = PointDataContainer
.build(pdd);
while (iterator.hasNext()) {
SoundingSite soundingData = ModelSoundingDataAdapter
.createSoundingData(iterator, wmoHeader,
container);

View file

@ -0,0 +1,72 @@
/**
* This software was developed and / or modified by Raytheon Company,
* pursuant to Contract DG133W-05-CQ-1067 with the US Government.
*
* U.S. EXPORT CONTROLLED TECHNICAL DATA
* This software product contains export-restricted data whose
* export/transfer/disclosure is restricted by U.S. law. Dissemination
* to non-U.S. persons whether in the United States or abroad requires
* an export license or other authorization.
*
* Contractor Name: Raytheon Company
* Contractor Address: 6825 Pine Street, Suite 340
* Mail Stop B8
* Omaha, NE 68106
* 402.291.0100
*
* See the AWIPS II Master Rights File ("Master Rights File.pdf") for
* further licensing information.
**/
package com.raytheon.edex.plugin.modelsounding.common;
import com.raytheon.uf.common.dataplugin.persist.DefaultPathProvider;
import com.raytheon.uf.common.dataplugin.persist.IPersistable;
/**
* Path Provider for Model Sounding Data.
*
* <pre>
*
* SOFTWARE HISTORY
*
* Date Ticket# Engineer Description
* ------------ ---------- ----------- --------------------------
* Apr 26, 2013 bkowal Initial creation
*
* </pre>
*
* @author bkowal
* @version 1.0
*/
public class ModelSoundingPathProvider extends DefaultPathProvider {
    private static final String FILENAME_SEPARATOR = "-";

    private static final String FORECAST_HR_SPECIFIER = "FH";

    /**
     * 
     */
    public ModelSoundingPathProvider() {
    }

    @Override
    public String getHDFFileName(String pluginName, IPersistable persistable) {
        SoundingSite soundingSite = (SoundingSite) persistable;
        long forecastHour = soundingSite.getFcstSeconds() / 3600;

        StringBuilder stringBuilder = new StringBuilder(pluginName);
        stringBuilder.append(FILENAME_SEPARATOR);
        stringBuilder.append(soundingSite.getReportType());
        stringBuilder.append(fileNameFormat.get().format(
                soundingSite.getDataTime().getRefTime()));
        stringBuilder.append(FILENAME_SEPARATOR);
        stringBuilder.append(FORECAST_HR_SPECIFIER);
        stringBuilder.append(FILENAME_SEPARATOR);
        stringBuilder.append(Long.toString(forecastHour));
        stringBuilder.append(DefaultPathProvider.HDF5_SUFFIX);

        return stringBuilder.toString();
    }
}

View file

@ -76,12 +76,8 @@ import com.vividsolutions.jts.geom.Geometry;
* Both refTime and forecastTime are included in the refTimeIndex since
* forecastTime is unlikely to be used.
*/
@org.hibernate.annotations.Table(
appliesTo = "modelsounding",
indexes = {
@Index(name = "modelsounding_refTimeIndex", columnNames = { "refTime", "forecastTime" } )
}
)
@org.hibernate.annotations.Table(appliesTo = "modelsounding", indexes = { @Index(name = "modelsounding_refTimeIndex", columnNames = {
"refTime", "forecastTime" }) })
@DynamicSerialize
@XmlAccessorType(XmlAccessType.NONE)
@XmlRootElement

View file

@ -19,8 +19,10 @@
**/
package com.raytheon.edex.plugin.modelsounding.dao;
import java.io.File;
import java.util.List;
import com.raytheon.edex.plugin.modelsounding.common.ModelSoundingPathProvider;
import com.raytheon.edex.plugin.modelsounding.common.SoundingSite;
import com.raytheon.uf.common.dataplugin.PluginException;
import com.raytheon.uf.edex.database.DataAccessLayerException;
@ -35,6 +37,10 @@ import com.raytheon.uf.edex.wmo.message.WMOHeader;
* Date Ticket# Engineer Description
* ------------ ---------- ----------- --------------------------
* 20080303 1026 jkorman Initial implementation.
* 20130426 1861 bkowal Added report type and forecast seconds as
* required keys for the hdf5 file name. Create
* a new method to generate hdf5 file names that
* will use the path provider.
*
* </pre>
*
@ -50,6 +56,7 @@ public class ModelSoundingDAO extends PointDataPluginDao<SoundingSite> {
*/
public ModelSoundingDAO(String pluginName) throws PluginException {
super(pluginName);
this.pathProvider = new ModelSoundingPathProvider();
}
/**
@ -118,7 +125,7 @@ public class ModelSoundingDAO extends PointDataPluginDao<SoundingSite> {
@Override
public String[] getKeysRequiredForFileName() {
return new String[] { "dataTime.refTime" };
return new String[] { "reportType", "dataTime.refTime", "fcstSeconds" };
}
@Override
@ -131,4 +138,12 @@ public class ModelSoundingDAO extends PointDataPluginDao<SoundingSite> {
return new SoundingSite();
}
@Override
protected String generatePointDataFileName(SoundingSite bean) {
return this.pluginName
+ File.separator
+ this.pathProvider.getHDFPath(this.pluginName, bean)
+ File.separator
+ this.pathProvider.getHDFFileName(this.pluginName, bean);
}
}

View file

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<pathKeySet>
    <pathKey>
        <key>reportType</key>
        <order>0</order>
    </pathKey>
    <pathKey>
        <key>dataTime.refTime</key>
        <order>1</order>
    </pathKey>
</pathKeySet>
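This appears to be the path key definition for the modelsounding plugin: reportType becomes the first directory level under the plugin's hdf5 directory and dataTime.refTime the second, which matches the model/reftime layout the update script builds on disk. The forecast hour is deliberately kept out of the path and only appears in the file name generated by ModelSoundingPathProvider.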

View file

@ -56,6 +56,7 @@ import com.raytheon.uf.common.time.util.TimeUtil;
* ------------ ---------- ----------- --------------------------
* 1/08/09 1674 bphillip Initial creation
* 04/08/13 1293 bkowal Removed references to hdffileid.
* 04/30/13 1861 bkowal Added constant for hdf5 file suffix.
* </pre>
*
* @author bphillip
@ -65,6 +66,8 @@ public class DefaultPathProvider implements IHDFFilePathProvider {
private static final transient IUFStatusHandler statusHandler = UFStatus
.getHandler(DefaultPathProvider.class);
public static final String HDF5_SUFFIX = ".h5";
public static final ThreadLocal<SimpleDateFormat> fileNameFormat = new ThreadLocal<SimpleDateFormat>() {
@Override
protected SimpleDateFormat initialValue() {

View file

@ -70,6 +70,9 @@ import com.raytheon.uf.edex.database.plugin.PluginDao;
* Apr 13, 2009 chammack Initial creation
* Jan 14, 2013 1469 bkowal Removed the hdf5 data directory.
* Apr 15, 2013 1868 bsteffen Rewrite mergeAll in PluginDao.
* Apr 29, 2013 1861 bkowal Refactor hdf5 filename generation during reads
* into its own method so modelsounding dao can
* override it.
*
* </pre>
*
@ -487,6 +490,13 @@ public abstract class PointDataPluginDao<T extends PluginDataObject> extends
}
bm.putAll(obj);
T bean = (T) bm.getBean();
return this.generatePointDataFileName(bean);
} finally {
this.beanMapCache.offer(bm);
}
}
protected String generatePointDataFileName(T bean) {
return this.pluginName
+ File.separator
+ this.pathProvider.getHDFPath(this.pluginName,
@ -496,9 +506,6 @@ public abstract class PointDataPluginDao<T extends PluginDataObject> extends
+ DefaultPathProvider.fileNameFormat.get().format(
((PluginDataObject) bean).getDataTime()
.getRefTime()) + ".h5";
} finally {
this.beanMapCache.offer(bm);
}
}
public abstract T newObject();