Issue #2090 Improve the performance of Madis purging

Amend: Add note to madisPurgeRules.xml about purge keys

Change-Id: I5c0d86d63e7b725cf2b1e6419c8dfcf6ed8396ef

Dustin Johnson 2013-06-11 14:14:09 -05:00
parent b9d6eb4cc5
commit 35de0052cc
3 changed files with 120 additions and 47 deletions

com/raytheon/uf/edex/database/plugin/PluginDao.java

@@ -112,6 +112,7 @@ import com.raytheon.uf.edex.database.query.DatabaseQuery;
* May 07, 2013 1869 bsteffen Remove dataURI column from
* PluginDataObject.
* May 16, 2013 1869 bsteffen Rewrite dataURI property mappings.
* Jun 11, 2013 2090 djohnson Separate the hdf5 purge by ref time for reuse.
*
* </pre>
*
@@ -1353,47 +1354,13 @@ public abstract class PluginDao extends CoreDao {
dataQuery.setMaxResults(500);
do {
pdos = (List<PluginDataObject>) this.queryByCriteria(dataQuery);
if ((pdos != null) && !pdos.isEmpty()) {
this.delete(pdos);
if (trackHdf5 && (hdf5FileToUriPurged != null)) {
purgeHdf5ForPdos(trackToUri, hdf5FileToUriPurged, pdos);
}
results += pdos.size();
@@ -1404,6 +1371,59 @@ public abstract class PluginDao extends CoreDao {
return results;
}
/**
* Purge HDF5 data for a list of PDOs. Extracted as is from
* {@link #purgeDataByRefTime} so it can be reused.
*
* @param trackToUri
* If true will track each URI that needs to be deleted from
* HDF5, if false will only track the hdf5 files that need to be
* deleted.
* @param hdf5FileToUriPurged
* Map to be populated with all of the hdf5 files that need to
* be updated. If trackToUri is true, each file maps to the
* exact data URIs to be removed from it. If trackToUri is
* false, the map will have a null entry for the list and only
* track the files.
* @param pdos
* the pdos
*/
protected void purgeHdf5ForPdos(boolean trackToUri,
Map<String, List<String>> hdf5FileToUriPurged,
List<PluginDataObject> pdos) {
// fields for hdf5 purge
String previousFile = null;
for (PluginDataObject pdo : pdos) {
StringBuilder pathBuilder = new StringBuilder();
IPersistable persist = (IPersistable) pdo;
pathBuilder
.append(PLUGIN_HDF5_DIR)
.append(pathProvider.getHDFPath(this.pluginName, persist))
.append(File.separatorChar)
.append(pathProvider.getHDFFileName(this.pluginName,
persist));
String file = pathBuilder.toString();
if (trackToUri) {
List<String> uriList = hdf5FileToUriPurged.get(file);
if (uriList == null) {
// sizing to 50 as most data types have numerous
// entries in a file
uriList = new ArrayList<String>(50);
hdf5FileToUriPurged.put(file, uriList);
}
uriList.add(file);
} else {
// only need to track file, tracking last file
// instead of constantly indexing hashMap
if (!file.equals(previousFile)) {
hdf5FileToUriPurged.put(file, null);
previousFile = file;
}
}
}
}
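
For illustration, a minimal standalone sketch (plain JDK types only; the file name and entry below are hypothetical, not from the commit) of the map shape each tracking mode produces:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class TrackingModeSketch {
    public static void main(String[] args) {
        // trackToUri == true: each hdf5 file maps to the entries to remove
        // from it, so individual datasets can be purged from the file.
        Map<String, List<String>> byUri = new HashMap<String, List<String>>();
        List<String> uriList = new ArrayList<String>(50); // sized as in PluginDao
        uriList.add("/madis/2013-06-11_14:00:00.0"); // hypothetical entry
        byUri.put("madis/madis-2013-06-11-14.h5", uriList); // hypothetical file

        // trackToUri == false: only file names are recorded; the value stays
        // null, so whole files are handled without per-entry bookkeeping.
        Map<String, List<String>> filesOnly = new HashMap<String, List<String>>();
        filesOnly.put("madis/madis-2013-06-11-14.h5", null);

        System.out.println("trackToUri=true  -> " + byUri);
        System.out.println("trackToUri=false -> " + filesOnly);
    }
}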
/**
* Purges the HDF5 data according to the provided time and key.
*

com/raytheon/uf/edex/plugin/madis/MadisDao.java

@@ -23,11 +23,13 @@ package com.raytheon.uf.edex.plugin.madis;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import javax.xml.bind.JAXBException;
import com.raytheon.uf.common.dataplugin.PluginDataObject;
import com.raytheon.uf.common.dataplugin.PluginException;
import com.raytheon.uf.common.dataplugin.madis.MadisRecord;
import com.raytheon.uf.common.dataquery.db.QueryParam;
@@ -36,14 +38,16 @@ import com.raytheon.uf.common.pointdata.spatial.ObStation;
import com.raytheon.uf.common.status.IUFStatusHandler;
import com.raytheon.uf.common.status.UFStatus;
import com.raytheon.uf.common.status.UFStatus.Priority;
import com.raytheon.uf.common.util.CollectionUtil;
import com.raytheon.uf.edex.database.DataAccessLayerException;
import com.raytheon.uf.edex.database.plugin.PluginDao;
import com.raytheon.uf.edex.database.query.DatabaseQuery;
import com.raytheon.uf.edex.pointdata.PointDataDbDescription;
import com.raytheon.uf.edex.pointdata.PointDataPluginDao;
import com.raytheon.uf.edex.pointdata.spatial.ObStationDao;
/**
* MadisDao MADIS data DAO
*
* <pre>
*
@@ -51,6 +55,7 @@ import com.raytheon.uf.edex.pointdata.spatial.ObStationDao;
* Date Ticket# Engineer Description
* ------------ ---------- ----------- --------------------------
* MAR 27, 2013 1746 dhladky MADIS data record creation
* Jun 11, 2013 2090 djohnson Override purgeDataByRefTime to improve purge performance.
*
* </pre>
*
@@ -59,22 +64,22 @@ import com.raytheon.uf.edex.pointdata.spatial.ObStationDao;
*/
public class MadisDao extends PointDataPluginDao<MadisRecord> {
/** The station dao */
private ObStationDao obDao = new ObStationDao();
private static final IUFStatusHandler statusHandler = UFStatus
.getHandler(MadisDao.class);
public List<?> queryBySpatialBox(double upperLeftLat, double upperLeftLon,
double lowerRightLat, double lowerRightLon)
throws DataAccessLayerException {
List<ObStation> stationList = obDao.queryBySpatialBox(upperLeftLat,
upperLeftLon, lowerRightLat, lowerRightLon);
List<String> stationNames = new ArrayList<String>();
for (ObStation ob : stationList) {
stationNames.add(ob.getIcao());
}
@@ -104,8 +109,8 @@ public class MadisDao extends PointDataPluginDao<MadisRecord> {
*/
public Object[] queryDataUriColumn(final String dataUri) {
String sql = "select datauri from awips.madis where datauri='" + dataUri
+ "';";
String sql = "select datauri from awips.madis where datauri='"
+ dataUri + "';";
Object[] results = executeSQLQuery(sql);
@@ -128,7 +133,8 @@ public class MadisDao extends PointDataPluginDao<MadisRecord> {
} catch (JAXBException e) {
statusHandler.error("Unable to load madis Point Data Description",
e);
throw new PluginException("Unable to load madis Point Data Description!", e);
throw new PluginException(
"Unable to load madis Point Data Description!", e);
}
}
@@ -138,7 +144,6 @@ public class MadisDao extends PointDataPluginDao<MadisRecord> {
return hdf5DataDescription;
}
public ObStationDao getObDao() {
return obDao;
}
@@ -161,7 +166,7 @@ public class MadisDao extends PointDataPluginDao<MadisRecord> {
public MadisRecord newObject() {
return new MadisRecord();
}
/*
* (non-Javadoc)
*
@@ -196,5 +201,51 @@ public class MadisDao extends PointDataPluginDao<MadisRecord> {
return dbDataDescription;
}
/**
* Overridden because {@link PluginDao} retrieves all PDO instances prior to
* purging them, in order to calculate HDF5 paths for each one. In the case
* of {@link MadisRecord} objects, the granularity is only down to the hour
* level; therefore we can just pull one, calculate the HDF5 path, and purge all
* entities without retrieving them.
*/
@Override
public int purgeDataByRefTime(Date refTime,
Map<String, String> productKeys, boolean trackHdf5,
boolean trackToUri, Map<String, List<String>> hdf5FileToUriPurged)
throws DataAccessLayerException {
DatabaseQuery dataQuery = new DatabaseQuery(this.daoClass);
if (refTime != null) {
dataQuery.addQueryParam(PURGE_VERSION_FIELD, refTime);
}
if ((productKeys != null) && (productKeys.size() > 0)) {
for (Map.Entry<String, String> pair : productKeys.entrySet()) {
dataQuery.addQueryParam(pair.getKey(), pair.getValue());
}
}
// Keep track of the old max results so we can use the same query to
// find an example pdo to get the HDF5 path
final Integer oldMaxResults = dataQuery.getMaxResults();
dataQuery.setMaxResults(1);
@SuppressWarnings("unchecked")
final List<PluginDataObject> pdos = (List<PluginDataObject>) this
.queryByCriteria(dataQuery);
if (CollectionUtil.isNullOrEmpty(pdos)) {
return 0;
}
// Restore the old max results so it targets all entities
dataQuery.setMaxResults(oldMaxResults);
int numberDeleted = this.deleteByCriteria(dataQuery);
if (trackHdf5 && (hdf5FileToUriPurged != null)) {
purgeHdf5ForPdos(trackToUri, hdf5FileToUriPurged, pdos);
}
return numberDeleted;
}
}
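
For context, a hedged sketch of how a caller could drive this override; the helper method and the empty product-key map below are illustrative assumptions, not part of the commit:

// Hypothetical helper, not from the commit: purge all MADIS data at one
// reference time while tracking only which hdf5 files were touched.
public int purgeOneRefTime(MadisDao dao, Date refTime)
        throws DataAccessLayerException {
    // No product-key filtering: match every MADIS record at this ref time.
    Map<String, String> productKeys = Collections.emptyMap();
    Map<String, List<String>> hdf5FileToUriPurged =
            new HashMap<String, List<String>>();
    int deleted = dao.purgeDataByRefTime(refTime, productKeys,
            true /* trackHdf5 */, false /* trackToUri */, hdf5FileToUriPurged);
    // hdf5FileToUriPurged now names the hdf5 files to clean up on disk.
    return deleted;
}

The win over the base-class path is visible above: PluginDao fetches PDOs in batches of 500 and deletes the entities one batch at a time, while this override fetches a single example PDO for the HDF5 path and issues one deleteByCriteria call for everything else.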

madisPurgeRules.xml

@@ -1,6 +1,8 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!-- default keep 3 hours -->
<purgeRuleSet>
<!-- Purge keys must match paths keys exactly.
Please see MadisDao for details on purging. -->
<defaultRule>
<period>00-03:00:00</period>
</defaultRule>