From 91e9a79a699cac97073717f71484e2065dc7f5f5 Mon Sep 17 00:00:00 2001 From: Max Schenkelberg Date: Tue, 24 Sep 2013 17:03:24 -0500 Subject: [PATCH] Issue #2081 Removed the special case handling of spaces in dataURIs. Provided upgrade script for data types that have underscores in their dataURIs that should be spaces. Amend: Fixed working directory issue Amend: Added comments and encoded '%' character as well as '/' Change-Id: I5297d35e1cbc33ee8d2af24da02b101bb6b766ba Former-commit-id: b9fd9b144fec12590bce7acb3584ba0fea79d794 [formerly fb6b1d3af1f7ba5c78717f23c267f7f3455cbdd1] [formerly b0416f43ba92247c66fd16208fbb1b5a4ff54f9c] [formerly e7220b37e6d8f70b0bc0a921070fa7432650d4db [formerly b0416f43ba92247c66fd16208fbb1b5a4ff54f9c [formerly c3a682e2885c81248fa0aa3f11d82f1d34ff74f7]]] Former-commit-id: e7220b37e6d8f70b0bc0a921070fa7432650d4db Former-commit-id: 6f2fb8d97387e58f6c1d8a11cbb4b5ae8bd3d627 [formerly 9243407623321f6f0ba4ac64f0653565cce6ce66] Former-commit-id: 972b7a1d3afe8308bb85b65782dab257140f1306 --- deltaScripts/14.2.1/uri_update.py | 154 ++++++++++++++++++ deltaScripts/14.2.1/uri_update.sh | 33 ++++ .../dataplugin/annotations/DataURIUtil.java | 60 +++++-- .../acars/decoder/ACARSDataAdapter.java | 6 +- 4 files changed, 235 insertions(+), 18 deletions(-) create mode 100755 deltaScripts/14.2.1/uri_update.py create mode 100755 deltaScripts/14.2.1/uri_update.sh diff --git a/deltaScripts/14.2.1/uri_update.py b/deltaScripts/14.2.1/uri_update.py new file mode 100755 index 0000000000..923ae6f24d --- /dev/null +++ b/deltaScripts/14.2.1/uri_update.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python + +from subprocess import Popen, PIPE +import sys +import h5py +import re +import os + +postgresCmd = "psql -U awips -d metadata -t -q -A " +hdf5loc = os.sep + "awips2" + os.sep + "edex" + os.sep + "data" + os.sep + "hdf5" + os.sep +postgres_dataURISeparator = '/' +hdf5_dataURISeparator = '::' + +ID_ID = 'id' +DATAURI_ID = 'datauri' +REFTIME_ID = 'to_char(reftime, 
\'YYYY-MM-DD-HH24\') as reftime' + +def printUsage(): + print "usage: (1-n times) " + sys.exit() + +def executePostgresSQL(sql): + result = Popen(postgresCmd + "-c \"" + sql + "\"", stdout=PIPE, shell=True) + retVal = [] + for line in result.stdout: + retVal.append(line.strip().split("|")) + return retVal + +def executePostgresSQLFile(file): + result = Popen(postgresCmd + "-f \"" + file + "\"", stdout=PIPE, shell=True) + retVal = [] + for line in result.stdout: + retVal.append(line.strip().split("|")) + return retVal + + +def processReplacements(plugin, replacements, hdf5Path): + columns = [ID_ID, DATAURI_ID] + hdf5Columns = [] + if hdf5Path is not None: + columns.append(REFTIME_ID) + regex = re.compile("\[([\w]+)\]") + for column in regex.findall(hdf5Path): + hdf5Columns.append(column) + if column not in columns: + columns.append(column) + + sql = "SELECT " + columns[0] + ", " + columns[1] + for i in range(2, len(columns)): + sql = sql + ", " + columns[i] + + sql = sql + " FROM " + plugin + + results = executePostgresSQL(sql) + toUpdate = [] + + id_idx = columns.index(ID_ID) + uri_idx = columns.index(DATAURI_ID) + reftime_idx = columns.index(REFTIME_ID) # NOTE(review): REFTIME_ID is only appended to columns when hdf5Path is not None, so this index() call raises ValueError on runs without an hdf5 path argument; needs a guard + + for result in results: + uri = result[uri_idx] + parts = uri.split(postgres_dataURISeparator) + update = False + for replacement in replacements: + idx = replacement[0] + 1; # offset by one: parts[0] is skipped when the uri is rebuilt below (presumably the empty string before the leading separator) + find = replacement[1] + replace = replacement[2] + if parts[idx].find(find) != -1: + parts[idx] = parts[idx].replace(find, replace) + update = True + + if update: + uri = "" + for i in range(1, len(parts)): + uri = uri + postgres_dataURISeparator + parts[i] + result.append(uri) # Append new uri to results + toUpdate.append(result) + + if len(toUpdate) > 0: + hdf5_file_mapping = {} + pathIndexes = [] + for hdf5PathColumn in hdf5Columns: + pathIndexes.append(columns.index(hdf5PathColumn)) + + updateFileName = os.sep + "tmp" + os.sep + plugin + ".uri_update_sql" + update_file = open(updateFileName, "w") + for update in toUpdate: + # Write 
UPDATE statement to sql file + id = update[id_idx] + new_uri = update[len(update)-1] # Last entry is updated uri + update_file.write("UPDATE " + plugin + " SET " + DATAURI_ID + + "='" + new_uri + "' WHERE " + ID_ID + "=" + + id + ";\n") + + if hdf5Path is not None: + path = plugin + os.sep + for pathIndex in pathIndexes: + path = path + update[pathIndex] + os.sep + path = path + plugin + "-" + update[reftime_idx] + ".h5" + file_updates = hdf5_file_mapping.get(path, None) + if file_updates is None: + file_updates = [] + hdf5_file_mapping[path] = file_updates + file_updates.append(update) + + update_file.close() + + # Execute and delete temporary file + executePostgresSQLFile(updateFileName) + os.remove(updateFileName) + + # Create hdf5 links from new uri to old uri + for hdf5File in hdf5_file_mapping.keys(): + absolutePath = hdf5loc + hdf5File + if os.path.exists(absolutePath) == True: + h5pyFile = h5py.File(absolutePath) + for entry in hdf5_file_mapping[hdf5File]: + old_uri = entry[uri_idx].replace(postgres_dataURISeparator, hdf5_dataURISeparator)[2:] + new_uri = entry[len(entry)-1].replace(postgres_dataURISeparator, hdf5_dataURISeparator)[2:] # rebuilt uri starts with the separator, so [2:] drops the leading "::" + hasOldUri = old_uri in h5pyFile + hasNewUri = new_uri in h5pyFile + if hasOldUri and not hasNewUri: + h5pyFile[new_uri] = h5pyFile[old_uri] + else: + print "Skipping linking", old_uri, "to", new_uri + ".", hasOldUri, hasNewUri + else: + print "Skipping non-existing file:", absolutePath + +if __name__ == '__main__': + numArgs = len(sys.argv) + if numArgs < 5: + printUsage() + + pluginName = sys.argv[1] + + replacements = [] + + inc = 3 + for i in range(2, numArgs, inc): + if (i + inc) <= numArgs: + replacements.append((int(sys.argv[i]), sys.argv[i + 1], sys.argv[i + 2])) + + if len(replacements) == 0: + printUsage() + + replacementArgs = len(replacements) * inc + + hdf5Path = None + + if (2 + replacementArgs) < numArgs: + hdf5Path = sys.argv[numArgs - 1] + + processReplacements(pluginName, replacements, hdf5Path) diff --git 
a/deltaScripts/14.2.1/uri_update.sh b/deltaScripts/14.2.1/uri_update.sh new file mode 100755 index 0000000000..75b1475464 --- /dev/null +++ b/deltaScripts/14.2.1/uri_update.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# This script updates all tables in A2 that should contain a space or / but instead have an underscore. +# uri_update.py takes the name of the plugin to update then 1-n sequences of uri index to check, +# character to look for at that index and character to replace it with. Optional last argument is +# an hdf5 path layout for modifying the datauris in hdf5. If no need to modify hdf5, this can be +# left blank. This was done in support of Redmine DR 2333 + +DIR=`dirname $0` + +# acars will replace _ with empty string to remove extra chars +python $DIR/uri_update.py acars 2 '_' '' + +# acars is special as it also needs to update the column that has extra spaces in it +PSQL="/awips2/psql/bin/psql" +${PSQL} -U awips -d metadata -c "UPDATE acars SET tailnumber = replace(tailnumber, ' ', '')" + +# bufrua needs to replace _ with space +python $DIR/uri_update.py bufrua 4 '_' ' ' + +# intlsigmet needs to replace _ with space +python $DIR/uri_update.py intlsigmet 3 '_' ' ' + +# satellite needs to replace _ with space at index 4 and 5 +python $DIR/uri_update.py satellite 4 '_' ' ' 5 '_' ' ' '[sectorid]/[physicalelement]/' + +# svrwx needs to replace _ with encoded %2F as the field actually contains a '/' in it +python $DIR/uri_update.py svrwx 3 '_' '%2F' + +# vaa needs to replace _ with space at index 2 and _ with encoded '/' at index 6 +python $DIR/uri_update.py vaa 2 '_' ' ' 6 '_' '%2F' + + diff --git a/edexOsgi/com.raytheon.uf.common.dataplugin/src/com/raytheon/uf/common/dataplugin/annotations/DataURIUtil.java b/edexOsgi/com.raytheon.uf.common.dataplugin/src/com/raytheon/uf/common/dataplugin/annotations/DataURIUtil.java index 7bd40cbdcf..fb5f092a62 100644 --- a/edexOsgi/com.raytheon.uf.common.dataplugin/src/com/raytheon/uf/common/dataplugin/annotations/DataURIUtil.java 
+++ b/edexOsgi/com.raytheon.uf.common.dataplugin/src/com/raytheon/uf/common/dataplugin/annotations/DataURIUtil.java @@ -55,6 +55,8 @@ import com.raytheon.uf.common.util.ConvertUtil; * May 15, 2013 1869 bsteffen Move uri map creation from RecordFactory. * May 16, 2013 1869 bsteffen Rewrite dataURI property mappings. * Aug 30, 2013 2298 rjpeter Make getPluginName abstract and removed setPluginName. + * Sep 24, 2013 2081 mschenke Removed special handling of spaces and only handle + * {@link DataURI#SEPARATOR} specially * * * @@ -63,19 +65,30 @@ import com.raytheon.uf.common.util.ConvertUtil; */ public class DataURIUtil { - private static final String PLUGIN_NAME_KEY = "pluginName"; + private static final String PLUGIN_NAME_KEY = PluginDataObject.PLUGIN_NAME_ID; private static final String FIELD_SEPARATOR = "."; private static final Pattern FIELD_SEPARATOR_PATTERN = Pattern.compile("[" + FIELD_SEPARATOR + "]"); - private static final Pattern SEPARATOR_PATTERN = Pattern + private static final String DATAURI_SEPARATOR_ENCODED = "%2F"; + + private static final String DATAURI_SEPARATOR_ESCAPE_CHAR = "%"; + + private static final String DATAURI_SEPARATOR_CHAR_ENCODED = "%25"; + + private static final Pattern DATAURI_SEPARATOR_ENCODED_PATTERN = Pattern + .compile(DATAURI_SEPARATOR_ENCODED); + + private static final Pattern DATAURI_SEPARATOR_PATTERN = Pattern .compile(DataURI.SEPARATOR); - private static final Pattern UNDERSCORE_PATTERN = Pattern.compile("_"); + private static final Pattern DATAURI_SEPARATED_ESCAPE_CHAR_PATTERN = Pattern + .compile(DATAURI_SEPARATOR_ESCAPE_CHAR); - private static final Pattern SPACE_PATTERN = Pattern.compile(" "); + private static final Pattern DATAURI_SEPARATOR_CHAR_ENCODED_PATTERN = Pattern + .compile(DATAURI_SEPARATOR_CHAR_ENCODED); /* * Compares two fields with the DataURI annotations based off the position. 
@@ -122,7 +135,7 @@ public class DataURIUtil { for (DataURIFieldAccess access : getAccess(pdo.getClass())) { addToDataURI(uri, access.getFieldValue(pdo)); } - return SPACE_PATTERN.matcher(uri).replaceAll("_"); + return uri.toString(); } /** @@ -140,22 +153,31 @@ public class DataURIUtil { for (DataURIFieldAccess access : getAccess(pluginName)) { addToDataURI(uri, dataMap.get(access.getFieldName())); } - return SPACE_PATTERN.matcher(uri).replaceAll("_"); + return uri.toString(); } /* * Properly formats an arbitrary object into a dataURI. */ private static void addToDataURI(StringBuilder uri, Object property) { - uri.append("/"); - if (property == null) { - uri.append("null"); - } else if (property instanceof Calendar) { - uri.append(TimeUtil.formatCalendar((Calendar) property)); + String propertyString; + if (property instanceof Calendar) { + propertyString = TimeUtil.formatCalendar((Calendar) property); } else { - uri.append(SEPARATOR_PATTERN.matcher(String.valueOf(property)) - .replaceAll("_")); + propertyString = String.valueOf(property); } + + // This is done so if the property actually contained '%2F' that + // wouldn't get converted to '/' when tokenized. %2F becomes %252F + // because the '%' is replaced with '%25' + String escapeCharEscaped = DATAURI_SEPARATED_ESCAPE_CHAR_PATTERN + .matcher(propertyString).replaceAll( + DATAURI_SEPARATOR_CHAR_ENCODED); + // Now replace any '/' with %2F to escape slashes in the property + String fullyEscapedProperty = DATAURI_SEPARATOR_PATTERN.matcher( + escapeCharEscaped).replaceAll(DATAURI_SEPARATOR_ENCODED); + + uri.append(DataURI.SEPARATOR).append(fullyEscapedProperty); } /** @@ -336,8 +358,15 @@ public class DataURIUtil { * Split a URI on the seperator and remove empty first element. 
*/ private static List tokenizeURI(String dataURI) { - dataURI = UNDERSCORE_PATTERN.matcher(dataURI).replaceAll(" "); - String[] tokens = SEPARATOR_PATTERN.split(dataURI); + String[] tokens = DATAURI_SEPARATOR_PATTERN.split(dataURI); + for (int i = 0; i < tokens.length; ++i) { + // Replace %2F with '/' + tokens[i] = DATAURI_SEPARATOR_ENCODED_PATTERN.matcher(tokens[i]) + .replaceAll(DataURI.SEPARATOR); + // Convert %25 to % + tokens[i] = DATAURI_SEPARATOR_CHAR_ENCODED_PATTERN.matcher( + tokens[i]).replaceAll(DATAURI_SEPARATOR_ESCAPE_CHAR); + } return Arrays.asList(tokens).subList(1, tokens.length); } @@ -495,4 +524,5 @@ public class DataURIUtil { } } } + } diff --git a/edexOsgi/com.raytheon.uf.edex.plugin.acars/src/com/raytheon/uf/edex/plugin/acars/decoder/ACARSDataAdapter.java b/edexOsgi/com.raytheon.uf.edex.plugin.acars/src/com/raytheon/uf/edex/plugin/acars/decoder/ACARSDataAdapter.java index 98a5c84a80..e599768c5f 100644 --- a/edexOsgi/com.raytheon.uf.edex.plugin.acars/src/com/raytheon/uf/edex/plugin/acars/decoder/ACARSDataAdapter.java +++ b/edexOsgi/com.raytheon.uf.edex.plugin.acars/src/com/raytheon/uf/edex/plugin/acars/decoder/ACARSDataAdapter.java @@ -209,7 +209,7 @@ public class ACARSDataAdapter { if (tailNumber != null) { rpt = new ACARSRecord(); - rpt.setTailNumber(tailNumber); + rpt.setTailNumber(tailNumber.trim()); rpt.setLocation(loc); getReceiver(subList, rpt, 7); @@ -257,7 +257,7 @@ public class ACARSDataAdapter { if (tailNumber != null) { rpt = new ACARSRecord(); - rpt.setTailNumber(tailNumber); + rpt.setTailNumber(tailNumber.trim()); rpt.setLocation(loc); rpt.setTimeObs(timeObs); @@ -302,7 +302,7 @@ public class ACARSDataAdapter { if (tailNumber != null) { rpt = new ACARSRecord(); - rpt.setTailNumber(tailNumber); + rpt.setTailNumber(tailNumber.trim()); rpt.setLocation(loc); rpt.setTimeObs(timeObs);