Issue #2081 Removed the special case handling of spaces in dataURIs. Provided upgrade script for data types that have underscores in their dataURIs that should be spaces.

Amend: Fixed working directory issue
Amend: Added comments and encoded '%' character as well as '/'

Change-Id: I5297d35e1cbc33ee8d2af24da02b101bb6b766ba

Former-commit-id: b9fd9b144f [formerly fb6b1d3af1] [formerly b0416f43ba] [formerly b9fd9b144f [formerly fb6b1d3af1] [formerly b0416f43ba] [formerly e7220b37e6 [formerly b0416f43ba [formerly c3a682e2885c81248fa0aa3f11d82f1d34ff74f7]]]]
Former-commit-id: e7220b37e6
Former-commit-id: 91e9a79a69 [formerly 972b7a1d3a] [formerly 6f2fb8d97387e58f6c1d8a11cbb4b5ae8bd3d627 [formerly 9243407623]]
Former-commit-id: 0267ea07ae4c3611444a785bda045b792e9c1f28 [formerly 600eb7a3a6]
Former-commit-id: 9c906f67cb
This commit is contained in:
Max Schenkelberg 2013-09-24 17:03:24 -05:00
parent e00e700acd
commit 8558087ae9
4 changed files with 235 additions and 18 deletions

154
deltaScripts/14.2.1/uri_update.py Executable file
View file

@ -0,0 +1,154 @@
#!/usr/bin/env python
from subprocess import Popen, PIPE
import sys
import h5py
import re
import os
# Base psql invocation shared by every query; callers append -c or -f.
postgresCmd = "psql -U awips -d metadata -t -q -A "

# Root directory of the hdf5 store (always ends with a path separator)
hdf5loc = os.sep.join(["", "awips2", "edex", "data", "hdf5", ""])

# Field separator of a dataURI as stored in postgres / as named inside hdf5
postgres_dataURISeparator = "/"
hdf5_dataURISeparator = "::"

# Column expressions selected from the plugin table
ID_ID = "id"
DATAURI_ID = "datauri"
REFTIME_ID = "to_char(reftime, 'YYYY-MM-DD-HH24') as reftime"
def printUsage():
    """Print the command-line usage summary and terminate the script."""
    usage = "usage: <plugin> <index1> <findAt_Index1> <replaceWith_Index1> (1-n times) <optional hdf5 file path relative to plugin name>"
    # A single parenthesised string prints identically on Python 2 and 3
    print(usage)
    sys.exit()
def _runPostgres(extraArgs):
    """Run psql with the shared options plus extraArgs and parse its output.

    Returns a list with one entry per output line; each entry is the line
    stripped of surrounding whitespace and split on "|".
    """
    # An argument list (no shell) keeps quotes, '$' and backticks in the SQL
    # text or file name from being interpreted by a shell.
    process = Popen(postgresCmd.split() + list(extraArgs), stdout=PIPE,
                    universal_newlines=True)
    retVal = []
    try:
        for line in process.stdout:
            retVal.append(line.strip().split("|"))
    finally:
        process.stdout.close()
        # Reap the child so psql has finished before the caller continues
        # (e.g. before a temporary SQL file is deleted).
        status = process.wait()
    if status != 0:
        # Previously failures were silent; report them but keep returning
        # whatever output was produced so existing callers behave the same.
        sys.stderr.write("psql exited with status %s\n" % status)
    return retVal


def executePostgresSQL(sql):
    """Execute a single SQL command string through psql (-c).

    Returns the output rows as lists of column strings.
    """
    return _runPostgres(["-c", sql])


def executePostgresSQLFile(file):
    """Execute the SQL statements contained in the given file through psql (-f).

    Returns the output rows as lists of column strings.
    """
    return _runPostgres(["-f", file])
def _rewriteDataURI(uri, replacements):
    """Apply the replacements to one dataURI.

    Returns the rebuilt URI string, or None when no replacement matched and
    the row therefore needs no update.
    """
    parts = uri.split(postgres_dataURISeparator)
    changed = False
    for replacement in replacements:
        # parts[0] is the text before the first separator, so the requested
        # field index is shifted by one.
        idx = replacement[0] + 1
        find = replacement[1]
        if find in parts[idx]:
            parts[idx] = parts[idx].replace(find, replacement[2])
            changed = True
    if not changed:
        return None
    # Rebuild with a leading separator in front of every field after parts[0]
    return "".join(postgres_dataURISeparator + part for part in parts[1:])


def _linkHdf5Uris(hdf5_file_mapping, uri_idx):
    """In each hdf5 file, make the new URI name refer to the old URI's entry.

    hdf5_file_mapping maps a file path relative to hdf5loc to the updated
    rows stored in that file; the last element of each row is the new URI.
    """
    for hdf5File in hdf5_file_mapping:
        absolutePath = hdf5loc + hdf5File
        if not os.path.exists(absolutePath):
            print("Skipping non-existing file: " + absolutePath)
            continue
        # The file is modified, so open it read/write explicitly instead of
        # relying on the library default (which differs between h5py
        # releases -- NOTE(review): confirm against the deployed version).
        h5pyFile = h5py.File(absolutePath, "a")
        try:
            for entry in hdf5_file_mapping[hdf5File]:
                # [2:] drops the leading "::" that comes from the URI's
                # leading separator.
                old_uri = entry[uri_idx].replace(postgres_dataURISeparator, hdf5_dataURISeparator)[2:]
                new_uri = entry[-1].replace(postgres_dataURISeparator, hdf5_dataURISeparator)[2:]
                hasOldUri = old_uri in h5pyFile
                hasNewUri = new_uri in h5pyFile
                if hasOldUri and not hasNewUri:
                    h5pyFile[new_uri] = h5pyFile[old_uri]
                else:
                    print("Skipping linking %s to %s. %s %s" % (old_uri, new_uri, hasOldUri, hasNewUri))
        finally:
            # Always release the file handle, even if linking fails
            h5pyFile.close()


def processReplacements(plugin, replacements, hdf5Path):
    """Rewrite fields of the dataURIs stored in a plugin's table.

    Selects id and datauri from the table named plugin, applies every
    replacement to each URI, and writes the changed URIs back with UPDATE
    statements run from a temporary SQL file. When hdf5Path is given, the
    matching hdf5 files are also opened and each new URI name is linked to
    the entry stored under the old URI name.

    plugin       -- table name; also the hdf5 directory and file-name prefix
    replacements -- sequence of (field index, text to find, replacement text)
    hdf5Path     -- layout string whose [column] placeholders name the table
                    columns forming the hdf5 sub-directories, or None to
                    leave hdf5 untouched. Only the bracketed names are used.
    """
    columns = [ID_ID, DATAURI_ID]
    hdf5Columns = []
    if hdf5Path is not None:
        columns.append(REFTIME_ID)
        for column in re.findall(r"\[([\w]+)\]", hdf5Path):
            hdf5Columns.append(column)
            if column not in columns:
                columns.append(column)

    sql = "SELECT " + ", ".join(columns) + " FROM " + plugin
    results = executePostgresSQL(sql)

    id_idx = columns.index(ID_ID)
    uri_idx = columns.index(DATAURI_ID)
    # reftime is only selected when an hdf5 layout was supplied; looking it
    # up unconditionally raised ValueError for database-only runs.
    reftime_idx = columns.index(REFTIME_ID) if hdf5Path is not None else None

    toUpdate = []
    for result in results:
        if len(result) < len(columns):
            # Not a complete data row (for instance a blank output line)
            continue
        new_uri = _rewriteDataURI(result[uri_idx], replacements)
        if new_uri is not None:
            result.append(new_uri)  # Append new uri to results
            toUpdate.append(result)

    if not toUpdate:
        return

    pathIndexes = [columns.index(hdf5PathColumn) for hdf5PathColumn in hdf5Columns]
    hdf5_file_mapping = {}
    updateFileName = os.sep + "tmp" + os.sep + plugin + ".uri_update_sql"
    try:
        with open(updateFileName, "w") as update_file:
            for update in toUpdate:
                # Write UPDATE statement to sql file
                row_id = update[id_idx]
                new_uri = update[-1]  # Last entry is updated uri
                # Double any single quote so the URI stays a valid SQL literal
                update_file.write("UPDATE " + plugin + " SET " + DATAURI_ID
                                  + "='" + new_uri.replace("'", "''")
                                  + "' WHERE " + ID_ID + "=" + row_id + ";\n")
                if hdf5Path is not None:
                    # <plugin>/<col value>/.../<plugin>-<reftime>.h5
                    path = plugin + os.sep
                    for pathIndex in pathIndexes:
                        path = path + update[pathIndex] + os.sep
                    path = path + plugin + "-" + update[reftime_idx] + ".h5"
                    hdf5_file_mapping.setdefault(path, []).append(update)
        executePostgresSQLFile(updateFileName)
    finally:
        # Delete the temporary file even when writing or executing failed
        if os.path.exists(updateFileName):
            os.remove(updateFileName)

    # Create hdf5 links from new uri to old uri
    _linkHdf5Uris(hdf5_file_mapping, uri_idx)
if __name__ == '__main__':
    # Arguments: <plugin>, then one or more (index, find, replace) triples,
    # then an optional trailing hdf5 path layout.
    numArgs = len(sys.argv)
    if numArgs < 5:
        printUsage()

    pluginName = sys.argv[1]

    inc = 3
    replacements = []
    try:
        # Only start positions that leave room for a complete triple
        for i in range(2, numArgs - inc + 1, inc):
            replacements.append((int(sys.argv[i]), sys.argv[i + 1], sys.argv[i + 2]))
    except ValueError:
        # A non-numeric index is a usage error, not a reason for a traceback
        printUsage()

    if len(replacements) == 0:
        printUsage()

    # Anything left after the complete triples means the last argument is
    # the hdf5 path layout.
    replacementArgs = len(replacements) * inc
    hdf5Path = None
    if (2 + replacementArgs) < numArgs:
        hdf5Path = sys.argv[numArgs - 1]

    processReplacements(pluginName, replacements, hdf5Path)

View file

@ -0,0 +1,33 @@
#!/bin/bash
# This script updates all tables in A2 that should contain a space or / but instead have an underscore.
# uri_update.py takes the name of the plugin to update, then 1-n sequences of: uri index to check,
# character to look for at that index, and character to replace it with. The optional last argument
# is an hdf5 path layout for modifying the datauris in hdf5. If there is no need to modify hdf5, it
# can be left off. This was done in support of Redmine DR 2333

# Directory containing this script; quoted so paths with spaces survive word splitting
DIR=$(dirname "$0")

# acars will replace _ with empty string to remove extra chars
python "$DIR/uri_update.py" acars 2 '_' ''

# acars is special as it also needs to update the column that has extra spaces in it
PSQL="/awips2/psql/bin/psql"
"${PSQL}" -U awips -d metadata -c "UPDATE acars SET tailnumber = replace(tailnumber, ' ', '')"

# bufrua needs to replace _ with space
python "$DIR/uri_update.py" bufrua 4 '_' ' '

# intlsigmet needs to replace _ with space
python "$DIR/uri_update.py" intlsigmet 3 '_' ' '

# satellite needs to replace _ with space at index 4 and 5
python "$DIR/uri_update.py" satellite 4 '_' ' ' 5 '_' ' ' '[sectorid]/[physicalelement]/'

# svrwx needs to replace _ with encoded %2F as the field actually contains a '/' in it
python "$DIR/uri_update.py" svrwx 3 '_' '%2F'

# vaa needs to replace _ with space at index 2 and _ with encoded '/' at index 6
python "$DIR/uri_update.py" vaa 2 '_' ' ' 6 '_' '%2F'

View file

@ -55,6 +55,8 @@ import com.raytheon.uf.common.util.ConvertUtil;
* May 15, 2013 1869 bsteffen Move uri map creation from RecordFactory.
* May 16, 2013 1869 bsteffen Rewrite dataURI property mappings.
* Aug 30, 2013 2298 rjpeter Make getPluginName abstract and removed setPluginName.
* Sep 24, 2013 2081 mschenke Removed special handling of spaces and only handle
* {@link DataURI#SEPARATOR} specially
*
* </pre>
*
@ -63,19 +65,30 @@ import com.raytheon.uf.common.util.ConvertUtil;
*/
public class DataURIUtil {
private static final String PLUGIN_NAME_KEY = "pluginName";
private static final String PLUGIN_NAME_KEY = PluginDataObject.PLUGIN_NAME_ID;
private static final String FIELD_SEPARATOR = ".";
private static final Pattern FIELD_SEPARATOR_PATTERN = Pattern.compile("["
+ FIELD_SEPARATOR + "]");
private static final Pattern SEPARATOR_PATTERN = Pattern
private static final String DATAURI_SEPARATOR_ENCODED = "%2F";
private static final String DATAURI_SEPARATOR_ESCAPE_CHAR = "%";
private static final String DATAURI_SEPARATOR_CHAR_ENCODED = "%25";
private static final Pattern DATAURI_SEPARATOR_ENCODED_PATTERN = Pattern
.compile(DATAURI_SEPARATOR_ENCODED);
private static final Pattern DATAURI_SEPARATOR_PATTERN = Pattern
.compile(DataURI.SEPARATOR);
private static final Pattern UNDERSCORE_PATTERN = Pattern.compile("_");
private static final Pattern DATAURI_SEPARATED_ESCAPE_CHAR_PATTERN = Pattern
.compile(DATAURI_SEPARATOR_ESCAPE_CHAR);
private static final Pattern SPACE_PATTERN = Pattern.compile(" ");
private static final Pattern DATAURI_SEPARATOR_CHAR_ENCODED_PATTERN = Pattern
.compile(DATAURI_SEPARATOR_CHAR_ENCODED);
/*
* Compares two fields with the DataURI annotations based off the position.
@ -122,7 +135,7 @@ public class DataURIUtil {
for (DataURIFieldAccess access : getAccess(pdo.getClass())) {
addToDataURI(uri, access.getFieldValue(pdo));
}
return SPACE_PATTERN.matcher(uri).replaceAll("_");
return uri.toString();
}
/**
@ -140,22 +153,31 @@ public class DataURIUtil {
for (DataURIFieldAccess access : getAccess(pluginName)) {
addToDataURI(uri, dataMap.get(access.getFieldName()));
}
return SPACE_PATTERN.matcher(uri).replaceAll("_");
return uri.toString();
}
/*
* Properly formats an arbitrary object into a dataURI.
*/
private static void addToDataURI(StringBuilder uri, Object property) {
uri.append("/");
if (property == null) {
uri.append("null");
} else if (property instanceof Calendar) {
uri.append(TimeUtil.formatCalendar((Calendar) property));
String propertyString;
if (property instanceof Calendar) {
propertyString = TimeUtil.formatCalendar((Calendar) property);
} else {
uri.append(SEPARATOR_PATTERN.matcher(String.valueOf(property))
.replaceAll("_"));
propertyString = String.valueOf(property);
}
// This is done so if the property actually contained '%2F' that
// wouldn't get converted to '/' when tokenized. %2F becomes %252F
// because the '%' is replaced with '%25'
String escapeCharEscaped = DATAURI_SEPARATED_ESCAPE_CHAR_PATTERN
.matcher(propertyString).replaceAll(
DATAURI_SEPARATOR_CHAR_ENCODED);
// Now replace any '/' with %2F to escape slashes in the property
String fullyEscapedProperty = DATAURI_SEPARATOR_PATTERN.matcher(
escapeCharEscaped).replaceAll(DATAURI_SEPARATOR_ENCODED);
uri.append(DataURI.SEPARATOR).append(fullyEscapedProperty);
}
/**
@ -336,8 +358,15 @@ public class DataURIUtil {
* Split a URI on the separator and remove empty first element.
*/
private static List<String> tokenizeURI(String dataURI) {
dataURI = UNDERSCORE_PATTERN.matcher(dataURI).replaceAll(" ");
String[] tokens = SEPARATOR_PATTERN.split(dataURI);
String[] tokens = DATAURI_SEPARATOR_PATTERN.split(dataURI);
for (int i = 0; i < tokens.length; ++i) {
// Replace %2F with '/'
tokens[i] = DATAURI_SEPARATOR_ENCODED_PATTERN.matcher(tokens[i])
.replaceAll(DataURI.SEPARATOR);
// Convert %25 to %
tokens[i] = DATAURI_SEPARATOR_CHAR_ENCODED_PATTERN.matcher(
tokens[i]).replaceAll(DATAURI_SEPARATOR_ESCAPE_CHAR);
}
return Arrays.asList(tokens).subList(1, tokens.length);
}
@ -495,4 +524,5 @@ public class DataURIUtil {
}
}
}
}

View file

@ -209,7 +209,7 @@ public class ACARSDataAdapter {
if (tailNumber != null) {
rpt = new ACARSRecord();
rpt.setTailNumber(tailNumber);
rpt.setTailNumber(tailNumber.trim());
rpt.setLocation(loc);
getReceiver(subList, rpt, 7);
@ -257,7 +257,7 @@ public class ACARSDataAdapter {
if (tailNumber != null) {
rpt = new ACARSRecord();
rpt.setTailNumber(tailNumber);
rpt.setTailNumber(tailNumber.trim());
rpt.setLocation(loc);
rpt.setTimeObs(timeObs);
@ -302,7 +302,7 @@ public class ACARSDataAdapter {
if (tailNumber != null) {
rpt = new ACARSRecord();
rpt.setTailNumber(tailNumber);
rpt.setTailNumber(tailNumber.trim());
rpt.setLocation(loc);
rpt.setTimeObs(timeObs);