Merge "Issue #2905 moved bufr splitter to separate utility" into development

Former-commit-id: fa3f25ba9e [formerly 5edb95ee329fdc06db19fcc753075c8a0ba51bad]
Former-commit-id: 00baa15f7a
This commit is contained in:
Richard Peter 2014-04-03 17:27:04 -05:00 committed by Gerrit Code Review
commit c3164b9f89
4 changed files with 350 additions and 172 deletions

View file

@ -0,0 +1,87 @@
/**
* This software was developed and / or modified by Raytheon Company,
* pursuant to Contract DG133W-05-CQ-1067 with the US Government.
*
* U.S. EXPORT CONTROLLED TECHNICAL DATA
* This software product contains export-restricted data whose
* export/transfer/disclosure is restricted by U.S. law. Dissemination
* to non-U.S. persons whether in the United States or abroad requires
* an export license or other authorization.
*
* Contractor Name: Raytheon Company
* Contractor Address: 6825 Pine Street, Suite 340
* Mail Stop B8
* Omaha, NE 68106
* 402.291.0100
*
* See the AWIPS II Master Rights File ("Master Rights File.pdf") for
* further licensing information.
**/
package com.raytheon.uf.common.nc.bufr;
import ucar.ma2.DataType;
import ucar.nc2.Variable;
/**
* Parsed BUFR data field value bundled with metadata
*
* <pre>
*
* SOFTWARE HISTORY
*
* Date Ticket# Engineer Description
* ------------ ---------- ----------- --------------------------
* Mar 31, 2014 2905 bclement Initial creation
*
* </pre>
*
* @author bclement
* @version 1.0
*/
public class BufrDataItem {

    /* name identifying the field */
    private final String name;

    /* value carried for the field, stored exactly as supplied */
    private final Object value;

    /* NetCDF data type supplied alongside the value */
    private final DataType type;

    /* NetCDF variable supplied as metadata for the field */
    private final Variable variable;

    /**
     * Bundles a field value with its descriptive metadata. All arguments are
     * stored as given; no validation or copying is performed.
     * 
     * @param name
     *            field name
     * @param value
     *            field value
     * @param type
     *            data type associated with the field
     * @param variable
     *            NetCDF variable associated with the field
     */
    public BufrDataItem(String name, Object value, DataType type,
            Variable variable) {
        this.variable = variable;
        this.type = type;
        this.value = value;
        this.name = name;
    }

    /**
     * @return the field name given at construction
     */
    public String getName() {
        return this.name;
    }

    /**
     * @return the field value given at construction
     */
    public Object getValue() {
        return this.value;
    }

    /**
     * @return the data type given at construction
     */
    public DataType getType() {
        return this.type;
    }

    /**
     * @return the NetCDF variable given at construction
     */
    public Variable getVariable() {
        return this.variable;
    }
}

View file

@ -0,0 +1,132 @@
/**
* This software was developed and / or modified by Raytheon Company,
* pursuant to Contract DG133W-05-CQ-1067 with the US Government.
*
* U.S. EXPORT CONTROLLED TECHNICAL DATA
* This software product contains export-restricted data whose
* export/transfer/disclosure is restricted by U.S. law. Dissemination
* to non-U.S. persons whether in the United States or abroad requires
* an export license or other authorization.
*
* Contractor Name: Raytheon Company
* Contractor Address: 6825 Pine Street, Suite 340
* Mail Stop B8
* Omaha, NE 68106
* 402.291.0100
*
* See the AWIPS II Master Rights File ("Master Rights File.pdf") for
* further licensing information.
**/
package com.raytheon.uf.common.nc.bufr;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import ucar.nc2.iosp.bufr.writer.BufrSplitter;
import ucar.nc2.iosp.bufr.writer.BufrSplitter.Options;
import com.raytheon.uf.common.status.IUFStatusHandler;
import com.raytheon.uf.common.status.UFStatus;
/**
* Utility to split mixed-type BUFR files into separate messages. Creates a new
* BUFR file on the file system for each separate message.
*
* <pre>
*
* SOFTWARE HISTORY
*
* Date Ticket# Engineer Description
* ------------ ---------- ----------- --------------------------
* Apr 1, 2014 2905 bclement Initial creation
*
* </pre>
*
* @author bclement
* @version 1.0
*/
public class BufrFileSeparator {

    private static final IUFStatusHandler log = UFStatus
            .getHandler(BufrFileSeparator.class);

    /**
     * Base directory under which a per-input output directory is placed.
     * Resolves to {@code <edex.home>/data/processing} when the
     * {@code edex.home} system property is set to a non-blank value,
     * otherwise to the directory named by {@code java.io.tmpdir}.
     */
    public static final File DEFAULT_TMP_DIR;

    static {
        final String edexHomeProp = "edex.home";
        String baseDir = System.getProperty(edexHomeProp);
        if (baseDir == null || baseDir.trim().isEmpty()) {
            log.warn("Property '" + edexHomeProp
                    + "' not set, defaulting to system tmp directory");
            DEFAULT_TMP_DIR = new File(System.getProperty("java.io.tmpdir"));
        } else {
            DEFAULT_TMP_DIR = new File(baseDir + File.separator + "data",
                    "processing");
        }
    }

    /* selects the files in the output directory that end in ".bufr" */
    private static final FilenameFilter BUFR_FILTER = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".bufr");
        }
    };

    /**
     * Splits the mixed BUFR file into homogeneous BUFR files that are written
     * to the file system. The files are placed in a new directory under
     * {@link #DEFAULT_TMP_DIR}; this method does not delete them.
     * 
     * @param mixedBufrFile
     *            BUFR file to split
     * @return list of absolute paths to new BUFR files, in the order reported
     *         by the file system
     * @throws IOException
     *             if the splitter fails or the output directory cannot be
     *             listed after the split
     */
    public static List<String> separate(File mixedBufrFile) throws IOException {
        final File outputBaseDir = DEFAULT_TMP_DIR;
        final String inputFile = mixedBufrFile.getAbsolutePath();
        final File outputDir = getOutputDir(mixedBufrFile.getName(),
                outputBaseDir);
        Options options = new Options() {
            @Override
            public String getFileSpec() {
                return inputFile;
            }

            @Override
            public String getDirOut() {
                return outputDir.getAbsolutePath();
            }
        };
        BufrSplitter splitter = new BufrSplitter(options);
        splitter.execute();
        File[] files = outputDir.listFiles(BUFR_FILTER);
        if (files == null) {
            /*
             * listFiles() returns null when the path is not a directory or an
             * I/O error occurs; report that instead of failing with a
             * NullPointerException below
             */
            throw new IOException("Unable to list BUFR splitter output for "
                    + inputFile + " in directory: "
                    + outputDir.getAbsolutePath());
        }
        List<String> rval = new ArrayList<String>(files.length);
        for (File f : files) {
            rval.add(f.getAbsolutePath());
        }
        return rval;
    }

    /**
     * Build the path of a temporary output directory based on the input file
     * name and the current time in milliseconds. The directory is not created
     * by this method (presumably the splitter creates it; confirm against
     * BufrSplitter).
     * 
     * @param inputName
     *            name of the file being split
     * @param outputBaseDir
     *            parent directory for the output directory
     * @return directory path of the form
     *         {@code <outputBaseDir>/<inputName>-<millis>-split}
     */
    private static File getOutputDir(final String inputName,
            final File outputBaseDir) {
        String name = inputName + "-" + System.currentTimeMillis() + "-split";
        File rval = new File(outputBaseDir, name);
        if (rval.exists()) {
            /* same input name within the same millisecond */
            log.warn("BUFR splitter output directory already exists, is file "
                    + inputName + " being processed twice?");
        }
        return rval;
    }
}

View file

@ -20,11 +20,11 @@
package com.raytheon.uf.common.nc.bufr; package com.raytheon.uf.common.nc.bufr;
import java.io.File; import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
import java.util.Stack; import java.util.Stack;
import ucar.ma2.Array; import ucar.ma2.Array;
@ -37,8 +37,6 @@ import ucar.nc2.Attribute;
import ucar.nc2.NetcdfFile; import ucar.nc2.NetcdfFile;
import ucar.nc2.Structure; import ucar.nc2.Structure;
import ucar.nc2.Variable; import ucar.nc2.Variable;
import ucar.nc2.iosp.bufr.writer.BufrSplitter;
import ucar.nc2.iosp.bufr.writer.BufrSplitter.Options;
import com.raytheon.uf.common.numeric.UnsignedNumbers; import com.raytheon.uf.common.numeric.UnsignedNumbers;
import com.raytheon.uf.common.status.IUFStatusHandler; import com.raytheon.uf.common.status.IUFStatusHandler;
@ -58,6 +56,8 @@ import com.raytheon.uf.common.status.UFStatus;
* ------------ ---------- ----------- -------------------------- * ------------ ---------- ----------- --------------------------
* Mar 18, 2014 2905 bclement Initial creation * Mar 18, 2014 2905 bclement Initial creation
* Mar 26, 2014 2905 bclement fixed types, added scale/offset * Mar 26, 2014 2905 bclement fixed types, added scale/offset
* Apr 01, 2014 2905 bclement moved splitter functionality to separate utility
* added scanForStructField()
* *
* </pre> * </pre>
* *
@ -79,30 +79,11 @@ public class BufrParser {
private static final IUFStatusHandler log = UFStatus private static final IUFStatusHandler log = UFStatus
.getHandler(BufrParser.class); .getHandler(BufrParser.class);
private final static File DEFAULT_TMP_DIR; private final File bufrFile;
static { private final NetcdfFile ncfile;
final String edexHomeProp = "edex.home";
String baseDir = System.getProperty(edexHomeProp);
if (baseDir == null || baseDir.trim().isEmpty()) {
log.warn("Property '" + edexHomeProp
+ "' not set, defaulting to system tmp directory");
DEFAULT_TMP_DIR = new File(System.getProperty("java.io.tmpdir"));
} else {
DEFAULT_TMP_DIR = new File(baseDir + File.separator + "data",
"processing");
}
}
private final Options options; private final Iterator<Variable> varIter;
private final File[] splitFiles;
private int fileIndex = 0;
private NetcdfFile currentNcfile;
private Iterator<Variable> varIter;
private Variable currentVar; private Variable currentVar;
@ -118,59 +99,9 @@ public class BufrParser {
* @throws IOException * @throws IOException
*/ */
public BufrParser(final File bufrFile) throws IOException { public BufrParser(final File bufrFile) throws IOException {
this(bufrFile, DEFAULT_TMP_DIR); this.bufrFile = bufrFile;
} this.ncfile = NetcdfFile.open(bufrFile.getAbsolutePath());
this.varIter = ncfile.getVariables().iterator();
/**
* @param bufrFile
* BUFR file, may contain mixed message types
* @param outputBaseDir
* base directory for temporary storage of split files
* @throws IOException
*/
public BufrParser(final File bufrFile, final File outputBaseDir)
throws IOException {
final String inputFile = bufrFile.getAbsolutePath();
final File outputDir = getOutputDir(bufrFile.getName(), outputBaseDir);
options = new Options() {
@Override
public String getFileSpec() {
return inputFile;
}
@Override
public String getDirOut() {
return outputDir.getAbsolutePath();
}
};
BufrSplitter splitter = new BufrSplitter(options);
splitter.execute();
splitFiles = outputDir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.endsWith(".bufr");
}
});
}
/**
* Create a temporary output directory based on the input file name
*
* @param inputName
* @param outputBaseDir
* @return
*/
private static File getOutputDir(final String inputName,
final File outputBaseDir) {
String name = inputName + "-" + System.currentTimeMillis() + "-split";
File rval = new File(outputBaseDir, name);
if (rval.exists()) {
log.warn("BUFR splitter output directory already exists, is a file being processed twice?");
}
return rval;
} }
/** /**
@ -178,6 +109,10 @@ public class BufrParser {
* @throws IOException * @throws IOException
*/ */
public boolean hasNext() throws IOException { public boolean hasNext() throws IOException {
if (lastEvent == null) {
/* we haven't started the file yet */
return true;
}
if (!structStack.isEmpty()) { if (!structStack.isEmpty()) {
StructureLevel level = structStack.peek(); StructureLevel level = structStack.peek();
if (level.hasNext()) { if (level.hasNext()) {
@ -190,11 +125,8 @@ public class BufrParser {
if (varIter != null && varIter.hasNext()) { if (varIter != null && varIter.hasNext()) {
return true; return true;
} }
if (fileIndex < splitFiles.length) {
return true;
}
if (lastEvent != null && !lastEvent.equals(Event.END_FILE)) { if (lastEvent != null && !lastEvent.equals(Event.END_FILE)) {
/* only one more event left, the end of the last file */ /* only one more event left, the end of the file */
return true; return true;
} }
return false; return false;
@ -209,7 +141,9 @@ public class BufrParser {
*/ */
public Event next() throws IOException { public Event next() throws IOException {
Event rval; Event rval;
if (!structStack.isEmpty()) { if (lastEvent == null) {
rval = Event.START_FILE;
} else if (!structStack.isEmpty()) {
rval = nextMember(); rval = nextMember();
} else if (structIter != null && structIter.hasNext()) { } else if (structIter != null && structIter.hasNext()) {
/* in a variable with a sequence of structures, get the next one */ /* in a variable with a sequence of structures, get the next one */
@ -229,14 +163,8 @@ public class BufrParser {
/* no more variables, we are at the end of the bufr file */ /* no more variables, we are at the end of the bufr file */
structIter = null; structIter = null;
currentVar = null; currentVar = null;
varIter = null; rval = Event.END_FILE;
rval = endFile();
} }
} else if (fileIndex < splitFiles.length) {
/* start the next bufr file */
rval = startFile();
} else if (lastEvent != null && !lastEvent.equals(Event.END_FILE)) {
rval = endFile();
} else { } else {
/* don't set rval to null so we preserve the correct lastEvent */ /* don't set rval to null so we preserve the correct lastEvent */
return null; return null;
@ -323,10 +251,9 @@ public class BufrParser {
log.error("Structure variable members out of sync"); log.error("Structure variable members out of sync");
throw new IllegalStateException("Structure variable members out of sync"); throw new IllegalStateException("Structure variable members out of sync");
} }
Iterator<Variable> memberVarIter = ((Structure) parentVar) List<Variable> memberVars = ((Structure) parentVar).getVariables();
.getVariables().iterator();
return startStructure(new StructureLevel(childData, childMembers, return startStructure(new StructureLevel(childData, childMembers,
memberVarIter)); memberVars));
} }
/** /**
@ -340,8 +267,7 @@ public class BufrParser {
*/ */
private Event startStructure(Structure s, StructureData structData) private Event startStructure(Structure s, StructureData structData)
throws IOException { throws IOException {
StructureLevel level = new StructureLevel(structData, s.getVariables() StructureLevel level = new StructureLevel(structData, s.getVariables());
.iterator());
return startStructure(level); return startStructure(level);
} }
@ -400,49 +326,17 @@ public class BufrParser {
} }
/** /**
* Start processing the next NetCDF file * @return the NetCDF File
*
* @return
* @throws IOException
*/ */
private Event startFile() throws IOException { public NetcdfFile getNcfile() {
File f = splitFiles[fileIndex]; return ncfile;
fileIndex += 1;
currentNcfile = NetcdfFile.open(f.getAbsolutePath());
varIter = currentNcfile.getVariables().iterator();
return Event.START_FILE;
} }
/** /**
* Finalize processing of NetCDF file * @return BUFR file being processed
*
* @return
* @throws IOException
*/ */
private Event endFile() throws IOException { public File getFile() {
if (currentNcfile != null) { return bufrFile;
currentNcfile.close();
currentNcfile = null;
}
return Event.END_FILE;
}
/**
* @return null if no file is currently being processed
*/
public NetcdfFile getCurrentNcfile() {
return currentNcfile;
}
/**
* @return null if no file has started being processed
*/
public File getCurrentFile() {
if (fileIndex < splitFiles.length) {
return splitFiles[fileIndex];
} else {
return null;
}
} }
/** /**
@ -534,7 +428,7 @@ public class BufrParser {
* @param var * @param var
* @return * @return
*/ */
private DataType getUnscaledDataType(Variable var) { private static DataType getUnscaledDataType(Variable var) {
DataType rval; DataType rval;
/* /*
* We will promote unsigned values to the next largest signed type * We will promote unsigned values to the next largest signed type
@ -566,7 +460,7 @@ public class BufrParser {
* @param var * @param var
* @return true if the field has a scale factor or addition offset * @return true if the field has a scale factor or addition offset
*/ */
private boolean isScaledOrOffset(Variable var) { private static boolean isScaledOrOffset(Variable var) {
return var.findAttribute(OFFSET_ATTRIB) != null return var.findAttribute(OFFSET_ATTRIB) != null
|| var.findAttribute(SCALE_FACTOR_ATTRIB) != null; || var.findAttribute(SCALE_FACTOR_ATTRIB) != null;
} }
@ -583,6 +477,22 @@ public class BufrParser {
if (typedArray == null) { if (typedArray == null) {
return null; return null;
} }
Variable var = getFieldVariable();
return getFieldScalarValue(typedArray, var, charArrayAsString);
}
/**
* @param typedArray
* storage for field value
* @param var
* NetCDF variable
* @param charArrayAsString
* true if character arrays should be treated as strings
* @return null if value is a missing value
* @throws IOException
*/
private static Object getFieldScalarValue(TypedArray typedArray,
Variable var, boolean charArrayAsString) {
Array array = typedArray.array; Array array = typedArray.array;
DataType type = typedArray.type; DataType type = typedArray.type;
Object value; Object value;
@ -598,23 +508,23 @@ public class BufrParser {
value = array.getObject(0); value = array.getObject(0);
} }
return processValue(value); return processValue(value, var);
} }
/** /**
* Perform any promotion, scaling or missing value operations * Perform any promotion, scaling or missing value operations
* *
* @param value * @param value
* @param var
* @return * @return
*/ */
private Object processValue(Object value) { private static Object processValue(Object value, Variable var) {
Variable var = getFieldVariable(); Object rval = promoteValueType(value, var);
Object rval = promoteValueType(var, value); if (isMissingValue(rval, var)) {
if (isMissingValue(var, rval)) {
rval = null; rval = null;
} else if (isScaledOrOffset(var)) { } else if (isScaledOrOffset(var)) {
if (value instanceof Number) { if (value instanceof Number) {
rval = scaleAndOffset((Number) rval); rval = scaleAndOffset((Number) rval, var);
} else { } else {
log.warn("Scale or offset attribute on non-numerical field: " log.warn("Scale or offset attribute on non-numerical field: "
+ var.getFullName()); + var.getFullName());
@ -626,11 +536,11 @@ public class BufrParser {
/** /**
* Promote unsigned numbers to next largest data type if needed * Promote unsigned numbers to next largest data type if needed
* *
* @param var
* @param value * @param value
* @param var
* @return * @return
*/ */
private Object promoteValueType(Variable var, Object value) { private static Object promoteValueType(Object value, Variable var) {
if (value == null) { if (value == null) {
return null; return null;
} }
@ -660,14 +570,15 @@ public class BufrParser {
* Apply scale factor or addition offset if present * Apply scale factor or addition offset if present
* *
* @param value * @param value
* @param var
* @return * @return
*/ */
public Number scaleAndOffset(Number value) { public static Number scaleAndOffset(Number value, Variable var) {
Number scaleFactor = getScaleFactor(); Number scaleFactor = getFieldAttributeAsNum(SCALE_FACTOR_ATTRIB, var);
if (scaleFactor != null) { if (scaleFactor != null) {
value = value.doubleValue() * scaleFactor.doubleValue(); value = value.doubleValue() * scaleFactor.doubleValue();
} }
Number offset = getOffset(); Number offset = getFieldAttributeAsNum(OFFSET_ATTRIB, var);
if (offset != null) { if (offset != null) {
value = value.doubleValue() + offset.doubleValue(); value = value.doubleValue() + offset.doubleValue();
} }
@ -687,11 +598,12 @@ public class BufrParser {
if (typedArray == null) { if (typedArray == null) {
return null; return null;
} }
Variable var = getFieldVariable();
Array array = typedArray.array; Array array = typedArray.array;
int len = (int) array.getSize(); int len = (int) array.getSize();
Collection<Object> rval = new ArrayList<Object>(len); Collection<Object> rval = new ArrayList<Object>(len);
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
rval.add(processValue(array.getObject(i))); rval.add(processValue(array.getObject(i), var));
} }
return rval; return rval;
} }
@ -777,6 +689,22 @@ public class BufrParser {
return rval; return rval;
} }
/**
* Get attribute value for field from variable
*
* @param name
* @param var
* @return
*/
private static Number getFieldAttributeAsNum(String name, Variable var) {
Number rval = null;
Attribute attr = var.findAttributeIgnoreCase(name);
if (attr != null) {
rval = attr.getNumericValue();
}
return rval;
}
/** /**
* Get variable object for current field * Get variable object for current field
* *
@ -794,17 +722,18 @@ public class BufrParser {
} }
/** /**
* @param var
* @param unscaledValue * @param unscaledValue
* field value before any scaling or offset is applied * field value before any scaling or offset is applied
* @param var
* @return true if value matches the missing value for field * @return true if value matches the missing value for field
*/ */
private boolean isMissingValue(Variable var, Object unscaledValue) { private static boolean isMissingValue(Object unscaledValue, Variable var) {
if (unscaledValue == null) { if (unscaledValue == null) {
return true; return true;
} }
boolean rval; boolean rval;
Attribute missingAttrib = getFieldAttribute(MISSING_VAL_ATTRIB); Attribute missingAttrib = var
.findAttributeIgnoreCase(MISSING_VAL_ATTRIB);
if (missingAttrib == null) { if (missingAttrib == null) {
/* if there is no special missing value, all values are valid */ /* if there is no special missing value, all values are valid */
rval = false; rval = false;
@ -851,19 +780,41 @@ public class BufrParser {
} }
/** /**
* clean up temporary files * Get field from current structure level. Does not affect the current state
* of the parser. Only searches current level (does not go into
* substructures).
*
* @param fieldName
* @param charArrayAsString
* @return null if no field found or parser is not currently parsing a
* structure
*/ */
public void clean() { public BufrDataItem scanForStructField(String fieldName, boolean charArrayAsString) {
for (File f : splitFiles) { BufrDataItem rval = null;
if (!f.delete()) { if ( structStack.isEmpty()){
log.error("Unable to delete temporary file: " return rval;
+ f.getAbsolutePath());
} }
} StructureLevel level = structStack.peek();
File outdir = new File(options.getDirOut()); Iterator<Member> memberIter = level.getMemberList().iterator();
if (!outdir.delete()) { Iterator<Variable> varIter = level.getMemberVarList().iterator();
log.error("Unable to delete temporary directory: " while (memberIter.hasNext() && varIter.hasNext()) {
+ outdir.getAbsolutePath()); Member member = memberIter.next();
Variable variable = varIter.next();
DataType type = member.getDataType();
if (!type.equals(DataType.STRUCTURE) && !type.equals(DataType.SEQUENCE)) {
/* current member is a field */
if (member.getName().equals(fieldName)){
StructureData sd = level.getStructData();
Array array = sd.getArray(member);
Object value = getFieldScalarValue(new TypedArray(array,
type), variable, charArrayAsString);
rval = new BufrDataItem(member.getName(), value, type,
variable);
break;
} }
} }
} }
return rval;
}
}

View file

@ -20,6 +20,7 @@
package com.raytheon.uf.common.nc.bufr; package com.raytheon.uf.common.nc.bufr;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
import ucar.ma2.ArraySequence; import ucar.ma2.ArraySequence;
import ucar.ma2.StructureData; import ucar.ma2.StructureData;
@ -40,6 +41,7 @@ import com.raytheon.uf.common.status.UFStatus;
* Date Ticket# Engineer Description * Date Ticket# Engineer Description
* ------------ ---------- ----------- -------------------------- * ------------ ---------- ----------- --------------------------
* Mar 25, 2014 2905 bclement Initial creation * Mar 25, 2014 2905 bclement Initial creation
* Apr 01, 2014 2905 bclement added references to member and variable lists
* *
* </pre> * </pre>
* *
@ -53,8 +55,12 @@ public class StructureLevel {
private final StructureData structData; private final StructureData structData;
private final List<Member> memberList;
private final Iterator<Member> memberIter; private final Iterator<Member> memberIter;
private final List<Variable> memberVarList;
private final Iterator<Variable> memberVarIter; private final Iterator<Variable> memberVarIter;
private Member currentMember; private Member currentMember;
@ -66,8 +72,8 @@ public class StructureLevel {
* @param memberVarIter * @param memberVarIter
*/ */
public StructureLevel(StructureData structData, public StructureLevel(StructureData structData,
Iterator<Variable> memberVarIter) { List<Variable> memberVariables) {
this(structData, structData.getStructureMembers(), memberVarIter); this(structData, structData.getStructureMembers(), memberVariables);
} }
/** /**
@ -76,10 +82,12 @@ public class StructureLevel {
* @param memberVarIter * @param memberVarIter
*/ */
public StructureLevel(StructureData structData, StructureMembers members, public StructureLevel(StructureData structData, StructureMembers members,
Iterator<Variable> memberVarIter) { List<Variable> memberVariables) {
this.structData = structData; this.structData = structData;
this.memberIter = members.getMembers().iterator(); this.memberList = members.getMembers();
this.memberVarIter = memberVarIter; this.memberIter = memberList.iterator();
this.memberVarList = memberVariables;
this.memberVarIter = memberVarList.iterator();
} }
/** /**
@ -126,17 +134,17 @@ public class StructureLevel {
} }
/** /**
* @return the memberIter * @return the memberList
*/ */
public Iterator<Member> getMemberIter() { public List<Member> getMemberList() {
return memberIter; return memberList;
} }
/** /**
* @return the memberVarIter * @return the memberVarList
*/ */
public Iterator<Variable> getMemberVarIter() { public List<Variable> getMemberVarList() {
return memberVarIter; return memberVarList;
} }
/** /**