ASM #16859 - Text decoder processes collectives inefficiently

Change-Id: Ib38ca77d66eccc84b7ffb64901b2eef64ee82af4

Former-commit-id: dac7434202e9d5b5aafc4784f6a9201b5a9f0394
This commit is contained in:
David Friedman 2014-12-03 15:44:27 +00:00
parent 8261961462
commit 47f151fe85
3 changed files with 77 additions and 61 deletions

View file

@ -19,6 +19,9 @@
**/
package com.raytheon.uf.edex.plugin.text.impl.separator;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -49,6 +52,7 @@ import com.raytheon.uf.edex.plugin.text.impl.WMOReportData;
* Fixed calculation of message end.
* Apr 01, 2014 2915 dgilling Support re-factored TextDBStaticData.
* May 14, 2014 2536 bclement moved WMO Header to common
* Dec 03, 2014 ASM #16859 D. Friedman Use CharBuffer instead of StringBuilder.
* </pre>
*
* @author jkorman
@ -115,6 +119,12 @@ public class StdCollectiveSeparator extends WMOMessageSeparator {
}
}
/**
 * Determine whether a character sequence begins with the given string.
 * Characters are read through {@link CharSequence#charAt(int)}, so for a
 * buffer-backed sequence the comparison is made against whatever the
 * sequence itself exposes as index zero onward.
 *
 * @param sequence
 *            The character sequence to examine.
 * @param searchString
 *            The prefix to look for.
 * @return true if the first {@code searchString.length()} characters of
 *         {@code sequence} equal {@code searchString}; false if they
 *         differ or if {@code sequence} is shorter than the prefix.
 */
static boolean charSeqStartsWith(CharSequence sequence, String searchString) {
    // A sequence shorter than the prefix can never start with it.
    if (sequence.length() < searchString.length()) {
        return false;
    }
    final int prefixLength = searchString.length();
    for (int i = 0; i < prefixLength; i++) {
        if (sequence.charAt(i) != searchString.charAt(i)) {
            return false;
        }
    }
    return true;
}
/*
* (non-Javadoc)
*
@ -142,14 +152,17 @@ public class StdCollectiveSeparator extends WMOMessageSeparator {
if (endIndex <= startIndex) {
endIndex = rawData.length - 1;
}
String rawMsg = new String(rawData, startIndex, endIndex - startIndex);
StringBuilder sb = null;
if ((rawMsg.indexOf(METAR) == 0) || (rawMsg.indexOf(SPECI) == 0)) {
productType = (rawMsg.indexOf(METAR) == 0 ? METAR : SPECI);
CharBuffer rawMsg = Charset.forName("ISO-8859-1").decode(
ByteBuffer.wrap(rawData, startIndex, endIndex - startIndex));
if (charSeqStartsWith(rawMsg, METAR)) {
productType = METAR;
} else if (charSeqStartsWith(rawMsg, SPECI)) {
productType = SPECI;
}
if ("TAF".equals(afos_id.getNnn())) {
sb = new StringBuilder(rawMsg);
StringBuilder sb = new StringBuilder(rawMsg);
Matcher m = P_TAF.matcher(sb);
while (m.find()) {
sb.delete(m.start(), m.end());
@ -157,13 +170,16 @@ public class StdCollectiveSeparator extends WMOMessageSeparator {
}
sb.insert(0, "\n");
sb.insert(0, "TAFXXX\nTAF");
rawMsg = sb.toString();
rawMsg = CharBuffer.allocate(sb.length());
rawMsg.append(sb);
rawMsg.flip();
}
Matcher nnnxxxMatcher = NNNXXX.matcher(rawMsg);
if (nnnxxxMatcher.find() && nnnxxxMatcher.start() == 0) {
rawMsg = rawMsg.substring(nnnxxxMatcher.end());
rawMsg.position(rawMsg.position() + nnnxxxMatcher.end());
}
StringBuilder buffer = new StringBuilder(rawMsg);
CharBuffer buffer = rawMsg;
boolean parsing = true;
while (parsing) {
@ -276,7 +292,7 @@ public class StdCollectiveSeparator extends WMOMessageSeparator {
// filter out junk characters
while (buffer.length() > 0
&& !checkCharNum(buffer.charAt(0))) {
buffer.deleteCharAt(0);
buffer.get();
}
// again, trash data if it is less than 20 bytes
@ -321,9 +337,9 @@ public class StdCollectiveSeparator extends WMOMessageSeparator {
* @param XXX_id
* @param parsedMsg
*/
private void parseCollMsg(StringBuilder buffer, StringBuilder XXX_id,
private void parseCollMsg(CharBuffer buffer, StringBuilder XXX_id,
StringBuilder parsedMsg) {
String msgId = null;
CharSequence msgId = null;
// Check the status of the special case flags and if necessary,
// skip the special case characters.
@ -331,8 +347,8 @@ public class StdCollectiveSeparator extends WMOMessageSeparator {
if (checkFouHeader && fouFlag) {
// Get the length of the FWC header section and save section
// to store at the beginning of each product.
if (buffer.charAt(0) == (char) 0x1e) {
buffer.deleteCharAt(0);
if (buffer.length() > 0 && buffer.charAt(0) == (char) 0x1e) {
buffer.get();
}
if (!getTextSegment(buffer, fouHeader, OCSEP)) {
@ -344,8 +360,8 @@ public class StdCollectiveSeparator extends WMOMessageSeparator {
checkFouHeader = false;
}
String blank = buffer.substring(0,
buffer.length() < 5 ? buffer.length() : 5);
String blank = buffer.subSequence(0, Math.min(5, buffer.length()))
.toString();
if (blank.equals("METAR") || blank.equals("SPECI")
|| blank.equals("TESTM") || blank.equals("TESTS")) {
@ -353,9 +369,9 @@ public class StdCollectiveSeparator extends WMOMessageSeparator {
return;
}
buffer.deleteCharAt(0);
buffer.get();
if (buffer.charAt(0) == ' ') {
buffer.deleteCharAt(0);
buffer.get();
}
reportType = blank;
@ -366,20 +382,20 @@ public class StdCollectiveSeparator extends WMOMessageSeparator {
while (buffer.length() > 0) {
char c = buffer.charAt(0);
if ((c == '\n') || (c == '\r')) {
buffer.deleteCharAt(0);
buffer.get();
} else {
break;
}
}
// The next test on blank uses at most three characters
blank = buffer.substring(0, buffer.length() < 3 ? buffer.length()
: 3);
blank = buffer.subSequence(0, Math.min(3, buffer.length()))
.toString();
} else if (pirFlag) {
if (buffer != null) {
for (int i = 0; i < buffer.length(); i++) {
if (buffer.charAt(i) == '\r') {
buffer.setCharAt(i, '\n');
buffer.put(buffer.position() + i, '\n');
}
}
@ -391,7 +407,7 @@ public class StdCollectiveSeparator extends WMOMessageSeparator {
while (buffer.length() > 0) {
char c = buffer.charAt(0);
if ((c == ' ') || (c == '\n')) {
buffer.deleteCharAt(0);
buffer.get();
} else {
break;
}
@ -400,17 +416,17 @@ public class StdCollectiveSeparator extends WMOMessageSeparator {
}
pirFlag = false;
}
blank = buffer.toString();
blank = buffer.subSequence(0, Math.min(3, buffer.length())).toString();
if (blank.startsWith("AMD") || blank.startsWith("COR")) {
if (safeStrpbrk(buffer, CSPC)) {
buffer.deleteCharAt(0);
buffer.get();
}
}
// Skip junk characters
while (buffer.length() > 0
&& !(checkCharNum(buffer.charAt(0)) && (buffer.charAt(0) != EOM))) {
buffer.deleteCharAt(0);
buffer.get();
}
// Grab the first word of each line to act as the XXX of the afos id.
@ -452,7 +468,7 @@ public class StdCollectiveSeparator extends WMOMessageSeparator {
parsedMsg.setLength(parsedMsg.length() - 3);
} else if (buffer.charAt(0) == '=') {
if (safeStrpbrk(buffer, CSPL)) {
buffer.deleteCharAt(0);
buffer.get();
}
} else if ((buffer.charAt(0) == EOM)
&& (parsedMsg.length() > (MIN_COLL_DATA_LEN - 1))) {

View file

@ -19,6 +19,9 @@
**/
package com.raytheon.uf.edex.plugin.text.impl.separator;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
@ -47,6 +50,7 @@ import com.raytheon.uf.edex.plugin.text.impl.WMOReportData;
* Apr 01, 2014 2915 dgilling Support re-factored TextDBStaticData.
* Apr 02, 2014 2652 skorolev Corrected a removing of excess control characters.
* May 14, 2014 2536 bclement moved WMO Header to common
* Dec 03, 2014 ASM #16859 D. Friedman Use CharBuffer instead of StringBuilder.
* </pre>
*
* @author jkorman
@ -122,12 +126,13 @@ public class UACollectiveSeparator extends WMOMessageSeparator {
if (endIndex <= startIndex) {
endIndex = rawData.length - 1;
}
String rawMsg = new String(rawData, startIndex, endIndex - startIndex);
CharBuffer rawMsg = Charset.forName("ISO-8859-1").decode(
ByteBuffer.wrap(rawData, startIndex, endIndex - startIndex));
Matcher nnnxxxMatcher = NNNXXX.matcher(rawMsg);
if (nnnxxxMatcher.find() && nnnxxxMatcher.start() == 0) {
rawMsg = rawMsg.substring(nnnxxxMatcher.end());
rawMsg.position(rawMsg.position() + nnnxxxMatcher.end());
}
StringBuilder buffer = new StringBuilder(rawMsg);
CharBuffer buffer = rawMsg;
String hdrStr = wmoHdr.getWmoHeader();
String dataDes = createDataDes(wmoHdr);
String origin = wmoHdr.getCccc();
@ -213,7 +218,7 @@ public class UACollectiveSeparator extends WMOMessageSeparator {
// filter out junk characters
while (buffer.length() > 0
&& !checkCharNum(buffer.charAt(0))) {
buffer.deleteCharAt(0);
buffer.get();
}
// again, trash data if it is less than 20 bytes
@ -400,35 +405,35 @@ public class UACollectiveSeparator extends WMOMessageSeparator {
// ---------------------------------------------------------
// Called by the decodeUpAirMsg function.
// ---------------------------------------------------------------------------
private void parseUpairMsg(StringBuilder buffer, StringBuilder stationNum,
private void parseUpairMsg(CharBuffer buffer, StringBuilder stationNum,
StringBuilder parsedMsg, String dataDes) {
stationNum.setLength(0);
// Check each message for the \036 record separator and increment past
// it.
if (!checkCharNum(buffer.charAt(0))) {
buffer.deleteCharAt(0);
buffer.get();
}
// Check for UEXX or UJXX formatted messages and decode
if (dataDes.endsWith("XX") || dataDes.endsWith("81")
|| dataDes.endsWith("82")) {
stationNum.append(assignTextSegment(buffer.toString(), CSPC));
stationNum.append(assignTextSegment(buffer, CSPC));
getTextSegment(buffer, parsedMsg, MARKERANDEOM);
}
// Check for USUS80 or 90 formatted messages and decode
else if (dataDes.endsWith("80") || dataDes.endsWith("90")) {
if (checkCharNum(buffer.charAt(0))) {
buffer.deleteCharAt(0);
buffer.get();
} else {
stationNum.append(assignTextSegment(buffer.toString(), CSPC));
stationNum.append(assignTextSegment(buffer, CSPC));
}
getTextSegment(buffer, parsedMsg, CSEP);
} else {
// Otherwise it's standard format so decode
if (!checkCharNum(buffer.charAt(0))) {
buffer.deleteCharAt(0);
buffer.get();
} else {
// Move to the third field of the message to get the station
@ -444,11 +449,11 @@ public class UACollectiveSeparator extends WMOMessageSeparator {
if (len - buffer.length() >= 4) {
x++;
}
buffer.deleteCharAt(0);
buffer.get();
}
}
stationNum.append(assignTextSegment(buffer.toString(), CSPC));
stationNum.append(assignTextSegment(buffer, CSPC));
}
getTextSegment(buffer, parsedMsg, CSEP);
@ -462,11 +467,11 @@ public class UACollectiveSeparator extends WMOMessageSeparator {
trim_message(parsedMsg);
}
} else if (buffer.charAt(0) == '=') {
buffer.deleteCharAt(0);
buffer.get();
while (buffer.length() > 0) {
char c = buffer.charAt(0);
if ((c == '\n') || (c == '\r')) {
buffer.deleteCharAt(0);
buffer.get();
} else {
break;
}

View file

@ -19,10 +19,12 @@
**/
package com.raytheon.uf.edex.plugin.text.impl.separator;
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
@ -48,6 +50,7 @@ import com.raytheon.uf.edex.plugin.text.impl.WMOReportData;
* Jul 10, 2009 2191 rjpeter Reimplemented.
* Mar 04, 2014 2652 skorolev Corrected NNNXXX pattern.
* Mar 14, 2014 2652 skorolev Changed logging for skipped headers.
* Dec 03, 2014 ASM #16859 D. Friedman Use CharBuffer instead of StringBuilder.
* </pre>
*
* @author
@ -317,7 +320,7 @@ public abstract class WMOMessageSeparator extends AbstractRecordSeparator {
* @param charSet
* @return
*/
static boolean safeStrpbrk(StringBuilder src, Pattern charSet) {
static boolean safeStrpbrk(CharBuffer src, Pattern charSet) {
return getTextSegment(src, null, charSet);
}
@ -333,24 +336,17 @@ public abstract class WMOMessageSeparator extends AbstractRecordSeparator {
* @param charSet
* @return
*/
static boolean getTextSegment(StringBuilder src, StringBuilder out,
static boolean getTextSegment(CharBuffer src, StringBuilder out,
Pattern charSet) {
String s = src.toString();
String[] sArr = charSet.split(s, 2);
Matcher m = charSet.matcher(src);
boolean found = m.find();
int pos = found ? m.start() : src.length();
if (out != null) {
out.setLength(0);
out.append(sArr[0]);
out.append(src, 0, pos);
}
if (sArr.length != 2) {
// no pattern found
src.setLength(0);
return false;
}
src.delete(0, sArr[0].length());
return true;
src.position(src.position() + pos);
return found;
}
/**
@ -361,10 +357,9 @@ public abstract class WMOMessageSeparator extends AbstractRecordSeparator {
* @param charSet
* @return
*/
static String assignTextSegment(String src, Pattern charSet) {
String[] sArr = charSet.split(src, 2);
return sArr[0];
static CharSequence assignTextSegment(CharSequence src, Pattern charSet) {
Matcher m = charSet.matcher(src);
return src.subSequence(0, m.find() ? m.start() : src.length());
}
// -- fileScope
@ -409,10 +404,10 @@ public abstract class WMOMessageSeparator extends AbstractRecordSeparator {
public static final void main(String[] args) {
StringBuilder sb = new StringBuilder(
CharBuffer cb = CharBuffer.wrap(
"\r\r\nKOFF 1912/20/15\n\n\r BECMG");
safeStrpbrk(sb, nl);
safeStrpbrk(cb, nl);
// Pattern NNNXXX = Pattern.compile("\\w{4,6}(?:\\s{1,2})?[\\r\\n]+(?:"
// + (char) 0x1e + ")?");