From 47f151fe85b62af32d56eb58afac08f9ab3d2218 Mon Sep 17 00:00:00 2001 From: David Friedman Date: Wed, 3 Dec 2014 15:44:27 +0000 Subject: [PATCH] ASM #16859 - Text decoder processes collectives inefficiently Change-Id: Ib38ca77d66eccc84b7ffb64901b2eef64ee82af4 Former-commit-id: dac7434202e9d5b5aafc4784f6a9201b5a9f0394 --- .../separator/StdCollectiveSeparator.java | 68 ++++++++++++------- .../impl/separator/UACollectiveSeparator.java | 33 +++++---- .../impl/separator/WMOMessageSeparator.java | 37 +++++----- 3 files changed, 77 insertions(+), 61 deletions(-) diff --git a/edexOsgi/com.raytheon.uf.edex.plugin.text/src/com/raytheon/uf/edex/plugin/text/impl/separator/StdCollectiveSeparator.java b/edexOsgi/com.raytheon.uf.edex.plugin.text/src/com/raytheon/uf/edex/plugin/text/impl/separator/StdCollectiveSeparator.java index c8c310a063..e59db7a2d1 100644 --- a/edexOsgi/com.raytheon.uf.edex.plugin.text/src/com/raytheon/uf/edex/plugin/text/impl/separator/StdCollectiveSeparator.java +++ b/edexOsgi/com.raytheon.uf.edex.plugin.text/src/com/raytheon/uf/edex/plugin/text/impl/separator/StdCollectiveSeparator.java @@ -19,6 +19,9 @@ **/ package com.raytheon.uf.edex.plugin.text.impl.separator; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -49,6 +52,7 @@ import com.raytheon.uf.edex.plugin.text.impl.WMOReportData; * Fixed calculation of message end. * Apr 01, 2014 2915 dgilling Support re-factored TextDBStaticData. * May 14, 2014 2536 bclement moved WMO Header to common + * Dec 03, 2014 ASM #16859 D. Friedman Use CharBuffer instead of StringBuilder. * * * @author jkorman @@ -115,6 +119,12 @@ public class StdCollectiveSeparator extends WMOMessageSeparator { } } + static boolean charSeqStartsWith(CharSequence sequence, String searchString) { + return sequence.length() >= searchString.length() + && searchString.contentEquals(sequence.subSequence(0, + searchString.length())); + } + /* * (non-Javadoc) * @@ -142,14 +152,17 @@ public class StdCollectiveSeparator extends WMOMessageSeparator { if (endIndex <= startIndex) { endIndex = rawData.length - 1; } - String rawMsg = new String(rawData, startIndex, endIndex - startIndex); - StringBuilder sb = null; - if ((rawMsg.indexOf(METAR) == 0) || (rawMsg.indexOf(SPECI) == 0)) { - productType = (rawMsg.indexOf(METAR) == 0 ? METAR : SPECI); + + CharBuffer rawMsg = Charset.forName("ISO-8859-1").decode( + ByteBuffer.wrap(rawData, startIndex, endIndex - startIndex)); + if (charSeqStartsWith(rawMsg, METAR)) { + productType = METAR; + } else if (charSeqStartsWith(rawMsg, SPECI)) { + productType = SPECI; } if ("TAF".equals(afos_id.getNnn())) { - sb = new StringBuilder(rawMsg); + StringBuilder sb = new StringBuilder(rawMsg); Matcher m = P_TAF.matcher(sb); while (m.find()) { sb.delete(m.start(), m.end()); @@ -157,13 +170,16 @@ public class StdCollectiveSeparator extends WMOMessageSeparator { } sb.insert(0, "\n"); sb.insert(0, "TAFXXX\nTAF"); - rawMsg = sb.toString(); + rawMsg = CharBuffer.allocate(sb.length()); + rawMsg.append(sb); + rawMsg.flip(); } Matcher nnnxxxMatcher = NNNXXX.matcher(rawMsg); if (nnnxxxMatcher.find() && nnnxxxMatcher.start() == 0) { - rawMsg = rawMsg.substring(nnnxxxMatcher.end()); + rawMsg.position(rawMsg.position() + nnnxxxMatcher.end()); } - StringBuilder buffer = new StringBuilder(rawMsg); + + CharBuffer buffer = rawMsg; boolean parsing = true; while (parsing) { @@ -276,7 +292,7 @@ public class StdCollectiveSeparator extends WMOMessageSeparator { // filter out junk characters while (buffer.length() > 0 && !checkCharNum(buffer.charAt(0))) { - buffer.deleteCharAt(0); + buffer.get(); } // again, trash data if it is less than 20 bytes @@ -321,9 +337,9 @@ public class StdCollectiveSeparator extends WMOMessageSeparator { * @param XXX_id * @param parsedMsg */ - private void parseCollMsg(StringBuilder buffer, StringBuilder XXX_id, + private void parseCollMsg(CharBuffer buffer, StringBuilder XXX_id, StringBuilder parsedMsg) { - String msgId = null; + CharSequence msgId = null; // Check the status of the special case flags and if necessary, // skip the special case characters. @@ -331,8 +347,8 @@ public class StdCollectiveSeparator extends WMOMessageSeparator { if (checkFouHeader && fouFlag) { // Get the length of the FWC header section and save section // to store at the beginning of each product. - if (buffer.charAt(0) == (char) 0x1e) { - buffer.deleteCharAt(0); + if (buffer.length() > 0 && buffer.charAt(0) == (char) 0x1e) { + buffer.get(); } if (!getTextSegment(buffer, fouHeader, OCSEP)) { @@ -344,8 +360,8 @@ public class StdCollectiveSeparator extends WMOMessageSeparator { checkFouHeader = false; } - String blank = buffer.substring(0, - buffer.length() < 5 ? buffer.length() : 5); + String blank = buffer.subSequence(0, Math.min(5, buffer.length())) + .toString(); if (blank.equals("METAR") || blank.equals("SPECI") || blank.equals("TESTM") || blank.equals("TESTS")) { @@ -353,9 +369,9 @@ public class StdCollectiveSeparator extends WMOMessageSeparator { return; } - buffer.deleteCharAt(0); + buffer.get(); if (buffer.charAt(0) == ' ') { - buffer.deleteCharAt(0); + buffer.get(); } reportType = blank; @@ -366,20 +382,20 @@ public class StdCollectiveSeparator extends WMOMessageSeparator { while (buffer.length() > 0) { char c = buffer.charAt(0); if ((c == '\n') || (c == '\r')) { - buffer.deleteCharAt(0); + buffer.get(); } else { break; } } // The next test on blank uses at most three characters - blank = buffer.substring(0, buffer.length() < 3 ? buffer.length() - : 3); + blank = buffer.subSequence(0, Math.min(3, buffer.length())) + .toString(); } else if (pirFlag) { if (buffer != null) { for (int i = 0; i < buffer.length(); i++) { if (buffer.charAt(i) == '\r') { - buffer.setCharAt(i, '\n'); + buffer.put(buffer.position() + i, '\n'); } } @@ -391,7 +407,7 @@ public class StdCollectiveSeparator extends WMOMessageSeparator { while (buffer.length() > 0) { char c = buffer.charAt(0); if ((c == ' ') || (c == '\n')) { - buffer.deleteCharAt(0); + buffer.get(); } else { break; } @@ -400,17 +416,17 @@ public class StdCollectiveSeparator extends WMOMessageSeparator { } pirFlag = false; } - blank = buffer.toString(); + blank = buffer.subSequence(0, Math.min(3, buffer.length())).toString(); if (blank.startsWith("AMD") || blank.startsWith("COR")) { if (safeStrpbrk(buffer, CSPC)) { - buffer.deleteCharAt(0); + buffer.get(); } } // Skip junk characters while (buffer.length() > 0 && !(checkCharNum(buffer.charAt(0)) && (buffer.charAt(0) != EOM))) { - buffer.deleteCharAt(0); + buffer.get(); } // Grab the first word of each line to act as the XXX of the afos id. @@ -452,7 +468,7 @@ public class StdCollectiveSeparator extends WMOMessageSeparator { parsedMsg.setLength(parsedMsg.length() - 3); } else if (buffer.charAt(0) == '=') { if (safeStrpbrk(buffer, CSPL)) { - buffer.deleteCharAt(0); + buffer.get(); } } else if ((buffer.charAt(0) == EOM) && (parsedMsg.length() > (MIN_COLL_DATA_LEN - 1))) { diff --git a/edexOsgi/com.raytheon.uf.edex.plugin.text/src/com/raytheon/uf/edex/plugin/text/impl/separator/UACollectiveSeparator.java b/edexOsgi/com.raytheon.uf.edex.plugin.text/src/com/raytheon/uf/edex/plugin/text/impl/separator/UACollectiveSeparator.java index 17eaafe07e..9f022fcc29 100644 --- a/edexOsgi/com.raytheon.uf.edex.plugin.text/src/com/raytheon/uf/edex/plugin/text/impl/separator/UACollectiveSeparator.java +++ b/edexOsgi/com.raytheon.uf.edex.plugin.text/src/com/raytheon/uf/edex/plugin/text/impl/separator/UACollectiveSeparator.java @@ -19,6 +19,9 @@ **/ package com.raytheon.uf.edex.plugin.text.impl.separator; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; @@ -47,6 +50,7 @@ import com.raytheon.uf.edex.plugin.text.impl.WMOReportData; * Apr 01, 2014 2915 dgilling Support re-factored TextDBStaticData. * Apr 02, 2014 2652 skorolev Corrected a removing of excess control characters. * May 14, 2014 2536 bclement moved WMO Header to common + * Dec 03, 2014 ASM #16859 D. Friedman Use CharBuffer instead of StringBuilder. * * * @author jkorman @@ -122,12 +126,13 @@ public class UACollectiveSeparator extends WMOMessageSeparator { if (endIndex <= startIndex) { endIndex = rawData.length - 1; } - String rawMsg = new String(rawData, startIndex, endIndex - startIndex); + CharBuffer rawMsg = Charset.forName("ISO-8859-1").decode( + ByteBuffer.wrap(rawData, startIndex, endIndex - startIndex)); Matcher nnnxxxMatcher = NNNXXX.matcher(rawMsg); if (nnnxxxMatcher.find() && nnnxxxMatcher.start() == 0) { - rawMsg = rawMsg.substring(nnnxxxMatcher.end()); + rawMsg.position(rawMsg.position() + nnnxxxMatcher.end()); } - StringBuilder buffer = new StringBuilder(rawMsg); + CharBuffer buffer = rawMsg; String hdrStr = wmoHdr.getWmoHeader(); String dataDes = createDataDes(wmoHdr); String origin = wmoHdr.getCccc(); @@ -213,7 +218,7 @@ public class UACollectiveSeparator extends WMOMessageSeparator { // filter out junk characters while (buffer.length() > 0 && !checkCharNum(buffer.charAt(0))) { - buffer.deleteCharAt(0); + buffer.get(); } // again, trash data if it is less than 20 bytes @@ -400,35 +405,35 @@ public class UACollectiveSeparator extends WMOMessageSeparator { // --------------------------------------------------------- // Called by the decodeUpAirMsg function. // --------------------------------------------------------------------------- - private void parseUpairMsg(StringBuilder buffer, StringBuilder stationNum, + private void parseUpairMsg(CharBuffer buffer, StringBuilder stationNum, StringBuilder parsedMsg, String dataDes) { stationNum.setLength(0); // Check each message for the \036 record separator and increment past // it. if (!checkCharNum(buffer.charAt(0))) { - buffer.deleteCharAt(0); + buffer.get(); } // Check for UEXX or UJXX formatted messages and decode if (dataDes.endsWith("XX") || dataDes.endsWith("81") || dataDes.endsWith("82")) { - stationNum.append(assignTextSegment(buffer.toString(), CSPC)); + stationNum.append(assignTextSegment(buffer, CSPC)); getTextSegment(buffer, parsedMsg, MARKERANDEOM); } // Check for USUS80 or 90 formatted messages and decode else if (dataDes.endsWith("80") || dataDes.endsWith("90")) { if (checkCharNum(buffer.charAt(0))) { - buffer.deleteCharAt(0); + buffer.get(); } else { - stationNum.append(assignTextSegment(buffer.toString(), CSPC)); + stationNum.append(assignTextSegment(buffer, CSPC)); } getTextSegment(buffer, parsedMsg, CSEP); } else { // Otherwise it's standard format so decode if (!checkCharNum(buffer.charAt(0))) { - buffer.deleteCharAt(0); + buffer.get(); } else { // Move to the third field of the message to get the station @@ -444,11 +449,11 @@ public class UACollectiveSeparator extends WMOMessageSeparator { if (len - buffer.length() >= 4) { x++; } - buffer.deleteCharAt(0); + buffer.get(); } } - stationNum.append(assignTextSegment(buffer.toString(), CSPC)); + stationNum.append(assignTextSegment(buffer, CSPC)); } getTextSegment(buffer, parsedMsg, CSEP); @@ -462,11 +467,11 @@ public class UACollectiveSeparator extends WMOMessageSeparator { trim_message(parsedMsg); } } else if (buffer.charAt(0) == '=') { - buffer.deleteCharAt(0); + buffer.get(); while (buffer.length() > 0) { char c = buffer.charAt(0); if ((c == '\n') || (c == '\r')) { - buffer.deleteCharAt(0); + buffer.get(); } else { break; } diff --git a/edexOsgi/com.raytheon.uf.edex.plugin.text/src/com/raytheon/uf/edex/plugin/text/impl/separator/WMOMessageSeparator.java b/edexOsgi/com.raytheon.uf.edex.plugin.text/src/com/raytheon/uf/edex/plugin/text/impl/separator/WMOMessageSeparator.java index 4f560d0f43..99b3a8fc12 100644 --- a/edexOsgi/com.raytheon.uf.edex.plugin.text/src/com/raytheon/uf/edex/plugin/text/impl/separator/WMOMessageSeparator.java +++ b/edexOsgi/com.raytheon.uf.edex.plugin.text/src/com/raytheon/uf/edex/plugin/text/impl/separator/WMOMessageSeparator.java @@ -19,10 +19,12 @@ **/ package com.raytheon.uf.edex.plugin.text.impl.separator; +import java.nio.CharBuffer; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.logging.Log; @@ -48,6 +50,7 @@ import com.raytheon.uf.edex.plugin.text.impl.WMOReportData; * Jul 10, 2009 2191 rjpeter Reimplemented. * Mar 04, 2014 2652 skorolev Corrected NNNXXX pattern. * Mar 14, 2014 2652 skorolev Changed logging for skipped headers. + * Dec 03, 2014 ASM #16859 D. Friedman Use CharBuffer instead of StringBuilder. * * * @author @@ -317,7 +320,7 @@ public abstract class WMOMessageSeparator extends AbstractRecordSeparator { * @param charSet * @return */ - static boolean safeStrpbrk(StringBuilder src, Pattern charSet) { + static boolean safeStrpbrk(CharBuffer src, Pattern charSet) { return getTextSegment(src, null, charSet); } @@ -333,24 +336,17 @@ public abstract class WMOMessageSeparator extends AbstractRecordSeparator { * @param charSet * @return */ - static boolean getTextSegment(StringBuilder src, StringBuilder out, + static boolean getTextSegment(CharBuffer src, StringBuilder out, Pattern charSet) { - String s = src.toString(); - String[] sArr = charSet.split(s, 2); - + Matcher m = charSet.matcher(src); + boolean found = m.find(); + int pos = found ? m.start() : src.length(); if (out != null) { out.setLength(0); - out.append(sArr[0]); + out.append(src, 0, pos); } - - if (sArr.length != 2) { - // no pattern found - src.setLength(0); - return false; - } - - src.delete(0, sArr[0].length()); - return true; + src.position(src.position() + pos); + return found; } /** @@ -361,10 +357,9 @@ public abstract class WMOMessageSeparator extends AbstractRecordSeparator { * @param charSet * @return */ - static String assignTextSegment(String src, Pattern charSet) { - String[] sArr = charSet.split(src, 2); - - return sArr[0]; + static CharSequence assignTextSegment(CharSequence src, Pattern charSet) { + Matcher m = charSet.matcher(src); + return src.subSequence(0, m.find() ? m.start() : src.length()); } // -- fileScope @@ -409,10 +404,10 @@ public abstract class WMOMessageSeparator extends AbstractRecordSeparator { public static final void main(String[] args) { - StringBuilder sb = new StringBuilder( + CharBuffer cb = CharBuffer.wrap( "\r\r\nKOFF 1912/20/15\n\n\r BECMG"); - safeStrpbrk(sb, nl); + safeStrpbrk(cb, nl); // Pattern NNNXXX = Pattern.compile("\\w{4,6}(?:\\s{1,2})?[\\r\\n]+(?:" // + (char) 0x1e + ")?");