Issue #3470 Update wrapper to call wrapperCapture.sh on process restart

Former-commit-id: 477119b739 [formerly 477119b739 [formerly e537db61848adeea0cef18e6a769597d746d6620]]
Former-commit-id: 96a1821753
Former-commit-id: f7e0a6cf11
This commit is contained in:
Richard Peter 2014-08-07 14:01:33 -05:00
parent f8b524dbaf
commit fd18b7a134
12 changed files with 230 additions and 39 deletions

View file

@ -72,6 +72,7 @@ export SHLIB_PATH=$PROJECT/sharedlib
### End AWIPS 1 support ###
export HOSTNAME=`hostname`
export SHORT_HOSTNAME=`hostname -s`
# set Python & Java into the path
export PATH=$awips_home/bin:${JAVA_INSTALL}/bin:${PYTHON_INSTALL}/bin:$PATH

View file

@ -99,6 +99,8 @@ wrapper.java.additional.6=-Djava.util.Arrays.useLegacyMergeSort=true
# garbage collection settings
wrapper.java.additional.gc.1=-XX:+UseConcMarkSweepGC
wrapper.java.additional.gc.2=-XX:+CMSIncrementalMode
wrapper.java.additional.gc.3=-XX:+HeapDumpOnOutOfMemoryError
wrapper.java.additional.gc.4=-XX:HeapDumpPath=/data/fxa/cave/${SHORT_HOSTNAME}/
# use qpid binding URL instead of default address string format
wrapper.java.additional.qpid.1=-Dqpid.dest_syntax=BURL
@ -163,7 +165,13 @@ wrapper.java.app.mainclass=com.raytheon.uf.edex.esb.Main
# Application parameters. Add parameters as needed starting from 2
wrapper.app.parameter.2=start
wrapper.ping.timeout=300
wrapper.ping.timeout=30
# NOTE: script must be located at /awips2/qpid/bin/yajsw/scripts for it to be found
wrapper.script.ABORT=wrapperCapture.sh
wrapper.script.ABORT.timeout=120
wrapper.script.RESTART=wrapperCapture.sh
wrapper.script.RESTART.timeout=120
#********************************************************************
# Monitor the Application
@ -174,15 +182,16 @@ wrapper.java.monitor.heap.threshold.percent = 90
wrapper.java.monitor.deadlock = true
# application will be restarted and a warning message will be logged
wrapper.filter.action.deadlock.restart=${WRAPPER_DEADLOCK_ACTION}
wrapper.filter.trigger.deadlock=wrapper.java.monitor.deadlock: DEADLOCK IN THREADS:
wrapper.filter.action.deadlock=${WRAPPER_DEADLOCK_ACTION}
# restart the application if it crashes
wrapper.on_exit.default=${WRAPPER_ON_EXIT_ACTION}
# restart the application if it runs out of memory
wrapper.trigger.1=java.lang.OutOfMemoryError
wrapper.trigger.action=${WRAPPER_TRIGGER_ACTION}
wrapper.filter.trigger.oom=java.lang.OutOfMemoryError
wrapper.filter.action.oom=${WRAPPER_TRIGGER_ACTION}
#********************************************************************
#********************************************************************fil
# Wrapper Logging Properties
#********************************************************************
# Format of output for the console. (See docs for formats)

View file

@ -330,13 +330,13 @@ runJmap() {
fi
local cmd="/awips2/java/bin/jmap -dump:format=b,file=${dumpPath}"
echo "${t1}: Running command: $cmd $options $pid >> $log 2>&1 &" >> $processFile
$cmd $options $pid >> $log 2>&1 &
echo "${t1}: Running command: $cmd $options $pid >> $log 2>&1" >> $processFile
$cmd $options $pid >> $log 2>&1
if [[ "$?" != "0" && $FORCE != "y" ]]; then
t1=`date "+%Y%m%d %H:%M:%S"`
echo "${t1}: jmap for $pid failed to connect, rerunning with -F" >> $processFile
$cmd $options -F $pid >> $log 2>&1 &
$cmd $options -F $pid >> $log 2>&1
fi
}

View file

@ -0,0 +1,180 @@
#!/bin/sh
#####################################################################
# This software was developed and / or modified by Raytheon Company,
# pursuant to Contract DG133W-05-CQ-1067 with the US Government.
#
# U.S. EXPORT CONTROLLED TECHNICAL DATA
# This software product contains export-restricted data whose
# export/transfer/disclosure is restricted by U.S. law. Dissemination
# to non-U.S. persons whether in the United States or abroad requires
# an export license or other authorization.
#
# Contractor Name: Raytheon Company
# Contractor Address: 6825 Pine Street, Suite 340
# Mail Stop B8
# Omaha, NE 68106
# 402.291.0100
#
# See the AWIPS II Master Rights File ("Master Rights File.pdf") for
# further licensing information.
#####################################################################
#####################################################################
# Script for capturing data from a wrapper java process when the
# wrapper restarts the process
#
# SOFTWARE HISTORY
#
# Date Ticket# Engineer Description
# ------------- -------- ----------- --------------------------
# Aug 07, 2014 3470 rjpeter Initial creation
#
#####################################################################
# NOTE: Script must be located at /awips2/qpid/bin/yajsw/scripts for it to work
# base path to save capture data to, will create subdirectory for each server
basePath="/data/fxa/cave"
state=$1
string_state=$2
pid=$4
path_to_script=`readlink -f $0`
curTime=`date +%Y%m%d_%H%M%S`
echo "$curTime: Wrapper running $path_to_script due to state transition for pid $pid. New State $state|$string_state"
# ensure directory is created and has write permissions
checkDir() {
dir="$1"
if [ ! -d "$dir" ]; then
mkdir -p $dir
if [ ! -d "$dir" ]; then
message="Unable to create qpid capture data directory\n$dir"
echo -e "Capture failed: $message"
exit 1
fi
fi
if [ ! -w "$dir" ]; then
message="Do not have write permissions to qpid capture data directory\n$dir"
echo -e "Capture failed: $message"
exit 1
fi
}
# gets top output of local server
runTop() {
local curTime=`date "+%Y%m%d_%H:%M:%S"`
echo "$curTime: Capturing top"
echo "$curTime: Capturing top" >> $processFile
local out_file="${dataPath}/top.log"
export COLUMNS=160
top -b -c -n1 >> $out_file 2>&1
curTime=`date +%Y%m%d_%H%M%S`
echo "$curTime: top captured"
}
# runs jstack 10 times, if it fails will run again with -F
runJstack() {
local curTime=`date +%Y%m%d_%H%M%S`
echo "$curTime: Capturing jstacks"
local pid="$1"
local count=1
local cmd="/awips2/java/bin/jstack"
local prePath="${dataPath}/pid_${pid}_"
local log=""
while [ "$count" -le "10" ]; do
curTime=`date "+%Y%m%d_%H:%M:%S"`
log="${prePath}jstack_${count}.log"
echo "${curTime}: Running command: ${cmd} ${pid} >> ${log} 2>&1" >> $processFile
echo "Running for $curTime" >> $log
${cmd} ${pid} >> ${log} 2>&1
if [[ "$?" != "0" && $FORCE != "y" ]]; then
curTime=`date "+%Y%m%d_%H:%M:%S"`
echo "${curTime}: jstack for $pid failed to connect, rerunning with -F" >> $processFile
${cmd} -F ${pid} >> ${log} 2>&1
fi
let "count+=1"
done
curTime=`date +%Y%m%d_%H%M%S`
echo "$curTime: jstacks captured"
}
# runs jmap -heap
runJmapHeap() {
local curTime=`date +%Y%m%d_%H%M%S`
echo "$curTime: Capturing jmap -heap"
local pid=$1
local prePath="${dataPath}/pid_${pid}_"
local log="${prePath}jmapHeap.log"
local cmd="/awips2/java/bin/jmap -heap"
echo "${curTime}: Running command: $cmd $pid >> $log 2>&1" >> $processFile
$cmd $pid >> $log 2>&1
if [[ "$?" != "0" && $FORCE != "y" ]]; then
curTime=`date "+%Y%m%d_%H:%M:%S"`
echo "${curTime}: jmap for $pid failed to connect, rerunning with -F" >> $processFile
$cmd -F $pid >> $log 2>&1
fi
curTime=`date +%Y%m%d_%H%M%S`
echo "$curTime: jmap -heap captured"
}
# runs jmap, if it fails will run again with -F
runJmap() {
local curTime=`date +%Y%m%d_%H%M%S`
echo "$curTime: Capturing jmap -dump"
local pid=$1
local prePath="${dataPath}/pid_${pid}_jmap"
local log="${prePath}.log"
local dumpPath="${prePath}.hprof"
local cmd="/awips2/java/bin/jmap -dump:format=b,file=${dumpPath}"
echo "${curTime}: Running command: $cmd $pid >> $log 2>&1" >> $processFile
$cmd $pid >> $log 2>&1
if [[ "$?" != "0" && $FORCE != "y" ]]; then
curTime=`date "+%Y%m%d_%H:%M:%S"`
echo "${curTime}: jmap for $pid failed to connect, rerunning with -F" >> $processFile
$cmd -F $pid >> $log 2>&1
fi
curTime=`date +%Y%m%d_%H%M%S`
echo "$curTime: jmap -dump captured"
}
if [[ "$pid" != "-1" ]]; then
process=`ps -ef | grep $pid | grep java`
if [[ "$process" != "" ]]; then
hostName=`hostname -s`
dataPath="${basePath}/${hostName}/wrapperCaptureData_${curTime}_pid_$pid"
checkDir $dataPath
processFile=${dataPath}/capture_info.log
echo "Wrapper running $0 due to state transition for pid $pid. New State $state|$string_state" >> $processFile
echo "Process information:" >> $processFile
ps -ef | grep $pid >> $processFile
runTop &
runJstack $pid &
runJmapHeap $pid &
# TODO: Double check if jvm already dumped one
runJmap $pid &
wait
curTime=`date +%Y%m%d_%H%M%S`
echo "$curTime: Data captured to $dataPath"
else
curTime=`date +%Y%m%d_%H%M%S`
echo "$curTime: PID $pid is no longer running, nothing to capture"
fi
else
curTime=`date +%Y%m%d_%H%M%S`
echo "$curTime: PID was -1, process no longer running, nothing to capture"
fi

View file

@ -5,4 +5,5 @@ yajsw/src/main/java/org/rzo/yajsw/os/posix/bsd/AppStarter.java
yajsw/src/main/java/org/rzo/yajsw/os/posix/bsd/BSDProcess.java
yajsw/src/main/java/org/rzo/yajsw/wrapper/AbstractWrappedProcess.java
yajsw/src/main/java/org/rzo/yajsw/wrapper/WrappedJavaProcess.java
yajsw/src/main/java/org/rzo/yajsw/script/AbstractScript.java
yajsw/src/main/java/org/rzo/yajsw/script/ShellScript.java

View file

@ -81,36 +81,29 @@ public abstract class AbstractScript implements Script
synchronized public void executeWithTimeout(final String line)
{
Object result = null;
_timerTimeout = TIMER.newTimeout(new TimerTask()
{
/**
* Changed by rjpeter Aug 07, 2014.
*/
_future = EXECUTOR.submit(new Callable<Object>() {
@Override
public Object call() {
return execute(line);
}
});
public void run(Timeout arg0) throws Exception
{
log("script takes too long -> interrupt");
try
{
interrupt();
}
catch (Throwable e)
{
}
}
}
, _timeout, TimeUnit.MILLISECONDS);
_future = EXECUTOR.submit(new Callable<Object>()
{
public Object call()
{
Object result = execute(line);
if (_timerTimeout != null)
_timerTimeout.cancel();
_timerTimeout = null;
return result;
}
});
// wait for script to finish
try {
_future.get(_timeout, TimeUnit.MILLISECONDS);
} catch (TimeoutException e) {
log("script " + _name + " took too long -> interrupt");
try {
interrupt();
} catch (Throwable e1) {
}
} catch (Exception e) {
}
}
/*

View file

@ -70,7 +70,14 @@ wrapper.java.additional.7=-Dqpid.broker.exceptionHandler.continue=true
# Maximum Java Heap Size (in MB)
wrapper.java.maxmemory=1536
wrapper.ping.timeout=300
wrapper.ping.interval=5
wrapper.ping.timeout=30
# NOTE: script must be located at /awips2/qpid/bin/yajsw/scripts for it to be found
wrapper.script.ABORT=wrapperCapture.sh
wrapper.script.ABORT.timeout=120
wrapper.script.RESTART=wrapperCapture.sh
wrapper.script.RESTART.timeout=120
#********************************************************************
# Monitor the Application

View file

@ -14,7 +14,7 @@ diff -crB a/qpid-java.spec b/qpid-java.spec
!
! Name: awips2-qpid-java
Version: 0.18
! Release: 4%{?dist}
! Release: 5%{?dist}
Summary: Java implementation of Apache Qpid
License: Apache Software License
Group: Development/Java