Issue #3470 Update wrapper to call wrapperCapture.sh on process restart
Former-commit-id:477119b739
[formerly477119b739
[formerly e537db61848adeea0cef18e6a769597d746d6620]] Former-commit-id:96a1821753
Former-commit-id:f7e0a6cf11
This commit is contained in:
parent
f8b524dbaf
commit
fd18b7a134
12 changed files with 230 additions and 39 deletions
|
@ -72,6 +72,7 @@ export SHLIB_PATH=$PROJECT/sharedlib
|
|||
### End AWIPS 1 support ###
|
||||
|
||||
export HOSTNAME=`hostname`
|
||||
export SHORT_HOSTNAME=`hostname -s`
|
||||
|
||||
# set Python & Java into the path
|
||||
export PATH=$awips_home/bin:${JAVA_INSTALL}/bin:${PYTHON_INSTALL}/bin:$PATH
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -99,6 +99,8 @@ wrapper.java.additional.6=-Djava.util.Arrays.useLegacyMergeSort=true
|
|||
# garbage collection settings
|
||||
wrapper.java.additional.gc.1=-XX:+UseConcMarkSweepGC
|
||||
wrapper.java.additional.gc.2=-XX:+CMSIncrementalMode
|
||||
wrapper.java.additional.gc.3=-XX:+HeapDumpOnOutOfMemoryError
|
||||
wrapper.java.additional.gc.4=-XX:HeapDumpPath=/data/fxa/cave/${SHORT_HOSTNAME}/
|
||||
|
||||
# use qpid binding URL instead of default address string format
|
||||
wrapper.java.additional.qpid.1=-Dqpid.dest_syntax=BURL
|
||||
|
@ -163,7 +165,13 @@ wrapper.java.app.mainclass=com.raytheon.uf.edex.esb.Main
|
|||
# Application parameters. Add parameters as needed starting from 2
|
||||
wrapper.app.parameter.2=start
|
||||
|
||||
wrapper.ping.timeout=300
|
||||
wrapper.ping.timeout=30
|
||||
|
||||
# NOTE: script must be located at /awips2/qpid/bin/yajsw/scripts for it to be found
|
||||
wrapper.script.ABORT=wrapperCapture.sh
|
||||
wrapper.script.ABORT.timeout=120
|
||||
wrapper.script.RESTART=wrapperCapture.sh
|
||||
wrapper.script.RESTART.timeout=120
|
||||
|
||||
#********************************************************************
|
||||
# Monitor the Application
|
||||
|
@ -174,15 +182,16 @@ wrapper.java.monitor.heap.threshold.percent = 90
|
|||
|
||||
wrapper.java.monitor.deadlock = true
|
||||
# application will be restarted and a warning message will be logged
|
||||
wrapper.filter.action.deadlock.restart=${WRAPPER_DEADLOCK_ACTION}
|
||||
wrapper.filter.trigger.deadlock=wrapper.java.monitor.deadlock: DEADLOCK IN THREADS:
|
||||
wrapper.filter.action.deadlock=${WRAPPER_DEADLOCK_ACTION}
|
||||
|
||||
# restart the application if it crashes
|
||||
wrapper.on_exit.default=${WRAPPER_ON_EXIT_ACTION}
|
||||
# restart the application if it runs out of memory
|
||||
wrapper.trigger.1=java.lang.OutOfMemoryError
|
||||
wrapper.trigger.action=${WRAPPER_TRIGGER_ACTION}
|
||||
wrapper.filter.trigger.oom=java.lang.OutOfMemoryError
|
||||
wrapper.filter.action.oom=${WRAPPER_TRIGGER_ACTION}
|
||||
|
||||
#********************************************************************
|
||||
#********************************************************************
|
||||
# Wrapper Logging Properties
|
||||
#********************************************************************
|
||||
# Format of output for the console. (See docs for formats)
|
||||
|
|
|
@ -330,13 +330,13 @@ runJmap() {
|
|||
fi
|
||||
|
||||
local cmd="/awips2/java/bin/jmap -dump:format=b,file=${dumpPath}"
|
||||
echo "${t1}: Running command: $cmd $options $pid >> $log 2>&1 &" >> $processFile
|
||||
$cmd $options $pid >> $log 2>&1 &
|
||||
echo "${t1}: Running command: $cmd $options $pid >> $log 2>&1" >> $processFile
|
||||
$cmd $options $pid >> $log 2>&1
|
||||
|
||||
if [[ "$?" != "0" && $FORCE != "y" ]]; then
|
||||
t1=`date "+%Y%m%d %H:%M:%S"`
|
||||
echo "${t1}: jmap for $pid failed to connect, rerunning with -F" >> $processFile
|
||||
$cmd $options -F $pid >> $log 2>&1 &
|
||||
$cmd $options -F $pid >> $log 2>&1
|
||||
fi
|
||||
}
|
||||
|
||||
|
|
180
javaUtilities/yajsw-scripts/wrapperCapture.sh
Executable file
180
javaUtilities/yajsw-scripts/wrapperCapture.sh
Executable file
|
@ -0,0 +1,180 @@
|
|||
#!/bin/bash
|
||||
#####################################################################
|
||||
# This software was developed and / or modified by Raytheon Company,
|
||||
# pursuant to Contract DG133W-05-CQ-1067 with the US Government.
|
||||
#
|
||||
# U.S. EXPORT CONTROLLED TECHNICAL DATA
|
||||
# This software product contains export-restricted data whose
|
||||
# export/transfer/disclosure is restricted by U.S. law. Dissemination
|
||||
# to non-U.S. persons whether in the United States or abroad requires
|
||||
# an export license or other authorization.
|
||||
#
|
||||
# Contractor Name: Raytheon Company
|
||||
# Contractor Address: 6825 Pine Street, Suite 340
|
||||
# Mail Stop B8
|
||||
# Omaha, NE 68106
|
||||
# 402.291.0100
|
||||
#
|
||||
# See the AWIPS II Master Rights File ("Master Rights File.pdf") for
|
||||
# further licensing information.
|
||||
#####################################################################
|
||||
#####################################################################
|
||||
# Script for capturing data from a wrapper java process when the
|
||||
# wrapper restarts the process
|
||||
#
|
||||
# SOFTWARE HISTORY
|
||||
#
|
||||
# Date Ticket# Engineer Description
|
||||
# ------------- -------- ----------- --------------------------
|
||||
# Aug 07, 2014 3470 rjpeter Initial creation
|
||||
#
|
||||
#####################################################################
|
||||
# NOTE: Script must be located at /awips2/qpid/bin/yajsw/scripts for it to work
|
||||
|
||||
# base path to save capture data to, will create subdirectory for each server
basePath="/data/fxa/cave"

# Positional arguments supplied by the caller (presumably the YAJSW wrapper,
# see wrapper.script.ABORT / wrapper.script.RESTART -- confirm the argument
# order against the wrapper): $1 state, $2 state as a string, $4 pid of the
# wrapped process. $3 is not used by this script.
state=$1
string_state=$2
pid=$4

# $0 is quoted so a script location containing whitespace resolves correctly
path_to_script=$(readlink -f -- "$0")
curTime=$(date +%Y%m%d_%H%M%S)
echo "$curTime: Wrapper running $path_to_script due to state transition for pid $pid. New State $state|$string_state"
|
||||
|
||||
# Ensures the capture directory exists and is writable.
#
# Arguments: $1 - directory to verify; created (including parents) if missing
# Outputs:   a two-line "Capture failed: ..." message on stdout on failure
# Returns:   normally on success; terminates the script with exit status 1
#            when the directory cannot be created or is not writable
checkDir() {
    local dir="$1"

    if [ ! -d "$dir" ]; then
        # quoted so a path containing whitespace stays a single argument
        mkdir -p -- "$dir"

        # re-test instead of trusting mkdir's status
        if [ ! -d "$dir" ]; then
            # printf instead of 'echo -e': the latter is not portable under sh
            printf 'Capture failed: %s\n%s\n' \
                "Unable to create qpid capture data directory" "$dir"
            exit 1
        fi
    fi

    if [ ! -w "$dir" ]; then
        printf 'Capture failed: %s\n%s\n' \
            "Do not have write permissions to qpid capture data directory" "$dir"
        exit 1
    fi
}
|
||||
|
||||
# Captures one batch-mode snapshot of top for the local server.
#
# Globals:   dataPath (read)    - directory that receives top.log
#            processFile (read) - capture log that records this step
#            COLUMNS (exported) - set to 160 before top is started
# Outputs:   start/finish progress lines on stdout; the top output (stdout
#            and stderr) is appended to ${dataPath}/top.log
runTop() {
    local curTime
    local out_file="${dataPath}/top.log"

    curTime=$(date "+%Y%m%d_%H:%M:%S")
    echo "$curTime: Capturing top"
    # redirect targets are quoted so a capture path with whitespace works
    echo "$curTime: Capturing top" >> "$processFile"

    # NOTE(review): COLUMNS is presumably widened so the command lines printed
    # by 'top -c' are not cut off at the default width -- confirm
    export COLUMNS=160
    top -b -c -n1 >> "$out_file" 2>&1

    curTime=$(date +%Y%m%d_%H%M%S)
    echo "$curTime: top captured"
}
|
||||
|
||||
# Runs jstack against a process 10 times, writing one log file per run.
# When a run fails and FORCE is not "y", the run is repeated with -F.
#
# Arguments: $1 - pid of the process to inspect
# Globals:   dataPath (read)    - directory that receives the jstack logs
#            processFile (read) - capture log that records each command
#            FORCE (read)       - "y" disables the -F retry
# Outputs:   start/finish progress lines on stdout; jstack output goes to
#            ${dataPath}/pid_<pid>_jstack_<n>.log for n = 1..10
runJstack() {
    local pid="$1"
    local cmd="/awips2/java/bin/jstack"
    local prePath="${dataPath}/pid_${pid}_"
    local count=1
    local log=""
    local curTime

    curTime=$(date +%Y%m%d_%H%M%S)
    echo "$curTime: Capturing jstacks"

    while [ "$count" -le 10 ]; do
        curTime=$(date "+%Y%m%d_%H:%M:%S")
        log="${prePath}jstack_${count}.log"

        # redirect targets are quoted so a capture path with whitespace works
        echo "${curTime}: Running command: ${cmd} ${pid} >> ${log} 2>&1" >> "$processFile"
        echo "Running for $curTime" >> "$log"

        # retry with -F only when the plain run failed and FORCE is not "y"
        if ! "$cmd" "$pid" >> "$log" 2>&1 && [ "$FORCE" != "y" ]; then
            curTime=$(date "+%Y%m%d_%H:%M:%S")
            echo "${curTime}: jstack for $pid failed to connect, rerunning with -F" >> "$processFile"
            "$cmd" -F "$pid" >> "$log" 2>&1
        fi

        count=$((count + 1))
    done

    curTime=$(date +%Y%m%d_%H%M%S)
    echo "$curTime: jstacks captured"
}
|
||||
|
||||
# Runs 'jmap -heap' against a process and stores the output in a log file.
# When the run fails and FORCE is not "y", it is repeated with -F.
#
# Arguments: $1 - pid of the process to inspect
# Globals:   dataPath (read)    - directory that receives the log
#            processFile (read) - capture log that records the command
#            FORCE (read)       - "y" disables the -F retry
# Outputs:   start/finish progress lines on stdout; jmap output goes to
#            ${dataPath}/pid_<pid>_jmapHeap.log
runJmapHeap() {
    local pid="$1"
    local jmap="/awips2/java/bin/jmap"
    # display form of the command, only used for the capture log entry
    local cmd="${jmap} -heap"
    local prePath="${dataPath}/pid_${pid}_"
    local log="${prePath}jmapHeap.log"
    local curTime

    curTime=$(date +%Y%m%d_%H%M%S)
    echo "$curTime: Capturing jmap -heap"

    # redirect targets are quoted so a capture path with whitespace works
    echo "${curTime}: Running command: $cmd $pid >> $log 2>&1" >> "$processFile"

    # retry with -F only when the plain run failed and FORCE is not "y"
    if ! "$jmap" -heap "$pid" >> "$log" 2>&1 && [ "$FORCE" != "y" ]; then
        curTime=$(date "+%Y%m%d_%H:%M:%S")
        echo "${curTime}: jmap for $pid failed to connect, rerunning with -F" >> "$processFile"
        "$jmap" -heap -F "$pid" >> "$log" 2>&1
    fi

    curTime=$(date +%Y%m%d_%H%M%S)
    echo "$curTime: jmap -heap captured"
}
|
||||
|
||||
# Runs 'jmap -dump' against a process to write a binary heap dump.
# When the run fails and FORCE is not "y", it is repeated with -F.
#
# Arguments: $1 - pid of the process to dump
# Globals:   dataPath (read)    - directory that receives the dump and log
#            processFile (read) - capture log that records the command
#            FORCE (read)       - "y" disables the -F retry
# Outputs:   start/finish progress lines on stdout; the dump is requested at
#            ${dataPath}/pid_<pid>_jmap.hprof and jmap's own output goes to
#            ${dataPath}/pid_<pid>_jmap.log
runJmap() {
    local pid="$1"
    local prePath="${dataPath}/pid_${pid}_jmap"
    local log="${prePath}.log"
    local dumpPath="${prePath}.hprof"
    local jmap="/awips2/java/bin/jmap"
    # kept as ONE argument so a dump path with whitespace is not split
    local dumpOpt="-dump:format=b,file=${dumpPath}"
    # display form of the command, only used for the capture log entry
    local cmd="${jmap} ${dumpOpt}"
    local curTime

    curTime=$(date +%Y%m%d_%H%M%S)
    echo "$curTime: Capturing jmap -dump"

    # redirect targets are quoted so a capture path with whitespace works
    echo "${curTime}: Running command: $cmd $pid >> $log 2>&1" >> "$processFile"

    # retry with -F only when the plain run failed and FORCE is not "y"
    if ! "$jmap" "$dumpOpt" "$pid" >> "$log" 2>&1 && [ "$FORCE" != "y" ]; then
        curTime=$(date "+%Y%m%d_%H:%M:%S")
        echo "${curTime}: jmap for $pid failed to connect, rerunning with -F" >> "$processFile"
        "$jmap" "$dumpOpt" -F "$pid" >> "$log" 2>&1
    fi

    curTime=$(date +%Y%m%d_%H%M%S)
    echo "$curTime: jmap -dump captured"
}
|
||||
|
||||
|
||||
|
||||
# Capture driver: when the wrapped java process identified by $pid is still
# running, create a per-capture directory and collect top, jstack,
# jmap -heap and jmap -dump output in parallel; otherwise only report that
# there is nothing to capture.
if [ "$pid" != "-1" ]; then
    process=""
    case "$pid" in
        '' | *[!0-9]*)
            # missing or non-numeric pid: nothing can be looked up, so it is
            # reported below as "no longer running"
            ;;
        *)
            # Query exactly this pid. Grepping the whole 'ps -ef' listing
            # would also match the number inside other pids, ppids or
            # command-line arguments.
            process=$(ps -p "$pid" -o args= 2>/dev/null | grep java)
            ;;
    esac

    if [ -n "$process" ]; then
        hostName=$(hostname -s)
        dataPath="${basePath}/${hostName}/wrapperCaptureData_${curTime}_pid_$pid"
        checkDir "$dataPath"
        processFile="${dataPath}/capture_info.log"
        echo "Wrapper running $0 due to state transition for pid $pid. New State $state|$string_state" >> "$processFile"
        echo "Process information:" >> "$processFile"
        # -w: match the pid only as a whole word (still includes lines where
        # it appears as the parent pid)
        ps -ef | grep -w -- "$pid" >> "$processFile"

        # run all captures concurrently, then block until every one is done
        runTop &
        runJstack "$pid" &
        runJmapHeap "$pid" &
        # TODO: Double check if jvm already dumped one
        runJmap "$pid" &
        wait

        curTime=$(date +%Y%m%d_%H%M%S)
        echo "$curTime: Data captured to $dataPath"
    else
        curTime=$(date +%Y%m%d_%H%M%S)
        echo "$curTime: PID $pid is no longer running, nothing to capture"
    fi
else
    curTime=$(date +%Y%m%d_%H%M%S)
    echo "$curTime: PID was -1, process no longer running, nothing to capture"
fi
|
|
@ -5,4 +5,5 @@ yajsw/src/main/java/org/rzo/yajsw/os/posix/bsd/AppStarter.java
|
|||
yajsw/src/main/java/org/rzo/yajsw/os/posix/bsd/BSDProcess.java
|
||||
yajsw/src/main/java/org/rzo/yajsw/wrapper/AbstractWrappedProcess.java
|
||||
yajsw/src/main/java/org/rzo/yajsw/wrapper/WrappedJavaProcess.java
|
||||
yajsw/src/main/java/org/rzo/yajsw/script/AbstractScript.java
|
||||
yajsw/src/main/java/org/rzo/yajsw/script/ShellScript.java
|
||||
|
|
|
@ -81,36 +81,29 @@ public abstract class AbstractScript implements Script
|
|||
|
||||
synchronized public void executeWithTimeout(final String line)
|
||||
{
|
||||
Object result = null;
|
||||
_timerTimeout = TIMER.newTimeout(new TimerTask()
|
||||
{
|
||||
/**
|
||||
* Changed by rjpeter Aug 07, 2014.
|
||||
*/
|
||||
_future = EXECUTOR.submit(new Callable<Object>() {
|
||||
@Override
|
||||
public Object call() {
|
||||
return execute(line);
|
||||
}
|
||||
});
|
||||
|
||||
public void run(Timeout arg0) throws Exception
|
||||
{
|
||||
log("script takes too long -> interrupt");
|
||||
try
|
||||
{
|
||||
interrupt();
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
, _timeout, TimeUnit.MILLISECONDS);
|
||||
_future = EXECUTOR.submit(new Callable<Object>()
|
||||
{
|
||||
public Object call()
|
||||
{
|
||||
Object result = execute(line);
|
||||
if (_timerTimeout != null)
|
||||
_timerTimeout.cancel();
|
||||
_timerTimeout = null;
|
||||
return result;
|
||||
}
|
||||
});
|
||||
// wait for script to finish
|
||||
try {
|
||||
_future.get(_timeout, TimeUnit.MILLISECONDS);
|
||||
} catch (TimeoutException e) {
|
||||
log("script " + _name + " took too long -> interrupt");
|
||||
try {
|
||||
interrupt();
|
||||
} catch (Throwable e1) {
|
||||
|
||||
}
|
||||
} catch (Exception e) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
BIN
rpms/awips2.qpid/0.18/SOURCES/awips2/slf4j-log4j12-1.7.5.jar
Normal file
BIN
rpms/awips2.qpid/0.18/SOURCES/awips2/slf4j-log4j12-1.7.5.jar
Normal file
Binary file not shown.
|
@ -70,7 +70,14 @@ wrapper.java.additional.7=-Dqpid.broker.exceptionHandler.continue=true
|
|||
# Maximum Java Heap Size (in MB)
|
||||
wrapper.java.maxmemory=1536
|
||||
|
||||
wrapper.ping.timeout=300
|
||||
wrapper.ping.interval=5
|
||||
wrapper.ping.timeout=30
|
||||
|
||||
# NOTE: script must be located at /awips2/qpid/bin/yajsw/scripts for it to be found
|
||||
wrapper.script.ABORT=wrapperCapture.sh
|
||||
wrapper.script.ABORT.timeout=120
|
||||
wrapper.script.RESTART=wrapperCapture.sh
|
||||
wrapper.script.RESTART.timeout=120
|
||||
|
||||
#********************************************************************
|
||||
# Monitor the Application
|
||||
|
|
Binary file not shown.
|
@ -14,7 +14,7 @@ diff -crB a/qpid-java.spec b/qpid-java.spec
|
|||
!
|
||||
! Name: awips2-qpid-java
|
||||
Version: 0.18
|
||||
! Release: 4%{?dist}
|
||||
! Release: 5%{?dist}
|
||||
Summary: Java implementation of Apache Qpid
|
||||
License: Apache Software License
|
||||
Group: Development/Java
|
||||
|
|
Loading…
Add table
Reference in a new issue