697 lines
20 KiB
Bash
Executable file
697 lines
20 KiB
Bash
Executable file
#!/bin/bash
#
# Default settings for the capture script. Most of them can be overridden on
# the command line (see usage).

# base path to save capture data to, will create subdirectory for each workstation
basePath="/awips2/fxa/cave"

# text searched for in the `ps -ef` output to find EDEX JVMs (edex mode)
edexGrepString="edex.run.mode="

# directory the Xorg*.log* files are copied from
xorgLogPath="/var/log"

# the database host to grab current running queries for
DATABASE_HOST="localhost"

# Flags to control what data capture grabs. A flag is on when it is set to
# YES, Y, yes or y (normalised by checkYes); anything else is considered off.
RUN_JSTACK="Y"
# number of consecutive jstack runs per process (a count, not a yes/no flag)
JSTACK_ITERATIONS="15"
RUN_JMAP="Y"
RUN_QPID_STAT="Y"
MOVE_ALL_HS_ERR_PID="Y"
GRAB_CAVE_AND_ALERTVIZ_LOGS="Y"
GRAB_SCREENSHOT="Y"
GRAB_CURRENT_QUERIES="Y"
GRAB_XORG_LOG="Y"
EDEX_MODE="N"
FORCE="N"
TGZ_OUTPUT="Y"
RUN_VERSIONS="Y"
# accumulate mode; this is the name set by -a and read by the rest of the script
ACCUM="N"
# misspelled historical name, kept because the input validation still refers to it
ACCCUM="N"

# pid given with -p; empty means capture for every process found
cavePid=""
# number of EDEX jvm names collected from -e options
edexProcCount=0

# Print the usage message to stdout and exit the script with status 0.
# Each {y/n} option shows the current value of its flag as the default.
# The command line parser also routes unknown options here.
usage() {
    # the option column and the default column are separated by two tabs
    local tabs=$'\t\t'

    cat <<EOF
Script for capturing information about cave/edex and general server health.

Following options allowed
-quick
 Turns off jmap and reduces jstack iterations to 5

-a {y/n}${tabs}default [${ACCUM:-N}]
 Accumulate mode, captures into a per-day directory. Turns off tar/gzip and version information

-d {y/n}${tabs}default [$RUN_JMAP]
 Run jmap to grab the heap dump information

-e {request/ingest/ingestGrib/ingestDat}
 Run edex mode and grab information about the jvm passed. May be used multiple times to grab data about multiple jvms

-f {y/n}${tabs}default [$FORCE]
 Force a jstack/jmap by default

-l {y/n}${tabs}default [$GRAB_CAVE_AND_ALERTVIZ_LOGS]
 Captures the cave and alertviz logs. If run for a specific pid the only cave log captured will be for that pid

-m {y/n}${tabs}default [$MOVE_ALL_HS_ERR_PID]
 Captures all hs_err_pid's found

-p {PID}${tabs}default none
 Run capture for a specific PID, crash information will not be captured. Defaults to none and runs against all pids found.

-q {y/n}${tabs}default [$RUN_QPID_STAT]
 Run qpid-stat

-Q {y/n}${tabs}default [$GRAB_CURRENT_QUERIES]
 Grab current running database queries

-r "Reason for capture"
 The reason for capture, so popup will not be shown

-s {y/n}${tabs}default [$RUN_JSTACK]
 Run jstack to grab the thread stack information

-screen {y/n}${tabs}default [$GRAB_SCREENSHOT]
 Screen print the current workstation (local user must be running capture)

-V {y/n}${tabs}default [$RUN_VERSIONS]
 Grab version information

-z {y/n}${tabs}default [$TGZ_OUTPUT]
 Tar and gzip the captured data

-h
 Display this usage statement
EOF
    exit 0
}

# Ensure a directory exists and is writable by the current user.
# Arguments: $1 - directory to check, created (with parents) when missing
# On failure a zenity error dialog is started in the background (best effort,
# its output is discarded), the same message is printed to stdout and the
# script exits with status 1.
checkDir() {
    local dir="$1"
    local message=""

    if [ ! -d "$dir" ]; then
        mkdir -p -- "$dir"
        # re-test instead of trusting mkdir's status, as the original did
        if [ ! -d "$dir" ]; then
            message="Unable to create capture data directory\n$dir"
        fi
    fi

    if [ -z "$message" ] && [ ! -w "$dir" ]; then
        message="Do not have write permissions to capture data directory\n$dir"
    fi

    if [ -n "$message" ]; then
        zenity --error --no-wrap --title="Capture Failed" --text="$message" > /dev/null 2>&1 &
        echo -e "Capture failed: $message"
        exit 1
    fi
}

# Normalise a yes/no flag.
# Arguments: $1 - name of the variable that receives the result
#            $2 - raw value to interpret (may be empty or missing)
# The named variable is set to "y" when $2 is exactly YES, Y, yes or y and to
# "n" for anything else.
checkYes() {
    local __resultvar="$1"
    local __normalized="n"

    case "${2-}" in
        YES | Y | yes | y) __normalized="y" ;;
    esac

    # printf -v assigns by name without re-parsing the value the way eval would
    printf -v "$__resultvar" '%s' "$__normalized"
}

# Check if enough memory is available (more than 1000 MB) to run jmap.
# The value used is column 7 of the "Mem" row printed by `free -m`.
# NOTE(review): which figure sits in that column depends on the procps
# version - confirm it is the intended one on the target systems.
# Globals:  t1 (read, timestamp prefix for the log line)
#           processFile (read, log file the decision is appended to)
# Returns:  1 when jmap should run, 0 when it should be skipped. This is the
#           reverse of the usual shell convention; the caller tests for 1.
checkJmapMem() {
    local capMem=1000
    local freeMem

    freeMem=$(free -m | awk '/Mem/ {print $7; exit}')

    # no usable number (free missing, unexpected layout): treat as no memory
    if [[ ! "$freeMem" =~ ^[0-9]+$ ]]; then
        freeMem=0
    fi

    if [ "$freeMem" -gt "$capMem" ]; then
        echo "${t1}: ${freeMem}M free, running jmap" >> "$processFile"
        return 1
    fi

    echo "${t1}: ${freeMem}M free, skipping jmap" >> "$processFile"
    return 0
}

# Takes a screen shot of the user's desktop with `import`.
# Does nothing unless GRAB_SCREENSHOT is "y". The third column of `w -hs` for
# $user supplies the candidate displays; only values that look like
# :<display>.<screen> are used. One background `import` is started per
# display, writing screenShot_<n>.png and screenShot_<n>.log into $dataPath.
grabScreenShot() {
    [ "$GRAB_SCREENSHOT" == "y" ] || return 0

    echo "Capturing screen shot of desktop"
    t1=$(date "+%Y%m%d %H:%M:%S")
    echo "${t1}: Capturing screen shot of desktop" >> $processFile

    possibleScreens=$(w -hs $user | awk '{print $3}' | sort -u)
    count=0
    for pScreen in $possibleScreens; do
        # skip anything that is not an X display (remote host names, ttys, ...)
        [[ $pScreen =~ :[0-9]+\.[0-9]+ ]] || continue

        import -window root -display $pScreen ${dataPath}/screenShot_${count}.png > ${dataPath}/screenShot_${count}.log 2>&1 &
        count=$((count + 1))
    done
}

# Copies the Xorg logs (Xorg*.log* under $xorgLogPath) into $dataPath.
# Does nothing unless GRAB_XORG_LOG is "y". The copy runs in the background
# and anything cp prints is appended to $processFile.
grabXorgLog() {
    [ "$GRAB_XORG_LOG" == "y" ] || return 0

    echo "Capturing Xorg logs"
    t1=$(date "+%Y%m%d %H:%M:%S")
    echo "${t1}: Capturing Xorg logs" >> $processFile
    cp ${xorgLogPath}/Xorg*.log* ${dataPath} >> $processFile 2>&1 &
}

# Warn when some of the processes found are running as another user.
# Globals:  procs (read, ps lines of all matching processes)
#           user  (read, name of the user running the capture)
# Every line of $procs that does not contain $user counts as foreign, the
# same text match the main script uses to build its own process list. The
# owners (first column) of those lines are listed once each, in order of
# first appearance, in a zenity popup (background, best effort) and on stdout.
# Nothing is printed when $procs is empty or every line belongs to $user.
checkForProcsAsOtherUsers() {
    [ -n "$procs" ] || return 0

    # Decide on the filtered list itself. Comparing `wc -l` counts is wrong
    # when there are no own processes, because an empty string still counts
    # as one line.
    local notMyProcs
    notMyProcs=$(echo "$procs" | grep -v -- "$user")
    [ -n "$notMyProcs" ] || return 0

    local otherUsers
    otherUsers=$(echo "$notMyProcs" | awk 'NF && !seen[$1]++ {print $1}')

    local message="Processes found running as other users, please run capture as:\n"
    local procUser
    while IFS= read -r procUser; do
        message="${message}\n${procUser}"
    done <<< "$otherUsers"

    zenity --info --no-wrap --title="!!! Capture Must Be Rerun !!!" --text="$message" > /dev/null 2>&1 &
    echo -e "Capture Must Be Rerun:\n$message\n"
}

# Obtains the reason for running capture and appends it to
# ${dataPath}/capture_reason.log.
# When $reason is already set no dialog is shown. Otherwise a zenity radio
# list is presented; if nothing is selected or "Other" is chosen, a second
# free-text zenity dialog asks for the reason.
reasonForCapture() {
    if [ -z "$reason" ]; then
        reason=$(zenity --list --title "Reason for Running Capture " --width 300 --height 260 --text "Select reason for running capture\n" --radiolist --column "Cause" --column "Reason" --editable TRUE "Received Out of Memory Error" FALSE "Cave slow down" FALSE "Cave unresponsive/froze" FALSE "Cave crashed" FALSE "Other")

        # an empty answer and "Other" both lead to the free-text dialog
        case "$reason" in
            "" | "Other") rerun=1 ;;
            *) rerun=0 ;;
        esac

        if [ "$rerun" -eq "1" ]; then
            reason=$(zenity --text-info --title "Please Enter Reason for Running Capture" --editable --width 400 --height 250)
        fi
    fi

    echo $reason >> ${dataPath}/capture_reason.log
}

# Runs jstack against one pid a number of times in a row.
# Arguments: $1 - pid to inspect
#            $2 - number of iterations
# Iteration n writes to ${dataPath}/pid_<pid>_jstack_<n>.log and each command
# is recorded in $processFile. -F is passed when FORCE is "y"; otherwise a
# failed run is repeated once with -F added.
runJstack() {
    local pid="$1"
    local numIterations="$2"
    local cmd="/awips2/java/bin/jstack"
    local prePath="${dataPath}/pid_${pid}_"
    local log=""

    local options="-l"
    if [ "$FORCE" == "y" ]; then
        options="${options} -F"
    fi

    local count=1
    while [ "$count" -le "$numIterations" ]; do
        t1=$(date "+%Y%m%d %H:%M:%S")
        log="${prePath}jstack_${count}.log"

        echo "${t1}: Running command: ${cmd} ${options} ${pid} >> ${log} 2>&1" >> $processFile
        echo "Running for $t1" >> $log

        # retry with -F only when it was not already part of the options
        if ! ${cmd} ${options} ${pid} >> ${log} 2>&1 && [[ $FORCE != "y" ]]; then
            t1=$(date "+%Y%m%d %H:%M:%S")
            echo "${t1}: jstack for $pid failed to connect, rerunning with -F" >> $processFile
            ${cmd} ${options} -F ${pid} >> ${log} 2>&1
        fi

        count=$((count + 1))
    done
}

# Launches one background runJstack per collected pid.
# Does nothing unless RUN_JSTACK is "y". Uses the first $numProcs entries of
# the pids array and JSTACK_ITERATIONS as the iteration count; the progress
# line printed depends on whether a single pid (-p) was requested.
launchJstacks() {
    [ "${RUN_JSTACK}" == "y" ] || return 0

    if [ ! -z ${cavePid} ]; then
        echo "Capturing thread stack for pid $cavePid"
    else
        echo "Capturing all process thread stacks"
    fi

    local count
    for (( count = 0; count < numProcs; count++ )); do
        runJstack ${pids[$count]} ${JSTACK_ITERATIONS} &
    done
}

# Runs jmap for one pid; a failed dump is repeated once with -F.
# Arguments: $1 - pid to inspect
# Output under ${dataPath}/pid_<pid>_ :
#   heap[_<timestamp>].txt   output of `jmap -heap`
#   dump[_<timestamp>].hprof heap dump
#   dump.log                 output of the dump command(s)
# The timestamp is only added in accumulate mode (ACCUM is "y"). -F is passed
# to the dump when FORCE is "y". Commands are recorded in $processFile.
runJmap() {
    local pid=$1
    local prePath="${dataPath}/pid_${pid}_"

    local options=""
    if [ "$FORCE" == "y" ]; then
        options="${options} -F"
    fi

    local t1=$(date "+%Y%m%d %H:%M:%S")
    local log="${prePath}dump.log"

    # accum needs to change hprof by date
    local suffix=""
    if [ "$ACCUM" == "y" ]; then
        local t2=$(date "+%Y%m%d_%H%M%S")
        suffix="_${t2}"
    fi
    local dumpPath="${prePath}dump${suffix}.hprof"
    local heapPath="${prePath}heap${suffix}.txt"

    echo "${t1}: Running command: /awips2/java/bin/jmap -heap $pid" >> $processFile
    /awips2/java/bin/jmap -heap $pid >> ${heapPath} 2>&1

    # Java 1.7 has a bug that causes jmap to crash processes using the G1 garbage collector
    # more info at http://stackoverflow.com/questions/20571004/garbage-collector-first-and-jmap-eof-bug
    # workaround is to add the 'live' option to jmap to limit the dump to only live objects
    local cmd="/awips2/java/bin/jmap -dump:live,format=b,file=${dumpPath}"
    echo "${t1}: Running command: $cmd $options $pid >> $log 2>&1" >> $processFile

    if ! $cmd $options $pid >> $log 2>&1 && [[ $FORCE != "y" ]]; then
        t1=$(date "+%Y%m%d %H:%M:%S")
        echo "${t1}: jmap for $pid failed to connect, rerunning with -F" >> $processFile
        $cmd $options -F $pid >> $log 2>&1
    fi
}

# Launches one background runJmap per collected pid.
# Does nothing unless RUN_JMAP is "y". Uses the first $numProcs entries of the
# pids array; the progress line printed depends on whether a single pid (-p)
# was requested.
launchJmaps() {
    [ "$RUN_JMAP" == "y" ] || return 0

    if [ ! -z ${cavePid} ]; then
        echo "Capturing process heap dump for pid $cavePid"
    else
        echo "Capturing all Heap Dumps"
    fi

    local count
    for (( count = 0; count < numProcs; count++ )); do
        runJmap ${pids[$count]} &
    done
}

# Runs qpid-stat twice in the background against the qpid host: once for
# queues (-q) and once for sessions (-s).
# Does nothing unless RUN_QPID_STAT is "y", like the other capture steps.
# Output goes to ${dataPath}/qpid-stat-queues.log and
# ${dataPath}/qpid-stat-sessions.log; in accumulate mode (ACCUM is "y") a
# blank separator and a "Running for <time>" header are written first.
# Each command is recorded in $processFile.
runQpidStat() {
    [ "$RUN_QPID_STAT" == "y" ] || return 0

    local qpidHost=cp1f
    local prePath="${dataPath}/"
    local t1
    t1=$(date "+%Y%m%d %H:%M:%S")

    local statFlag statName cmd log
    for statFlag in q s; do
        case "$statFlag" in
            q) statName="queues" ;;
            s) statName="sessions" ;;
        esac

        cmd="/awips2/python/bin/qpid-stat -${statFlag} -Smsg -L500 ${qpidHost}"
        log="${prePath}qpid-stat-${statName}.log"
        echo "${t1}: Running command: $cmd >> $log 2>&1 &" >> "$processFile"

        if [ "$ACCUM" == "y" ]; then
            echo >> "$log"
            echo >> "$log"
            echo "Running for $t1" >> "$log"
        fi

        # $cmd is left unquoted on purpose: it holds the command and its arguments
        $cmd >> "$log" 2>&1 &
    done
}

# Runs /awips2/cave/versions.sh, appending its output to
# ${dataPath}/versions.log, and records the command in $processFile.
runVersions() {
    local cmd="/awips2/cave/versions.sh"
    local t1=$(date "+%Y%m%d %H:%M:%S")

    echo "${t1}: Running command: $cmd >> ${dataPath}/versions.log 2>&1" >> $processFile
    $cmd >> ${dataPath}/versions.log 2>&1
}

# Prints the command name, without arguments, of the process with the given pid.
# Arguments: $1 - pid to look up
getCommandName() {
    local pid=$1

    ps --no-header c -p $pid -o cmd
}

# Prints the parent process id of the process with the given pid, exactly as
# ps formats it (callers expand the result unquoted, which drops any padding).
# Arguments: $1 - pid to look up
getParentPid() {
    local pid=$1

    ps --no-header -p $pid -o ppid
}

# Prints the pid of the cave executable that belongs to the given pid.
# Arguments: $1 - pid of either the cave executable or its worker process
# If the command name of $1 contains "cave", $1 itself is printed. Otherwise
# the parent pid is printed when the parent's command name contains "cave".
# When neither matches, a note is appended to $processFile and $1 is printed.
determineCaveProcess() {
    local RVAL=$1
    local CMD_NAME=$(getCommandName $1)

    # supplied PID already is the cave executable
    if [[ $CMD_NAME =~ cave ]]; then
        echo $RVAL
        return
    fi

    # worker pid probably provided, check parent
    RVAL=$(getParentPid $1)
    if [[ ! $(getCommandName $RVAL) =~ cave ]]; then
        # parent wasn't cave either... continue on using PID provided
        echo "${t1}: Unable to find cave process for pid $1, proceeding with provided PID" >> $processFile
        RVAL=$1
    fi

    echo $RVAL
}

# Parse the command line.
# Every option except -quick and -h consumes the following argument as its
# value. -e may be repeated; each value is appended to the edexProcs array and
# counted in edexProcCount. -h and any unknown option print the usage message,
# which exits the script. Parsing stops at the first empty argument.
parseArgs() {
    local arg

    while [ ! -z "$1" ]; do
        arg=$1
        shift 1

        case $arg in
            -a) ACCUM="$1"; shift 1 ;;
            -d) RUN_JMAP="$1"; shift 1 ;;
            -e)
                EDEX_MODE="Y"
                edexProcs[$edexProcCount]="$1"
                shift 1
                edexProcCount=$((edexProcCount + 1))
                ;;
            -f) FORCE="$1"; shift 1 ;;
            -l) GRAB_CAVE_AND_ALERTVIZ_LOGS="$1"; shift 1 ;;
            -m) MOVE_ALL_HS_ERR_PID="$1"; shift 1 ;;
            -p) cavePid="$1"; shift 1 ;;
            -q) RUN_QPID_STAT="$1"; shift 1 ;;
            -Q) GRAB_CURRENT_QUERIES="$1"; shift 1 ;;
            # documented in the usage text: a reason given here suppresses the popup
            -r) reason="$1"; shift 1 ;;
            -s) RUN_JSTACK="$1"; shift 1 ;;
            -screen) GRAB_SCREENSHOT="$1"; shift 1 ;;
            -V) RUN_VERSIONS="$1"; shift 1 ;;
            -z) TGZ_OUTPUT="$1"; shift 1 ;;
            -quick) RUN_JMAP="N"; JSTACK_ITERATIONS=5 ;;
            -h | *) usage ;;
        esac
    done
}

parseArgs "$@"

# validate inputs: collapse every yes/no flag to "y" or "n" (see checkYes).
# ACCUM is the name set by -a and tested by the rest of the script, so that is
# the one normalised here. An unset or empty flag is passed on as "N".
for flagName in RUN_JSTACK RUN_JMAP RUN_QPID_STAT FORCE MOVE_ALL_HS_ERR_PID \
    GRAB_CURRENT_QUERIES GRAB_CAVE_AND_ALERTVIZ_LOGS EDEX_MODE TGZ_OUTPUT \
    ACCUM RUN_VERSIONS GRAB_SCREENSHOT GRAB_XORG_LOG; do
    checkYes "$flagName" "${!flagName:-N}"
done
unset flagName

# Dependent settings, applied after the flags have been normalised.

# if PID mode don't grab other hs_err_pids
[ ! -z $cavePid ] && MOVE_ALL_HS_ERR_PID="n"

# if accum don't tgz (and don't capture the version information)
case "$ACCUM" in
    y)
        TGZ_OUTPUT="n"
        RUN_VERSIONS="n"
        ;;
esac

# edex mode: no reason prompt, no cave/alertviz logs, no hs_err_pid, no Xorg log
case "$EDEX_MODE" in
    y)
        reason="n"
        GRAB_CAVE_AND_ALERTVIZ_LOGS="n"
        MOVE_ALL_HS_ERR_PID="n"
        GRAB_XORG_LOG="n"
        ;;
esac

# Create the capture directory for this host and record overall system state.
umask 0002

checkDir $basePath

user=$(whoami)

hostName=$(hostname -s)
fullHostName=$(hostname)
# remove the -testBed items
# NOTE(review): "%-" only strips a single trailing "-"; confirm whether "%-*"
# was intended here.
strippedHostName=${hostName%-}
hostPath="${basePath}/${hostName}"

checkDir $hostPath

curTime=$(date +%Y%m%d_%H%M%S)
curDir=$(pwd)

# accumulate mode shares one directory per day, otherwise one per run
if [ "${ACCUM}" != "y" ]; then
    dataPath="${hostPath}/captureData_${curTime}"
else
    curDay=$(date +%Y%m%d)
    dataPath="${hostPath}/captureData_${curDay}"
fi

checkDir $dataPath

cd $dataPath
processFile=${dataPath}/capture_info.log
export COLUMNS=160
top -b -c -n1 >> "${dataPath}/top_$hostName.log"
vmstat -w 1 5 >> "${dataPath}/vmstat_$hostName.log"

# blank separators between successive runs that append to the same files
if [ "$ACCUM" == "y" ]; then
    printf '\n\n' >> "${dataPath}/top_$hostName.log"
    printf '\n' >> "${dataPath}/vmstat_$hostName.log"
fi

# Collect the ps lines of the processes to capture into $procs.
if [ "$EDEX_MODE" == "y" ]; then
    # build "<edexGrepString>(name1|name2|...) " from the -e values
    grepString="$edexGrepString("
    for (( count = 0; count < edexProcCount; count++ )); do
        if [ "$count" -ne "0" ]; then
            grepString="${grepString}|"
        fi
        grepString="${grepString}${edexProcs[$count]}"
    done
    grepString="${grepString}) "

    procs=$(ps -ef | grep -E "$grepString" | grep -v "grep")
else
    #list of cave process ids to get ps output for
    caveProcNums=""
    for parent in $(pgrep '^cave$'); do
        # the cave process starts a new JVM as a child process
        # find all children of the cave process
        children=$(pgrep -P $parent)

        # no children: assume that this is a main cave process,
        # otherwise only get ps output for the children
        caveProcNums="$caveProcNums ${children:-$parent}"
    done

    if [ -n "${caveProcNums}" ]; then
        procs=$(ps --no-header -fp $caveProcNums)
    fi
fi

# Narrow the process list and record it in the capture log.
if [ ! -z "$cavePid" ]; then
    # limit cave procs to the requested PID
    echo "Running in PID mode, only requesting for pid $cavePid" >> "$processFile"
    # Compare against the PID and PPID columns of the ps -f layout instead of
    # grepping the whole line. A substring match would also select pids that
    # merely contain the number, or lines where it shows up in a timestamp or
    # argument. The PPID column keeps the worker selected when the pid of the
    # cave executable is given.
    procs=$(echo "$procs" | awk -v pid="$cavePid" '$2 == pid || $3 == pid')
fi

# processes belonging to the user running the capture
myProcs=$(echo "$procs" | grep "$user")

echo "${procs}" >> "$processFile"
echo "" >> "$processFile"
echo "" >> "$processFile"

checkForProcsAsOtherUsers

# Start the jstack/jmap captures for the user's processes, if there are any.
if [ -z "${myProcs}" ]; then
    t1=$(date "+%Y%m%d %H:%M:%S")
    echo "*** NO processes found for user $user, capturing limited data to $dataPath"
    echo "${t1}: NO processes found for $user" >> $processFile
    echo "" >> $processFile
else
    t1=$(date "+%Y%m%d %H:%M:%S")
    echo "Processes found for user $user, capturing data to $dataPath"
    echo "${t1}: Processes found for user $user, capturing data to $dataPath" >> $processFile
    echo "" >> $processFile
    numProcs=$(echo "$myProcs" | wc -l)

    # preserve IFS and set it to line feed only
    PREV_IFS=$IFS
    IFS=$'\n'
    count=0

    # grab the pids for future use
    for proc in $myProcs; do
        pids[$count]=$(echo "$proc" | awk '{print $2}')
        count=$((count + 1))
    done
    IFS=$PREV_IFS

    launchJstacks

    # checkJmapMem returns 1 when there is enough memory for jmap
    checkJmapMem
    rc=$?
    if [ $rc -eq 1 ]; then
        launchJmaps
    fi
fi

# Captures that do not depend on the processes found.

# grab Xorg logs
grabXorgLog

# grab screen shot, spawns background process for each screen
grabScreenShot

# grab qpid stat
runQpidStat

# ls users home directory to check nas performance
/usr/bin/time -p ls -la ~ > ${dataPath}/nas_check_ls_home.txt 2>&1 &

# get reason for running capture ("n" means no reason is wanted, e.g. edex mode)
case "$reason" in
    n) ;;
    *) reasonForCapture & ;;
esac

# move all hs_err_pid from user's home directory to capture directory
if [ "${MOVE_ALL_HS_ERR_PID}" == "y" ]; then
    numErrFiles=$(ls ${HOME}/hs_err_pid* 2> /dev/null | wc -l)
    t1=$(date "+%Y%m%d %H:%M:%S")

    case "${numErrFiles}" in
        0)
            echo "*** NO hs_err_pid files to capture"
            echo "${t1}: No hs_err_pid files to capture" >> $processFile
            ;;
        *)
            echo "Capturing ${numErrFiles} hs_err_pids"
            echo "${t1}: Capturing ${numErrFiles} hs_err_pids" >> $processFile
            mv ${HOME}/hs_err_pid* ${dataPath}
            ;;
    esac

    echo "" >> $processFile
fi

# Print the per-host log directory below the given base directory.
# Arguments: $1 - base directory
# Tries <base>/$hostName, then <base>/$strippedHostName; when neither exists
# <base>/$fullHostName is printed without checking that it exists.
findHostLogDir() {
    local base=$1
    local candidate

    for candidate in "$hostName" "$strippedHostName"; do
        if [ -d "${base}/${candidate}" ]; then
            echo "${base}/${candidate}"
            return 0
        fi
    done

    echo "${base}/${fullHostName}"
}

# Copy every regular file whose name contains one of the given pids.
# Arguments: $1 - directory to search, $2 - destination directory,
#            $3... - pids to look for in the file names (empty ones are skipped)
copyPidConsoleLogs() {
    local srcDir=$1
    local destDir=$2
    shift 2

    local pid
    local -a nameTests=()
    for pid in "$@"; do
        [ -n "$pid" ] || continue
        if [ "${#nameTests[@]}" -gt 0 ]; then
            nameTests+=(-o)
        fi
        nameTests+=(-name "*${pid}*")
    done
    [ "${#nameTests[@]}" -gt 0 ] || return 0

    # The parentheses are needed: without them -exec is bound to the last
    # -name only and files matching the earlier names are never copied.
    find "$srcDir" -type f \( "${nameTests[@]}" \) -exec cp {} "$destDir" \;
}

# Grab the current alertviz database and the cave console logs written to in
# the last 2 hours; in pid mode only grab the cave logs for that pid.
if [ "${GRAB_CAVE_AND_ALERTVIZ_LOGS}" == "y" ]; then
    dir=$(findHostLogDir "${HOME}/caveData/logs")

    t1=$(date "+%Y%m%d %H:%M:%S")
    if [ -d "$dir" ]; then
        echo "Capturing alertviz logs"
        echo "${t1}: Capturing alertviz logs" >> "$processFile"
        # -p: in accumulate mode the directory may exist from an earlier run
        mkdir -p "${dataPath}/alertVizDatabase"
        cp -r "$dir" "${dataPath}/alertVizDatabase"
    else
        echo "*** NO alertviz logs to capture"
        echo "${t1}: *** Can't find alertviz logs to capture" >> "$processFile"
        echo "" >> "$processFile"
    fi

    dir=$(findHostLogDir "${HOME}/caveData/logs/consoleLogs")

    t1=$(date "+%Y%m%d %H:%M:%S")
    # grab any logs written to in last 2 hours, or pid mode only that log
    if [ -d "$dir" ]; then
        echo "Capturing cave logs"
        echo "${t1}: Capturing cave logs" >> "$processFile"
        mkdir -p "${dataPath}/consoleLogs"
        if [ ! -z "${cavePid}" ]; then
            # logs have cave executable pid or worker pid in the name
            caveExePid=$(determineCaveProcess ${cavePid})
            copyPidConsoleLogs "$dir" "${dataPath}/consoleLogs" "$caveExePid" "${cavePid}"
        else
            find "$dir" -type f -mmin -120 -exec cp {} "${dataPath}/consoleLogs" \;
        fi
    else
        echo "*** NO cave logs to capture"
        echo "${t1}: *** Can't find cave logs to capture" >> "$processFile"
        echo "" >> "$processFile"
    fi
fi

# grab the version information
case "$RUN_VERSIONS" in
    y) runVersions ;;
esac

# wait for any backgrounded processes by this script to finish
wait

# Package the capture, report where it ended up and restore the caller's cwd.
message=""

# tar/gz the output
if [ "${TGZ_OUTPUT}" == "y" ]; then
    echo "Tar/zipping captured data"
    if [ ! -z "${cavePid}" ]; then
        tgzFile="${hostPath}/captureData_${curTime}_pid_${cavePid}.tgz"
    else
        tgzFile="${hostPath}/captureData_${curTime}.tgz"
    fi

    # Archive relative to the host directory so the tarball holds
    # captureData_<time>/..., and only delete the capture directory once the
    # archive was written; a failed tar must not cost the captured data.
    if cd "${hostPath}" && tar -czf "$tgzFile" "captureData_${curTime}"; then
        rm -rf -- "${dataPath}"
        message="Data captured to $tgzFile"
    else
        message="Tar/zip failed, data captured to ${dataPath}"
    fi
else
    message="Data captured to ${dataPath}"
fi

zenity --info --no-wrap --title="Capture Done" --text="$message" > /dev/null 2>&1 &
echo
echo $message

# NOTE(review): nothing in this script creates ~/.pgpass; confirm that removing
# it here is still wanted.
rm ~/.pgpass

cd $curDir