awips2/edexOsgi/com.raytheon.uf.tools.cli/impl/capture
2022-05-05 12:34:50 -05:00

796 lines
23 KiB
Bash
Executable file

#!/bin/bash
# Base path to save capture data to; a subdirectory is created for each workstation.
basePath="/data/fxa/cave"
# Prefix grepped for in the ps output to locate EDEX JVMs (mode names are appended).
edexGrepString="edex.run.mode="
xorgLogPath="/var/log"

# The remote servers to grab top/vmstat on. Used to get the general state of the servers.
# DX_SERVERS and PX_SERVERS from the environment override the built-in host lists.
REMOTE_SERVERS_TO_CHECK="${DX_SERVERS:-dv1 dv2 dv3 dv4 dv5} ${PX_SERVERS:-pv1 pv2}"

# The database host to grab current running queries for.
DATABASE_HOST="dv1"

# Flags to control what data capture grabs. A flag is on when its value is
# YES, Y, yes or y (see checkYes); anything else is considered off.
RUN_JSTACK="Y"
JSTACK_ITERATIONS="15"
RUN_JMAP="Y"
RUN_QPID_STAT="Y"
MOVE_ALL_HS_ERR_PID="Y"
# For remote top you must have ssh keys setup to allow automatic login,
# otherwise ssh will wait for a password and the script will never exit.
GRAB_REMOTE_TOP="Y"
GRAB_REMOTE_VMSTAT="Y"
GRAB_CAVE_AND_ALERTVIZ_LOGS="Y"
GRAB_SCREENSHOT="Y"
GRAB_CURRENT_QUERIES="Y"
GRAB_XORG_LOG="Y"
EDEX_MODE="N"
FORCE="N"
TGZ_OUTPUT="Y"
RUN_VERSIONS="Y"
# Accumulate mode (-a). ACCUM is the name the rest of the script reads.
ACCUM="N"
# Historical misspelling of ACCUM; kept defined so any code that still
# references the misspelled name keeps seeing a value.
ACCCUM="N"

# max username length for IdM sites
UID_LEN="32"
cavePid=""
edexProcCount=0
# Print the usage message to stdout and exit the script with status 0.
# Defaults shown in brackets are the current values of the corresponding flags.
usage() {
  echo "Script for capturing information about cave/edex and general server health."
  echo
  echo "Following options allowed"
  echo -e "-quick"
  echo " Turns off jmap and reduces jstack iterations to 5"
  echo
  # -a is accepted by the option parser; document it alongside the other flags
  echo -e "-a {y/n}\t\tdefault [${ACCUM:-N}]"
  echo " Accumulate mode, captures go into one directory per day, output is not tarred and version information is skipped"
  echo
  echo -e "-c \"{host names}\"\tdefault [$REMOTE_SERVERS_TO_CHECK]"
  echo " The servers to grab top information from, make sure list is quoted and space delimited"
  echo
  echo -e "-d {y/n}\t\tdefault [$RUN_JMAP]"
  echo " Run jmap to grab the heap dump information"
  echo
  echo -e "-e {request/ingest/ingestGrib/ingestDat}"
  echo " Run edex mode and grab information about the jvm passed. May be used multiple times to grab data about multiple jvms"
  echo
  echo -e "-f {y/n}\t\tdefault [$FORCE]"
  echo " Force a jstack/jmap by default"
  echo
  echo -e "-l {y/n}\t\tdefault [$GRAB_CAVE_AND_ALERTVIZ_LOGS]"
  echo " Captures the cave and alertviz logs. If run for a specific pid the only cave log captured will be for that pid"
  echo
  echo -e "-m {y/n}\t\tdefault [$MOVE_ALL_HS_ERR_PID]"
  echo " Captures all hs_err_pid's found"
  echo
  echo -e "-p {PID}\t\tdefault none"
  echo " Run capture for a specific PID, crash information will not be captured. Defaults to none and runs against all pids found."
  echo
  echo -e "-q {y/n}\t\tdefault [$RUN_QPID_STAT]"
  echo " Run qpid-stat"
  echo
  echo -e "-Q {y/n}\t\tdefault [$GRAB_CURRENT_QUERIES]"
  echo " Grab current running database queries"
  echo
  echo -e "-r \"Reason for capture\""
  echo " The reason for capture, so popup will not be shown"
  echo
  echo -e "-s {y/n}\t\tdefault [$RUN_JSTACK]"
  echo " Run jstack to grab the thread stack information"
  echo
  echo -e "-screen {y/n}\t\tdefault [$GRAB_SCREENSHOT]"
  echo " Screen print the current workstation (local user must be running capture)"
  echo
  echo -e "-t {y/n}\t\tdefault [$GRAB_REMOTE_TOP]"
  echo " Captures top information from servers, auto login must be enabled"
  echo
  echo -e "-v {y/n}\t\tdefault [$GRAB_REMOTE_VMSTAT]"
  echo " Captures vmstat information from servers, auto login must be enabled"
  echo
  echo -e "-V {y/n}\t\tdefault [$RUN_VERSIONS]"
  echo " Grab version information"
  echo
  echo -e "-z {y/n}\t\tdefault [$TGZ_OUTPUT]"
  echo " Tar and gzip the captured data"
  echo
  echo -e "-h"
  echo " Display this usage statement"
  exit 0
}
# Ensure a directory exists and is writable, creating it (with parents) if needed.
# Arguments: $1 - directory path
# On failure a zenity error dialog is launched in the background, the reason is
# echoed to stdout and the script exits with status 1.
checkDir() {
  local dir="$1"
  local message
  if [ ! -d "$dir" ]; then
    # quoted so paths containing whitespace are created as a single directory
    mkdir -p -- "$dir"
    if [ ! -d "$dir" ]; then
      message="Unable to create capture data directory\n$dir"
      zenity --error --no-wrap --title="Capture Failed" --text="$message" > /dev/null 2>&1 &
      echo -e "Capture failed: $message"
      exit 1
    fi
  fi
  if [ ! -w "$dir" ]; then
    message="Do not have write permissions to capture data directory\n$dir"
    zenity --error --no-wrap --title="Capture Failed" --text="$message" > /dev/null 2>&1 &
    echo -e "Capture failed: $message"
    exit 1
  fi
}
# Normalize a yes/no flag value.
# Arguments: $1 - name of the variable to receive the result
#            $2 - value to test
# Sets the named variable to "y" when $2 is exactly YES, Y, yes or y,
# and to "n" for anything else (including an empty or missing value).
checkYes() {
  local __resultvar="$1"
  case "$2" in
    YES | Y | yes | y)
      printf -v "$__resultvar" '%s' "y"
      ;;
    *)
      printf -v "$__resultvar" '%s' "n"
      ;;
  esac
}
# Decide whether there is enough memory to run jmap.
# Reads column 7 of the "Mem" line of `free -m` and compares it to CAP_MEM (MB).
# Logs the decision to $processFile using the caller's ${t1} timestamp.
# Returns: 1 when jmap should run, 0 when it should be skipped
#          (note the inverted sense; the caller tests for 1).
checkJmapMem() {
  CAP_MEM=1000
  let freeMem=$(free -m | grep Mem | awk '{print $7}')
  local verdict="skipping"
  local status=0
  if [ $freeMem -gt $CAP_MEM ]; then
    verdict="running"
    status=1
  fi
  echo "${t1}: ${freeMem}M free, ${verdict} jmap" >> $processFile
  return $status
}
# Runs `import` in the background once per X display found in the third column
# of `w -hs $user`, writing screenShot_<n>.png / .log into ${dataPath}.
# Does nothing unless GRAB_SCREENSHOT is "y".
grabScreenShot() {
  [ "$GRAB_SCREENSHOT" == "y" ] || return 0
  echo "Capturing screen shot of desktop"
  t1=$(date "+%Y%m%d %H:%M:%S")
  echo "${t1}: Capturing screen shot of desktop" >> $processFile
  local display
  local shotIndex=0
  for display in $(w -hs $user | awk '{print $3}' | sort -u); do
    # only entries that look like an X display (":0", ":1.0", ...) are used
    if [[ $display =~ :[0-9]+(\.[0-9]+)? ]]; then
      import -window root -display $display ${dataPath}/screenShot_${shotIndex}.png > ${dataPath}/screenShot_${shotIndex}.log 2>&1 &
      shotIndex=$((shotIndex + 1))
    fi
  done
}
# Copies the Xorg log files (Xorg*.log*) from ${xorgLogPath} into ${dataPath}
# in the background; cp output is appended to $processFile.
# Does nothing unless GRAB_XORG_LOG is "y".
grabXorgLog() {
  [ "$GRAB_XORG_LOG" == "y" ] || return 0
  echo "Capturing Xorg logs"
  t1=$(date "+%Y%m%d %H:%M:%S")
  echo "${t1}: Capturing Xorg logs" >> $processFile
  cp ${xorgLogPath}/Xorg*.log* ${dataPath} >> $processFile 2>&1 &
}
# Runs a one-shot batch `top` over ssh, in the background, on every host in
# REMOTE_SERVERS_TO_CHECK and appends the output to ${dataPath}/top_<host>.log.
# Requires key-based (automatic) ssh login. BatchMode=yes makes ssh fail
# instead of waiting on a password prompt, so a host without keys is logged
# as an error rather than hanging the capture.
# Does nothing unless GRAB_REMOTE_TOP is "y".
grabRemoteTop() {
  if [ "$GRAB_REMOTE_TOP" == "y" ]; then
    echo "Capturing top on remote servers"
    local server out_file
    # list is intentionally unquoted: it is a space delimited set of host names
    for server in ${REMOTE_SERVERS_TO_CHECK}; do
      t1=$(date "+%Y%m%d %H:%M:%S")
      echo "${t1}: Capturing top for $server" >> "$processFile"
      out_file="${dataPath}/top_$server.log"
      ssh -o BatchMode=yes "$server" "sh -c 'export COLUMNS=160; top -b -c -n1' " >> "$out_file" 2>&1 &
    done
  fi
}
# Runs `vmstat -w 1 5` over ssh, in the background, on every host in
# REMOTE_SERVERS_TO_CHECK and appends the output to ${dataPath}/vmstat_<host>.log.
# Requires key-based (automatic) ssh login. BatchMode=yes makes ssh fail
# instead of waiting on a password prompt, so a host without keys is logged
# as an error rather than hanging the capture.
# Does nothing unless GRAB_REMOTE_VMSTAT is "y".
grabRemoteVmstat() {
  if [ "$GRAB_REMOTE_VMSTAT" == "y" ]; then
    echo "Capturing vmstat on remote servers"
    local server out_file
    # list is intentionally unquoted: it is a space delimited set of host names
    for server in ${REMOTE_SERVERS_TO_CHECK}; do
      t1=$(date "+%Y%m%d %H:%M:%S")
      echo "${t1}: Capturing vmstat for $server" >> "$processFile"
      out_file="${dataPath}/vmstat_$server.log"
      ssh -o BatchMode=yes "$server" "sh -c 'vmstat -w 1 5' " >> "$out_file" 2>&1 &
    done
  fi
}
# Captures the non-idle queries from pg_stat_activity on ${DATABASE_HOST}
# (database "metadata", user "awips") into ${dataPath}/database_queries.log.
# psql runs in the background. Does nothing unless GRAB_CURRENT_QUERIES is "y".
# Side effect: (over)writes ~/.pgpass with the credentials psql needs.
# NOTE(review): any pre-existing ~/.pgpass of the user is replaced - confirm
# this is acceptable on the workstations this runs on.
grabCurrentDatabaseQueries() {
  if [ "$GRAB_CURRENT_QUERIES" == "y" ]; then
    echo "Capturing current database queries"
    t1=$(date "+%Y%m%d %H:%M:%S")
    echo "${t1}: Capturing current database queries" >> "$processFile"
    local out_file="${dataPath}/database_queries.log"
    # the pgpass host must match the host psql connects to, so derive it from
    # DATABASE_HOST; create the file with a restrictive umask so the password
    # is never readable by others, even briefly
    (
      umask 077
      echo "${DATABASE_HOST}:5432:metadata:awips:awips" > ~/.pgpass
    )
    chmod 600 ~/.pgpass
    psql -d metadata -U awips -h "${DATABASE_HOST}" -c "select datname, pid, client_addr, query, now()-xact_start as runningTime from pg_stat_activity where state != 'idle' order by runningTime desc;" >> "$out_file" 2>&1 &
  fi
}
# Warns when any process listed in $procs is owned by someone other than $user.
# Globals read: procs (ps output, one process per line, owner in column 1), user
# The distinct other owners are listed, in first-seen order, in a background
# zenity dialog and on stdout. Nothing is printed when $procs is empty or every
# process belongs to $user.
checkForProcsAsOtherUsers() {
  [ -n "$procs" ] || return 0

  # Compare the owner column exactly: counting lines of an empty list yields 1,
  # and a substring grep would also match the user name inside a command line.
  local otherUsers
  otherUsers=$(printf '%s\n' "$procs" \
    | awk -v me="$user" 'NF && $1 != me && !seen[$1]++ { print $1 }')
  [ -n "$otherUsers" ] || return 0

  local message="Processes found running as other users, please run capture as:\n"
  local procUser
  while IFS= read -r procUser; do
    message="${message}\n${procUser}"
  done <<< "$otherUsers"

  zenity --info --no-wrap --title="!!! Capture Must Be Rerun !!!" --text="$message" > /dev/null 2>&1 &
  echo -e "Capture Must Be Rerun:\n$message\n"
}
# Gets the reason for running capture and appends it to
# ${dataPath}/capture_reason.log.
# If $reason is already set (e.g. via -r) it is used as is. Otherwise, when
# DISPLAY is unset a fixed "unknown" reason is recorded; when DISPLAY is set
# the user is asked through zenity dialogs.
# Returns: 0 when a reason was recorded, 1 when the zenity selection came back
#          empty (nothing is written in that case).
reasonForCapture() {
  if [ -z "$reason" ]; then
    if [ -z "${DISPLAY}" ]; then
      reason="unknown, DISPLAY not set"
    else
      reason=$(zenity --list --title "Reason for Running Capture " --width 360 --height 300 --text "Select reason for running capture\n" --radiolist --column "Cause" --column "Reason" --editable TRUE "Received Out of Memory Error" FALSE "Cave slow down" FALSE "Cave unresponsive/frozen" FALSE "Cave crashed" FALSE "Other")
      if [[ -z $reason ]]; then
        return 1
      fi
      if [ "$reason" == "Other" ]; then
        reason=$(zenity --text-info --title "Please Enter Reason for Running Capture" --editable --width 400 --height 250)
      fi
    fi
  fi
  # written verbatim: no glob expansion or whitespace collapsing of the text
  printf '%s\n' "$reason" >> "${dataPath}/capture_reason.log"
  return 0
}
# Runs jstack against one pid a given number of times in a row.
# Arguments: $1 - pid, $2 - number of iterations
# Each iteration appends to ${dataPath}/pid_<pid>_jstack_<n>.log and logs the
# command to $processFile. When jstack fails and FORCE is not "y", the same
# iteration is retried once with -F.
runJstack() {
  local pid="$1"
  local numIterations="$2"
  local cmd="/awips2/java/bin/jstack"
  local options="-l"
  [ "$FORCE" == "y" ] && options="${options} -F"
  local prePath="${dataPath}/pid_${pid}_"
  local iteration=1
  local log=""
  while [ "$iteration" -le "$numIterations" ]; do
    log="${prePath}jstack_${iteration}.log"
    t1=$(date "+%Y%m%d %H:%M:%S")
    echo "${t1}: Running command: ${cmd} ${options} ${pid} >> ${log} 2>&1" >> $processFile
    echo "Running for $t1" >> $log
    if ! ${cmd} ${options} ${pid} >> ${log} 2>&1 && [[ $FORCE != "y" ]]; then
      t1=$(date "+%Y%m%d %H:%M:%S")
      echo "${t1}: jstack for $pid failed to connect, rerunning with -F" >> $processFile
      ${cmd} ${options} -F ${pid} >> ${log} 2>&1
    fi
    iteration=$((iteration + 1))
  done
}
# Launches a background runJstack for each of the first $numProcs entries of
# the pids array, each running JSTACK_ITERATIONS iterations.
# Does nothing unless RUN_JSTACK is "y".
launchJstacks() {
  [ "${RUN_JSTACK}" == "y" ] || return 0
  if [ -n "${cavePid}" ]; then
    echo "Capturing thread stack for pid $cavePid"
  else
    echo "Capturing all process thread stacks"
  fi
  local idx
  for ((idx = 0; idx < numProcs; idx++)); do
    runJstack ${pids[$idx]} ${JSTACK_ITERATIONS} &
  done
}
# Runs jmap for one pid: first `jmap -heap` into pid_<pid>_heap*.txt, then a
# live-object binary heap dump into pid_<pid>_dump*.hprof. When the dump fails
# and FORCE is not "y" it is rerun with -F.
# Arguments: $1 - pid
# In accumulate mode (ACCUM == "y") the output file names carry a timestamp.
runJmap() {
  local pid=$1
  local jmapBin="/awips2/java/bin/jmap"
  local prePath="${dataPath}/pid_${pid}_"
  local options=""
  if [ "$FORCE" == "y" ]; then
    options="${options} -F"
  fi
  local t1
  t1=$(date "+%Y%m%d %H:%M:%S")
  local log="${prePath}dump.log"
  local stamp=""
  if [ "$ACCUM" == "y" ]; then
    # accum needs to change hprof by date
    stamp="_$(date "+%Y%m%d_%H%M%S")"
  fi
  local dumpPath="${prePath}dump${stamp}.hprof"
  local heapPath="${prePath}heap${stamp}.txt"

  echo "${t1}: Running command: ${jmapBin} -heap $pid" >> $processFile
  ${jmapBin} -heap $pid >> ${heapPath} 2>&1

  # Java 1.7 has a bug that causes jmap to crash processes using the G1 garbage collector
  # more info at http://stackoverflow.com/questions/20571004/garbage-collector-first-and-jmap-eof-bug
  # workaround is to add the 'live' option to jmap to limit the dump to only live objects
  local cmd="${jmapBin} -dump:live,format=b,file=${dumpPath}"
  echo "${t1}: Running command: $cmd $options $pid >> $log 2>&1" >> $processFile
  if ! $cmd $options $pid >> $log 2>&1 && [[ $FORCE != "y" ]]; then
    t1=$(date "+%Y%m%d %H:%M:%S")
    echo "${t1}: jmap for $pid failed to connect, rerunning with -F" >> $processFile
    $cmd $options -F $pid >> $log 2>&1
  fi
}
# Launches a background runJmap for each of the first $numProcs entries of the
# pids array. Does nothing unless RUN_JMAP is "y".
launchJmaps() {
  [ "$RUN_JMAP" == "y" ] || return 0
  if [ -n "${cavePid}" ]; then
    echo "Capturing process heap dump for pid $cavePid"
  else
    echo "Capturing all Heap Dumps"
  fi
  local idx
  for ((idx = 0; idx < numProcs; idx++)); do
    runJmap ${pids[$idx]} &
  done
}
# Runs qpid-stat against host cpv1 in the background, once for queues (-q) and
# once for sessions (-s), appending to ${dataPath}/qpid-stat-queues.log and
# ${dataPath}/qpid-stat-sessions.log. Each command is logged to $processFile.
# In accumulate mode (ACCUM == "y") a "Running for <time>" separator is written
# before each run. Does nothing unless RUN_QPID_STAT is "y".
runQpidStat() {
  # honour the -q option like the other grab* functions honour their flags
  [ "$RUN_QPID_STAT" == "y" ] || return 0

  local qpidHost=cpv1
  local prePath="${dataPath}/"
  local t1
  t1=$(date "+%Y%m%d %H:%M:%S")

  local kind flag cmd log
  for kind in queues sessions; do
    if [ "$kind" == "queues" ]; then
      flag="-q"
    else
      flag="-s"
    fi
    cmd="/awips2/python/bin/qpid-stat ${flag} -Smsg -L500 ${qpidHost}"
    # build the log path from prePath so it lands in the capture directory
    # regardless of the current working directory
    log="${prePath}qpid-stat-${kind}.log"
    echo "${t1}: Running command: $cmd >> $log 2>&1 &" >> "$processFile"
    if [ "$ACCUM" == "y" ]; then
      echo >> "$log"
      echo >> "$log"
      echo "Running for $t1" >> "$log"
    fi
    # cmd is intentionally unquoted: it holds the command and its arguments
    $cmd >> "$log" 2>&1 &
  done
}
# Runs /awips2/cave/versions.sh, appending its output to
# ${dataPath}/versions.log, and logs the command line to $processFile.
runVersions() {
  local versionsScript="/awips2/cave/versions.sh"
  local versionsLog="${dataPath}/versions.log"
  local t1
  t1=$(date "+%Y%m%d %H:%M:%S")
  echo "${t1}: Running command: ${versionsScript} >> ${versionsLog} 2>&1" >> $processFile
  ${versionsScript} >> ${versionsLog} 2>&1
}
# Prints the command name (no arguments) of the given pid, as reported by ps.
# Arguments: $1 - pid
getCommandName() {
  local targetPid=$1
  ps --no-header c -p $targetPid -o cmd
}
# Prints the parent process id of the given pid, as reported by ps.
# Arguments: $1 - pid
getParentPid() {
  local targetPid=$1
  ps --no-header -p $targetPid -o ppid
}
# Takes a pid and prints the pid of the associated cave executable:
#  - the pid itself when its command name contains "cave",
#  - otherwise its parent's pid when the parent's command name contains "cave",
#  - otherwise the pid that was given (a note is appended to $processFile).
# Arguments: $1 - pid
determineCaveProcess() {
  local givenPid=$1
  local resolvedPid=$givenPid
  if [[ ! $(getCommandName $givenPid) =~ cave ]]; then
    # worker pid probably provided, check parent
    resolvedPid=$(getParentPid $givenPid)
    if [[ ! $(getCommandName $resolvedPid) =~ cave ]]; then
      # parent wasn't cave either... continue on using PID provided
      echo "${t1}: Unable to find cave process for pid $givenPid, proceeding with provided PID" >> $processFile
      resolvedPid=$givenPid
    fi
  fi
  # unquoted on purpose: drops the padding ps puts around the parent pid
  echo $resolvedPid
}
# Parse the command line. Every option except -quick and -h takes the next
# argument as its value. Parsing stops at the first empty argument; -h or any
# unrecognized argument prints the usage message (usage exits the script).
while [[ -n "$1" ]]; do
  arg=$1
  shift
  case $arg in
    -a)
      ACCUM="$1"
      shift
      ;;
    -c)
      REMOTE_SERVERS_TO_CHECK="$1"
      shift
      ;;
    -d)
      RUN_JMAP="$1"
      shift
      ;;
    -e)
      # may be repeated: each use adds one EDEX jvm name to edexProcs
      EDEX_MODE="Y"
      edexProcs[$edexProcCount]="$1"
      shift
      edexProcCount=$((edexProcCount + 1))
      ;;
    -f)
      FORCE="$1"
      shift
      ;;
    -l)
      GRAB_CAVE_AND_ALERTVIZ_LOGS="$1"
      shift
      ;;
    -m)
      MOVE_ALL_HS_ERR_PID="$1"
      shift
      ;;
    -p)
      cavePid="$1"
      shift
      ;;
    -q)
      RUN_QPID_STAT="$1"
      shift
      ;;
    -Q)
      GRAB_CURRENT_QUERIES="$1"
      shift
      ;;
    -r)
      reason="$1"
      shift
      ;;
    -s)
      RUN_JSTACK="$1"
      shift
      ;;
    -screen)
      GRAB_SCREENSHOT="$1"
      shift
      ;;
    -t)
      GRAB_REMOTE_TOP="$1"
      shift
      ;;
    -v)
      GRAB_REMOTE_VMSTAT="$1"
      shift
      ;;
    -V)
      RUN_VERSIONS="$1"
      shift
      ;;
    -z)
      TGZ_OUTPUT="$1"
      shift
      ;;
    -quick)
      RUN_JMAP="N"
      JSTACK_ITERATIONS=5
      ;;
    -h | *)
      usage
      ;;
  esac
done
# validate inputs: normalize every yes/no flag to exactly "y" or "n"
# ACCUM is the name set by -a and read by the rest of the script; give it a
# default so it is normalized even when -a was not passed.
ACCUM="${ACCUM:-N}"
for flagName in \
  RUN_JSTACK RUN_JMAP RUN_QPID_STAT FORCE MOVE_ALL_HS_ERR_PID \
  GRAB_REMOTE_TOP GRAB_REMOTE_VMSTAT GRAB_CURRENT_QUERIES \
  GRAB_CAVE_AND_ALERTVIZ_LOGS EDEX_MODE TGZ_OUTPUT ACCUM \
  RUN_VERSIONS GRAB_SCREENSHOT GRAB_XORG_LOG; do
  # ${!flagName} is the current value of the variable named by flagName
  checkYes "$flagName" "${!flagName}"
done

# if PID mode don't grab other hs_err_pids
if [ -n "$cavePid" ]; then
  MOVE_ALL_HS_ERR_PID="n"
fi

# if accum don't tgz and don't grab version information
if [ "$ACCUM" == "y" ]; then
  TGZ_OUTPUT="n"
  RUN_VERSIONS="n"
fi

# EDEX mode: workstation-only captures are switched off and a default reason
# is supplied so no popup is needed
if [ "$EDEX_MODE" == "y" ]; then
  if [ -z "$reason" ]; then
    reason="EDEX capture"
  fi
  GRAB_CAVE_AND_ALERTVIZ_LOGS="n"
  MOVE_ALL_HS_ERR_PID="n"
  GRAB_REMOTE_TOP="n"
  GRAB_REMOTE_VMSTAT="n"
  GRAB_XORG_LOG="n"
fi
# Create the capture directory for this host and record local top/vmstat.
umask 0002
checkDir "$basePath"
user=$(whoami)
hostName=$(hostname -s)
fullHostName=$(hostname)
# remove the -testBed items: keep only the part of the short host name that
# precedes the first '-' (used later as a fallback log directory name)
strippedHostName=${hostName%%-*}
hostPath="${basePath}/${hostName}"
checkDir "$hostPath"
curTime=$(date +%Y%m%d_%H%M%S)
curDir=$(pwd)
if [ "${ACCUM}" == "y" ]; then
  # accumulate mode shares one directory per day
  curDay=$(date +%Y%m%d)
  dataPath="${hostPath}/captureData_${curDay}"
else
  dataPath="${hostPath}/captureData_${curTime}"
fi
checkDir "$dataPath"
cd "$dataPath"
processFile=${dataPath}/capture_info.log

# wide COLUMNS so top does not truncate the command lines
export COLUMNS=160
top -b -c -n1 >> "${dataPath}/top_$hostName.log"
vmstat -w 1 5 >> "${dataPath}/vmstat_$hostName.log"
if [ "$ACCUM" == "y" ]; then
  # blank separators between successive runs appended to the same files
  echo "" >> "${dataPath}/top_$hostName.log"
  echo "" >> "${dataPath}/top_$hostName.log"
  echo "" >> "${dataPath}/vmstat_$hostName.log"
fi
# Build the list of processes to capture ($procs) and the subset owned by the
# current user ($myProcs). ps columns are: user, pid, ppid, ...
#expand user field to account for IdM sites
export PS_FORMAT="user:${UID_LEN},pid,ppid,c,stime,tty,stat,time,cmd"
if [ "$EDEX_MODE" == "y" ]; then
  # regex of the form: edex.run.mode=(request|ingest|...) followed by a space
  grepString="$edexGrepString("
  count=0
  while [ "$count" -lt "$edexProcCount" ]; do
    if [ "$count" -ne "0" ]; then
      grepString="${grepString}|"
    fi
    grepString="${grepString}${edexProcs[$count]}"
    count=$((count + 1))
  done
  grepString="${grepString}) "
  procs=$(ps ef | grep -E "$grepString" | grep -v "grep")
else
  #list of cave process ids to get ps output for
  caveProcNums=""
  for parent in $(pgrep '^cave$'); do
    # the cave process starts a new JVM as a child process
    # find all children of the cave process
    children=$(pgrep -P $parent)
    if [[ -z $children ]]; then
      # no children, assume that this is a main cave process
      caveProcNums="$caveProcNums $parent"
    else
      # otherwise, only get ps output for children
      caveProcNums="$caveProcNums $children"
    fi
  done
  if [ ! "${caveProcNums}" = "" ]; then
    procs=$(ps --no-header fp $caveProcNums)
  fi
fi
unset PS_FORMAT

if [ -n "$cavePid" ]; then
  # limit cave procs to the requested PID
  echo "Running in PID mode, only requesting for pid $cavePid" >> $processFile
  # Match the pid or parent pid column exactly (the requested pid may be the
  # JVM itself or the cave executable that launched it); a substring match
  # over the whole line would also hit longer pids, times and command lines.
  procs=$(echo "$procs" | awk -v p="$cavePid" '$2 == p || $3 == p')
fi
# Ownership is decided on the user column only; a substring match over the
# whole line would also match the user name inside a command line or path.
myProcs=$(echo "$procs" | awk -v u="$user" '$1 == u')
echo "${procs}" >> $processFile
echo "" >> $processFile
echo "" >> $processFile
checkForProcsAsOtherUsers
# Launch jstack (and, memory permitting, jmap) for each of the user's processes,
# or log that none were found.
if [ -n "${myProcs}" ]; then
  t1=$(date "+%Y%m%d %H:%M:%S")
  echo "Processes found for user $user, capturing data to $dataPath"
  echo "${t1}: Processes found for user $user, capturing data to $dataPath" >> $processFile
  echo "" >> $processFile
  numProcs=$(echo "$myProcs" | wc -l)

  # preserve IFS and set it to line feed only
  PREV_IFS=$IFS
  IFS=$'\n'
  count=0
  # grab the pids (second ps column) for future use
  for proc in $myProcs; do
    pids[$count]=$(echo "$proc" | awk '{print $2}')
    count=$((count + 1))
  done
  IFS=$PREV_IFS

  launchJstacks
  # checkJmapMem returns 1 when there is enough memory to run jmap
  checkJmapMem
  rc=$?
  if [ $rc -eq 1 ]; then
    launchJmaps
  fi
else
  t1=$(date "+%Y%m%d %H:%M:%S")
  echo "*** NO processes found for user $user, capturing limited data to $dataPath"
  echo "${t1}: NO processes found for $user" >> $processFile
  echo "" >> $processFile
fi
# Kick off the remaining captures; each function checks its own flag and runs
# its work in the background.
# Xorg logs
grabXorgLog
# top for servers
grabRemoteTop
# vmstat for servers
grabRemoteVmstat
# current database queries
grabCurrentDatabaseQueries
# screen shot, spawns background process for each screen
grabScreenShot
# qpid stat
runQpidStat

# time an ls of the user's home directory to check nas performance
/usr/bin/time -p ls -la ~ > ${dataPath}/nas_check_ls_home.txt 2>&1 &

# move all hs_err_pid from user's home directory to capture directory
if [ "${MOVE_ALL_HS_ERR_PID}" == "y" ]; then
  numErrFiles=$(ls ${HOME}/hs_err_pid* 2> /dev/null | wc -l)
  t1=$(date "+%Y%m%d %H:%M:%S")
  if [ "${numErrFiles}" != "0" ]; then
    echo "Capturing ${numErrFiles} hs_err_pids"
    echo "${t1}: Capturing ${numErrFiles} hs_err_pids" >> $processFile
    mv ${HOME}/hs_err_pid* ${dataPath}
  else
    echo "*** NO hs_err_pid files to capture"
    echo "${t1}: No hs_err_pid files to capture" >> $processFile
  fi
  echo "" >> $processFile
fi
# Grab the current alertviz logs and the cave console logs written to in the
# last 2 hours; in pid mode only the console logs for that pid are grabbed.
if [ "${GRAB_CAVE_AND_ALERTVIZ_LOGS}" == "y" ]; then
  # the log directory may be named after the short, stripped or full host name
  dir="${HOME}/caveData/logs/${hostName}"
  if [ ! -d "$dir" ]; then
    dir="${HOME}/caveData/logs/${strippedHostName}"
    if [ ! -d "$dir" ]; then
      dir="${HOME}/caveData/logs/${fullHostName}"
    fi
  fi
  t1=$(date "+%Y%m%d %H:%M:%S")
  if [ -d "$dir" ]; then
    echo "Capturing alertviz logs"
    echo "${t1}: Capturing alertviz logs" >> "$processFile"
    # -p: the directory already exists on repeated runs in accumulate mode
    mkdir -p "${dataPath}/alertVizDatabase"
    cp -r "$dir" "${dataPath}/alertVizDatabase"
  else
    echo "*** NO alertviz logs to capture"
    echo "${t1}: *** Can't find alertviz logs to capture" >> "$processFile"
    echo "" >> "$processFile"
  fi

  dir="${HOME}/caveData/logs/consoleLogs/${hostName}"
  if [ ! -d "$dir" ]; then
    dir="${HOME}/caveData/logs/consoleLogs/${strippedHostName}"
    if [ ! -d "$dir" ]; then
      dir="${HOME}/caveData/logs/consoleLogs/${fullHostName}"
    fi
  fi
  t1=$(date "+%Y%m%d %H:%M:%S")
  # grab any logs written to in last 2 hours, or pid mode only that log
  if [ -d "$dir" ]; then
    echo "Capturing cave logs"
    echo "${t1}: Capturing cave logs" >> "$processFile"
    mkdir -p "${dataPath}/consoleLogs"
    if [ -n "${cavePid}" ]; then
      # logs have cave executable pid or worker pid in the name
      caveExecPid=$(determineCaveProcess ${cavePid})
      # The two -name tests must be grouped: without the parentheses -exec
      # binds only to the second -name, so logs named after the cave
      # executable pid would be matched but never copied.
      find "$dir" -type f \( -name "*${caveExecPid}*" -o -name "*${cavePid}*" \) -exec cp {} "${dataPath}/consoleLogs" \;
    else
      find "$dir" -type f -mmin -120 -exec cp {} "${dataPath}/consoleLogs" \;
    fi
  else
    echo "*** NO cave logs to capture"
    echo "${t1}: *** Can't find cave logs to capture" >> "$processFile"
    echo "" >> "$processFile"
  fi
fi
# grab the version information
if [ "$RUN_VERSIONS" == "y" ]; then
  runVersions
fi

# get reason for running capture; a cancelled dialog aborts the whole capture
if ! reasonForCapture; then
  # kill all child processes
  kill $(ps -o pid= --ppid $$)
  echo Canceling.
  wait
  # do not leave the database password file behind on the cancel path
  if [ "$GRAB_CURRENT_QUERIES" == "y" ]; then
    rm -f ~/.pgpass
  fi
  rm -rf "$dataPath"
  exit 1
fi

# wait for any backgrounded processes by this script to finish
wait

message=""
# tar/gz the output
if [ "${TGZ_OUTPUT}" == "y" ]; then
  echo "Tar/zipping captured data"
  if [ -n "${cavePid}" ]; then
    tgzFile="${hostPath}/captureData_${curTime}_pid_${cavePid}.tgz"
  else
    tgzFile="${hostPath}/captureData_${curTime}.tgz"
  fi
  # Only remove the capture directory once the archive was written; if tar
  # fails (e.g. disk full) the raw data is kept and reported instead.
  if cd "${hostPath}" && tar -czf "$tgzFile" --remove-files "captureData_${curTime}"; then
    rm -rf "${dataPath:?}"
    message="Data captured to $tgzFile"
  else
    message="Data captured to ${dataPath}"
  fi
else
  message="Data captured to ${dataPath}"
fi

zenity --info --no-wrap --title="Capture Done" --text="$message" > /dev/null 2>&1 &
echo
echo "$message"

# ~/.pgpass is only written when database queries were captured
if [ "$GRAB_CURRENT_QUERIES" == "y" ]; then
  rm -f ~/.pgpass
fi
cd "$curDir"