From d1fb6163f6989e63431a733e081661edda937012 Mon Sep 17 00:00:00 2001 From: JAMES LETTS Date: Mon, 3 Nov 2014 02:40:52 -0800 Subject: [PATCH] Updates. --- condor_history_analyze.sh | 40 +++++++++++++++++++++++++++------------ condor_history_dump.sh | 21 ++++++++++++++++++-- debug.sh | 3 ++- 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/condor_history_analyze.sh b/condor_history_analyze.sh index d8cc525..ba15aaa 100755 --- a/condor_history_analyze.sh +++ b/condor_history_analyze.sh @@ -1,8 +1,13 @@ #!/bin/bash # To do: Report CRAB3 vs CRAB2 jobs # DAG jobs remove from held table +# +# AccountingGroup=production are production jobs +# then GlobalJobId=crab3 are CRAB3 jobs +# rest are CRAB2 POOLNAME=$1 +FILE=$2 if [ -z $POOLNAME ] ; then echo "ERROR: Please specify a pool name." @@ -16,9 +21,16 @@ else source $glideinWMSMonitor_RELEASE_DIR/bashrc fi -# get the latest dumped history file from the web server: -FILE=$glideinWMSMonitor_OUTPUT_DIR/`ls -1rt /crabprod/CSstoragePath/Monitor \ - | grep ^monitor-anaops-history | grep \.txt$ | tail -1` +# get the latest dumped history file from the web server if not defined on command line +if [ -z $FILE ] ; then + FILE=$glideinWMSMonitor_OUTPUT_DIR/`ls -1rt /crabprod/CSstoragePath/Monitor \ + | grep ^monitor-anaops-history | grep \.txt$ | tail -1` + if [ $POOLNAME == 'vocms097.cern.ch' ] ; then + FILE=$glideinWMSMonitor_OUTPUT_DIR/`ls -1rt /crabprod/CSstoragePath/Monitor \ + | grep ^monitor-global-history | grep \.txt$ | tail -1` + fi +fi + NOW=`ls -l --time-style=+%s $FILE | awk '{print $6}'` echo HISTORY FILE: $FILE @@ -137,11 +149,15 @@ for x in $COUNT_EXIT_CODES ; do done echo -echo "N.B. Exit Code explanations taken from https://twiki.cern.ch/twiki/bin/view/CMSPublic/JobExitCodes." -echo " Only categories with more than 100 jobs are shown." -echo " Some ambiguity exists in the error codes from HTCondor. Additional possibilities for exit" +echo "N.B." +echo " * Exit Code explanations taken from https://twiki.cern.ch/twiki/bin/view/CMSPublic/JobExitCodes." +echo " * Only categories with more than 100 jobs are shown." +echo " * Some ambiguity exists in the error codes from HTCondor. Additional possibilities for exit" echo " code mappings are listed on the following line in those cases, e.g. HTCondor exit code" echo " 84 can map to 84 or 8020, since 8020%256=84." +echo " * In case someone is trying to make Crab2/3 comparisons, note that jobs failed with exit code" +echo " 506* (hit RSS/time boundary) in Crab2 appear as removed with no exit code in Crab3." + echo echo HELD JOBS IN THE PAST 24 HOURS: @@ -239,13 +255,13 @@ END { } ' | grep ^T | sort -echo -echo -echo USER PRIORITIES: -echo -condor_userprio -all -pool $POOLNAME +#echo +#echo +#echo USER PRIORITIES: +#echo +#condor_userprio -all -pool $POOLNAME #condor_userprio -allusers -all -pool $POOLNAME -$glideinWMSMonitor_RELEASE_DIR/debug.sh $FILE +$glideinWMSMonitor_RELEASE_DIR/debug.sh $FILE $POOLNAME exit diff --git a/condor_history_dump.sh b/condor_history_dump.sh index a65c9aa..8b82cd3 100755 --- a/condor_history_dump.sh +++ b/condor_history_dump.sh @@ -18,17 +18,34 @@ POOLNAME="vocms097.cern.ch" OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/monitor-global-history-${DATE}.txt condor_history_dump $POOLNAME > ${OUTFILE}.tmp mv ${OUTFILE}.tmp $OUTFILE - +GLOBAL_POOL_INFILE=$OUTFILE POOLNAME="glidein-collector-2.t2.ucsd.edu" - OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/latest-history.txt $glideinWMSMonitor_RELEASE_DIR/condor_history_analyze.sh $POOLNAME > ${OUTFILE}.tmp mv ${OUTFILE}.tmp $OUTFILE + +POOLNAME="glidein-collector-2.t2.ucsd.edu" OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/latest-overflow.txt $glideinWMSMonitor_RELEASE_DIR/condor_history_analyze_overflow.sh $POOLNAME > ${OUTFILE}.tmp mv ${OUTFILE}.tmp $OUTFILE +POOLNAME="vocms097.cern.ch" +OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/latest-global-history.txt +$glideinWMSMonitor_RELEASE_DIR/condor_history_analyze.sh $POOLNAME > ${OUTFILE}.tmp +mv ${OUTFILE}.tmp $OUTFILE + +# AccountingGroup=production are production jobs +# then GlobalJobId=crab3 are CRAB3 jobs +# rest are CRAB2 + +POOLNAME="vocms097.cern.ch" +INFILE=$glideinWMSMonitor_OUTPUT_DIR/monitor-global-crab3-history-${DATE}.txt +grep 'GlobalJobId=crab3' $GLOBAL_POOL_INFILE > $INFILE +OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/latest-global-crab3-history.txt +$glideinWMSMonitor_RELEASE_DIR/condor_history_analyze.sh $POOLNAME $INFILE > ${OUTFILE}.tmp +mv ${OUTFILE}.tmp $OUTFILE + exit diff --git a/debug.sh b/debug.sh index b2a4d48..1508b23 100755 --- a/debug.sh +++ b/debug.sh @@ -1,6 +1,7 @@ #!/bin/bash FILE=$1 +POOLNAME=$2 export TZ=UTC0 echo @@ -9,7 +10,7 @@ echo echo HISTORY FILE: $FILE echo printf "%-35s %8s %-35s\n" "schedd Name" "#Jobs" "Earliest Entry Time" -SCHEDDS=`condor_status -schedd -format '%s\n' Name` +SCHEDDS=`condor_status -schedd -pool $POOLNAME -format '%s\n' Name` for SCHEDD in $SCHEDDS ; do n=` cat $FILE | grep $SCHEDD\# | wc -l` earliest=`cat $FILE | grep $SCHEDD\# | grep -o 'EnteredCurrentStatus=[0-9]*' \