Skip to content

Commit

Permalink
Various updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesletts committed Mar 22, 2015
1 parent e61cb4a commit 92f0f0d
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 32 deletions.
55 changes: 45 additions & 10 deletions condor_check
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ fi

COLLECTOR1=$1
COLLECTOR2=$2

# Collector2 could be discovered

# Header
Expand All @@ -36,6 +37,25 @@ if [ $COLLECTOR2 != "unknown" ] ; then
echo
fi

# Frontend groups:
# Print the GLIDECLIENT_Group attribute of every ad returned by the pool at
# $COLLECTOR1, one per line, then collapse them with sort | uniq -c so each
# distinct group name is shown once, prefixed by how many times it occurred.
echo Frontend Group glidein counts:
condor_status -pool $COLLECTOR1 \
-format '%s\n' GLIDECLIENT_Group | sort | uniq -c
echo
# Frontend ClassAds in the Collector:
# Same tally, this time over the GlideClientName attribute. The query uses
# -any and is restricted by the -const expression to ads whose GlideinMyType
# matches "glideresource".
# NOTE(review): the \" sequences are inside single quotes, so the backslashes
# reach condor_status literally - confirm the constraint is parsed as intended.
echo Frontend ClassAds in the Collector:
condor_status -pool $COLLECTOR1 -any \
-const '(GlideinMyType=?=\"glideresource\")' \
-format '%s\n' GlideClientName | sort | uniq -c
echo

# Count the lines printed for the collector ads (one Machine value per line)
# reported by $COLLECTOR1; the comparison script is only run when more than
# one collector is listed. It receives both collector names as arguments.
ncollectors=`condor_status -collector -format '%s\n' Machine -pool $COLLECTOR1 | wc -l`
if [ $ncollectors -gt 1 ] ; then
echo Compare Collectors:
$glideinWMSMonitor_RELEASE_DIR/CompareCollectors.sh $COLLECTOR1 $COLLECTOR2
echo
fi

# get information from sitedb about pledges and se names by CMSSite name
PLEDGES=`get_pledges_from_sitedb`
SEDFILE=`translate_se_names_in_sitedb_to_cmssite`
Expand All @@ -54,8 +74,8 @@ RUNNING=`get_pilots_by_site $COLLECTOR1` || exit 5
DESIRED=`get_DESIRED_Sites $COLLECTOR1` || exit 6

# Print the table of pilots and pressure from queued jobs for each site
printf "%-20s%10s%10s%10s%10s%10s%10s%10s%10s %-18s\n" "Site" "Pledge" "Pledged" "Analysis" "Any Actv." "Claimed" "Unclaimed" "Pressure" "Exclusive" "Maintenance"
printf "%30s%10s%10s%10s%40s%10s\n" "Updated" "Analysis" "Usage 1m" "Usage 1m" "Pressure"
printf "%-20s%10s%10s%10s%10s%10s%10s%10s%10s%10s %-18s\n" "Site" "Pledge" "Pledged" "Analysis" "All" "Maximum" "Claimed" "Unclaimed" "Pressure" "Exclusive" "Maintenance"
printf "%30s%10s%10s%10s%10s%40s%10s\n" "Updated" "to CMS" "Usage 1m" "Usage 1m" "Usage 1m" "Pressure"
echo

# Sum up some interesting quantities:
Expand All @@ -66,6 +86,7 @@ totalexclusivepressure=0
totalpledge=0
totalanausage=0
totalallusage=0
totalmaxusage=0

# Loop over sites for the table:
sites=`cat $SEDFILE | awk -F\/ '{print $3}' | sort | uniq`
Expand Down Expand Up @@ -98,32 +119,45 @@ for site in $sites ; do
validityofpledge=`grep ^$site\, $PLEDGES | tail -1 | awk -F\, '{print $3}'`
if [ "X"$validityofpledge == "X" ] ; then validityofpledge="N/A" ; fi

# Extract average job slots at site from Dashboard (all activities including analysis and production):
# Extract average job slots at site from Dashboard for analysis
avgusage=`grep ^$site\, $ANAUSAGE | awk -F\, '{print int($2)}'`
if [ "X"$avgusage == "X" ] ; then avgusage=0 ; fi
totalanausage=$[$totalanausage+$avgusage]

# Extract max job slots at site from Dashboard (all activities including analysis and production):
allusage=`grep ^$site\, $ALLUSAGE | awk -F\, '{print int($3)}'`
# and for all activities
allusage=`grep ^$site\, $ALLUSAGE | awk -F\, '{print int($2)}'`
if [ "X"$allusage == "X" ] ; then allusage=0 ; fi
totalallusage=$[$totalallusage+$allusage]

# Extract max job slots at site from Dashboard (all activities)
maxusage=`grep ^$site\, $ALLUSAGE | awk -F\, '{print int($3)}'`
if [ "X"$maxusage == "X" ] ; then maxusage=0 ; fi
totalmaxusage=$[$totalmaxusage+$maxusage]

# downtimes
downtime=`grep ^$site\, $DOWNTIMES | awk -F\, '{print $2}'`
#if [ `echo $downtime | wc -w` -eq 0 ] ; then downtime="" ; fi

# skip meaningless entries, no pledge, no demand
if [ $[$pledge+$claimed+$unclaimed+$exclusivepressure] -eq 0 ] ; then continue ; fi
if [ $[$pledge+$claimed+$unclaimed+$exclusivepressure] -eq 0 ] ; then
continue
fi
if [ $COLLECTOR1 != "vocms097.cern.ch" ] ; then
if [ $[$claimed+$unclaimed+$exclusivepressure] -eq 0 ] ; then
continue
fi
fi

printf "%-20s%10s%10s%10s%10s%10s%10s%10s%10s %-18s\n" $site $validityofpledge $pledge $avgusage $allusage $claimed $unclaimed $pressure $exclusivepressure "$downtime"
printf "%-20s%10s%10s%10s%10s%10s%10s%10s%10s%10s %-18s\n" $site $validityofpledge $pledge $avgusage $allusage $maxusage $claimed $unclaimed $pressure $exclusivepressure "$downtime"

done

# Table footer, sums:
totalpressure="N/A"
printf "\n%-30s%10s%10s%10s%10s%10s%10s%10s\n" "SUM" $totalpledge $totalanausage $totalallusage $totalclaimed $totalunclaimed $totalpressure $totalexclusivepressure
printf "\n%-30s%10s%10s%10s%10s%10s%10s%10s%10s\n" "SUM" $totalpledge $totalanausage $totalallusage $totalmaxusage $totalclaimed $totalunclaimed $totalpressure $totalexclusivepressure
echo
cat << EOF
if [ $COLLECTOR1 == "vocms097.cern.ch" ] ; then
cat << EOF
Notes:
* Pledges are 100% of the last pledge entered in SiteDB for the site for all activities.
* Analysis Usage statistics are from the last month in Dashboard for activity=analysis only and includes
Expand All @@ -133,6 +167,7 @@ Notes:
on the schedd.
* Sites are only listed in the Site Table if there is demand (running or queued) or pledged resources.
EOF
fi

# clean up temp files
rm $PLEDGES $SEDFILE $CLAIMED $RUNNING $DESIRED $ANAUSAGE $DOWNTIMES $ALLUSAGE
Expand Down
22 changes: 16 additions & 6 deletions condor_check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,30 @@ cat >> $OUTFILE <<EOF
======================================================= GLOBAL POOL ITB ==================================================
EOF
# run analysis of global pool, with a time limit of 600s.
COLLECTOR1=vocms056.cern.ch
COLLECTOR1=vocms0115.cern.ch
COLLECTOR2=unknown
alarm 600 $glideinWMSMonitor_RELEASE_DIR/condor_check $COLLECTOR1 $COLLECTOR2 >> $OUTFILE
cat >> $OUTFILE <<EOF
===================================================== PRODUCTION POOL =====================================================
======================================================= TIER-0 POOL =======================================================
EOF
COLLECTOR1=vocms97.cern.ch
COLLECTOR2=unknown
#alarm 600 $glideinWMSMonitor_RELEASE_DIR/condor_check $COLLECTOR1 $COLLECTOR2 short >> $OUTFILE
COLLECTOR1=vocms007.cern.ch
COLLECTOR2=cmssrv239.fnal.gov
alarm 600 $glideinWMSMonitor_RELEASE_DIR/condor_check $COLLECTOR1 $COLLECTOR2 >> $OUTFILE
cat >> $OUTFILE <<EOF
======================================================= UCSD POOL =======================================================
EOF
COLLECTOR1=glidein-collector.t2.ucsd.edu
COLLECTOR2=glidein-collector-2.t2.ucsd.edu
alarm 600 $glideinWMSMonitor_RELEASE_DIR/condor_check $COLLECTOR1 $COLLECTOR2 >> $OUTFILE




#if [ $rc -eq 0 ] ; then
LINKNAME=$glideinWMSMonitor_OUTPUT_DIR/latest.txt
Expand Down
1 change: 1 addition & 0 deletions condor_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ get_DESIRED_Sites() {
source $glideinWMSMonitor_RELEASE_DIR/sitedb_functions.sh
SEDFILE=`translate_se_names_in_sitedb_to_cmssite`

# we could use the -global option to condor_q and not specify the SCHEDD list
SCHEDDS=`condor_status -pool $POOLNAME -const '(TotalIdleJobs>0)' -schedd -format ' -name %s' Name ` || return 1
DESIRED=`mktemp -t DESIRED.txt.XXXXXXX` || return 2

Expand Down
9 changes: 7 additions & 2 deletions crontab.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
*/10 * * * * source /home/letts/Monitor/glideinWMSMonitor/bashrc && $glideinWMSMonitor_RELEASE_DIR/condor_check.sh
0 5,7,11,17,23 * * * source /home/letts/Monitor/glideinWMSMonitor/bashrc && $glideinWMSMonitor_RELEASE_DIR/condor_history_dump.sh
*/15 * * * * source /home/letts/Monitor/glideinWMSMonitor/bashrc && $glideinWMSMonitor_RELEASE_DIR/condor_check.sh
*/15 * * * * source /home/letts/Monitor/glideinWMSMonitor/bashrc && $glideinWMSMonitor_RELEASE_DIR/multi-core.sh >> /dev/null 2>&1
54 * * * * /crabprod/CSstoragePath/make-multi-core-html.sh >> /dev/null 2>&1
0 5,11,17,23 * * * source /home/letts/Monitor/glideinWMSMonitor/bashrc && $glideinWMSMonitor_RELEASE_DIR/condor_history_dump.sh
36 9 * * * /home/letts/.globus/2014-2015/init-proxy.sh >> /dev/null 2>&1
40 23 * * * /home/letts/scripts/NewQuotaMail.sh > /home/letts/NewQuotaMail.txt 2>&1
10,20,30,40,50 * * * * /home/letts/UserPrioDump/dev/DashboardFeed.sh > /home/letts/UserPrioDump/dev/Dashboard.txt 2>&1
18 changes: 12 additions & 6 deletions dashboard_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,13 @@ dashboard_job_slots_used() {
dashboard_user_report() {
GRANULARITY=$1
NUMBER_OF_PERIODS=$2
DATE1=$3
printf "%10s,%10s,%10s,%10s\n" date1 date2 nusers nuserst2
date1=`date -dlast-monday +%F`
# Choose the end date of the first reporting period: use DATE1 when the
# caller supplied one, otherwise fall back to last Monday (GNU date syntax).
# The emptiness check must be -z on the quoted value; -x tests whether a
# path is executable, which only happened to work for an empty DATE1 and
# picked the wrong branch whenever a file or directory named like the date
# existed in the working directory.
if [ -z "$DATE1" ]; then
  date1=`date -dlast-monday +%F`
else
  date1=$DATE1
fi
for (( i=1; i<=$NUMBER_OF_PERIODS; i++ )) ; do
date2=$date1
date1=`date -d "$date2 -$GRANULARITY days" +%F`
Expand All @@ -194,17 +199,18 @@ dashboard_job_report() {
# ARGS: Number of time periods to display
GRANULARITY=$1
NUMBER_OF_PERIODS=$2
DATE1=$3
printf "%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s\n" \
date1 date2 \
ncrab2T1 ncrab3T1 nallT1 jscrab2T1 jscrab3T1 jsallT1 \
ncrab2T2 ncrab3T2 nallT2 jscrab2T2 jscrab3T2 jsallT2 \
ncrab2T3 ncrab3T3 nallT3 jscrab2T3 jscrab3T3 jsallT3

date1=`date -dlast-monday +%F`

#date1="2012-04-09"
#GRANULARITY=7
#NUMBER_OF_PERIODS=4
# Choose the end date of the first reporting period: use DATE1 when the
# caller supplied one, otherwise fall back to last Monday (GNU date syntax).
# The emptiness check must be -z on the quoted value; -x tests whether a
# path is executable, which only happened to work for an empty DATE1 and
# picked the wrong branch whenever a file or directory named like the date
# existed in the working directory.
if [ -z "$DATE1" ]; then
  date1=`date -dlast-monday +%F`
else
  date1=$DATE1
fi

for (( i=1; i<=$NUMBER_OF_PERIODS; i++ )) ; do
date2=$date1
Expand Down
6 changes: 0 additions & 6 deletions multi-core.sh
Original file line number Diff line number Diff line change
Expand Up @@ -261,12 +261,6 @@ echo "}"
return
}

JSONFILE=${glideinWMSMonitor_OUTPUT_DIR}-json/monitor-multicore-production-`/bin/date +%F-Z%R -u`.json
writeoutjsonfile vocms97.cern.ch > $JSONFILE

JSONFILE=${glideinWMSMonitor_OUTPUT_DIR}-json/monitor-multicore-anaops-`/bin/date +%F-Z%R -u`.json
writeoutjsonfile glidein-collector.t2.ucsd.edu > $JSONFILE

JSONFILE=${glideinWMSMonitor_OUTPUT_DIR}-json/monitor-multicore-global-`/bin/date +%F-Z%R -u`.json
writeoutjsonfile vocms097.cern.ch > $JSONFILE

Expand Down
3 changes: 2 additions & 1 deletion weekly_job_report.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ else
source $glideinWMSMonitor_RELEASE_DIR/bashrc
fi
OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/weekly-report-jobs-`/bin/date +%Y-%m-%d`.txt
dashboard_job_report 7 7 > $OUTFILE
OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/weekly-report-jobs-test.txt
dashboard_job_report 7 1 > $OUTFILE
exit
17 changes: 16 additions & 1 deletion weekly_user_report.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,20 @@ else
source $glideinWMSMonitor_RELEASE_DIR/bashrc
fi
OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/weekly-report-user-`/bin/date +%Y-%m-%d`.txt
dashboard_user_report 7 7 > $OUTFILE
#dashboard_user_report 7 30 > $OUTFILE

OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/monthly-report-user-2014.txt
dashboard_user_report 31 1 2014-02-01 > $OUTFILE
dashboard_user_report 28 1 2014-03-01 | tail -1 >> $OUTFILE
dashboard_user_report 31 1 2014-04-01 | tail -1 >> $OUTFILE
dashboard_user_report 30 1 2014-05-01 | tail -1 >> $OUTFILE
dashboard_user_report 31 1 2014-06-01 | tail -1 >> $OUTFILE
dashboard_user_report 30 1 2014-07-01 | tail -1 >> $OUTFILE
dashboard_user_report 31 1 2014-08-01 | tail -1 >> $OUTFILE
dashboard_user_report 31 1 2014-09-01 | tail -1 >> $OUTFILE
dashboard_user_report 30 1 2014-10-01 | tail -1 >> $OUTFILE
dashboard_user_report 31 1 2014-11-01 | tail -1 >> $OUTFILE
dashboard_user_report 30 1 2014-12-01 | tail -1 >> $OUTFILE
dashboard_user_report 31 1 2015-01-01 | tail -1 >> $OUTFILE

exit

0 comments on commit 92f0f0d

Please sign in to comment.