diff --git a/condor_check b/condor_check index c50416f..ace4608 100755 --- a/condor_check +++ b/condor_check @@ -13,6 +13,7 @@ fi COLLECTOR1=$1 COLLECTOR2=$2 + # Collector2 could be discovered # Header @@ -36,6 +37,25 @@ if [ $COLLECTOR2 != "unknown" ] ; then echo fi +# Frontend groups: +echo Frontend Group glidein counts: +condor_status -pool $COLLECTOR1 \ + -format '%s\n' GLIDECLIENT_Group | sort | uniq -c +echo +# Frontend ClassAds in the Collector: +echo Frontend ClassAds in the Collector: +condor_status -pool $COLLECTOR1 -any \ + -const '(GlideinMyType=?=\"glideresource\")' \ + -format '%s\n' GlideClientName | sort | uniq -c +echo + +ncollectors=`condor_status -collector -format '%s\n' Machine -pool $COLLECTOR1 | wc -l` +if [ $ncollectors -gt 1 ] ; then + echo Compare Collectors: + $glideinWMSMonitor_RELEASE_DIR/CompareCollectors.sh $COLLECTOR1 $COLLECTOR2 + echo +fi + # get information from sitedb about pledges and se names by CMSSite name PLEDGES=`get_pledges_from_sitedb` SEDFILE=`translate_se_names_in_sitedb_to_cmssite` @@ -54,8 +74,8 @@ RUNNING=`get_pilots_by_site $COLLECTOR1` || exit 5 DESIRED=`get_DESIRED_Sites $COLLECTOR1` || exit 6 # Print the table of pilots and pressure from queued jobs for each site -printf "%-20s%10s%10s%10s%10s%10s%10s%10s%10s %-18s\n" "Site" "Pledge" "Pledged" "Analysis" "Any Actv." "Claimed" "Unclaimed" "Pressure" "Exclusive" "Maintenance" -printf "%30s%10s%10s%10s%40s%10s\n" "Updated" "Analysis" "Usage 1m" "Usage 1m" "Pressure" +printf "%-20s%10s%10s%10s%10s%10s%10s%10s%10s%10s %-18s\n" "Site" "Pledge" "Pledged" "Analysis" "All" "Maximum" "Claimed" "Unclaimed" "Pressure" "Exclusive" "Maintenance" +printf "%30s%10s%10s%10s%10s%40s%10s\n" "Updated" "to CMS" "Usage 1m" "Usage 1m" "Usage 1m" "Pressure" echo # Sum up some interesting quantities: @@ -66,6 +86,7 @@ totalexclusivepressure=0 totalpledge=0 totalanausage=0 totalallusage=0 +totalmaxusage=0 # Loop over sites for the table: sites=`cat $SEDFILE | awk -F\/ '{print $3}' | sort | uniq` @@ -98,32 +119,45 @@ for site in $sites ; do validityofpledge=`grep ^$site\, $PLEDGES | tail -1 | awk -F\, '{print $3}'` if [ "X"$validityofpledge == "X" ] ; then validityofpledge="N/A" ; fi - # Extract average job slots at site from Dashboard (all activities including analysis and production): + # Extract average job slots at site from Dashboard for analysis avgusage=`grep ^$site\, $ANAUSAGE | awk -F\, '{print int($2)}'` if [ "X"$avgusage == "X" ] ; then avgusage=0 ; fi totalanausage=$[$totalanausage+$avgusage] - - # Extract max job slots at site from Dashboard (all activities including analysis and production): - allusage=`grep ^$site\, $ALLUSAGE | awk -F\, '{print int($3)}'` + + # and for all activities + allusage=`grep ^$site\, $ALLUSAGE | awk -F\, '{print int($2)}'` if [ "X"$allusage == "X" ] ; then allusage=0 ; fi totalallusage=$[$totalallusage+$allusage] + # Extract max job slots at site from Dashboard (all activities) + maxusage=`grep ^$site\, $ALLUSAGE | awk -F\, '{print int($3)}'` + if [ "X"$maxusage == "X" ] ; then maxusage=0 ; fi + totalmaxusage=$[$totalmaxusage+$maxusage] + # downtimes downtime=`grep ^$site\, $DOWNTIMES | awk -F\, '{print $2}'` #if [ `echo $downtime | wc -w` -eq 0 ] ; then downtime="" ; fi # skip meaningless entries, no pledge, no demand - if [ $[$pledge+$claimed+$unclaimed+$exclusivepressure] -eq 0 ] ; then continue ; fi + if [ $[$pledge+$claimed+$unclaimed+$exclusivepressure] -eq 0 ] ; then + continue + fi + if [ $COLLECTOR1 != "vocms097.cern.ch" ] ; then + if [ $[$claimed+$unclaimed+$exclusivepressure] -eq 0 ] ; then + continue + fi + fi - printf "%-20s%10s%10s%10s%10s%10s%10s%10s%10s %-18s\n" $site $validityofpledge $pledge $avgusage $allusage $claimed $unclaimed $pressure $exclusivepressure "$downtime" + printf "%-20s%10s%10s%10s%10s%10s%10s%10s%10s%10s %-18s\n" $site $validityofpledge $pledge $avgusage $allusage $maxusage $claimed $unclaimed $pressure $exclusivepressure "$downtime" done # Table footer, sums: totalpressure="N/A" -printf "\n%-30s%10s%10s%10s%10s%10s%10s%10s\n" "SUM" $totalpledge $totalanausage $totalallusage $totalclaimed $totalunclaimed $totalpressure $totalexclusivepressure +printf "\n%-30s%10s%10s%10s%10s%10s%10s%10s%10s\n" "SUM" $totalpledge $totalanausage $totalallusage $totalmaxusage $totalclaimed $totalunclaimed $totalpressure $totalexclusivepressure echo -cat << EOF +if [ $COLLECTOR1 == "vocms097.cern.ch" ] ; then + cat << EOF Notes: * Pledges are 100% of the last pledge entered in SiteDB for the site for all activities. * Analysis Usage statistics are from the last month in Dashboard for activity=analysis only and includes @@ -133,6 +167,7 @@ Notes: on the schedd. * Sites are only listed in the Site Table if there is demand (running or queued) or pledged resources. EOF +fi # clean up temp files rm $PLEDGES $SEDFILE $CLAIMED $RUNNING $DESIRED $ANAUSAGE $DOWNTIMES $ALLUSAGE diff --git a/condor_check.sh b/condor_check.sh index 39840dc..fb5860f 100755 --- a/condor_check.sh +++ b/condor_check.sh @@ -31,20 +31,30 @@ cat >> $OUTFILE <> $OUTFILE cat >> $OUTFILE <> $OUTFILE +COLLECTOR1=vocms007.cern.ch +COLLECTOR2=cmssrv239.fnal.gov alarm 600 $glideinWMSMonitor_RELEASE_DIR/condor_check $COLLECTOR1 $COLLECTOR2 >> $OUTFILE +cat >> $OUTFILE <> $OUTFILE + + + #if [ $rc -eq 0 ] ; then LINKNAME=$glideinWMSMonitor_OUTPUT_DIR/latest.txt diff --git a/condor_functions.sh b/condor_functions.sh index 35bd613..bb0a8e6 100644 --- a/condor_functions.sh +++ b/condor_functions.sh @@ -79,6 +79,7 @@ get_DESIRED_Sites() { source $glideinWMSMonitor_RELEASE_DIR/sitedb_functions.sh SEDFILE=`translate_se_names_in_sitedb_to_cmssite` + # we could use the -global option to condor_q and not specify the SCHEDD list SCHEDDS=`condor_status -pool $POOLNAME -const '(TotalIdleJobs>0)' -schedd -format ' -name %s' Name ` || return 1 DESIRED=`mktemp -t DESIRED.txt.XXXXXXX` || return 2 diff --git a/crontab.txt b/crontab.txt index 4d0a6d3..e53f609 100644 --- a/crontab.txt +++ b/crontab.txt @@ -1,2 +1,7 @@ -*/10 * * * * source /home/letts/Monitor/glideinWMSMonitor/bashrc && $glideinWMSMonitor_RELEASE_DIR/condor_check.sh -0 5,7,11,17,23 * * * source /home/letts/Monitor/glideinWMSMonitor/bashrc && $glideinWMSMonitor_RELEASE_DIR/condor_history_dump.sh +*/15 * * * * source /home/letts/Monitor/glideinWMSMonitor/bashrc && $glideinWMSMonitor_RELEASE_DIR/condor_check.sh +*/15 * * * * source /home/letts/Monitor/glideinWMSMonitor/bashrc && $glideinWMSMonitor_RELEASE_DIR/multi-core.sh >> /dev/null 2>&1 +54 * * * * /crabprod/CSstoragePath/make-multi-core-html.sh >> /dev/null 2>&1 +0 5,11,17,23 * * * source /home/letts/Monitor/glideinWMSMonitor/bashrc && $glideinWMSMonitor_RELEASE_DIR/condor_history_dump.sh +36 9 * * * /home/letts/.globus/2014-2015/init-proxy.sh >> /dev/null 2>&1 +40 23 * * * /home/letts/scripts/NewQuotaMail.sh > /home/letts/NewQuotaMail.txt 2>&1 +10,20,30,40,50 * * * * /home/letts/UserPrioDump/dev/DashboardFeed.sh > /home/letts/UserPrioDump/dev/Dashboard.txt 2>&1 diff --git a/dashboard_functions.sh b/dashboard_functions.sh index e67cb85..82a00d8 100644 --- a/dashboard_functions.sh +++ b/dashboard_functions.sh @@ -175,8 +175,13 @@ dashboard_job_slots_used() { dashboard_user_report() { GRANULARITY=$1 NUMBER_OF_PERIODS=$2 + DATE1=$3 printf "%10s,%10s,%10s,%10s\n" date1 date2 nusers nuserst2 - date1=`date -dlast-monday +%F` + if [ -x $DATE1 ]; then + date1=`date -dlast-monday +%F` + else + date1=$DATE1 + fi for (( i=1; i<=$NUMBER_OF_PERIODS; i++ )) ; do date2=$date1 date1=`date -d "$date2 -$GRANULARITY days" +%F` @@ -194,17 +199,18 @@ dashboard_job_report() { # ARGS: Number of time periods to display GRANULARITY=$1 NUMBER_OF_PERIODS=$2 +DATE1=$3 printf "%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s,%10s\n" \ date1 date2 \ ncrab2T1 ncrab3T1 nallT1 jscrab2T1 jscrab3T1 jsallT1 \ ncrab2T2 ncrab3T2 nallT2 jscrab2T2 jscrab3T2 jsallT2 \ ncrab2T3 ncrab3T3 nallT3 jscrab2T3 jscrab3T3 jsallT3 -date1=`date -dlast-monday +%F` - -#date1="2012-04-09" -#GRANULARITY=7 -#NUMBER_OF_PERIODS=4 + if [ -x $DATE1 ]; then + date1=`date -dlast-monday +%F` + else + date1=$DATE1 + fi for (( i=1; i<=$NUMBER_OF_PERIODS; i++ )) ; do date2=$date1 diff --git a/multi-core.sh b/multi-core.sh index d04623d..12fc0c2 100755 --- a/multi-core.sh +++ b/multi-core.sh @@ -261,12 +261,6 @@ echo "}" return } -JSONFILE=${glideinWMSMonitor_OUTPUT_DIR}-json/monitor-multicore-production-`/bin/date +%F-Z%R -u`.json -writeoutjsonfile vocms97.cern.ch > $JSONFILE - -JSONFILE=${glideinWMSMonitor_OUTPUT_DIR}-json/monitor-multicore-anaops-`/bin/date +%F-Z%R -u`.json -writeoutjsonfile glidein-collector.t2.ucsd.edu > $JSONFILE - JSONFILE=${glideinWMSMonitor_OUTPUT_DIR}-json/monitor-multicore-global-`/bin/date +%F-Z%R -u`.json writeoutjsonfile vocms097.cern.ch > $JSONFILE diff --git a/weekly_job_report.sh b/weekly_job_report.sh index 01453bd..47db7b0 100755 --- a/weekly_job_report.sh +++ b/weekly_job_report.sh @@ -6,5 +6,6 @@ else source $glideinWMSMonitor_RELEASE_DIR/bashrc fi OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/weekly-report-jobs-`/bin/date +%Y-%m-%d`.txt -dashboard_job_report 7 7 > $OUTFILE +OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/weekly-report-jobs-test.txt +dashboard_job_report 7 1 > $OUTFILE exit diff --git a/weekly_user_report.sh b/weekly_user_report.sh index 6785a4e..e078008 100755 --- a/weekly_user_report.sh +++ b/weekly_user_report.sh @@ -6,5 +6,20 @@ else source $glideinWMSMonitor_RELEASE_DIR/bashrc fi OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/weekly-report-user-`/bin/date +%Y-%m-%d`.txt -dashboard_user_report 7 7 > $OUTFILE +#dashboard_user_report 7 30 > $OUTFILE + +OUTFILE=$glideinWMSMonitor_OUTPUT_DIR/monthly-report-user-2014.txt +dashboard_user_report 31 1 2014-02-01 > $OUTFILE +dashboard_user_report 28 1 2014-03-01 | tail -1 >> $OUTFILE +dashboard_user_report 31 1 2014-04-01 | tail -1 >> $OUTFILE +dashboard_user_report 30 1 2014-05-01 | tail -1 >> $OUTFILE +dashboard_user_report 31 1 2014-06-01 | tail -1 >> $OUTFILE +dashboard_user_report 30 1 2014-07-01 | tail -1 >> $OUTFILE +dashboard_user_report 31 1 2014-08-01 | tail -1 >> $OUTFILE +dashboard_user_report 31 1 2014-09-01 | tail -1 >> $OUTFILE +dashboard_user_report 30 1 2014-10-01 | tail -1 >> $OUTFILE +dashboard_user_report 31 1 2014-11-01 | tail -1 >> $OUTFILE +dashboard_user_report 30 1 2014-12-01 | tail -1 >> $OUTFILE +dashboard_user_report 31 1 2015-01-01 | tail -1 >> $OUTFILE + exit