From 7cced1b9d4d310df7dab5bbc193c81ebfc75cc94 Mon Sep 17 00:00:00 2001
From: JAMES LETTS
Date: Thu, 26 Mar 2015 11:15:35 -0700
Subject: [PATCH] Updates and some new multi-core monitoring.

---
Review notes (this section sits between "---" and the first "diff --git"
line, so it is not part of the commit message):

* What the patch does
  - CompareCollectors.sh (new): lists slot names from two collectors and
    reports how many are known to only one of them.
  - condor_check: drops the backslashes around "glideresource" inside the
    single-quoted -const expression.
  - multi-core-t1.sh (new): writes a JSON summary of glideins whose
    GLIDEIN_CMSSite matches "T1_", grouped by slot type, state and
    activity, counted both per glidein and per Cpu.

* Changes made during review (each stays on its original line, so the hunk
  sizes and the diffstat below are unchanged):
  - CompareCollectors.sh: "$[...]" replaced by POSIX "$(( ))" with a guard
    against division by zero when no pilots are returned.
  - multi-core-t1.sh: shebang changed to /bin/bash because the script uses
    "source"; "[ -z ... ]" operand quoted; grep pattern quoted so the shell
    cannot glob-expand it.
  - Both scripts: variable expansions quoted, backticks replaced by $( ).
  The "index" blob ids still name the originally committed contents.

* Caveats
  - The line structure was reconstructed from a whitespace-collapsed copy.
    Original indentation and any runs of spaces inside the echo'ed JSON
    strings could not be recovered.
  - The condor_check context lines may need "git apply --ignore-whitespace"
    (TODO confirm against the target file).
  - The From: header carries no e-mail address; "git am" is expected to
    reject that, so restore the address or use "git apply".

 CompareCollectors.sh |  19 +++
 condor_check         |   2 +-
 multi-core-t1.sh     | 285 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 305 insertions(+), 1 deletion(-)
 create mode 100755 CompareCollectors.sh
 create mode 100755 multi-core-t1.sh

diff --git a/CompareCollectors.sh b/CompareCollectors.sh
new file mode 100755
index 0000000..295ff7d
--- /dev/null
+++ b/CompareCollectors.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+COLLECTOR1=$1
+COLLECTOR2=$2
+
+list_all_glidein_names() {
+  condor_status -pool "$COLLECTOR1" -format '%s\n' Name
+  condor_status -pool "$COLLECTOR2" -format '%s\n' Name
+}
+
+
+unique=$(list_all_glidein_names | sort | uniq | wc -l)
+regto2=$(list_all_glidein_names | sort | uniq -c | awk '($1==2){print $0}' | wc -l)
+pct=$(( unique > 0 ? (unique - regto2) * 100 / unique : 0 ))  # 0 when no pilots, avoids division by zero
+
+echo Unique pilots on either collector: $unique
+echo Pilots registered to both collectors: $regto2
+echo ... troublesome pilots = $pct \%
+
+exit 0
diff --git a/condor_check b/condor_check
index ace4608..72db8f5 100755
--- a/condor_check
+++ b/condor_check
@@ -45,7 +45,7 @@ echo
 # Frontend ClassAds in the Collector:
 echo Frontend ClassAds in the Collector:
 condor_status -pool $COLLECTOR1 -any \
-  -const '(GlideinMyType=?=\"glideresource\")' \
+  -const '(GlideinMyType=?="glideresource")' \
   -format '%s\n' GlideClientName | sort | uniq -c
 echo
 
diff --git a/multi-core-t1.sh b/multi-core-t1.sh
new file mode 100755
index 0000000..983f79f
--- /dev/null
+++ b/multi-core-t1.sh
@@ -0,0 +1,285 @@
+#!/bin/bash
+
+if [ -z "$glideinWMSMonitor_RELEASE_DIR" ] ; then
+  echo "ERROR: glideinWMSMonitor source code missing."
+  exit 1
+else
+  source "$glideinWMSMonitor_RELEASE_DIR/bashrc"
+fi
+
+
+writeoutjsonfile() {
+
+COLLECTOR=$1
+
+echo "{"
+echo " \"Multi-core pilot monitoring\": {"
+echo " \"Collector\": \"$COLLECTOR\","
+echo " \"Time\": $(/bin/date +%s),"
+
+
+echo " \"Partitionable glideins\": {"
+echo " \"header\": [\"State\",\"Activity\",\"glideins\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Partitionable")' \
+-format '%s ' State -format '%s\n' Activity | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Partitionable glidein Cpus\": {"
+echo " \"header\": [\"State\",\"Activity\",\"Cpus\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Partitionable")' \
+-format '%s ' Cpus -format '%s ' State -format '%s\n' Activity | \
+awk ' { for (i=$1; i>0; i--) { print $2 " " $3 } }' | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Partitionable retiring glideins\": {"
+echo " \"header\": [\"State\",\"Activity\",\"glideins\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Partitionable")' \
+-const '(GLIDEIN_ToRetire=!=UNDEFINED)&&(CurrentTime>GLIDEIN_ToRetire)' \
+-format '%s ' State -format '%s\n' Activity | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Partitionable retiring glidein Cpus\": {"
+echo " \"header\": [\"State\",\"Activity\",\"Cpus\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Partitionable")' \
+-const '(GLIDEIN_ToRetire=!=UNDEFINED)&&(CurrentTime>GLIDEIN_ToRetire)' \
+-format '%s ' Cpus -format '%s ' State -format '%s\n' Activity | \
+awk ' { for (i=$1; i>0; i--) { print $2 " " $3 } }' | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Dynamic glideins\": {"
+echo " \"header\": [\"State\",\"Activity\",\"glideins\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Dynamic")' \
+-format '%s ' State -format '%s\n' Activity | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Dynamic glidein Cpus\": {"
+echo " \"header\": [\"State\",\"Activity\",\"Cpus\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Dynamic")' \
+-format '%s ' Cpus -format '%s ' State -format '%s\n' Activity | \
+awk ' { for (i=$1; i>0; i--) { print $2 " " $3 } }' | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Dynamic retiring glideins\": {"
+echo " \"header\": [\"State\",\"Activity\",\"glideins\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Dynamic")' \
+-const '(GLIDEIN_ToRetire=!=UNDEFINED)&&(CurrentTime>GLIDEIN_ToRetire)' \
+-format '%s ' State -format '%s\n' Activity | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Dynamic retiring glidein Cpus\": {"
+echo " \"header\": [\"State\",\"Activity\",\"Cpus\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Dynamic")' \
+-const '(GLIDEIN_ToRetire=!=UNDEFINED)&&(CurrentTime>GLIDEIN_ToRetire)' \
+-format '%s ' Cpus -format '%s ' State -format '%s\n' Activity | \
+awk ' { for (i=$1; i>0; i--) { print $2 " " $3 } }' | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Static glideins\": {"
+echo " \"header\": [\"State\",\"Activity\",\"glideins\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Static")' \
+-format '%s ' State -format '%s\n' Activity | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Static glidein Cpus\": {"
+echo " \"header\": [\"State\",\"Activity\",\"Cpus\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Static")' \
+-format '%s ' Cpus -format '%s ' State -format '%s\n' Activity | \
+awk ' { for (i=$1; i>0; i--) { print $2 " " $3 } }' | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Static retiring glideins\": {"
+echo " \"header\": [\"State\",\"Activity\",\"glideins\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Static")' \
+-const '(GLIDEIN_ToRetire=!=UNDEFINED)&&(CurrentTime>GLIDEIN_ToRetire)' \
+-format '%s ' State -format '%s\n' Activity | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Static retiring glidein Cpus\": {"
+echo " \"header\": [\"State\",\"Activity\",\"Cpus\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Static")' \
+-const '(GLIDEIN_ToRetire=!=UNDEFINED)&&(CurrentTime>GLIDEIN_ToRetire)' \
+-format '%s ' Cpus -format '%s ' State -format '%s\n' Activity | \
+awk ' { for (i=$1; i>0; i--) { print $2 " " $3 } }' | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Static multi-core glideins\": {"
+echo " \"header\": [\"State\",\"Activity\",\"glideins\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Static")' \
+-format '%s ' Name -format '%s ' State -format '%s\n' Activity | \
+grep '^slot[0-9]*@' | awk '{print $2 " " $3 }' | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Static multi-core glidein Cpus\": {"
+echo " \"header\": [\"State\",\"Activity\",\"Cpus\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Static")' \
+-format '%s ' Name -format '%s ' Cpus -format '%s ' State -format '%s\n' Activity | \
+grep '^slot[0-9]*@' | awk '{print $2 " " $3 " " $4 }' | \
+awk ' { for (i=$1; i>0; i--) { print $2 " " $3 } }' | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Static multi-core retiring glideins\": {"
+echo " \"header\": [\"State\",\"Activity\",\"glideins\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Static")' \
+-const '(GLIDEIN_ToRetire=!=UNDEFINED)&&(CurrentTime>GLIDEIN_ToRetire)' \
+-format '%s ' Name -format '%s ' State -format '%s\n' Activity | \
+grep '^slot[0-9]*@' | awk '{print $2 " " $3 }' | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Static multi-core retiring glidein Cpus\": {"
+echo " \"header\": [\"State\",\"Activity\",\"Cpus\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-const '(SlotType=?="Static")' \
+-const '(GLIDEIN_ToRetire=!=UNDEFINED)&&(CurrentTime>GLIDEIN_ToRetire)' \
+-format '%s ' Name -format '%s ' Cpus -format '%s ' State -format '%s\n' Activity | \
+grep '^slot[0-9]*@' | awk '{print $2 " " $3 " " $4 }' | \
+awk ' { for (i=$1; i>0; i--) { print $2 " " $3 } }' | sort | uniq -c | \
+awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Total glideins\": {"
+echo " \"header\": [\"State\",\"Activity\",\"glideins\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-format '%s ' State -format '%s\n' Activity \
+| sort |uniq -c \
+| awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " },"
+
+
+echo " \"Total Cpus\": {"
+echo " \"header\": [\"State\",\"Activity\",\"Cpus\"],"
+echo " \"data\": ["
+condor_status -pool "$COLLECTOR" \
+-const '(regexp("T1_",GLIDEIN_CMSSite)=?=True)' \
+-format '%s ' Cpus -format '%s ' State -format '%s\n' Activity \
+| awk ' { for (i=$1; i>0; i--) { print $2 " " $3 } }' | sort |uniq -c \
+| awk '{printf(" [\"%s\",\"%s\",%i],\n",$2,$3,$1)}'
+echo " [null,null,0]"
+echo " ]"
+echo " }"
+
+
+echo " }"
+echo "}"
+
+
+return
+}
+
+JSONFILE=${glideinWMSMonitor_OUTPUT_DIR}-json/monitor-multicore-t1-global-$(/bin/date +%F-Z%R -u).json
+writeoutjsonfile vocms097.cern.ch > "$JSONFILE"
+
+exit