This repository has been archived by the owner on Jan 22, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 20
/
check_glusterfs
170 lines (150 loc) · 4.14 KB
/
check_glusterfs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/bin/bash
## Fork of MarkR’s GlusterFS-checks at:
## http://exchange.nagios.org/directory/Plugins/System-Metrics/File-System/GlusterFS-checks/details
### CHANGELOG
## 1.0.2
# * 07/01/2014
# * Modified by Doug Wilson <[email protected]>
# * includes carrillm’s fix to support TB sized volumes
# * outputs all errors on a critical alarm, not just free space
# This Nagios script was written against version 3.3 & 3.4 of Gluster. Older
# versions will most likely not work at all with this monitoring script.
#
# Gluster currently requires elevated permissions to do anything. In order to
# accommodate this, you need to allow your Nagios user some additional
# permissions via sudo. The line you want to add will look something like the
# following in /etc/sudoers (or something equivalent):
#
# Defaults:nagios !requiretty
# nagios ALL=(root) NOPASSWD:/usr/sbin/gluster volume status [[\:graph\:]]* detail,/usr/sbin/gluster volume heal [[\:graph\:]]* info
#
# That should give us all the access we need to check the status of any
# currently defined peers and volumes.
# Inspired by a script by Mark Nipper
#
# 2013, Mark Ruys, [email protected]
# Restrict PATH to the standard system directories.
PATH=/sbin:/bin:/usr/sbin:/usr/bin

# Name of this script, used in the usage text.
PROGNAME=$(basename -- "$0")
# Directory containing this script. dirname yields "." when $0 contains no
# slash, so the result is always a directory and never the script name itself.
PROGPATH=$(dirname -- "$0")
# Kept in step with the latest entry of the changelog above.
REVISION="1.0.2"

# Load the helper file that sits next to this script; presumably it provides
# the STATE_* exit codes used further down (they are not defined in this file).
. "$PROGPATH/utils.sh"
# parse command line
#######################################
# Print the command-line synopsis and terminate.
# Globals:   PROGNAME (read), STATE_UNKNOWN (read)
# Arguments: none
# Outputs:   usage text on stdout
# Exits:     with $STATE_UNKNOWN
#######################################
usage () {
  # One printf call, one argument per output line (the first line is blank).
  printf '%s\n' \
    "" \
    "USAGE: " \
    " $PROGNAME -v VOLUME -n BRICKS [-w GB -c GB]" \
    " -n BRICKS: number of bricks" \
    " -w and -c values in GB"
  exit $STATE_UNKNOWN
}
# Parse the command line: -v and -n are mandatory, -w and -c are optional.
while getopts "v:n:w:c:" opt; do
  case "$opt" in
    v) VOLUME=${OPTARG} ;;
    n) BRICKS=${OPTARG} ;;
    w) WARN=${OPTARG} ;;
    c) CRIT=${OPTARG} ;;
    *) usage ;;
  esac
done

if [[ -z "${VOLUME}" || -z "${BRICKS}" ]]; then
  usage
fi

# BRICKS, WARN and CRIT are compared further down with the integer operators
# of `[`, which error out on anything but whole numbers and would leave the
# corresponding check silently skipped. Reject such values up front instead.
# Empty/absent -w and -c are allowed (thresholds are optional).
for numeric_arg in "${BRICKS}" ${WARN:+"${WARN}"} ${CRIT:+"${CRIT}"}; do
  if [[ ! "${numeric_arg}" =~ ^[0-9]+$ ]]; then
    usage
  fi
done
#######################################
# Print a Nagios status line and terminate the plugin.
# Globals:   STATE_<name> (read) - numeric exit code for the given state
# Arguments: $1 - state name (OK, WARNING, CRITICAL or UNKNOWN)
#            $2 - message printed after the state name
# Outputs:   "<state>: <message>" on stdout
# Exits:     with the value of STATE_$1; falls back to 3 (UNKNOWN) when that
#            variable is unset or empty, so a problem is never reported with
#            exit code 0 just because the state table is missing.
#######################################
Exit () {
  local status="STATE_$1"
  printf '%s\n' "$1: ${2:0}"
  # Indirect expansion: read the variable whose name is stored in $status.
  exit "${!status:-3}"
}
# Make sure every external tool this plugin relies on can be found.
required_tools=(basename bc awk sudo pidof gluster)
for tool in "${required_tools[@]}"; do
  type -p "$tool" >/dev/null || Exit UNKNOWN "$tool not found"
done

# The management daemon (glusterd) must be running ...
pidof glusterd >/dev/null 2>&1 \
  || Exit CRITICAL "glusterd management daemon not running"

# ... and so must at least one brick daemon (glusterfsd).
pidof glusterfsd >/dev/null 2>&1 \
  || Exit CRITICAL "glusterfsd brick daemon not running"
# get volume heal status

#######################################
# Sum the entry counts found in `gluster volume heal <vol> info` output.
# Arguments: none; the command output is read from stdin
# Outputs:   the total of all "Number of entries: N" lines whose N is a plain
#            non-negative integer (0 when there are none); lines carrying any
#            other value are skipped instead of being fed to an integer test
#######################################
count_heal_entries () {
  awk '/^Number of entries: / && $4 ~ /^[0-9]+$/ { total += $4 }
       END { printf "%d\n", total }'
}

heal=$(sudo gluster volume heal "${VOLUME}" info | count_heal_entries)
if [ "$heal" -gt 0 ]; then
  errors+=("$heal unsynched entries")
fi
# get volume status

#######################################
# Convert a human-readable size such as "1.2TB" to whole gigabytes.
# Arguments: $1 - a decimal number immediately followed by its unit
#                 (Bytes, KB, MB, GB, TB or PB)
# Outputs:   the size in GB, rounded down, on stdout
# Returns:   0 on success, 1 when the number or the unit is not recognised
#######################################
size_to_gb () {
  local re='^([0-9]+)(\.([0-9]+))?([A-Za-z]+)$'
  local whole frac unit mult=1 div=1 scale

  [[ "$1" =~ $re ]] || return 1
  whole=${BASH_REMATCH[1]}
  frac=${BASH_REMATCH[3]}
  unit=${BASH_REMATCH[4]}

  case "$unit" in
    Bytes) div=$((1024 * 1024 * 1024)) ;;
    KB)    div=$((1024 * 1024)) ;;
    MB)    div=1024 ;;
    GB)    ;;
    TB)    mult=1024 ;;
    PB)    mult=$((1024 * 1024)) ;;
    *)     return 1 ;;
  esac

  # Integer fixed-point arithmetic: the value is (whole.frac * scale) / scale,
  # which keeps the fractional digits without needing an external calculator.
  # The 10# prefix stops leading zeros from being read as octal.
  scale=$((10 ** ${#frac}))
  echo $(( (10#$whole * scale + 10#${frac:-0}) * mult / (scale * div) ))
}

bricksfound=0   # number of bricks reported as online
freegb=9999999  # smallest free space seen so far, in GB (starts at a sentinel)
shopt -s nullglob
# `read -a` splits each line on whitespace without any pathname expansion.
while read -ra field; do
  case "${field[0]}" in
    Brick)
      # Remember the brick being described so it can be named if offline.
      brick="${field[*]:2}"
      ;;
    Disk)
      if [ "${field[*]:0:3}" = "Disk Space Free" ]; then
        freeunit="${field[*]:4}"
        if ! free=$(size_to_gb "$freeunit"); then
          Exit UNKNOWN "unknown disk space size $freeunit"
        fi
        if [ "$free" -lt "$freegb" ]; then
          freegb=$free
        fi
      fi
      ;;
    Online)
      online="${field[*]:2}"
      if [ "${online}" = "Y" ]; then
        bricksfound=$((bricksfound + 1))
      else
        errors+=("$brick offline")
      fi
      ;;
  esac
done < <(sudo gluster volume status "${VOLUME}" detail)
# Compare the number of online bricks with the expected count.
if [ "$bricksfound" -eq 0 ]; then
  Exit CRITICAL "no bricks found"
elif [ "$bricksfound" -lt "$BRICKS" ]; then
  errors=("${errors[@]}" "found $bricksfound bricks, expected $BRICKS ")
  ex_stat="WARNING_stat"
fi

# Free-space thresholds are only evaluated when both -w and -c were given.
if [ -n "$CRIT" ] && [ -n "$WARN" ]; then
  if [ "$CRIT" -ge "$WARN" ]; then
    # An alert fires when free space drops below a limit, so the critical
    # limit has to be the smaller of the two.
    Exit UNKNOWN "critical threshold must be below warning threshold"
  elif [ "$freegb" -lt "$CRIT" ]; then
    errors=("${errors[@]}" "free space ${freegb}GB")
    ex_stat="CRITICAL_stat"
  elif [ "$freegb" -lt "$WARN" ]; then
    errors=("${errors[@]}" "free space ${freegb}GB")
    ex_stat="WARNING_stat"
  fi
fi
# Exit with CRITICAL or WARNING if any errors were collected.
if [ "${#errors[@]}" -gt 0 ]; then
  # Join all collected messages with "; ".
  sep='; '
  printf -v msg "%s${sep}" "${errors[@]}"
  msg=${msg%"${sep}"}
  # ex_stat is unset when the only errors are unsynced heal entries or
  # offline bricks; it must be quoted so the test stays well-formed and such
  # errors are reported as WARNING without a shell error on stderr.
  if [ "${ex_stat}" = "CRITICAL_stat" ]; then
    Exit CRITICAL "${msg}"
  else
    Exit WARNING "${msg}"
  fi
fi

# No errors: report the brick count and the smallest free space seen.
Exit OK "${bricksfound} bricks; free space ${freegb}GB"