Skip to content

Commit

Permalink
Merge pull request #29 from romana/issue-28-tcp-health
Browse files Browse the repository at this point in the history
Added a TCP health monitor plugin.
  • Loading branch information
jbrendel authored Jul 31, 2017
2 parents cd9bcdb + a5f69aa commit 5a18afa
Show file tree
Hide file tree
Showing 9 changed files with 294 additions and 34 deletions.
10 changes: 6 additions & 4 deletions PLUGINS.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,12 @@ cluster nodes. It uses plugins so that it can easily be extended. The
design of health monitor plugins is very similar to the watcher
plugins.

One health monitor plugin is included by default:
Two health monitor plugins are included by default:

* icmpecho: This uses ICMPecho (ping) requests to check that an EC2 instance is
responsive.
* tcp: This uses a TCP connection attempt to check that a process on an EC2
instance is responsive.

A health monitor plugin communicates any detected failed instances to the main
event loop of the vpc-router via a queue. It always sends a full list of the
Expand All @@ -101,9 +103,9 @@ host list.

## Location, naming convention and base class

The 'icmpecho' health monitor plugin is included. It is an integrated
health monitor plugin (included in the vpc-router source) and is located
in the directory `vpcrouter/monitor/plugins/`.
The 'icmpecho' and 'tcp' health monitor plugins are included. They are
integrated health monitor plugins (included in the vpc-router source) and are
located in the directory `vpcrouter/monitor/plugins/`.

The `-H` / `--health` option in the vpc-router command line chooses the health
monitor plugin. It uses 'icmpecho' as default value. The name of the plugin has
Expand Down
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,17 @@ The health-check itself is implemented via plugins, which gives vpc-router the
flexibility to use a wide variety of information to determine whether an EC2
routing instance is healthy. By default, it uses the 'icmpecho' plugin, which
utilizes an ICMPecho ('ping') request to actively check the responsiveness of
instances.
instances. A 'tcp' plugin, which makes TCP connection attempts on a
specified port, is also provided.

Use the `--health` option to select the health monitor plugin, for example:

$ vpcrouter --health tcp --tcp_check_port 22 --tcp_check_interval 5 ...

or:

$ vpcrouter --health icmpecho --icmp_check_interval 5 ...


## TODO

Expand Down
2 changes: 1 addition & 1 deletion vpcrouter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@
"""

__version__ = "1.4.0"
__version__ = "1.4.1"
9 changes: 8 additions & 1 deletion vpcrouter/monitor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,13 @@ def get_new_working_set(self):
break
return new_list_of_ips

def get_monitor_interval(self):
    """
    Return the number of seconds to sleep between two monitoring rounds.

    This base implementation is only a placeholder: it always raises
    NotImplementedError, so every concrete health monitor plugin has to
    provide its own version.

    """
    raise NotImplementedError

def do_health_checks(self, list_of_ips):
"""
Perform a health check on a list of IP addresses.
Expand Down Expand Up @@ -191,7 +198,7 @@ def start_monitoring(self):
interval_count = 0
currently_failed_ips = set()

time.sleep(self.conf['interval'])
time.sleep(self.get_monitor_interval())
interval_count += 1

except StopReceived:
Expand Down
28 changes: 18 additions & 10 deletions vpcrouter/monitor/plugins/icmpecho.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,18 @@ def _do_ping(self, ip, ping_id, results):
pass
results[ip] = res

def get_monitor_interval(self):
    """
    Return the number of seconds to sleep between two monitoring rounds.

    For the ICMPecho plugin this is the value stored under the
    'icmp_check_interval' key of the plugin's configuration.

    """
    interval = self.conf['icmp_check_interval']
    return interval

def do_health_checks(self, list_of_ips):
"""
Perform a health check on a list of IP addresses.
Each check (we use ICMP echo right now) is run in its own thread.
Each check (we use ICMP echo) is run in its own thread.
Gather up the results and return the list of those addresses that
failed the test.
Expand Down Expand Up @@ -154,11 +161,12 @@ def add_arguments(cls, parser):
Arguments for the configfile mode.
"""
parser.add_argument('-i', '--interval', dest='interval',
parser.add_argument('--icmp_check_interval',
dest='icmp_check_interval',
required=False, default=2,
help="ICMPecho interval in seconds "
"(only in ping mode)")
return ["interval"]
"(only for 'icmpecho' health monitor plugin)")
return ["icmp_check_interval"]

@classmethod
def check_arguments(cls, conf):
Expand All @@ -169,16 +177,16 @@ def check_arguments(cls, conf):
float.
"""
if not conf['interval']:
raise ArgsError("An ICMPecho interval needs to be specified (-i).")
if not conf['icmp_check_interval']:
raise ArgsError("An ICMPecho interval needs to be specified "
"(--icmp_check_interval).")

try:
conf['interval'] = float(conf['interval'])
conf['icmp_check_interval'] = float(conf['icmp_check_interval'])
except Exception:
raise ArgsError("Specified ICMPecho interval '%s' must be "
"a number." %
conf['interval'])
"a number." % conf['icmp_check_interval'])

if not (1 <= conf['interval'] <= 3600):
if not (1 <= conf['icmp_check_interval'] <= 3600):
raise ArgsError("Specified ICMPecho interval must be between "
"1 and 3600 seconds")
175 changes: 175 additions & 0 deletions vpcrouter/monitor/plugins/tcp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
"""
Copyright 2017 Pani Networks Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

#
# A monitor plugin for checking instance health with a TCP connection
# establishment attempt.
#

import logging
import socket
import threading

from vpcrouter.errors import ArgsError
from vpcrouter.monitor import common


class Tcp(common.MonitorPlugin):
    """
    A health monitor plugin, which uses TCP connection attempts to check
    instances for health.

    An instance counts as healthy if a TCP connection to the configured port
    (conf['tcp_check_port']) can be established before the connect timeout
    expires. The time between two rounds of checks is taken from
    conf['tcp_check_interval'].

    """
    # Number of seconds a single connection attempt may take before the
    # instance is considered to have failed.
    _CONNECT_TIMEOUT = 1

    def _do_tcp_check(self, ip, results):
        """
        Attempt to establish a TCP connection to the given IP address.

        If not successful, append the IP to the 'results' list, which is
        shared between all checker threads of one round. Nothing is added
        for a successful attempt.

        Always closes the socket at the end.

        """
        # Keep the name bound even if creating the socket itself fails, so
        # that the cleanup in 'finally' never trips over an unbound variable.
        sock = None
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.settimeout(self._CONNECT_TIMEOUT)
            sock.connect((ip, self.conf['tcp_check_port']))
        except Exception:
            # Any problem during the connection attempt? We won't diagnose it,
            # we just indicate failure by adding the IP to the list
            results.append(ip)
        finally:
            if sock is not None:
                sock.close()

    def get_monitor_interval(self):
        """
        Return the sleep time (in seconds) between monitoring intervals.

        """
        return self.conf['tcp_check_interval']

    def do_health_checks(self, list_of_ips):
        """
        Perform a health check on a list of IP addresses.

        Each check (we use a TCP connection attempt) is run in its own thread.

        Gather up the results and return the list of those addresses that
        failed the test.

        TODO: Currently, this starts a thread for every single address we want
        to check. That's probably not a good idea if we have thousands of
        addresses. Therefore, we should implement some batching for large
        sets.

        """
        threads = []
        results = []

        # Start one checker thread for each IP we wish to test. Every thread
        # appends its IP to the shared results list if the attempt fails...
        for ip in list_of_ips:
            thread = threading.Thread(target=self._do_tcp_check,
                                      args=(ip, results))
            thread.start()
            threads.append(thread)

        # ... make sure all threads are done...
        for thread in threads:
            thread.join()

        # ... and send back all the failed IPs.
        return results

    def start(self):
        """
        Start the health monitoring thread.

        The thread runs self.start_monitoring and is marked as a daemon
        thread.

        """
        logging.info("TCP health monitor plugin: Starting to watch "
                     "instances.")

        self.monitor_thread = threading.Thread(target=self.start_monitoring,
                                               name="HealthMon")
        self.monitor_thread.daemon = True
        self.monitor_thread.start()

    def stop(self):
        """
        Stop the health monitoring thread.

        Calls the base class stop() first and then waits for the monitoring
        thread to finish.

        """
        super(Tcp, self).stop()
        self.monitor_thread.join()
        logging.info("TCP health monitor plugin: Stopped")

    @classmethod
    def add_arguments(cls, parser):
        """
        Add the command line arguments of the TCP health monitor plugin.

        Returns the list of the 'dest' names of the arguments that were added
        to the parser.

        """
        parser.add_argument('--tcp_check_interval',
                            dest='tcp_check_interval',
                            required=False, default=2,
                            help="TCP health-test interval in seconds "
                                 "(only for 'tcp' health monitor plugin)")
        parser.add_argument('--tcp_check_port',
                            dest='tcp_check_port',
                            required=False, default=22,
                            help="Port for TCP health-test, default 22 "
                                 "(only for 'tcp' health monitor plugin)")
        return ["tcp_check_interval", "tcp_check_port"]

    @classmethod
    def check_arguments(cls, conf):
        """
        Sanity checks for the options of the TCP health monitor plugin.

        As a side effect, it converts the specified interval to a float and
        the specified port to an integer, storing both back into 'conf'.

        Raises ArgsError if a value is missing, is not a number or is outside
        of its allowed range (1 to 3600 seconds for the interval, 1 to 65535
        for the port).

        """
        # Checking the interval
        if not conf['tcp_check_interval']:
            raise ArgsError("A TCP health-test interval needs to be "
                            "specified (--tcp_check_interval).")

        try:
            conf['tcp_check_interval'] = float(conf['tcp_check_interval'])
        except Exception:
            # The conversion failed, so conf still holds the original value,
            # which is what we want to show in the error message.
            raise ArgsError("Specified TCP health-test interval '%s' must be "
                            "a number." % conf['tcp_check_interval'])

        if not (1 <= conf['tcp_check_interval'] <= 3600):
            raise ArgsError("Specified TCP health-test interval must be "
                            "between 1 and 3600 seconds")

        # Checking the port
        if not conf['tcp_check_port']:
            raise ArgsError("A port for the TCP health-test needs to be "
                            "specified (--tcp_check_port).")
        try:
            conf['tcp_check_port'] = int(conf['tcp_check_port'])
        except Exception:
            raise ArgsError("Specified port for the TCP health-test '%s' "
                            "must be a number." % conf['tcp_check_port'])

        if not (1 <= conf['tcp_check_port'] <= 65535):
            raise ArgsError("Specified port for TCP health-test must be "
                            "between 1 and 65535")
3 changes: 2 additions & 1 deletion vpcrouter/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ def test_parse_args(self):
"conf" : {
'verbose': False, 'addr': 'localhost', 'mode': 'http',
'vpc_id': '123', 'logfile': 'foo', 'health' : 'icmpecho',
'interval' : 2, 'port': 33289, 'region_name': 'foo'}},
'icmp_check_interval' : 2, 'port': 33289,
'region_name': 'foo'}},
{"args" : ['-l', 'foo', '-v', '123', '-r', 'foo',
'-m', 'configfile'],
"exc" : ArgsError, "watcher_plugin" : "configfile",
Expand Down
Loading

0 comments on commit 5a18afa

Please sign in to comment.