From ec916d39475185bad8e3ff3a2efcfc326efc0bcc Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Thu, 13 Jun 2019 19:42:37 -0700 Subject: [PATCH 1/9] Ensure there is only one watcher refresh per time window --- lib/synapse/service_watcher/zookeeper.rb | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/lib/synapse/service_watcher/zookeeper.rb b/lib/synapse/service_watcher/zookeeper.rb index 57d867c9..5099e42a 100644 --- a/lib/synapse/service_watcher/zookeeper.rb +++ b/lib/synapse/service_watcher/zookeeper.rb @@ -363,6 +363,30 @@ def zk_connect zk_cleanup end + # handle session connected after reconnecting + # http://zookeeper.apache.org/doc/r3.3.5/zookeeperProgrammers.html#ch_zkSessions + @zk.on_connected do + log.info "synapse: ZK client has reconnected #{@name}" + # random backoff to avoid checking and refreshing all watchers at the same time + sleep rand(10) + now = Time.now + # ensure there is only one refresh can happen within a time window + if !@last_reconnect_time.nil? && (now - @last_connect_time) < 60 + log.info "synapse: ZK client skip since last reconnect is too close #{@name}" + return + end + # test-and-set should be thread safe based on per-callback model + # https://github.com/zk-ruby/zk/wiki/EventDeliveryModel + @last_reconnect_time = now + # zookeeper watcher is one-time trigger, and can be lost when disconnected + # https://zookeeper.apache.org/doc/r3.3.5/zookeeperProgrammers.html#ch_zkWatches + # only need reanble watcher on parent path and children list + log.info "synapse: ZK client refresh watcher after reconnected #{@name}" + if @zk.exists?(@discovery['path'], :watch => true) + @zk.children(@discovery['path'], :watch => true) + end + end + # the path must exist, otherwise watch callbacks will not work statsd_time('synapse.watcher.zk.create_path.elapsed_time', ["zk_cluster:#{@zk_cluster}", "service_name:#{@name}"]) do create(@discovery['path']) From 513574decda3609d3d0c4ea27ff2a22b7d4fde56 Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Thu, 13 Jun 2019 19:44:32 -0700 Subject: [PATCH 2/9] Fix indentation --- lib/synapse/service_watcher/zookeeper.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/synapse/service_watcher/zookeeper.rb b/lib/synapse/service_watcher/zookeeper.rb index 5099e42a..a0b49365 100644 --- a/lib/synapse/service_watcher/zookeeper.rb +++ b/lib/synapse/service_watcher/zookeeper.rb @@ -369,9 +369,9 @@ def zk_connect log.info "synapse: ZK client has reconnected #{@name}" # random backoff to avoid checking and refreshing all watchers at the same time sleep rand(10) - now = Time.now + now = Time.now # ensure there is only one refresh can happen within a time window - if !@last_reconnect_time.nil? && (now - @last_connect_time) < 60 + if !@last_reconnect_time.nil? && (now - @last_reconnect_time) < 60 log.info "synapse: ZK client skip since last reconnect is too close #{@name}" return end From 495ca2c50abc781876077952b72f22ac690381b1 Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Thu, 13 Jun 2019 20:23:19 -0700 Subject: [PATCH 3/9] Fix typo --- lib/synapse/service_watcher/zookeeper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/synapse/service_watcher/zookeeper.rb b/lib/synapse/service_watcher/zookeeper.rb index a0b49365..63735dfb 100644 --- a/lib/synapse/service_watcher/zookeeper.rb +++ b/lib/synapse/service_watcher/zookeeper.rb @@ -380,7 +380,7 @@ def zk_connect @last_reconnect_time = now # zookeeper watcher is one-time trigger, and can be lost when disconnected # https://zookeeper.apache.org/doc/r3.3.5/zookeeperProgrammers.html#ch_zkWatches - # only need reanble watcher on parent path and children list + # only need re-enable watcher on parent path and children list log.info "synapse: ZK client refresh watcher after reconnected #{@name}" if @zk.exists?(@discovery['path'], :watch => true) @zk.children(@discovery['path'], :watch => true) From d2da647375f1d1afbdb0c176c40bf55c9c60c6b8 Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Fri, 14 Jun 2019 06:37:23 -0700 Subject: [PATCH 4/9] Refactor and unit test reconnect time --- lib/synapse/service_watcher/zookeeper.rb | 18 ++++++++++++------ .../synapse/service_watcher_zookeeper_spec.rb | 11 +++++++++++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/lib/synapse/service_watcher/zookeeper.rb b/lib/synapse/service_watcher/zookeeper.rb index 63735dfb..14617704 100644 --- a/lib/synapse/service_watcher/zookeeper.rb +++ b/lib/synapse/service_watcher/zookeeper.rb @@ -369,15 +369,10 @@ def zk_connect log.info "synapse: ZK client has reconnected #{@name}" # random backoff to avoid checking and refreshing all watchers at the same time sleep rand(10) - now = Time.now - # ensure there is only one refresh can happen within a time window - if !@last_reconnect_time.nil? && (now - @last_reconnect_time) < 60 + unless test_and_set_reconnect_time(Time.now) log.info "synapse: ZK client skip since last reconnect is too close #{@name}" return end - # test-and-set should be thread safe based on per-callback model - # https://github.com/zk-ruby/zk/wiki/EventDeliveryModel - @last_reconnect_time = now # zookeeper watcher is one-time trigger, and can be lost when disconnected # https://zookeeper.apache.org/doc/r3.3.5/zookeeperProgrammers.html#ch_zkWatches # only need re-enable watcher on parent path and children list @@ -397,6 +392,17 @@ def zk_connect end end + def test_and_set_reconnect_time(now) + # ensure there is only one refresh can happen within a time window + if !@last_reconnect_time.nil? && (now - @last_reconnect_time) < 60 + return false + end + # test-and-set should be thread safe based on per-callback model + # https://github.com/zk-ruby/zk/wiki/EventDeliveryModel + @last_reconnect_time = now + true + + end # decode the data at a zookeeper endpoint def deserialize_service_instance(data) log.debug "synapse: deserializing process data" diff --git a/spec/lib/synapse/service_watcher_zookeeper_spec.rb b/spec/lib/synapse/service_watcher_zookeeper_spec.rb index 0f2f9f4c..867719af 100644 --- a/spec/lib/synapse/service_watcher_zookeeper_spec.rb +++ b/spec/lib/synapse/service_watcher_zookeeper_spec.rb @@ -149,6 +149,17 @@ subject.send(:watcher_callback).call end + it 'test and set reconnect time' do + now = Time.now + expect(subject.send(:test_and_set_reconnect_time, now)).to be true + now += 10 + expect(subject.send(:test_and_set_reconnect_time, now)).to be false + now += 50 + expect(subject.send(:test_and_set_reconnect_time, now)).to be true + now += 60 + expect(subject.send(:test_and_set_reconnect_time, now)).to be true + end + it 'handles zk consistency issues' do expect(subject).to receive(:watch) expect(subject).to receive(:discover).and_call_original From 84da546375a55db3c484c8f27132231eeb818edc Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Thu, 13 Jun 2019 20:55:02 -0700 Subject: [PATCH 5/9] Fix ruby 1.9.3 ci --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3e528555..8516c4d5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ language: ruby cache: bundler sudo: false rvm: - - 1.9.3-p551 + - 1.9.3 - 2.0.0-p648 - 2.1.10 - 2.2.5 From f8297a7e1a42cc18c08a79250617a50e7033b229 Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Mon, 17 Jun 2019 09:49:26 -0700 Subject: [PATCH 6/9] Move to sleep after test_and_set --- lib/synapse/service_watcher/zookeeper.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/synapse/service_watcher/zookeeper.rb b/lib/synapse/service_watcher/zookeeper.rb index 14617704..496795a8 100644 --- a/lib/synapse/service_watcher/zookeeper.rb +++ b/lib/synapse/service_watcher/zookeeper.rb @@ -367,12 +367,14 @@ def zk_connect # http://zookeeper.apache.org/doc/r3.3.5/zookeeperProgrammers.html#ch_zkSessions @zk.on_connected do log.info "synapse: ZK client has reconnected #{@name}" - # random backoff to avoid checking and refreshing all watchers at the same time - sleep rand(10) unless test_and_set_reconnect_time(Time.now) log.info "synapse: ZK client skip since last reconnect is too close #{@name}" return end + + # random backoff to avoid checking and refreshing all watchers at the same time + sleep rand(30) + # zookeeper watcher is one-time trigger, and can be lost when disconnected # https://zookeeper.apache.org/doc/r3.3.5/zookeeperProgrammers.html#ch_zkWatches # only need re-enable watcher on parent path and children list From 9d8f3fe903599bc50ee5e2e8e63644eb1d62f62c Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Mon, 17 Jun 2019 10:00:05 -0700 Subject: [PATCH 7/9] revert .travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8516c4d5..3e528555 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ language: ruby cache: bundler sudo: false rvm: - - 1.9.3 + - 1.9.3-p551 - 2.0.0-p648 - 2.1.10 - 2.2.5 From af9fe2b7aaf9e744a2eeba82c57a317318cdf2d0 Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Mon, 17 Jun 2019 10:01:18 -0700 Subject: [PATCH 8/9] Fix typo --- lib/synapse/service_watcher/zookeeper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/synapse/service_watcher/zookeeper.rb b/lib/synapse/service_watcher/zookeeper.rb index 496795a8..ccf0350a 100644 --- a/lib/synapse/service_watcher/zookeeper.rb +++ b/lib/synapse/service_watcher/zookeeper.rb @@ -372,7 +372,7 @@ def zk_connect return end - # random backoff to avoid checking and refreshing all watchers at the same time + # random backoff to avoid refreshing all watchers at the same time sleep rand(30) # zookeeper watcher is one-time trigger, and can be lost when disconnected From 18ea63e9eb100da1ee2b5211bf9b9d2dfe2c5467 Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Mon, 17 Jun 2019 14:22:20 -0700 Subject: [PATCH 9/9] Bump up version to 0.16.12 --- Gemfile.lock | 2 +- lib/synapse/version.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 6d409093..8bdd60e9 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - synapse (0.16.11) + synapse (0.16.12) aws-sdk (~> 1.39) docker-api (~> 1.7) dogstatsd-ruby (~> 3.3.0) diff --git a/lib/synapse/version.rb b/lib/synapse/version.rb index 12e5f56e..61ae47fe 100644 --- a/lib/synapse/version.rb +++ b/lib/synapse/version.rb @@ -1,3 +1,3 @@ module Synapse - VERSION = "0.16.11" + VERSION = "0.16.12" end