diff --git a/Gemfile.lock b/Gemfile.lock index 6d409093..8bdd60e9 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - synapse (0.16.11) + synapse (0.16.12) aws-sdk (~> 1.39) docker-api (~> 1.7) dogstatsd-ruby (~> 3.3.0) diff --git a/lib/synapse/service_watcher/zookeeper.rb b/lib/synapse/service_watcher/zookeeper.rb index 57d867c9..ccf0350a 100644 --- a/lib/synapse/service_watcher/zookeeper.rb +++ b/lib/synapse/service_watcher/zookeeper.rb @@ -363,6 +363,27 @@ def zk_connect zk_cleanup end + # handle session connected after reconnecting + # http://zookeeper.apache.org/doc/r3.3.5/zookeeperProgrammers.html#ch_zkSessions + @zk.on_connected do + log.info "synapse: ZK client has reconnected #{@name}" + unless test_and_set_reconnect_time(Time.now) + log.info "synapse: ZK client skip since last reconnect is too close #{@name}" + return + end + + # random backoff to avoid refreshing all watchers at the same time + sleep rand(30) + + # zookeeper watcher is one-time trigger, and can be lost when disconnected + # https://zookeeper.apache.org/doc/r3.3.5/zookeeperProgrammers.html#ch_zkWatches + # only need re-enable watcher on parent path and children list + log.info "synapse: ZK client refresh watcher after reconnected #{@name}" + if @zk.exists?(@discovery['path'], :watch => true) + @zk.children(@discovery['path'], :watch => true) + end + end + # the path must exist, otherwise watch callbacks will not work statsd_time('synapse.watcher.zk.create_path.elapsed_time', ["zk_cluster:#{@zk_cluster}", "service_name:#{@name}"]) do create(@discovery['path']) @@ -373,6 +394,17 @@ def zk_connect end end + def test_and_set_reconnect_time(now) + # ensure there is only one refresh can happen within a time window + if !@last_reconnect_time.nil? && (now - @last_reconnect_time) < 60 + return false + end + # test-and-set should be thread safe based on per-callback model + # https://github.com/zk-ruby/zk/wiki/EventDeliveryModel + @last_reconnect_time = now + true + + end # decode the data at a zookeeper endpoint def deserialize_service_instance(data) log.debug "synapse: deserializing process data" diff --git a/lib/synapse/version.rb b/lib/synapse/version.rb index 12e5f56e..61ae47fe 100644 --- a/lib/synapse/version.rb +++ b/lib/synapse/version.rb @@ -1,3 +1,3 @@ module Synapse - VERSION = "0.16.11" + VERSION = "0.16.12" end diff --git a/spec/lib/synapse/service_watcher_zookeeper_spec.rb b/spec/lib/synapse/service_watcher_zookeeper_spec.rb index 0f2f9f4c..867719af 100644 --- a/spec/lib/synapse/service_watcher_zookeeper_spec.rb +++ b/spec/lib/synapse/service_watcher_zookeeper_spec.rb @@ -149,6 +149,17 @@ subject.send(:watcher_callback).call end + it 'test and set reconnect time' do + now = Time.now + expect(subject.send(:test_and_set_reconnect_time, now)).to be true + now += 10 + expect(subject.send(:test_and_set_reconnect_time, now)).to be false + now += 50 + expect(subject.send(:test_and_set_reconnect_time, now)).to be true + now += 60 + expect(subject.send(:test_and_set_reconnect_time, now)).to be true + end + it 'handles zk consistency issues' do expect(subject).to receive(:watch) expect(subject).to receive(:discover).and_call_original