Skip to content

Commit

Permalink
feat: migrate to redis for service discovery (#268)
Browse files Browse the repository at this point in the history
  • Loading branch information
stakach authored Apr 12, 2024
1 parent 4e71add commit 6f380a4
Show file tree
Hide file tree
Showing 15 changed files with 159 additions and 150 deletions.
23 changes: 1 addition & 22 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ x-deployment-env: &deployment-env
ENV: ${ENV:-development}
SG_ENV: ${SG_ENV:-development}

x-etcd-client-env: &etcd-client-env
ETCD_HOST: ${ETCD_HOST:-etcd}
ETCD_PORT: ${ETCD_PORT:-2379}

x-redis-client-env: &redis-client-env
REDIS_URL: ${REDIS_URL:-redis://redis:6379}

Expand Down Expand Up @@ -44,7 +40,6 @@ services:
- ${PWD}/spec:/app/spec
- ${PWD}/src:/app/src
depends_on:
- etcd
- redis
- postgres
- migrator
Expand All @@ -55,29 +50,13 @@ services:
# Environment
GITHUB_ACTION: ${GITHUB_ACTION:-}
# Service Hosts
<<: [*etcd-client-env,*redis-client-env, *postgresdb-client-env,*deployment-env, *build-api-env]
<<: [*redis-client-env, *postgresdb-client-env,*deployment-env, *build-api-env]

redis:
image: eqalpha/keydb
restart: always
hostname: redis

etcd:
image: quay.io/coreos/etcd:${ETCD_VERSION:-v3.5.4}
restart: always
hostname: etcd
environment:
ALLOW_NONE_AUTHENTICATION: "yes"
ETCD_NAME: "etcd"
ETCD_INITIAL_ADVERTISE_PEER_URLS: "http://etcd:2380"
ETCD_LISTEN_PEER_URLS: "http://0.0.0.0:2380"
ETCD_LISTEN_CLIENT_URLS: "http://0.0.0.0:2379"
ETCD_ADVERTISE_CLIENT_URLS: "http://etcd:2379"
ETCD_INITIAL_CLUSTER_TOKEN: "etcd-cluster"
ETCD_INITIAL_CLUSTER=etcd: "http://etcd:2380"
ETCD_INITIAL_CLUSTER_STATE: "new"
TZ: $TZ

postgres:
hostname: postgres
image: postgres
Expand Down
20 changes: 6 additions & 14 deletions shard.lock
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ shards:
git: https://github.com/spider-gazelle/bindata.git
version: 2.0.0

clustering:
git: https://github.com/place-labs/clustering.git
version: 3.1.1

connect-proxy:
git: https://github.com/spider-gazelle/connect-proxy.git
version: 2.0.0
Expand Down Expand Up @@ -69,10 +65,6 @@ shards:
git: https://github.com/arcage/crystal-email.git
version: 0.7.0

etcd:
git: https://github.com/place-labs/crystal-etcd.git
version: 1.2.5

eventbus:
git: https://github.com/spider-gazelle/eventbus.git
version: 0.9.9+git.commit.ca8ef0c5e21ee15da079edd5bcea39bee7e07f26
Expand Down Expand Up @@ -109,10 +101,6 @@ shards:
git: https://github.com/jgaskins/hot_topic.git
version: 0.1.0+git.commit.3c901e77b6e000930398738260a2944b6f5785dc

hound-dog:
git: https://github.com/place-labs/hound-dog.git
version: 2.9.1

http-params-serializable:
git: https://github.com/place-labs/http-params-serializable.git
version: 0.5.0
Expand Down Expand Up @@ -211,15 +199,15 @@ shards:

placeos-driver:
git: https://github.com/placeos/driver.git
version: 6.9.19
version: 6.11.0+git.commit.9363dbec039d92ccc0ea5afdbaa549bc6bb14cdf

placeos-log-backend:
git: https://github.com/place-labs/log-backend.git
version: 0.11.4

placeos-models:
git: https://github.com/placeos/models.git
version: 9.42.2
version: 9.45.0

placeos-resource:
git: https://github.com/place-labs/resource.git
Expand All @@ -245,6 +233,10 @@ shards:
git: https://github.com/caspiano/redis-cluster.cr.git
version: 0.8.5

redis_service_manager:
git: https://github.com/place-labs/redis_service_manager.git
version: 3.1.1

rendezvous-hash:
git: https://github.com/caspiano/rendezvous-hash.git
version: 0.3.1
Expand Down
15 changes: 6 additions & 9 deletions shard.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ dependencies:
github: spider-gazelle/action-controller
version: ~> 7.2

clustering:
github: place-labs/clustering
version: ~> 3.0

git-repository:
github: place-labs/git-repository

Expand All @@ -28,17 +24,13 @@ dependencies:
github: crystal-community/hardware
version: ~> 0.5

hound-dog:
github: place-labs/hound-dog
version: ~> 2.5

log_helper:
github: spider-gazelle/log_helper
version: ~> 1

placeos-driver:
github: placeos/driver
version: ~> 6.1
branch: master

placeos-log-backend:
github: place-labs/log-backend
Expand All @@ -60,6 +52,11 @@ dependencies:
github: caspiano/redis-cluster.cr
version: ">= 0.8.4"

# clustering service discovery
redis_service_manager:
github: place-labs/redis_service_manager
version: ">= 3.0.0"

responsible:
github: place-labs/responsible
version: ~> 1.2
Expand Down
76 changes: 62 additions & 14 deletions spec/helper.cr
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,16 @@ def clear_tables
PlaceOS::Model::Edge.clear
end

# Spec helper: builds a fresh `MockClustering` instance on every call.
# NOTE(review): the "core" argument is presumably the service name expected
# by the `Clustering` base class — confirm against redis_service_manager.
def clustering_mock
MockClustering.new("core")
end

def discovery_mock
DiscoveryMock.new("core", uri: CORE_URL)
Clustering::Discovery.new(clustering_mock)
end

def module_manager_mock
discovery = discovery_mock
clustering = MockClustering.new(uri: CORE_URL, discovery: discovery)
PlaceOS::Core::ModuleManager.new(CORE_URL, discovery: discovery, clustering: clustering)
PlaceOS::Core::ModuleManager.new(CORE_URL, clustering: clustering_mock)
end

macro around_suite(block)
Expand All @@ -55,7 +57,6 @@ end

around_suite ->{
clear_tables
HoundDog::Service.clear_namespace
}

PgORM::Database.configure { |_| }
Expand Down Expand Up @@ -151,24 +152,71 @@ def create_resources(process : Bool = true, use_head : Bool = false)
{repository, driver, mod, resource_manager}
end

class DiscoveryMock < HoundDog::Discovery
# reopen this for specs
class Clustering::Discovery
DOES_NOT_MAP = "<does-not-map>"

def own_node?(key : String) : Bool
key != DOES_NOT_MAP
end

def etcd_nodes
[@service_events.node].map &->HoundDog::Discovery.to_hash_value(HoundDog::Service::Node)
end
end

class MockClustering < Clustering
def start(&stabilize : Array(HoundDog::Service::Node) ->)
@stabilize = stabilize
stabilize.call([discovery.node])
NODE_ID = "----core1----"
NODE_URI = ENV["CORE_URL"]? || "http://core:3000"
VERSION = "1"

getter uri : String = NODE_URI
getter ulid : String = NODE_ID

# registers this node with the cluster as a member
#
# Mock behaviour: takes the node list from `rendezvous`, records `VERSION`
# in `@version`, then invokes every rebalance callback in its own fiber.
# Always returns `true`.
def register : Bool
rendezvous_hash = rendezvous
@version = VERSION

# Handed to each rebalance callback; when called, it notifies every
# cluster-stable callback, each in its own fiber.
ready_cb = Proc(Nil).new do
cluster_stable_callbacks.each do |callback|
spawn do
begin
callback.call
rescue error
# errors are logged rather than raised out of the fiber
Log.error(exception: error) { "notifying cluster stable" }
end
end
end
end

# Each rebalance callback receives the node hash and the ready proc above
rebalance_callbacks.each do |callback|
spawn do
begin
callback.call(rendezvous_hash, ready_cb)
rescue error
# errors are logged rather than raised out of the fiber
Log.error(exception: error) { "performing rebalance callback" }
end
end
end
true
end

# removes this node from the cluster as a member
getter unregister : Bool = true

# is this node registered as part of the cluster
getter? registered : Bool = true

# is this class watching for changes to the cluster
# this should return true if registered returns true
getter? watching : Bool = true

# returns the list of known nodes
#
# The mock cluster always consists of a single node, `NODE_URI`; a new
# `RendezvousHash` is built on every call.
def rendezvous : RendezvousHash
RendezvousHash.new(nodes: [NODE_URI])
end

def stop
# returns a node_id => URI mapping
#
# For the mock this is a single entry: `NODE_ID` mapped to the parsed `NODE_URI`.
def node_hash : Hash(String, URI)
{
NODE_ID => URI.parse(NODE_URI),
}
end
end
6 changes: 2 additions & 4 deletions spec/mappings/control_system_modules_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,14 @@ module PlaceOS::Core::Mappings
end

def self.mocked_fail_manager
discovery = discovery_mock
clustering = MockClustering.new(uri: CORE_URL, discovery: discovery)
Mock.new(CORE_URL, discovery: discovery, clustering: clustering)
Mock.new(CORE_URL, clustering_mock)
end

describe ControlSystemModules, tags: "mappings" do
describe ".update_mapping" do
it "ignores systems not mapped to node" do
control_system = Model::Generator.control_system
control_system.id = DiscoveryMock::DOES_NOT_MAP
control_system.id = Clustering::Discovery::DOES_NOT_MAP
control_system_modules = ControlSystemModules.new(module_manager: module_manager_mock, startup: false)
control_system_modules.process_resource(:updated, control_system).skipped?.should be_true
end
Expand Down
13 changes: 5 additions & 8 deletions spec/module_manager_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,7 @@ module PlaceOS::Core
end

describe "startup" do
it "registers to etcd" do
# Remove metadata in etcd
namespace = HoundDog.settings.service_namespace
HoundDog.etcd_client.kv.delete_prefix(namespace)

it "registers to redis" do
# Clear relevant tables
Model::Driver.clear
Model::Module.clear
Expand All @@ -70,13 +66,14 @@ module PlaceOS::Core
sleep 3

# Check that the node is registered in redis
module_manager.discovery.nodes.map(&.[:name]).should contain(module_manager.discovery.name)
core_uri = URI.parse(CORE_URL)
module_manager.discovery.nodes.should contain(core_uri)

module_manager.discovery.unregister
module_manager.stop
sleep 0.1

# Check that the node is no longer registered in redis
module_manager.discovery.nodes.map(&.[:name]).should_not contain(module_manager.discovery.name)
module_manager.discovery.nodes.should_not contain(core_uri)
ensure
module_manager.try &.stop
end
Expand Down
7 changes: 0 additions & 7 deletions src/config.cr
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,6 @@ require "./telemetry"
# Server required after application controllers
require "action-controller/server"

# Configure Service discovery
HoundDog.configure do |settings|
settings.service_namespace = "core"
settings.etcd_host = PlaceOS::Core::ETCD_HOST
settings.etcd_port = PlaceOS::Core::ETCD_PORT
end

# Filter out sensitive params that shouldn't be logged
filter_params = ["password", "bearer_token"]

Expand Down
7 changes: 4 additions & 3 deletions src/constants.cr
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ module PlaceOS::Core

DRIVERS = ENV["ENGINE_DRIVERS"]? || File.join(PlaceOS::Compiler.repository_dir, "drivers")

ETCD_HOST = ENV["ETCD_HOST"]? || "localhost"
ETCD_PORT = (ENV["ETCD_PORT"]? || 2379).to_i

REDIS_URL = ENV["REDIS_URL"]? || "redis://localhost:6379"

# seconds before a node is considered offline
# should not be divisible by 3
CLUSTER_NODE_TTL = (ENV["CLUSTER_NODE_TTL"]? || "20").to_i

# `core` self-registers to redis with this information.
# In k8s we can grab the Pod information from the environment
# https://kubernetes.io/docs/tasks/inject-data-application/environment-variable-expose-pod-information/#use-pod-fields-as-values-for-environment-variables
Expand Down
2 changes: 1 addition & 1 deletion src/core-app.cr
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ Signal::INT.trap &terminate
# Docker containers use the term signal
Signal::TERM.trap &terminate

# Wait for etcd, redis, and postgres to be ready
# Wait for redis and postgres to be ready
PlaceOS::Core.wait_for_resources

spawn(same_thread: true) do
Expand Down
5 changes: 4 additions & 1 deletion src/placeos-core.cr
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ module PlaceOS::Core
LOGSTASH_HOST = ENV["LOGSTASH_HOST"]?
LOGSTASH_PORT = ENV["LOGSTASH_PORT"]?

# Minimize the number of connections being made to redis
REDIS_LOCK = Driver::RedisStorage.redis_lock
REDIS_CLIENT = Driver::RedisStorage.shared_redis_client

def self.log_backend
if !(logstash_host = LOGSTASH_HOST.presence).nil?
logstash_port = LOGSTASH_PORT.try(&.to_i?) || abort("LOGSTASH_PORT is either malformed or not present in environment")
Expand Down Expand Up @@ -48,7 +52,6 @@ module PlaceOS::Core
end

# Wait for the upstream services to be ready
# - etcd
# - redis
# - postgres
def self.wait_for_resources
Expand Down
3 changes: 0 additions & 3 deletions src/placeos-core/healthcheck.cr
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@ module PlaceOS::Core::Healthcheck
Promise.defer {
check_resource?("redis") { ::PlaceOS::Driver::RedisStorage.with_redis &.ping }
},
Promise.defer {
check_resource?("etcd") { ModuleManager.instance.discovery.etcd(&.maintenance.status) }
},
Promise.defer {
check_resource?("postgres") { pg_healthcheck }
},
Expand Down
Loading

0 comments on commit 6f380a4

Please sign in to comment.