From cb529d0c81411625005ced05bd4e36bfb26da25d Mon Sep 17 00:00:00 2001 From: morvencao Date: Thu, 12 Dec 2024 10:15:21 +0000 Subject: [PATCH 1/3] ensure maestro instance ready after adding to hash ring. Signed-off-by: morvencao --- cmd/maestro/server/healthcheck_server.go | 73 ++++++++++++++----- cmd/maestro/server/pulse_server.go | 16 +++- go.mod | 2 - go.sum | 2 - pkg/api/server_instance.go | 4 + pkg/dao/instance.go | 10 +++ pkg/dao/mocks/instance.go | 11 +++ .../202401151014_add_server_instances.go | 4 + 8 files changed, 96 insertions(+), 26 deletions(-) diff --git a/cmd/maestro/server/healthcheck_server.go b/cmd/maestro/server/healthcheck_server.go index ed099c1a..c4d75db5 100755 --- a/cmd/maestro/server/healthcheck_server.go +++ b/cmd/maestro/server/healthcheck_server.go @@ -6,37 +6,38 @@ import ( "net" "net/http" - health "github.com/docker/go-healthcheck" "github.com/gorilla/mux" + "github.com/openshift-online/maestro/pkg/dao" "k8s.io/klog/v2" ) -var ( - updater = health.NewStatusUpdater() -) - var _ Server = &healthCheckServer{} type healthCheckServer struct { - httpServer *http.Server + httpServer *http.Server + instanceDao dao.InstanceDao + instanceID string + brokerType string } func NewHealthCheckServer() *healthCheckServer { router := mux.NewRouter() - health.DefaultRegistry = health.NewRegistry() - health.Register("maintenance_status", updater) - router.HandleFunc("/healthcheck", health.StatusHandler).Methods(http.MethodGet) - router.HandleFunc("/healthcheck/down", downHandler).Methods(http.MethodPost) - router.HandleFunc("/healthcheck/up", upHandler).Methods(http.MethodPost) - srv := &http.Server{ Handler: router, Addr: env().Config.HTTPServer.Hostname + ":" + env().Config.HealthCheck.BindPort, } - return &healthCheckServer{ - httpServer: srv, + sessionFactory := env().Database.SessionFactory + server := &healthCheckServer{ + httpServer: srv, + instanceDao: dao.NewInstanceDao(&sessionFactory), + instanceID: env().Config.MessageBroker.ClientID, + brokerType: env().Config.MessageBroker.MessageBrokerType, } + + router.HandleFunc("/healthcheck", server.healthCheckHandler).Methods(http.MethodGet) + + return server } func (s healthCheckServer) Start() { @@ -73,10 +74,44 @@ func (s healthCheckServer) Listen() (listener net.Listener, err error) { func (s healthCheckServer) Serve(listener net.Listener) { } -func upHandler(w http.ResponseWriter, r *http.Request) { - updater.Update(nil) -} +// healthCheckHandler returns a 200 OK if the instance is ready, 503 Service Unavailable otherwise. 
+func (s healthCheckServer) healthCheckHandler(w http.ResponseWriter, r *http.Request) { + // For MQTT, check if the instance is ready + if s.brokerType == "mqtt" { + instance, err := s.instanceDao.Get(r.Context(), s.instanceID) + if err != nil { + klog.Errorf("Error getting instance: %v", err) + w.WriteHeader(http.StatusInternalServerError) + _, err := w.Write([]byte(`{"status": "error"}`)) + if err != nil { + klog.Errorf("Error writing healthcheck response: %v", err) + } + return + } + if instance.Ready { + klog.Infof("Instance is ready") + w.WriteHeader(http.StatusOK) + _, err := w.Write([]byte(`{"status": "ok"}`)) + if err != nil { + klog.Errorf("Error writing healthcheck response: %v", err) + } + return + } + + klog.Infof("Instance not ready") + w.WriteHeader(http.StatusServiceUnavailable) + _, err = w.Write([]byte(`{"status": "not ready"}`)) + if err != nil { + klog.Errorf("Error writing healthcheck response: %v", err) + } + return + } -func downHandler(w http.ResponseWriter, r *http.Request) { - updater.Update(fmt.Errorf("maintenance mode")) + // For gRPC broker, return 200 OK for now + klog.Infof("Instance is ready") + w.WriteHeader(http.StatusOK) + _, err := w.Write([]byte(`{"status": "ok"}`)) + if err != nil { + klog.Errorf("Error writing healthcheck response: %v", err) + } } diff --git a/cmd/maestro/server/pulse_server.go b/cmd/maestro/server/pulse_server.go index e49efc06..e8590c72 100644 --- a/cmd/maestro/server/pulse_server.go +++ b/cmd/maestro/server/pulse_server.go @@ -116,9 +116,9 @@ func (s *PulseServer) pulse(ctx context.Context) { log.V(10).Infof("Updating heartbeat for maestro instance: %s", s.instanceID) instance := &api.ServerInstance{ Meta: api.Meta{ - ID: s.instanceID, - UpdatedAt: time.Now(), + ID: s.instanceID, }, + LastPulse: time.Now(), } _, err := s.instanceDao.UpSert(ctx, instance) if err != nil { @@ -149,13 +149,16 @@ func (s *PulseServer) checkInstances(ctx context.Context) { return } + activeInstanceIDs := []string{} inactiveInstanceIDs := []string{} for _, instance := range instances { // Instances pulsing within the last three check intervals are considered as active. 
- if instance.UpdatedAt.After(time.Now().Add(time.Duration(int64(-3*time.Second) * s.pulseInterval))) { + if instance.LastPulse.After(time.Now().Add(time.Duration(int64(-3*time.Second) * s.pulseInterval))) { if err := s.statusDispatcher.OnInstanceUp(instance.ID); err != nil { log.Error(fmt.Sprintf("Error to call OnInstanceUp handler for maestro instance %s: %s", instance.ID, err.Error())) } + // mark the instance as active after it is added to the status dispatcher + activeInstanceIDs = append(activeInstanceIDs, instance.ID) } else { if err := s.statusDispatcher.OnInstanceDown(instance.ID); err != nil { log.Error(fmt.Sprintf("Error to call OnInstanceDown handler for maestro instance %s: %s", instance.ID, err.Error())) @@ -165,6 +168,13 @@ func (s *PulseServer) checkInstances(ctx context.Context) { } } + if len(activeInstanceIDs) > 0 { + // batch mark active instances + if err := s.instanceDao.MarkReadyByIDs(ctx, activeInstanceIDs); err != nil { + log.Error(fmt.Sprintf("Unable to mark active maestro instances (%s): %s", activeInstanceIDs, err.Error())) + } + } + if len(inactiveInstanceIDs) > 0 { // batch delete inactive instances if err := s.instanceDao.DeleteByIDs(ctx, inactiveInstanceIDs); err != nil { diff --git a/go.mod b/go.mod index cbc03d3d..71f37b22 100755 --- a/go.mod +++ b/go.mod @@ -13,7 +13,6 @@ require ( github.com/cespare/xxhash v1.1.0 github.com/cloudevents/sdk-go/v2 v2.15.3-0.20240911135016-682f3a9684e4 github.com/deckarep/golang-set/v2 v2.6.0 - github.com/docker/go-healthcheck v0.1.0 github.com/evanphx/json-patch v5.9.0+incompatible github.com/getsentry/sentry-go v0.20.0 github.com/ghodss/yaml v1.0.0 @@ -79,7 +78,6 @@ require ( github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dgrijalva/jwt-go v3.2.0+incompatible // indirect - github.com/docker/distribution v2.8.1+incompatible // indirect github.com/eclipse/paho.golang v0.21.0 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/felixge/fgprof v0.9.4 // indirect diff --git a/go.sum b/go.sum index 8167e651..ec98d49f 100644 --- a/go.sum +++ b/go.sum @@ -113,8 +113,6 @@ github.com/docker/docker v20.10.17+incompatible h1:JYCuMrWaVNophQTOrMMoSwudOVEfc github.com/docker/docker v20.10.17+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= -github.com/docker/go-healthcheck v0.1.0 h1:6ZrRr63F5LLsPwSlbZgjgoxNu+o1VlMIhCQWgbfrgU0= -github.com/docker/go-healthcheck v0.1.0/go.mod h1:3v7a0338vhH6WnYFtUd66S+9QK3M6xK4sKr7gGrht6o= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= diff --git a/pkg/api/server_instance.go b/pkg/api/server_instance.go index f9f74fef..d737e61d 100644 --- a/pkg/api/server_instance.go +++ b/pkg/api/server_instance.go @@ -1,11 +1,15 @@ package api +import "time" + // ServerInstance is employed by Maestro to discover active server instances. The updatedAt field // determines the liveness of the instance; if the instance remains unchanged for three consecutive // check intervals (30 seconds by default), it is marked as dead. // However, it is not meant for direct exposure to end users through the API. 
type ServerInstance struct { Meta + LastPulse time.Time // LastPulse indicates the last time the instance pulsed. + Ready bool // Ready indicates whether the instance is ready to serve requests. } type ServerInstanceList []*ServerInstance diff --git a/pkg/dao/instance.go b/pkg/dao/instance.go index db8e35ad..e51ee1bb 100644 --- a/pkg/dao/instance.go +++ b/pkg/dao/instance.go @@ -15,6 +15,7 @@ type InstanceDao interface { Create(ctx context.Context, instance *api.ServerInstance) (*api.ServerInstance, error) Replace(ctx context.Context, instance *api.ServerInstance) (*api.ServerInstance, error) UpSert(ctx context.Context, instance *api.ServerInstance) (*api.ServerInstance, error) + MarkReadyByIDs(ctx context.Context, ids []string) error Delete(ctx context.Context, id string) error DeleteByIDs(ctx context.Context, ids []string) error FindByIDs(ctx context.Context, ids []string) (api.ServerInstanceList, error) @@ -68,6 +69,15 @@ func (d *sqlInstanceDao) UpSert(ctx context.Context, instance *api.ServerInstanc return instance, nil } +func (d *sqlInstanceDao) MarkReadyByIDs(ctx context.Context, ids []string) error { + g2 := (*d.sessionFactory).New(ctx) + if err := g2.Model(&api.ServerInstance{}).Where("id in (?)", ids).Update("ready", true).Error; err != nil { + db.MarkForRollback(ctx, err) + return err + } + return nil +} + func (d *sqlInstanceDao) Delete(ctx context.Context, id string) error { g2 := (*d.sessionFactory).New(ctx) if err := g2.Omit(clause.Associations).Delete(&api.ServerInstance{Meta: api.Meta{ID: id}}).Error; err != nil { diff --git a/pkg/dao/mocks/instance.go b/pkg/dao/mocks/instance.go index 7d3076bf..9723f9ad 100644 --- a/pkg/dao/mocks/instance.go +++ b/pkg/dao/mocks/instance.go @@ -67,6 +67,17 @@ func (d *instanceDaoMock) UpSert(ctx context.Context, instance *api.ServerInstan return instance, nil } +func (d *instanceDaoMock) MarkReadyByIDs(ctx context.Context, ids []string) error { + d.mux.Lock() + defer d.mux.Unlock() + for _, instance := range d.instances { + if contains(ids, instance.ID) { + instance.Ready = true + } + } + return nil +} + func (d *instanceDaoMock) Delete(ctx context.Context, ID string) error { d.mux.Lock() defer d.mux.Unlock() diff --git a/pkg/db/migrations/202401151014_add_server_instances.go b/pkg/db/migrations/202401151014_add_server_instances.go index 281481a4..3de9fbc8 100644 --- a/pkg/db/migrations/202401151014_add_server_instances.go +++ b/pkg/db/migrations/202401151014_add_server_instances.go @@ -1,6 +1,8 @@ package migrations import ( + "time" + "gorm.io/gorm" "github.com/go-gormigrate/gormigrate/v2" @@ -9,6 +11,8 @@ import ( func addServerInstances() *gormigrate.Migration { type ServerInstance struct { Model + LastPulse time.Time + Ready bool `gorm:"default:false"` } return &gormigrate.Migration{ From 146d798941992c196a8b4505ccf2b88e36420f63 Mon Sep 17 00:00:00 2001 From: morvencao Date: Fri, 13 Dec 2024 08:43:13 +0000 Subject: [PATCH 2/3] move instance check to healthcheck server. 
Signed-off-by: morvencao --- cmd/maestro/servecmd/cmd.go | 38 +++- .../{pulse_server.go => event_server.go} | 117 +--------- cmd/maestro/server/healthcheck_server.go | 200 +++++++++++++----- pkg/api/server_instance.go | 4 +- pkg/config/config.go | 8 +- .../{pulse_server.go => event_server.go} | 19 +- ...se_server_test.go => event_server_test.go} | 17 +- pkg/config/health_check.go | 11 +- pkg/dao/instance.go | 14 +- pkg/dao/mocks/instance.go | 11 + .../202401151014_add_server_instances.go | 4 +- test/helper.go | 40 ++-- ...lse_server_test.go => status_hash_test.go} | 27 ++- 13 files changed, 276 insertions(+), 234 deletions(-) rename cmd/maestro/server/{pulse_server.go => event_server.go} (66%) rename pkg/config/{pulse_server.go => event_server.go} (83%) rename pkg/config/{pulse_server_test.go => event_server_test.go} (82%) rename test/integration/{pulse_server_test.go => status_hash_test.go} (86%) diff --git a/cmd/maestro/servecmd/cmd.go b/cmd/maestro/servecmd/cmd.go index c0f41c30..876da6e6 100755 --- a/cmd/maestro/servecmd/cmd.go +++ b/cmd/maestro/servecmd/cmd.go @@ -11,6 +11,9 @@ import ( "github.com/openshift-online/maestro/cmd/maestro/environments" "github.com/openshift-online/maestro/cmd/maestro/server" + "github.com/openshift-online/maestro/pkg/config" + "github.com/openshift-online/maestro/pkg/dao" + "github.com/openshift-online/maestro/pkg/dispatcher" "github.com/openshift-online/maestro/pkg/event" ) @@ -35,6 +38,8 @@ func runServer(cmd *cobra.Command, args []string) { klog.Fatalf("Unable to initialize environment: %s", err.Error()) } + healthcheckServer := server.NewHealthCheckServer() + // Create event broadcaster to broadcast resource status update events to subscribers eventBroadcaster := event.NewEventBroadcaster() @@ -42,17 +47,34 @@ func runServer(cmd *cobra.Command, args []string) { // For gRPC, create a gRPC broker to handle resource spec and status events. // For MQTT, create a Pulse server to handle resource spec and status events. 
var eventServer server.EventServer - if environments.Environment().Config.MessageBroker.MessageBrokerType == "grpc" { + switch environments.Environment().Config.MessageBroker.MessageBrokerType { + case "mqtt": + klog.Info("Setting up pulse server") + var statusDispatcher dispatcher.Dispatcher + subscriptionType := environments.Environment().Config.EventServer.SubscriptionType + switch config.SubscriptionType(subscriptionType) { + case config.SharedSubscriptionType: + statusDispatcher = dispatcher.NewNoopDispatcher(dao.NewConsumerDao(&environments.Environment().Database.SessionFactory), environments.Environment().Clients.CloudEventsSource) + case config.BroadcastSubscriptionType: + statusDispatcher = dispatcher.NewHashDispatcher(environments.Environment().Config.MessageBroker.ClientID, dao.NewInstanceDao(&environments.Environment().Database.SessionFactory), + dao.NewConsumerDao(&environments.Environment().Database.SessionFactory), environments.Environment().Clients.CloudEventsSource, environments.Environment().Config.EventServer.ConsistentHashConfig) + default: + klog.Errorf("Unsupported subscription type: %s", subscriptionType) + } + + // Set the status dispatcher for the healthcheck server + healthcheckServer.SetStatusDispatcher(statusDispatcher) + eventServer = server.NewMQTTEventServer(eventBroadcaster, statusDispatcher) + case "grpc": klog.Info("Setting up grpc broker") eventServer = server.NewGRPCBroker(eventBroadcaster) - } else { - klog.Info("Setting up pulse server") - eventServer = server.NewPulseServer(eventBroadcaster) + default: + klog.Errorf("Unsupported message broker type: %s", environments.Environment().Config.MessageBroker.MessageBrokerType) } + // Create the servers apiserver := server.NewAPIServer(eventBroadcaster) metricsServer := server.NewMetricsServer() - healthcheckServer := server.NewHealthCheckServer() controllersServer := server.NewControllersServer(eventServer) ctx, cancel := context.WithCancel(context.Background()) @@ -70,10 +92,6 @@ func runServer(cmd *cobra.Command, args []string) { if err := metricsServer.Stop(); err != nil { klog.Errorf("Failed to stop metrics server, %v", err) } - - if err := healthcheckServer.Stop(); err != nil { - klog.Errorf("Failed to stop healthcheck server, %v", err) - } }() // Start the event broadcaster @@ -82,7 +100,7 @@ func runServer(cmd *cobra.Command, args []string) { // Run the servers go apiserver.Start() go metricsServer.Start() - go healthcheckServer.Start() + go healthcheckServer.Start(ctx) go eventServer.Start(ctx) go controllersServer.Start(ctx) diff --git a/cmd/maestro/server/pulse_server.go b/cmd/maestro/server/event_server.go similarity index 66% rename from cmd/maestro/server/pulse_server.go rename to cmd/maestro/server/event_server.go index e8590c72..77ba990b 100644 --- a/cmd/maestro/server/pulse_server.go +++ b/cmd/maestro/server/event_server.go @@ -3,11 +3,9 @@ package server import ( "context" "fmt" - "time" "github.com/openshift-online/maestro/pkg/api" "github.com/openshift-online/maestro/pkg/client/cloudevents" - "github.com/openshift-online/maestro/pkg/config" "github.com/openshift-online/maestro/pkg/dao" "github.com/openshift-online/maestro/pkg/db" "github.com/openshift-online/maestro/pkg/dispatcher" @@ -15,8 +13,6 @@ import ( "github.com/openshift-online/maestro/pkg/logger" "github.com/openshift-online/maestro/pkg/services" "k8s.io/apimachinery/pkg/api/meta" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/klog/v2" "open-cluster-management.io/sdk-go/pkg/cloudevents/generic/types" 
"open-cluster-management.io/sdk-go/pkg/cloudevents/work/common" workpayload "open-cluster-management.io/sdk-go/pkg/cloudevents/work/payload" @@ -45,16 +41,14 @@ type EventServer interface { OnStatusUpdate(ctx context.Context, eventID, resourceID string) error } -var _ EventServer = &PulseServer{} +var _ EventServer = &MQTTEventServer{} -// PulseServer represents a server responsible for publish resource spec events from +// MQTTEventServer represents a server responsible for publish resource spec events from // resource controller and handle resource status update events from the maestro agent. // It also periodic heartbeat updates and checking the liveness of Maestro instances, // triggering status resync based on instances' status and other conditions. -type PulseServer struct { +type MQTTEventServer struct { instanceID string - pulseInterval int64 - instanceDao dao.InstanceDao eventInstanceDao dao.EventInstanceDao lockFactory db.LockFactory eventBroadcaster *event.EventBroadcaster // event broadcaster to broadcast resource status update events to subscribers @@ -64,22 +58,10 @@ type PulseServer struct { statusDispatcher dispatcher.Dispatcher } -func NewPulseServer(eventBroadcaster *event.EventBroadcaster) EventServer { - var statusDispatcher dispatcher.Dispatcher - switch config.SubscriptionType(env().Config.PulseServer.SubscriptionType) { - case config.SharedSubscriptionType: - statusDispatcher = dispatcher.NewNoopDispatcher(dao.NewConsumerDao(&env().Database.SessionFactory), env().Clients.CloudEventsSource) - case config.BroadcastSubscriptionType: - statusDispatcher = dispatcher.NewHashDispatcher(env().Config.MessageBroker.ClientID, dao.NewInstanceDao(&env().Database.SessionFactory), - dao.NewConsumerDao(&env().Database.SessionFactory), env().Clients.CloudEventsSource, env().Config.PulseServer.ConsistentHashConfig) - default: - klog.Fatalf("Unsupported subscription type: %s", env().Config.PulseServer.SubscriptionType) - } +func NewMQTTEventServer(eventBroadcaster *event.EventBroadcaster, statusDispatcher dispatcher.Dispatcher) EventServer { sessionFactory := env().Database.SessionFactory - return &PulseServer{ + return &MQTTEventServer{ instanceID: env().Config.MessageBroker.ClientID, - pulseInterval: env().Config.PulseServer.PulseInterval, - instanceDao: dao.NewInstanceDao(&sessionFactory), eventInstanceDao: dao.NewEventInstanceDao(&sessionFactory), lockFactory: db.NewAdvisoryLockFactory(sessionFactory), eventBroadcaster: eventBroadcaster, @@ -93,7 +75,7 @@ func NewPulseServer(eventBroadcaster *event.EventBroadcaster) EventServer { // Start initializes and runs the pulse server, updating and checking Maestro instances' liveness, // initializes subscription to status update messages and triggers status resync based on // instances' status and other conditions. -func (s *PulseServer) Start(ctx context.Context) { +func (s *MQTTEventServer) Start(ctx context.Context) { log.Infof("Starting pulse server") // start subscribing to resource status update messages. 
@@ -101,91 +83,14 @@ func (s *PulseServer) Start(ctx context.Context) { // start the status dispatcher go s.statusDispatcher.Start(ctx) - // start a goroutine to periodically update heartbeat for the current maestro instance - go wait.UntilWithContext(ctx, s.pulse, time.Duration(s.pulseInterval*int64(time.Second))) - - // start a goroutine to periodically check the liveness of maestro instances - go wait.UntilWithContext(ctx, s.checkInstances, time.Duration(s.pulseInterval/3*int64(time.Second))) - // wait until context is canceled <-ctx.Done() log.Infof("Shutting down pulse server") } -func (s *PulseServer) pulse(ctx context.Context) { - log.V(10).Infof("Updating heartbeat for maestro instance: %s", s.instanceID) - instance := &api.ServerInstance{ - Meta: api.Meta{ - ID: s.instanceID, - }, - LastPulse: time.Now(), - } - _, err := s.instanceDao.UpSert(ctx, instance) - if err != nil { - log.Error(fmt.Sprintf("Unable to upsert maestro instance: %s", err.Error())) - } -} - -func (s *PulseServer) checkInstances(ctx context.Context) { - log.V(10).Infof("Checking liveness of maestro instances") - // lock the Instance with a fail-fast advisory lock context. - // this allows concurrent processing of many instances by one or more maestro instances exclusively. - lockOwnerID, acquired, err := s.lockFactory.NewNonBlockingLock(ctx, "maestro-instances-pulse-check", db.Instances) - // Ensure that the transaction related to this lock always end. - defer s.lockFactory.Unlock(ctx, lockOwnerID) - if err != nil { - log.Error(fmt.Sprintf("error obtaining the instance lock: %v", err)) - return - } - // skip if the lock is not acquired - if !acquired { - log.V(4).Infof("failed to acquire the lock as another maestro instance is checking instances, skip") - return - } - - instances, err := s.instanceDao.All(ctx) - if err != nil { - log.Error(fmt.Sprintf("Unable to get all maestro instances: %s", err.Error())) - return - } - - activeInstanceIDs := []string{} - inactiveInstanceIDs := []string{} - for _, instance := range instances { - // Instances pulsing within the last three check intervals are considered as active. - if instance.LastPulse.After(time.Now().Add(time.Duration(int64(-3*time.Second) * s.pulseInterval))) { - if err := s.statusDispatcher.OnInstanceUp(instance.ID); err != nil { - log.Error(fmt.Sprintf("Error to call OnInstanceUp handler for maestro instance %s: %s", instance.ID, err.Error())) - } - // mark the instance as active after it is added to the status dispatcher - activeInstanceIDs = append(activeInstanceIDs, instance.ID) - } else { - if err := s.statusDispatcher.OnInstanceDown(instance.ID); err != nil { - log.Error(fmt.Sprintf("Error to call OnInstanceDown handler for maestro instance %s: %s", instance.ID, err.Error())) - } else { - inactiveInstanceIDs = append(inactiveInstanceIDs, instance.ID) - } - } - } - - if len(activeInstanceIDs) > 0 { - // batch mark active instances - if err := s.instanceDao.MarkReadyByIDs(ctx, activeInstanceIDs); err != nil { - log.Error(fmt.Sprintf("Unable to mark active maestro instances (%s): %s", activeInstanceIDs, err.Error())) - } - } - - if len(inactiveInstanceIDs) > 0 { - // batch delete inactive instances - if err := s.instanceDao.DeleteByIDs(ctx, inactiveInstanceIDs); err != nil { - log.Error(fmt.Sprintf("Unable to delete inactive maestro instances (%s): %s", inactiveInstanceIDs, err.Error())) - } - } -} - // startSubscription initiates the subscription to resource status update messages. 
// It runs asynchronously in the background until the provided context is canceled. -func (s *PulseServer) startSubscription(ctx context.Context) { +func (s *MQTTEventServer) startSubscription(ctx context.Context) { s.sourceClient.Subscribe(ctx, func(action types.ResourceAction, resource *api.Resource) error { log.V(4).Infof("received action %s for resource %s", action, resource.ID) @@ -210,17 +115,17 @@ func (s *PulseServer) startSubscription(ctx context.Context) { } // OnCreate will be called on each new resource creation event inserted into db. -func (s *PulseServer) OnCreate(ctx context.Context, resourceID string) error { +func (s *MQTTEventServer) OnCreate(ctx context.Context, resourceID string) error { return s.sourceClient.OnCreate(ctx, resourceID) } // OnUpdate will be called on each new resource update event inserted into db. -func (s *PulseServer) OnUpdate(ctx context.Context, resourceID string) error { +func (s *MQTTEventServer) OnUpdate(ctx context.Context, resourceID string) error { return s.sourceClient.OnUpdate(ctx, resourceID) } // OnDelete will be called on each new resource deletion event inserted into db. -func (s *PulseServer) OnDelete(ctx context.Context, resourceID string) error { +func (s *MQTTEventServer) OnDelete(ctx context.Context, resourceID string) error { return s.sourceClient.OnDelete(ctx, resourceID) } @@ -228,7 +133,7 @@ func (s *PulseServer) OnDelete(ctx context.Context, resourceID string) error { // It does two things: // 1. build the resource status and broadcast it to subscribers // 2. add the event instance record to mark the event has been processed by the current instance -func (s *PulseServer) OnStatusUpdate(ctx context.Context, eventID, resourceID string) error { +func (s *MQTTEventServer) OnStatusUpdate(ctx context.Context, eventID, resourceID string) error { statusEvent, sErr := s.statusEventService.Get(ctx, eventID) if sErr != nil { return fmt.Errorf("failed to get status event %s: %s", eventID, sErr.Error()) diff --git a/cmd/maestro/server/healthcheck_server.go b/cmd/maestro/server/healthcheck_server.go index c4d75db5..07bed664 100755 --- a/cmd/maestro/server/healthcheck_server.go +++ b/cmd/maestro/server/healthcheck_server.go @@ -2,25 +2,32 @@ package server import ( "context" + e "errors" "fmt" - "net" "net/http" + "time" "github.com/gorilla/mux" + "github.com/openshift-online/maestro/pkg/api" "github.com/openshift-online/maestro/pkg/dao" + "github.com/openshift-online/maestro/pkg/db" + "github.com/openshift-online/maestro/pkg/dispatcher" + "gorm.io/gorm" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" ) -var _ Server = &healthCheckServer{} - -type healthCheckServer struct { - httpServer *http.Server - instanceDao dao.InstanceDao - instanceID string - brokerType string +type HealthCheckServer struct { + httpServer *http.Server + statusDispatcher dispatcher.Dispatcher + lockFactory db.LockFactory + instanceDao dao.InstanceDao + instanceID string + heartbeatInterval int + brokerType string } -func NewHealthCheckServer() *healthCheckServer { +func NewHealthCheckServer() *HealthCheckServer { router := mux.NewRouter() srv := &http.Server{ Handler: router, @@ -28,11 +35,13 @@ func NewHealthCheckServer() *healthCheckServer { } sessionFactory := env().Database.SessionFactory - server := &healthCheckServer{ - httpServer: srv, - instanceDao: dao.NewInstanceDao(&sessionFactory), - instanceID: env().Config.MessageBroker.ClientID, - brokerType: env().Config.MessageBroker.MessageBrokerType, + server := &HealthCheckServer{ + httpServer: srv, + 
lockFactory: db.NewAdvisoryLockFactory(sessionFactory), + instanceDao: dao.NewInstanceDao(&sessionFactory), + instanceID: env().Config.MessageBroker.ClientID, + heartbeatInterval: env().Config.HealthCheck.HeartbeartInterval, + brokerType: env().Config.MessageBroker.MessageBrokerType, } router.HandleFunc("/healthcheck", server.healthCheckHandler).Methods(http.MethodGet) @@ -40,7 +49,19 @@ func NewHealthCheckServer() *healthCheckServer { return server } -func (s healthCheckServer) Start() { +func (s *HealthCheckServer) SetStatusDispatcher(dispatcher dispatcher.Dispatcher) { + s.statusDispatcher = dispatcher +} + +func (s *HealthCheckServer) Start(ctx context.Context) { + klog.Infof("Starting HealthCheck server") + + // start a goroutine to periodically update heartbeat for the current maestro instance + go wait.UntilWithContext(ctx, s.pulse, time.Duration(s.heartbeatInterval*int(time.Second))) + + // start a goroutine to periodically check the liveness of maestro instances + go wait.UntilWithContext(ctx, s.checkInstances, time.Duration(s.heartbeatInterval/3*int(time.Second))) + var err error if env().Config.HealthCheck.EnableHTTPS { if env().Config.HTTPServer.HTTPSCertFile == "" || env().Config.HTTPServer.HTTPSKeyFile == "" { @@ -59,58 +80,137 @@ func (s healthCheckServer) Start() { } check(err, "HealthCheck server terminated with errors") klog.Infof("HealthCheck server terminated") -} -func (s healthCheckServer) Stop() error { - return s.httpServer.Shutdown(context.Background()) -} - -// Unimplemented -func (s healthCheckServer) Listen() (listener net.Listener, err error) { - return nil, nil -} + // wait until context is done + <-ctx.Done() -// Unimplemented -func (s healthCheckServer) Serve(listener net.Listener) { + klog.Infof("Shutting down HealthCheck server") + s.httpServer.Shutdown(context.Background()) } -// healthCheckHandler returns a 200 OK if the instance is ready, 503 Service Unavailable otherwise. -func (s healthCheckServer) healthCheckHandler(w http.ResponseWriter, r *http.Request) { - // For MQTT, check if the instance is ready - if s.brokerType == "mqtt" { - instance, err := s.instanceDao.Get(r.Context(), s.instanceID) - if err != nil { - klog.Errorf("Error getting instance: %v", err) - w.WriteHeader(http.StatusInternalServerError) - _, err := w.Write([]byte(`{"status": "error"}`)) +func (s *HealthCheckServer) pulse(ctx context.Context) { + klog.V(10).Infof("Updating heartbeat for maestro instance: %s", s.instanceID) + // If there are multiple requests at the same time, it will cause the race conditions among these + // requests (read–modify–write), the advisory lock is used here to prevent the race conditions. + lockOwnerID, err := s.lockFactory.NewAdvisoryLock(ctx, s.instanceID, db.Instances) + // Ensure that the transaction related to this lock always end. 
+ defer s.lockFactory.Unlock(ctx, lockOwnerID) + if err != nil { + klog.Errorf("Error obtaining the instance (%s) lock: %v", s.instanceID, err) + return + } + found, err := s.instanceDao.Get(ctx, s.instanceID) + if err != nil { + if e.Is(err, gorm.ErrRecordNotFound) { + // create a new instance if not found + klog.V(10).Infof("Creating new maestro instance: %s", s.instanceID) + instance := &api.ServerInstance{ + Meta: api.Meta{ + ID: s.instanceID, + }, + LastHeartbeat: time.Now(), + } + _, err := s.instanceDao.Create(ctx, instance) if err != nil { - klog.Errorf("Error writing healthcheck response: %v", err) + klog.Errorf("Unable to create maestro instance: %s", err.Error()) } return } - if instance.Ready { - klog.Infof("Instance is ready") - w.WriteHeader(http.StatusOK) - _, err := w.Write([]byte(`{"status": "ok"}`)) - if err != nil { - klog.Errorf("Error writing healthcheck response: %v", err) + klog.Errorf("Unable to get maestro instance: %s", err.Error()) + } + found.LastHeartbeat = time.Now() + _, err = s.instanceDao.Replace(ctx, found) + if err != nil { + klog.Errorf("Unable to update heartbeat for maestro instance: %s", err.Error()) + } +} + +func (s *HealthCheckServer) checkInstances(ctx context.Context) { + klog.V(10).Infof("Checking liveness of maestro instances") + // lock the Instance with a fail-fast advisory lock context. + // this allows concurrent processing of many instances by one or more maestro instances exclusively. + lockOwnerID, acquired, err := s.lockFactory.NewNonBlockingLock(ctx, "maestro-instances-liveness-check", db.Instances) + // Ensure that the transaction related to this lock always end. + defer s.lockFactory.Unlock(ctx, lockOwnerID) + if err != nil { + klog.Errorf("Error obtaining the instance lock: %v", err) + return + } + // skip if the lock is not acquired + if !acquired { + klog.V(10).Infof("failed to acquire the lock as another maestro instance is checking instances, skip") + return + } + + instances, err := s.instanceDao.All(ctx) + if err != nil { + klog.Errorf("Unable to get all maestro instances: %s", err.Error()) + return + } + + activeInstanceIDs := []string{} + inactiveInstanceIDs := []string{} + for _, instance := range instances { + // Instances pulsing within the last three check intervals are considered as active. 
+ if instance.LastHeartbeat.After(time.Now().Add(time.Duration(int(-3*time.Second) * s.heartbeatInterval))) { + if s.brokerType == "mqtt" { + if err := s.statusDispatcher.OnInstanceUp(instance.ID); err != nil { + klog.Errorf("Error to call OnInstanceUp handler for maestro instance %s: %s", instance.ID, err.Error()) + } } - return + // mark the instance as active after it is added to the status dispatcher + activeInstanceIDs = append(activeInstanceIDs, instance.ID) + } else { + if s.brokerType == "mqtt" { + if err := s.statusDispatcher.OnInstanceDown(instance.ID); err != nil { + klog.Errorf("Error to call OnInstanceDown handler for maestro instance %s: %s", instance.ID, err.Error()) + } + } + // mark the instance as inactive after it is removed from the status dispatcher + inactiveInstanceIDs = append(inactiveInstanceIDs, instance.ID) } + } - klog.Infof("Instance not ready") - w.WriteHeader(http.StatusServiceUnavailable) - _, err = w.Write([]byte(`{"status": "not ready"}`)) + if len(activeInstanceIDs) > 0 { + // batch mark active instances + if err := s.instanceDao.MarkReadyByIDs(ctx, activeInstanceIDs); err != nil { + klog.Errorf("Unable to mark active maestro instances (%s): %s", activeInstanceIDs, err.Error()) + } + } + + if len(inactiveInstanceIDs) > 0 { + // batch mark inactive instances + if err := s.instanceDao.MarkUnreadyByIDs(ctx, inactiveInstanceIDs); err != nil { + klog.Errorf("Unable to mark inactive maestro instances (%s): %s", inactiveInstanceIDs, err.Error()) + } + } +} + +// healthCheckHandler returns a 200 OK if the instance is ready, 503 Service Unavailable otherwise. +func (s *HealthCheckServer) healthCheckHandler(w http.ResponseWriter, r *http.Request) { + instance, err := s.instanceDao.Get(r.Context(), s.instanceID) + if err != nil { + klog.Errorf("Error getting instance: %v", err) + w.WriteHeader(http.StatusInternalServerError) + _, err := w.Write([]byte(`{"status": "error"}`)) + if err != nil { + klog.Errorf("Error writing healthcheck response: %v", err) + } + return + } + if instance.Ready { + klog.Infof("Instance is ready") + w.WriteHeader(http.StatusOK) + _, err := w.Write([]byte(`{"status": "ok"}`)) if err != nil { klog.Errorf("Error writing healthcheck response: %v", err) } return } - // For gRPC broker, return 200 OK for now - klog.Infof("Instance is ready") - w.WriteHeader(http.StatusOK) - _, err := w.Write([]byte(`{"status": "ok"}`)) + klog.Infof("Instance not ready") + w.WriteHeader(http.StatusServiceUnavailable) + _, err = w.Write([]byte(`{"status": "not ready"}`)) if err != nil { klog.Errorf("Error writing healthcheck response: %v", err) } diff --git a/pkg/api/server_instance.go b/pkg/api/server_instance.go index d737e61d..dd532306 100644 --- a/pkg/api/server_instance.go +++ b/pkg/api/server_instance.go @@ -8,8 +8,8 @@ import "time" // However, it is not meant for direct exposure to end users through the API. type ServerInstance struct { Meta - LastPulse time.Time // LastPulse indicates the last time the instance pulsed. - Ready bool // Ready indicates whether the instance is ready to serve requests. + LastHeartbeat time.Time // LastHeartbeat indicates the last time the instance sent a heartbeat. + Ready bool // Ready indicates whether the instance is ready to serve requests. 
} type ServerInstanceList []*ServerInstance diff --git a/pkg/config/config.go b/pkg/config/config.go index fbcbd052..f9445036 100755 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -17,7 +17,7 @@ type ApplicationConfig struct { GRPCServer *GRPCServerConfig `json:"grpc_server"` Metrics *MetricsConfig `json:"metrics"` HealthCheck *HealthCheckConfig `json:"health_check"` - PulseServer *PulseServerConfig `json:"pulse_server"` + EventServer *EventServerConfig `json:"event_server"` Database *DatabaseConfig `json:"database"` MessageBroker *MessageBrokerConfig `json:"message_broker"` OCM *OCMConfig `json:"ocm"` @@ -30,7 +30,7 @@ func NewApplicationConfig() *ApplicationConfig { GRPCServer: NewGRPCServerConfig(), Metrics: NewMetricsConfig(), HealthCheck: NewHealthCheckConfig(), - PulseServer: NewPulseServerConfig(), + EventServer: NewEventServerConfig(), Database: NewDatabaseConfig(), MessageBroker: NewMessageBrokerConfig(), OCM: NewOCMConfig(), @@ -44,7 +44,7 @@ func (c *ApplicationConfig) AddFlags(flagset *pflag.FlagSet) { c.GRPCServer.AddFlags(flagset) c.Metrics.AddFlags(flagset) c.HealthCheck.AddFlags(flagset) - c.PulseServer.AddFlags(flagset) + c.EventServer.AddFlags(flagset) c.Database.AddFlags(flagset) c.MessageBroker.AddFlags(flagset) c.OCM.AddFlags(flagset) @@ -61,7 +61,7 @@ func (c *ApplicationConfig) ReadFiles() []string { {c.OCM.ReadFiles, "OCM"}, {c.Metrics.ReadFiles, "Metrics"}, {c.HealthCheck.ReadFiles, "HealthCheck"}, - {c.PulseServer.ReadFiles, "PulseServer"}, + {c.EventServer.ReadFiles, "EventServer"}, {c.Sentry.ReadFiles, "Sentry"}, } messages := []string{} diff --git a/pkg/config/pulse_server.go b/pkg/config/event_server.go similarity index 83% rename from pkg/config/pulse_server.go rename to pkg/config/event_server.go index 3b29b80f..03abb2ff 100644 --- a/pkg/config/pulse_server.go +++ b/pkg/config/event_server.go @@ -11,9 +11,8 @@ const ( BroadcastSubscriptionType SubscriptionType = "broadcast" ) -// PulseServerConfig contains the configuration for the maestro pulse server. -type PulseServerConfig struct { - PulseInterval int64 `json:"pulse_interval"` +// EventServerConfig contains the configuration for the maestro pulse server. +type EventServerConfig struct { SubscriptionType string `json:"subscription_type"` ConsistentHashConfig *ConsistentHashConfig `json:"consistent_hash_config"` } @@ -25,10 +24,9 @@ type ConsistentHashConfig struct { Load float64 `json:"load"` } -// NewPulseServerConfig creates a new PulseServerConfig with default 15 second pulse interval. -func NewPulseServerConfig() *PulseServerConfig { - return &PulseServerConfig{ - PulseInterval: 15, +// NewEventServerConfig creates a new EventServerConfig with default settings. +func NewEventServerConfig() *EventServerConfig { + return &EventServerConfig{ SubscriptionType: "shared", ConsistentHashConfig: NewConsistentHashConfig(), } @@ -46,20 +44,19 @@ func NewConsistentHashConfig() *ConsistentHashConfig { } } -// AddFlags configures the PulseServerConfig with command line flags. +// AddFlags configures the EventServerConfig with command line flags. // It allows users to customize the interval for maestro instance pulses and subscription type. // - "pulse-interval" sets the time between maestro instance pulses (in seconds) to indicate its liveness (default: 15 seconds). // - "subscription-type" specifies the subscription type for resource status updates from message broker, either "shared" or "broadcast". 
// "shared" subscription type uses MQTT feature to ensure only one Maestro instance receives resource status messages. // "broadcast" subscription type will make all Maestro instances to receive resource status messages and hash the message to determine which instance should process it. // If subscription type is "broadcast", ConsistentHashConfig settings can be configured for the hashing algorithm. -func (c *PulseServerConfig) AddFlags(fs *pflag.FlagSet) { - fs.Int64Var(&c.PulseInterval, "pulse-interval", c.PulseInterval, "Sets the pulse interval for maestro instances (seconds) to indicate liveness") +func (c *EventServerConfig) AddFlags(fs *pflag.FlagSet) { fs.StringVar(&c.SubscriptionType, "subscription-type", c.SubscriptionType, "Sets the subscription type for resource status updates from message broker, Options: \"shared\" (only one instance receives resource status message, MQTT feature ensures exclusivity) or \"broadcast\" (all instances receive messages, hashed to determine processing instance)") c.ConsistentHashConfig.AddFlags(fs) } -func (c *PulseServerConfig) ReadFiles() error { +func (c *EventServerConfig) ReadFiles() error { c.ConsistentHashConfig.ReadFiles() return nil } diff --git a/pkg/config/pulse_server_test.go b/pkg/config/event_server_test.go similarity index 82% rename from pkg/config/pulse_server_test.go rename to pkg/config/event_server_test.go index 803c5dc1..44fd2a7a 100644 --- a/pkg/config/pulse_server_test.go +++ b/pkg/config/event_server_test.go @@ -7,17 +7,16 @@ import ( "github.com/spf13/pflag" ) -func TestPulseServerConfig(t *testing.T) { +func TestEventServerConfig(t *testing.T) { cases := []struct { name string input map[string]string - want *PulseServerConfig + want *EventServerConfig }{ { name: "default subscription type", input: map[string]string{}, - want: &PulseServerConfig{ - PulseInterval: 15, + want: &EventServerConfig{ SubscriptionType: "shared", ConsistentHashConfig: &ConsistentHashConfig{ PartitionCount: 7, @@ -31,8 +30,7 @@ func TestPulseServerConfig(t *testing.T) { input: map[string]string{ "subscription-type": "broadcast", }, - want: &PulseServerConfig{ - PulseInterval: 15, + want: &EventServerConfig{ SubscriptionType: "broadcast", ConsistentHashConfig: &ConsistentHashConfig{ PartitionCount: 7, @@ -49,8 +47,7 @@ func TestPulseServerConfig(t *testing.T) { "consistent-hash-replication-factor": "30", "consistent-hash-load": "1.5", }, - want: &PulseServerConfig{ - PulseInterval: 15, + want: &EventServerConfig{ SubscriptionType: "broadcast", ConsistentHashConfig: &ConsistentHashConfig{ PartitionCount: 10, @@ -61,7 +58,7 @@ func TestPulseServerConfig(t *testing.T) { }, } - config := NewPulseServerConfig() + config := NewEventServerConfig() pflag.NewFlagSet("test", pflag.ContinueOnError) fs := pflag.CommandLine config.AddFlags(fs) @@ -72,7 +69,7 @@ func TestPulseServerConfig(t *testing.T) { fs.Set(key, value) } if !reflect.DeepEqual(config, tc.want) { - t.Errorf("NewPulseServerConfig() = %v; want %v", config, tc.want) + t.Errorf("NewEventServerConfig() = %v; want %v", config, tc.want) } // clear flags fs.VisitAll(func(f *pflag.Flag) { diff --git a/pkg/config/health_check.go b/pkg/config/health_check.go index f811e597..122f4b6e 100755 --- a/pkg/config/health_check.go +++ b/pkg/config/health_check.go @@ -5,20 +5,23 @@ import ( ) type HealthCheckConfig struct { - BindPort string `json:"bind_port"` - EnableHTTPS bool `json:"enable_https"` + BindPort string `json:"bind_port"` + EnableHTTPS bool `json:"enable_https"` + HeartbeartInterval int 
`json:"heartbeat_interval"` } func NewHealthCheckConfig() *HealthCheckConfig { return &HealthCheckConfig{ - BindPort: "8083", - EnableHTTPS: false, + BindPort: "8083", + EnableHTTPS: false, + HeartbeartInterval: 15, } } func (c *HealthCheckConfig) AddFlags(fs *pflag.FlagSet) { fs.StringVar(&c.BindPort, "health-check-server-bindport", c.BindPort, "Health check server bind port") fs.BoolVar(&c.EnableHTTPS, "enable-health-check-https", c.EnableHTTPS, "Enable HTTPS for health check server") + fs.IntVar(&c.HeartbeartInterval, "heartbeat-interval", c.HeartbeartInterval, "Heartbeat interval for health check server") } func (c *HealthCheckConfig) ReadFiles() error { diff --git a/pkg/dao/instance.go b/pkg/dao/instance.go index e51ee1bb..21e732ca 100644 --- a/pkg/dao/instance.go +++ b/pkg/dao/instance.go @@ -14,8 +14,8 @@ type InstanceDao interface { Get(ctx context.Context, id string) (*api.ServerInstance, error) Create(ctx context.Context, instance *api.ServerInstance) (*api.ServerInstance, error) Replace(ctx context.Context, instance *api.ServerInstance) (*api.ServerInstance, error) - UpSert(ctx context.Context, instance *api.ServerInstance) (*api.ServerInstance, error) MarkReadyByIDs(ctx context.Context, ids []string) error + MarkUnreadyByIDs(ctx context.Context, ids []string) error Delete(ctx context.Context, id string) error DeleteByIDs(ctx context.Context, ids []string) error FindByIDs(ctx context.Context, ids []string) (api.ServerInstanceList, error) @@ -60,18 +60,18 @@ func (d *sqlInstanceDao) Replace(ctx context.Context, instance *api.ServerInstan return instance, nil } -func (d *sqlInstanceDao) UpSert(ctx context.Context, instance *api.ServerInstance) (*api.ServerInstance, error) { +func (d *sqlInstanceDao) MarkReadyByIDs(ctx context.Context, ids []string) error { g2 := (*d.sessionFactory).New(ctx) - if err := g2.Unscoped().Omit(clause.Associations).Save(instance).Error; err != nil { + if err := g2.Model(&api.ServerInstance{}).Where("id in (?)", ids).Update("ready", true).Error; err != nil { db.MarkForRollback(ctx, err) - return nil, err + return err } - return instance, nil + return nil } -func (d *sqlInstanceDao) MarkReadyByIDs(ctx context.Context, ids []string) error { +func (d *sqlInstanceDao) MarkUnreadyByIDs(ctx context.Context, ids []string) error { g2 := (*d.sessionFactory).New(ctx) - if err := g2.Model(&api.ServerInstance{}).Where("id in (?)", ids).Update("ready", true).Error; err != nil { + if err := g2.Model(&api.ServerInstance{}).Where("id in (?)", ids).Update("ready", false).Error; err != nil { db.MarkForRollback(ctx, err) return err } diff --git a/pkg/dao/mocks/instance.go b/pkg/dao/mocks/instance.go index 9723f9ad..b872b337 100644 --- a/pkg/dao/mocks/instance.go +++ b/pkg/dao/mocks/instance.go @@ -78,6 +78,17 @@ func (d *instanceDaoMock) MarkReadyByIDs(ctx context.Context, ids []string) erro return nil } +func (d *instanceDaoMock) MarkUnreadyByIDs(ctx context.Context, ids []string) error { + d.mux.Lock() + defer d.mux.Unlock() + for _, instance := range d.instances { + if contains(ids, instance.ID) { + instance.Ready = false + } + } + return nil +} + func (d *instanceDaoMock) Delete(ctx context.Context, ID string) error { d.mux.Lock() defer d.mux.Unlock() diff --git a/pkg/db/migrations/202401151014_add_server_instances.go b/pkg/db/migrations/202401151014_add_server_instances.go index 3de9fbc8..2a733721 100644 --- a/pkg/db/migrations/202401151014_add_server_instances.go +++ b/pkg/db/migrations/202401151014_add_server_instances.go @@ -11,8 +11,8 @@ import ( func 
addServerInstances() *gormigrate.Migration { type ServerInstance struct { Model - LastPulse time.Time - Ready bool `gorm:"default:false"` + LastHeartbeat time.Time + Ready bool `gorm:"default:false"` } return &gormigrate.Migration{ diff --git a/test/helper.go b/test/helper.go index bf8b22df..9d4c353b 100755 --- a/test/helper.go +++ b/test/helper.go @@ -13,6 +13,8 @@ import ( "time" "github.com/openshift-online/maestro/pkg/controllers" + "github.com/openshift-online/maestro/pkg/dao" + "github.com/openshift-online/maestro/pkg/dispatcher" "github.com/openshift-online/maestro/pkg/event" "github.com/openshift-online/maestro/pkg/logger" "k8s.io/klog/v2" @@ -70,13 +72,14 @@ type Helper struct { ContextCancelFunc context.CancelFunc EventBroadcaster *event.EventBroadcaster + StatusDispatcher dispatcher.Dispatcher Store *MemoryStore GRPCSourceClient *generic.CloudEventSourceClient[*api.Resource] DBFactory db.SessionFactory AppConfig *config.ApplicationConfig APIServer server.Server MetricsServer server.Server - HealthCheckServer server.Server + HealthCheckServer *server.HealthCheckServer EventServer server.EventServer ControllerManager *server.ControllersServer WorkAgentHolder *work.ClientHolder @@ -95,7 +98,7 @@ func NewHelper(t *testing.T) *Helper { fmt.Println("Unable to read JWT keys - this may affect tests that make authenticated server requests") } - env := environments.Environment() + env := helper.Env() // Manually set environment name, ignoring environment variables env.Name = environments.TestingEnv err = env.AddFlags(pflag.CommandLine) @@ -118,10 +121,12 @@ func NewHelper(t *testing.T) *Helper { Ctx: ctx, ContextCancelFunc: cancel, EventBroadcaster: event.NewEventBroadcaster(), - AppConfig: env.Config, - DBFactory: env.Database.SessionFactory, - JWTPrivateKey: jwtKey, - JWTCA: jwtCA, + StatusDispatcher: dispatcher.NewHashDispatcher(helper.Env().Config.MessageBroker.ClientID, dao.NewInstanceDao(&helper.Env().Database.SessionFactory), + dao.NewConsumerDao(&helper.Env().Database.SessionFactory), helper.Env().Clients.CloudEventsSource, helper.Env().Config.EventServer.ConsistentHashConfig), + AppConfig: env.Config, + DBFactory: env.Database.SessionFactory, + JWTPrivateKey: jwtKey, + JWTCA: jwtCA, } // TODO jwk mock server needs to be refactored out of the helper and into the testing environment @@ -130,14 +135,13 @@ func NewHelper(t *testing.T) *Helper { helper.sendShutdownSignal, helper.stopAPIServer, helper.stopMetricsServer, - helper.stopHealthCheckServer, jwkMockTeardown, } helper.startEventBroadcaster() helper.startAPIServer() helper.startMetricsServer() - helper.startHealthCheckServer() + helper.startHealthCheckServer(helper.Ctx) helper.startEventServer(helper.Ctx) }) helper.T = t @@ -192,31 +196,25 @@ func (helper *Helper) stopMetricsServer() error { return nil } -func (helper *Helper) startHealthCheckServer() { +func (helper *Helper) startHealthCheckServer(ctx context.Context) { + helper.Env().Config.HealthCheck.HeartbeartInterval = 1 helper.HealthCheckServer = server.NewHealthCheckServer() + helper.HealthCheckServer.SetStatusDispatcher(helper.StatusDispatcher) go func() { klog.V(10).Info("Test health check server started") - helper.HealthCheckServer.Start() + helper.HealthCheckServer.Start(ctx) klog.V(10).Info("Test health check server stopped") }() } -func (helper *Helper) stopHealthCheckServer() error { - if err := helper.HealthCheckServer.Stop(); err != nil { - return fmt.Errorf("unable to stop health check server: %s", err.Error()) - } - return nil -} - func (helper *Helper) 
sendShutdownSignal() error { helper.ContextCancelFunc() return nil } func (helper *Helper) startEventServer(ctx context.Context) { - helper.Env().Config.PulseServer.PulseInterval = 1 - helper.Env().Config.PulseServer.SubscriptionType = "broadcast" - helper.EventServer = server.NewPulseServer(helper.EventBroadcaster) + // helper.Env().Config.EventServer.SubscriptionType = "broadcast" + helper.EventServer = server.NewMQTTEventServer(helper.EventBroadcaster, helper.StatusDispatcher) go func() { klog.V(10).Info("Test event server started") helper.EventServer.Start(ctx) @@ -339,7 +337,7 @@ func (helper *Helper) RestartMetricsServer() { func (helper *Helper) Reset() { klog.Infof("Reseting testing environment") - env := environments.Environment() + env := helper.Env() // Reset the configuration env.Config = config.NewApplicationConfig() diff --git a/test/integration/pulse_server_test.go b/test/integration/status_hash_test.go similarity index 86% rename from test/integration/pulse_server_test.go rename to test/integration/status_hash_test.go index 9556f2bd..6873b5df 100644 --- a/test/integration/pulse_server_test.go +++ b/test/integration/status_hash_test.go @@ -16,7 +16,7 @@ import ( "github.com/openshift-online/maestro/test" ) -func TestPulseServer(t *testing.T) { +func TestEventServer(t *testing.T) { h, _ := test.RegisterIntegration(t) ctx, cancel := context.WithCancel(context.Background()) defer func() { @@ -25,10 +25,11 @@ func TestPulseServer(t *testing.T) { instanceDao := dao.NewInstanceDao(&h.Env().Database.SessionFactory) // insert one existing instances - _, err := instanceDao.UpSert(ctx, &api.ServerInstance{ + _, err := instanceDao.Create(ctx, &api.ServerInstance{ Meta: api.Meta{ ID: "instance1", }, + LastHeartbeat: time.Now(), }) Expect(err).NotTo(HaveOccurred()) @@ -39,13 +40,23 @@ func TestPulseServer(t *testing.T) { return err } - if len(instances) != 1 { + if len(instances) != 2 { return fmt.Errorf("expected 1 instance, got %d", len(instances)) } - instance := instances[0] - if instance.UpdatedAt.IsZero() { - return fmt.Errorf("expected instance.UpdatedAt to be non-zero") + var instance *api.ServerInstance + for _, i := range instances { + if i.ID == *instanceID { + instance = i + } + } + + if instance.LastHeartbeat.IsZero() { + return fmt.Errorf("expected instance.LastHeartbeat to be non-zero") + } + + if !instance.Ready { + return fmt.Errorf("expected instance.Ready to be true") } if instance.ID != *instanceID { @@ -65,10 +76,12 @@ func TestPulseServer(t *testing.T) { // to make sure the consumer is hashed to the new instance firstly. // after the new instance is stale after 3*pulseInterval (3s), the current // instance will take over the consumer and resync the resource status. - _, err = instanceDao.UpSert(ctx, &api.ServerInstance{ + _, err = instanceDao.Create(ctx, &api.ServerInstance{ Meta: api.Meta{ ID: clusterName, }, + LastHeartbeat: time.Now(), + Ready: true, }) Expect(err).NotTo(HaveOccurred()) From 59981a32447be51006db3f1bb233217d0f2b3490 Mon Sep 17 00:00:00 2001 From: morvencao Date: Mon, 16 Dec 2024 02:52:19 +0000 Subject: [PATCH 3/3] rename mqtt event server to also cover kafka. 
Signed-off-by: morvencao --- cmd/maestro/servecmd/cmd.go | 17 ++++++------- cmd/maestro/server/event_server.go | 37 ++++++++++++++-------------- cmd/maestro/server/grpc_broker.go | 2 +- pkg/config/event_server.go | 5 ++-- test/helper.go | 2 +- test/integration/status_hash_test.go | 4 +-- 6 files changed, 31 insertions(+), 36 deletions(-) diff --git a/cmd/maestro/servecmd/cmd.go b/cmd/maestro/servecmd/cmd.go index 876da6e6..1b3611df 100755 --- a/cmd/maestro/servecmd/cmd.go +++ b/cmd/maestro/servecmd/cmd.go @@ -45,11 +45,13 @@ func runServer(cmd *cobra.Command, args []string) { // Create the event server based on the message broker type: // For gRPC, create a gRPC broker to handle resource spec and status events. - // For MQTT, create a Pulse server to handle resource spec and status events. + // For MQTT/Kafka, create a message queue based event server to handle resource spec and status events. var eventServer server.EventServer - switch environments.Environment().Config.MessageBroker.MessageBrokerType { - case "mqtt": - klog.Info("Setting up pulse server") + if environments.Environment().Config.MessageBroker.MessageBrokerType == "grpc" { + klog.Info("Setting up grpc broker") + eventServer = server.NewGRPCBroker(eventBroadcaster) + } else { + klog.Info("Setting up message queue event server") var statusDispatcher dispatcher.Dispatcher subscriptionType := environments.Environment().Config.EventServer.SubscriptionType switch config.SubscriptionType(subscriptionType) { @@ -64,12 +66,7 @@ func runServer(cmd *cobra.Command, args []string) { // Set the status dispatcher for the healthcheck server healthcheckServer.SetStatusDispatcher(statusDispatcher) - eventServer = server.NewMQTTEventServer(eventBroadcaster, statusDispatcher) - case "grpc": - klog.Info("Setting up grpc broker") - eventServer = server.NewGRPCBroker(eventBroadcaster) - default: - klog.Errorf("Unsupported message broker type: %s", environments.Environment().Config.MessageBroker.MessageBrokerType) + eventServer = server.NewMessageQueueEventServer(eventBroadcaster, statusDispatcher) } // Create the servers diff --git a/cmd/maestro/server/event_server.go b/cmd/maestro/server/event_server.go index 77ba990b..cae4207c 100644 --- a/cmd/maestro/server/event_server.go +++ b/cmd/maestro/server/event_server.go @@ -41,13 +41,13 @@ type EventServer interface { OnStatusUpdate(ctx context.Context, eventID, resourceID string) error } -var _ EventServer = &MQTTEventServer{} +var _ EventServer = &MessageQueueEventServer{} -// MQTTEventServer represents a server responsible for publish resource spec events from -// resource controller and handle resource status update events from the maestro agent. -// It also periodic heartbeat updates and checking the liveness of Maestro instances, -// triggering status resync based on instances' status and other conditions. -type MQTTEventServer struct { +// MessageQueueEventServer represents a event server responsible for publish resource spec events +// from resource controller and handle resource status update events from the message queue. +// It also maintains a status dispatcher to dispatch status update events to the corresponding +// maestro instances. 
+type MessageQueueEventServer struct { instanceID string eventInstanceDao dao.EventInstanceDao lockFactory db.LockFactory @@ -58,9 +58,9 @@ type MQTTEventServer struct { statusDispatcher dispatcher.Dispatcher } -func NewMQTTEventServer(eventBroadcaster *event.EventBroadcaster, statusDispatcher dispatcher.Dispatcher) EventServer { +func NewMessageQueueEventServer(eventBroadcaster *event.EventBroadcaster, statusDispatcher dispatcher.Dispatcher) EventServer { sessionFactory := env().Database.SessionFactory - return &MQTTEventServer{ + return &MessageQueueEventServer{ instanceID: env().Config.MessageBroker.ClientID, eventInstanceDao: dao.NewEventInstanceDao(&sessionFactory), lockFactory: db.NewAdvisoryLockFactory(sessionFactory), @@ -72,11 +72,10 @@ func NewMQTTEventServer(eventBroadcaster *event.EventBroadcaster, statusDispatch } } -// Start initializes and runs the pulse server, updating and checking Maestro instances' liveness, -// initializes subscription to status update messages and triggers status resync based on -// instances' status and other conditions. -func (s *MQTTEventServer) Start(ctx context.Context) { - log.Infof("Starting pulse server") +// Start initializes and runs the event server. It starts the subscription +// to resource status update messages and the status dispatcher. +func (s *MessageQueueEventServer) Start(ctx context.Context) { + log.Infof("Starting message queue event server") // start subscribing to resource status update messages. s.startSubscription(ctx) @@ -85,12 +84,12 @@ func (s *MQTTEventServer) Start(ctx context.Context) { // wait until context is canceled <-ctx.Done() - log.Infof("Shutting down pulse server") + log.Infof("Shutting down message queue event server") } // startSubscription initiates the subscription to resource status update messages. // It runs asynchronously in the background until the provided context is canceled. -func (s *MQTTEventServer) startSubscription(ctx context.Context) { +func (s *MessageQueueEventServer) startSubscription(ctx context.Context) { s.sourceClient.Subscribe(ctx, func(action types.ResourceAction, resource *api.Resource) error { log.V(4).Infof("received action %s for resource %s", action, resource.ID) @@ -115,17 +114,17 @@ func (s *MQTTEventServer) startSubscription(ctx context.Context) { } // OnCreate will be called on each new resource creation event inserted into db. -func (s *MQTTEventServer) OnCreate(ctx context.Context, resourceID string) error { +func (s *MessageQueueEventServer) OnCreate(ctx context.Context, resourceID string) error { return s.sourceClient.OnCreate(ctx, resourceID) } // OnUpdate will be called on each new resource update event inserted into db. -func (s *MQTTEventServer) OnUpdate(ctx context.Context, resourceID string) error { +func (s *MessageQueueEventServer) OnUpdate(ctx context.Context, resourceID string) error { return s.sourceClient.OnUpdate(ctx, resourceID) } // OnDelete will be called on each new resource deletion event inserted into db. -func (s *MQTTEventServer) OnDelete(ctx context.Context, resourceID string) error { +func (s *MessageQueueEventServer) OnDelete(ctx context.Context, resourceID string) error { return s.sourceClient.OnDelete(ctx, resourceID) } @@ -133,7 +132,7 @@ func (s *MQTTEventServer) OnDelete(ctx context.Context, resourceID string) error // It does two things: // 1. build the resource status and broadcast it to subscribers // 2. 
add the event instance record to mark the event has been processed by the current instance -func (s *MQTTEventServer) OnStatusUpdate(ctx context.Context, eventID, resourceID string) error { +func (s *MessageQueueEventServer) OnStatusUpdate(ctx context.Context, eventID, resourceID string) error { statusEvent, sErr := s.statusEventService.Get(ctx, eventID) if sErr != nil { return fmt.Errorf("failed to get status event %s: %s", eventID, sErr.Error()) diff --git a/cmd/maestro/server/grpc_broker.go b/cmd/maestro/server/grpc_broker.go index 7385025e..2dee39e4 100644 --- a/cmd/maestro/server/grpc_broker.go +++ b/cmd/maestro/server/grpc_broker.go @@ -429,7 +429,7 @@ func (bkr *GRPCBroker) OnDelete(ctx context.Context, id string) error { // It does two things: // 1. build the resource status and broadcast it to subscribers // 2. add the event instance record to mark the event has been processed by the current instance -// TODO consider using a same way (pulse_server.OnStatusUpdate) to handle this +// TODO consider using a same way (MessageQueueEventServer.OnStatusUpdate) to handle this func (bkr *GRPCBroker) OnStatusUpdate(ctx context.Context, eventID, resourceID string) error { statusEvent, sErr := bkr.statusEventService.Get(ctx, eventID) if sErr != nil { diff --git a/pkg/config/event_server.go b/pkg/config/event_server.go index 03abb2ff..87bc5f67 100644 --- a/pkg/config/event_server.go +++ b/pkg/config/event_server.go @@ -11,7 +11,7 @@ const ( BroadcastSubscriptionType SubscriptionType = "broadcast" ) -// EventServerConfig contains the configuration for the maestro pulse server. +// EventServerConfig contains the configuration for the message queue event server. type EventServerConfig struct { SubscriptionType string `json:"subscription_type"` ConsistentHashConfig *ConsistentHashConfig `json:"consistent_hash_config"` @@ -45,8 +45,7 @@ func NewConsistentHashConfig() *ConsistentHashConfig { } // AddFlags configures the EventServerConfig with command line flags. -// It allows users to customize the interval for maestro instance pulses and subscription type. -// - "pulse-interval" sets the time between maestro instance pulses (in seconds) to indicate its liveness (default: 15 seconds). +// It allows users to customize the subscription type and ConsistentHashConfig settings. // - "subscription-type" specifies the subscription type for resource status updates from message broker, either "shared" or "broadcast". // "shared" subscription type uses MQTT feature to ensure only one Maestro instance receives resource status messages. // "broadcast" subscription type will make all Maestro instances to receive resource status messages and hash the message to determine which instance should process it. 
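For illustration, a minimal, self-contained sketch of how broadcast-mode hashing maps consumers to maestro instances. It assumes a bounded-load consistent hash ring in the style of github.com/buraksezer/consistent (whose Config mirrors the PartitionCount/ReplicationFactor/Load fields configured above) with github.com/cespare/xxhash as the hasher; the instance and consumer names are made up:

package main

import (
	"fmt"

	"github.com/buraksezer/consistent"
	"github.com/cespare/xxhash"
)

// instanceMember satisfies consistent.Member; String() returns the instance ID.
type instanceMember string

func (m instanceMember) String() string { return string(m) }

// xxHasher satisfies consistent.Hasher using xxhash.
type xxHasher struct{}

func (h xxHasher) Sum64(data []byte) uint64 { return xxhash.Sum64(data) }

func main() {
	// Ring configured with the defaults from NewConsistentHashConfig above.
	ring := consistent.New(nil, consistent.Config{
		PartitionCount:    7,
		ReplicationFactor: 20,
		Load:              1.25,
		Hasher:            xxHasher{},
	})

	// Ready maestro instances are added to the ring (OnInstanceUp); stale ones
	// are removed (OnInstanceDown).
	ring.Add(instanceMember("maestro-1"))
	ring.Add(instanceMember("maestro-2"))

	// In broadcast mode every instance receives every status message, but only
	// the instance that the consumer name hashes to processes it.
	for _, consumer := range []string{"cluster-a", "cluster-b", "cluster-c"} {
		owner := ring.LocateKey([]byte(consumer))
		fmt.Printf("consumer %s -> instance %s\n", consumer, owner.String())
	}
}

Marking an instance ready only after OnInstanceUp succeeds (as checkInstances does above) ensures the ring already contains it by the time its health check starts reporting 200.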
diff --git a/test/helper.go b/test/helper.go index 9d4c353b..ef374d39 100755 --- a/test/helper.go +++ b/test/helper.go @@ -214,7 +214,7 @@ func (helper *Helper) sendShutdownSignal() error { func (helper *Helper) startEventServer(ctx context.Context) { // helper.Env().Config.EventServer.SubscriptionType = "broadcast" - helper.EventServer = server.NewMQTTEventServer(helper.EventBroadcaster, helper.StatusDispatcher) + helper.EventServer = server.NewMessageQueueEventServer(helper.EventBroadcaster, helper.StatusDispatcher) go func() { klog.V(10).Info("Test event server started") helper.EventServer.Start(ctx) diff --git a/test/integration/status_hash_test.go b/test/integration/status_hash_test.go index 6873b5df..000f33aa 100644 --- a/test/integration/status_hash_test.go +++ b/test/integration/status_hash_test.go @@ -68,13 +68,13 @@ func TestEventServer(t *testing.T) { // the cluster1 name cannot be changed, because consistent hash makes it allocate to different instance. // the case here we want to the new consumer allocate to new instance(cluster1) which is a fake instance. - // after 3*pulseInterval (3s), it will relocate to maestro instance. + // after 3*heartbeatInterval (3s), it will relocate to maestro instance. clusterName := "cluster1" consumer := h.CreateConsumer(clusterName) // insert a new instance with the same name to consumer name // to make sure the consumer is hashed to the new instance firstly. - // after the new instance is stale after 3*pulseInterval (3s), the current + // after the new instance is stale after 3*heartbeatInterval (3s), the current // instance will take over the consumer and resync the resource status. _, err = instanceDao.Create(ctx, &api.ServerInstance{ Meta: api.Meta{
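To make the timing in this test concrete, a small self-contained sketch of the liveness window used by checkInstances: with heartbeatInterval expressed in seconds (1 in the test helper, 15 by default), an instance stays active, and is kept ready, only while its last heartbeat is newer than three intervals ago.

package main

import (
	"fmt"
	"time"
)

// isActive reports whether an instance that last heartbeated at lastHeartbeat
// is still considered live, given a heartbeat interval in seconds.
func isActive(lastHeartbeat time.Time, heartbeatIntervalSeconds int) bool {
	cutoff := time.Now().Add(-3 * time.Duration(heartbeatIntervalSeconds) * time.Second)
	return lastHeartbeat.After(cutoff)
}

func main() {
	const interval = 1 // seconds, as set in the test helper

	fresh := time.Now().Add(-2 * time.Second)  // within the 3s window -> stays ready
	stale := time.Now().Add(-10 * time.Second) // outside the window -> marked unready

	fmt.Println("fresh instance active:", isActive(fresh, interval)) // true
	fmt.Println("stale instance active:", isActive(stale, interval)) // false
}

Instances that fall outside the window are marked unready and removed from the hash ring via OnInstanceDown, which is what lets the current instance take over the fake instance's consumer in this test.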