diff --git a/api/admin/health/health.go b/api/admin/health/health.go index a8dfdfcca..6db522976 100644 --- a/api/admin/health/health.go +++ b/api/admin/health/health.go @@ -27,39 +27,50 @@ type Status struct { } type Health struct { - lock sync.RWMutex - timeBetweenBlocks time.Duration - repo *chain.Repository - p2p *comm.Communicator + lock sync.RWMutex + repo *chain.Repository + p2p *comm.Communicator + isNodeBootstrapped bool } -const delayBuffer = 5 * time.Second +const ( + defaultMaxTimeBetweenSlots = time.Duration(2*thor.BlockInterval) * time.Second + defaultMinPeerCount = 2 +) -func New(repo *chain.Repository, p2p *comm.Communicator, timeBetweenBlocks time.Duration) *Health { +func New(repo *chain.Repository, p2p *comm.Communicator) *Health { return &Health{ - repo: repo, - timeBetweenBlocks: timeBetweenBlocks + delayBuffer, - p2p: p2p, + repo: repo, + p2p: p2p, } } // isNetworkProgressing checks if the network is producing new blocks within the allowed interval. -func (h *Health) isNetworkProgressing(now time.Time, bestBlockTimestamp time.Time) bool { - return now.Sub(bestBlockTimestamp) <= h.timeBetweenBlocks +func (h *Health) isNetworkProgressing(now time.Time, bestBlockTimestamp time.Time, maxTimeBetweenSlots time.Duration) bool { + return now.Sub(bestBlockTimestamp) <= maxTimeBetweenSlots } // hasNodeBootstrapped checks if the node has bootstrapped by comparing the block interval. +// Once it's marked as done, it never reverts. 
func (h *Health) hasNodeBootstrapped(now time.Time, bestBlockTimestamp time.Time) bool { + if h.isNodeBootstrapped { + return true + } + blockInterval := time.Duration(thor.BlockInterval) * time.Second - return bestBlockTimestamp.Add(blockInterval).After(now) + if bestBlockTimestamp.Add(blockInterval).After(now) { + h.isNodeBootstrapped = true + } + + return h.isNodeBootstrapped } // isNodeConnectedP2P checks if the node is connected to peers -func (h *Health) isNodeConnectedP2P(peerCount int) bool { - return peerCount > 1 +func (h *Health) isNodeConnectedP2P(peerCount int, minPeerCount int) bool { + return peerCount >= minPeerCount } -func (h *Health) Status() (*Status, error) { +func (h *Health) Status(maxTimeBetweenSlots time.Duration, minPeerCount int) (*Status, error) { - h.lock.RLock() - defer h.lock.RUnlock() + h.lock.Lock() // exclusive lock: hasNodeBootstrapped below writes h.isNodeBootstrapped, which is a data race under RLock + defer h.lock.Unlock() @@ -71,7 +82,7 @@ func (h *Health) Status() (*Status, error) { // Fetch the current connected peers var connectedPeerCount int if h.p2p == nil { - connectedPeerCount = 5010 // ignore peers in solo mode + connectedPeerCount = minPeerCount // ignore peers in solo mode } else { connectedPeerCount = h.p2p.PeerCount() } @@ -79,9 +90,9 @@ func (h *Health) Status() (*Status, error) { now := time.Now() // Perform the checks - networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp) + networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp, maxTimeBetweenSlots) nodeBootstrapped := h.hasNodeBootstrapped(now, bestBlockTimestamp) - nodeConnected := h.isNodeConnectedP2P(connectedPeerCount) + nodeConnected := h.isNodeConnectedP2P(connectedPeerCount, minPeerCount) // Calculate overall health status healthy := networkProgressing && nodeBootstrapped && nodeConnected diff --git a/api/admin/health/health_api.go b/api/admin/health/health_api.go index d756ec6af..9363f82d0 100644 --- a/api/admin/health/health_api.go +++ b/api/admin/health/health_api.go @@ -7,6 +7,8 @@ package health import ( "net/http" + "strconv" + "time" "github.com/gorilla/mux" 
"github.com/vechain/thor/v2/api/utils" @@ -22,8 +24,28 @@ func NewAPI(healthStatus *Health) *API { } } -func (h *API) handleGetHealth(w http.ResponseWriter, _ *http.Request) error { - acc, err := h.healthStatus.Status() +func (h *API) handleGetHealth(w http.ResponseWriter, r *http.Request) error { + // Parse query parameters + query := r.URL.Query() + + // Default to constants if query parameters are not provided + maxTimeBetweenSlots := defaultMaxTimeBetweenSlots + minPeerCount := defaultMinPeerCount + + // Override with query parameters if they exist + if queryMaxTimeBetweenSlots := query.Get("maxTimeBetweenSlots"); queryMaxTimeBetweenSlots != "" { + if parsed, err := time.ParseDuration(queryMaxTimeBetweenSlots); err == nil { + maxTimeBetweenSlots = parsed + } + } + + if queryMinPeerCount := query.Get("minPeerCount"); queryMinPeerCount != "" { + if parsed, err := strconv.Atoi(queryMinPeerCount); err == nil { + minPeerCount = parsed + } + } + + acc, err := h.healthStatus.Status(maxTimeBetweenSlots, minPeerCount) if err != nil { return err } diff --git a/api/admin/health/health_api_test.go b/api/admin/health/health_api_test.go index 1c4339207..e50af0398 100644 --- a/api/admin/health/health_api_test.go +++ b/api/admin/health/health_api_test.go @@ -11,7 +11,6 @@ import ( "net/http" "net/http/httptest" "testing" - "time" "github.com/gorilla/mux" "github.com/stretchr/testify/assert" @@ -39,7 +38,7 @@ func initAPIServer(t *testing.T) { router := mux.NewRouter() NewAPI( - New(thorChain.Repo(), comm.New(thorChain.Repo(), txpool.New(thorChain.Repo(), nil, txpool.Options{})), time.Second), + New(thorChain.Repo(), comm.New(thorChain.Repo(), txpool.New(thorChain.Repo(), nil, txpool.Options{}))), ).Mount(router, "/health") ts = httptest.NewServer(router) diff --git a/api/admin/health/health_test.go b/api/admin/health/health_test.go index 34fc145bf..034c4e64e 100644 --- a/api/admin/health/health_test.go +++ b/api/admin/health/health_test.go @@ -10,13 +10,10 @@ import ( "time" 
"github.com/stretchr/testify/assert" - "github.com/vechain/thor/v2/thor" ) func TestHealth_isNetworkProgressing(t *testing.T) { - h := &Health{ - timeBetweenBlocks: 10 * time.Second, - } + h := &Health{} now := time.Now() @@ -32,14 +29,14 @@ func TestHealth_isNetworkProgressing(t *testing.T) { }, { name: "Not Progressing - block outside timeBetweenBlocks", - bestBlockTimestamp: now.Add(-15 * time.Second), + bestBlockTimestamp: now.Add(-25 * time.Second), expectedProgressing: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - isProgressing := h.isNetworkProgressing(now, tt.bestBlockTimestamp) + isProgressing := h.isNetworkProgressing(now, tt.bestBlockTimestamp, defaultMaxTimeBetweenSlots) assert.Equal(t, tt.expectedProgressing, isProgressing, "isNetworkProgressing result mismatch") }) } @@ -47,7 +44,6 @@ func TestHealth_isNetworkProgressing(t *testing.T) { func TestHealth_hasNodeBootstrapped(t *testing.T) { h := &Health{} - blockInterval := time.Duration(thor.BlockInterval) * time.Second now := time.Now() tests := []struct { @@ -55,15 +51,21 @@ func TestHealth_hasNodeBootstrapped(t *testing.T) { bestBlockTimestamp time.Time expectedBootstrap bool }{ + // keep the order as it matters for health state + { + name: "Not Bootstrapped - block timestamp outside interval", + bestBlockTimestamp: now.Add(-defaultMaxTimeBetweenSlots + 1), + expectedBootstrap: false, + }, { name: "Bootstrapped - block timestamp within interval", - bestBlockTimestamp: now.Add(-blockInterval + 1*time.Second), + bestBlockTimestamp: now.Add(defaultMaxTimeBetweenSlots), expectedBootstrap: true, }, { - name: "Not Bootstrapped - block timestamp outside interval", - bestBlockTimestamp: now.Add(-blockInterval - 1*time.Second), - expectedBootstrap: false, + name: "Bootstrapped only once", + bestBlockTimestamp: now.Add(-defaultMaxTimeBetweenSlots + 1), + expectedBootstrap: true, }, } @@ -85,7 +87,7 @@ func TestHealth_isNodeConnectedP2P(t *testing.T) { }{ { name: "Connected - more 
than one peer", - peerCount: 2, + peerCount: 3, expectedConnected: true, }, { @@ -97,7 +99,7 @@ func TestHealth_isNodeConnectedP2P(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - isConnected := h.isNodeConnectedP2P(tt.peerCount) + isConnected := h.isNodeConnectedP2P(tt.peerCount, defaultMinPeerCount) assert.Equal(t, tt.expectedConnected, isConnected, "isNodeConnectedP2P result mismatch") }) } diff --git a/api/admin_server.go b/api/admin_server.go index be8da5dfe..f2315aeb1 100644 --- a/api/admin_server.go +++ b/api/admin_server.go @@ -24,14 +24,13 @@ func StartAdminServer( logLevel *slog.LevelVar, repo *chain.Repository, p2p *comm.Communicator, - timeBetweenBlocks time.Duration, ) (string, func(), error) { listener, err := net.Listen("tcp", addr) if err != nil { return "", nil, errors.Wrapf(err, "listen admin API addr [%v]", addr) } - adminHandler := admin.New(logLevel, health.New(repo, p2p, timeBetweenBlocks)) + adminHandler := admin.New(logLevel, health.New(repo, p2p)) srv := &http.Server{Handler: adminHandler, ReadHeaderTimeout: time.Second, ReadTimeout: 5 * time.Second} var goes co.Goes diff --git a/cmd/thor/main.go b/cmd/thor/main.go index 5837eedf6..e80d17d23 100644 --- a/cmd/thor/main.go +++ b/cmd/thor/main.go @@ -239,7 +239,6 @@ func defaultAction(ctx *cli.Context) error { logLevel, repo, p2pCommunicator.Communicator(), - time.Duration(thor.BlockInterval), ) if err != nil { return fmt.Errorf("unable to start admin server - %w", err) @@ -319,10 +318,6 @@ func soloAction(ctx *cli.Context) error { onDemandBlockProduction := ctx.Bool(onDemandFlag.Name) blockProductionInterval := ctx.Uint64(blockInterval.Name) - if blockProductionInterval == 0 { - return errors.New("block-interval cannot be zero") - } - blockProductionHealthCheck := time.Duration(blockProductionInterval) * time.Second // enable metrics as soon as possible metricsURL := "" @@ -382,7 +377,7 @@ func soloAction(ctx *cli.Context) error { adminURL := "" if 
ctx.Bool(enableAdminFlag.Name) { - url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, repo, nil, blockProductionHealthCheck) + url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, repo, nil) if err != nil { return fmt.Errorf("unable to start admin server - %w", err) }