Skip to content

Commit

Permalink
added defaults + api health query
Browse files Browse the repository at this point in the history
  • Loading branch information
otherview committed Nov 20, 2024
1 parent 430c458 commit f1b0000
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 43 deletions.
47 changes: 29 additions & 18 deletions api/admin/health/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,39 +27,50 @@ type Status struct {
}

// Health holds the state the health check works from: the chain repository,
// the p2p communicator and a latched "bootstrapped" flag, together with a lock.
// NOTE(review): this span comes from a diff view, so the pre-change fields
// (lock, timeBetweenBlocks, repo, p2p) appear directly before their
// post-change counterparts; presumably only the second group survives the commit.
type Health struct {
lock sync.RWMutex
// Pre-change threshold read by the old isNetworkProgressing; the new code
// receives the threshold as an argument instead.
timeBetweenBlocks time.Duration
repo *chain.Repository
p2p *comm.Communicator
// Status holds the read side of this lock while it runs its checks.
lock sync.RWMutex
// Chain repository handed to New.
// NOTE(review): presumably the source of the best block timestamp — the lines that read it are collapsed in this view.
repo *chain.Repository
// Peer communicator; nil in solo mode, where Status does not call PeerCount.
p2p *comm.Communicator
// Set to true by hasNodeBootstrapped and never cleared afterwards.
isNodeBootstrapped bool
}

const delayBuffer = 5 * time.Second
// Fallback thresholds for the health endpoint; the handler uses them unless a
// valid query parameter overrides them.
const (
	// defaultMinPeerCount is the fewest connected peers that still counts as
	// "connected".
	defaultMinPeerCount = 2

	// defaultMaxTimeBetweenSlots is the maximum tolerated age of the best
	// block: two block intervals, expressed as a time.Duration.
	defaultMaxTimeBetweenSlots = 2 * time.Duration(thor.BlockInterval) * time.Second
)

// New builds a Health from the chain repository and the p2p communicator.
// p2p may be nil (the solo command passes nil); Status then does not query a
// peer count.
// NOTE(review): diff view — the three-argument signature and the
// timeBetweenBlocks initialiser are the pre-change lines; the two-argument
// form is the one the visible callers use.
func New(repo *chain.Repository, p2p *comm.Communicator, timeBetweenBlocks time.Duration) *Health {
func New(repo *chain.Repository, p2p *comm.Communicator) *Health {
return &Health{
repo: repo,
// Pre-change: the configured interval was padded with delayBuffer.
timeBetweenBlocks: timeBetweenBlocks + delayBuffer,
p2p: p2p,
repo: repo,
p2p: p2p,
}
}

// isNetworkProgressing checks if the network is producing new blocks within the allowed interval.
func (h *Health) isNetworkProgressing(now time.Time, bestBlockTimestamp time.Time) bool {
return now.Sub(bestBlockTimestamp) <= h.timeBetweenBlocks
// isNetworkProgressing reports whether the best block is recent enough: the
// time elapsed from bestBlockTimestamp to now must not exceed
// maxTimeBetweenSlots (an age exactly equal to the limit still passes).
func (h *Health) isNetworkProgressing(now time.Time, bestBlockTimestamp time.Time, maxTimeBetweenSlots time.Duration) bool {
	sinceBestBlock := now.Sub(bestBlockTimestamp)
	return sinceBestBlock <= maxTimeBetweenSlots
}

// hasNodeBootstrapped checks if the node has bootstrapped by comparing the block interval.
// Once it's marked as done, it never reverts.
//
// The node is marked bootstrapped the first time bestBlockTimestamp plus one
// block interval (thor.BlockInterval seconds) lies after now, i.e. the best
// block is less than one interval old.
// NOTE(review): this method writes h.isNodeBootstrapped, but Status calls it
// while holding only h.lock.RLock(), so concurrent Status calls can race on
// the flag — consider taking the write lock or using an atomic flag.
func (h *Health) hasNodeBootstrapped(now time.Time, bestBlockTimestamp time.Time) bool {
// Latched: once set, skip the timestamp comparison entirely.
if h.isNodeBootstrapped {
return true
}

blockInterval := time.Duration(thor.BlockInterval) * time.Second
// NOTE(review): pre-change line left over from the diff view; the latching
// block below is its replacement.
return bestBlockTimestamp.Add(blockInterval).After(now)
if bestBlockTimestamp.Add(blockInterval).After(now) {
h.isNodeBootstrapped = true
}

return h.isNodeBootstrapped
}

// isNodeConnectedP2P checks if the node is connected to peers
func (h *Health) isNodeConnectedP2P(peerCount int) bool {
return peerCount > 1
// isNodeConnectedP2P reports whether the node has enough peers: peerCount must
// be at least minPeerCount.
func (h *Health) isNodeConnectedP2P(peerCount int, minPeerCount int) bool {
	return minPeerCount <= peerCount
}

func (h *Health) Status() (*Status, error) {
func (h *Health) Status(maxTimeBetweenSlots time.Duration, minPeerCount int) (*Status, error) {
h.lock.RLock()
defer h.lock.RUnlock()

Expand All @@ -71,17 +82,17 @@ func (h *Health) Status() (*Status, error) {
// Fetch the current connected peers
var connectedPeerCount int
if h.p2p == nil {
connectedPeerCount = 5010 // ignore peers in solo mode
connectedPeerCount = minPeerCount // ignore peers in solo mode
} else {
connectedPeerCount = h.p2p.PeerCount()
}

now := time.Now()

// Perform the checks
networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp)
networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp, maxTimeBetweenSlots)
nodeBootstrapped := h.hasNodeBootstrapped(now, bestBlockTimestamp)
nodeConnected := h.isNodeConnectedP2P(connectedPeerCount)
nodeConnected := h.isNodeConnectedP2P(connectedPeerCount, minPeerCount)

// Calculate overall health status
healthy := networkProgressing && nodeBootstrapped && nodeConnected
Expand Down
26 changes: 24 additions & 2 deletions api/admin/health/health_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ package health

import (
"net/http"
"strconv"
"time"

"github.com/gorilla/mux"
"github.com/vechain/thor/v2/api/utils"
Expand All @@ -22,8 +24,28 @@ func NewAPI(healthStatus *Health) *API {
}
}

func (h *API) handleGetHealth(w http.ResponseWriter, _ *http.Request) error {
acc, err := h.healthStatus.Status()
func (h *API) handleGetHealth(w http.ResponseWriter, r *http.Request) error {
// Parse query parameters
query := r.URL.Query()

// Default to constants if query parameters are not provided
maxTimeBetweenSlots := defaultMaxTimeBetweenSlots
minPeerCount := defaultMinPeerCount

// Override with query parameters if they exist
if queryMaxTimeBetweenSlots := query.Get("maxTimeBetweenSlots"); queryMaxTimeBetweenSlots != "" {
if parsed, err := time.ParseDuration(queryMaxTimeBetweenSlots); err == nil {
maxTimeBetweenSlots = parsed
}
}

if queryMinPeerCount := query.Get("minPeerCount"); queryMinPeerCount != "" {
if parsed, err := strconv.Atoi(queryMinPeerCount); err == nil {
minPeerCount = parsed
}
}

acc, err := h.healthStatus.Status(maxTimeBetweenSlots, minPeerCount)
if err != nil {
return err
}
Expand Down
3 changes: 1 addition & 2 deletions api/admin/health/health_api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"net/http"
"net/http/httptest"
"testing"
"time"

"github.com/gorilla/mux"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -39,7 +38,7 @@ func initAPIServer(t *testing.T) {

router := mux.NewRouter()
NewAPI(
New(thorChain.Repo(), comm.New(thorChain.Repo(), txpool.New(thorChain.Repo(), nil, txpool.Options{})), time.Second),
New(thorChain.Repo(), comm.New(thorChain.Repo(), txpool.New(thorChain.Repo(), nil, txpool.Options{}))),
).Mount(router, "/health")

ts = httptest.NewServer(router)
Expand Down
28 changes: 15 additions & 13 deletions api/admin/health/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,10 @@ import (
"time"

"github.com/stretchr/testify/assert"
"github.com/vechain/thor/v2/thor"
)

func TestHealth_isNetworkProgressing(t *testing.T) {
h := &Health{
timeBetweenBlocks: 10 * time.Second,
}
h := &Health{}

now := time.Now()

Expand All @@ -32,38 +29,43 @@ func TestHealth_isNetworkProgressing(t *testing.T) {
},
{
name: "Not Progressing - block outside timeBetweenBlocks",
bestBlockTimestamp: now.Add(-15 * time.Second),
bestBlockTimestamp: now.Add(-25 * time.Second),
expectedProgressing: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
isProgressing := h.isNetworkProgressing(now, tt.bestBlockTimestamp)
isProgressing := h.isNetworkProgressing(now, tt.bestBlockTimestamp, defaultMaxTimeBetweenSlots)
assert.Equal(t, tt.expectedProgressing, isProgressing, "isNetworkProgressing result mismatch")
})
}
}

func TestHealth_hasNodeBootstrapped(t *testing.T) {
h := &Health{}
blockInterval := time.Duration(thor.BlockInterval) * time.Second
now := time.Now()

tests := []struct {
name string
bestBlockTimestamp time.Time
expectedBootstrap bool
}{
// keep the order as it matters for health state
{
name: "Not Bootstrapped - block timestamp outside interval",
bestBlockTimestamp: now.Add(-defaultMaxTimeBetweenSlots + 1),
expectedBootstrap: false,
},
{
name: "Bootstrapped - block timestamp within interval",
bestBlockTimestamp: now.Add(-blockInterval + 1*time.Second),
bestBlockTimestamp: now.Add(defaultMaxTimeBetweenSlots),
expectedBootstrap: true,
},
{
name: "Not Bootstrapped - block timestamp outside interval",
bestBlockTimestamp: now.Add(-blockInterval - 1*time.Second),
expectedBootstrap: false,
name: "Bootstrapped only once",
bestBlockTimestamp: now.Add(-defaultMaxTimeBetweenSlots + 1),
expectedBootstrap: true,
},
}

Expand All @@ -85,7 +87,7 @@ func TestHealth_isNodeConnectedP2P(t *testing.T) {
}{
{
name: "Connected - more than one peer",
peerCount: 2,
peerCount: 3,
expectedConnected: true,
},
{
Expand All @@ -97,7 +99,7 @@ func TestHealth_isNodeConnectedP2P(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
isConnected := h.isNodeConnectedP2P(tt.peerCount)
isConnected := h.isNodeConnectedP2P(tt.peerCount, defaultMinPeerCount)
assert.Equal(t, tt.expectedConnected, isConnected, "isNodeConnectedP2P result mismatch")
})
}
Expand Down
3 changes: 1 addition & 2 deletions api/admin_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,13 @@ func StartAdminServer(
logLevel *slog.LevelVar,
repo *chain.Repository,
p2p *comm.Communicator,
timeBetweenBlocks time.Duration,
) (string, func(), error) {
listener, err := net.Listen("tcp", addr)
if err != nil {
return "", nil, errors.Wrapf(err, "listen admin API addr [%v]", addr)
}

adminHandler := admin.New(logLevel, health.New(repo, p2p, timeBetweenBlocks))
adminHandler := admin.New(logLevel, health.New(repo, p2p))

srv := &http.Server{Handler: adminHandler, ReadHeaderTimeout: time.Second, ReadTimeout: 5 * time.Second}
var goes co.Goes
Expand Down
7 changes: 1 addition & 6 deletions cmd/thor/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ func defaultAction(ctx *cli.Context) error {
logLevel,
repo,
p2pCommunicator.Communicator(),
time.Duration(thor.BlockInterval),
)
if err != nil {
return fmt.Errorf("unable to start admin server - %w", err)
Expand Down Expand Up @@ -319,10 +318,6 @@ func soloAction(ctx *cli.Context) error {

onDemandBlockProduction := ctx.Bool(onDemandFlag.Name)
blockProductionInterval := ctx.Uint64(blockInterval.Name)
if blockProductionInterval == 0 {
return errors.New("block-interval cannot be zero")
}
blockProductionHealthCheck := time.Duration(blockProductionInterval) * time.Second

// enable metrics as soon as possible
metricsURL := ""
Expand Down Expand Up @@ -382,7 +377,7 @@ func soloAction(ctx *cli.Context) error {

adminURL := ""
if ctx.Bool(enableAdminFlag.Name) {
url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, repo, nil, blockProductionHealthCheck)
url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, repo, nil)
if err != nil {
return fmt.Errorf("unable to start admin server - %w", err)
}
Expand Down

0 comments on commit f1b0000

Please sign in to comment.