Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Healthcheck query publickey #510

Merged
merged 2 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions gas/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ import (
)

var (
metricNodeMinPrice = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_oasis_web3_gateway_gas_orcale_node_min_price", Help: "Min gas price periodically queried from the node."})
metricComputedPrice = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_oasis_web3_gateway_gas_oracle_computed_price", Help: "Computed recommended gas price based on recent full blocks. -1 if none (no recent full blocks)."})
metricNodeMinPrice = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_web3_gateway_gas_orcale_node_min_price", Help: "Min gas price periodically queried from the node."})
metricComputedPrice = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_web3_gateway_gas_oracle_computed_price", Help: "Computed recommended gas price based on recent full blocks. -1 if none (no recent full blocks)."})
)

// Backend is the gas price oracle backend.
Expand Down
6 changes: 3 additions & 3 deletions indexer/backend_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,21 @@ const periodicMetricsInterval = 60 * time.Second
var (
metricCacheHits = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "oasis_oasis_web3_gateway_cache_hits",
Name: "oasis_web3_gateway_cache_hits",
Help: "Number of cache hits.",
},
[]string{"cache"},
)
metricCacheMisses = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "oasis_oasis_web3_gateway_cache_misses",
Name: "oasis_web3_gateway_cache_misses",
Help: "Number of cache misses.",
},
[]string{"cache"},
)
metricCacheHitRatio = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "oasis_oasis_web3_gateway_cache_hit_ratio",
Name: "oasis_web3_gateway_cache_hit_ratio",
Help: "Percent of Hits over all accesses (Hits + Misses).",
},
[]string{"cache"},
Expand Down
6 changes: 3 additions & 3 deletions indexer/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ const (
)

var (
metricBlockIndexed = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_oasis_web3_gateway_block_indexed", Help: "Indexed block heights."})
metricBlockPruned = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_oasis_web3_gateway_block_pruned", Help: "Pruned block heights."})
metricHealthy = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_oasis_web3_gateway_health", Help: "1 if gateway healthcheck is reporting as healthy, 0 otherwise."})
metricBlockIndexed = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_web3_gateway_block_indexed", Help: "Indexed block heights."})
metricBlockPruned = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_web3_gateway_block_pruned", Help: "Pruned block heights."})
metricHealthy = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_web3_gateway_indexer_health", Help: "1 if gateway indexer healthcheck is reporting as healthy, 0 otherwise."})
)

// ErrNotHealthy is the error returned if the gateway is unhealthy.
Expand Down
6 changes: 4 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,10 @@ func runRoot() error {
}
}

w3.RegisterAPIs(rpc.GetRPCAPIs(ctx, rc, archiveClient, backend, gasPriceOracle, cfg.Gateway, es))
w3.RegisterHealthChecks([]server.HealthCheck{indx})
apis, checks := rpc.GetRPCAPIs(ctx, rc, archiveClient, backend, gasPriceOracle, cfg.Gateway, es)
w3.RegisterAPIs(apis)
checks = append(checks, indx)
w3.RegisterHealthChecks(checks)

svr := server.Server{
Config: cfg,
Expand Down
43 changes: 20 additions & 23 deletions rpc/apis.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,74 +20,71 @@ import (
"github.com/oasisprotocol/oasis-web3-gateway/rpc/oasis"
"github.com/oasisprotocol/oasis-web3-gateway/rpc/txpool"
"github.com/oasisprotocol/oasis-web3-gateway/rpc/web3"
"github.com/oasisprotocol/oasis-web3-gateway/server"
)

// GetRPCAPIs returns the list of all APIs.
// GetRPCAPIs returns the list of enabled RPC APIs and accompanying health checks.
func GetRPCAPIs(
_ context.Context,
ctx context.Context,
client client.RuntimeClient,
archiveClient *archive.Client,
backend indexer.Backend,
gasPriceOracle gas.Backend,
config *conf.GatewayConfig,
eventSystem *eventFilters.EventSystem,
) []ethRpc.API {
) ([]ethRpc.API, []server.HealthCheck) {
var apis []ethRpc.API
var healthChecks []server.HealthCheck

// Web3 JSON-RPC Spec APIs - always enabled.
web3Service := web3.NewPublicAPI()
ethService := eth.NewPublicAPI(client, archiveClient, logging.GetLogger("eth_rpc"), config.ChainID, backend, gasPriceOracle, config.MethodLimits)
netService := net.NewPublicAPI(config.ChainID)
txpoolService := txpool.NewPublicAPI()
filtersService := filters.NewPublicAPI(client, logging.GetLogger("eth_filters"), backend, eventSystem)
oasisService := oasis.NewPublicAPI(client, logging.GetLogger("oasis"))

if config.Monitoring.Enabled() {
web3Service = web3.NewMetricsWrapper(web3Service)
netService = net.NewMetricsWrapper(netService)
ethService = ethmetrics.NewMetricsWrapper(ethService, logging.GetLogger("eth_rpc_metrics"), backend)
txpoolService = txpool.NewMetricsWrapper(txpoolService)
filtersService = filters.NewMetricsWrapper(filtersService)
oasisService = oasis.NewMetricsWrapper(oasisService)
}

apis = append(apis,
ethRpc.API{
Namespace: "web3",
Version: "1.0",
Service: web3Service,
Public: true,
},
ethRpc.API{
Namespace: "net",
Version: "1.0",
Service: netService,
Public: true,
},
ethRpc.API{
Namespace: "eth",
Version: "1.0",
Service: ethService,
Public: true,
},
ethRpc.API{
Namespace: "txpool",
Version: "1.0",
Service: txpoolService,
Public: true,
},
ethRpc.API{
Namespace: "eth",
Version: "1.0",
Service: filtersService,
Public: true,
},
ethRpc.API{
)

// Configure oasis_ APIs if enabled.
if config.ExposeOasisRPCs {
oasisService, oasisHealth := oasis.NewPublicAPI(ctx, client, logging.GetLogger("oasis"))
if config.Monitoring.Enabled() {
oasisService = oasis.NewMetricsWrapper(oasisService)
}

apis = append(apis, ethRpc.API{
Namespace: "oasis",
Version: "1.0",
Service: oasisService,
Public: config.ExposeOasisRPCs,
},
)
})
healthChecks = append(healthChecks, oasisHealth)
}

return apis
return apis, healthChecks
}
4 changes: 2 additions & 2 deletions rpc/eth/filters/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
var (
durations = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Name: "oasis_oasis_web3_gateway_subscription_seconds",
Name: "oasis_web3_gateway_subscription_seconds",
// Buckets ranging from 1 second to 24 hours.
Buckets: []float64{1, 10, 30, 60, 600, 1800, 3600, 7200, 21600, 86400},
Help: "Histogram for the eth subscription API subscriptions duration.",
Expand All @@ -23,7 +23,7 @@ var (
)
inflightSubs = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "oasis_oasis_web3_gateway_subscription_inflight",
Name: "oasis_web3_gateway_subscription_inflight",
Help: "Number of concurrent eth inflight subscriptions.",
},
[]string{"method_name"},
Expand Down
2 changes: 1 addition & 1 deletion rpc/eth/metrics/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (

var requestHeights = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Name: "oasis_oasis_web3_gateway_api_request_heights",
Name: "oasis_web3_gateway_api_request_heights",
Buckets: []float64{0, 1, 2, 3, 5, 10, 50, 100, 500, 1000},
Help: "Histogram of eth API request heights (difference from the latest height).",
},
Expand Down
10 changes: 5 additions & 5 deletions rpc/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ import (
)

var (
durations = promauto.NewHistogramVec(prometheus.HistogramOpts{Name: "oasis_oasis_web3_gateway_api_seconds", Buckets: []float64{0.00001, 0.0001, .001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}, Help: "Histogram for the eth API requests duration."}, []string{"method_name"})
requests = promauto.NewCounterVec(prometheus.CounterOpts{Name: "oasis_oasis_web3_gateway_api_request", Help: "Counter for API requests."}, []string{"method_name"})
failures = promauto.NewCounterVec(prometheus.CounterOpts{Name: "oasis_oasis_web3_gateway_api_failure", Help: "Counter for API request failures."}, []string{"method_name"})
successes = promauto.NewCounterVec(prometheus.CounterOpts{Name: "oasis_oasis_web3_gateway_api_success", Help: "Counter for API successful requests."}, []string{"method_name"})
inflight = promauto.NewGaugeVec(prometheus.GaugeOpts{Name: "oasis_oasis_web3_gateway_api_inflight", Help: "Number of inflight API request."}, []string{"method_name"})
durations = promauto.NewHistogramVec(prometheus.HistogramOpts{Name: "oasis_web3_gateway_api_seconds", Buckets: []float64{0.00001, 0.0001, .001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}, Help: "Histogram for the eth API requests duration."}, []string{"method_name"})
requests = promauto.NewCounterVec(prometheus.CounterOpts{Name: "oasis_web3_gateway_api_request", Help: "Counter for API requests."}, []string{"method_name"})
failures = promauto.NewCounterVec(prometheus.CounterOpts{Name: "oasis_web3_gateway_api_failure", Help: "Counter for API request failures."}, []string{"method_name"})
successes = promauto.NewCounterVec(prometheus.CounterOpts{Name: "oasis_web3_gateway_api_success", Help: "Counter for API successful requests."}, []string{"method_name"})
inflight = promauto.NewGaugeVec(prometheus.GaugeOpts{Name: "oasis_web3_gateway_api_inflight", Help: "Number of inflight API request."}, []string{"method_name"})
)

// GetAPIMethodMetrics returns the method metrics for the specified API call.
Expand Down
17 changes: 11 additions & 6 deletions rpc/oasis/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ import (
"github.com/oasisprotocol/oasis-core/go/common/logging"
"github.com/oasisprotocol/oasis-sdk/client-sdk/go/client"
"github.com/oasisprotocol/oasis-sdk/client-sdk/go/modules/core"
"github.com/oasisprotocol/oasis-web3-gateway/server"
)

var ErrInternalError = errors.New("internal error")

// API is the net_ prefixed set of APIs in the Web3 JSON-RPC spec.
// API is the oasis_ prefixed set of APIs.
type API interface {
// CallDataPublicKey returns the calldata public key for the runtime with the provided ID.
CallDataPublicKey(ctx context.Context) (*CallDataPublicKey, error)
Expand All @@ -35,19 +36,23 @@ type CallDataPublicKey struct {

type publicAPI struct {
client client.RuntimeClient
Logger *logging.Logger
logger *logging.Logger
}

// NewPublicAPI creates an instance of the Web3 API.
// NewPublicAPI creates an instance of the Web3 API and accompanying health check.
func NewPublicAPI(
ctx context.Context,
client client.RuntimeClient,
logger *logging.Logger,
) API {
return &publicAPI{client: client, Logger: logger}
) (API, server.HealthCheck) {
health := &healthChecker{ctx: ctx, client: client, logger: logger}
go health.run()

return &publicAPI{client: client, logger: logger}, health
}

func (api *publicAPI) CallDataPublicKey(ctx context.Context) (*CallDataPublicKey, error) {
logger := api.Logger.With("method", "oasis_callDataPublicKey")
logger := api.logger.With("method", "oasis_callDataPublicKey")
res, err := core.NewV1(api.client).CallDataPublicKey(ctx)
if err != nil {
logger.Error("failed to fetch public key", "err", err)
Expand Down
68 changes: 68 additions & 0 deletions rpc/oasis/health.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package oasis

import (
"context"
"fmt"
"sync/atomic"
"time"

"github.com/oasisprotocol/oasis-core/go/common/logging"
"github.com/oasisprotocol/oasis-sdk/client-sdk/go/client"
"github.com/oasisprotocol/oasis-sdk/client-sdk/go/modules/core"
)

const (
// healthCheckInterval is how long the background loop waits between
// health probes.
healthCheckInterval = 30 * time.Second
// healthIterationTimeout bounds the context of a single health probe.
healthIterationTimeout = 15 * time.Second
)

// healthChecker periodically probes the runtime client and remembers whether
// the most recent probe succeeded.
type healthChecker struct {
// ctx is the parent context for every probe; the run loop exits once it
// is done.
ctx context.Context
// client is the runtime client used to query the calldata public key.
client client.RuntimeClient
// logger receives probe results and the shutdown message.
logger *logging.Logger

// health is 1 when the last probe succeeded and 0 otherwise. It is only
// read and written through sync/atomic.
health uint32
}

// Health implements server.HealthCheck.
//
// It returns nil when the most recently recorded probe result is healthy and
// an error otherwise. The flag is read atomically.
func (h *healthChecker) Health() error {
	healthy := atomic.LoadUint32(&h.health) != 0
	if healthy {
		return nil
	}
	return fmt.Errorf("oasis API not healthy")
}

// updateHealth atomically records the outcome of a probe: 1 is stored for
// healthy, 0 for unhealthy.
func (h *healthChecker) updateHealth(healthy bool) {
	var flag uint32 // zero value means unhealthy
	if healthy {
		flag = 1
	}
	atomic.StoreUint32(&h.health, flag)
}

// run is the health checker's background loop.
//
// It probes the runtime once immediately and then once every
// healthCheckInterval by querying the calldata public key, recording each
// outcome via updateHealth. Every probe is bounded by healthIterationTimeout.
// The loop returns once h.ctx is done, marking the checker unhealthy first.
func (h *healthChecker) run() {
	// probe performs a single health check iteration.
	probe := func() {
		ctx, cancel := context.WithTimeout(h.ctx, healthIterationTimeout)
		defer cancel()

		// Query public keys.
		if _, err := core.NewV1(h.client).CallDataPublicKey(ctx); err != nil {
			h.logger.Error("failed to fetch public key", "err", err)
			h.updateHealth(false)
			return
		}

		h.logger.Debug("oasis_ RPC healthy")
		h.updateHealth(true)
	}

	// Probe right away so that a working gateway does not report unhealthy
	// for a full healthCheckInterval after startup.
	probe()

	// Reuse a single ticker for the lifetime of the loop and release it on
	// exit, rather than allocating a new timer on every iteration.
	ticker := time.NewTicker(healthCheckInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			probe()
		case <-h.ctx.Done():
			h.updateHealth(false)
			h.logger.Debug("health checker stopping", "reason", h.ctx.Err())
			return
		}
	}
}
6 changes: 6 additions & 0 deletions server/json_rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,15 @@ import (

"github.com/ethereum/go-ethereum/rpc"
"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/rs/cors"

"github.com/oasisprotocol/oasis-core/go/common/logging"
)

var metricHealthy = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_web3_gateway_health", Help: "1 if gateway healthcheck is reporting as healthy, 0 otherwise."})

// httpConfig is the JSON-RPC/HTTP configuration.
type httpConfig struct {
Modules []string
Expand Down Expand Up @@ -77,9 +81,11 @@ func healthCheckHandler(healthChecks []HealthCheck) func(w http.ResponseWriter,
for _, h := range healthChecks {
if err := h.Health(); err != nil {
w.WriteHeader(http.StatusServiceUnavailable)
metricHealthy.Set(0)
return
}
}
metricHealthy.Set(1)
w.WriteHeader(http.StatusOK)
}
}
Expand Down
2 changes: 1 addition & 1 deletion storage/psql/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
"github.com/oasisprotocol/oasis-web3-gateway/storage"
)

var durations = promauto.NewHistogramVec(prometheus.HistogramOpts{Name: "oasis_oasis_web3_gateway_psql_query_seconds", Buckets: []float64{0.001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}, Help: "Histogram for the postgresql query duration."}, []string{"query"})
var durations = promauto.NewHistogramVec(prometheus.HistogramOpts{Name: "oasis_web3_gateway_psql_query_seconds", Buckets: []float64{0.001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}, Help: "Histogram for the postgresql query duration."}, []string{"query"})

func measureDuration(label string) func() {
timer := prometheus.NewTimer(durations.WithLabelValues(label))
Expand Down
2 changes: 1 addition & 1 deletion tests/rpc/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (

func TestHealthCheck(t *testing.T) {
// Ensure the initial health-check was done.
<-time.After(20 * time.Second)
<-time.After(50 * time.Second)

ctx, cancel := context.WithTimeout(context.Background(), OasisBlockTimeout)
defer cancel()
Expand Down
6 changes: 4 additions & 2 deletions tests/rpc/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,10 @@ func Setup() error {
return fmt.Errorf("setup: failed starting gas price oracle: %w", err)
}

w3.RegisterAPIs(rpc.GetRPCAPIs(context.Background(), rc, nil, backend, gasPriceOracle, tests.TestsConfig.Gateway, es))
w3.RegisterHealthChecks([]server.HealthCheck{indx})
apis, checks := rpc.GetRPCAPIs(ctx, rc, nil, backend, gasPriceOracle, tests.TestsConfig.Gateway, es)
w3.RegisterAPIs(apis)
checks = append(checks, indx)
w3.RegisterHealthChecks(checks)

if err = w3.Start(); err != nil {
w3.Close()
Expand Down
Loading