Skip to content

Commit

Permalink
metrics, pprof: support reloading services with SIGHUP (#3016)
Browse files Browse the repository at this point in the history
  • Loading branch information
roman-khimov authored Nov 23, 2024
2 parents 2bb903c + b79f58f commit 339b4cb
Show file tree
Hide file tree
Showing 9 changed files with 106 additions and 24 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ attribute, which is used for container domain name in NNS contracts (#2954)
- New `peapod-to-fstree` tool providing peapod-to-fstree data migration (#3013)
- Reloading node attributes with SIGHUP (#3005)
- Reloading pool sizes (#3018)
- Reloading pprof/metrics services with SIGHUP (#3016)

### Fixed
- Do not search for tombstones when handling their expiration, use local indexes instead (#2929)
Expand Down
52 changes: 46 additions & 6 deletions cmd/neofs-node/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
shardconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard"
fstreeconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard/blobstor/fstree"
loggerconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/logger"
metricsconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/metrics"
morphconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/morph"
nodeconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/node"
objectconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/object"
Expand Down Expand Up @@ -73,6 +72,11 @@ const maxMsgSize = 4 << 20 // transport msg limit 4 MiB
// for each contract listener.
const notificationHandlerPoolSize = 10

// Names of the Prometheus and pprof HTTP services. They are passed to
// preRunAndLog, which uses the name in its log messages and as the key under
// which the service's closer is stored in veryLastClosers.
const (
metricName = "prometheus"
profilerName = "pprof"
)

// applicationConfiguration reads and stores component-specific configuration
// values. It should not store any application helpers structs (pointers to shared
// structs).
Expand Down Expand Up @@ -291,7 +295,8 @@ type internals struct {
closers []func()
// services that are useful for debugging (e.g. when a regular closer does not
// close); they must be closed at the very end of the application life cycle
veryLastClosers []func()
veryLastClosersLock sync.RWMutex
veryLastClosers map[string]func()

apiVersion version.Version
healthStatus atomic.Int32
Expand Down Expand Up @@ -639,10 +644,10 @@ func initCfg(appCfg *config.Config) *cfg {

c.ownerIDFromKey = user.NewFromECDSAPublicKey(key.PrivateKey.PublicKey)

if metricsconfig.Enabled(c.cfgReader) {
c.metricsCollector = metrics.NewNodeMetrics(misc.Version)
c.basics.networkState.metrics = c.metricsCollector
}
c.metricsCollector = metrics.NewNodeMetrics(misc.Version)
c.basics.networkState.metrics = c.metricsCollector

c.veryLastClosers = make(map[string]func())

c.onShutdown(c.clientCache.CloseAll) // clean up connections
c.onShutdown(c.bgClientCache.CloseAll) // clean up connections
Expand Down Expand Up @@ -867,6 +872,9 @@ func (c *cfg) configWatcher(ctx context.Context) {
case <-ch:
c.log.Info("SIGHUP has been received, rereading configuration...")

oldMetrics := writeMetricConfig(c.cfgReader)
oldProfiler := writeProfilerConfig(c.cfgReader)

err := c.readConfig(c.cfgReader)
if err != nil {
c.log.Error("configuration reading", zap.Error(err))
Expand All @@ -877,6 +885,11 @@ func (c *cfg) configWatcher(ctx context.Context) {

c.reloadObjectPoolSizes()

// Prometheus and pprof

// nolint:contextcheck
c.reloadMetricsAndPprof(oldMetrics, oldProfiler)

// Logger

err = c.internals.logLevel.UnmarshalText([]byte(c.logger.level))
Expand Down Expand Up @@ -961,3 +974,30 @@ func writeSystemAttributes(c *cfg) error {

return nil
}

// reloadMetricsAndPprof restarts the Prometheus and pprof services whose
// settings differ from the snapshots taken before the configuration was
// re-read. For every changed service the closer registered in veryLastClosers
// (if any) is invoked and removed, after which the service is re-created by its
// init function and handed to preRunAndLog. The closers registry is held under
// the write lock for the whole procedure.
func (c *cfg) reloadMetricsAndPprof(oldMetrics metricConfig, oldProfiler profilerConfig) {
	c.veryLastClosersLock.Lock()
	defer c.veryLastClosersLock.Unlock()

	// stopService runs the closer registered under name, if there is one, and
	// drops it from the registry. Deleting a missing key is a no-op, so nothing
	// has to be done when no closer is registered.
	stopService := func(name string) {
		if stop, registered := c.veryLastClosers[name]; registered {
			stop()
			delete(c.veryLastClosers, name)
		}
	}

	// Metrics
	if oldMetrics.isUpdated(c.cfgReader) {
		stopService(metricName)
		preRunAndLog(c, metricName, initMetrics(c))
	}

	// Profiler
	if oldProfiler.isUpdated(c.cfgReader) {
		stopService(profilerName)
		preRunAndLog(c, profilerName, initProfiler(c))
	}
}
4 changes: 1 addition & 3 deletions cmd/neofs-node/control.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,7 @@ func (c *cfg) NetmapStatus() control.NetmapStatus {
func (c *cfg) setHealthStatus(st control.HealthStatus) {
c.healthStatus.Store(int32(st))

if c.metricsCollector != nil {
c.metricsCollector.SetHealth(int32(st))
}
c.metricsCollector.SetHealth(int32(st))
}

func (c *cfg) HealthStatus() control.HealthStatus {
Expand Down
13 changes: 8 additions & 5 deletions cmd/neofs-node/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ func main() {

c := initCfg(appCfg)

preRunAndLog(c, "prometheus", initMetrics(c))
preRunAndLog(c, metricName, initMetrics(c))

preRunAndLog(c, "pprof", initProfiler(c))
preRunAndLog(c, profilerName, initProfiler(c))

initApp(c)

Expand Down Expand Up @@ -91,13 +91,13 @@ func preRunAndLog(c *cfg, name string, srv *httputil.Server) {
c.log.Info(fmt.Sprintf("%s service is initialized", name))
c.wg.Add(1)
go func() {
runAndLog(c, name, true, func(c *cfg) {
runAndLog(c, name, false, func(c *cfg) {
fatalOnErr(srv.Serve(ln))
c.wg.Done()
})
}()

c.veryLastClosers = append(c.veryLastClosers, func() {
c.veryLastClosers[name] = func() {
c.log.Debug(fmt.Sprintf("shutting down %s service", name))

err := srv.Shutdown()
Expand All @@ -108,7 +108,7 @@ func preRunAndLog(c *cfg, name string, srv *httputil.Server) {
}

c.log.Debug(fmt.Sprintf("%s service has been stopped", name))
})
}
}

func initAndLog(c *cfg, name string, initializer func(*cfg)) {
Expand Down Expand Up @@ -184,9 +184,12 @@ func shutdown(c *cfg) {
for _, closer := range c.closers {
closer()
}

c.veryLastClosersLock.RLock()
for _, lastCloser := range c.veryLastClosers {
lastCloser()
}
c.veryLastClosersLock.RUnlock()

c.log.Debug("waiting for all processes to stop")

Expand Down
23 changes: 23 additions & 0 deletions cmd/neofs-node/metrics.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package main

import (
"time"

"github.com/nspcc-dev/neofs-node/cmd/neofs-node/config"
metricsconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/metrics"
httputil "github.com/nspcc-dev/neofs-node/pkg/util/http"
"github.com/prometheus/client_golang/prometheus/promhttp"
Expand All @@ -25,3 +28,23 @@ func initMetrics(c *cfg) *httputil.Server {

return srv
}

// metricConfig is a snapshot of the metrics service settings read from the
// application configuration. It is filled by writeMetricConfig and compared
// against the current configuration by isUpdated.
type metricConfig struct {
// value reported by metricsconfig.Enabled
enabled bool
// value reported by metricsconfig.ShutdownTimeout
shutdownTimeout time.Duration
// value reported by metricsconfig.Address (presumably the listen endpoint)
address string
}

// writeMetricConfig captures the metrics service settings currently stored in
// c (enabled flag, shutdown timeout and address) and returns them as a
// metricConfig value.
func writeMetricConfig(c *config.Config) metricConfig {
	var snapshot metricConfig

	snapshot.enabled = metricsconfig.Enabled(c)
	snapshot.shutdownTimeout = metricsconfig.ShutdownTimeout(c)
	snapshot.address = metricsconfig.Address(c)

	return snapshot
}

// isUpdated reports whether any metrics service setting in c differs from the
// snapshot held by the receiver. Settings are checked in the order: enabled
// flag, shutdown timeout, address; the first mismatch ends the check.
func (m metricConfig) isUpdated(c *config.Config) bool {
	switch {
	case m.enabled != metricsconfig.Enabled(c):
		return true
	case m.shutdownTimeout != metricsconfig.ShutdownTimeout(c):
		return true
	default:
		return m.address != metricsconfig.Address(c)
	}
}
5 changes: 2 additions & 3 deletions cmd/neofs-node/netmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,8 @@ func (s *networkState) CurrentEpoch() uint64 {

func (s *networkState) setCurrentEpoch(v uint64) {
s.epoch.Store(v)
if s.metrics != nil {
s.metrics.SetEpoch(v)
}

s.metrics.SetEpoch(v)
}

func (s *networkState) setNodeInfo(ni *netmapSDK.NodeInfo) {
Expand Down
5 changes: 1 addition & 4 deletions cmd/neofs-node/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,10 +347,7 @@ func initObjectService(c *cfg) {
respSvc,
)

var firstSvc objectService.ServiceServer = signSvc
if c.metricsCollector != nil {
firstSvc = objectService.NewMetricCollector(signSvc, c.metricsCollector)
}
firstSvc := objectService.NewMetricCollector(signSvc, c.metricsCollector)

server := objectTransportGRPC.New(firstSvc, mNumber, objNode, neofsecdsa.SignerRFC6979(c.shared.basics.key.PrivateKey))

Expand Down
23 changes: 23 additions & 0 deletions cmd/neofs-node/pprof.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package main

import (
"time"

"github.com/nspcc-dev/neofs-node/cmd/neofs-node/config"
profilerconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/profiler"
httputil "github.com/nspcc-dev/neofs-node/pkg/util/http"
)
Expand All @@ -24,3 +27,23 @@ func initProfiler(c *cfg) *httputil.Server {

return srv
}

// profilerConfig is a snapshot of the pprof service settings read from the
// application configuration. It is filled by writeProfilerConfig and compared
// against the current configuration by isUpdated.
type profilerConfig struct {
// value reported by profilerconfig.Enabled
enabled bool
// value reported by profilerconfig.ShutdownTimeout
shutdownTimeout time.Duration
// value reported by profilerconfig.Address (presumably the listen endpoint)
address string
}

// writeProfilerConfig captures the pprof service settings currently stored in
// c (enabled flag, shutdown timeout and address) and returns them as a
// profilerConfig value.
func writeProfilerConfig(c *config.Config) profilerConfig {
	var snapshot profilerConfig

	snapshot.enabled = profilerconfig.Enabled(c)
	snapshot.shutdownTimeout = profilerconfig.ShutdownTimeout(c)
	snapshot.address = profilerconfig.Address(c)

	return snapshot
}

// isUpdated reports whether any pprof service setting in c differs from the
// snapshot held by the receiver. Settings are checked in the order: enabled
// flag, shutdown timeout, address; the first mismatch ends the check.
func (p profilerConfig) isUpdated(c *config.Config) bool {
	switch {
	case p.enabled != profilerconfig.Enabled(c):
		return true
	case p.shutdownTimeout != profilerconfig.ShutdownTimeout(c):
		return true
	default:
		return p.address != profilerconfig.Address(c)
	}
}
4 changes: 1 addition & 3 deletions cmd/neofs-node/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,7 @@ func (c *cfg) engineOpts() []engine.Option {
opts = append(opts, engine.WithContainersSource(cntClient.AsContainerSource(c.shared.basics.cCli)))
}

if c.metricsCollector != nil {
opts = append(opts, engine.WithMetrics(c.metricsCollector))
}
opts = append(opts, engine.WithMetrics(c.metricsCollector))

return opts
}
Expand Down

0 comments on commit 339b4cb

Please sign in to comment.