From a373f6c5828d1fc1c5c5bc4cf71bf1de573e484a Mon Sep 17 00:00:00 2001 From: Andrey Butusov Date: Thu, 14 Nov 2024 17:06:57 +0300 Subject: [PATCH 1/4] node: fix logs prometheus and pprof started Logs: ``` prometheus service started successfully pprof service started successfully ``` Appear after shutting down these services. Now they do not appear at all. Signed-off-by: Andrey Butusov --- cmd/neofs-node/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/neofs-node/main.go b/cmd/neofs-node/main.go index 86e362e516..5f10c82c20 100644 --- a/cmd/neofs-node/main.go +++ b/cmd/neofs-node/main.go @@ -91,7 +91,7 @@ func preRunAndLog(c *cfg, name string, srv *httputil.Server) { c.log.Info(fmt.Sprintf("%s service is initialized", name)) c.wg.Add(1) go func() { - runAndLog(c, name, true, func(c *cfg) { + runAndLog(c, name, false, func(c *cfg) { fatalOnErr(srv.Serve(ln)) c.wg.Done() }) From f9e365e90f2961a19e97260c73448f5bf6bfac02 Mon Sep 17 00:00:00 2001 From: Andrey Butusov Date: Fri, 15 Nov 2024 00:32:38 +0300 Subject: [PATCH 2/4] metrics, pprof: make their closers map Add consts for the metric and profiler names. Make `c.veryLastClosers` a map. Signed-off-by: Andrey Butusov --- cmd/neofs-node/config.go | 9 ++++++++- cmd/neofs-node/main.go | 8 ++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cmd/neofs-node/config.go b/cmd/neofs-node/config.go index da6a705154..b27d3fa034 100644 --- a/cmd/neofs-node/config.go +++ b/cmd/neofs-node/config.go @@ -73,6 +73,11 @@ const maxMsgSize = 4 << 20 // transport msg limit 4 MiB // for each contract listener. const notificationHandlerPoolSize = 10 +const ( + metricName = "prometheus" + profilerName = "pprof" +) + // applicationConfiguration reads and stores component-specific configuration // values. It should not store any application helpers structs (pointers to shared // structs). @@ -291,7 +296,7 @@ type internals struct { closers []func() // services that are useful for debug (e.g. when a regular closer does not // close), must be close at the very end of application life cycle - veryLastClosers []func() + veryLastClosers map[string]func() apiVersion version.Version healthStatus atomic.Int32 @@ -644,6 +649,8 @@ func initCfg(appCfg *config.Config) *cfg { c.basics.networkState.metrics = c.metricsCollector } + c.veryLastClosers = make(map[string]func()) + c.onShutdown(c.clientCache.CloseAll) // clean up connections c.onShutdown(c.bgClientCache.CloseAll) // clean up connections c.onShutdown(c.putClientCache.CloseAll) // clean up connections diff --git a/cmd/neofs-node/main.go b/cmd/neofs-node/main.go index 5f10c82c20..1fb76fcad2 100644 --- a/cmd/neofs-node/main.go +++ b/cmd/neofs-node/main.go @@ -55,9 +55,9 @@ func main() { c := initCfg(appCfg) - preRunAndLog(c, "prometheus", initMetrics(c)) + preRunAndLog(c, metricName, initMetrics(c)) - preRunAndLog(c, "pprof", initProfiler(c)) + preRunAndLog(c, profilerName, initProfiler(c)) initApp(c) @@ -97,7 +97,7 @@ func preRunAndLog(c *cfg, name string, srv *httputil.Server) { }) }() - c.veryLastClosers = append(c.veryLastClosers, func() { + c.veryLastClosers[name] = func() { c.log.Debug(fmt.Sprintf("shutting down %s service", name)) err := srv.Shutdown() @@ -108,7 +108,7 @@ func preRunAndLog(c *cfg, name string, srv *httputil.Server) { } c.log.Debug(fmt.Sprintf("%s service has been stopped", name)) - }) + } } func initAndLog(c *cfg, name string, initializer func(*cfg)) { From d03873acc8cec02de9cb1c3a0e425df10393f5f4 Mon Sep 17 00:00:00 2001 From: Andrey Butusov Date: Fri, 22 Nov 2024 12:46:36 +0300 Subject: [PATCH 3/4] metrics: always init metrics collector To simply reload the metrics service and enable/disable it at runtime, always initialize the metrics collector and collect data, even in local mode, if it is not exposed via HTTP. Signed-off-by: Andrey Butusov --- cmd/neofs-node/config.go | 7 ++----- cmd/neofs-node/control.go | 4 +--- cmd/neofs-node/netmap.go | 5 ++--- cmd/neofs-node/object.go | 5 +---- cmd/neofs-node/storage.go | 4 +--- 5 files changed, 7 insertions(+), 18 deletions(-) diff --git a/cmd/neofs-node/config.go b/cmd/neofs-node/config.go index b27d3fa034..94784ed718 100644 --- a/cmd/neofs-node/config.go +++ b/cmd/neofs-node/config.go @@ -24,7 +24,6 @@ import ( shardconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard" fstreeconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/engine/shard/blobstor/fstree" loggerconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/logger" - metricsconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/metrics" morphconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/morph" nodeconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/node" objectconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/object" @@ -644,10 +643,8 @@ func initCfg(appCfg *config.Config) *cfg { c.ownerIDFromKey = user.NewFromECDSAPublicKey(key.PrivateKey.PublicKey) - if metricsconfig.Enabled(c.cfgReader) { - c.metricsCollector = metrics.NewNodeMetrics(misc.Version) - c.basics.networkState.metrics = c.metricsCollector - } + c.metricsCollector = metrics.NewNodeMetrics(misc.Version) + c.basics.networkState.metrics = c.metricsCollector c.veryLastClosers = make(map[string]func()) diff --git a/cmd/neofs-node/control.go b/cmd/neofs-node/control.go index 4afacc81ec..1ba3b733bd 100644 --- a/cmd/neofs-node/control.go +++ b/cmd/neofs-node/control.go @@ -67,9 +67,7 @@ func (c *cfg) NetmapStatus() control.NetmapStatus { func (c *cfg) setHealthStatus(st control.HealthStatus) { c.healthStatus.Store(int32(st)) - if c.metricsCollector != nil { - c.metricsCollector.SetHealth(int32(st)) - } + c.metricsCollector.SetHealth(int32(st)) } func (c *cfg) HealthStatus() control.HealthStatus { diff --git a/cmd/neofs-node/netmap.go b/cmd/neofs-node/netmap.go index 4ac11d7b8d..3d188816b2 100644 --- a/cmd/neofs-node/netmap.go +++ b/cmd/neofs-node/netmap.go @@ -48,9 +48,8 @@ func (s *networkState) CurrentEpoch() uint64 { func (s *networkState) setCurrentEpoch(v uint64) { s.epoch.Store(v) - if s.metrics != nil { - s.metrics.SetEpoch(v) - } + + s.metrics.SetEpoch(v) } func (s *networkState) setNodeInfo(ni *netmapSDK.NodeInfo) { diff --git a/cmd/neofs-node/object.go b/cmd/neofs-node/object.go index aa4aff5ce7..7b3dedbafc 100644 --- a/cmd/neofs-node/object.go +++ b/cmd/neofs-node/object.go @@ -347,10 +347,7 @@ func initObjectService(c *cfg) { respSvc, ) - var firstSvc objectService.ServiceServer = signSvc - if c.metricsCollector != nil { - firstSvc = objectService.NewMetricCollector(signSvc, c.metricsCollector) - } + firstSvc := objectService.NewMetricCollector(signSvc, c.metricsCollector) server := objectTransportGRPC.New(firstSvc, mNumber, objNode, neofsecdsa.SignerRFC6979(c.shared.basics.key.PrivateKey)) diff --git a/cmd/neofs-node/storage.go b/cmd/neofs-node/storage.go index 4be499d51d..66b8be1002 100644 --- a/cmd/neofs-node/storage.go +++ b/cmd/neofs-node/storage.go @@ -93,9 +93,7 @@ func (c *cfg) engineOpts() []engine.Option { opts = append(opts, engine.WithContainersSource(cntClient.AsContainerSource(c.shared.basics.cCli))) } - if c.metricsCollector != nil { - opts = append(opts, engine.WithMetrics(c.metricsCollector)) - } + opts = append(opts, engine.WithMetrics(c.metricsCollector)) return opts } From b79f58f911a62cb5e0eeb665a385819629ff76b4 Mon Sep 17 00:00:00 2001 From: Andrey Butusov Date: Fri, 22 Nov 2024 12:40:37 +0300 Subject: [PATCH 4/4] metrics, pprof: support reloading services with SIGHUP Reload prometheus and pprof services, if the config is updated. Closes #1868. Signed-off-by: Andrey Butusov --- CHANGELOG.md | 1 + cmd/neofs-node/config.go | 38 +++++++++++++++++++++++++++++++++++++- cmd/neofs-node/main.go | 3 +++ cmd/neofs-node/metrics.go | 23 +++++++++++++++++++++++ cmd/neofs-node/pprof.go | 23 +++++++++++++++++++++++ 5 files changed, 87 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 858b6c242f..d40f0669ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ attribute, which is used for container domain name in NNS contracts (#2954) - New `peapod-to-fstree` tool providing peapod-to-fstree data migration (#3013) - Reloading node attributes with SIGHUP (#3005) - Reloading pool sizes (#3018) +- Reloading pprof/metrics services with SIGHUP (#3016) ### Fixed - Do not search for tombstones when handling their expiration, use local indexes instead (#2929) diff --git a/cmd/neofs-node/config.go b/cmd/neofs-node/config.go index 94784ed718..e6182a580b 100644 --- a/cmd/neofs-node/config.go +++ b/cmd/neofs-node/config.go @@ -295,7 +295,8 @@ type internals struct { closers []func() // services that are useful for debug (e.g. when a regular closer does not // close), must be close at the very end of application life cycle - veryLastClosers map[string]func() + veryLastClosersLock sync.RWMutex + veryLastClosers map[string]func() apiVersion version.Version healthStatus atomic.Int32 @@ -871,6 +872,9 @@ func (c *cfg) configWatcher(ctx context.Context) { case <-ch: c.log.Info("SIGHUP has been received, rereading configuration...") + oldMetrics := writeMetricConfig(c.cfgReader) + oldProfiler := writeProfilerConfig(c.cfgReader) + err := c.readConfig(c.cfgReader) if err != nil { c.log.Error("configuration reading", zap.Error(err)) @@ -881,6 +885,11 @@ func (c *cfg) configWatcher(ctx context.Context) { c.reloadObjectPoolSizes() + // Prometheus and pprof + + // nolint:contextcheck + c.reloadMetricsAndPprof(oldMetrics, oldProfiler) + // Logger err = c.internals.logLevel.UnmarshalText([]byte(c.logger.level)) @@ -965,3 +974,30 @@ func writeSystemAttributes(c *cfg) error { return nil } + +func (c *cfg) reloadMetricsAndPprof(oldMetrics metricConfig, oldProfiler profilerConfig) { + c.veryLastClosersLock.Lock() + defer c.veryLastClosersLock.Unlock() + + // Metrics + + if oldMetrics.isUpdated(c.cfgReader) { + if closer, ok := c.veryLastClosers[metricName]; ok { + closer() + } + delete(c.veryLastClosers, metricName) + + preRunAndLog(c, metricName, initMetrics(c)) + } + + //Profiler + + if oldProfiler.isUpdated(c.cfgReader) { + if closer, ok := c.veryLastClosers[profilerName]; ok { + closer() + } + delete(c.veryLastClosers, profilerName) + + preRunAndLog(c, profilerName, initProfiler(c)) + } +} diff --git a/cmd/neofs-node/main.go b/cmd/neofs-node/main.go index 1fb76fcad2..1d6184d647 100644 --- a/cmd/neofs-node/main.go +++ b/cmd/neofs-node/main.go @@ -184,9 +184,12 @@ func shutdown(c *cfg) { for _, closer := range c.closers { closer() } + + c.veryLastClosersLock.RLock() for _, lastCloser := range c.veryLastClosers { lastCloser() } + c.veryLastClosersLock.RUnlock() c.log.Debug("waiting for all processes to stop") diff --git a/cmd/neofs-node/metrics.go b/cmd/neofs-node/metrics.go index a3899a92e5..29f4359103 100644 --- a/cmd/neofs-node/metrics.go +++ b/cmd/neofs-node/metrics.go @@ -1,6 +1,9 @@ package main import ( + "time" + + "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config" metricsconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/metrics" httputil "github.com/nspcc-dev/neofs-node/pkg/util/http" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -25,3 +28,23 @@ func initMetrics(c *cfg) *httputil.Server { return srv } + +type metricConfig struct { + enabled bool + shutdownTimeout time.Duration + address string +} + +func writeMetricConfig(c *config.Config) metricConfig { + return metricConfig{ + enabled: metricsconfig.Enabled(c), + shutdownTimeout: metricsconfig.ShutdownTimeout(c), + address: metricsconfig.Address(c), + } +} + +func (m1 metricConfig) isUpdated(c *config.Config) bool { + return m1.enabled != metricsconfig.Enabled(c) || + m1.shutdownTimeout != metricsconfig.ShutdownTimeout(c) || + m1.address != metricsconfig.Address(c) +} diff --git a/cmd/neofs-node/pprof.go b/cmd/neofs-node/pprof.go index 403259bbdd..a8b8d9b67b 100644 --- a/cmd/neofs-node/pprof.go +++ b/cmd/neofs-node/pprof.go @@ -1,6 +1,9 @@ package main import ( + "time" + + "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config" profilerconfig "github.com/nspcc-dev/neofs-node/cmd/neofs-node/config/profiler" httputil "github.com/nspcc-dev/neofs-node/pkg/util/http" ) @@ -24,3 +27,23 @@ func initProfiler(c *cfg) *httputil.Server { return srv } + +type profilerConfig struct { + enabled bool + shutdownTimeout time.Duration + address string +} + +func writeProfilerConfig(c *config.Config) profilerConfig { + return profilerConfig{ + enabled: profilerconfig.Enabled(c), + shutdownTimeout: profilerconfig.ShutdownTimeout(c), + address: profilerconfig.Address(c), + } +} + +func (m1 profilerConfig) isUpdated(c *config.Config) bool { + return m1.enabled != profilerconfig.Enabled(c) || + m1.shutdownTimeout != profilerconfig.ShutdownTimeout(c) || + m1.address != profilerconfig.Address(c) +}