From 4b842ce4458b484d2870573f7c8d63a11df98f15 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Wed, 18 Oct 2023 10:55:53 +0800
Subject: [PATCH] better metrics for node status (#620)

Replace the per-pod up/down node counters (core_up_nodes and
core_down_nodes) with one gauge per node, pod_node_up, labelled by
hostname, podname and nodename. The gauge value is 1 when the node is
up and 0 when node.IsDown() reports true.

Add utils.Bool2Int to turn the status into the gauge value.
---
Review notes (not part of the commit message):
- utils.Bool2Int used to read a whole int through unsafe.Pointer from a
  one-byte bool; it is now a plain branch and the "unsafe" import is gone.
- The pod_node_up help text now describes the per-node 0/1 value.
- Line breaks and tab indentation were restored from a flattened copy of
  this patch; blank context lines were placed from the hunk line counts.
  Verify context against the tree before applying.
- The post-image blob ids on the metrics.go and utils.go index lines
  predate these review changes.

 metrics/handler.go  | 13 +------------
 metrics/metrics.go  | 48 +++++++++++++++++++-----------------------------
 utils/utils.go      |  8 ++++++++
 utils/utils_test.go |  5 +++++
 4 files changed, 33 insertions(+), 41 deletions(-)

diff --git a/metrics/handler.go b/metrics/handler.go
index f00ad7acd..cca790f70 100644
--- a/metrics/handler.go
+++ b/metrics/handler.go
@@ -20,16 +20,8 @@ func (m *Metrics) ResourceMiddleware(cluster cluster.Cluster) func(http.Handler)
 			if err != nil {
 				logger.Error(ctx, err, "Get all nodes err")
 			}
-			podUpDownNodes := map[string][]int{}
 			for node := range nodes {
-				if podUpDownNodes[node.Podname] == nil {
-					podUpDownNodes[node.Podname] = []int{0, 0}
-				}
-				if node.IsDown() {
-					podUpDownNodes[node.Podname][1]++
-				} else {
-					podUpDownNodes[node.Podname][0]++
-				}
+				m.SendPodNodeStatus(ctx, node)
 				metrics, err := m.rmgr.GetNodeMetrics(ctx, node)
 				if err != nil {
 					logger.Error(ctx, err, "Get metrics failed")
@@ -37,9 +29,6 @@ func (m *Metrics) ResourceMiddleware(cluster cluster.Cluster) func(http.Handler)
 				}
 				m.SendMetrics(ctx, metrics...)
 			}
-			for podname, ud := range podUpDownNodes {
-				m.SendPodUpDownNodes(ctx, podname, ud[0], ud[1])
-			}
 
 			h.ServeHTTP(w, r)
 		})
diff --git a/metrics/metrics.go b/metrics/metrics.go
index 6b10d5122..f30bc393f 100644
--- a/metrics/metrics.go
+++ b/metrics/metrics.go
@@ -2,6 +2,7 @@ package metrics
 
 import (
 	"context"
+	"fmt"
 	"os"
 	"strconv"
 	"sync"
@@ -21,12 +22,10 @@ import (
 )
 
 const (
-	deployCountKey  = "core.%s.deploy.count"
-	deployCountName = "core_deploy"
-	upNodesKey      = "core.up.nodes"
-	upNodesName     = "core_up_nodes"
-	downNodesKey    = "core.down.nodes"
-	downNodesName   = "core_down_nodes"
+	deployCountKey    = "core.%s.deploy.count"
+	deployCountName   = "core_deploy"
+	podNodeStatusKey  = "pod.node.%s.up"
+	podNodeStatusName = "pod_node_up"
 
 	gaugeType   = "gauge"
 	counterType = "counter"
@@ -58,22 +57,18 @@ func (m *Metrics) SendDeployCount(ctx context.Context, n int) {
 	m.SendMetrics(ctx, metrics)
 }
 
-func (m *Metrics) SendPodUpDownNodes(ctx context.Context, podname string, up, down int) {
-	log.WithFunc("metrics.SendUpDownNodes").Info(ctx, "Update deploy counter")
-	m1 := &plugintypes.Metrics{
-		Name:   upNodesName,
-		Labels: []string{m.Hostname, podname},
-		Key:    upNodesKey,
-		Value:  strconv.Itoa(up),
+// SendPodNodeStatus update the up status of a pod node, 1 for up and 0 for down
+func (m *Metrics) SendPodNodeStatus(ctx context.Context, node *types.Node) {
+	log.WithFunc("metrics.SendPodNodeStatus").Info(ctx, "update pod node status")
+	up := !node.IsDown()
+	metrics := &plugintypes.Metrics{
+		Name:   podNodeStatusName,
+		Labels: []string{m.Hostname, node.Podname, node.Name},
+		Key:    fmt.Sprintf(podNodeStatusKey, node.Name),
+		Value:  strconv.Itoa(utils.Bool2Int(up)),
 	}
-	m2 := &plugintypes.Metrics{
-		Name:   downNodesName,
-		Labels: []string{m.Hostname, podname},
-		Key:    downNodesKey,
-		Value:  strconv.Itoa(down),
-	}
 
-	m.SendMetrics(ctx, m1, m2)
+	m.SendMetrics(ctx, metrics)
 }
 
 // SendMetrics update metrics
@@ -231,15 +226,10 @@ func InitMetrics(ctx context.Context, config types.Config, metricsDescriptions [
 		Help: "core deploy counter",
 	}, []string{"hostname"})
 
-	Client.Collectors[upNodesName] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-		Name: upNodesName,
-		Help: "number of up nodes",
-	}, []string{"hostname", "podname"})
-
-	Client.Collectors[downNodesName] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-		Name: downNodesName,
-		Help: "number of down nodes",
-	}, []string{"hostname", "podname"})
+	Client.Collectors[podNodeStatusName] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name: podNodeStatusName,
+		Help: "node up status, 1 for up and 0 for down",
+	}, []string{"hostname", "podname", "nodename"})
 
 	once.Do(func() {
 		prometheus.MustRegister(maps.Values(Client.Collectors)...)
diff --git a/utils/utils.go b/utils/utils.go
index 4b7ebc518..523df850c 100644
--- a/utils/utils.go
+++ b/utils/utils.go
@@ -270,3 +270,11 @@ func safeSplit(s string) []string {
 
 	return result
 }
+
+// Bool2Int converts a bool to an int, 1 for true and 0 for false
+func Bool2Int(a bool) int {
+	if a {
+		return 1
+	}
+	return 0
+}
diff --git a/utils/utils_test.go b/utils/utils_test.go
index 0ed53734a..b8e7b1f62 100644
--- a/utils/utils_test.go
+++ b/utils/utils_test.go
@@ -221,3 +221,8 @@ func TestSHA256(t *testing.T) {
 	str := "hhh"
 	assert.Equal(t, "24d166cd6c8b826c779040b49d5b6708d649b236558e8744339dfee6afe11999", SHA256(str))
 }
+
+func TestBool2Int(t *testing.T) {
+	assert.Equal(t, 1, Bool2Int(true))
+	assert.Equal(t, 0, Bool2Int(false))
+}