From 27e1296252c6d1395310dd346823dde6a3444cfb Mon Sep 17 00:00:00 2001 From: Jeffrey Chien Date: Mon, 2 Oct 2023 13:57:22 -0400 Subject: [PATCH 1/2] Filter terminated pods from node request metrics. --- .chloggen/filter-terminated-pods.yaml | 27 +++++++++ .../internal/stores/podstore.go | 10 ++-- .../internal/stores/podstore_test.go | 57 +++++++++++++++++++ 3 files changed, 90 insertions(+), 4 deletions(-) create mode 100755 .chloggen/filter-terminated-pods.yaml diff --git a/.chloggen/filter-terminated-pods.yaml b/.chloggen/filter-terminated-pods.yaml new file mode 100755 index 000000000000..8b6a840765ea --- /dev/null +++ b/.chloggen/filter-terminated-pods.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: awscontainerinsightreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Filter terminated pods from node request metrics + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [27262] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/receiver/awscontainerinsightreceiver/internal/stores/podstore.go b/receiver/awscontainerinsightreceiver/internal/stores/podstore.go index 9c587f9564f1..2b1fc1eb4d51 100644 --- a/receiver/awscontainerinsightreceiver/internal/stores/podstore.go +++ b/receiver/awscontainerinsightreceiver/internal/stores/podstore.go @@ -265,10 +265,12 @@ func (p *PodStore) refreshInternal(now time.Time, podList []corev1.Pod) { p.logger.Warn(fmt.Sprintf("podKey is unavailable, refresh pod store for pod %s", pod.Name)) continue } - tmpCPUReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getCPUCapacity(), cpuKey, getRequestForContainer) - cpuRequest += tmpCPUReq - tmpMemReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getMemCapacity(), memoryKey, getRequestForContainer) - memRequest += tmpMemReq + if pod.Status.Phase != corev1.PodSucceeded && pod.Status.Phase != corev1.PodFailed { + tmpCPUReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getCPUCapacity(), cpuKey, getRequestForContainer) + cpuRequest += tmpCPUReq + tmpMemReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getMemCapacity(), memoryKey, getRequestForContainer) + memRequest += tmpMemReq + } if pod.Status.Phase == corev1.PodRunning { podCount++ } diff --git a/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go b/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go index 447ceb91f12a..d692774a8c5b 100644 --- a/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go +++ b/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go @@ -624,6 +624,63 @@ func TestPodStore_decorateNode(t *testing.T) { assert.Equal(t, int(1), metric.GetField("node_number_of_running_pods").(int)) } +func TestPodStore_decorateNode_multiplePodStates(t *testing.T) { + podStore := getPodStore() + defer require.NoError(t, podStore.Shutdown()) + + tags := map[string]string{ci.MetricType: ci.TypeNode} + fields := map[string]interface{}{ + ci.MetricName(ci.TypeNode, ci.CPUTotal): float64(100), + ci.MetricName(ci.TypeNode, ci.CPULimit): uint64(4000), + ci.MetricName(ci.TypeNode, ci.MemWorkingset): float64(100 * 1024 * 1024), + ci.MetricName(ci.TypeNode, ci.MemLimit): uint64(400 * 1024 * 1024), + } + metric := generateMetric(fields, tags) + + // terminated pods should not contribute to requests + failedPod := getBaseTestPodInfo() + failedPod.Status.Phase = corev1.PodFailed + succeededPod := getBaseTestPodInfo() + succeededPod.Status.Phase = corev1.PodSucceeded + podList := []corev1.Pod{*failedPod, *succeededPod} + podStore.refreshInternal(time.Now(), podList) + podStore.decorateNode(metric) + + assert.Equal(t, uint64(0), metric.GetField("node_cpu_request").(uint64)) + assert.Equal(t, uint64(4000), metric.GetField("node_cpu_limit").(uint64)) + assert.Equal(t, float64(0), metric.GetField("node_cpu_reserved_capacity").(float64)) + assert.Equal(t, float64(100), metric.GetField("node_cpu_usage_total").(float64)) + + assert.Equal(t, uint64(0), metric.GetField("node_memory_request").(uint64)) + assert.Equal(t, uint64(400*1024*1024), metric.GetField("node_memory_limit").(uint64)) + assert.Equal(t, float64(0), metric.GetField("node_memory_reserved_capacity").(float64)) + assert.Equal(t, float64(100*1024*1024), metric.GetField("node_memory_working_set").(float64)) + + // non-terminated pods should contribute to requests + pendingPod := getBaseTestPodInfo() + pendingPod.Status.Phase = corev1.PodPending + podList = append(podList, *pendingPod) + podStore.refreshInternal(time.Now(), podList) + podStore.decorateNode(metric) + assert.Equal(t, uint64(10), metric.GetField("node_cpu_request").(uint64)) + assert.Equal(t, float64(0.25), metric.GetField("node_cpu_reserved_capacity").(float64)) + + assert.Equal(t, uint64(50*1024*1024), metric.GetField("node_memory_request").(uint64)) + assert.Equal(t, float64(12.5), metric.GetField("node_memory_reserved_capacity").(float64)) + + runningPod := getBaseTestPodInfo() + runningPod.Status.Phase = corev1.PodRunning + podList = append(podList, *runningPod) + podStore.refreshInternal(time.Now(), podList) + podStore.decorateNode(metric) + + assert.Equal(t, uint64(20), metric.GetField("node_cpu_request").(uint64)) + assert.Equal(t, float64(0.5), metric.GetField("node_cpu_reserved_capacity").(float64)) + + assert.Equal(t, uint64(100*1024*1024), metric.GetField("node_memory_request").(uint64)) + assert.Equal(t, float64(25), metric.GetField("node_memory_reserved_capacity").(float64)) +} + func TestPodStore_Decorate(t *testing.T) { // not the metrics for decoration tags := map[string]string{} From 81ec7faa2d09b13f32a06b0623343492ad15e92f Mon Sep 17 00:00:00 2001 From: Jeffrey Chien Date: Tue, 28 Nov 2023 18:32:17 -0500 Subject: [PATCH 2/2] Replace interface with any. --- .../internal/stores/podstore_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go b/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go index 9bfb51ab789e..34a2d94c283f 100644 --- a/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go +++ b/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go @@ -629,7 +629,7 @@ func TestPodStore_decorateNode_multiplePodStates(t *testing.T) { defer require.NoError(t, podStore.Shutdown()) tags := map[string]string{ci.MetricType: ci.TypeNode} - fields := map[string]interface{}{ + fields := map[string]any{ ci.MetricName(ci.TypeNode, ci.CPUTotal): float64(100), ci.MetricName(ci.TypeNode, ci.CPULimit): uint64(4000), ci.MetricName(ci.TypeNode, ci.MemWorkingset): float64(100 * 1024 * 1024),