From 54494190533c16a4c3ab9fdc64a213723b2e56b7 Mon Sep 17 00:00:00 2001
From: Jeffrey Chien
Date: Fri, 8 Dec 2023 10:29:19 -0500
Subject: [PATCH] [receiver/awscontainerinsight] Filter terminated pods from node request metrics. (#27299)

**Description:**
The `node_<resource>_request` metrics and the metrics derived from them (`node_<resource>_reserved_capacity`) differ from the output of `kubectl describe node <node>`. This is because kubectl [filters out terminated pods](https://github.com/kubernetes/kubectl/blob/302f330c8712e717ee45bbeff27e1d3008da9f00/pkg/describe/describe.go#L3624). See the linked issue for more details. This change adds a filter that excludes terminated pods (those in the `Succeeded` or `Failed` phase) from the node request aggregation.
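For reference, the rule in isolation is just a phase check applied before a pod's requests are summed into the node totals. Below is a minimal, self-contained sketch of that behavior, assuming only a dependency on `k8s.io/api/core/v1`; `podContributesToRequests` is a hypothetical helper for illustration, not part of the receiver's API:

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// podContributesToRequests reports whether a pod's resource requests should
// count toward node-level request metrics. Pods in a terminal phase
// (Succeeded or Failed) are excluded, mirroring kubectl's terminated-pod
// filter. (Hypothetical helper name, for illustration only.)
func podContributesToRequests(pod *corev1.Pod) bool {
	return pod.Status.Phase != corev1.PodSucceeded && pod.Status.Phase != corev1.PodFailed
}

func main() {
	pods := []corev1.Pod{
		{Status: corev1.PodStatus{Phase: corev1.PodRunning}},
		{Status: corev1.PodStatus{Phase: corev1.PodPending}},
		{Status: corev1.PodStatus{Phase: corev1.PodSucceeded}}, // filtered out
		{Status: corev1.PodStatus{Phase: corev1.PodFailed}},    // filtered out
	}

	counted := 0
	for i := range pods {
		if podContributesToRequests(&pods[i]) {
			counted++
		}
	}
	fmt.Printf("%d of %d pods contribute to node request metrics\n", counted, len(pods))
}
```

Since kubectl applies the same phase check when computing a node's allocated resources, the filtered metrics should now line up with `kubectl describe node <node>`.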
**Link to tracking Issue:**
https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/27262

**Testing:**
Added a unit test to validate the pod state filtering. Built and deployed the change to a cluster, then deployed a `cpu-test` pod.

![image](https://github.com/amazon-contributing/opentelemetry-collector-contrib/assets/84729962/b557be2d-e14e-428a-895a-761f7724d9bd)

The gap in the graph is when the change was deployed. The metric drops after the deployment because of the filter. It can be seen spiking while the `cpu-test` pod is running (~19:15) and returning to the previous request size once the pod has terminated.

**Documentation:** N/A

---
 .chloggen/filter-terminated-pods.yaml        | 27 +++++++++
 .../internal/stores/podstore.go              | 10 ++--
 .../internal/stores/podstore_test.go         | 57 +++++++++++++++++++
 3 files changed, 90 insertions(+), 4 deletions(-)
 create mode 100755 .chloggen/filter-terminated-pods.yaml

diff --git a/.chloggen/filter-terminated-pods.yaml b/.chloggen/filter-terminated-pods.yaml
new file mode 100755
index 000000000000..8b6a840765ea
--- /dev/null
+++ b/.chloggen/filter-terminated-pods.yaml
@@ -0,0 +1,27 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: bug_fix
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: awscontainerinsightreceiver
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Filter terminated pods from node request metrics
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [27262]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: [user]
diff --git a/receiver/awscontainerinsightreceiver/internal/stores/podstore.go b/receiver/awscontainerinsightreceiver/internal/stores/podstore.go
index 3edd62433d5a..bf464069a349 100644
--- a/receiver/awscontainerinsightreceiver/internal/stores/podstore.go
+++ b/receiver/awscontainerinsightreceiver/internal/stores/podstore.go
@@ -265,10 +265,12 @@ func (p *PodStore) refreshInternal(now time.Time, podList []corev1.Pod) {
 			p.logger.Warn(fmt.Sprintf("podKey is unavailable, refresh pod store for pod %s", pod.Name))
 			continue
 		}
-		tmpCPUReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getCPUCapacity(), cpuKey, getRequestForContainer)
-		cpuRequest += tmpCPUReq
-		tmpMemReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getMemCapacity(), memoryKey, getRequestForContainer)
-		memRequest += tmpMemReq
+		if pod.Status.Phase != corev1.PodSucceeded && pod.Status.Phase != corev1.PodFailed {
+			tmpCPUReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getCPUCapacity(), cpuKey, getRequestForContainer)
+			cpuRequest += tmpCPUReq
+			tmpMemReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getMemCapacity(), memoryKey, getRequestForContainer)
+			memRequest += tmpMemReq
+		}
 		if pod.Status.Phase == corev1.PodRunning {
 			podCount++
 		}
diff --git a/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go b/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go
index 89c91bdcb3d9..34a2d94c283f 100644
--- a/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go
+++ b/receiver/awscontainerinsightreceiver/internal/stores/podstore_test.go
@@ -624,6 +624,63 @@ func TestPodStore_decorateNode(t *testing.T) {
 	assert.Equal(t, int(1), metric.GetField("node_number_of_running_pods").(int))
 }
 
+func TestPodStore_decorateNode_multiplePodStates(t *testing.T) {
+	podStore := getPodStore()
+	defer require.NoError(t, podStore.Shutdown())
+
+	tags := map[string]string{ci.MetricType: ci.TypeNode}
+	fields := map[string]any{
+		ci.MetricName(ci.TypeNode, ci.CPUTotal):      float64(100),
+		ci.MetricName(ci.TypeNode, ci.CPULimit):      uint64(4000),
+		ci.MetricName(ci.TypeNode, ci.MemWorkingset): float64(100 * 1024 * 1024),
+		ci.MetricName(ci.TypeNode, ci.MemLimit):      uint64(400 * 1024 * 1024),
+	}
+	metric := generateMetric(fields, tags)
+
+	// terminated pods should not contribute to requests
+	failedPod := getBaseTestPodInfo()
+	failedPod.Status.Phase = corev1.PodFailed
+	succeededPod := getBaseTestPodInfo()
+	succeededPod.Status.Phase = corev1.PodSucceeded
+	podList := []corev1.Pod{*failedPod, *succeededPod}
+	podStore.refreshInternal(time.Now(), podList)
+	podStore.decorateNode(metric)
+
+	assert.Equal(t, uint64(0), metric.GetField("node_cpu_request").(uint64))
+	assert.Equal(t, uint64(4000), metric.GetField("node_cpu_limit").(uint64))
+	assert.Equal(t, float64(0), metric.GetField("node_cpu_reserved_capacity").(float64))
+	assert.Equal(t, float64(100), metric.GetField("node_cpu_usage_total").(float64))
+
+	assert.Equal(t, uint64(0), metric.GetField("node_memory_request").(uint64))
+	assert.Equal(t, uint64(400*1024*1024), metric.GetField("node_memory_limit").(uint64))
+	assert.Equal(t, float64(0), metric.GetField("node_memory_reserved_capacity").(float64))
+	assert.Equal(t, float64(100*1024*1024), metric.GetField("node_memory_working_set").(float64))
+
+	// non-terminated pods should contribute to requests
+	pendingPod := getBaseTestPodInfo()
+	pendingPod.Status.Phase = corev1.PodPending
+	podList = append(podList, *pendingPod)
+	podStore.refreshInternal(time.Now(), podList)
+	podStore.decorateNode(metric)
+	assert.Equal(t, uint64(10), metric.GetField("node_cpu_request").(uint64))
+	assert.Equal(t, float64(0.25), metric.GetField("node_cpu_reserved_capacity").(float64))
+
+	assert.Equal(t, uint64(50*1024*1024), metric.GetField("node_memory_request").(uint64))
+	assert.Equal(t, float64(12.5), metric.GetField("node_memory_reserved_capacity").(float64))
+
+	runningPod := getBaseTestPodInfo()
+	runningPod.Status.Phase = corev1.PodRunning
+	podList = append(podList, *runningPod)
+	podStore.refreshInternal(time.Now(), podList)
+	podStore.decorateNode(metric)
+
+	assert.Equal(t, uint64(20), metric.GetField("node_cpu_request").(uint64))
+	assert.Equal(t, float64(0.5), metric.GetField("node_cpu_reserved_capacity").(float64))
+
+	assert.Equal(t, uint64(100*1024*1024), metric.GetField("node_memory_request").(uint64))
+	assert.Equal(t, float64(25), metric.GetField("node_memory_reserved_capacity").(float64))
+}
+
 func TestPodStore_Decorate(t *testing.T) {
 	// not the metrics for decoration
 	tags := map[string]string{}