Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CECO-1153] APM e2e test #1549

Merged
merged 13 commits into from
Jan 10, 2025
3 changes: 2 additions & 1 deletion test/e2e/kind_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@ package e2e
import (
"context"
"fmt"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/provisioners"
"path/filepath"
"strconv"
"strings"
"testing"
"time"

"github.com/DataDog/datadog-agent/test/new-e2e/pkg/provisioners"

Comment on lines +20 to +21
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This import reordering comes from my IDE settings and is a no-op; kind_test.go will eventually be removed after the refactor.

"github.com/DataDog/datadog-agent/test/new-e2e/pkg/components"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/e2e"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/runner"
Expand Down
19 changes: 19 additions & 0 deletions test/e2e/manifests/new_manifests/apm/datadog-agent-apm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
apiVersion: datadoghq.com/v2alpha1
kind: DatadogAgent
metadata:
namespace: e2e-operator
labels:
agent.datadoghq.com/e2e-test: datadog-agent-apm
spec:
global:
kubelet:
tlsVerify: false
features:
apm:
enabled: true
hostPortConfig:
enabled: true
hostPort: 8126
unixDomainSocketConfig:
enabled: true
path: /var/run/datadog/apm.socket
79 changes: 79 additions & 0 deletions test/e2e/manifests/new_manifests/apm/tracegen-deploy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: tracegen-tribrid
namespace: e2e-operator
labels:
app: tracegen-tribrid
spec:
replicas: 1
selector:
matchLabels:
app: tracegen-tribrid
template:
metadata:
labels:
app: tracegen-tribrid
spec:
containers:
- name: tracegen-tcp-hostip
image: ghcr.io/datadog/apps-tracegen:main
env:
# IP of the node - the trace-agent listens on it when hostPort is enabled
- name: DD_AGENT_HOST
valueFrom:
fieldRef:
fieldPath: status.hostIP
- name: DD_SERVICE
value: "e2e-test-apm-hostip"
- name: DD_ENV
value: "e2e-operator"
resources:
requests:
memory: "32Mi"
cpu: "2m"
limits:
memory: "32Mi"
cpu: "10m"
- name: tracegen-tcp-agent-service
image: ghcr.io/datadog/apps-tracegen:main
env:
# Kubernetes service of the node Agent - enabled by default with the APM feature
# The service is created by the Datadog Operator following convention: <datadog-agent-name>-agent
- name: DD_AGENT_HOST
value: "datadog-agent-apm-agent"
- name: DD_SERVICE
value: "e2e-test-apm-agent-service"
- name: DD_ENV
value: "e2e-operator"
resources:
requests:
memory: "32Mi"
cpu: "2m"
limits:
memory: "32Mi"
cpu: "10m"
- name: tracegen-udp
image: ghcr.io/datadog/apps-tracegen:main
env:
# Socket of the trace-agent
- name: DD_TRACE_AGENT_URL
value: "unix:///var/run/datadog/apm.socket"
- name: DD_SERVICE
value: "e2e-test-apm-socket"
- name: DD_ENV
value: "e2e-operator"
resources:
requests:
memory: "32Mi"
cpu: "2m"
limits:
memory: "32Mi"
cpu: "10m"
volumeMounts:
- name: apmsocketpath
mountPath: /var/run/datadog
volumes:
- name: apmsocketpath
hostPath:
path: /var/run/datadog/
69 changes: 69 additions & 0 deletions test/e2e/tests/k8s_suite/k8s_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,69 @@ func (s *k8sSuite) TestGenericK8s() {
s.verifyAPILogs()
}, 900*time.Second, 15*time.Second, "could not valid logs collection in time")
})

// Subtest: deploys the APM-enabled DatadogAgent manifest (apm/datadog-agent-apm.yaml) together
// with the tracegen workload (apm/tracegen-deploy.yaml), then verifies that traces are reported
// both by the Agent status output and by the fake intake.
// NOTE(review): "UDP" in the subtest name presumably refers to the unix domain socket (UDS)
// transport used by one of the tracegen containers - confirm and rename if so.
s.T().Run("APM hostPort k8s service UDP works", func(t *testing.T) {

// Step 1: re-provision the environment without any DatadogAgent so that a potentially
// lingering DatadogAgent is cleaned up first.
// This avoids a race where the new Agent cannot bind to the hostPort.
withoutDDAProvisionerOptions := []provisioners.KubernetesProvisionerOption{
provisioners.WithTestName("e2e-operator-apm"),
provisioners.WithoutDDA(),
provisioners.WithLocal(s.local),
}
withoutDDAProvisionerOptions = append(withoutDDAProvisionerOptions, defaultProvisionerOpts...)
s.UpdateEnv(provisioners.KubernetesProvisioner(withoutDDAProvisionerOptions...))

// Extra label-selector clause appended to common.NodeAgentSelector below so that only
// pods belonging to the "datadog-agent-apm" DatadogAgent are matched.
var apmAgentSelector = ",agent.datadoghq.com/name=datadog-agent-apm"
ddaConfigPath, err := common.GetAbsPath(filepath.Join(common.ManifestsPath, "apm", "datadog-agent-apm.yaml"))
// NOTE(review): this reports on the suite-level s.T() rather than the subtest's t - confirm intended.
assert.NoError(s.T(), err)

// Step 2: build the DatadogAgent options; the APM-specific config comes first and the
// suite-wide defaults are appended after it.
ddaOpts := []agentwithoperatorparams.Option{
agentwithoperatorparams.WithDDAConfig(agentwithoperatorparams.DDAConfig{
Name: "datadog-agent-apm",
YamlFilePath: ddaConfigPath,
}),
}
ddaOpts = append(ddaOpts, defaultDDAOpts...)

// Same test name as the cleanup step above, this time with the DatadogAgent options and the
// tracegen deployment added as a YAML workload.
ddaProvisionerOptions := []provisioners.KubernetesProvisionerOption{
provisioners.WithTestName("e2e-operator-apm"),
provisioners.WithDDAOptions(ddaOpts...),
provisioners.WithYAMLWorkload(provisioners.YAMLWorkload{
Name: "tracegen-deploy",
Path: strings.Join([]string{common.ManifestsPath, "apm", "tracegen-deploy.yaml"}, "/"),
}),
provisioners.WithLocal(s.local),
}
ddaProvisionerOptions = append(ddaProvisionerOptions, defaultProvisionerOpts...)

// Deploy APM DatadogAgent and tracegen
s.UpdateEnv(provisioners.KubernetesProvisioner(ddaProvisionerOptions...))

// Verify traces collection on agent pod.
// The closure below is retried until all of its assertions pass or the timeout expires.
s.EventuallyWithTf(func(c *assert.CollectT) {
// Verify tracegen deployment is running
utils.VerifyNumPodsForSelector(s.T(), c, common.NamespaceName, s.Env().KubernetesCluster.Client(), 1, "app=tracegen-tribrid")

// Verify agent pods are running
utils.VerifyAgentPods(s.T(), c, common.NamespaceName, s.Env().KubernetesCluster.Client(), common.NodeAgentSelector+apmAgentSelector)
agentPods, err := s.Env().KubernetesCluster.Client().CoreV1().Pods(common.NamespaceName).List(context.TODO(), metav1.ListOptions{LabelSelector: common.NodeAgentSelector + apmAgentSelector, FieldSelector: "status.phase=Running"})
assert.NoError(c, err)

// This works because we have a single Agent pod (so located on same node as tracegen)
// Otherwise, we would need to deploy tracegen on the same node as the Agent pod / as a DaemonSet
for _, pod := range agentPods.Items {

// Run `agent status "apm agent" -j` inside the pod ("agent" is presumably the container
// name - confirm against PodExec) and hand the output to VerifyAgentTraces.
output, _, err := s.Env().KubernetesCluster.KubernetesClient.PodExec(common.NamespaceName, pod.Name, "agent", []string{"agent", "status", "apm agent", "-j"})
assert.NoError(c, err)

utils.VerifyAgentTraces(c, output)
}

// Verify traces collection ingestion by fakeintake
s.verifyAPITraces(c)
// Retry every 15s for up to 600s.
}, 600*time.Second, 15*time.Second, "could not validate traces on agent pod") // TODO: check duration
})
}

func (s *k8sSuite) verifyAPILogs() {
Expand All @@ -262,6 +325,12 @@ func (s *k8sSuite) verifyAPILogs() {
s.Assert().NotEmptyf(logs, fmt.Sprintf("Expected fake intake-ingested logs to not be empty: %s", err))
}

// verifyAPITraces asserts that the fake intake has ingested at least one trace.
//
// Failures are recorded on the CollectT c, so the function is meant to be called from
// inside an EventuallyWithT-style retry closure.
func (s *k8sSuite) verifyAPITraces(c *assert.CollectT) {
	traces, err := s.Env().FakeIntake.Client().GetTraces()
	// If the fetch itself failed, the emptiness check would only add a second, misleading
	// failure on top of the real one, so stop here.
	if !assert.NoError(c, err, "could not fetch traces from fake intake") {
		return
	}
	// Use a constant message: the previous code fed a pre-formatted string to an
	// f-style assertion (so any '%' in it was re-interpreted) and rendered a nil error
	// as "%!s(<nil>)".
	assert.NotEmpty(c, traces, "Expected fake intake-ingested traces to not be empty")
}

func (s *k8sSuite) verifyKSMCheck(c *assert.CollectT) {
metricNames, err := s.Env().FakeIntake.Client().GetMetricNames()
assert.NoError(c, err)
Expand Down
52 changes: 49 additions & 3 deletions test/e2e/tests/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ package utils
import (
"context"
"fmt"
"strconv"
"strings"
"testing"

"github.com/DataDog/datadog-operator/test/e2e/common"
"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeClient "k8s.io/client-go/kubernetes"
"strconv"
"strings"
"testing"
)

func VerifyOperator(t *testing.T, c *assert.CollectT, namespace string, k8sClient kubeClient.Interface) {
Expand Down Expand Up @@ -106,3 +107,48 @@ func VerifyAgentPodLogs(c *assert.CollectT, collectorOutput string) {
totalIntegrations := len(agentLogs)
assert.True(c, tailedIntegrations >= totalIntegrations*80/100, "Expected at least 80%% of integrations to be tailed, got %d/%d", tailedIntegrations, totalIntegrations)
}

// isInternalTrafficPolicySupported reports whether the Kubernetes version under test
// (common.K8sVersion) supports the internalTrafficPolicy field, i.e. whether its minor
// version is at least 22.
// A version that is not of the form X.Y[...], or whose minor component is not an integer,
// is reported as unsupported.
func isInternalTrafficPolicySupported() bool {
	const minSupportedMinor = 22

	// Only the minor component (index 1) is inspected, so anything after it can stay unsplit.
	parts := strings.SplitN(common.K8sVersion, ".", 3)
	if len(parts) < 2 {
		// No "." in the version string: nothing to parse, and indexing would panic.
		return false
	}
	if minor, err := strconv.Atoi(parts[1]); err == nil {
		return minor >= minSupportedMinor
	}
	return false
}

// VerifyAgentTraces asserts that the Agent's APM status output reports received traces for
// every expected tracegen service.
//
// collectorOutput is parsed with common.ParseCollectorJson; the receiver statistics are read
// from its "apmStats" -> "receiver" list, where each entry carries a "Service" name and a
// "TracesReceived" count. All failures are recorded on c, so the function is safe to call
// from an EventuallyWithT-style retry closure: missing or malformed sections produce
// assertion failures instead of panics.
func VerifyAgentTraces(c *assert.CollectT, collectorOutput string) {
	apmAgentJson := common.ParseCollectorJson(collectorOutput)
	// The order of services in the Agent JSON output is not guaranteed.
	// We use a map to assert that we have received traces for all expected services.
	expectedServices := map[string]bool{
		"e2e-test-apm-hostip": true,
		"e2e-test-apm-socket": true,
	}
	// On Kubernetes >= 1.22, the node Agent k8s service is created since internalTrafficPolicy is supported.
	if isInternalTrafficPolicySupported() {
		expectedServices["e2e-test-apm-agent-service"] = true
	}
	// Track found services
	foundServices := map[string]bool{}

	if apmAgentJson != nil {
		// Comma-ok assertions: a missing or null "apmStats"/"receiver" section yields a nil
		// map/slice, which is safe to index and range over. The final comparison below then
		// reports the missing services.
		apmStats, _ := apmAgentJson["apmStats"].(map[string]interface{})
		receiverStats, _ := apmStats["receiver"].([]interface{})
		for _, entry := range receiverStats {
			service, ok := entry.(map[string]interface{})
			if !assert.True(c, ok, "Unexpected receiver entry in Agent status output: %v", entry) {
				continue
			}
			serviceName, hasName := service["Service"].(string)
			tracesReceived, hasCount := service["TracesReceived"].(float64)
			if !assert.True(c, hasName && hasCount, "Receiver entry is missing Service or TracesReceived: %v", service) {
				continue
			}
			// Ensure we received at least one trace for the service
			assert.Greater(c, tracesReceived, float64(0), "Expected traces to be received for service %s", serviceName)
			// Mark the service as found
			foundServices[serviceName] = true
		}
	}
	assert.Equal(c, expectedServices, foundServices, "The found services do not match the expected services")
}
Loading