Skip to content

Commit

Permalink
Merge branch 'fanny/refactor-kind-e2e' into fanny/renable-rc-e2e
Browse files Browse the repository at this point in the history
  • Loading branch information
fanny-jiang authored Jan 10, 2025
2 parents 8876dc2 + 4f6af20 commit f506933
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 4 deletions.
3 changes: 2 additions & 1 deletion test/e2e/kind_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@ package e2e
import (
"context"
"fmt"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/provisioners"
"path/filepath"
"strconv"
"strings"
"time"

"github.com/DataDog/datadog-agent/test/new-e2e/pkg/provisioners"

"github.com/DataDog/datadog-agent/test/new-e2e/pkg/components"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/e2e"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/runner"
Expand Down
19 changes: 19 additions & 0 deletions test/e2e/manifests/new_manifests/apm/datadog-agent-apm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# DatadogAgent manifest for the APM e2e test: enables the APM feature with both
# a hostPort (TCP 8126) and a Unix domain socket receiver.
# NOTE(review): metadata.name is not set here; the e2e test passes the name
# ("datadog-agent-apm") alongside this file's path — confirm it is applied there.
apiVersion: datadoghq.com/v2alpha1
kind: DatadogAgent
metadata:
  namespace: e2e-operator
  labels:
    agent.datadoghq.com/e2e-test: datadog-agent-apm
spec:
  global:
    kubelet:
      # Kubelet TLS verification is disabled for the test cluster.
      tlsVerify: false
  features:
    apm:
      enabled: true
      # TCP receiver exposed on the node, used by tracers targeting the host IP.
      hostPortConfig:
        enabled: true
        hostPort: 8126
      # Unix domain socket receiver, used by tracers mounting /var/run/datadog.
      unixDomainSocketConfig:
        enabled: true
        path: /var/run/datadog/apm.socket
79 changes: 79 additions & 0 deletions test/e2e/manifests/new_manifests/apm/tracegen-deploy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Trace generator Deployment for the APM e2e test. A single pod runs three
# tracegen containers, each sending traces to the trace-agent through a
# different transport: node host IP (hostPort), Kubernetes service, and Unix
# domain socket.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: tracegen-tribrid
  namespace: e2e-operator
  labels:
    app: tracegen-tribrid
spec:
  replicas: 1
  selector:
    matchLabels:
      app: tracegen-tribrid
  template:
    metadata:
      labels:
        app: tracegen-tribrid
    spec:
      containers:
        - name: tracegen-tcp-hostip
          image: ghcr.io/datadog/apps-tracegen:main
          env:
            # IP of the node - the trace-agent listens on it when hostPort is enabled
            - name: DD_AGENT_HOST
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
            - name: DD_SERVICE
              value: "e2e-test-apm-hostip"
            - name: DD_ENV
              value: "e2e-operator"
          resources:
            requests:
              memory: "32Mi"
              cpu: "2m"
            limits:
              memory: "32Mi"
              cpu: "10m"
        - name: tracegen-tcp-agent-service
          image: ghcr.io/datadog/apps-tracegen:main
          env:
            # Kubernetes service of the node Agent - enabled by default with the APM feature
            # The service is created by the Datadog Operator following convention: <datadog-agent-name>-agent
            - name: DD_AGENT_HOST
              value: "datadog-agent-apm-agent"
            - name: DD_SERVICE
              value: "e2e-test-apm-agent-service"
            - name: DD_ENV
              value: "e2e-operator"
          resources:
            requests:
              memory: "32Mi"
              cpu: "2m"
            limits:
              memory: "32Mi"
              cpu: "10m"
        # Despite the "udp" in its name, this container sends traces over the
        # trace-agent's Unix domain socket (see DD_TRACE_AGENT_URL below).
        - name: tracegen-udp
          image: ghcr.io/datadog/apps-tracegen:main
          env:
            # Unix domain socket of the trace-agent
            - name: DD_TRACE_AGENT_URL
              value: "unix:///var/run/datadog/apm.socket"
            - name: DD_SERVICE
              value: "e2e-test-apm-socket"
            - name: DD_ENV
              value: "e2e-operator"
          resources:
            requests:
              memory: "32Mi"
              cpu: "2m"
            limits:
              memory: "32Mi"
              cpu: "10m"
          volumeMounts:
            # Gives the container access to the socket under /var/run/datadog
            - name: apmsocketpath
              mountPath: /var/run/datadog
      volumes:
        # Host directory holding apm.socket; shared through the node filesystem
        - name: apmsocketpath
          hostPath:
            path: /var/run/datadog/
69 changes: 69 additions & 0 deletions test/e2e/tests/k8s_suite/k8s_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,69 @@ func (s *k8sSuite) TestGenericK8s() {
s.verifyAPILogs()
}, 900*time.Second, 15*time.Second, "could not valid logs collection in time")
})

s.T().Run("APM hostPort k8s service UDP works", func(t *testing.T) {

// Cleanup to avoid potential lingering DatadogAgent
// Avoid race with the new Agent not being able to bind to the hostPort
withoutDDAProvisionerOptions := []provisioners.KubernetesProvisionerOption{
provisioners.WithTestName("e2e-operator-apm"),
provisioners.WithoutDDA(),
provisioners.WithLocal(s.local),
}
withoutDDAProvisionerOptions = append(withoutDDAProvisionerOptions, defaultProvisionerOpts...)
s.UpdateEnv(provisioners.KubernetesProvisioner(withoutDDAProvisionerOptions...))

var apmAgentSelector = ",agent.datadoghq.com/name=datadog-agent-apm"
ddaConfigPath, err := common.GetAbsPath(filepath.Join(common.ManifestsPath, "apm", "datadog-agent-apm.yaml"))
assert.NoError(s.T(), err)

ddaOpts := []agentwithoperatorparams.Option{
agentwithoperatorparams.WithDDAConfig(agentwithoperatorparams.DDAConfig{
Name: "datadog-agent-apm",
YamlFilePath: ddaConfigPath,
}),
}
ddaOpts = append(ddaOpts, defaultDDAOpts...)

ddaProvisionerOptions := []provisioners.KubernetesProvisionerOption{
provisioners.WithTestName("e2e-operator-apm"),
provisioners.WithDDAOptions(ddaOpts...),
provisioners.WithYAMLWorkload(provisioners.YAMLWorkload{
Name: "tracegen-deploy",
Path: strings.Join([]string{common.ManifestsPath, "apm", "tracegen-deploy.yaml"}, "/"),
}),
provisioners.WithLocal(s.local),
}
ddaProvisionerOptions = append(ddaProvisionerOptions, defaultProvisionerOpts...)

// Deploy APM DatadogAgent and tracegen
s.UpdateEnv(provisioners.KubernetesProvisioner(ddaProvisionerOptions...))

// Verify traces collection on agent pod
s.EventuallyWithTf(func(c *assert.CollectT) {
// Verify tracegen deployment is running
utils.VerifyNumPodsForSelector(s.T(), c, common.NamespaceName, s.Env().KubernetesCluster.Client(), 1, "app=tracegen-tribrid")

// Verify agent pods are running
utils.VerifyAgentPods(s.T(), c, common.NamespaceName, s.Env().KubernetesCluster.Client(), common.NodeAgentSelector+apmAgentSelector)
agentPods, err := s.Env().KubernetesCluster.Client().CoreV1().Pods(common.NamespaceName).List(context.TODO(), metav1.ListOptions{LabelSelector: common.NodeAgentSelector + apmAgentSelector, FieldSelector: "status.phase=Running"})
assert.NoError(c, err)

// This works because we have a single Agent pod (so located on same node as tracegen)
// Otherwise, we would need to deploy tracegen on the same node as the Agent pod / as a DaemonSet
for _, pod := range agentPods.Items {

output, _, err := s.Env().KubernetesCluster.KubernetesClient.PodExec(common.NamespaceName, pod.Name, "agent", []string{"agent", "status", "apm agent", "-j"})
assert.NoError(c, err)

utils.VerifyAgentTraces(c, output)
}

// Verify traces collection ingestion by fakeintake
s.verifyAPITraces(c)
}, 600*time.Second, 15*time.Second, "could not validate traces on agent pod") // TODO: check duration
})
}

func (s *k8sSuite) verifyAPILogs() {
Expand All @@ -262,6 +325,12 @@ func (s *k8sSuite) verifyAPILogs() {
s.Assert().NotEmptyf(logs, fmt.Sprintf("Expected fake intake-ingested logs to not be empty: %s", err))
}

// verifyAPITraces asserts that the fake intake has ingested at least one trace.
//
// Failures are recorded on c rather than on the test itself, so the function
// can be used inside a polling assertion that retries until it passes.
func (s *k8sSuite) verifyAPITraces(c *assert.CollectT) {
	traces, err := s.Env().FakeIntake.Client().GetTraces()
	// Stop after a failed request: traces carries no meaningful data in that
	// case, and a second "empty" failure would only add noise to the report.
	if !assert.NoError(c, err) {
		return
	}
	// Use a constant message: err is known to be nil here, so formatting it
	// into the message would only ever print "%!s(<nil>)".
	assert.NotEmpty(c, traces, "Expected fake intake-ingested traces to not be empty")
}

func (s *k8sSuite) verifyKSMCheck(c *assert.CollectT) {
metricNames, err := s.Env().FakeIntake.Client().GetMetricNames()
assert.NoError(c, err)
Expand Down
52 changes: 49 additions & 3 deletions test/e2e/tests/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ package utils
import (
"context"
"fmt"
"strconv"
"strings"
"testing"

"github.com/DataDog/datadog-operator/test/e2e/common"
"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeClient "k8s.io/client-go/kubernetes"
"strconv"
"strings"
"testing"
)

func VerifyOperator(t *testing.T, c *assert.CollectT, namespace string, k8sClient kubeClient.Interface) {
Expand Down Expand Up @@ -106,3 +107,48 @@ func VerifyAgentPodLogs(c *assert.CollectT, collectorOutput string) {
totalIntegrations := len(agentLogs)
assert.True(c, tailedIntegrations >= totalIntegrations*80/100, "Expected at least 80%% of integrations to be tailed, got %d/%d", tailedIntegrations, totalIntegrations)
}

// isInternalTrafficPolicySupported reports whether the Kubernetes version under
// test (common.K8sVersion) supports the internalTrafficPolicy field, which is
// decided solely by the minor version being >= 22.
//
// A version without a "." separator, or whose minor component is not a plain
// integer, is reported as unsupported instead of causing a panic.
func isInternalTrafficPolicySupported() bool {
	// Drop the major component; without a "." the version is not in X.Y form.
	_, afterMajor, hasMinor := strings.Cut(common.K8sVersion, ".")
	if !hasMinor {
		return false
	}
	// Keep only the minor component, ignoring any patch suffix (X.Y.Z).
	minorText, _, _ := strings.Cut(afterMajor, ".")
	if minor, err := strconv.Atoi(minorText); err == nil {
		return minor >= 22
	}
	return false
}

// VerifyAgentTraces asserts that the Agent status JSON in collectorOutput
// reports received traces for exactly the expected tracegen services.
//
// All failures are recorded on c. Missing or unexpectedly shaped sections of
// the JSON are reported as assertion failures (through the final service-set
// comparison) instead of panicking, so a polling caller can simply retry while
// the trace-agent has not published its receiver stats yet.
func VerifyAgentTraces(c *assert.CollectT, collectorOutput string) {
	// The order of services in the Agent JSON output is not guaranteed.
	// We use a map to assert that we have received traces for all expected services.
	expectedServices := map[string]bool{
		"e2e-test-apm-hostip": true,
		"e2e-test-apm-socket": true,
	}
	// On Kubernetes >= 1.22, the node Agent k8s service is created since internalTrafficPolicy is supported.
	if isInternalTrafficPolicySupported() {
		expectedServices["e2e-test-apm-agent-service"] = true
	}
	// Track found services
	foundServices := map[string]bool{}

	apmAgentJSON := common.ParseCollectorJson(collectorOutput)
	// Comma-ok assertions: a nil/absent "apmStats" or "receiver" yields an
	// empty iteration below rather than a type-assertion panic. Reading from
	// a nil map is safe in Go, so no explicit nil guard is needed.
	apmStats, _ := apmAgentJSON["apmStats"].(map[string]interface{})
	receiverStats, _ := apmStats["receiver"].([]interface{})
	for _, entry := range receiverStats {
		stats, ok := entry.(map[string]interface{})
		if !ok {
			continue
		}
		serviceName, ok := stats["Service"].(string)
		if !ok {
			continue
		}
		// A missing or non-numeric counter is treated as zero traces, which
		// fails the assertion below.
		tracesReceived, _ := stats["TracesReceived"].(float64)
		// Ensure we received at least one trace for the service
		assert.Greater(c, tracesReceived, float64(0), "Expected traces to be received for service %s", serviceName)
		// Mark the service as found
		foundServices[serviceName] = true
	}
	assert.Equal(c, expectedServices, foundServices, "The found services do not match the expected services")
}

0 comments on commit f506933

Please sign in to comment.