Skip to content

Commit

Permalink
tagger: handle GPU tags (#32052)
Browse files Browse the repository at this point in the history
  • Loading branch information
gjulianm authored Jan 13, 2025
1 parent 8b6ba64 commit e8c8a77
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 2 deletions.
1 change: 1 addition & 0 deletions comp/core/tagger/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ Tagger entities are identified by a string-typed ID, with one of the following f
| workloadmeta.KindKubernetesMetadata | `kubernetes_metadata://<group>/<resourceType>/<namespace>/<name>` (`<namespace>` is empty in cluster-scoped objects) |
| workloadmeta.KindKubernetesPod | `kubernetes_pod_uid://<uid>` |
| workloadmeta.KindProcess | `process://<pid>` |
| workloadmeta.KindGPU | `gpu://<gpu-uuid>` |

## Tagger

Expand Down
31 changes: 30 additions & 1 deletion comp/core/tagger/collectors/workloadmeta_extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ func (c *WorkloadMetaCollector) processEvents(evBundle workloadmeta.EventBundle)
case workloadmeta.KindKubernetesDeployment:
tagInfos = append(tagInfos, c.handleKubeDeployment(ev)...)
case workloadmeta.KindGPU:
// tagInfos = append(tagInfos, c.handleGPU(ev)...) No tags for now
tagInfos = append(tagInfos, c.handleGPU(ev)...)
default:
log.Errorf("cannot handle event for entity %q with kind %q", entityID.ID, entityID.Kind)
}
Expand Down Expand Up @@ -615,6 +615,35 @@ func (c *WorkloadMetaCollector) handleKubeMetadata(ev workloadmeta.Event) []*typ
return tagInfos
}

// handleGPU builds the tagger TagInfo list for a workloadmeta GPU event.
//
// The event entity is asserted to be a *workloadmeta.GPU without a check, so
// any other entity type panics. The GPU vendor, device and ID are added to the
// tag list through AddLow, under the gpu_vendor, gpu_device and gpu_uuid tag
// names respectively.
//
// It returns nil when the computed tag list is empty across every
// cardinality; otherwise it returns a single TagInfo whose source is
// gpuSource and whose entity ID is derived from the GPU's workloadmeta
// entity ID.
func (c *WorkloadMetaCollector) handleGPU(ev workloadmeta.Event) []*types.TagInfo {
	device := ev.Entity.(*workloadmeta.GPU)

	gpuTags := taglist.NewTagList()
	gpuTags.AddLow(tags.KubeGPUVendor, device.Vendor)
	gpuTags.AddLow(tags.KubeGPUDevice, device.Device)
	gpuTags.AddLow(tags.KubeGPUUUID, device.ID)

	lowCard, orchCard, highCard, standardTags := gpuTags.Compute()

	// Nothing to report for this entity: skip emitting an empty TagInfo.
	if total := len(lowCard) + len(orchCard) + len(highCard) + len(standardTags); total == 0 {
		return nil
	}

	info := &types.TagInfo{
		Source:               gpuSource,
		EntityID:             common.BuildTaggerEntityID(device.EntityID),
		HighCardTags:         highCard,
		OrchestratorCardTags: orchCard,
		LowCardTags:          lowCard,
		StandardTags:         standardTags,
	}

	return []*types.TagInfo{info}
}

func (c *WorkloadMetaCollector) extractTagsFromPodLabels(pod *workloadmeta.KubernetesPod, tagList *taglist.TagList) {
for name, value := range pod.Labels {
switch name {
Expand Down
1 change: 1 addition & 0 deletions comp/core/tagger/collectors/workloadmeta_main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ const (
processSource = workloadmetaCollectorName + "-" + string(workloadmeta.KindProcess)
kubeMetadataSource = workloadmetaCollectorName + "-" + string(workloadmeta.KindKubernetesMetadata)
deploymentSource = workloadmetaCollectorName + "-" + string(workloadmeta.KindKubernetesDeployment)
gpuSource = workloadmetaCollectorName + "-" + string(workloadmeta.KindGPU)

clusterTagNamePrefix = "kube_cluster_name"
)
Expand Down
55 changes: 55 additions & 0 deletions comp/core/tagger/collectors/workloadmeta_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2253,6 +2253,61 @@ func TestHandleContainerImage(t *testing.T) {
}
}

// TestHandleGPU verifies that handleGPU converts a GPU workloadmeta entity
// into the expected TagInfo list: vendor, device and UUID tags at low
// cardinality, attributed to gpuSource and keyed by the GPU tagger entity ID.
func TestHandleGPU(t *testing.T) {
	gpuEntityID := workloadmeta.EntityID{
		Kind: workloadmeta.KindGPU,
		ID:   "gpu-1234",
	}
	wantEntityID := types.NewEntityID(types.GPU, gpuEntityID.ID)

	type testCase struct {
		name   string
		device workloadmeta.GPU
		want   []*types.TagInfo
	}

	cases := []testCase{
		{
			name: "basic",
			device: workloadmeta.GPU{
				EntityID:   gpuEntityID,
				EntityMeta: workloadmeta.EntityMeta{Name: gpuEntityID.ID},
				Vendor:     "nvidia",
				Device:     "tesla-v100",
			},
			want: []*types.TagInfo{
				{
					Source:               gpuSource,
					EntityID:             wantEntityID,
					HighCardTags:         []string{},
					OrchestratorCardTags: []string{},
					LowCardTags: []string{
						"gpu_vendor:nvidia",
						"gpu_device:tesla-v100",
						"gpu_uuid:gpu-1234",
					},
					StandardTags: []string{},
				},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// A fresh collector per subtest, built on a mock config bound to this subtest.
			collector := NewWorkloadMetaCollector(context.Background(), configmock.New(t), nil, nil)

			event := workloadmeta.Event{
				Type:   workloadmeta.EventTypeSet,
				Entity: &tc.device,
			}

			assertTagInfoListEqual(t, tc.want, collector.handleGPU(event))
		})
	}
}

func TestHandleDelete(t *testing.T) {
const (
podName = "datadog-agent-foobar"
Expand Down
2 changes: 2 additions & 0 deletions comp/core/tagger/common/entity_id_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ func BuildTaggerEntityID(entityID workloadmeta.EntityID) types.EntityID {
return types.NewEntityID(types.KubernetesDeployment, entityID.ID)
case workloadmeta.KindKubernetesMetadata:
return types.NewEntityID(types.KubernetesMetadata, entityID.ID)
case workloadmeta.KindGPU:
return types.NewEntityID(types.GPU, entityID.ID)
default:
log.Errorf("can't recognize entity %q with kind %q; trying %s://%s as tagger entity",
entityID.ID, entityID.Kind, entityID.ID, entityID.Kind)
Expand Down
11 changes: 10 additions & 1 deletion comp/core/tagger/tags/tags.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,18 @@ const (

// GPU related tags

// KubeGPUVendor the tag for the Kubernetes Resource GPU vendor
// KubeGPUVendor is the tag for the Kubernetes Resource GPU vendor (e.g., NVIDIA).
KubeGPUVendor = "gpu_vendor"

// KubeGPUDevice is the tag for the Kubernetes Resource GPU device. This is
// the commercial name of the device (e.g., Tesla T4). See
// comp/core/workloadmeta/def/types.go:GPU.Device for more detail on this
// field.
KubeGPUDevice = "gpu_device"

// KubeGPUUUID is the tag for the Kubernetes Resource GPU UUID.
KubeGPUUUID = "gpu_uuid"

// OpenshiftDeploymentConfig is the tag for the OpenShift deployment config name
OpenshiftDeploymentConfig = "oshift_deployment_config"

Expand Down
3 changes: 3 additions & 0 deletions comp/core/tagger/types/entity_id.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ const (
Process EntityIDPrefix = "process"
// InternalID is the prefix `internal`
InternalID EntityIDPrefix = "internal"
// GPU is the prefix `gpu`
GPU EntityIDPrefix = "gpu"
)

// AllPrefixesSet returns a set of all possible entity id prefixes that can be used in the tagger
Expand All @@ -85,6 +87,7 @@ func AllPrefixesSet() map[EntityIDPrefix]struct{} {
KubernetesPodUID: {},
Process: {},
InternalID: {},
GPU: {},
}
}

Expand Down
1 change: 1 addition & 0 deletions comp/core/tagger/types/filter_builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ func TestFilterBuilderOps(t *testing.T) {
KubernetesPodUID: {},
Process: {},
InternalID: {},
GPU: {},
},
cardinality: HighCardinality,
},
Expand Down

0 comments on commit e8c8a77

Please sign in to comment.