From 56aa52edff2f364847dd678731624576072d66fd Mon Sep 17 00:00:00 2001 From: glowkey <4634408+glowkey@users.noreply.github.com> Date: Wed, 20 Dec 2023 09:20:36 -0700 Subject: [PATCH] Rename most "Gpu" instances to "GPU" (#226) Signed-off-by: Douglas Wightman --- pkg/cmd/app.go | 6 +- pkg/dcgmexporter/gpu_collector.go | 4 +- pkg/dcgmexporter/gpu_collector_test.go | 4 +- pkg/dcgmexporter/kubernetes.go | 2 +- pkg/dcgmexporter/system_info.go | 132 ++++++++++++------------- pkg/dcgmexporter/system_info_test.go | 22 ++--- pkg/dcgmexporter/types.go | 2 +- 7 files changed, 86 insertions(+), 86 deletions(-) diff --git a/pkg/cmd/app.go b/pkg/cmd/app.go index 50079723..f88899f9 100644 --- a/pkg/cmd/app.go +++ b/pkg/cmd/app.go @@ -57,7 +57,7 @@ var ( CLISwitchDevices = "switch-devices" CLICPUDevices = "cpu-devices" CLINoHostname = "no-hostname" - CLIUseFakeGpus = "fake-gpus" + CLIUseFakeGPUs = "fake-gpus" CLIConfigMapData = "configmap-data" CLIWebSystemdSocket = "web-systemd-socket" CLIWebConfigFile = "web-config-file" @@ -163,7 +163,7 @@ func NewApp(buildVersion ...string) *cli.App { EnvVars: []string{"DCGM_EXPORTER_OTHER_DEVICES_STR"}, }, &cli.BoolFlag{ - Name: CLIUseFakeGpus, + Name: CLIUseFakeGPUs, Value: false, Usage: "Accept GPUs that are fake, for testing purposes only", EnvVars: []string{"DCGM_EXPORTER_USE_FAKE_GPUS"}, @@ -371,7 +371,7 @@ func contextToConfig(c *cli.Context) (*dcgmexporter.Config, error) { SwitchDevices: sOpt, CPUDevices: cOpt, NoHostname: c.Bool(CLINoHostname), - UseFakeGpus: c.Bool(CLIUseFakeGpus), + UseFakeGPUs: c.Bool(CLIUseFakeGPUs), ConfigMapData: c.String(CLIConfigMapData), WebSystemdSocket: c.Bool(CLIWebSystemdSocket), WebConfigFile: c.String(CLIWebConfigFile), diff --git a/pkg/dcgmexporter/gpu_collector.go b/pkg/dcgmexporter/gpu_collector.go index f0c6007f..0314158b 100644 --- a/pkg/dcgmexporter/gpu_collector.go +++ b/pkg/dcgmexporter/gpu_collector.go @@ -25,7 +25,7 @@ import ( ) func NewDCGMCollector(c []Counter, config *Config, entityType dcgm.Field_Entity_Group) (*DCGMCollector, func(), error) { - sysInfo, err := InitializeSystemInfo(config.GPUDevices, config.SwitchDevices, config.CPUDevices, config.UseFakeGpus, entityType) + sysInfo, err := InitializeSystemInfo(config.GPUDevices, config.SwitchDevices, config.CPUDevices, config.UseFakeGPUs, entityType) if err != nil { return nil, func() {}, err } @@ -207,7 +207,7 @@ func ToCPUMetric(values []dcgm.FieldValue_v1, c []Counter, mi MonitoringInfo, us return metrics } -func ToMetric(values []dcgm.FieldValue_v1, c []Counter, d dcgm.Device, instanceInfo *GpuInstanceInfo, useOld bool, hostname string) []Metric { +func ToMetric(values []dcgm.FieldValue_v1, c []Counter, d dcgm.Device, instanceInfo *GPUInstanceInfo, useOld bool, hostname string) []Metric { var metrics []Metric var labels = map[string]string{} diff --git a/pkg/dcgmexporter/gpu_collector_test.go b/pkg/dcgmexporter/gpu_collector_test.go index ad0eae6b..747888c7 100644 --- a/pkg/dcgmexporter/gpu_collector_test.go +++ b/pkg/dcgmexporter/gpu_collector_test.go @@ -67,7 +67,7 @@ func testDCGMGPUCollector(t *testing.T, counters []Counter) (*DCGMCollector, fun GPUDevices: dOpt, NoHostname: false, UseOldNamespace: false, - UseFakeGpus: false, + UseFakeGPUs: false, } dcgmGetAllDeviceCount = func() (uint, error) { @@ -152,7 +152,7 @@ func testDCGMCPUCollector(t *testing.T, counters []Counter) (*DCGMCollector, fun CPUDevices: dOpt, NoHostname: false, UseOldNamespace: false, - UseFakeGpus: false, + UseFakeGPUs: false, } dcgmGetAllDeviceCount = func() (uint, error) { diff --git a/pkg/dcgmexporter/kubernetes.go b/pkg/dcgmexporter/kubernetes.go index 71399175..b745d585 100644 --- a/pkg/dcgmexporter/kubernetes.go +++ b/pkg/dcgmexporter/kubernetes.go @@ -159,7 +159,7 @@ func ToDeviceToPod(devicePods *podresourcesapi.ListPodResourcesResponse, sysInfo if strings.HasPrefix(deviceid, MIG_UUID_PREFIX) { gpuUuid, gi, _, err := nvml.ParseMigDeviceUUID(deviceid) if err == nil { - giIdentifier := GetGpuInstanceIdentifier(sysInfo, gpuUuid, gi) + giIdentifier := GetGPUInstanceIdentifier(sysInfo, gpuUuid, gi) deviceToPodMap[giIdentifier] = podInfo } else { gpuUuid = deviceid[len(MIG_UUID_PREFIX):] diff --git a/pkg/dcgmexporter/system_info.go b/pkg/dcgmexporter/system_info.go index 95b4baa5..b9386767 100644 --- a/pkg/dcgmexporter/system_info.go +++ b/pkg/dcgmexporter/system_info.go @@ -47,16 +47,16 @@ type ComputeInstanceInfo struct { EntityId uint } -type GpuInstanceInfo struct { +type GPUInstanceInfo struct { Info dcgm.MigEntityInfo ProfileName string EntityId uint ComputeInstances []ComputeInstanceInfo } -type GpuInfo struct { +type GPUInfo struct { DeviceInfo dcgm.Device - GpuInstances []GpuInstanceInfo + GPUInstances []GPUInstanceInfo MigEnabled bool } @@ -71,8 +71,8 @@ type CPUInfo struct { } type SystemInfo struct { - GpuCount uint - Gpus [dcgm.MAX_NUM_DEVICES]GpuInfo + GPUCount uint + GPUs [dcgm.MAX_NUM_DEVICES]GPUInfo gOpt DeviceOptions sOpt DeviceOptions cOpt DeviceOptions @@ -84,15 +84,15 @@ type SystemInfo struct { type MonitoringInfo struct { Entity dcgm.GroupEntityPair DeviceInfo dcgm.Device - InstanceInfo *GpuInstanceInfo + InstanceInfo *GPUInstanceInfo ParentId uint } -func SetGpuInstanceProfileName(sysInfo *SystemInfo, entityId uint, profileName string) bool { - for i := uint(0); i < sysInfo.GpuCount; i++ { - for j := range sysInfo.Gpus[i].GpuInstances { - if sysInfo.Gpus[i].GpuInstances[j].EntityId == entityId { - sysInfo.Gpus[i].GpuInstances[j].ProfileName = profileName +func SetGPUInstanceProfileName(sysInfo *SystemInfo, entityId uint, profileName string) bool { + for i := uint(0); i < sysInfo.GPUCount; i++ { + for j := range sysInfo.GPUs[i].GPUInstances { + if sysInfo.GPUs[i].GPUInstances[j].EntityId == entityId { + sysInfo.GPUs[i].GPUInstances[j].ProfileName = profileName return true } } @@ -105,7 +105,7 @@ func SetMigProfileNames(sysInfo *SystemInfo, values []dcgm.FieldValue_v2) error notFound := false err := fmt.Errorf("Cannot find match for entities:") for _, v := range values { - found := SetGpuInstanceProfileName(sysInfo, v.EntityId, dcgm.Fv2_String(v)) + found := SetGPUInstanceProfileName(sysInfo, v.EntityId, dcgm.Fv2_String(v)) if found == false { err = fmt.Errorf("%s group %d, id %d", err, v.EntityGroupId, v.EntityId) notFound = true @@ -137,9 +137,9 @@ func PopulateMigProfileNames(sysInfo *SystemInfo, entities []dcgm.GroupEntityPai return SetMigProfileNames(sysInfo, values) } -func GpuIdExists(sysInfo *SystemInfo, gpuId int) bool { - for i := uint(0); i < sysInfo.GpuCount; i++ { - if sysInfo.Gpus[i].DeviceInfo.GPU == uint(gpuId) { +func GPUIdExists(sysInfo *SystemInfo, gpuId int) bool { + for i := uint(0); i < sysInfo.GPUCount; i++ { + if sysInfo.GPUs[i].DeviceInfo.GPU == uint(gpuId) { return true } } @@ -164,9 +164,9 @@ func CPUIdExists(sysInfo *SystemInfo, cpuId int) bool { return false } -func GpuInstanceIdExists(sysInfo *SystemInfo, gpuInstanceId int) bool { - for i := uint(0); i < sysInfo.GpuCount; i++ { - for _, instance := range sysInfo.Gpus[i].GpuInstances { +func GPUInstanceIdExists(sysInfo *SystemInfo, gpuInstanceId int) bool { + for i := uint(0); i < sysInfo.GPUCount; i++ { + for _, instance := range sysInfo.GPUs[i].GPUInstances { if instance.EntityId == uint(gpuInstanceId) { return true } @@ -255,7 +255,7 @@ func VerifyDevicePresence(sysInfo *SystemInfo, gOpt DeviceOptions) error { if len(gOpt.MajorRange) > 0 && gOpt.MajorRange[0] != -1 { // Verify we can find all the specified GPUs for _, gpuId := range gOpt.MajorRange { - if GpuIdExists(sysInfo, gpuId) == false { + if GPUIdExists(sysInfo, gpuId) == false { return fmt.Errorf("Couldn't find requested GPU id %d", gpuId) } } @@ -263,7 +263,7 @@ func VerifyDevicePresence(sysInfo *SystemInfo, gOpt DeviceOptions) error { if len(gOpt.MinorRange) > 0 && gOpt.MinorRange[0] != -1 { for _, gpuInstanceId := range gOpt.MinorRange { - if GpuInstanceIdExists(sysInfo, gpuInstanceId) == false { + if GPUInstanceIdExists(sysInfo, gpuInstanceId) == false { return fmt.Errorf("Couldn't find requested GPU instance id %d", gpuInstanceId) } } @@ -359,21 +359,21 @@ func InitializeNvSwitchInfo(sysInfo SystemInfo, sOpt DeviceOptions) (SystemInfo, return sysInfo, nil } -func InitializeGpuInfo(sysInfo SystemInfo, gOpt DeviceOptions, useFakeGpus bool) (SystemInfo, error) { +func InitializeGPUInfo(sysInfo SystemInfo, gOpt DeviceOptions, useFakeGPUs bool) (SystemInfo, error) { gpuCount, err := dcgmGetAllDeviceCount() if err != nil { return sysInfo, err } - sysInfo.GpuCount = gpuCount + sysInfo.GPUCount = gpuCount - for i := uint(0); i < sysInfo.GpuCount; i++ { + for i := uint(0); i < sysInfo.GPUCount; i++ { // Default mig enabled to false - sysInfo.Gpus[i].MigEnabled = false - sysInfo.Gpus[i].DeviceInfo, err = dcgmGetDeviceInfo(i) + sysInfo.GPUs[i].MigEnabled = false + sysInfo.GPUs[i].DeviceInfo, err = dcgmGetDeviceInfo(i) if err != nil { - if useFakeGpus { - sysInfo.Gpus[i].DeviceInfo.GPU = i - sysInfo.Gpus[i].DeviceInfo.UUID = fmt.Sprintf("fake%d", i) + if useFakeGPUs { + sysInfo.GPUs[i].DeviceInfo.GPU = i + sysInfo.GPUs[i].DeviceInfo.UUID = fmt.Sprintf("fake%d", i) } else { return sysInfo, err } @@ -395,20 +395,20 @@ func InitializeGpuInfo(sysInfo SystemInfo, gOpt DeviceOptions, useFakeGpus bool) // We are adding a GPU instance gpuId = hierarchy.EntityList[i].Parent.EntityId entityId := hierarchy.EntityList[i].Entity.EntityId - instanceInfo := GpuInstanceInfo{ + instanceInfo := GPUInstanceInfo{ Info: hierarchy.EntityList[i].Info, ProfileName: "", EntityId: entityId, } - sysInfo.Gpus[gpuId].MigEnabled = true - sysInfo.Gpus[gpuId].GpuInstances = append(sysInfo.Gpus[gpuId].GpuInstances, instanceInfo) + sysInfo.GPUs[gpuId].MigEnabled = true + sysInfo.GPUs[gpuId].GPUInstances = append(sysInfo.GPUs[gpuId].GPUInstances, instanceInfo) entities = append(entities, dcgm.GroupEntityPair{dcgm.FE_GPU_I, entityId}) - instanceIndex = len(sysInfo.Gpus[gpuId].GpuInstances) - 1 + instanceIndex = len(sysInfo.GPUs[gpuId].GPUInstances) - 1 } else if hierarchy.EntityList[i].Parent.EntityGroupId == dcgm.FE_GPU_I { // Add the compute instance, gpuId is recorded previously entityId := hierarchy.EntityList[i].Entity.EntityId ciInfo := ComputeInstanceInfo{hierarchy.EntityList[i].Info, "", entityId} - sysInfo.Gpus[gpuId].GpuInstances[instanceIndex].ComputeInstances = append(sysInfo.Gpus[gpuId].GpuInstances[instanceIndex].ComputeInstances, ciInfo) + sysInfo.GPUs[gpuId].GPUInstances[instanceIndex].ComputeInstances = append(sysInfo.GPUs[gpuId].GPUInstances[instanceIndex].ComputeInstances, ciInfo) } } @@ -424,7 +424,7 @@ func InitializeGpuInfo(sysInfo SystemInfo, gOpt DeviceOptions, useFakeGpus bool) return sysInfo, err } -func InitializeSystemInfo(gOpt DeviceOptions, sOpt DeviceOptions, cOpt DeviceOptions, useFakeGpus bool, entityType dcgm.Field_Entity_Group) (SystemInfo, error) { +func InitializeSystemInfo(gOpt DeviceOptions, sOpt DeviceOptions, cOpt DeviceOptions, useFakeGPUs bool, entityType dcgm.Field_Entity_Group) (SystemInfo, error) { sysInfo := SystemInfo{} logrus.Info("Initializing system entities of type: ", entityType) @@ -437,7 +437,7 @@ func InitializeSystemInfo(gOpt DeviceOptions, sOpt DeviceOptions, cOpt DeviceOpt return InitializeNvSwitchInfo(sysInfo, sOpt) case dcgm.FE_GPU: sysInfo.InfoType = dcgm.FE_GPU - return InitializeGpuInfo(sysInfo, gOpt, useFakeGpus) + return InitializeGPUInfo(sysInfo, gOpt, useFakeGPUs) case dcgm.FE_CPU: sysInfo.InfoType = dcgm.FE_CPU return InitializeCPUInfo(sysInfo, cOpt) @@ -541,13 +541,13 @@ func CreateGroupFromSystemInfo(sysInfo SystemInfo) (dcgm.GroupHandle, func(), er return groupId, func() { dcgm.DestroyGroup(groupId) }, nil } -func AddAllGpus(sysInfo SystemInfo) []MonitoringInfo { +func AddAllGPUs(sysInfo SystemInfo) []MonitoringInfo { var monitoring []MonitoringInfo - for i := uint(0); i < sysInfo.GpuCount; i++ { + for i := uint(0); i < sysInfo.GPUCount; i++ { mi := MonitoringInfo{ - dcgm.GroupEntityPair{dcgm.FE_GPU, sysInfo.Gpus[i].DeviceInfo.GPU}, - sysInfo.Gpus[i].DeviceInfo, + dcgm.GroupEntityPair{dcgm.FE_GPU, sysInfo.GPUs[i].DeviceInfo.GPU}, + sysInfo.GPUs[i].DeviceInfo, nil, PARENT_ID_IGNORED, } @@ -759,24 +759,24 @@ func AddAllCPUCores(sysInfo SystemInfo) []MonitoringInfo { return monitoring } -func AddAllGpuInstances(sysInfo SystemInfo, addFlexibly bool) []MonitoringInfo { +func AddAllGPUInstances(sysInfo SystemInfo, addFlexibly bool) []MonitoringInfo { var monitoring []MonitoringInfo - for i := uint(0); i < sysInfo.GpuCount; i++ { - if addFlexibly == true && len(sysInfo.Gpus[i].GpuInstances) == 0 { + for i := uint(0); i < sysInfo.GPUCount; i++ { + if addFlexibly == true && len(sysInfo.GPUs[i].GPUInstances) == 0 { mi := MonitoringInfo{ - dcgm.GroupEntityPair{dcgm.FE_GPU, sysInfo.Gpus[i].DeviceInfo.GPU}, - sysInfo.Gpus[i].DeviceInfo, + dcgm.GroupEntityPair{dcgm.FE_GPU, sysInfo.GPUs[i].DeviceInfo.GPU}, + sysInfo.GPUs[i].DeviceInfo, nil, PARENT_ID_IGNORED, } monitoring = append(monitoring, mi) } else { - for j := 0; j < len(sysInfo.Gpus[i].GpuInstances); j++ { + for j := 0; j < len(sysInfo.GPUs[i].GPUInstances); j++ { mi := MonitoringInfo{ - dcgm.GroupEntityPair{dcgm.FE_GPU_I, sysInfo.Gpus[i].GpuInstances[j].EntityId}, - sysInfo.Gpus[i].DeviceInfo, - &sysInfo.Gpus[i].GpuInstances[j], + dcgm.GroupEntityPair{dcgm.FE_GPU_I, sysInfo.GPUs[i].GPUInstances[j].EntityId}, + sysInfo.GPUs[i].DeviceInfo, + &sysInfo.GPUs[i].GPUInstances[j], PARENT_ID_IGNORED, } monitoring = append(monitoring, mi) @@ -787,12 +787,12 @@ func AddAllGpuInstances(sysInfo SystemInfo, addFlexibly bool) []MonitoringInfo { return monitoring } -func GetMonitoringInfoForGpu(sysInfo SystemInfo, gpuId int) *MonitoringInfo { - for i := uint(0); i < sysInfo.GpuCount; i++ { - if sysInfo.Gpus[i].DeviceInfo.GPU == uint(gpuId) { +func GetMonitoringInfoForGPU(sysInfo SystemInfo, gpuId int) *MonitoringInfo { + for i := uint(0); i < sysInfo.GPUCount; i++ { + if sysInfo.GPUs[i].DeviceInfo.GPU == uint(gpuId) { return &MonitoringInfo{ - dcgm.GroupEntityPair{dcgm.FE_GPU, sysInfo.Gpus[i].DeviceInfo.GPU}, - sysInfo.Gpus[i].DeviceInfo, + dcgm.GroupEntityPair{dcgm.FE_GPU, sysInfo.GPUs[i].DeviceInfo.GPU}, + sysInfo.GPUs[i].DeviceInfo, nil, PARENT_ID_IGNORED, } @@ -802,13 +802,13 @@ func GetMonitoringInfoForGpu(sysInfo SystemInfo, gpuId int) *MonitoringInfo { return nil } -func GetMonitoringInfoForGpuInstance(sysInfo SystemInfo, gpuInstanceId int) *MonitoringInfo { - for i := uint(0); i < sysInfo.GpuCount; i++ { - for _, instance := range sysInfo.Gpus[i].GpuInstances { +func GetMonitoringInfoForGPUInstance(sysInfo SystemInfo, gpuInstanceId int) *MonitoringInfo { + for i := uint(0); i < sysInfo.GPUCount; i++ { + for _, instance := range sysInfo.GPUs[i].GPUInstances { if instance.EntityId == uint(gpuInstanceId) { return &MonitoringInfo{ dcgm.GroupEntityPair{dcgm.FE_GPU_I, uint(gpuInstanceId)}, - sysInfo.Gpus[i].DeviceInfo, + sysInfo.GPUs[i].DeviceInfo, &instance, PARENT_ID_IGNORED, } @@ -831,23 +831,23 @@ func GetMonitoredEntities(sysInfo SystemInfo) []MonitoringInfo { } else if sysInfo.InfoType == dcgm.FE_CPU_CORE { monitoring = AddAllCPUCores(sysInfo) } else if sysInfo.gOpt.Flex == true { - monitoring = AddAllGpuInstances(sysInfo, true) + monitoring = AddAllGPUInstances(sysInfo, true) } else { if len(sysInfo.gOpt.MajorRange) > 0 && sysInfo.gOpt.MajorRange[0] == -1 { - monitoring = AddAllGpus(sysInfo) + monitoring = AddAllGPUs(sysInfo) } else { for _, gpuId := range sysInfo.gOpt.MajorRange { // We've already verified that everything in the options list exists - monitoring = append(monitoring, *GetMonitoringInfoForGpu(sysInfo, gpuId)) + monitoring = append(monitoring, *GetMonitoringInfoForGPU(sysInfo, gpuId)) } } if len(sysInfo.gOpt.MinorRange) > 0 && sysInfo.gOpt.MinorRange[0] == -1 { - monitoring = AddAllGpuInstances(sysInfo, false) + monitoring = AddAllGPUInstances(sysInfo, false) } else { for _, gpuInstanceId := range sysInfo.gOpt.MinorRange { // We've already verified that everything in the options list exists - monitoring = append(monitoring, *GetMonitoringInfoForGpuInstance(sysInfo, gpuInstanceId)) + monitoring = append(monitoring, *GetMonitoringInfoForGPUInstance(sysInfo, gpuInstanceId)) } } } @@ -855,10 +855,10 @@ func GetMonitoredEntities(sysInfo SystemInfo) []MonitoringInfo { return monitoring } -func GetGpuInstanceIdentifier(sysInfo SystemInfo, gpuuuid string, gpuInstanceId uint) string { - for i := uint(0); i < sysInfo.GpuCount; i++ { - if sysInfo.Gpus[i].DeviceInfo.UUID == gpuuuid { - identifier := fmt.Sprintf("%d-%d", sysInfo.Gpus[i].DeviceInfo.GPU, gpuInstanceId) +func GetGPUInstanceIdentifier(sysInfo SystemInfo, gpuuuid string, gpuInstanceId uint) string { + for i := uint(0); i < sysInfo.GPUCount; i++ { + if sysInfo.GPUs[i].DeviceInfo.UUID == gpuuuid { + identifier := fmt.Sprintf("%d-%d", sysInfo.GPUs[i].DeviceInfo.GPU, gpuInstanceId) return identifier } } diff --git a/pkg/dcgmexporter/system_info_test.go b/pkg/dcgmexporter/system_info_test.go index 5a7fd886..a7f024a8 100644 --- a/pkg/dcgmexporter/system_info_test.go +++ b/pkg/dcgmexporter/system_info_test.go @@ -78,21 +78,21 @@ func SpoofSwitchSystemInfo() SystemInfo { func SpoofSystemInfo() SystemInfo { var sysInfo SystemInfo - sysInfo.GpuCount = 2 - sysInfo.Gpus[0].DeviceInfo.GPU = 0 - gi := GpuInstanceInfo{ + sysInfo.GPUCount = 2 + sysInfo.GPUs[0].DeviceInfo.GPU = 0 + gi := GPUInstanceInfo{ Info: dcgm.MigEntityInfo{"fake", 0, 0, 0, 0, 3}, ProfileName: fakeProfileName, EntityId: 0, } - sysInfo.Gpus[0].GpuInstances = append(sysInfo.Gpus[0].GpuInstances, gi) - gi2 := GpuInstanceInfo{ + sysInfo.GPUs[0].GPUInstances = append(sysInfo.GPUs[0].GPUInstances, gi) + gi2 := GPUInstanceInfo{ Info: dcgm.MigEntityInfo{"fake", 0, 1, 0, 0, 3}, ProfileName: fakeProfileName, EntityId: 14, } - sysInfo.Gpus[1].GpuInstances = append(sysInfo.Gpus[1].GpuInstances, gi2) - sysInfo.Gpus[1].DeviceInfo.GPU = 1 + sysInfo.GPUs[1].GPUInstances = append(sysInfo.GPUs[1].GPUInstances, gi2) + sysInfo.GPUs[1].DeviceInfo.GPU = 1 return sysInfo } @@ -116,20 +116,20 @@ func TestMonitoredEntities(t *testing.T) { } } else { gpuCount = gpuCount + 1 - require.Equal(t, mi.InstanceInfo, (*GpuInstanceInfo)(nil), "Expected InstanceInfo to be nil but it wasn't") + require.Equal(t, mi.InstanceInfo, (*GPUInstanceInfo)(nil), "Expected InstanceInfo to be nil but it wasn't") } } require.Equal(t, instanceCount, 2, "Expected 2 GPU instances but found %d", instanceCount) require.Equal(t, gpuCount, 0, "Expected 0 GPUs but found %d", gpuCount) - sysInfo.Gpus[0].GpuInstances = sysInfo.Gpus[0].GpuInstances[:0] - sysInfo.Gpus[1].GpuInstances = sysInfo.Gpus[1].GpuInstances[:0] + sysInfo.GPUs[0].GPUInstances = sysInfo.GPUs[0].GPUInstances[:0] + sysInfo.GPUs[1].GPUInstances = sysInfo.GPUs[1].GPUInstances[:0] monitoring = GetMonitoredEntities(sysInfo) require.Equal(t, 2, len(monitoring), fmt.Sprintf("Should have 2 monitored entities but found %d", len(monitoring))) for i, mi := range monitoring { require.Equal(t, mi.Entity.EntityGroupId, dcgm.FE_GPU, "Expected FE_GPU but found %d", mi.Entity.EntityGroupId) require.Equal(t, uint(i), mi.DeviceInfo.GPU, "Expected GPU %d but found %d", i, mi.DeviceInfo.GPU) - require.Equal(t, (*GpuInstanceInfo)(nil), mi.InstanceInfo, "Expected InstanceInfo not to be populated but it was") + require.Equal(t, (*GPUInstanceInfo)(nil), mi.InstanceInfo, "Expected InstanceInfo not to be populated but it was") } } diff --git a/pkg/dcgmexporter/types.go b/pkg/dcgmexporter/types.go index 46375518..799d70af 100644 --- a/pkg/dcgmexporter/types.go +++ b/pkg/dcgmexporter/types.go @@ -73,7 +73,7 @@ type Config struct { SwitchDevices DeviceOptions CPUDevices DeviceOptions NoHostname bool - UseFakeGpus bool + UseFakeGPUs bool ConfigMapData string MetricGroups []dcgm.MetricGroup WebSystemdSocket bool