diff --git a/api/nvidia/v1/clusterpolicy_types.go b/api/nvidia/v1/clusterpolicy_types.go index 07e424761..a263489a4 100644 --- a/api/nvidia/v1/clusterpolicy_types.go +++ b/api/nvidia/v1/clusterpolicy_types.go @@ -148,6 +148,9 @@ type OperatorSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="On OpenShift, enable DriverToolkit image to build and install driver modules" // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" UseOpenShiftDriverToolkit *bool `json:"use_ocp_driver_toolkit,omitempty"` + + // UseDevicePluginCDIDevicesFeature indicates if the device plug-in should be configured to use the CDI devices feature + UseDevicePluginCDIDevicesFeature *bool `json:"useDevicePluginCDIDevicesFeature,omitempty"` } // HostPathsSpec defines various paths on the host needed by GPU Operator components @@ -1827,6 +1830,15 @@ func ImagePullPolicy(pullPolicy string) corev1.PullPolicy { return imagePullPolicy } +// DevicePluginCDIDevicesFeatureEnabled returns true if the DevicePluginCDIDevices feature is enabled +func (s *OperatorSpec) DevicePluginCDIDevicesFeatureEnabled() bool { + if s.UseDevicePluginCDIDevicesFeature == nil { + // default is false if not specified by user + return false + } + return *s.UseDevicePluginCDIDevicesFeature +} + // IsEnabled returns true if driver install is enabled(default) through gpu-operator func (d *DriverSpec) IsEnabled() bool { if d.Enabled == nil { diff --git a/api/nvidia/v1/zz_generated.deepcopy.go b/api/nvidia/v1/zz_generated.deepcopy.go index 6d876f675..749822e59 100644 --- a/api/nvidia/v1/zz_generated.deepcopy.go +++ b/api/nvidia/v1/zz_generated.deepcopy.go @@ -1131,6 +1131,11 @@ func (in *OperatorSpec) DeepCopyInto(out *OperatorSpec) { *out = new(bool) **out = **in } + if in.UseDevicePluginCDIDevicesFeature != nil { + in, out := &in.UseDevicePluginCDIDevicesFeature, &out.UseDevicePluginCDIDevicesFeature + *out = 
new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OperatorSpec. diff --git a/bundle/manifests/nvidia.com_clusterpolicies.yaml b/bundle/manifests/nvidia.com_clusterpolicies.yaml index 8ee8e9a8a..a4f7c3c4a 100644 --- a/bundle/manifests/nvidia.com_clusterpolicies.yaml +++ b/bundle/manifests/nvidia.com_clusterpolicies.yaml @@ -1558,6 +1558,10 @@ spec: image should be used on OpenShift to build and install driver modules type: boolean + useDevicePluginCDIDevicesFeature: + description: UseDevicePluginCDIDevicesFeature indicates if the device plug-in + should be configured to use the CDI devices feature + type: boolean required: - defaultRuntime type: object diff --git a/config/crd/bases/nvidia.com_clusterpolicies.yaml b/config/crd/bases/nvidia.com_clusterpolicies.yaml index 8ee8e9a8a..f750ad490 100644 --- a/config/crd/bases/nvidia.com_clusterpolicies.yaml +++ b/config/crd/bases/nvidia.com_clusterpolicies.yaml @@ -1558,6 +1558,10 @@ spec: image should be used on OpenShift to build and install driver modules type: boolean + useDevicePluginCDIDevicesFeature: + description: UseDevicePluginCDIDevicesFeature indicates if the + device plug-in should be configured to use the CDI devices feature + type: boolean required: - defaultRuntime type: object diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 2b4a92528..9702c16b2 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -1398,7 +1398,11 @@ func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe // update env required for CDI support if config.CDI.IsEnabled() { setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIEnabledEnvName, "true") - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DeviceListStrategyEnvName, "envvar,cdi-annotations") + if config.Operator.DevicePluginCDIDevicesFeatureEnabled() { + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), 
DeviceListStrategyEnvName, "cdi-cri") + } else { + setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DeviceListStrategyEnvName, "envvar,cdi-annotations") + } setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIAnnotationPrefixEnvName, "nvidia.cdi.k8s.io/") if config.Toolkit.IsEnabled() { setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCDIHookPathEnvName, filepath.Join(config.Toolkit.InstallDir, "toolkit/nvidia-cdi-hook")) diff --git a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml index 8ee8e9a8a..a4f7c3c4a 100644 --- a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml +++ b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml @@ -1558,6 +1558,10 @@ spec: image should be used on OpenShift to build and install driver modules type: boolean + useDevicePluginCDIDevicesFeature: + description: UseDevicePluginCDIDevicesFeature indicates if the device plug-in + should be configured to use the CDI devices feature + type: boolean required: - defaultRuntime type: object diff --git a/deployments/gpu-operator/templates/clusterpolicy.yaml b/deployments/gpu-operator/templates/clusterpolicy.yaml index af9e87c38..bf893d4a1 100644 --- a/deployments/gpu-operator/templates/clusterpolicy.yaml +++ b/deployments/gpu-operator/templates/clusterpolicy.yaml @@ -46,6 +46,9 @@ spec: {{- if .Values.operator.use_ocp_driver_toolkit }} use_ocp_driver_toolkit: {{ .Values.operator.use_ocp_driver_toolkit }} {{- end }} + {{- if .Values.operator.useDevicePluginCDIDevicesFeature }} + useDevicePluginCDIDevicesFeature: {{ .Values.operator.useDevicePluginCDIDevicesFeature }} + {{- end }} daemonsets: labels: {{- include "gpu-operator.operand-labels" . 
| nindent 6 }} diff --git a/deployments/gpu-operator/values.yaml b/deployments/gpu-operator/values.yaml index cad9bedd8..a1ae94e98 100644 --- a/deployments/gpu-operator/values.yaml +++ b/deployments/gpu-operator/values.yaml @@ -80,6 +80,8 @@ operator: # upgrade CRD on chart upgrade, requires --disable-openapi-validation flag # to be passed during helm upgrade. upgradeCRD: true + # configure the device plug-in to use the CDI devices feature + useDevicePluginCDIDevicesFeature: false initContainer: image: cuda repository: nvcr.io/nvidia