From 234abe6823a4e41daa7969c54a517012e9b5afc3 Mon Sep 17 00:00:00 2001 From: Erez Freiberger Date: Wed, 14 Aug 2024 18:00:40 +0300 Subject: [PATCH] implement config map controller instead of node controller --- .../kwok-gpu-device-plugin/role.yaml | 13 ++ .../kwok-gpu-device-plugin/rolebinding.yaml | 12 ++ internal/kwok-gpu-device-plugin/app.go | 10 +- .../controllers/configmap/controller.go | 102 ++++++++++++++ .../controllers/node/controller.go | 86 ------------ .../handlers/configmap/handler.go | 59 +++++++++ .../handlers/node/handler.go | 76 ----------- .../controllers/node/controller.go | 6 +- .../handlers/node/fake_node_deployments.go | 125 ++++++++++++++++++ .../status-updater/handlers/node/handler.go | 12 +- .../handlers/node/topology_cm.go | 12 +- 11 files changed, 339 insertions(+), 174 deletions(-) create mode 100644 deploy/fake-gpu-operator/templates/kwok-gpu-device-plugin/role.yaml create mode 100644 deploy/fake-gpu-operator/templates/kwok-gpu-device-plugin/rolebinding.yaml create mode 100644 internal/kwok-gpu-device-plugin/controllers/configmap/controller.go delete mode 100644 internal/kwok-gpu-device-plugin/controllers/node/controller.go create mode 100644 internal/kwok-gpu-device-plugin/handlers/configmap/handler.go delete mode 100644 internal/kwok-gpu-device-plugin/handlers/node/handler.go create mode 100644 internal/status-updater/handlers/node/fake_node_deployments.go diff --git a/deploy/fake-gpu-operator/templates/kwok-gpu-device-plugin/role.yaml b/deploy/fake-gpu-operator/templates/kwok-gpu-device-plugin/role.yaml new file mode 100644 index 0000000..3ac8803 --- /dev/null +++ b/deploy/fake-gpu-operator/templates/kwok-gpu-device-plugin/role.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: plugin +rules: + - apiGroups: + - "" + resources: + - configmaps + verbs: + - list + - get + - watch diff --git a/deploy/fake-gpu-operator/templates/kwok-gpu-device-plugin/rolebinding.yaml 
b/deploy/fake-gpu-operator/templates/kwok-gpu-device-plugin/rolebinding.yaml new file mode 100644 index 0000000..2d04e08 --- /dev/null +++ b/deploy/fake-gpu-operator/templates/kwok-gpu-device-plugin/rolebinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: fake-kwok-gpu-device-plugin +roleRef: + kind: Role + apiGroup: rbac.authorization.k8s.io + name: plugin +subjects: + - kind: ServiceAccount + name: kwok-gpu-device-plugin + namespace: "{{ .Release.Namespace }}" diff --git a/internal/kwok-gpu-device-plugin/app.go b/internal/kwok-gpu-device-plugin/app.go index e6af3c1..86d221b 100644 --- a/internal/kwok-gpu-device-plugin/app.go +++ b/internal/kwok-gpu-device-plugin/app.go @@ -7,9 +7,11 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" + "github.com/spf13/viper" ctrl "sigs.k8s.io/controller-runtime" - nodecontroller "github.com/run-ai/fake-gpu-operator/internal/kwok-gpu-device-plugin/controllers/node" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" + cmcontroller "github.com/run-ai/fake-gpu-operator/internal/kwok-gpu-device-plugin/controllers/configmap" "github.com/run-ai/fake-gpu-operator/internal/status-updater/controllers" ) @@ -57,7 +59,11 @@ func (app *StatusUpdaterApp) Init(stopCh chan struct{}) { app.kubeClient = KubeClientFn(clusterConfig) - app.Controllers = append(app.Controllers, nodecontroller.NewNodeController(app.kubeClient, app.wg)) + app.Controllers = append( + app.Controllers, cmcontroller.NewConfigMapController( + app.kubeClient, viper.GetString(constants.EnvTopologyCmNamespace), + ), + ) } func (app *StatusUpdaterApp) Name() string { diff --git a/internal/kwok-gpu-device-plugin/controllers/configmap/controller.go b/internal/kwok-gpu-device-plugin/controllers/configmap/controller.go new file mode 100644 index 0000000..3fea2d8 --- /dev/null +++ b/internal/kwok-gpu-device-plugin/controllers/configmap/controller.go @@ -0,0 +1,102 @@ 
+package configmap + +import ( + "log" + + "github.com/run-ai/fake-gpu-operator/internal/common/constants" + "github.com/run-ai/fake-gpu-operator/internal/common/topology" + "github.com/run-ai/fake-gpu-operator/internal/status-updater/controllers" + "github.com/run-ai/fake-gpu-operator/internal/status-updater/controllers/util" + + cmhandler "github.com/run-ai/fake-gpu-operator/internal/kwok-gpu-device-plugin/handlers/configmap" + + v1 "k8s.io/api/core/v1" + listersv1 "k8s.io/client-go/listers/core/v1" + + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/cache" +) + +type ConfigMapController struct { + kubeClient kubernetes.Interface + cmInformer cache.SharedIndexInformer + nodeLister listersv1.NodeLister + informerFactory informers.SharedInformerFactory + handler cmhandler.Interface + + clusterTopology *topology.ClusterTopology +} + +var _ controllers.Interface = &ConfigMapController{} + +func NewConfigMapController( + kubeClient kubernetes.Interface, namespace string, +) *ConfigMapController { + clusterTopology, err := topology.GetClusterTopologyFromCM(kubeClient) + if err != nil { + log.Fatalf("Failed to get cluster topology: %v", err) + } + + informerFactory := informers.NewSharedInformerFactoryWithOptions( + kubeClient, 0, informers.WithNamespace(namespace)) + c := &ConfigMapController{ + kubeClient: kubeClient, informerFactory: informerFactory, + cmInformer: informerFactory.Core().V1().ConfigMaps().Informer(), + nodeLister: informerFactory.Core().V1().Nodes().Lister(), + handler: cmhandler.NewConfigMapHandler(kubeClient, clusterTopology), + clusterTopology: clusterTopology, + } + + _, err = c.cmInformer.AddEventHandler(cache.FilteringResourceEventHandler{ + FilterFunc: func(obj interface{}) bool { + switch cm := obj.(type) { + case *v1.ConfigMap: + return c.isFakeGpuKWOKNodeConfigMap(cm) + default: + return false + } + }, + Handler: cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + go func() { + 
c.callConfigMapHandler(obj.(*v1.ConfigMap)) + }() + }, + }, + }) + if err != nil { + log.Fatalf("Failed to add config map event handler: %v", err) + } + + return c +} + +func (c *ConfigMapController) Run(stopCh <-chan struct{}) { + log.Println("Starting config map controller") + c.informerFactory.Start(stopCh) +} + +func (c *ConfigMapController) isFakeGpuKWOKNodeConfigMap(cm *v1.ConfigMap) bool { + if cm == nil || cm.Labels == nil { + return false + } + nodeName := cm.Labels[constants.LabelTopologyCMNodeName] + + node, err := c.nodeLister.Get(nodeName) + if err != nil { + return false + } + + _, isNodeAssignedToNodePool := node.Labels[c.clusterTopology.NodePoolLabelKey] + return isNodeAssignedToNodePool && node.Annotations[constants.AnnotationKwokNode] == "fake" +} + +func (c *ConfigMapController) callConfigMapHandler(cm *v1.ConfigMap) { + nodeName := cm.Labels[constants.LabelTopologyCMNodeName] + node, err := c.nodeLister.Get(nodeName) + if err != nil { + log.Printf("Failed to get node %s for config map %s: %v", nodeName, cm.Name, err); return + } + util.LogErrorIfExist(c.handler.HandleAdd(cm, node), "Failed to handle cm addition") +} diff --git a/internal/kwok-gpu-device-plugin/controllers/node/controller.go b/internal/kwok-gpu-device-plugin/controllers/node/controller.go deleted file mode 100644 index da87638..0000000 --- a/internal/kwok-gpu-device-plugin/controllers/node/controller.go +++ /dev/null @@ -1,86 +0,0 @@ -package node - -import ( - "log" - "sync" - - "github.com/run-ai/fake-gpu-operator/internal/common/constants" - "github.com/run-ai/fake-gpu-operator/internal/common/topology" - "github.com/run-ai/fake-gpu-operator/internal/status-updater/controllers" - "github.com/run-ai/fake-gpu-operator/internal/status-updater/controllers/util" - - nodehandler "github.com/run-ai/fake-gpu-operator/internal/kwok-gpu-device-plugin/handlers/node" - - v1 "k8s.io/api/core/v1" - - "k8s.io/client-go/informers" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/cache" -) - -type NodeController struct { - kubeClient kubernetes.Interface - informer 
cache.SharedIndexInformer - handler nodehandler.Interface - - clusterTopology *topology.ClusterTopology -} - -var _ controllers.Interface = &NodeController{} - -func NewNodeController(kubeClient kubernetes.Interface, wg *sync.WaitGroup) *NodeController { - clusterTopology, err := topology.GetClusterTopologyFromCM(kubeClient) - if err != nil { - log.Fatalf("Failed to get cluster topology: %v", err) - } - - c := &NodeController{ - kubeClient: kubeClient, - informer: informers.NewSharedInformerFactory(kubeClient, 0).Core().V1().Nodes().Informer(), - handler: nodehandler.NewNodeHandler(kubeClient, clusterTopology), - clusterTopology: clusterTopology, - } - - _, err = c.informer.AddEventHandler(cache.FilteringResourceEventHandler{ - FilterFunc: func(obj interface{}) bool { - switch node := obj.(type) { - case *v1.Node: - return c.isFakeGpuKWOKNode(node) - default: - return false - } - }, - Handler: cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - go func() { - node := obj.(*v1.Node) - util.LogErrorIfExist(c.handler.HandleAdd(node), "Failed to handle node addition") - }() - }, - DeleteFunc: func(obj interface{}) { - go func() { - node := obj.(*v1.Node) - util.LogErrorIfExist(c.handler.HandleDelete(node), "Failed to handle node deletion") - }() - }, - }, - }) - if err != nil { - log.Fatalf("Failed to add node event handler: %v", err) - } - - return c -} - -func (c *NodeController) Run(stopCh <-chan struct{}) { - log.Println("Starting node controller") - c.informer.Run(stopCh) -} - -func (c *NodeController) isFakeGpuKWOKNode(node *v1.Node) bool { - if node == nil || node.Labels == nil { - return false - } - _, isNodeAssignedToNodePool := node.Labels[c.clusterTopology.NodePoolLabelKey] - return isNodeAssignedToNodePool && node.Annotations[constants.AnnotationKwokNode] == "fake" -} diff --git a/internal/kwok-gpu-device-plugin/handlers/configmap/handler.go b/internal/kwok-gpu-device-plugin/handlers/configmap/handler.go new file mode 100644 index 
0000000..5cc5332 --- /dev/null +++ b/internal/kwok-gpu-device-plugin/handlers/configmap/handler.go @@ -0,0 +1,59 @@ +package configmap + +import ( + "context" + "fmt" + "log" + + "github.com/run-ai/fake-gpu-operator/internal/common/constants" + "github.com/run-ai/fake-gpu-operator/internal/common/topology" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" +) + +type Interface interface { + HandleAdd(cm *v1.ConfigMap, node *v1.Node) error +} + +type ConfigMapHandler struct { + kubeClient kubernetes.Interface + + clusterTopology *topology.ClusterTopology +} + +var _ Interface = &ConfigMapHandler{} + +func NewConfigMapHandler(kubeClient kubernetes.Interface, clusterTopology *topology.ClusterTopology) *ConfigMapHandler { + return &ConfigMapHandler{ + kubeClient: kubeClient, + clusterTopology: clusterTopology, + } +} + +func (p *ConfigMapHandler) HandleAdd(cm *v1.ConfigMap, node *v1.Node) error { + log.Printf("Handling node addition: %s\n", cm.Name) + + nodeTopology, err := topology.FromNodeTopologyCM(cm) + if err != nil { + return fmt.Errorf("failed to create node topology ConfigMap: %w", err) + } + + return p.applyFakeDevicePlugin(len(nodeTopology.Gpus), node) +} + +func (p *ConfigMapHandler) applyFakeDevicePlugin(gpuCount int, node *v1.Node) error { + patch := fmt.Sprintf( + `{"status": {"capacity": {"%s": "%d"}, "allocatable": {"%s": "%d"}}}`, + constants.GpuResourceName, gpuCount, constants.GpuResourceName, gpuCount, + ) + _, err := p.kubeClient.CoreV1().Nodes().Patch( + context.TODO(), node.Name, types.MergePatchType, []byte(patch), metav1.PatchOptions{}, "status", + ) + if err != nil { + return fmt.Errorf("failed to update node capacity and allocatable: %v", err) + } + + return nil +} diff --git a/internal/kwok-gpu-device-plugin/handlers/node/handler.go b/internal/kwok-gpu-device-plugin/handlers/node/handler.go deleted file mode 100644 index e70685b..0000000 --- 
a/internal/kwok-gpu-device-plugin/handlers/node/handler.go +++ /dev/null @@ -1,76 +0,0 @@ -package node - -import ( - "context" - "fmt" - "log" - - "github.com/run-ai/fake-gpu-operator/internal/common/constants" - "github.com/run-ai/fake-gpu-operator/internal/common/topology" - normalnodehandler "github.com/run-ai/fake-gpu-operator/internal/status-updater/handlers/node" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/kubernetes" -) - -type Interface interface { - HandleAdd(node *v1.Node) error - HandleDelete(node *v1.Node) error -} - -type NodeHandler struct { - kubeClient kubernetes.Interface - - clusterTopology *topology.ClusterTopology - normalStatusUpdaterHandler *normalnodehandler.NodeHandler -} - -var _ Interface = &NodeHandler{} - -func NewNodeHandler(kubeClient kubernetes.Interface, clusterTopology *topology.ClusterTopology) *NodeHandler { - return &NodeHandler{ - kubeClient: kubeClient, - clusterTopology: clusterTopology, - - normalStatusUpdaterHandler: normalnodehandler.NewNodeHandler(kubeClient, clusterTopology), - } -} - -func (p *NodeHandler) HandleAdd(node *v1.Node) error { - log.Printf("Handling node addition: %s\n", node.Name) - - nodeTopology, err := p.normalStatusUpdaterHandler.CreateNodeTopologyCM(node) - if err != nil { - return fmt.Errorf("failed to create node topology ConfigMap: %w", err) - } - - return p.applyFakeDevicePlugin(len(nodeTopology.Gpus), node) -} - -func (p *NodeHandler) HandleDelete(node *v1.Node) error { - log.Printf("Handling node deletion: %s\n", node.Name) - - err := topology.DeleteNodeTopologyCM(p.kubeClient, node.Name) - if err != nil && !errors.IsNotFound(err) { - return fmt.Errorf("failed to delete node topology: %w", err) - } - - return nil -} - -func (p *NodeHandler) applyFakeDevicePlugin(gpuCount int, node *v1.Node) error { - patch := fmt.Sprintf( - `{"status": {"capacity": {"%s": "%d"}, 
"allocatable": {"%s": "%d"}}}`, - constants.GpuResourceName, gpuCount, constants.GpuResourceName, gpuCount, - ) - _, err := p.kubeClient.CoreV1().Nodes().Patch( - context.TODO(), node.Name, types.MergePatchType, []byte(patch), metav1.PatchOptions{}, "status", - ) - if err != nil { - return fmt.Errorf("failed to update node capacity and allocatable: %v", err) - } - - return nil -} diff --git a/internal/status-updater/controllers/node/controller.go b/internal/status-updater/controllers/node/controller.go index 824a040..fb01470 100644 --- a/internal/status-updater/controllers/node/controller.go +++ b/internal/status-updater/controllers/node/controller.go @@ -53,7 +53,7 @@ func NewNodeController(kubeClient kubernetes.Interface, wg *sync.WaitGroup) *Nod FilterFunc: func(obj interface{}) bool { switch node := obj.(type) { case *v1.Node: - return c.isFakeGpuNodeNotKWOK(node) + return c.isFakeGpuNode(node) default: return false } @@ -164,9 +164,9 @@ func (c *NodeController) pruneTopologyConfigMap(cm *v1.ConfigMap, isValidNodeTop return nil } -func (c *NodeController) isFakeGpuNodeNotKWOK(node *v1.Node) bool { +func (c *NodeController) isFakeGpuNode(node *v1.Node) bool { _, isNodeAssignedToNodePool := node.Labels[c.clusterTopology.NodePoolLabelKey] - return isNodeAssignedToNodePool && node.Annotations[constants.AnnotationKwokNode] != "fake" + return isNodeAssignedToNodePool } func isPodExist(kubeClient kubernetes.Interface, podName string, namespace string) (bool, error) { diff --git a/internal/status-updater/handlers/node/fake_node_deployments.go b/internal/status-updater/handlers/node/fake_node_deployments.go new file mode 100644 index 0000000..f75ab9e --- /dev/null +++ b/internal/status-updater/handlers/node/fake_node_deployments.go @@ -0,0 +1,125 @@ +package node + +import ( + "context" + "fmt" + "os" + + "github.com/run-ai/fake-gpu-operator/internal/common/constants" + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + 
"k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" +) + +func (p *NodeHandler) applyFakeNodeDeployments(node *v1.Node) error { + if !isFakeNode(node) { + return nil + } + + deployments, err := p.generateFakeNodeDeployments(node) + if err != nil { + return fmt.Errorf("failed to get fake node deployments: %w", err) + } + + for _, deployment := range deployments { + err := p.applyDeployment(deployment) + if err != nil { + return fmt.Errorf("failed to apply deployment: %w", err) + } + } + + return nil +} + +func (p *NodeHandler) deleteFakeNodeDeployments(node *v1.Node) error { + if !isFakeNode(node) { + return nil + } + + deployments, err := p.generateFakeNodeDeployments(node) + if err != nil { + return fmt.Errorf("failed to get fake node deployments: %w", err) + } + + for _, deployment := range deployments { + err := p.kubeClient.AppsV1().Deployments(deployment.Namespace).Delete(context.TODO(), deployment.Name, metav1.DeleteOptions{}) + if err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("failed to delete deployment %s: %w", deployment.Name, err) + } + } + + return nil +} + +func (p *NodeHandler) generateFakeNodeDeployments(node *v1.Node) ([]appsv1.Deployment, error) { + deploymentTemplates, err := p.kubeClient.AppsV1().Deployments(os.Getenv(constants.EnvFakeGpuOperatorNs)).List(context.TODO(), metav1.ListOptions{ + LabelSelector: fmt.Sprintf("%s=true", constants.LabelFakeNodeDeploymentTemplate), + }) + if err != nil { + return nil, fmt.Errorf("failed to list deployments: %w", err) + } + + deployments := []appsv1.Deployment{} + for i := range deploymentTemplates.Items { + deployments = append(deployments, *generateFakeNodeDeploymentFromTemplate(&deploymentTemplates.Items[i], node)) + } + + return deployments, nil +} + +func (p *NodeHandler) applyDeployment(deployment appsv1.Deployment) error { + existingDeployment, err := p.kubeClient.AppsV1().Deployments(deployment.Namespace).Get(context.TODO(), 
deployment.Name, metav1.GetOptions{}) + if err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("failed to get deployment %s: %w", deployment.Name, err) + } + + if errors.IsNotFound(err) { + deployment.ResourceVersion = "" + _, err := p.kubeClient.AppsV1().Deployments(deployment.Namespace).Create(context.TODO(), &deployment, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create deployment %s: %w", deployment.Name, err) + } + } else { + deployment.UID = existingDeployment.UID + deployment.ResourceVersion = existingDeployment.ResourceVersion + _, err := p.kubeClient.AppsV1().Deployments(deployment.Namespace).Update(context.TODO(), &deployment, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update deployment %s: %w", deployment.Name, err) + } + } + + return nil +} + +func generateFakeNodeDeploymentFromTemplate(template *appsv1.Deployment, node *v1.Node) *appsv1.Deployment { + deployment := template.DeepCopy() + + delete(deployment.Labels, constants.LabelFakeNodeDeploymentTemplate) + deployment.Name = fmt.Sprintf("%s-%s", deployment.Name, node.Name) + deployment.Spec.Replicas = ptr.To(int32(1)) + deployment.Spec.Template.Spec.Containers[0].Env = append(deployment.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{ + Name: constants.EnvNodeName, + Value: node.Name, + }, v1.EnvVar{ + Name: constants.EnvFakeNode, + Value: "true", + }) + + deployment.Spec.Template.Spec.Containers[0].Resources.Limits = v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("100Mi"), + v1.ResourceCPU: resource.MustParse("50m"), + } + deployment.Spec.Template.Spec.Containers[0].Resources.Requests = v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("20Mi"), + v1.ResourceCPU: resource.MustParse("10m"), + } + + return deployment +} + +func isFakeNode(node *v1.Node) bool { + return node != nil && node.Annotations[constants.AnnotationKwokNode] == "fake" +} diff --git a/internal/status-updater/handlers/node/handler.go 
b/internal/status-updater/handlers/node/handler.go index 4786217..f404278 100644 --- a/internal/status-updater/handlers/node/handler.go +++ b/internal/status-updater/handlers/node/handler.go @@ -33,11 +33,16 @@ func NewNodeHandler(kubeClient kubernetes.Interface, clusterTopology *topology.C func (p *NodeHandler) HandleAdd(node *v1.Node) error { log.Printf("Handling node addition: %s\n", node.Name) - _, err := p.CreateNodeTopologyCM(node) + err := p.createNodeTopologyCM(node) if err != nil { return fmt.Errorf("failed to create node topology ConfigMap: %w", err) } + err = p.applyFakeNodeDeployments(node) + if err != nil { + return fmt.Errorf("failed to apply fake node deployments: %w", err) + } + err = p.labelNode(node) if err != nil { return fmt.Errorf("failed to label node: %w", err) @@ -54,6 +59,11 @@ func (p *NodeHandler) HandleDelete(node *v1.Node) error { return fmt.Errorf("failed to delete node topology: %w", err) } + err = p.deleteFakeNodeDeployments(node) + if err != nil { + return fmt.Errorf("failed to delete fake node deployments: %w", err) + } + err = p.unlabelNode(node) if err != nil { return fmt.Errorf("failed to unlabel node: %w", err) diff --git a/internal/status-updater/handlers/node/topology_cm.go b/internal/status-updater/handlers/node/topology_cm.go index 4f66e54..e6a4067 100644 --- a/internal/status-updater/handlers/node/topology_cm.go +++ b/internal/status-updater/handlers/node/topology_cm.go @@ -8,20 +8,20 @@ import ( v1 "k8s.io/api/core/v1" ) -func (p *NodeHandler) CreateNodeTopologyCM(node *v1.Node) (*topology.NodeTopology, error) { +func (p *NodeHandler) createNodeTopologyCM(node *v1.Node) error { nodeTopology, _ := topology.GetNodeTopologyFromCM(p.kubeClient, node.Name) if nodeTopology != nil { - return nodeTopology, nil + return nil } nodePoolName, ok := node.Labels[p.clusterTopology.NodePoolLabelKey] if !ok { - return nil, fmt.Errorf("node %s does not have a nodepool label", node.Name) + return fmt.Errorf("node %s does not have a nodepool 
label", node.Name) } nodePoolTopology, ok := p.clusterTopology.NodePools[nodePoolName] if !ok { - return nil, fmt.Errorf("nodepool %s not found in cluster topology", nodePoolName) + return fmt.Errorf("nodepool %s not found in cluster topology", nodePoolName) } nodeTopology = &topology.NodeTopology{ @@ -33,10 +33,10 @@ func (p *NodeHandler) CreateNodeTopologyCM(node *v1.Node) (*topology.NodeTopolog err := topology.CreateNodeTopologyCM(p.kubeClient, nodeTopology, node.Name) if err != nil { - return nil, fmt.Errorf("failed to create node topology: %w", err) + return fmt.Errorf("failed to create node topology: %w", err) } - return nodeTopology, nil + return nil } func generateGpuDetails(gpuCount int, nodeName string) []topology.GpuDetails {