diff --git a/cmd/device-plugin/main.go b/cmd/device-plugin/main.go index b5d47e6..3dc1f1b 100644 --- a/cmd/device-plugin/main.go +++ b/cmd/device-plugin/main.go @@ -9,6 +9,7 @@ import ( "github.com/otiai10/copy" "github.com/run-ai/fake-gpu-operator/internal/common/config" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" "github.com/run-ai/fake-gpu-operator/internal/common/topology" "github.com/run-ai/fake-gpu-operator/internal/deviceplugin" "github.com/spf13/viper" @@ -28,11 +29,11 @@ func main() { kubeClient := KubeClientFn(clusterConfig) log.Println("Fake Device Plugin Running") - requiredEnvVars := []string{"TOPOLOGY_CM_NAME", "TOPOLOGY_CM_NAMESPACE", "NODE_NAME"} + requiredEnvVars := []string{constants.EnvTopologyCmName, constants.EnvTopologyCmNamespace, constants.EnvNodeName} config.ValidateConfig(requiredEnvVars) viper.AutomaticEnv() - topology, err := topology.GetNodeTopologyFromCM(kubeClient, os.Getenv("NODE_NAME")) + topology, err := topology.GetNodeTopologyFromCM(kubeClient, os.Getenv(constants.EnvNodeName)) if err != nil { log.Printf("Failed to get topology: %s\n", err) os.Exit(1) diff --git a/cmd/nvidia-smi/main.go b/cmd/nvidia-smi/main.go index 51eae00..ca05268 100644 --- a/cmd/nvidia-smi/main.go +++ b/cmd/nvidia-smi/main.go @@ -11,6 +11,7 @@ import ( "time" "github.com/jedib0t/go-pretty/v6/table" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" "github.com/run-ai/fake-gpu-operator/internal/common/topology" ) @@ -25,8 +26,8 @@ type nvidiaSmiArgs struct { // main is the entry point for the application. 
func main() { - os.Setenv("TOPOLOGY_CM_NAMESPACE", "gpu-operator") - os.Setenv("TOPOLOGY_CM_NAME", "topology") + os.Setenv(constants.EnvTopologyCmNamespace, "gpu-operator") + os.Setenv(constants.EnvTopologyCmName, "topology") args := getNvidiaSmiArgs() @@ -34,7 +35,7 @@ func main() { } func getNvidiaSmiArgs() (args nvidiaSmiArgs) { - nodeName := os.Getenv("NODE_NAME") + nodeName := os.Getenv(constants.EnvNodeName) // Send http request to topology-server to get the topology resp, err := http.Get("http://topology-server.gpu-operator/topology/nodes/" + nodeName) diff --git a/cmd/status-updater/main.go b/cmd/status-updater/main.go index 7a8d4f8..c2a6862 100644 --- a/cmd/status-updater/main.go +++ b/cmd/status-updater/main.go @@ -3,11 +3,12 @@ package main import ( "github.com/run-ai/fake-gpu-operator/internal/common/app" "github.com/run-ai/fake-gpu-operator/internal/common/config" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" status_updater "github.com/run-ai/fake-gpu-operator/internal/status-updater" ) func main() { - requiredEnvVars := []string{"TOPOLOGY_CM_NAME", "TOPOLOGY_CM_NAMESPACE", "FAKE_GPU_OPERATOR_NAMESPACE"} + requiredEnvVars := []string{constants.EnvTopologyCmName, constants.EnvTopologyCmNamespace, constants.EnvFakeGpuOperatorNs} config.ValidateConfig(requiredEnvVars) appRunner := app.NewAppRunner(&status_updater.StatusUpdaterApp{}) diff --git a/internal/common/app/apprunner.go b/internal/common/app/apprunner.go index 022d92f..26fb703 100644 --- a/internal/common/app/apprunner.go +++ b/internal/common/app/apprunner.go @@ -9,6 +9,7 @@ import ( "github.com/go-playground/validator" "github.com/mitchellh/mapstructure" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" "github.com/spf13/viper" ) @@ -95,6 +96,6 @@ func bindStruct(input interface{}) error { } func setDefaults() { - viper.SetDefault("TOPOLOGY_CM_NAME", "topology") - viper.SetDefault("TOPOLOGY_CM_NAMESPACE", "gpu-operator") + 
viper.SetDefault(constants.EnvTopologyCmName, "topology") + viper.SetDefault(constants.EnvTopologyCmNamespace, "gpu-operator") } diff --git a/internal/common/kubeclient/kubeclient.go b/internal/common/kubeclient/kubeclient.go index 49b05b6..4ebbb11 100644 --- a/internal/common/kubeclient/kubeclient.go +++ b/internal/common/kubeclient/kubeclient.go @@ -4,6 +4,7 @@ import ( "context" "log" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" "github.com/spf13/viper" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -44,7 +45,7 @@ func NewKubeClient(config *rest.Config, stop chan struct{}) *KubeClient { } func (client *KubeClient) SetNodeLabels(lables map[string]string) error { - nodeName := viper.GetString("NODE_NAME") + nodeName := viper.GetString(constants.EnvNodeName) node, err := client.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) if err != nil { return err @@ -60,7 +61,7 @@ func (client *KubeClient) SetNodeLabels(lables map[string]string) error { } func (client *KubeClient) SetNodeAnnotations(annotations map[string]string) error { - nodeName := viper.GetString("NODE_NAME") + nodeName := viper.GetString(constants.EnvNodeName) node, err := client.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) if err != nil { return err @@ -76,7 +77,7 @@ func (client *KubeClient) SetNodeAnnotations(annotations map[string]string) erro } func (client *KubeClient) GetNodeLabels() (map[string]string, error) { - nodeName := viper.GetString("NODE_NAME") + nodeName := viper.GetString(constants.EnvNodeName) node, err := client.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) if err != nil { return nil, err diff --git a/internal/common/topology/kubernetes.go b/internal/common/topology/kubernetes.go index 95c8e8b..e2de164 100644 --- a/internal/common/topology/kubernetes.go +++ b/internal/common/topology/kubernetes.go @@ -6,6 +6,7 @@ import ( "gopkg.in/yaml.v3" 
+ "github.com/run-ai/fake-gpu-operator/internal/common/constants" "github.com/spf13/viper" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -15,7 +16,7 @@ import ( func GetNodeTopologyFromCM(kubeclient kubernetes.Interface, nodeName string) (*NodeTopology, error) { cmName := GetNodeTopologyCMName(nodeName) cm, err := kubeclient.CoreV1().ConfigMaps( - viper.GetString("TOPOLOGY_CM_NAMESPACE")).Get( + viper.GetString(constants.EnvTopologyCmNamespace)).Get( context.TODO(), cmName, metav1.GetOptions{}) if err != nil { return nil, err @@ -31,7 +32,7 @@ func CreateNodeTopologyCM(kubeclient kubernetes.Interface, nodeTopology *NodeTop } _, err = kubeclient.CoreV1().ConfigMaps( - viper.GetString("TOPOLOGY_CM_NAMESPACE")).Create(context.TODO(), cm, metav1.CreateOptions{}) + viper.GetString(constants.EnvTopologyCmNamespace)).Create(context.TODO(), cm, metav1.CreateOptions{}) return err } @@ -42,21 +43,21 @@ func UpdateNodeTopologyCM(kubeclient kubernetes.Interface, nodeTopology *NodeTop } _, err = kubeclient.CoreV1().ConfigMaps( - viper.GetString("TOPOLOGY_CM_NAMESPACE")).Update(context.TODO(), cm, metav1.UpdateOptions{}) + viper.GetString(constants.EnvTopologyCmNamespace)).Update(context.TODO(), cm, metav1.UpdateOptions{}) return err } func DeleteNodeTopologyCM(kubeclient kubernetes.Interface, nodeName string) error { err := kubeclient.CoreV1().ConfigMaps( - viper.GetString("TOPOLOGY_CM_NAMESPACE")).Delete(context.TODO(), GetNodeTopologyCMName(nodeName), metav1.DeleteOptions{}) + viper.GetString(constants.EnvTopologyCmNamespace)).Delete(context.TODO(), GetNodeTopologyCMName(nodeName), metav1.DeleteOptions{}) return err } func GetBaseTopologyFromCM(kubeclient kubernetes.Interface) (*BaseTopology, error) { topologyCm, err := kubeclient.CoreV1().ConfigMaps( - viper.GetString("TOPOLOGY_CM_NAMESPACE")).Get( - context.TODO(), viper.GetString("TOPOLOGY_CM_NAME"), metav1.GetOptions{}) + viper.GetString(constants.EnvTopologyCmNamespace)).Get( + 
context.TODO(), viper.GetString(constants.EnvTopologyCmName), metav1.GetOptions{}) if err != nil { return nil, fmt.Errorf("failed to get topology configmap: %v", err) } @@ -92,8 +93,8 @@ func FromNodeTopologyCM(cm *corev1.ConfigMap) (*NodeTopology, error) { func ToBaseTopologyCM(baseTopology *BaseTopology) (*corev1.ConfigMap, error) { cm := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ - Name: viper.GetString("TOPOLOGY_CM_NAME"), - Namespace: viper.GetString("TOPOLOGY_CM_NAMESPACE"), + Name: viper.GetString(constants.EnvTopologyCmName), + Namespace: viper.GetString(constants.EnvTopologyCmNamespace), }, Data: make(map[string]string), } @@ -112,7 +113,7 @@ func ToNodeTopologyCM(nodeTopology *NodeTopology, nodeName string) (*corev1.Conf cm := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: GetNodeTopologyCMName(nodeName), - Namespace: viper.GetString("TOPOLOGY_CM_NAMESPACE"), + Namespace: viper.GetString(constants.EnvTopologyCmNamespace), Labels: map[string]string{ "node-topology": "true", }, @@ -131,5 +132,5 @@ func ToNodeTopologyCM(nodeTopology *NodeTopology, nodeName string) (*corev1.Conf } func GetNodeTopologyCMName(nodeName string) string { - return viper.GetString("TOPOLOGY_CM_NAME") + "-" + nodeName + return viper.GetString(constants.EnvTopologyCmName) + "-" + nodeName } diff --git a/internal/migfaker/syncconfig.go b/internal/migfaker/syncconfig.go index 852a2f6..d4ab0b9 100644 --- a/internal/migfaker/syncconfig.go +++ b/internal/migfaker/syncconfig.go @@ -3,6 +3,7 @@ package migfaker import ( "sync" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" "github.com/spf13/viper" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/fields" @@ -51,7 +52,7 @@ func ContinuouslySyncMigConfigChanges(clientset kubernetes.Interface, migConfig clientset.CoreV1().RESTClient(), ResourceNodes, v1.NamespaceAll, - fields.OneTermEqualSelector("metadata.name", viper.GetString("NODE_NAME")), + fields.OneTermEqualSelector("metadata.name", 
viper.GetString(constants.EnvNodeName)), ) _, controller := cache.NewInformer( diff --git a/internal/status-exporter/app.go b/internal/status-exporter/app.go index cdfe5fb..ab073d2 100644 --- a/internal/status-exporter/app.go +++ b/internal/status-exporter/app.go @@ -13,7 +13,7 @@ import ( type StatusExporterAppConfig struct { NodeName string `mapstructure:"NODE_NAME" validator:"required"` - TopologyCmName string `mapstructure:"TOPOLOGY_CM_NAME" validator:"required"` + TopologyCmName string `mapstructure:"TOPOLOGY_CM_NAME" validator:"required"` TopologyCmNamespace string `mapstructure:"TOPOLOGY_CM_NAMESPACE" validator:"required"` TopologyMaxExportInterval string `mapstructure:"TOPOLOGY_MAX_EXPORT_INTERVAL"` } diff --git a/internal/status-exporter/app_test.go b/internal/status-exporter/app_test.go index b6de76a..7c5039f 100644 --- a/internal/status-exporter/app_test.go +++ b/internal/status-exporter/app_test.go @@ -18,6 +18,7 @@ import ( "k8s.io/client-go/kubernetes/fake" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" "github.com/run-ai/fake-gpu-operator/internal/common/kubeclient" "github.com/run-ai/fake-gpu-operator/internal/common/topology" status_exporter "github.com/run-ai/fake-gpu-operator/internal/status-exporter" @@ -102,9 +103,9 @@ func setupConfig() { } func setupEnvs() { - os.Setenv("TOPOLOGY_CM_NAME", topologyCmName) - os.Setenv("TOPOLOGY_CM_NAMESPACE", topologyCmNamespace) - os.Setenv("NODE_NAME", nodeName) + os.Setenv(constants.EnvTopologyCmName, topologyCmName) + os.Setenv(constants.EnvTopologyCmNamespace, topologyCmNamespace) + os.Setenv(constants.EnvNodeName, nodeName) os.Setenv("KUBERNETES_SERVICE_HOST", "fake-k8s-service-host") os.Setenv("KUBERNETES_SERVICE_PORT", "fake-k8s-service-port") } diff --git a/internal/status-exporter/export/labels/exporter_test.go b/internal/status-exporter/export/labels/exporter_test.go index 605b7be..d988954 ---
a/internal/status-exporter/export/labels/exporter_test.go +++ b/internal/status-exporter/export/labels/exporter_test.go @@ -5,6 +5,7 @@ import ( "sync" "testing" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" "github.com/run-ai/fake-gpu-operator/internal/common/kubeclient" "github.com/run-ai/fake-gpu-operator/internal/common/topology" "github.com/run-ai/fake-gpu-operator/internal/status-exporter/export/labels" @@ -22,7 +23,7 @@ func (watcher *FakeWatcher) Subscribe(subscriber chan<- *topology.NodeTopology) func (watcher *FakeWatcher) Watch(stopCh <-chan struct{}) {} func TestExport(t *testing.T) { - viper.SetDefault("NODE_NAME", "my_node") + viper.SetDefault(constants.EnvNodeName, "my_node") myNode := &topology.NodeTopology{ GpuProduct: "some gpu", diff --git a/internal/status-exporter/export/metrics/exporter.go b/internal/status-exporter/export/metrics/exporter.go index 3a3fc32..a6328e3 100644 --- a/internal/status-exporter/export/metrics/exporter.go +++ b/internal/status-exporter/export/metrics/exporter.go @@ -10,6 +10,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" "github.com/run-ai/fake-gpu-operator/internal/common/topology" "github.com/run-ai/fake-gpu-operator/internal/status-exporter/export" "github.com/run-ai/fake-gpu-operator/internal/status-exporter/watch" @@ -61,7 +62,7 @@ func (e *MetricsExporter) Run(stopCh <-chan struct{}) { } func (e *MetricsExporter) export(nodeTopology *topology.NodeTopology) error { - nodeName := viper.GetString("NODE_NAME") + nodeName := viper.GetString(constants.EnvNodeName) gpuUtilization.Reset() gpuFbUsed.Reset() diff --git a/internal/status-exporter/watch/kubewatcher.go b/internal/status-exporter/watch/kubewatcher.go index 80b04e9..ac53cf1 100644 --- a/internal/status-exporter/watch/kubewatcher.go +++ b/internal/status-exporter/watch/kubewatcher.go @@ -4,6 +4,7 @@ import 
( "log" "time" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" "github.com/run-ai/fake-gpu-operator/internal/common/kubeclient" "github.com/run-ai/fake-gpu-operator/internal/common/topology" "github.com/spf13/viper" @@ -28,7 +29,7 @@ func (w *KubeWatcher) Subscribe(subscriber chan<- *topology.NodeTopology) { } func (w *KubeWatcher) Watch(stopCh <-chan struct{}) { - cmChan, err := w.kubeclient.WatchConfigMap(viper.GetString("TOPOLOGY_CM_NAMESPACE"), topology.GetNodeTopologyCMName(viper.GetString("NODE_NAME"))) + cmChan, err := w.kubeclient.WatchConfigMap(viper.GetString(constants.EnvTopologyCmNamespace), topology.GetNodeTopologyCMName(viper.GetString(constants.EnvNodeName))) if err != nil { panic(err) } @@ -50,7 +51,7 @@ func (w *KubeWatcher) Watch(stopCh <-chan struct{}) { case <-ticker.C: log.Printf("Topology update not received within interval, publishing...\n") - cm, ok := w.kubeclient.GetConfigMap(viper.GetString("TOPOLOGY_CM_NAMESPACE"), topology.GetNodeTopologyCMName(viper.GetString("NODE_NAME"))) + cm, ok := w.kubeclient.GetConfigMap(viper.GetString(constants.EnvTopologyCmNamespace), topology.GetNodeTopologyCMName(viper.GetString(constants.EnvNodeName))) if !ok { break } diff --git a/internal/status-updater/app_test.go b/internal/status-updater/app_test.go index 195774e..b774579 100644 --- a/internal/status-updater/app_test.go +++ b/internal/status-updater/app_test.go @@ -501,8 +501,8 @@ func setupConfig() { } func setupEnvs() { - os.Setenv("TOPOLOGY_CM_NAME", "fake-cm-name") - os.Setenv("TOPOLOGY_CM_NAMESPACE", "fake-cm-namespace") + os.Setenv(constants.EnvTopologyCmName, "fake-cm-name") + os.Setenv(constants.EnvTopologyCmNamespace, "fake-cm-namespace") } func createTopology(gpuCount int64, nodeName string) *topology.NodeTopology { diff --git a/internal/status-updater/controllers/node/controller.go b/internal/status-updater/controllers/node/controller.go index b2bf7cd..38289fb 100644 --- 
a/internal/status-updater/controllers/node/controller.go +++ b/internal/status-updater/controllers/node/controller.go @@ -6,6 +6,7 @@ import ( "log" "sync" + "github.com/run-ai/fake-gpu-operator/internal/common/constants" "github.com/run-ai/fake-gpu-operator/internal/common/topology" "github.com/run-ai/fake-gpu-operator/internal/status-updater/controllers" "github.com/run-ai/fake-gpu-operator/internal/status-updater/controllers/util" @@ -82,7 +83,7 @@ func (c *NodeController) pruneTopologyNodes() error { return fmt.Errorf("failed listing fake gpu nodes: %v", err) } - nodeTopologyCms, err := c.kubeClient.CoreV1().ConfigMaps(viper.GetString("TOPOLOGY_CM_NAMESPACE")).List(context.TODO(), metav1.ListOptions{ + nodeTopologyCms, err := c.kubeClient.CoreV1().ConfigMaps(viper.GetString(constants.EnvTopologyCmNamespace)).List(context.TODO(), metav1.ListOptions{ LabelSelector: "node-topology=true", }) if err != nil { @@ -96,7 +97,7 @@ func (c *NodeController) pruneTopologyNodes() error { for _, cm := range nodeTopologyCms.Items { if _, ok := validNodeTopologyCMMap[cm.Name]; !ok { - util.LogErrorIfExist(c.kubeClient.CoreV1().ConfigMaps(viper.GetString("TOPOLOGY_CM_NAMESPACE")).Delete(context.TODO(), cm.Name, metav1.DeleteOptions{}), fmt.Sprintf("Failed to delete node topology cm %s", cm.Name)) + util.LogErrorIfExist(c.kubeClient.CoreV1().ConfigMaps(viper.GetString(constants.EnvTopologyCmNamespace)).Delete(context.TODO(), cm.Name, metav1.DeleteOptions{}), fmt.Sprintf("Failed to delete node topology cm %s", cm.Name)) } }