Skip to content

Commit

Permalink
Update environment variable names to use constants
Browse files Browse the repository at this point in the history
  • Loading branch information
gshaibi committed Apr 2, 2024
1 parent 3f0fd06 commit 2c07b0c
Show file tree
Hide file tree
Showing 14 changed files with 46 additions and 34 deletions.
5 changes: 3 additions & 2 deletions cmd/device-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/otiai10/copy"
"github.com/run-ai/fake-gpu-operator/internal/common/config"
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
"github.com/run-ai/fake-gpu-operator/internal/deviceplugin"
"github.com/spf13/viper"
Expand All @@ -28,11 +29,11 @@ func main() {
kubeClient := KubeClientFn(clusterConfig)

log.Println("Fake Device Plugin Running")
requiredEnvVars := []string{"TOPOLOGY_CM_NAME", "TOPOLOGY_CM_NAMESPACE", "NODE_NAME"}
requiredEnvVars := []string{constants.EnvTopologyCmName, constants.EnvTopologyCmNamespace, constants.EnvNodeName}
config.ValidateConfig(requiredEnvVars)
viper.AutomaticEnv()

topology, err := topology.GetNodeTopologyFromCM(kubeClient, os.Getenv("NODE_NAME"))
topology, err := topology.GetNodeTopologyFromCM(kubeClient, os.Getenv(constants.EnvNodeName))
if err != nil {
log.Printf("Failed to get topology: %s\n", err)
os.Exit(1)
Expand Down
7 changes: 4 additions & 3 deletions cmd/nvidia-smi/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"

"github.com/jedib0t/go-pretty/v6/table"
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
)

Expand All @@ -25,16 +26,16 @@ type nvidiaSmiArgs struct {

// main is the entry point for the application.
// It sets the topology ConfigMap namespace and name environment variables,
// calls getNvidiaSmiArgs and hands the result to printArgs.
func main() {
// This diff view carries no +/- markers: the two literal-keyed calls are the
// pre-commit lines and the two constants-keyed calls are their replacements.
os.Setenv("TOPOLOGY_CM_NAMESPACE", "gpu-operator")
os.Setenv("TOPOLOGY_CM_NAME", "topology")
os.Setenv(constants.EnvTopologyCmNamespace, "gpu-operator")
os.Setenv(constants.EnvTopologyCmName, "topology")

args := getNvidiaSmiArgs()

printArgs(args)
}

func getNvidiaSmiArgs() (args nvidiaSmiArgs) {
nodeName := os.Getenv("NODE_NAME")
nodeName := os.Getenv(constants.EnvNodeName)

// Send http request to topology-server to get the topology
resp, err := http.Get("http://topology-server.gpu-operator/topology/nodes/" + nodeName)
Expand Down
3 changes: 2 additions & 1 deletion cmd/status-updater/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ package main
import (
"github.com/run-ai/fake-gpu-operator/internal/common/app"
"github.com/run-ai/fake-gpu-operator/internal/common/config"
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
status_updater "github.com/run-ai/fake-gpu-operator/internal/status-updater"
)

func main() {
requiredEnvVars := []string{"TOPOLOGY_CM_NAME", "TOPOLOGY_CM_NAMESPACE", "FAKE_GPU_OPERATOR_NAMESPACE"}
requiredEnvVars := []string{constants.EnvTopologyCmName, constants.EnvTopologyCmNamespace, constants.EnvFakeGpuOperatorNs}
config.ValidateConfig(requiredEnvVars)

appRunner := app.NewAppRunner(&status_updater.StatusUpdaterApp{})
Expand Down
5 changes: 3 additions & 2 deletions internal/common/app/apprunner.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/go-playground/validator"
"github.com/mitchellh/mapstructure"
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/spf13/viper"
)

Expand Down Expand Up @@ -95,6 +96,6 @@ func bindStruct(input interface{}) error {
}

// setDefaults registers viper defaults for the topology ConfigMap settings:
// the name defaults to "topology" and the namespace to "gpu-operator".
func setDefaults() {
// Pre-commit lines (string-literal keys), shown here without a removal marker.
viper.SetDefault("TOPOLOGY_CM_NAME", "topology")
viper.SetDefault("TOPOLOGY_CM_NAMESPACE", "gpu-operator")
// Post-commit replacements keyed by the shared constants.
viper.SetDefault(constants.EnvTopologyCmName, "topology")
viper.SetDefault(constants.EnvTopologyCmNamespace, "gpu-operator")
}
7 changes: 4 additions & 3 deletions internal/common/kubeclient/kubeclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"log"

"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/spf13/viper"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -44,7 +45,7 @@ func NewKubeClient(config *rest.Config, stop chan struct{}) *KubeClient {
}

func (client *KubeClient) SetNodeLabels(lables map[string]string) error {
nodeName := viper.GetString("NODE_NAME")
nodeName := viper.GetString(constants.EnvNodeName)
node, err := client.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
if err != nil {
return err
Expand All @@ -60,7 +61,7 @@ func (client *KubeClient) SetNodeLabels(lables map[string]string) error {
}

func (client *KubeClient) SetNodeAnnotations(annotations map[string]string) error {
nodeName := viper.GetString("NODE_NAME")
nodeName := viper.GetString(constants.EnvNodeName)
node, err := client.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
if err != nil {
return err
Expand All @@ -76,7 +77,7 @@ func (client *KubeClient) SetNodeAnnotations(annotations map[string]string) erro
}

func (client *KubeClient) GetNodeLabels() (map[string]string, error) {
nodeName := viper.GetString("NODE_NAME")
nodeName := viper.GetString(constants.EnvNodeName)
node, err := client.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
if err != nil {
return nil, err
Expand Down
21 changes: 11 additions & 10 deletions internal/common/topology/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

"gopkg.in/yaml.v3"

"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/spf13/viper"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -15,7 +16,7 @@ import (
func GetNodeTopologyFromCM(kubeclient kubernetes.Interface, nodeName string) (*NodeTopology, error) {
cmName := GetNodeTopologyCMName(nodeName)
cm, err := kubeclient.CoreV1().ConfigMaps(
viper.GetString("TOPOLOGY_CM_NAMESPACE")).Get(
viper.GetString(constants.EnvTopologyCmNamespace)).Get(
context.TODO(), cmName, metav1.GetOptions{})
if err != nil {
return nil, err
Expand All @@ -31,7 +32,7 @@ func CreateNodeTopologyCM(kubeclient kubernetes.Interface, nodeTopology *NodeTop
}

_, err = kubeclient.CoreV1().ConfigMaps(
viper.GetString("TOPOLOGY_CM_NAMESPACE")).Create(context.TODO(), cm, metav1.CreateOptions{})
viper.GetString(constants.EnvTopologyCmNamespace)).Create(context.TODO(), cm, metav1.CreateOptions{})
return err
}

Expand All @@ -42,21 +43,21 @@ func UpdateNodeTopologyCM(kubeclient kubernetes.Interface, nodeTopology *NodeTop
}

_, err = kubeclient.CoreV1().ConfigMaps(
viper.GetString("TOPOLOGY_CM_NAMESPACE")).Update(context.TODO(), cm, metav1.UpdateOptions{})
viper.GetString(constants.EnvTopologyCmNamespace)).Update(context.TODO(), cm, metav1.UpdateOptions{})
return err
}

// DeleteNodeTopologyCM deletes the ConfigMap named GetNodeTopologyCMName(nodeName)
// from the namespace read from viper and returns the error of the Delete call.
//
// The two ".Delete(" lines below are the removed and added sides of the same
// diff line (string-literal key, then constants.EnvTopologyCmNamespace); this
// view has no +/- markers, and only one of them exists in the file at a time.
func DeleteNodeTopologyCM(kubeclient kubernetes.Interface, nodeName string) error {

err := kubeclient.CoreV1().ConfigMaps(
viper.GetString("TOPOLOGY_CM_NAMESPACE")).Delete(context.TODO(), GetNodeTopologyCMName(nodeName), metav1.DeleteOptions{})
viper.GetString(constants.EnvTopologyCmNamespace)).Delete(context.TODO(), GetNodeTopologyCMName(nodeName), metav1.DeleteOptions{})
return err
}

func GetBaseTopologyFromCM(kubeclient kubernetes.Interface) (*BaseTopology, error) {
topologyCm, err := kubeclient.CoreV1().ConfigMaps(
viper.GetString("TOPOLOGY_CM_NAMESPACE")).Get(
context.TODO(), viper.GetString("TOPOLOGY_CM_NAME"), metav1.GetOptions{})
viper.GetString(constants.EnvTopologyCmNamespace)).Get(
context.TODO(), viper.GetString(constants.EnvTopologyCmName), metav1.GetOptions{})
if err != nil {
return nil, fmt.Errorf("failed to get topology configmap: %v", err)
}
Expand Down Expand Up @@ -92,8 +93,8 @@ func FromNodeTopologyCM(cm *corev1.ConfigMap) (*NodeTopology, error) {
func ToBaseTopologyCM(baseTopology *BaseTopology) (*corev1.ConfigMap, error) {
cm := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: viper.GetString("TOPOLOGY_CM_NAME"),
Namespace: viper.GetString("TOPOLOGY_CM_NAMESPACE"),
Name: viper.GetString(constants.EnvTopologyCmName),
Namespace: viper.GetString(constants.EnvTopologyCmNamespace),
},
Data: make(map[string]string),
}
Expand All @@ -112,7 +113,7 @@ func ToNodeTopologyCM(nodeTopology *NodeTopology, nodeName string) (*corev1.Conf
cm := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: GetNodeTopologyCMName(nodeName),
Namespace: viper.GetString("TOPOLOGY_CM_NAMESPACE"),
Namespace: viper.GetString(constants.EnvTopologyCmNamespace),
Labels: map[string]string{
"node-topology": "true",
},
Expand All @@ -131,5 +132,5 @@ func ToNodeTopologyCM(nodeTopology *NodeTopology, nodeName string) (*corev1.Conf
}

// GetNodeTopologyCMName returns the name of a node's topology ConfigMap: the
// topology ConfigMap name read from viper, a "-" separator, and nodeName.
func GetNodeTopologyCMName(nodeName string) string {
// Pre-commit line (string-literal key), shown here without a removal marker.
return viper.GetString("TOPOLOGY_CM_NAME") + "-" + nodeName
// Post-commit replacement keyed by the shared constant.
return viper.GetString(constants.EnvTopologyCmName) + "-" + nodeName
}
3 changes: 2 additions & 1 deletion internal/migfaker/syncconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package migfaker
import (
"sync"

"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/spf13/viper"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
Expand Down Expand Up @@ -51,7 +52,7 @@ func ContinuouslySyncMigConfigChanges(clientset kubernetes.Interface, migConfig
clientset.CoreV1().RESTClient(),
ResourceNodes,
v1.NamespaceAll,
fields.OneTermEqualSelector("metadata.name", viper.GetString("NODE_NAME")),
fields.OneTermEqualSelector("metadata.name", viper.GetString(constants.EnvNodeName)),
)

_, controller := cache.NewInformer(
Expand Down
2 changes: 1 addition & 1 deletion internal/status-exporter/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (

// StatusExporterAppConfig holds the settings of the status-exporter app. Each
// field is bound by its `mapstructure` key.
//
// Struct tags are plain string literals: Go never evaluates an expression
// inside a tag, so writing `mapstructure:constants.EnvTopologyCmName` yields a
// malformed tag whose lookup returns "" and silently unbinds the field. The key
// names therefore have to be spelled out here and kept in sync with the
// constants package by hand.
//
// NOTE(review): go-playground/validator reads the `validate` tag unless
// SetTagName is called; confirm the `validator` key used here is actually read.
type StatusExporterAppConfig struct {
	NodeName                  string `mapstructure:"NODE_NAME" validator:"required"`
	TopologyCmName            string `mapstructure:"TOPOLOGY_CM_NAME" validator:"required"`
	TopologyCmNamespace       string `mapstructure:"TOPOLOGY_CM_NAMESPACE" validator:"required"`
	TopologyMaxExportInterval string `mapstructure:"TOPOLOGY_MAX_EXPORT_INTERVAL"`
}
Expand Down
7 changes: 4 additions & 3 deletions internal/status-exporter/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"k8s.io/client-go/kubernetes/fake"

"github.com/run-ai/fake-gpu-operator/internal/common/app"
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/kubeclient"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
status_exporter "github.com/run-ai/fake-gpu-operator/internal/status-exporter"
Expand Down Expand Up @@ -102,9 +103,9 @@ func setupConfig() {
}

// setupEnvs exports the environment variables used by this test file: the
// topology ConfigMap name and namespace, the node name, and fake Kubernetes
// service host/port values.
func setupEnvs() {
// Pre-commit lines (string-literal keys), shown here without a removal marker.
os.Setenv("TOPOLOGY_CM_NAME", topologyCmName)
os.Setenv("TOPOLOGY_CM_NAMESPACE", topologyCmNamespace)
os.Setenv("NODE_NAME", nodeName)
// Post-commit replacements keyed by the shared constants.
os.Setenv(constants.EnvTopologyCmName, topologyCmName)
os.Setenv(constants.EnvTopologyCmNamespace, topologyCmNamespace)
os.Setenv(constants.EnvNodeName, nodeName)
// Unchanged by the commit: these keys stay as string literals.
os.Setenv("KUBERNETES_SERVICE_HOST", "fake-k8s-service-host")
os.Setenv("KUBERNETES_SERVICE_PORT", "fake-k8s-service-port")
}
Expand Down
3 changes: 2 additions & 1 deletion internal/status-exporter/export/labels/exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"sync"
"testing"

"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/kubeclient"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
"github.com/run-ai/fake-gpu-operator/internal/status-exporter/export/labels"
Expand All @@ -22,7 +23,7 @@ func (watcher *FakeWatcher) Subscribe(subscriber chan<- *topology.NodeTopology)
func (watcher *FakeWatcher) Watch(stopCh <-chan struct{}) {}

func TestExport(t *testing.T) {
viper.SetDefault("NODE_NAME", "my_node")
viper.SetDefault(constants.EnvNodeName, "my_node")

myNode := &topology.NodeTopology{
GpuProduct: "some gpu",
Expand Down
3 changes: 2 additions & 1 deletion internal/status-exporter/export/metrics/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
"github.com/run-ai/fake-gpu-operator/internal/status-exporter/export"
"github.com/run-ai/fake-gpu-operator/internal/status-exporter/watch"
Expand Down Expand Up @@ -61,7 +62,7 @@ func (e *MetricsExporter) Run(stopCh <-chan struct{}) {
}

func (e *MetricsExporter) export(nodeTopology *topology.NodeTopology) error {
nodeName := viper.GetString("NODE_NAME")
nodeName := viper.GetString(constants.EnvNodeName)

gpuUtilization.Reset()
gpuFbUsed.Reset()
Expand Down
5 changes: 3 additions & 2 deletions internal/status-exporter/watch/kubewatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"log"
"time"

"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/kubeclient"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
"github.com/spf13/viper"
Expand All @@ -28,7 +29,7 @@ func (w *KubeWatcher) Subscribe(subscriber chan<- *topology.NodeTopology) {
}

func (w *KubeWatcher) Watch(stopCh <-chan struct{}) {
cmChan, err := w.kubeclient.WatchConfigMap(viper.GetString("TOPOLOGY_CM_NAMESPACE"), topology.GetNodeTopologyCMName(viper.GetString("NODE_NAME")))
cmChan, err := w.kubeclient.WatchConfigMap(viper.GetString(constants.EnvTopologyCmNamespace), topology.GetNodeTopologyCMName(viper.GetString(constants.EnvNodeName)))
if err != nil {
panic(err)
}
Expand All @@ -50,7 +51,7 @@ func (w *KubeWatcher) Watch(stopCh <-chan struct{}) {

case <-ticker.C:
log.Printf("Topology update not received within interval, publishing...\n")
cm, ok := w.kubeclient.GetConfigMap(viper.GetString("TOPOLOGY_CM_NAMESPACE"), topology.GetNodeTopologyCMName(viper.GetString("NODE_NAME")))
cm, ok := w.kubeclient.GetConfigMap(viper.GetString(constants.EnvTopologyCmNamespace), topology.GetNodeTopologyCMName(viper.GetString(constants.EnvNodeName)))
if !ok {
break
}
Expand Down
4 changes: 2 additions & 2 deletions internal/status-updater/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -501,8 +501,8 @@ func setupConfig() {
}

// setupEnvs exports fake topology ConfigMap name and namespace values through
// the environment for the status-updater tests.
//
// NOTE(review): this file's diff reports 2 additions and 2 deletions and shows
// no import hunk, so the constants package is not imported by this commit;
// confirm app_test.go already imports it, otherwise this does not compile.
func setupEnvs() {
// Pre-commit lines (string-literal keys), shown here without a removal marker.
os.Setenv("TOPOLOGY_CM_NAME", "fake-cm-name")
os.Setenv("TOPOLOGY_CM_NAMESPACE", "fake-cm-namespace")
// Post-commit replacements keyed by the shared constants.
os.Setenv(constants.EnvTopologyCmName, "fake-cm-name")
os.Setenv(constants.EnvTopologyCmNamespace, "fake-cm-namespace")
}

func createTopology(gpuCount int64, nodeName string) *topology.NodeTopology {
Expand Down
5 changes: 3 additions & 2 deletions internal/status-updater/controllers/node/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"log"
"sync"

"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
"github.com/run-ai/fake-gpu-operator/internal/status-updater/controllers"
"github.com/run-ai/fake-gpu-operator/internal/status-updater/controllers/util"
Expand Down Expand Up @@ -82,7 +83,7 @@ func (c *NodeController) pruneTopologyNodes() error {
return fmt.Errorf("failed listing fake gpu nodes: %v", err)
}

nodeTopologyCms, err := c.kubeClient.CoreV1().ConfigMaps(viper.GetString("TOPOLOGY_CM_NAMESPACE")).List(context.TODO(), metav1.ListOptions{
nodeTopologyCms, err := c.kubeClient.CoreV1().ConfigMaps(viper.GetString(constants.EnvTopologyCmNamespace)).List(context.TODO(), metav1.ListOptions{
LabelSelector: "node-topology=true",
})
if err != nil {
Expand All @@ -96,7 +97,7 @@ func (c *NodeController) pruneTopologyNodes() error {

for _, cm := range nodeTopologyCms.Items {
if _, ok := validNodeTopologyCMMap[cm.Name]; !ok {
util.LogErrorIfExist(c.kubeClient.CoreV1().ConfigMaps(viper.GetString("TOPOLOGY_CM_NAMESPACE")).Delete(context.TODO(), cm.Name, metav1.DeleteOptions{}), fmt.Sprintf("Failed to delete node topology cm %s", cm.Name))
util.LogErrorIfExist(c.kubeClient.CoreV1().ConfigMaps(viper.GetString(constants.EnvTopologyCmNamespace)).Delete(context.TODO(), cm.Name, metav1.DeleteOptions{}), fmt.Sprintf("Failed to delete node topology cm %s", cm.Name))
}
}

Expand Down

0 comments on commit 2c07b0c

Please sign in to comment.