Skip to content

Commit

Permalink
Refactor - Extract envvar names to constants (#73)
Browse files (browse the repository at this point in the history)
  • Loading branch information
gshaibi authored Apr 2, 2024
1 parent 568f01e commit eb12f64
Show file tree
Hide file tree
Showing 21 changed files with 81 additions and 70 deletions.
5 changes: 3 additions & 2 deletions cmd/device-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/otiai10/copy"
"github.com/run-ai/fake-gpu-operator/internal/common/config"
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
"github.com/run-ai/fake-gpu-operator/internal/deviceplugin"
"github.com/spf13/viper"
Expand All @@ -28,11 +29,11 @@ func main() {
kubeClient := KubeClientFn(clusterConfig)

log.Println("Fake Device Plugin Running")
requiredEnvVars := []string{"TOPOLOGY_CM_NAME", "TOPOLOGY_CM_NAMESPACE", "NODE_NAME"}
requiredEnvVars := []string{constants.EnvTopologyCmName, constants.EnvTopologyCmNamespace, constants.EnvNodeName}
config.ValidateConfig(requiredEnvVars)
viper.AutomaticEnv()

topology, err := topology.GetNodeTopologyFromCM(kubeClient, os.Getenv("NODE_NAME"))
topology, err := topology.GetNodeTopologyFromCM(kubeClient, os.Getenv(constants.EnvNodeName))
if err != nil {
log.Printf("Failed to get topology: %s\n", err)
os.Exit(1)
Expand Down
7 changes: 4 additions & 3 deletions cmd/nvidia-smi/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"

"github.com/jedib0t/go-pretty/v6/table"
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
)

Expand All @@ -25,16 +26,16 @@ type nvidiaSmiArgs struct {

// main is the entry point for the application.
func main() {
os.Setenv("TOPOLOGY_CM_NAMESPACE", "gpu-operator")
os.Setenv("TOPOLOGY_CM_NAME", "topology")
os.Setenv(constants.EnvTopologyCmNamespace, "gpu-operator")
os.Setenv(constants.EnvTopologyCmName, "topology")

args := getNvidiaSmiArgs()

printArgs(args)
}

func getNvidiaSmiArgs() (args nvidiaSmiArgs) {
nodeName := os.Getenv("NODE_NAME")
nodeName := os.Getenv(constants.EnvNodeName)

// Send http request to topology-server to get the topology
resp, err := http.Get("http://topology-server.gpu-operator/topology/nodes/" + nodeName)
Expand Down
3 changes: 2 additions & 1 deletion cmd/status-updater/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ package main
import (
"github.com/run-ai/fake-gpu-operator/internal/common/app"
"github.com/run-ai/fake-gpu-operator/internal/common/config"
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
status_updater "github.com/run-ai/fake-gpu-operator/internal/status-updater"
)

func main() {
requiredEnvVars := []string{"TOPOLOGY_CM_NAME", "TOPOLOGY_CM_NAMESPACE", "FAKE_GPU_OPERATOR_NAMESPACE"}
requiredEnvVars := []string{constants.EnvTopologyCmName, constants.EnvTopologyCmNamespace, constants.EnvFakeGpuOperatorNs}
config.ValidateConfig(requiredEnvVars)

appRunner := app.NewAppRunner(&status_updater.StatusUpdaterApp{})
Expand Down
5 changes: 3 additions & 2 deletions internal/common/app/apprunner.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/go-playground/validator"
"github.com/mitchellh/mapstructure"
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/spf13/viper"
)

Expand Down Expand Up @@ -95,6 +96,6 @@ func bindStruct(input interface{}) error {
}

func setDefaults() {
viper.SetDefault("TOPOLOGY_CM_NAME", "topology")
viper.SetDefault("TOPOLOGY_CM_NAMESPACE", "gpu-operator")
viper.SetDefault(constants.EnvTopologyCmName, "topology")
viper.SetDefault(constants.EnvTopologyCmNamespace, "gpu-operator")
}
21 changes: 10 additions & 11 deletions internal/common/constants/constants.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,22 @@
package constants

const (
GpuIdxAnnotation = "runai-gpu"
GpuFractionAnnotation = "gpu-fraction"
PodGroupNameAnnotation = "pod-group-name"
ReservationPodGpuIdxAnnotation = "run.ai/reserve_for_gpu_index"
MigMappingAnnotation = "run.ai/mig-mapping"
KwokNodeAnnotation = "kwok.x-k8s.io/node"
AnnotationGpuIdx = "runai-gpu"
AnnotationGpuFraction = "gpu-fraction"
AnnotationPodGroupName = "pod-group-name"
AnnotationReservationPodGpuIdx = "run.ai/reserve_for_gpu_index"
AnnotationMigMapping = "run.ai/mig-mapping"
AnnotationKwokNode = "kwok.x-k8s.io/node"

GpuGroupLabel = "runai-gpu-group"
GpuProductLabel = "nvidia.com/gpu.product"
MigConfigStateLabel = "nvidia.com/mig.config.state"
FakeNodeDeploymentTemplateLabel = "run.ai/fake-node-deployment-template"
LabelGpuGroup = "runai-gpu-group"
LabelGpuProduct = "nvidia.com/gpu.product"
LabelMigConfigState = "nvidia.com/mig.config.state"
LabelFakeNodeDeploymentTemplate = "run.ai/fake-node-deployment-template"

ReservationNs = "runai-reservation"

GpuResourceName = "nvidia.com/gpu"

// GuyTodo: Use these constants in the code
EnvFakeNode = "FAKE_NODE"
EnvNodeName = "NODE_NAME"
EnvTopologyCmName = "TOPOLOGY_CM_NAME"
Expand Down
7 changes: 4 additions & 3 deletions internal/common/kubeclient/kubeclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"log"

"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/spf13/viper"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -44,7 +45,7 @@ func NewKubeClient(config *rest.Config, stop chan struct{}) *KubeClient {
}

func (client *KubeClient) SetNodeLabels(lables map[string]string) error {
nodeName := viper.GetString("NODE_NAME")
nodeName := viper.GetString(constants.EnvNodeName)
node, err := client.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
if err != nil {
return err
Expand All @@ -60,7 +61,7 @@ func (client *KubeClient) SetNodeLabels(lables map[string]string) error {
}

func (client *KubeClient) SetNodeAnnotations(annotations map[string]string) error {
nodeName := viper.GetString("NODE_NAME")
nodeName := viper.GetString(constants.EnvNodeName)
node, err := client.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
if err != nil {
return err
Expand All @@ -76,7 +77,7 @@ func (client *KubeClient) SetNodeAnnotations(annotations map[string]string) erro
}

func (client *KubeClient) GetNodeLabels() (map[string]string, error) {
nodeName := viper.GetString("NODE_NAME")
nodeName := viper.GetString(constants.EnvNodeName)
node, err := client.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
if err != nil {
return nil, err
Expand Down
21 changes: 11 additions & 10 deletions internal/common/topology/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

"gopkg.in/yaml.v3"

"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/spf13/viper"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -15,7 +16,7 @@ import (
func GetNodeTopologyFromCM(kubeclient kubernetes.Interface, nodeName string) (*NodeTopology, error) {
cmName := GetNodeTopologyCMName(nodeName)
cm, err := kubeclient.CoreV1().ConfigMaps(
viper.GetString("TOPOLOGY_CM_NAMESPACE")).Get(
viper.GetString(constants.EnvTopologyCmNamespace)).Get(
context.TODO(), cmName, metav1.GetOptions{})
if err != nil {
return nil, err
Expand All @@ -31,7 +32,7 @@ func CreateNodeTopologyCM(kubeclient kubernetes.Interface, nodeTopology *NodeTop
}

_, err = kubeclient.CoreV1().ConfigMaps(
viper.GetString("TOPOLOGY_CM_NAMESPACE")).Create(context.TODO(), cm, metav1.CreateOptions{})
viper.GetString(constants.EnvTopologyCmNamespace)).Create(context.TODO(), cm, metav1.CreateOptions{})
return err
}

Expand All @@ -42,21 +43,21 @@ func UpdateNodeTopologyCM(kubeclient kubernetes.Interface, nodeTopology *NodeTop
}

_, err = kubeclient.CoreV1().ConfigMaps(
viper.GetString("TOPOLOGY_CM_NAMESPACE")).Update(context.TODO(), cm, metav1.UpdateOptions{})
viper.GetString(constants.EnvTopologyCmNamespace)).Update(context.TODO(), cm, metav1.UpdateOptions{})
return err
}

func DeleteNodeTopologyCM(kubeclient kubernetes.Interface, nodeName string) error {

err := kubeclient.CoreV1().ConfigMaps(
viper.GetString("TOPOLOGY_CM_NAMESPACE")).Delete(context.TODO(), GetNodeTopologyCMName(nodeName), metav1.DeleteOptions{})
viper.GetString(constants.EnvTopologyCmNamespace)).Delete(context.TODO(), GetNodeTopologyCMName(nodeName), metav1.DeleteOptions{})
return err
}

func GetBaseTopologyFromCM(kubeclient kubernetes.Interface) (*BaseTopology, error) {
topologyCm, err := kubeclient.CoreV1().ConfigMaps(
viper.GetString("TOPOLOGY_CM_NAMESPACE")).Get(
context.TODO(), viper.GetString("TOPOLOGY_CM_NAME"), metav1.GetOptions{})
viper.GetString(constants.EnvTopologyCmNamespace)).Get(
context.TODO(), viper.GetString(constants.EnvTopologyCmName), metav1.GetOptions{})
if err != nil {
return nil, fmt.Errorf("failed to get topology configmap: %v", err)
}
Expand Down Expand Up @@ -92,8 +93,8 @@ func FromNodeTopologyCM(cm *corev1.ConfigMap) (*NodeTopology, error) {
func ToBaseTopologyCM(baseTopology *BaseTopology) (*corev1.ConfigMap, error) {
cm := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: viper.GetString("TOPOLOGY_CM_NAME"),
Namespace: viper.GetString("TOPOLOGY_CM_NAMESPACE"),
Name: viper.GetString(constants.EnvTopologyCmName),
Namespace: viper.GetString(constants.EnvTopologyCmNamespace),
},
Data: make(map[string]string),
}
Expand All @@ -112,7 +113,7 @@ func ToNodeTopologyCM(nodeTopology *NodeTopology, nodeName string) (*corev1.Conf
cm := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: GetNodeTopologyCMName(nodeName),
Namespace: viper.GetString("TOPOLOGY_CM_NAMESPACE"),
Namespace: viper.GetString(constants.EnvTopologyCmNamespace),
Labels: map[string]string{
"node-topology": "true",
},
Expand All @@ -131,5 +132,5 @@ func ToNodeTopologyCM(nodeTopology *NodeTopology, nodeName string) (*corev1.Conf
}

func GetNodeTopologyCMName(nodeName string) string {
return viper.GetString("TOPOLOGY_CM_NAME") + "-" + nodeName
return viper.GetString(constants.EnvTopologyCmName) + "-" + nodeName
}
6 changes: 3 additions & 3 deletions internal/migfaker/migfaker.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ func (faker *MigFaker) FakeMapping(config *MigConfigs) error {
smappings, _ := json.Marshal(mappings)

labels := map[string]string{
constants.MigConfigStateLabel: "success",
constants.LabelMigConfigState: "success",
}
annotations := map[string]string{
constants.MigMappingAnnotation: base64.StdEncoding.EncodeToString(smappings),
constants.AnnotationMigMapping: base64.StdEncoding.EncodeToString(smappings),
}

err := faker.kubeclient.SetNodeLabels(labels)
Expand Down Expand Up @@ -95,7 +95,7 @@ func (faker *MigFaker) getGpuProduct() (string, error) {
return "", fmt.Errorf("failed to get node labels: %w", err)
}

return nodeLabels[constants.GpuProductLabel], nil
return nodeLabels[constants.LabelGpuProduct], nil
}

func migInstanceNameToGpuInstanceId(gpuProduct string, migInstanceName string) (int, error) {
Expand Down
2 changes: 1 addition & 1 deletion internal/migfaker/migfaker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func TestFakeMapping(t *testing.T) {
}
kubeClientMock.ActualGetNodeLabels = func() (map[string]string, error) {
return map[string]string{
constants.GpuProductLabel: "NVIDIA-A100-SXM4-40GB",
constants.LabelGpuProduct: "NVIDIA-A100-SXM4-40GB",
}, nil
}

Expand Down
3 changes: 2 additions & 1 deletion internal/migfaker/syncconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package migfaker
import (
"sync"

"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/spf13/viper"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
Expand Down Expand Up @@ -51,7 +52,7 @@ func ContinuouslySyncMigConfigChanges(clientset kubernetes.Interface, migConfig
clientset.CoreV1().RESTClient(),
ResourceNodes,
v1.NamespaceAll,
fields.OneTermEqualSelector("metadata.name", viper.GetString("NODE_NAME")),
fields.OneTermEqualSelector("metadata.name", viper.GetString(constants.EnvNodeName)),
)

_, controller := cache.NewInformer(
Expand Down
7 changes: 4 additions & 3 deletions internal/status-exporter/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"k8s.io/client-go/kubernetes/fake"

"github.com/run-ai/fake-gpu-operator/internal/common/app"
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/kubeclient"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
status_exporter "github.com/run-ai/fake-gpu-operator/internal/status-exporter"
Expand Down Expand Up @@ -102,9 +103,9 @@ func setupConfig() {
}

func setupEnvs() {
os.Setenv("TOPOLOGY_CM_NAME", topologyCmName)
os.Setenv("TOPOLOGY_CM_NAMESPACE", topologyCmNamespace)
os.Setenv("NODE_NAME", nodeName)
os.Setenv(constants.EnvTopologyCmName, topologyCmName)
os.Setenv(constants.EnvTopologyCmNamespace, topologyCmNamespace)
os.Setenv(constants.EnvNodeName, nodeName)
os.Setenv("KUBERNETES_SERVICE_HOST", "fake-k8s-service-host")
os.Setenv("KUBERNETES_SERVICE_PORT", "fake-k8s-service-port")
}
Expand Down
3 changes: 2 additions & 1 deletion internal/status-exporter/export/labels/exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"sync"
"testing"

"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/kubeclient"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
"github.com/run-ai/fake-gpu-operator/internal/status-exporter/export/labels"
Expand All @@ -22,7 +23,7 @@ func (watcher *FakeWatcher) Subscribe(subscriber chan<- *topology.NodeTopology)
func (watcher *FakeWatcher) Watch(stopCh <-chan struct{}) {}

func TestExport(t *testing.T) {
viper.SetDefault("NODE_NAME", "my_node")
viper.SetDefault(constants.EnvNodeName, "my_node")

myNode := &topology.NodeTopology{
GpuProduct: "some gpu",
Expand Down
3 changes: 2 additions & 1 deletion internal/status-exporter/export/metrics/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
"github.com/run-ai/fake-gpu-operator/internal/status-exporter/export"
"github.com/run-ai/fake-gpu-operator/internal/status-exporter/watch"
Expand Down Expand Up @@ -61,7 +62,7 @@ func (e *MetricsExporter) Run(stopCh <-chan struct{}) {
}

func (e *MetricsExporter) export(nodeTopology *topology.NodeTopology) error {
nodeName := viper.GetString("NODE_NAME")
nodeName := viper.GetString(constants.EnvNodeName)

gpuUtilization.Reset()
gpuFbUsed.Reset()
Expand Down
5 changes: 3 additions & 2 deletions internal/status-exporter/watch/kubewatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"log"
"time"

"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/kubeclient"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
"github.com/spf13/viper"
Expand All @@ -28,7 +29,7 @@ func (w *KubeWatcher) Subscribe(subscriber chan<- *topology.NodeTopology) {
}

func (w *KubeWatcher) Watch(stopCh <-chan struct{}) {
cmChan, err := w.kubeclient.WatchConfigMap(viper.GetString("TOPOLOGY_CM_NAMESPACE"), topology.GetNodeTopologyCMName(viper.GetString("NODE_NAME")))
cmChan, err := w.kubeclient.WatchConfigMap(viper.GetString(constants.EnvTopologyCmNamespace), topology.GetNodeTopologyCMName(viper.GetString(constants.EnvNodeName)))
if err != nil {
panic(err)
}
Expand All @@ -50,7 +51,7 @@ func (w *KubeWatcher) Watch(stopCh <-chan struct{}) {

case <-ticker.C:
log.Printf("Topology update not received within interval, publishing...\n")
cm, ok := w.kubeclient.GetConfigMap(viper.GetString("TOPOLOGY_CM_NAMESPACE"), topology.GetNodeTopologyCMName(viper.GetString("NODE_NAME")))
cm, ok := w.kubeclient.GetConfigMap(viper.GetString(constants.EnvTopologyCmNamespace), topology.GetNodeTopologyCMName(viper.GetString(constants.EnvNodeName)))
if !ok {
break
}
Expand Down
Loading

0 comments on commit eb12f64

Please sign in to comment.