diff --git a/internal/common/constants/constants.go b/internal/common/constants/constants.go index e4696d7..3fd076f 100644 --- a/internal/common/constants/constants.go +++ b/internal/common/constants/constants.go @@ -17,6 +17,7 @@ const ( LabelTopologyCMNodeName = "node-name" LabelApp = "app" + DCGMExporterApp = "nvidia-dcgm-exporter" KwokDCGMExporterApp = "kwok-nvidia-dcgm-exporter" ReservationNs = "runai-reservation" diff --git a/internal/status-exporter/export/metrics/exporter.go b/internal/status-exporter/export/metrics/exporter.go index 683a8c1..01b13ad 100644 --- a/internal/status-exporter/export/metrics/exporter.go +++ b/internal/status-exporter/export/metrics/exporter.go @@ -18,8 +18,12 @@ import ( ) const ( - exporterPort = 9400 - exporterContainerName = "nvidia-dcgm-exporter" + exporterPort = 9400 + + exporterJobName = constants.DCGMExporterApp + exporterServiceName = constants.DCGMExporterApp + exporterContainerName = constants.DCGMExporterApp + exporterHostnamePrefix = constants.DCGMExporterApp ) type MetricsExporter struct { @@ -116,7 +120,7 @@ func generateFakeHostname(nodeName string) string { h := sha1.New() h.Write([]byte(nodeName)) nodeNameSHA1 := h.Sum(nil) - nodeHostname := fmt.Sprintf("%s-%x", "nvidia-dcgm-exporter", nodeNameSHA1[:3]) + nodeHostname := fmt.Sprintf("%s-%x", exporterHostnamePrefix, nodeNameSHA1[:3]) return nodeHostname } @@ -132,8 +136,8 @@ func (e *MetricsExporter) enrichWithPrometheusLabels(labels prometheus.Labels) p labels["pod"] = viper.GetString(constants.EnvImpersonatePodName) labels["instance"] = fmt.Sprintf("%s:%d", viper.GetString(constants.EnvImpersonatePodIP), exporterPort) - labels["job"] = "nvidia-dcgm-exporter" - labels["service"] = "nvidia-dcgm-exporter" + labels["job"] = exporterJobName + labels["service"] = exporterServiceName return labels } diff --git a/internal/status-updater/handlers/node/fake_node_deployments.go b/internal/status-updater/handlers/node/fake_node_deployments.go index 7944115..fb941c1 100644 --- a/internal/status-updater/handlers/node/fake_node_deployments.go +++ b/internal/status-updater/handlers/node/fake_node_deployments.go @@ -17,6 +17,10 @@ import ( "k8s.io/utils/ptr" ) +const ( + dummyDcgmExporterPodTimeout = 5 * time.Minute +) + func (p *NodeHandler) applyFakeNodeDeployments(node *v1.Node) error { if !isFakeNode(node) { return nil @@ -145,10 +149,10 @@ func (p *NodeHandler) generateFakeNodeDeploymentFromTemplate(template *appsv1.De } func (p *NodeHandler) getDummyDcgmExporterPod(nodeName string) (*v1.Pod, error) { - labelSelector := "app=nvidia-dcgm-exporter" + labelSelector := fmt.Sprintf("%s=%s", constants.LabelApp, constants.DCGMExporterApp) fieldSelector := fields.OneTermEqualSelector("spec.nodeName", nodeName).String() - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + ctx, cancel := context.WithTimeout(context.Background(), dummyDcgmExporterPodTimeout) defer cancel() watcher, err := p.kubeClient.CoreV1().Pods(v1.NamespaceAll).Watch(ctx, metav1.ListOptions{