Skip to content

Commit

Permalink
RUN-10716 JSON Topology representation (#44)
Browse files Browse the repository at this point in the history
  • Loading branch information
gshaibi authored Jun 22, 2023
1 parent 1698471 commit 47c9e49
Show file tree
Hide file tree
Showing 9 changed files with 45 additions and 41 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ RUN make build COMPONENT=nvidia-smi

FROM common-builder as preloader-builder
COPY ./cmd/preloader/ ./cmd/preloader/
RUN make build-shared COMPONENT=preloader
RUN make build-preloader

FROM common-builder as mig-faker-builder
COPY ./cmd/mig-faker/ ./cmd/mig-faker/
Expand Down
7 changes: 4 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ build:
env GOOS=linux GOARCH=amd64 go build -o ${BUILD_DIR}/ ./cmd/${COMPONENT}
.PHONY: build

build-shared:
env GOOS=linux GOARCH=amd64 go build -o ${BUILD_DIR}/ -buildmode=c-shared ./cmd/${COMPONENT}
# Build the preloader as a position-independent shared object from its C source.
build-preloader:
	mkdir -p ${BUILD_DIR}
	gcc -fPIC -shared -o ${BUILD_DIR}/preloader ./cmd/preloader/main.c
# Declare this target (not `build`, which is already phony above) as phony so
# it always runs even if a file named build-preloader exists.
.PHONY: build-preloader

clean:
Expand Down Expand Up @@ -71,4 +72,4 @@ $(GINKGO):

test-all: $(GINKGO)
$(GINKGO) -r --procs=1 --output-dir=/tmp/artifacts/test-results/service-tests --compilers=1 --randomize-all --randomize-suites --fail-on-pending --keep-going --timeout=5m --race --trace --json-report=report.json
.PHONY: test-all
.PHONY: test-all
Empty file added cmd/preloader/main.c
Empty file.
5 changes: 0 additions & 5 deletions cmd/preloader/main.go

This file was deleted.

2 changes: 1 addition & 1 deletion deploy/fake-gpu-operator/templates/topology-cm.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
apiVersion: v1
data:
topology.yml: |-
{{ toYaml .Values.initialTopology | indent 4 }}
{{ toJson .Values.initialTopology | indent 4 }}
kind: ConfigMap
metadata:
name: topology
7 changes: 3 additions & 4 deletions internal/common/topology/file.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
package topology

import (
"encoding/json"
"os"

"gopkg.in/yaml.v2"
)

func GetClusterTopologyFromFs(topologyPath string) (*Cluster, error) {
Expand All @@ -14,9 +13,9 @@ func GetClusterTopologyFromFs(topologyPath string) (*Cluster, error) {
}
defer file.Close()

// Decode yaml file
// Decode json file
var clusterTopology Cluster
err = yaml.NewDecoder(file).Decode(&clusterTopology)
err = json.NewDecoder(file).Decode(&clusterTopology)
if err != nil {
return nil, err
}
Expand Down
6 changes: 3 additions & 3 deletions internal/common/topology/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ package topology

import (
"context"
"encoding/json"
"fmt"

"github.com/spf13/viper"
"gopkg.in/yaml.v2"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
Expand Down Expand Up @@ -40,7 +40,7 @@ func UpdateToKube(kubeclient kubernetes.Interface, clusterTopology *Cluster) err

func FromConfigMap(cm *corev1.ConfigMap) (*Cluster, error) {
var clusterTopology Cluster
err := yaml.Unmarshal([]byte(cm.Data[CmTopologyKey]), &clusterTopology)
err := json.Unmarshal([]byte(cm.Data[CmTopologyKey]), &clusterTopology)
if err != nil {
return nil, err
}
Expand All @@ -57,7 +57,7 @@ func ToConfigMap(clusterTopology *Cluster) (*corev1.ConfigMap, error) {
Data: make(map[string]string),
}

topologyData, err := yaml.Marshal(clusterTopology)
topologyData, err := json.Marshal(clusterTopology)
if err != nil {
return nil, err
}
Expand Down
53 changes: 31 additions & 22 deletions internal/common/topology/types.go
Original file line number Diff line number Diff line change
@@ -1,61 +1,70 @@
package topology

import (
"encoding/json"
"fmt"

"k8s.io/apimachinery/pkg/types"
)

type Cluster struct {
MigStrategy string `yaml:"mig-strategy"`
Nodes map[string]Node `yaml:"nodes"`
Config Config `yaml:"config"`
MigStrategy string `json:"mig-strategy"`
Nodes map[string]Node `json:"nodes"`
Config Config `json:"config"`
}

type Node struct {
GpuMemory int `yaml:"gpu-memory"`
GpuProduct string `yaml:"gpu-product"`
Gpus []GpuDetails `yaml:"gpus"`
GpuMemory int `json:"gpu-memory"`
GpuProduct string `json:"gpu-product"`
Gpus []GpuDetails `json:"gpus"`
}

type GpuDetails struct {
ID string `yaml:"id"`
Status GpuStatus `yaml:"status"`
ID string `json:"id"`
Status GpuStatus `json:"status"`
}

type PodGpuUsageStatusMap map[types.UID]GpuUsageStatus

// MarshalJSON implements json.Marshaler for PodGpuUsageStatusMap.
//
// A nil map is encoded as the empty JSON object "{}" instead of "null"; any
// non-nil map is encoded as a regular JSON object.
func (m PodGpuUsageStatusMap) MarshalJSON() ([]byte, error) {
	if m != nil {
		// Convert to the plain map type first: marshalling m directly would
		// invoke this method again.
		plain := map[types.UID]GpuUsageStatus(m)
		return json.Marshal(plain)
	}

	return []byte("{}"), nil
}

type GpuStatus struct {
AllocatedBy ContainerDetails `yaml:"allocated-by"`
AllocatedBy ContainerDetails `json:"allocated-by"`
// Maps PodUID to its GPU usage status
PodGpuUsageStatus PodGpuUsageStatusMap `yaml:"pod-gpu-usage-status"`
PodGpuUsageStatus PodGpuUsageStatusMap `json:"pod-gpu-usage-status"`
}

type ContainerDetails struct {
Namespace string `yaml:"namespace"`
Pod string `yaml:"pod"`
Container string `yaml:"container"`
Namespace string `json:"namespace"`
Pod string `json:"pod"`
Container string `json:"container"`
}

type GpuUsageStatus struct {
Utilization Range `yaml:"utilization"`
FbUsed int `yaml:"fb-used"`
UseKnativeUtilization bool `yaml:"use-knative-utilization"`
Utilization Range `json:"utilization"`
FbUsed int `json:"fb-used"`
UseKnativeUtilization bool `json:"use-knative-utilization"`
}

type Range struct {
Min int `yaml:"min"`
Max int `yaml:"max"`
Min int `json:"min"`
Max int `json:"max"`
}

type Config struct {
NodeAutofill NodeAutofillSettings `yaml:"node-autofill"`
NodeAutofill NodeAutofillSettings `json:"node-autofill"`
}

type NodeAutofillSettings struct {
GpuCount int `yaml:"gpu-count"`
GpuMemory int `yaml:"gpu-memory"`
GpuProduct string `yaml:"gpu-product"`
GpuCount int `json:"gpu-count"`
GpuMemory int `json:"gpu-memory"`
GpuProduct string `json:"gpu-product"`
}

// Errors
Expand Down
4 changes: 2 additions & 2 deletions internal/status-updater/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package status_updater_test

import (
"context"
"encoding/json"
"fmt"
"os"
"strconv"
Expand All @@ -15,7 +16,6 @@ import (
"github.com/run-ai/fake-gpu-operator/internal/common/constants"
"github.com/run-ai/fake-gpu-operator/internal/common/topology"
status_updater "github.com/run-ai/fake-gpu-operator/internal/status-updater"
"gopkg.in/yaml.v3"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -75,7 +75,7 @@ var _ = Describe("StatusUpdater", func() {
Config: defaultTopologyConfig,
}

topologyStr, err := yaml.Marshal(clusterTopology)
topologyStr, err := json.Marshal(clusterTopology)
Expect(err).ToNot(HaveOccurred())
topologyConfigMap := &v1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Expand Down

0 comments on commit 47c9e49

Please sign in to comment.