-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #100 from run-ai/erez/mig-devices-RUN-23506
adding multiple other devices fake device plugins
- Loading branch information
Showing
12 changed files
with
338 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
package topology | ||
|
||
const ( | ||
cmTopologyKey = "topology.yml" | ||
CmTopologyKey = "topology.yml" | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,65 @@ | ||
package deviceplugin | ||
|
||
import ( | ||
"path" | ||
"strings" | ||
|
||
"github.com/run-ai/fake-gpu-operator/internal/common/constants" | ||
"github.com/run-ai/fake-gpu-operator/internal/common/topology" | ||
"github.com/spf13/viper" | ||
"k8s.io/client-go/kubernetes" | ||
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" | ||
) | ||
|
||
const ( | ||
resourceName = "nvidia.com/gpu" | ||
nvidiaGPUResourceName = "nvidia.com/gpu" | ||
) | ||
|
||
// Interface is the contract shared by the device plugins this package builds
// (both the fake-node and the real-node variants are returned through it).
type Interface interface {
	// Name returns a string identifying the plugin.
	Name() string
	// Serve runs the plugin and reports any failure as an error.
	Serve() error
}
|
||
func NewDevicePlugin(topology *topology.NodeTopology, kubeClient kubernetes.Interface) Interface { | ||
func NewDevicePlugins(topology *topology.NodeTopology, kubeClient kubernetes.Interface) []Interface { | ||
if topology == nil { | ||
panic("topology is nil") | ||
} | ||
|
||
if viper.GetBool(constants.EnvFakeNode) { | ||
return &FakeNodeDevicePlugin{ | ||
kubeClient: kubeClient, | ||
gpuCount: getGpuCount(topology), | ||
otherDevices := make(map[string]int) | ||
for _, genericDevice := range topology.OtherDevices { | ||
otherDevices[genericDevice.Name] = genericDevice.Count | ||
} | ||
|
||
return []Interface{&FakeNodeDevicePlugin{ | ||
kubeClient: kubeClient, | ||
gpuCount: getGpuCount(topology), | ||
otherDevices: otherDevices, | ||
}} | ||
} | ||
|
||
devicePlugins := []Interface{ | ||
&RealNodeDevicePlugin{ | ||
devs: createDevices(getGpuCount(topology)), | ||
socket: serverSock, | ||
resourceName: nvidiaGPUResourceName, | ||
}, | ||
} | ||
|
||
return &RealNodeDevicePlugin{ | ||
devs: createDevices(getGpuCount(topology)), | ||
socket: serverSock, | ||
for _, genericDevice := range topology.OtherDevices { | ||
devicePlugins = append(devicePlugins, &RealNodeDevicePlugin{ | ||
devs: createDevices(genericDevice.Count), | ||
socket: path.Join(pluginapi.DevicePluginPath, normalizeDeviceName(genericDevice.Name)+".sock"), | ||
resourceName: genericDevice.Name, | ||
}) | ||
} | ||
|
||
return devicePlugins | ||
} | ||
|
||
// normalizeDeviceName turns a device/resource name into a flat token by
// replacing every '/', '.' and '-' with '_'. All other characters are kept
// as they are.
func normalizeDeviceName(deviceName string) string {
	// Each separator is a single byte mapped to the same single byte, so one
	// replacer pass yields exactly what three sequential ReplaceAll calls do.
	separators := strings.NewReplacer("/", "_", ".", "_", "-", "_")
	return separators.Replace(deviceName)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
package deviceplugin | ||
|
||
import ( | ||
"testing" | ||
|
||
. "github.com/onsi/ginkgo/v2" | ||
. "github.com/onsi/gomega" | ||
"github.com/spf13/viper" | ||
|
||
"k8s.io/client-go/kubernetes/fake" | ||
|
||
"github.com/run-ai/fake-gpu-operator/internal/common/constants" | ||
"github.com/run-ai/fake-gpu-operator/internal/common/topology" | ||
) | ||
|
||
func TestDevicePlugin(t *testing.T) { | ||
RegisterFailHandler(Fail) | ||
RunSpecs(t, "DevicePlugin Suite") | ||
} | ||
|
||
var _ = Describe("NewDevicePlugins", func() { | ||
Context("When the topology is nil", func() { | ||
It("should panic", func() { | ||
Expect(func() { NewDevicePlugins(nil, nil) }).To(Panic()) | ||
}) | ||
}) | ||
|
||
Context("When the fake node is enabled", Ordered, func() { | ||
BeforeAll(func() { | ||
viper.Set(constants.EnvFakeNode, true) | ||
}) | ||
|
||
AfterAll(func() { | ||
viper.Set(constants.EnvFakeNode, false) | ||
}) | ||
|
||
It("should return a fake node device plugin", func() { | ||
topology := &topology.NodeTopology{} | ||
kubeClient := &fake.Clientset{} | ||
devicePlugins := NewDevicePlugins(topology, kubeClient) | ||
Expect(devicePlugins).To(HaveLen(1)) | ||
Expect(devicePlugins[0]).To(BeAssignableToTypeOf(&FakeNodeDevicePlugin{})) | ||
}) | ||
}) | ||
|
||
Context("With normal node", func() { | ||
It("should return a real node device plugin", func() { | ||
topology := &topology.NodeTopology{} | ||
kubeClient := &fake.Clientset{} | ||
devicePlugins := NewDevicePlugins(topology, kubeClient) | ||
Expect(devicePlugins).To(HaveLen(1)) | ||
Expect(devicePlugins[0]).To(BeAssignableToTypeOf(&RealNodeDevicePlugin{})) | ||
}) | ||
|
||
It("should return a device plugin for each other device", func() { | ||
topology := &topology.NodeTopology{ | ||
OtherDevices: []topology.GenericDevice{ | ||
{Name: "device1", Count: 1}, | ||
{Name: "device2", Count: 2}, | ||
}, | ||
} | ||
kubeClient := &fake.Clientset{} | ||
devicePlugins := NewDevicePlugins(topology, kubeClient) | ||
Expect(devicePlugins).To(HaveLen(3)) | ||
Expect(devicePlugins[0]).To(BeAssignableToTypeOf(&RealNodeDevicePlugin{})) | ||
}) | ||
}) | ||
|
||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,56 @@ | ||
package deviceplugin | ||
|
||
import ( | ||
"encoding/json" | ||
"fmt" | ||
"os" | ||
|
||
"github.com/run-ai/fake-gpu-operator/internal/common/constants" | ||
"golang.org/x/net/context" | ||
v1 "k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/api/resource" | ||
"k8s.io/apimachinery/pkg/types" | ||
"k8s.io/client-go/kubernetes" | ||
|
||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
) | ||
|
||
type FakeNodeDevicePlugin struct { | ||
kubeClient kubernetes.Interface | ||
gpuCount int | ||
kubeClient kubernetes.Interface | ||
gpuCount int | ||
otherDevices map[string]int | ||
} | ||
|
||
func (f *FakeNodeDevicePlugin) Serve() error { | ||
patch := fmt.Sprintf(`{"status": {"capacity": {"%s": "%d"}, "allocatable": {"%s": "%d"}}}`, resourceName, f.gpuCount, resourceName, f.gpuCount) | ||
_, err := f.kubeClient.CoreV1().Nodes().Patch(context.TODO(), os.Getenv(constants.EnvNodeName), types.MergePatchType, []byte(patch), metav1.PatchOptions{}, "status") | ||
nodeStatus := v1.NodeStatus{ | ||
Capacity: v1.ResourceList{ | ||
v1.ResourceName(nvidiaGPUResourceName): *resource.NewQuantity(int64(f.gpuCount), resource.DecimalSI), | ||
}, | ||
Allocatable: v1.ResourceList{ | ||
v1.ResourceName(nvidiaGPUResourceName): *resource.NewQuantity(int64(f.gpuCount), resource.DecimalSI), | ||
}, | ||
} | ||
|
||
for deviceName, count := range f.otherDevices { | ||
nodeStatus.Capacity[v1.ResourceName(deviceName)] = *resource.NewQuantity(int64(count), resource.DecimalSI) | ||
nodeStatus.Allocatable[v1.ResourceName(deviceName)] = *resource.NewQuantity(int64(count), resource.DecimalSI) | ||
} | ||
|
||
// Convert the patch struct to JSON | ||
patchBytes, err := json.Marshal(v1.Node{Status: nodeStatus}) | ||
if err != nil { | ||
return fmt.Errorf("failed to marshal patch: %v", err) | ||
} | ||
|
||
// Apply the patch | ||
_, err = f.kubeClient.CoreV1().Nodes().Patch(context.TODO(), os.Getenv(constants.EnvNodeName), types.MergePatchType, patchBytes, metav1.PatchOptions{}, "status") | ||
if err != nil { | ||
return fmt.Errorf("failed to update node capacity and allocatable: %v", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (f *FakeNodeDevicePlugin) Name() string { | ||
return "FakeNodeDevicePlugin" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package deviceplugin | ||
|
||
import ( | ||
"os" | ||
|
||
. "github.com/onsi/ginkgo/v2" | ||
. "github.com/onsi/gomega" | ||
"golang.org/x/net/context" | ||
|
||
v1 "k8s.io/api/core/v1" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/client-go/kubernetes/fake" | ||
) | ||
|
||
var _ = Describe("FakeNodeDevicePlugin.Serve", func() { | ||
It("should update the node capacity and allocatable", func() { | ||
node := &v1.Node{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: "node1", | ||
}, | ||
} | ||
os.Setenv("NODE_NAME", "node1") | ||
|
||
fakeClient := fake.NewSimpleClientset(node) | ||
|
||
fakeNodeDevicePlugin := &FakeNodeDevicePlugin{ | ||
kubeClient: fakeClient, | ||
gpuCount: 1, | ||
otherDevices: map[string]int{"device1": 2}, | ||
} | ||
|
||
err := fakeNodeDevicePlugin.Serve() | ||
Expect(err).ToNot(HaveOccurred()) | ||
|
||
updateNode, err := fakeClient.CoreV1().Nodes().Get(context.TODO(), "node1", metav1.GetOptions{}) | ||
Expect(err).ToNot(HaveOccurred()) | ||
Expect(testResourceListCondition(updateNode.Status.Capacity, v1.ResourceName(nvidiaGPUResourceName), 1)).To(BeTrue()) | ||
Expect(testResourceListCondition(updateNode.Status.Allocatable, v1.ResourceName(nvidiaGPUResourceName), 1)).To(BeTrue()) | ||
Expect(testResourceListCondition(updateNode.Status.Capacity, v1.ResourceName("device1"), 2)).To(BeTrue()) | ||
Expect(testResourceListCondition(updateNode.Status.Allocatable, v1.ResourceName("device1"), 2)).To(BeTrue()) | ||
}) | ||
}) | ||
|
||
func testResourceListCondition(resourceList v1.ResourceList, resourceName v1.ResourceName, value int64) bool { | ||
quantity, found := resourceList[resourceName] | ||
if !found { | ||
return false | ||
} | ||
return quantity.Value() == value | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.