Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug: rdma exclusive handling #603

Merged
merged 1 commit into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 53 additions & 27 deletions pkg/devices/rdma.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,69 @@
package devices

import (
"github.com/golang/glog"
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"

"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils"
)

type rdmaSpec struct {
isSupportRdma bool
deviceSpec []*pluginapi.DeviceSpec
deviceID string
deviceType types.DeviceType
}

func newRdmaSpec(rdmaResources []string) types.RdmaSpec {
// NewRdmaSpec returns the RdmaSpec for the device with the given type and ID.
// For types.AcceleratorType it returns a nil RdmaSpec; for every other device
// type it returns a spec that remembers dt and id so RDMA information can be
// looked up later.
func NewRdmaSpec(dt types.DeviceType, id string) types.RdmaSpec {
	// A literal nil is returned here so the resulting interface value is
	// truly nil for callers that compare against nil.
	if dt == types.AcceleratorType {
		return nil
	}
	spec := rdmaSpec{deviceType: dt, deviceID: id}
	return &spec
}

// IsRdma reports whether the device supports RDMA.
//
// It returns true when RDMA resources are found for the device. Otherwise it
// falls back to the netlink "enable_rdma" param, looked up on the "pci" bus
// for types.NetDeviceType and on the "auxiliary" bus for
// types.AuxNetDeviceType. Any other device type yields false, as does a
// failure to read the param (the failure is logged).
func (r *rdmaSpec) IsRdma() bool {
	if resources := r.getRdmaResources(); len(resources) != 0 {
		return true
	}

	var busName string
	//nolint: exhaustive
	switch r.deviceType {
	case types.AuxNetDeviceType:
		busName = "auxiliary"
	case types.NetDeviceType:
		busName = "pci"
	default:
		return false
	}

	// With exclusive RDMA, once the resource is assigned to a pod the files
	// used to detect RDMA support are removed from the host. To keep
	// reporting the resource in that state, the netlink "enable_rdma" param
	// is consulted instead. This can happen when the device is discovered,
	// assigned to a pod, and the plugin is then restarted.
	supported, err := utils.HasRdmaParam(busName, r.deviceID)
	if err == nil {
		return supported
	}
	glog.Infof("HasRdmaParam(): unable to get Netlink RDMA param for device %s : %q", r.deviceID, err)
	return false
}

// getRdmaResources returns the RDMA devices the RDMA provider reports for this
// device: looked up by PCI device for types.NetDeviceType and by auxiliary
// device for types.AuxNetDeviceType. Any other device type yields an empty,
// non-nil slice.
func (r *rdmaSpec) getRdmaResources() []string {
	if r.deviceType == types.NetDeviceType {
		return utils.GetRdmaProvider().GetRdmaDevicesForPcidev(r.deviceID)
	}
	if r.deviceType == types.AuxNetDeviceType {
		return utils.GetRdmaProvider().GetRdmaDevicesForAuxdev(r.deviceID)
	}
	return []string{}
}

func (r *rdmaSpec) GetRdmaDeviceSpec() []*pluginapi.DeviceSpec {
rdmaResources := r.getRdmaResources()
deviceSpec := make([]*pluginapi.DeviceSpec, 0)
isSupportRdma := false
if len(rdmaResources) > 0 {
isSupportRdma = true
for _, res := range rdmaResources {
resRdmaDevices := utils.GetRdmaProvider().GetRdmaCharDevices(res)
for _, rdmaDevice := range resRdmaDevices {
Expand All @@ -45,26 +92,5 @@ func newRdmaSpec(rdmaResources []string) types.RdmaSpec {
}
}
}

return &rdmaSpec{isSupportRdma: isSupportRdma, deviceSpec: deviceSpec}
}

// NewRdmaSpec returns the RdmaSpec for PCI address
func NewRdmaSpec(pciAddr string) types.RdmaSpec {
rdmaResources := utils.GetRdmaProvider().GetRdmaDevicesForPcidev(pciAddr)
return newRdmaSpec(rdmaResources)
}

// NewAuxRdmaSpec returns the RdmaSpec for auxiliary device ID
func NewAuxRdmaSpec(deviceID string) types.RdmaSpec {
rdmaResources := utils.GetRdmaProvider().GetRdmaDevicesForAuxdev(deviceID)
return newRdmaSpec(rdmaResources)
}

func (r *rdmaSpec) IsRdma() bool {
return r.isSupportRdma
}

func (r *rdmaSpec) GetRdmaDeviceSpec() []*pluginapi.DeviceSpec {
return r.deviceSpec
return deviceSpec
}
49 changes: 46 additions & 3 deletions pkg/devices/rdma_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"

"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/devices"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils/mocks"
)
Expand All @@ -31,16 +32,58 @@ var _ = Describe("RdmaSpec", func() {
Describe("creating new RdmaSpec", func() {
t := GinkgoT()
Context("successfully", func() {
It("without device specs", func() {
It("without device specs, without netlink enable_rdma param", func() {
rollandf marked this conversation as resolved.
Show resolved Hide resolved
mockProvider := &mocks.NetlinkProvider{}
mockProvider.On("HasRdmaParam", "pci", "0000:00:00.0").Return(false, nil)
utils.SetNetlinkProviderInst(mockProvider)
fakeRdmaProvider := mocks.RdmaProvider{}
fakeRdmaProvider.On("GetRdmaDevicesForPcidev", "0000:00:00.0").Return([]string{})
utils.SetRdmaProviderInst(&fakeRdmaProvider)
spec := devices.NewRdmaSpec("0000:00:00.0")
spec := devices.NewRdmaSpec(types.NetDeviceType, "0000:00:00.0")

Expect(spec.IsRdma()).To(BeFalse())
Expect(spec.GetRdmaDeviceSpec()).To(HaveLen(0))
fakeRdmaProvider.AssertExpectations(t)
})
It("without device specs, with netlink enable_rdma param", func() {
mockProvider := &mocks.NetlinkProvider{}
mockProvider.On("HasRdmaParam", "pci", "0000:00:00.0").Return(true, nil)
utils.SetNetlinkProviderInst(mockProvider)
fakeRdmaProvider := mocks.RdmaProvider{}
fakeRdmaProvider.On("GetRdmaDevicesForPcidev", "0000:00:00.0").Return([]string{})
utils.SetRdmaProviderInst(&fakeRdmaProvider)
spec := devices.NewRdmaSpec(types.NetDeviceType, "0000:00:00.0")

Expect(spec.IsRdma()).To(BeTrue())
Expect(spec.GetRdmaDeviceSpec()).To(HaveLen(0))
fakeRdmaProvider.AssertExpectations(t)
})
It("aux without device specs, without netlink enable_rdma param", func() {
mockProvider := &mocks.NetlinkProvider{}
mockProvider.On("HasRdmaParam", "auxiliary", "mlx5_core.sf.4").Return(false, nil)
utils.SetNetlinkProviderInst(mockProvider)
fakeRdmaProvider := mocks.RdmaProvider{}
fakeRdmaProvider.On("GetRdmaDevicesForAuxdev", "mlx5_core.sf.4").Return([]string{})
utils.SetRdmaProviderInst(&fakeRdmaProvider)
spec := devices.NewRdmaSpec(types.AuxNetDeviceType, "mlx5_core.sf.4")

Expect(spec.IsRdma()).To(BeFalse())
Expect(spec.GetRdmaDeviceSpec()).To(HaveLen(0))
fakeRdmaProvider.AssertExpectations(t)
})
It("aux without device specs, with netlink enable_rdma param", func() {
mockProvider := &mocks.NetlinkProvider{}
mockProvider.On("HasRdmaParam", "auxiliary", "mlx5_core.sf.4").Return(true, nil)
utils.SetNetlinkProviderInst(mockProvider)
fakeRdmaProvider := mocks.RdmaProvider{}
fakeRdmaProvider.On("GetRdmaDevicesForAuxdev", "mlx5_core.sf.4").Return([]string{})
utils.SetRdmaProviderInst(&fakeRdmaProvider)
spec := devices.NewRdmaSpec(types.AuxNetDeviceType, "mlx5_core.sf.4")

Expect(spec.IsRdma()).To(BeTrue())
Expect(spec.GetRdmaDeviceSpec()).To(HaveLen(0))
fakeRdmaProvider.AssertExpectations(t)
})
It("with device specs", func() {
fakeRdmaProvider := mocks.RdmaProvider{}
fakeRdmaProvider.On("GetRdmaDevicesForPcidev", "0000:00:00.0").
Expand All @@ -50,7 +93,7 @@ var _ = Describe("RdmaSpec", func() {
"/dev/infiniband/uverbs0", "/dev/infiniband/rdma_cm",
}).On("GetRdmaCharDevices", "fake_1").Return([]string{"/dev/infiniband/rdma_cm"})
utils.SetRdmaProviderInst(&fakeRdmaProvider)
spec := devices.NewRdmaSpec("0000:00:00.0")
spec := devices.NewRdmaSpec(types.NetDeviceType, "0000:00:00.0")

Expect(spec.IsRdma()).To(BeTrue())
Expect(spec.GetRdmaDeviceSpec()).To(Equal([]*pluginapi.DeviceSpec{
Expand Down
10 changes: 1 addition & 9 deletions pkg/factory/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,15 +163,7 @@ func (rf *resourceFactory) GetResourcePool(rc *types.ResourceConfig, filteredDev
}

func (rf *resourceFactory) GetRdmaSpec(dt types.DeviceType, deviceID string) types.RdmaSpec {
//nolint: exhaustive
switch dt {
case types.NetDeviceType:
return devices.NewRdmaSpec(deviceID)
case types.AuxNetDeviceType:
return devices.NewAuxRdmaSpec(deviceID)
default:
return nil
}
return devices.NewRdmaSpec(dt, deviceID)
}

func (rf *resourceFactory) GetVdpaDevice(pciAddr string) types.VdpaDevice {
Expand Down
6 changes: 6 additions & 0 deletions pkg/factory/factory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@ import (
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types/mocks"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils"
utilmocks "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils/mocks"

. "github.com/onsi/ginkgo"
. "github.com/onsi/ginkgo/extensions/table"
. "github.com/onsi/gomega"
"github.com/stretchr/testify/mock"
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
)

Expand Down Expand Up @@ -606,6 +608,10 @@ var _ = Describe("Factory", func() {
)
Describe("getting rdma spec", func() {
Context("check c rdma spec", func() {
mockProvider := &utilmocks.NetlinkProvider{}
mockProvider.On("HasRdmaParam", mock.AnythingOfType("string"),
mock.AnythingOfType("string")).Return(false, nil)
utils.SetNetlinkProviderInst(mockProvider)
f := factory.NewResourceFactory("fake", "fake", true, false)
rs1 := f.GetRdmaSpec(types.NetDeviceType, "0000:00:00.1")
rs2 := f.GetRdmaSpec(types.AcceleratorType, "0000:00:00.2")
Expand Down
32 changes: 30 additions & 2 deletions pkg/utils/mocks/NetlinkProvider.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions pkg/utils/netlink_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ type NetlinkProvider interface {
GetIPv4RouteList(ifName string) ([]nl.Route, error)
// GetDevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map
GetDevlinkGetDeviceInfoByNameAsMap(bus, device string) (map[string]string, error)
// HasRdmaParam returns true if the device's "enable_rdma" param is set to true
HasRdmaParam(bus, pciAddr string) (bool, error)
}

type defaultNetlinkProvider struct {
Expand All @@ -48,6 +50,26 @@ func GetNetlinkProvider() NetlinkProvider {
return netlinkProvider
}

// HasRdmaParam reports the value of the device's "enable_rdma" devlink param.
// It is equivalent to "devlink dev param show pci/0000:d8:01.1 name enable_rdma"
// or "devlink dev param show auxiliary/mlx5_core.sf.4 name enable_rdma".
//
// It returns false with a nil error when the param carries no value, and an
// error when the param cannot be read or its first value is not a bool. The
// underlying netlink error is wrapped with %w so callers can inspect it with
// errors.Is / errors.As.
func (defaultNetlinkProvider) HasRdmaParam(bus, deviceID string) (bool, error) {
	param, err := nl.DevlinkGetDeviceParamByName(bus, deviceID, "enable_rdma")
	if err != nil {
		return false, fmt.Errorf("error getting enable_rdma attribute for device %s on bus %s %w",
			deviceID, bus, err)
	}
	// Only the first reported value is inspected; a missing value means "not enabled".
	if len(param.Values) == 0 || param.Values[0].Data == nil {
		return false, nil
	}
	enabled, ok := param.Values[0].Data.(bool)
	if !ok {
		return false, fmt.Errorf("value is not a bool")
	}
	return enabled, nil
}

// GetLinkAttrs returns a net device's link attributes.
func (defaultNetlinkProvider) GetLinkAttrs(ifName string) (*nl.LinkAttrs, error) {
link, err := nl.LinkByName(ifName)
Expand Down
11 changes: 11 additions & 0 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,17 @@ func GetPfEswitchMode(pciAddr string) (string, error) {
return devLinkDeviceAttrs.Mode, nil
}

// HasRdmaParam asks the current netlink provider about the "enable_rdma"
// param of the device identified by bus and deviceID,
// for example: pci 0000:d8:01.1
// or auxiliary mlx5_core.sf.4
// On failure it returns false together with the provider's error.
func HasRdmaParam(bus, deviceID string) (bool, error) {
	provider := GetNetlinkProvider()
	supported, err := provider.HasRdmaParam(bus, deviceID)
	if err != nil {
		// Normalize the result so an error is never paired with a true value.
		return false, err
	}
	return supported, nil
}

// HasDefaultRoute returns true if PCI network device is default route interface
func HasDefaultRoute(pciAddr string) (bool, error) {
// Get net interface name
Expand Down
Loading