Skip to content

Commit

Permalink
CDI implementation
Browse files Browse the repository at this point in the history
This commit implements Container Device Interface [1] support.

[1] https://github.com/container-orchestrated-devices/container-device-interface
  • Loading branch information
e0ne committed Jun 30, 2023
1 parent b3aea76 commit 97d01f7
Show file tree
Hide file tree
Showing 89 changed files with 10,838 additions and 201 deletions.
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ GO2XUNIT = $(GOBIN)/go2xunit
GOMOCKERY = $(GOBIN)/mockery
# Package info
BINARY_NAME=sriovdp
CDI_BINARY_NAME=cdi-service
PACKAGE=sriov-network-device-plugin
ORG_PATH=github.com/k8snetworkplumbingwg
# Build info
Expand Down Expand Up @@ -73,6 +74,12 @@ build: $(BUILDDIR)/$(BINARY_NAME) | ; $(info Building $(BINARY_NAME)...) @ ## Bu
$(BUILDDIR)/$(BINARY_NAME): $(GOFILES) | $(BUILDDIR)
@cd $(BASE)/cmd/$(BINARY_NAME) && CGO_ENABLED=0 go build $(LDFLAGS) -o $(BUILDDIR)/$(BINARY_NAME) -tags no_openssl -v

# build-cdi: convenience target that depends on the CDI service binary
# ($(BUILDDIR)/$(CDI_BINARY_NAME)) and prints progress messages via $(info).
build-cdi: $(BUILDDIR)/$(CDI_BINARY_NAME) | ; $(info Building $(CDI_BINARY_NAME)...) @ ## Build SR-IOV CDI Service
$(info Done!)

# Compile the package in cmd/$(CDI_BINARY_NAME) with CGO disabled and the
# no_openssl build tag, writing the result to $(BUILDDIR). The binary is
# rebuilt when any file in $(GOFILES) changes; $(BUILDDIR) is an order-only
# prerequisite.
$(BUILDDIR)/$(CDI_BINARY_NAME): $(GOFILES) | $(BUILDDIR)
@cd $(BASE)/cmd/$(CDI_BINARY_NAME) && CGO_ENABLED=0 go build $(LDFLAGS) -o $(BUILDDIR)/$(CDI_BINARY_NAME) -tags no_openssl -v

$(GOLINT): | $(BASE) ; $(info building golint...)
$Q go install golang.org/x/lint/golint@latest

Expand Down
42 changes: 42 additions & 0 deletions cmd/cdi-service/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright 2018 Intel Corp. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"flag"
"github.com/golang/glog"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/cmd"
)

func main() {
cp := &cmd.CliParams{}
cmd.FlagInit(cp)
flag.Parse()
rm := cmd.NewResourceManager(cp)

err := rm.Init()
if err != nil {
glog.Fatalf("error initialization resources manager %v", err)
return
}

if err := rm.StoreConfig(); err != nil {
glog.Errorf("can not create config file %v", err)
}
if err := rm.CreateCdiSpec(); err != nil {
glog.Errorf("can not create CDI spec %v", err)
}
return
}
196 changes: 168 additions & 28 deletions cmd/sriovdp/manager.go → cmd/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,68 +12,121 @@
// See the License for the specific language governing permissions and
// limitations under the License.

package main
package cmd

import (
"encoding/json"
"flag"
"fmt"
"io"
"os"
"path/filepath"

"github.com/golang/glog"
"github.com/jaypipes/ghw"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/cdi"

"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/factory"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types"
"github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils"
)

const (
socketSuffix = "sock"
defaultConfig = "/etc/pcidp/config.json"
socketSuffix = "sock"
)

type cliParams struct {
configFile string
resourcePrefix string
// CliParams presents CLI parameters for SR-IOV Network Device Plugin
type CliParams struct {
ConfigFile string
ResourcePrefix string
UseCdi bool
HostConfigDir string
Systemd bool
}

type resourceManager struct {
cliParams
// ResourceManager manages resources for SR-IOV Network Device Plugin binaries
type ResourceManager struct {
CliParams
pluginWatchMode bool
rFactory types.ResourceFactory
configList []*types.ResourceConfig
ConfigList []*types.ResourceConfig
resourceServers []types.ResourceServer
deviceProviders map[types.DeviceType]types.DeviceProvider
}

func newResourceManager(cp *cliParams) *resourceManager {
// FlagInit registers the command line flags of the SR-IOV Network Device
// Plugin binaries on the default flag set and binds them to the fields of cp.
// It does not parse the command line itself; callers are expected to call
// flag.Parse afterwards.
func FlagInit(cp *CliParams) {
	fs := flag.CommandLine

	// String options.
	fs.StringVar(&cp.ConfigFile, "config-file", defaultConfig,
		"JSON device pool config file location")
	fs.StringVar(&cp.ResourcePrefix, "resource-prefix", "intel.com",
		"resource name prefix used for K8s extended resource")
	fs.StringVar(&cp.HostConfigDir, "host-config-dir", "/host/etc/pcidp/",
		"Device Plugin config directory on a host")

	// Boolean switches.
	fs.BoolVar(&cp.UseCdi, "use-cdi", true,
		"Use Container Device Interface to expose devices in containers")
	fs.BoolVar(&cp.Systemd, "systemd", false,
		"Run as systemd service to keep CDI configuration consistent")
}

// NewResourceManager creates a new ResourceManager instance
func NewResourceManager(cp *CliParams) *ResourceManager {
pluginWatchMode := utils.DetectPluginWatchMode(types.SockDir)
if pluginWatchMode {
glog.Infof("Using Kubelet Plugin Registry Mode")
} else {
glog.Infof("Using Deprecated Device Plugin Registry Path")
}

rf := factory.NewResourceFactory(cp.resourcePrefix, socketSuffix, pluginWatchMode)
rf := factory.NewResourceFactory(cp.ResourcePrefix, socketSuffix, pluginWatchMode, cp.UseCdi)
dp := make(map[types.DeviceType]types.DeviceProvider)
for k := range types.SupportedDevices {
dp[k] = rf.GetDeviceProvider(k)
}

return &resourceManager{
cliParams: *cp,
return &ResourceManager{
CliParams: *cp,
pluginWatchMode: pluginWatchMode,
rFactory: rf,
deviceProviders: dp,
}
}

// readConfig reads and validate configurations from Config file
func (rm *resourceManager) readConfig() error {
// Init prepares the data shared by the Device Plugin and the CDI service.
//
// It performs the following steps in order and stops at the first failure:
//  1. reads the config file (ReadConfig);
//  2. checks that at least one resource configuration was loaded;
//  3. validates the loaded configurations (ValidConfigs);
//  4. discovers the host devices (DiscoverHostDevices).
//
// Every failure is logged and returned as an error; the method never
// terminates the process itself, so the caller decides how to react.
func (rm *ResourceManager) Init() error {
	glog.Infof("resource manager reading configs")
	if err := rm.ReadConfig(); err != nil {
		glog.Errorf("error getting resources from file %v", err)
		return err
	}

	if len(rm.ConfigList) == 0 {
		glog.Errorf("no resource configuration")
		return fmt.Errorf("no resource configuration")
	}

	// Validate configs
	if !rm.ValidConfigs() {
		// Log at error level and return: a fatal log here would exit the
		// process and leave the error return below unreachable.
		glog.Errorf("one or more invalid configuration(s) given")
		return fmt.Errorf("one or more invalid configuration(s) given")
	}

	glog.Infof("Discovering host devices")
	if err := rm.DiscoverHostDevices(); err != nil {
		glog.Errorf("error discovering host devices: %v", err)
		return err
	}

	return nil
}

// ReadConfig reads and validates configurations from the config file
func (rm *ResourceManager) ReadConfig() error {
resources := &types.ResourceConfList{}
rawBytes, err := os.ReadFile(rm.configFile)
rawBytes, err := os.ReadFile(rm.ConfigFile)

if err != nil {
return fmt.Errorf("error reading file %s, %v", rm.configFile, err)
return fmt.Errorf("error reading file %s, %v", rm.ConfigFile, err)
}

glog.Infof("raw ResourceList: %s", rawBytes)
Expand All @@ -90,7 +143,7 @@ func (rm *resourceManager) readConfig() error {
return fmt.Errorf("unsupported deviceType: \"%s\"", conf.DeviceType)
}
if conf.SelectorObjs, err = rm.rFactory.GetDeviceFilter(conf); err == nil {
rm.configList = append(rm.configList, &resources.ResourceList[i])
rm.ConfigList = append(rm.ConfigList, &resources.ResourceList[i])
} else {
glog.Warningf("unable to get SelectorObj from selectors list:'%s' for deviceType: %s error: %s",
*conf.Selectors, conf.DeviceType, err)
Expand All @@ -100,11 +153,85 @@ func (rm *resourceManager) readConfig() error {
return nil
}

func (rm *resourceManager) initServers() error {
// StoreConfig copies the SR-IOV Device Plugin config file (rm.ConfigFile)
// into rm.HostConfigDir, keeping the base name of the source file. An existing
// file at the destination is truncated by os.Create.
//
// It returns an error when the source cannot be opened, the destination cannot
// be created, or copying or closing the destination fails. Each failure is
// also logged.
func (rm *ResourceManager) StoreConfig() error {
	source, err := os.Open(rm.ConfigFile)
	if err != nil {
		glog.Errorf("storeConfig(): error opening config file: %v", err)
		return err
	}
	defer source.Close()

	destPath := filepath.Join(rm.HostConfigDir, filepath.Base(rm.ConfigFile))
	destination, err := os.Create(destPath)
	if err != nil {
		glog.Errorf("storeConfig(): error creating config file on host: %v", err)
		return err
	}

	if _, err = io.Copy(destination, source); err != nil {
		// The copy error is the one reported; Close is only called here to
		// release the file descriptor.
		_ = destination.Close()
		glog.Errorf("storeConfig(): error during copy config file to host: %v", err)
		return err
	}

	// Close explicitly instead of deferring: an error from Close on a file
	// that was just written must not be silently dropped.
	if err = destination.Close(); err != nil {
		glog.Errorf("storeConfig(): error closing config file on host: %v", err)
		return err
	}
	return nil
}

// CreateCdiSpec creates the CDI definition for the container runtime.
//
// For every entry in rm.ConfigList it looks up the device provider for the
// configured device type, collects the devices matched by each selector,
// builds a resource pool from them and calls cdi.CreateCDISpec with
// rm.ResourcePrefix, the devices and the pool.
//
// Configs that end up with no devices are skipped. An error is returned when
// no device provider exists for a device type, or when creating the resource
// pool or the CDI spec fails. A failure to filter devices for one selector is
// logged only, and processing continues with whatever was returned.
func (rm *ResourceManager) CreateCdiSpec() error {
	glog.Infof("number of config: %d\n", len(rm.ConfigList))

	// Shared across all configs and passed to excludeAllocatedDevices, which
	// drops devices whose ID is already marked in it.
	deviceAllocated := make(map[string]bool)

	for _, rc := range rm.ConfigList {
		// Create new ResourcePool
		glog.Infof("")
		glog.Infof("Creating new ResourcePool: %s", rc.ResourceName)
		glog.Infof("DeviceType: %+v", rc.DeviceType)
		dp, ok := rm.deviceProviders[rc.DeviceType]
		if !ok {
			glog.Errorf("CreateCdiSpec(): unable to get device provider from deviceType: %s", rc.DeviceType)
			return fmt.Errorf("error getting device provider")
		}

		var filteredDevices []types.HostDevice

		for index := range rc.SelectorObjs {
			devices := dp.GetDevices(rc, index)
			partialFilteredDevices, err := dp.GetFilteredDevices(devices, rc, index)
			if err != nil {
				// Best effort: keep going with whatever the provider returned.
				glog.Errorf("CreateCdiSpec(): error getting filtered devices for config %+v: %q", rc, err)
			}
			partialFilteredDevices = rm.excludeAllocatedDevices(partialFilteredDevices, deviceAllocated)
			glog.Infof("CreateCdiSpec(): selector index %d will register %d devices", index, len(partialFilteredDevices))
			filteredDevices = append(filteredDevices, partialFilteredDevices...)
		}

		if len(filteredDevices) == 0 {
			glog.Infof("no devices in device pool, skipping creating CDI spec for %s", rc.ResourceName)
			continue
		}

		rPool, err := rm.rFactory.GetResourcePool(rc, filteredDevices)
		if err != nil {
			glog.Errorf("CreateCdiSpec(): error creating ResourcePool with config %+v: %q", rc, err)
			return err
		}

		if err := cdi.CreateCDISpec(rm.ResourcePrefix, filteredDevices, rPool); err != nil {
			glog.Errorf("CreateCdiSpec(): error creating CDI spec: %v", err)
			return err
		}
	}
	return nil
}

// InitServers initializes device providers and resource pools
func (rm *ResourceManager) InitServers() error {
rf := rm.rFactory
glog.Infof("number of config: %d\n", len(rm.configList))
glog.Infof("number of config: %d\n", len(rm.ConfigList))
deviceAllocated := make(map[string]bool)
for _, rc := range rm.configList {
for _, rc := range rm.ConfigList {
// Create new ResourcePool
glog.Infof("")
glog.Infof("Creating new ResourcePool: %s", rc.ResourceName)
Expand All @@ -131,11 +258,21 @@ func (rm *resourceManager) initServers() error {
glog.Infof("no devices in device pool, skipping creating resource server for %s", rc.ResourceName)
continue
}

rPool, err := rm.rFactory.GetResourcePool(rc, filteredDevices)
if err != nil {
glog.Errorf("initServers(): error creating ResourcePool with config %+v: %q", rc, err)
return err
}

if rm.UseCdi {
err = cdi.CreateCDISpec(rm.ResourcePrefix, filteredDevices, rPool)
if err != nil {
glog.Errorf("initServers(): error creating CDI spec: %v", err)
return err
}
}

// Create ResourceServer with this ResourcePool
s, err := rf.GetResourceServer(rPool)
if err != nil {
Expand All @@ -148,7 +285,7 @@ func (rm *resourceManager) initServers() error {
return nil
}

func (rm *resourceManager) excludeAllocatedDevices(filteredDevices []types.HostDevice, deviceAllocated map[string]bool) []types.HostDevice {
func (rm *ResourceManager) excludeAllocatedDevices(filteredDevices []types.HostDevice, deviceAllocated map[string]bool) []types.HostDevice {
filteredDevicesTemp := []types.HostDevice{}
for _, dev := range filteredDevices {
if !deviceAllocated[dev.GetDeviceID()] {
Expand All @@ -161,7 +298,8 @@ func (rm *resourceManager) excludeAllocatedDevices(filteredDevices []types.HostD
return filteredDevicesTemp
}

func (rm *resourceManager) startAllServers() error {
// StartAllServers starts all Device Plugin resource servers
func (rm *ResourceManager) StartAllServers() error {
for _, rs := range rm.resourceServers {
if err := rs.Start(); err != nil {
return err
Expand All @@ -175,7 +313,8 @@ func (rm *resourceManager) startAllServers() error {
return nil
}

func (rm *resourceManager) stopAllServers() error {
// StopAllServers stops all Device Plugin resource servers
func (rm *ResourceManager) StopAllServers() error {
for _, rs := range rm.resourceServers {
if err := rs.Stop(); err != nil {
return err
Expand All @@ -184,19 +323,19 @@ func (rm *resourceManager) stopAllServers() error {
return nil
}

// Validate configurations
func (rm *resourceManager) validConfigs() bool {
// ValidConfigs validates the loaded resource configurations
func (rm *ResourceManager) ValidConfigs() bool {
resourceNames := make(map[string]string) // resource names placeholder

for _, conf := range rm.configList {
for _, conf := range rm.ConfigList {
// check if name contains acceptable characters
if !utils.ValidResourceName(conf.ResourceName) {
glog.Errorf("resource name \"%s\" contains invalid characters", conf.ResourceName)
return false
}

// resourcePrefix might be overridden for a given resource pool
resourcePrefix := rm.cliParams.resourcePrefix
resourcePrefix := rm.CliParams.ResourcePrefix
if conf.ResourcePrefix != "" {
resourcePrefix = conf.ResourcePrefix
}
Expand Down Expand Up @@ -229,7 +368,8 @@ func (rm *resourceManager) validConfigs() bool {
return true
}

func (rm *resourceManager) discoverHostDevices() error {
// DiscoverHostDevices finds SR-IOV devices on host
func (rm *ResourceManager) DiscoverHostDevices() error {
pci, err := ghw.PCI()
if err != nil {
return fmt.Errorf("discoverDevices(): error getting PCI info: %v", err)
Expand Down
2 changes: 1 addition & 1 deletion cmd/sriovdp/manager_test.go → cmd/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

package main
package cmd

import (
"fmt"
Expand Down
Loading

0 comments on commit 97d01f7

Please sign in to comment.