diff --git a/cmd/cni/cni.go b/cmd/cni/cni.go index 6f46c78c2bf..4e1ac4da79a 100644 --- a/cmd/cni/cni.go +++ b/cmd/cni/cni.go @@ -47,19 +47,20 @@ func cmdAdd(args *skel.CmdArgs) error { client := request.NewCniServerClient(netConf.ServerSocket) response, err := client.Add(request.CniRequest{ - CniType: netConf.Type, - PodName: podName, - PodNamespace: podNamespace, - ContainerID: args.ContainerID, - NetNs: args.Netns, - IfName: args.IfName, - Provider: netConf.Provider, - Routes: netConf.Routes, - DNS: netConf.DNS, - DeviceID: netConf.DeviceID, - VfDriver: netConf.VfDriver, - VhostUserSocketVolumeName: netConf.VhostUserSocketVolumeName, - VhostUserSocketName: netConf.VhostUserSocketName, + CniType: netConf.Type, + PodName: podName, + PodNamespace: podNamespace, + ContainerID: args.ContainerID, + NetNs: args.Netns, + IfName: args.IfName, + Provider: netConf.Provider, + Routes: netConf.Routes, + DNS: netConf.DNS, + DeviceID: netConf.DeviceID, + VfDriver: netConf.VfDriver, + VhostUserSocketVolumeName: netConf.VhostUserSocketVolumeName, + VhostUserSocketName: netConf.VhostUserSocketName, + VhostUserSocketConsumption: netConf.VhostUserSocketConsumption, }) if err != nil { return types.NewError(types.ErrTryAgainLater, "RPC failed", err.Error()) @@ -151,15 +152,16 @@ func cmdDel(args *skel.CmdArgs) error { } err = client.Del(request.CniRequest{ - CniType: netConf.Type, - PodName: podName, - PodNamespace: podNamespace, - ContainerID: args.ContainerID, - NetNs: args.Netns, - IfName: args.IfName, - Provider: netConf.Provider, - DeviceID: netConf.DeviceID, - VhostUserSocketVolumeName: netConf.VhostUserSocketVolumeName, + CniType: netConf.Type, + PodName: podName, + PodNamespace: podNamespace, + ContainerID: args.ContainerID, + NetNs: args.Netns, + IfName: args.IfName, + Provider: netConf.Provider, + DeviceID: netConf.DeviceID, + VhostUserSocketVolumeName: netConf.VhostUserSocketVolumeName, + VhostUserSocketConsumption: netConf.VhostUserSocketConsumption, }) if err != nil { return types.NewError(types.ErrTryAgainLater, "RPC failed", err.Error()) diff --git a/cmd/cni/netconf.go b/cmd/cni/netconf.go index 91e45701a3a..dd0f6c0d46a 100644 --- a/cmd/cni/netconf.go +++ b/cmd/cni/netconf.go @@ -19,8 +19,9 @@ type netConf struct { DeviceID string `json:"deviceID"` VfDriver string `json:"vf_driver"` // for dpdk - VhostUserSocketVolumeName string `json:"vhost_user_socket_volume_name"` - VhostUserSocketName string `json:"vhost_user_socket_name"` + VhostUserSocketVolumeName string `json:"vhost_user_socket_volume_name"` + VhostUserSocketName string `json:"vhost_user_socket_name"` + VhostUserSocketConsumption string `json:"vhost_user_socket_consumption"` } func (n *netConf) postLoad() { diff --git a/cmd/cni/netconf_windows.go b/cmd/cni/netconf_windows.go index ddcd2b557e7..6dadc8ed85d 100644 --- a/cmd/cni/netconf_windows.go +++ b/cmd/cni/netconf_windows.go @@ -16,8 +16,9 @@ type netConf struct { DeviceID string `json:"deviceID"` VfDriver string `json:"vf_driver"` // for dpdk - VhostUserSocketVolumeName string `json:"vhost_user_socket_volume_name"` - VhostUserSocketName string `json:"vhost_user_socket_name"` + VhostUserSocketVolumeName string `json:"vhost_user_socket_volume_name"` + VhostUserSocketName string `json:"vhost_user_socket_name"` + VhostUserSocketConsumption string `json:"vhost_user_socket_consumption"` } func (n *netConf) postLoad() { diff --git a/pkg/daemon/handler.go b/pkg/daemon/handler.go index b8a589fd10c..939684fcadc 100644 --- a/pkg/daemon/handler.go +++ b/pkg/daemon/handler.go @@ -145,12 +145,17 @@ func (csh cniServerHandler) handleAdd(req *restful.Request, resp *restful.Respon ifName = "eth0" } + // For Support kubevirt hotplug dpdk nic, forbidden set the volume name + if podRequest.VhostUserSocketConsumption == util.ConsumptionKubevirt { + podRequest.VhostUserSocketVolumeName = util.VhostUserSocketVolumeName + } + switch { case podRequest.DeviceID != "": nicType = util.OffloadType case podRequest.VhostUserSocketVolumeName != "": nicType = util.DpdkType - if err = createShortSharedDir(pod, podRequest.VhostUserSocketVolumeName, csh.Config.KubeletDir); err != nil { + if err = createShortSharedDir(pod, podRequest.VhostUserSocketVolumeName, podRequest.VhostUserSocketConsumption, csh.Config.KubeletDir); err != nil { klog.Error(err.Error()) if err = resp.WriteHeaderAndEntity(http.StatusInternalServerError, request.CniResponse{Err: err.Error()}); err != nil { klog.Errorf("failed to write response: %v", err) @@ -293,7 +298,7 @@ func (csh cniServerHandler) handleAdd(req *restful.Request, resp *restful.Respon case util.InternalType: podNicName, err = csh.configureNicWithInternalPort(podRequest.PodName, podRequest.PodNamespace, podRequest.Provider, podRequest.NetNs, podRequest.ContainerID, ifName, macAddr, mtu, ipAddr, gw, isDefaultRoute, detectIPConflict, routes, podRequest.DNS.Nameservers, podRequest.DNS.Search, ingress, egress, podRequest.DeviceID, nicType, latency, limit, loss, jitter, gatewayCheckMode, u2oInterconnectionIP) case util.DpdkType: - err = csh.configureDpdkNic(podRequest.PodName, podRequest.PodNamespace, podRequest.Provider, podRequest.NetNs, podRequest.ContainerID, ifName, macAddr, mtu, ipAddr, gw, ingress, egress, getShortSharedDir(pod.UID, podRequest.VhostUserSocketVolumeName), podRequest.VhostUserSocketName) + err = csh.configureDpdkNic(podRequest.PodName, podRequest.PodNamespace, podRequest.Provider, podRequest.NetNs, podRequest.ContainerID, ifName, macAddr, mtu, ipAddr, gw, ingress, egress, getShortSharedDir(pod.UID, podRequest.VhostUserSocketVolumeName), podRequest.VhostUserSocketName, podRequest.VhostUserSocketConsumption) default: err = csh.configureNic(podRequest.PodName, podRequest.PodNamespace, podRequest.Provider, podRequest.NetNs, podRequest.ContainerID, podRequest.VfDriver, ifName, macAddr, mtu, ipAddr, gw, isDefaultRoute, detectIPConflict, routes, podRequest.DNS.Nameservers, podRequest.DNS.Search, ingress, egress, podRequest.DeviceID, nicType, latency, limit, loss, jitter, gatewayCheckMode, u2oInterconnectionIP, oldPodName) } @@ -419,13 +424,18 @@ func (csh cniServerHandler) handleDel(req *restful.Request, resp *restful.Respon } } + // For Support kubevirt hotplug dpdk nic, forbidden set the volume name + if podRequest.VhostUserSocketConsumption == util.ConsumptionKubevirt { + podRequest.VhostUserSocketVolumeName = util.VhostUserSocketVolumeName + } + var nicType string switch { case podRequest.DeviceID != "": nicType = util.OffloadType case podRequest.VhostUserSocketVolumeName != "": nicType = util.DpdkType - if err = removeShortSharedDir(pod, podRequest.VhostUserSocketVolumeName); err != nil { + if err = removeShortSharedDir(pod, podRequest.VhostUserSocketVolumeName, podRequest.VhostUserSocketConsumption); err != nil { klog.Error(err.Error()) if err = resp.WriteHeaderAndEntity(http.StatusInternalServerError, request.CniResponse{Err: err.Error()}); err != nil { klog.Errorf("failed to write response: %v", err) diff --git a/pkg/daemon/handler_linux.go b/pkg/daemon/handler_linux.go index 9be5a988a00..909dad18ae6 100644 --- a/pkg/daemon/handler_linux.go +++ b/pkg/daemon/handler_linux.go @@ -22,7 +22,7 @@ func (csh cniServerHandler) validatePodRequest(_ *request.CniRequest) error { return nil } -func createShortSharedDir(pod *v1.Pod, volumeName, kubeletDir string) (err error) { +func createShortSharedDir(pod *v1.Pod, volumeName, socketConsumption, kubeletDir string) (err error) { var volume *v1.Volume for index, v := range pod.Spec.Volumes { if v.Name == volumeName { @@ -41,32 +41,50 @@ func createShortSharedDir(pod *v1.Pod, volumeName, kubeletDir string) (err error // set vhostuser dir 777 for qemu has the permission to create sock mask := syscall.Umask(0) defer syscall.Umask(mask) - if _, err = os.Stat(newSharedDir); os.IsNotExist(err) { - err = os.MkdirAll(newSharedDir, 0o777) - if err != nil { - return fmt.Errorf("createSharedDir: Failed to create dir (%s): %v", newSharedDir, err) - } - - if strings.Contains(newSharedDir, util.DefaultHostVhostuserBaseDir) { - klog.Infof("createSharedDir: Mount from %s to %s", originSharedDir, newSharedDir) - err = unix.Mount(originSharedDir, newSharedDir, "", unix.MS_BIND, "") + if _, err = os.Stat(newSharedDir); err != nil { + if os.IsNotExist(err) { + err = os.MkdirAll(newSharedDir, 0o777) if err != nil { - return fmt.Errorf("createSharedDir: Failed to bind mount: %s", err) + return fmt.Errorf("createSharedDir: Failed to create dir (%s): %v", newSharedDir, err) + } + + if strings.Contains(newSharedDir, util.DefaultHostVhostuserBaseDir) { + klog.Infof("createSharedDir: Mount from %s to %s", originSharedDir, newSharedDir) + err = unix.Mount(originSharedDir, newSharedDir, "", unix.MS_BIND, "") + if err != nil { + return fmt.Errorf("createSharedDir: Failed to bind mount: %s", err) + } } + return nil } - return nil + return err + } + if socketConsumption != util.ConsumptionKubevirt { + return fmt.Errorf("createSharedDir: voume name %s is exists", volumeName) } - return err + + return nil } -func removeShortSharedDir(pod *v1.Pod, volumeName string) (err error) { +func removeShortSharedDir(pod *v1.Pod, volumeName, socketConsumption string) (err error) { sharedDir := getShortSharedDir(pod.UID, volumeName) if _, err = os.Stat(sharedDir); os.IsNotExist(err) { - klog.Errorf("shared directory %s does not exist to unmount, %s", sharedDir, err) + klog.Infof("shared directory %s does not exist to unmount, %s", sharedDir, err) return nil } + // keep mount util dpdk sock not used by kuebvirt + if socketConsumption == util.ConsumptionKubevirt { + files, err := os.ReadDir(sharedDir) + if err != nil { + return fmt.Errorf("read file from dpdk share dir error: %s", err) + } + if len(files) != 0 { + return nil + } + } + foundMount, err := mountinfo.Mounted(sharedDir) if errors.Is(err, fs.ErrNotExist) || (err == nil && !foundMount) { klog.Infof("volume: %s not mounted, no need to unmount", sharedDir) diff --git a/pkg/daemon/handler_windows.go b/pkg/daemon/handler_windows.go index 70a32d143e9..4549a259213 100644 --- a/pkg/daemon/handler_windows.go +++ b/pkg/daemon/handler_windows.go @@ -19,12 +19,12 @@ func (csh cniServerHandler) validatePodRequest(req *request.CniRequest) error { return nil } -func createShortSharedDir(pod *v1.Pod, volumeName, kubeletDir string) error { +func createShortSharedDir(pod *v1.Pod, volumeName, socketConsumption, kubeletDir string) error { // nothing to do on Windows return nil } -func removeShortSharedDir(pod *v1.Pod, volumeName string) error { +func removeShortSharedDir(pod *v1.Pod, volumeName, socketConsumption string) error { // nothing to do on Windows return nil } diff --git a/pkg/daemon/ovs_linux.go b/pkg/daemon/ovs_linux.go index 6a2861f29e2..dea60507023 100644 --- a/pkg/daemon/ovs_linux.go +++ b/pkg/daemon/ovs_linux.go @@ -34,18 +34,24 @@ import ( var pciAddrRegexp = regexp.MustCompile(`\b([0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}.\d{1}\S*)`) -func (csh cniServerHandler) configureDpdkNic(podName, podNamespace, provider, netns, containerID, ifName, _ string, _ int, ip, _, ingress, egress, shortSharedDir, socketName string) error { +func (csh cniServerHandler) configureDpdkNic(podName, podNamespace, provider, netns, containerID, ifName, _ string, _ int, ip, _, ingress, egress, shortSharedDir, socketName, socketConsumption string) error { sharedDir := filepath.Join("/var", shortSharedDir) hostNicName, _ := generateNicName(containerID, ifName) ipStr := util.GetIPWithoutMask(ip) ifaceID := ovs.PodNameToPortName(podName, podNamespace, provider) ovs.CleanDuplicatePort(ifaceID, hostNicName) - // Add veth pair host end to ovs port + + vhostServerPath := path.Join(sharedDir, socketName) + if socketConsumption == util.ConsumptionKubevirt { + vhostServerPath = path.Join(sharedDir, ifName) + } + + // Add vhostuser host end to ovs port output, err := ovs.Exec(ovs.MayExist, "add-port", "br-int", hostNicName, "--", "set", "interface", hostNicName, "type=dpdkvhostuserclient", - fmt.Sprintf("options:vhost-server-path=%s", path.Join(sharedDir, socketName)), + fmt.Sprintf("options:vhost-server-path=%s", vhostServerPath), fmt.Sprintf("external_ids:iface-id=%s", ifaceID), fmt.Sprintf("external_ids:pod_name=%s", podName), fmt.Sprintf("external_ids:pod_namespace=%s", podNamespace), @@ -1559,7 +1565,7 @@ func turnOffNicTxChecksum(nicName string) (err error) { } func getShortSharedDir(uid types.UID, volumeName string) string { - return filepath.Join(util.DefaultHostVhostuserBaseDir, string(uid), volumeName) + return filepath.Clean(filepath.Join(util.DefaultHostVhostuserBaseDir, string(uid), volumeName)) } func linkExists(name string) (bool, error) { diff --git a/pkg/daemon/ovs_windows.go b/pkg/daemon/ovs_windows.go index 73b5a5de94c..b8b3ecd4bcb 100644 --- a/pkg/daemon/ovs_windows.go +++ b/pkg/daemon/ovs_windows.go @@ -18,7 +18,7 @@ import ( "github.com/kubeovn/kube-ovn/pkg/util" ) -func (csh cniServerHandler) configureDpdkNic(podName, podNamespace, provider, netns, containerID, ifName, mac string, mtu int, ip, gateway, ingress, egress, sharedDir, socketName string) error { +func (csh cniServerHandler) configureDpdkNic(podName, podNamespace, provider, netns, containerID, ifName, mac string, mtu int, ip, gateway, ingress, egress, sharedDir, socketName, socketConsumption string) error { return errors.New("DPDK is not supported on Windows") } diff --git a/pkg/request/cniserver.go b/pkg/request/cniserver.go index 20ce903a66d..2d3912ab32e 100644 --- a/pkg/request/cniserver.go +++ b/pkg/request/cniserver.go @@ -34,8 +34,9 @@ type CniRequest struct { DeviceID string `json:"deviceID"` // dpdk // empty dir volume for sharing vhost user unix socket - VhostUserSocketVolumeName string `json:"vhost_user_socket_volume_name"` - VhostUserSocketName string `json:"vhost_user_socket_name"` + VhostUserSocketVolumeName string `json:"vhost_user_socket_volume_name"` + VhostUserSocketName string `json:"vhost_user_socket_name"` + VhostUserSocketConsumption string `json:"vhost_user_socket_consumption"` } // CniResponse is the cniserver response format diff --git a/pkg/util/const.go b/pkg/util/const.go index f22645ede4b..d790be68690 100644 --- a/pkg/util/const.go +++ b/pkg/util/const.go @@ -282,4 +282,7 @@ const ( TProxyPreroutingMask = 0x90004 HealthCheckNamedVipTemplate = "%s:%s" // ip name, health check vip + + ConsumptionKubevirt = "kubevirt" + VhostUserSocketVolumeName = "vhostuser-sockets" )