Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix gateway node check for centralized ecmp subnets #4847

Merged
merged 1 commit into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ dist/images/test-server
dist/images/kube-ovn
dist/images/kube-ovn-cmd
dist/images/kube-ovn-daemon
dist/images/kube-ovn-controller
dist/images/kube-ovn-pinger
dist/images/kube-ovn-webhook
dist/windows/kube-ovn.exe
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ build-go:
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -o $(CURDIR)/dist/images/kube-ovn -v ./cmd/cni
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-cmd -v ./cmd
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-daemon -v ./cmd/daemon
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-pinger -v ./cmd/pinger
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-controller -v ./cmd/controller
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -o $(CURDIR)/dist/images/test-server -v ./test/server

.PHONY: build-go-windows
Expand All @@ -131,7 +131,7 @@ build-go-arm:
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -o $(CURDIR)/dist/images/kube-ovn -v ./cmd/cni
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-cmd -v ./cmd
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-daemon -v ./cmd/daemon
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-pinger -v ./cmd/pinger
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-controller -v ./cmd/controller

.PHONY: build-kube-ovn
build-kube-ovn: build-debug build-go
Expand Down
1 change: 1 addition & 0 deletions charts/kube-ovn/templates/controller-deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ spec:
capabilities:
add:
- NET_BIND_SERVICE
- NET_RAW
env:
- name: ENABLE_SSL
value: "{{ .Values.networking.ENABLE_SSL }}"
Expand Down
5 changes: 0 additions & 5 deletions cmd/cmdmain.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (

"k8s.io/klog/v2"

"github.com/kubeovn/kube-ovn/cmd/controller"
"github.com/kubeovn/kube-ovn/cmd/health_check"
"github.com/kubeovn/kube-ovn/cmd/ovn_ic_controller"
"github.com/kubeovn/kube-ovn/cmd/ovn_leader_checker"
Expand All @@ -22,7 +21,6 @@ import (
)

const (
CmdController = "kube-ovn-controller"
CmdMonitor = "kube-ovn-monitor"
CmdSpeaker = "kube-ovn-speaker"
CmdWebhook = "kube-ovn-webhook"
Expand Down Expand Up @@ -91,9 +89,6 @@ func dumpProfile() {
func main() {
cmd := filepath.Base(os.Args[0])
switch cmd {
case CmdController:
dumpProfile()
controller.CmdMain()
case CmdMonitor:
dumpProfile()
ovn_monitor.CmdMain()
Expand Down
92 changes: 92 additions & 0 deletions cmd/controller/cmdmain.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package main

import (
"fmt"
"os"
"os/signal"
"path/filepath"
"runtime/pprof"
"syscall"
"time"

"k8s.io/klog/v2"

"github.com/kubeovn/kube-ovn/cmd/pinger"
"github.com/kubeovn/kube-ovn/pkg/util"
)

// Executable base names that main dispatches on. main compares
// filepath.Base(os.Args[0]) against these values, so one binary can act as
// either program depending on the name it was invoked under.
const (
// CmdController selects the controller entry point (CmdMain).
CmdController = "kube-ovn-controller"
// CmdPinger selects the pinger entry point (pinger.CmdMain).
CmdPinger = "kube-ovn-pinger"
)

// timeFormat is the time.Format layout used by dumpProfile to timestamp the
// names of the profile files it creates.
const timeFormat = "2006-01-02_15:04:05"

func dumpProfile() {
ch1 := make(chan os.Signal, 1)
ch2 := make(chan os.Signal, 1)
signal.Notify(ch1, syscall.SIGUSR1)
signal.Notify(ch2, syscall.SIGUSR2)
go func() {
for {
<-ch1
name := fmt.Sprintf("cpu-profile-%s.pprof", time.Now().Format(timeFormat))
path := filepath.Join(os.TempDir(), name)
f, err := os.Create(path) // #nosec G303,G304
if err != nil {
klog.Errorf("failed to create cpu profile file: %v", err)
return
}
if err = pprof.StartCPUProfile(f); err != nil {
klog.Errorf("failed to start cpu profile: %v", err)
if err = f.Close(); err != nil {
klog.Errorf("failed to close file %q: %v", path, err)
}
return
}
time.Sleep(30 * time.Second)
pprof.StopCPUProfile()
if err = f.Close(); err != nil {
klog.Errorf("failed to close file %q: %v", path, err)
return
}
}
}()
go func() {
for {
<-ch2
name := fmt.Sprintf("mem-profile-%s.pprof", time.Now().Format(timeFormat))
path := filepath.Join(os.TempDir(), name)
f, err := os.Create(path) // #nosec G303,G304
if err != nil {
klog.Errorf("failed to create memory profile file: %v", err)
return
}
if err = pprof.WriteHeapProfile(f); err != nil {
klog.Errorf("failed to write memory profile file: %v", err)
if err = f.Close(); err != nil {
klog.Errorf("failed to close file %q: %v", path, err)
}
return
}
if err = f.Close(); err != nil {
klog.Errorf("failed to close file %q: %v", path, err)
return
}
}
}()
}

// main selects which program to run from the base name of the executable
// (os.Args[0]): the controller or the pinger. For a recognised name it
// installs the profile-dump signal handlers and then hands control to that
// program's entry point; any other name is reported through
// util.LogFatalAndExit.
func main() {
	binary := filepath.Base(os.Args[0])

	var entry func()
	switch binary {
	case CmdController:
		entry = CmdMain
	case CmdPinger:
		entry = pinger.CmdMain
	default:
		util.LogFatalAndExit(nil, "%s is an unknown command", binary)
		return
	}

	dumpProfile()
	entry()
}
2 changes: 1 addition & 1 deletion cmd/controller/controller.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package controller
package main

import (
"context"
Expand Down
4 changes: 2 additions & 2 deletions cmd/pinger/pinger.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package pinger

import (
_ "net/http/pprof" // #nosec
Expand All @@ -14,7 +14,7 @@ import (
"github.com/kubeovn/kube-ovn/versions"
)

func main() {
func CmdMain() {
defer klog.Flush()

klog.Info(versions.String())
Expand Down
8 changes: 4 additions & 4 deletions dist/images/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ COPY 01-kube-ovn.conflist /kube-ovn/01-kube-ovn.conflist
COPY kube-ovn /kube-ovn/kube-ovn
COPY kube-ovn-cmd /kube-ovn/kube-ovn-cmd
COPY kube-ovn-daemon /kube-ovn/kube-ovn-daemon
COPY kube-ovn-pinger /kube-ovn/kube-ovn-pinger
RUN ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-controller && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-monitor && \
COPY kube-ovn-controller /kube-ovn/kube-ovn-controller
RUN ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-monitor && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-speaker && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-webhook && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-healthcheck && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-leader-checker && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-ic-controller && \
ln -s /kube-ovn/kube-ovn-controller /kube-ovn/kube-ovn-pinger && \
setcap CAP_NET_BIND_SERVICE+eip /kube-ovn/kube-ovn-cmd && \
setcap CAP_NET_RAW,CAP_NET_BIND_SERVICE+eip /kube-ovn/kube-ovn-pinger && \
setcap CAP_NET_RAW,CAP_NET_BIND_SERVICE+eip /kube-ovn/kube-ovn-controller && \
setcap CAP_NET_ADMIN,CAP_NET_RAW,CAP_NET_BIND_SERVICE,CAP_SYS_ADMIN+eip /kube-ovn/kube-ovn-daemon

FROM kubeovn/kube-ovn-base:$BASE_TAG
Expand Down
1 change: 1 addition & 0 deletions dist/images/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4733,6 +4733,7 @@ spec:
capabilities:
add:
- NET_BIND_SERVICE
- NET_RAW
env:
- name: ENABLE_SSL
value: "$ENABLE_SSL"
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1263,7 +1263,7 @@ func (c *Controller) startWorkers(ctx context.Context) {

go wait.Until(c.resyncProviderNetworkStatus, 30*time.Second, ctx.Done())
go wait.Until(c.exportSubnetMetrics, 30*time.Second, ctx.Done())
go wait.Until(c.CheckGatewayReady, 5*time.Second, ctx.Done())
go wait.Until(c.checkSubnetGateway, 5*time.Second, ctx.Done())

go wait.Until(runWorker("add ovn eip", c.addOvnEipQueue, c.handleAddOvnEip), time.Second, ctx.Done())
go wait.Until(runWorker("update ovn eip", c.updateOvnEipQueue, c.handleUpdateOvnEip), time.Second, ctx.Done())
Expand Down
40 changes: 21 additions & 19 deletions pkg/controller/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -524,27 +524,27 @@ func (c *Controller) handleUpdateNode(key string) error {
return nil
}

func (c *Controller) CheckGatewayReady() {
if err := c.checkGatewayReady(); err != nil {
klog.Errorf("failed to check gateway ready %v", err)
func (c *Controller) checkSubnetGateway() {
if err := c.checkSubnetGatewayNode(); err != nil {
klog.Errorf("failed to check subnet gateway node: %v", err)
}
}

func (c *Controller) checkGatewayReady() error {
klog.V(3).Infoln("start to check gateway status")
func (c *Controller) checkSubnetGatewayNode() error {
klog.V(3).Infoln("start to check subnet gateway node")
subnetList, err := c.subnetsLister.List(labels.Everything())
if err != nil {
klog.Errorf("failed to list subnets %v", err)
klog.Errorf("failed to list subnets: %v", err)
return err
}
nodes, err := c.nodesLister.List(labels.Everything())
if err != nil {
klog.Errorf("failed to list nodes, %v", err)
klog.Errorf("failed to list nodes: %v", err)
return err
}

for _, subnet := range subnetList {
if (subnet.Spec.Vlan != "" && !subnet.Spec.LogicalGateway) ||
if (subnet.Spec.Vlan != "" && (subnet.Spec.U2OInterconnection || !subnet.Spec.LogicalGateway)) ||
subnet.Spec.GatewayNode == "" ||
subnet.Spec.GatewayType != kubeovnv1.GWCentralizedType ||
!subnet.Spec.EnableEcmp {
Expand Down Expand Up @@ -582,24 +582,26 @@ func (c *Controller) checkGatewayReady() error {
pinger.Timeout = time.Duration(count) * time.Second
pinger.Interval = 1 * time.Second

success := false

var pingSucceeded bool
pinger.OnRecv = func(_ *goping.Packet) {
success = true
pingSucceeded = true
pinger.Stop()
}
if err = pinger.Run(); err != nil {
klog.Errorf("failed to run pinger for destination %s: %v", ip, err)
return err
}

if !nodeReady(node) {
success = false
}

if !success {
nodeIsReady := nodeReady(node)
if !pingSucceeded || !nodeIsReady {
if exist {
klog.Warningf("failed to ping ovn0 %s or node %s is not ready, delete ecmp policy route for node", ip, node.Name)
if !pingSucceeded {
klog.Warningf("failed to ping ovn0 ip %s on node %s", ip, node.Name)
}
if !nodeIsReady {
klog.Warningf("node %s is not ready", node.Name)
}
klog.Warningf("delete ecmp policy route for node %s ip %s", node.Name, ip)
nextHops.Remove(ip)
delete(nameIPMap, node.Name)
klog.Infof("update policy route for centralized subnet %s, nextHops %s", subnet.Name, nextHops)
Expand All @@ -609,7 +611,7 @@ func (c *Controller) checkGatewayReady() error {
}
}
} else {
klog.V(3).Infof("succeed to ping gw %s", ip)
klog.V(3).Infof("succeeded to ping ovn0 ip %s on node %s", ip, node.Name)
if !exist {
nextHops.Add(ip)
if nameIPMap == nil {
Expand All @@ -624,7 +626,7 @@ func (c *Controller) checkGatewayReady() error {
}
}
} else if exist {
klog.Infof("subnet %s gatewayNode does not contains node %v, delete policy route for node ip %s", subnet.Name, node.Name, ip)
klog.Infof("subnet %s gateway nodes does not contain node %s, delete policy route for node ip %s", subnet.Name, node.Name, ip)
nextHops.Remove(ip)
delete(nameIPMap, node.Name)
klog.Infof("update policy route for centralized subnet %s, nextHops %s", subnet.Name, nextHops)
Expand Down
Loading