diff --git a/charts/kube-ovn/templates/controller-deploy.yaml b/charts/kube-ovn/templates/controller-deploy.yaml index 1a7bcc311f8..598a22451cf 100644 --- a/charts/kube-ovn/templates/controller-deploy.yaml +++ b/charts/kube-ovn/templates/controller-deploy.yaml @@ -196,6 +196,7 @@ spec: - /kube-ovn/kube-ovn-healthcheck - --port=10660 - --tls={{- .Values.func.SECURE_SERVING }} + - --enable-metrics={{- .Values.networking.ENABLE_METRICS }} periodSeconds: 3 timeoutSeconds: 5 livenessProbe: @@ -204,6 +205,7 @@ spec: - /kube-ovn/kube-ovn-healthcheck - --port=10660 - --tls={{- .Values.func.SECURE_SERVING }} + - --enable-metrics={{- .Values.networking.ENABLE_METRICS }} initialDelaySeconds: 300 periodSeconds: 7 failureThreshold: 5 @@ -231,4 +233,3 @@ spec: secret: optional: true secretName: kube-ovn-tls - diff --git a/charts/kube-ovn/templates/monitor-deploy.yaml b/charts/kube-ovn/templates/monitor-deploy.yaml index 0bb2f4d4be7..b452759b655 100644 --- a/charts/kube-ovn/templates/monitor-deploy.yaml +++ b/charts/kube-ovn/templates/monitor-deploy.yaml @@ -67,6 +67,7 @@ spec: - --logtostderr=false - --alsologtostderr=true - --log_file_max_size=200 + - --enable-metrics={{- .Values.networking.ENABLE_METRICS }} securityContext: runAsUser: {{ include "kubeovn.runAsUser" . }} privileged: false @@ -130,6 +131,7 @@ spec: - /kube-ovn/kube-ovn-healthcheck - --port=10661 - --tls={{- .Values.func.SECURE_SERVING }} + - --enable-metrics={{- .Values.networking.ENABLE_METRICS }} timeoutSeconds: 5 readinessProbe: failureThreshold: 3 @@ -141,6 +143,7 @@ spec: - /kube-ovn/kube-ovn-healthcheck - --port=10661 - --tls={{- .Values.func.SECURE_SERVING }} + - --enable-metrics={{- .Values.networking.ENABLE_METRICS }} timeoutSeconds: 5 nodeSelector: kubernetes.io/os: "linux" diff --git a/charts/kube-ovn/templates/ovncni-ds.yaml b/charts/kube-ovn/templates/ovncni-ds.yaml index 63f9dcf5224..f08e4f73cc6 100644 --- a/charts/kube-ovn/templates/ovncni-ds.yaml +++ b/charts/kube-ovn/templates/ovncni-ds.yaml @@ -199,6 +199,7 @@ spec: - /kube-ovn/kube-ovn-healthcheck - --port=10665 - --tls={{- .Values.func.SECURE_SERVING }} + - --enable-metrics={{- .Values.networking.ENABLE_METRICS }} timeoutSeconds: 5 livenessProbe: failureThreshold: 3 @@ -210,6 +211,7 @@ spec: - /kube-ovn/kube-ovn-healthcheck - --port=10665 - --tls={{- .Values.func.SECURE_SERVING }} + - --enable-metrics={{- .Values.networking.ENABLE_METRICS }} timeoutSeconds: 5 resources: requests: diff --git a/cmd/controller/controller.go b/cmd/controller/controller.go index 8b0a3ad3414..0be8165f26f 100644 --- a/cmd/controller/controller.go +++ b/cmd/controller/controller.go @@ -84,15 +84,36 @@ func CmdMain() { }() } - if !config.EnableMetrics { - return - } - metrics.InitKlogMetrics() - metrics.InitClientGoMetrics() - addr := util.JoinHostPort(metricsAddr, config.PprofPort) - if err := metrics.Run(ctx, config.KubeRestConfig, addr, config.SecureServing, servePprofInMetricsServer); err != nil { - util.LogFatalAndExit(err, "failed to run metrics server") + if config.EnableMetrics { + metrics.InitKlogMetrics() + metrics.InitClientGoMetrics() + addr := util.JoinHostPort(metricsAddr, config.PprofPort) + if err := metrics.Run(ctx, config.KubeRestConfig, addr, config.SecureServing, servePprofInMetricsServer); err != nil { + util.LogFatalAndExit(err, "failed to run metrics server") + } + } else { + klog.Info("metrics server is disabled") + listerner, err := net.ListenTCP("tcp", &net.TCPAddr{IP: net.ParseIP(metricsAddr), Port: int(config.PprofPort)}) + if err != nil { + util.LogFatalAndExit(err, "failed to listen on %s", util.JoinHostPort(metricsAddr, config.PprofPort)) + } + svr := manager.Server{ + Name: "health-check", + Server: &http.Server{ + Handler: http.NewServeMux(), + MaxHeaderBytes: 1 << 20, + IdleTimeout: 90 * time.Second, + ReadHeaderTimeout: 32 * time.Second, + }, + Listener: listerner, + } + go func() { + if err = svr.Start(ctx); err != nil { + util.LogFatalAndExit(err, "failed to run health check server") + } + }() } + <-ctx.Done() }() diff --git a/cmd/daemon/cniserver.go b/cmd/daemon/cniserver.go index 6b732da24cb..9bad4c0b5e4 100644 --- a/cmd/daemon/cniserver.go +++ b/cmd/daemon/cniserver.go @@ -29,9 +29,6 @@ import ( func main() { defer klog.Flush() - daemon.InitMetrics() - metrics.InitKlogMetrics() - config := daemon.ParseFlags() klog.Info(versions.String()) @@ -149,10 +146,36 @@ func main() { }() } - listenAddr := util.JoinHostPort(addr, config.PprofPort) - if err = metrics.Run(ctx, nil, listenAddr, config.SecureServing, servePprofInMetricsServer); err != nil { - util.LogFatalAndExit(err, "failed to run metrics server") + if config.EnableMetrics { + daemon.InitMetrics() + metrics.InitKlogMetrics() + listenAddr := util.JoinHostPort(addr, config.PprofPort) + if err = metrics.Run(ctx, nil, listenAddr, config.SecureServing, servePprofInMetricsServer); err != nil { + util.LogFatalAndExit(err, "failed to run metrics server") + } + } else { + klog.Info("metrics server is disabled") + listerner, err := net.ListenTCP("tcp", &net.TCPAddr{IP: net.ParseIP(addr), Port: int(config.PprofPort)}) + if err != nil { + util.LogFatalAndExit(err, "failed to listen on %s", util.JoinHostPort(addr, config.PprofPort)) + } + svr := manager.Server{ + Name: "health-check", + Server: &http.Server{ + Handler: http.NewServeMux(), + MaxHeaderBytes: 1 << 20, + IdleTimeout: 90 * time.Second, + ReadHeaderTimeout: 32 * time.Second, + }, + Listener: listerner, + } + go func() { + if err = svr.Start(ctx); err != nil { + util.LogFatalAndExit(err, "failed to run health check server") + } + }() } + <-stopCh } diff --git a/cmd/health_check/health_check.go b/cmd/health_check/health_check.go index 1017578682c..ec51bb4da81 100644 --- a/cmd/health_check/health_check.go +++ b/cmd/health_check/health_check.go @@ -15,6 +15,7 @@ import ( func CmdMain() { port := pflag.Int32("port", 0, "Target port") tls := pflag.Bool("tls", false, "Dial the server with TLS") + enableMetrics := pflag.Bool("enable-metrics", true, "Whether to support metrics query") klogFlags := flag.NewFlagSet("klog", flag.ExitOnError) klog.InitFlags(klogFlags) @@ -46,7 +47,7 @@ func CmdMain() { } addr := util.JoinHostPort(ip, *port) - if *tls { + if *enableMetrics && *tls { addr = "tls://" + addr } else { addr = "tcp://" + addr diff --git a/cmd/ovn_monitor/ovn_monitor.go b/cmd/ovn_monitor/ovn_monitor.go index 62bddddb564..06da89d0fbd 100644 --- a/cmd/ovn_monitor/ovn_monitor.go +++ b/cmd/ovn_monitor/ovn_monitor.go @@ -1,12 +1,14 @@ package ovn_monitor import ( - "os" - "strings" + "net" + "net/http" + "time" "k8s.io/klog/v2" "kernel.org/pub/linux/libs/security/libcap/cap" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/manager/signals" "github.com/kubeovn/kube-ovn/pkg/metrics" @@ -15,8 +17,6 @@ import ( "github.com/kubeovn/kube-ovn/versions" ) -const port = 10661 - func CmdMain() { defer klog.Flush() @@ -30,24 +30,43 @@ func CmdMain() { util.LogFatalAndExit(err, "failed to parse config") } - addr := config.ListenAddress - if os.Getenv("ENABLE_BIND_LOCAL_IP") == "true" { - if ips := strings.Split(os.Getenv("POD_IPS"), ","); len(ips) == 1 { - addr = util.JoinHostPort(ips[0], port) - } - } - - exporter := ovn.NewExporter(config) - if err = exporter.StartConnection(); err != nil { - klog.Errorf("%s failed to connect db socket properly: %s", ovn.GetExporterName(), err) - go exporter.TryClientConnection() - } - exporter.StartOvnMetrics() - ctrl.SetLogger(klog.NewKlogr()) ctx := signals.SetupSignalHandler() - if err = metrics.Run(ctx, nil, addr, config.SecureServing, false); err != nil { - util.LogFatalAndExit(err, "failed to run metrics server") + + metricsAddr := util.GetDefaultListenAddr() + if config.EnableMetrics { + exporter := ovn.NewExporter(config) + if err = exporter.StartConnection(); err != nil { + klog.Errorf("%s failed to connect db socket properly: %s", ovn.GetExporterName(), err) + go exporter.TryClientConnection() + } + exporter.StartOvnMetrics() + addr := util.JoinHostPort(metricsAddr, config.MetricsPort) + if err = metrics.Run(ctx, nil, addr, config.SecureServing, false); err != nil { + util.LogFatalAndExit(err, "failed to run metrics server") + } + } else { + klog.Info("metrics server is disabled") + listerner, err := net.ListenTCP("tcp", &net.TCPAddr{IP: net.ParseIP(util.GetDefaultListenAddr()), Port: int(config.MetricsPort)}) + if err != nil { + util.LogFatalAndExit(err, "failed to listen on %s", util.JoinHostPort(metricsAddr, config.MetricsPort)) + } + svr := manager.Server{ + Name: "health-check", + Server: &http.Server{ + Handler: http.NewServeMux(), + MaxHeaderBytes: 1 << 20, + IdleTimeout: 90 * time.Second, + ReadHeaderTimeout: 32 * time.Second, + }, + Listener: listerner, + } + go func() { + if err = svr.Start(ctx); err != nil { + util.LogFatalAndExit(err, "failed to run health check server") + } + }() } + <-ctx.Done() } diff --git a/cmd/speaker/speaker.go b/cmd/speaker/speaker.go index 1f2412da76c..c270c429f06 100644 --- a/cmd/speaker/speaker.go +++ b/cmd/speaker/speaker.go @@ -28,9 +28,11 @@ func CmdMain() { ctrl.SetLogger(klog.NewKlogr()) ctx := signals.SetupSignalHandler() go func() { - metrics.InitKlogMetrics() - if err = metrics.Run(ctx, nil, util.JoinHostPort("0.0.0.0", config.PprofPort), false, false); err != nil { - util.LogFatalAndExit(err, "failed to run metrics server") + if config.EnableMetrics { + metrics.InitKlogMetrics() + if err = metrics.Run(ctx, nil, util.JoinHostPort("0.0.0.0", config.PprofPort), false, false); err != nil { + util.LogFatalAndExit(err, "failed to run metrics server") + } } <-ctx.Done() }() diff --git a/dist/images/install.sh b/dist/images/install.sh index fc5e2efed02..28411951a51 100755 --- a/dist/images/install.sh +++ b/dist/images/install.sh @@ -22,6 +22,7 @@ ENABLE_LB_SVC=${ENABLE_LB_SVC:-false} ENABLE_NAT_GW=${ENABLE_NAT_GW:-true} ENABLE_KEEP_VM_IP=${ENABLE_KEEP_VM_IP:-true} ENABLE_ARP_DETECT_IP_CONFLICT=${ENABLE_ARP_DETECT_IP_CONFLICT:-true} +ENABLE_METRICS=${ENABLE_METRICS:-true} # comma-separated string of nodelocal DNS ip addresses NODE_LOCAL_DNS_IP=${NODE_LOCAL_DNS_IP:-} ENABLE_IC=${ENABLE_IC:-$(kubectl get node --show-labels | grep -qw "ovn.kubernetes.io/ic-gw" && echo true || echo false)} @@ -4719,6 +4720,7 @@ spec: - --log_file_max_size=200 - --enable-lb-svc=$ENABLE_LB_SVC - --keep-vm-ip=$ENABLE_KEEP_VM_IP + - --enable-metrics=$ENABLE_METRICS - --node-local-dns-ip=$NODE_LOCAL_DNS_IP - --enable-ovn-ipsec=$ENABLE_OVN_IPSEC - --secure-serving=${SECURE_SERVING} @@ -4782,6 +4784,7 @@ spec: - /kube-ovn/kube-ovn-healthcheck - --port=10660 - --tls=${SECURE_SERVING} + - --enable-metrics=$ENABLE_METRICS periodSeconds: 3 timeoutSeconds: 5 livenessProbe: @@ -4790,6 +4793,7 @@ spec: - /kube-ovn/kube-ovn-healthcheck - --port=10660 - --tls=${SECURE_SERVING} + - --enable-metrics=$ENABLE_METRICS initialDelaySeconds: 300 periodSeconds: 7 failureThreshold: 5 @@ -4911,6 +4915,7 @@ spec: - --alsologtostderr=true - --log_file=/var/log/kube-ovn/kube-ovn-cni.log - --log_file_max_size=200 + - --enable-metrics=$ENABLE_METRICS - --kubelet-dir=$KUBELET_DIR - --enable-tproxy=$ENABLE_TPROXY - --ovs-vsctl-concurrency=$OVS_VSCTL_CONCURRENCY @@ -5001,6 +5006,7 @@ spec: - /kube-ovn/kube-ovn-healthcheck - --port=10665 - --tls=${SECURE_SERVING} + - --enable-metrics=$ENABLE_METRICS timeoutSeconds: 5 readinessProbe: failureThreshold: 3 @@ -5011,6 +5017,7 @@ spec: - /kube-ovn/kube-ovn-healthcheck - --port=10665 - --tls=${SECURE_SERVING} + - --enable-metrics=$ENABLE_METRICS timeoutSeconds: 5 resources: requests: @@ -5128,6 +5135,7 @@ spec: - --alsologtostderr=true - --log_file=/var/log/kube-ovn/kube-ovn-pinger.log - --log_file_max_size=200 + - --enable-metrics=$ENABLE_METRICS imagePullPolicy: $IMAGE_PULL_POLICY securityContext: runAsUser: ${RUN_AS_USER} @@ -5279,6 +5287,7 @@ spec: - --logtostderr=false - --alsologtostderr=true - --log_file_max_size=200 + - --enable-metrics=$ENABLE_METRICS securityContext: runAsUser: ${RUN_AS_USER} privileged: false @@ -5342,6 +5351,7 @@ spec: - /kube-ovn/kube-ovn-healthcheck - --port=10661 - --tls=${SECURE_SERVING} + - --enable-metrics=$ENABLE_METRICS timeoutSeconds: 5 readinessProbe: failureThreshold: 3 @@ -5353,6 +5363,7 @@ spec: - /kube-ovn/kube-ovn-healthcheck - --port=10661 - --tls=${SECURE_SERVING} + - --enable-metrics=$ENABLE_METRICS timeoutSeconds: 5 nodeSelector: kubernetes.io/os: "linux" diff --git a/pkg/ovnmonitor/config.go b/pkg/ovnmonitor/config.go index e48070a01bf..591923895f4 100644 --- a/pkg/ovnmonitor/config.go +++ b/pkg/ovnmonitor/config.go @@ -11,8 +11,6 @@ import ( // Configuration contains parameters information. type Configuration struct { - ListenAddress string - MetricsPath string PollTimeout int PollInterval int SystemRunDir string @@ -46,17 +44,17 @@ type Configuration struct { ServiceNorthdFilePidPath string EnableMetrics bool SecureServing bool + MetricsPort int32 } // ParseFlags get parameters information. func ParseFlags() (*Configuration, error) { var ( - argListenAddress = pflag.String("listen-address", ":10661", "Address to listen on for web interface and telemetry.") - argMetricsPath = pflag.String("telemetry-path", "/metrics", "Path under which to expose metrics.") argPollTimeout = pflag.Int("ovs.timeout", 2, "Timeout on JSON-RPC requests to OVN.") argPollInterval = pflag.Int("ovs.poll-interval", 30, "The minimum interval (in seconds) between collections from OVN server.") argEnableMetrics = pflag.Bool("enable-metrics", true, "Whether to support metrics query") argSecureServing = pflag.Bool("secure-serving", false, "Whether to serve metrics securely") + argMetricsPort = pflag.Int32("metrics-port", 10661, "The port to get metrics data") argSystemRunDir = pflag.String("system.run.dir", "/var/run/openvswitch", "OVS default run directory.") argDatabaseVswitchName = pflag.String("database.vswitch.name", "Open_vSwitch", "The name of OVS db.") @@ -111,8 +109,6 @@ func ParseFlags() (*Configuration, error) { pflag.Parse() config := &Configuration{ - ListenAddress: *argListenAddress, - MetricsPath: *argMetricsPath, PollTimeout: *argPollTimeout, PollInterval: *argPollInterval, SystemRunDir: *argSystemRunDir, @@ -147,6 +143,7 @@ func ParseFlags() (*Configuration, error) { ServiceNorthdFilePidPath: *argServiceNorthdFilePidPath, EnableMetrics: *argEnableMetrics, SecureServing: *argSecureServing, + MetricsPort: *argMetricsPort, } klog.Infof("ovn monitor config is %+v", config) diff --git a/pkg/speaker/config.go b/pkg/speaker/config.go index 5a06da6e3d7..8eb72a46698 100644 --- a/pkg/speaker/config.go +++ b/pkg/speaker/config.go @@ -58,6 +58,7 @@ type Configuration struct { EbgpMultihopTTL uint8 ExtendedNexthop bool NatGwMode bool + EnableMetrics bool NodeName string KubeConfigFile string @@ -90,6 +91,7 @@ func ParseFlags() (*Configuration, error) { argEbgpMultihopTTL = pflag.Uint8("ebgp-multihop", DefaultEbgpMultiHop, "The TTL value of EBGP peer, default: 1") argExtendedNexthop = pflag.BoolP("extended-nexthop", "", false, "Announce IPv4/IPv6 prefixes to every neighbor, no matter their AFI") argNatGwMode = pflag.BoolP("nat-gw-mode", "", false, "Make the BGP speaker announce EIPs from inside a NAT gateway, Pod IP/Service/Subnet announcements will be disabled") + argEnableMetrics = pflag.BoolP("enable-metrics", "", true, "Whether to support metrics query") ) klogFlags := flag.NewFlagSet("klog", flag.ExitOnError) klog.InitFlags(klogFlags) @@ -155,6 +157,7 @@ func ParseFlags() (*Configuration, error) { EbgpMultihopTTL: *argEbgpMultihopTTL, ExtendedNexthop: *argExtendedNexthop, NatGwMode: *argNatGwMode, + EnableMetrics: *argEnableMetrics, } if *argNeighborAddress != "" {