Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(env): Add automatic memory limit handling and move automaxprocs to pkg/util package #3806

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cmd/controller-manager/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ const (
defaultQueueWorkers = 5
defaultGCWorkers = 1
defaultControllers = "*"
defaultMemlimitRatio = 0.0
)

// ServerOption is the main context object for the controllers.
Expand Down Expand Up @@ -92,6 +93,9 @@ type ServerOption struct {
// Case3: "-gc-controller,-job-controller,-jobflow-controller,-jobtemplate-controller,-pg-controller,-queue-controller"
// to disable specific controllers,
Controllers []string

// MemlimitRatio is the fraction of the detected container or system memory limit to use as GOMEMLIMIT. A value of 0.0 disables automatic memory limit handling.
MemlimitRatio float64
}

type DecryptFunc func(c *ServerOption) error
Expand Down Expand Up @@ -129,6 +133,7 @@ func (s *ServerOption) AddFlags(fs *pflag.FlagSet, knownControllers []string) {
fs.Uint32Var(&s.WorkerThreadsForQueue, "worker-threads-for-queue", defaultQueueWorkers, "The number of threads syncing queue operations. The larger the number, the faster the queue processing, but requires more CPU load.")
fs.StringSliceVar(&s.Controllers, "controllers", []string{defaultControllers}, fmt.Sprintf("Specify controller gates. Use '*' for all controllers, all knownController: %s ,and we can use "+
"'-' to disable controllers, e.g. \"-job-controller,-queue-controller\" to disable job and queue controllers.", knownControllers))
fs.Float64Var(&s.MemlimitRatio, "auto-gomemlimit-ratio", defaultMemlimitRatio, "The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory. The value should be greater than 0.0 and less than 1.0. Default: 0.0 (disabled).")
}

// CheckOptionOrDie checks all options and returns all errors if they are invalid.
Expand Down
7 changes: 4 additions & 3 deletions cmd/controller-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,10 @@ package main
import (
"fmt"
"os"
"runtime"
"sort"
"time"

"github.com/spf13/pflag"
_ "go.uber.org/automaxprocs"
utilfeature "k8s.io/apiserver/pkg/util/feature"
_ "k8s.io/client-go/plugin/pkg/client/auth"
cliflag "k8s.io/component-base/cli/flag"
Expand All @@ -32,6 +30,7 @@ import (

"volcano.sh/volcano/cmd/controller-manager/app"
"volcano.sh/volcano/cmd/controller-manager/app/options"
"volcano.sh/volcano/internal/goruntime"
"volcano.sh/volcano/pkg/controllers/framework"
_ "volcano.sh/volcano/pkg/controllers/garbagecollector"
_ "volcano.sh/volcano/pkg/controllers/job"
Expand All @@ -46,7 +45,6 @@ import (
var logFlushFreq = pflag.Duration("log-flush-frequency", 5*time.Second, "Maximum number of seconds between log flushes")

func main() {
runtime.GOMAXPROCS(runtime.NumCPU())
klog.InitFlags(nil)

fs := pflag.CommandLine
Expand Down Expand Up @@ -88,6 +86,9 @@ func main() {
klog.StartFlushDaemon(*logFlushFreq)
defer klog.Flush()

goruntime.SetMaxProcs()
goruntime.SetMemLimit(s.MemlimitRatio)

if err := app.Run(s); err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
os.Exit(1)
Expand Down
6 changes: 6 additions & 0 deletions cmd/scheduler/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ const (
defaultPercentageOfNodesToFind = 0
defaultLockObjectNamespace = "volcano-system"
defaultNodeWorkers = 20

defaultMemlimitRatio = 0.0
)

// ServerOption is the main context object for the scheduler.
Expand Down Expand Up @@ -90,6 +92,9 @@ type ServerOption struct {
// not be counted in pod pvc resource request and node.Allocatable, because the spec.drivers of csinode resource
// is always null, these provisioners usually are host path csi controllers like rancher.io/local-path and hostpath.csi.k8s.io.
IgnoredCSIProvisioners []string

// MemlimitRatio is the fraction of the detected container or system memory limit to use as GOMEMLIMIT. A value of 0.0 disables automatic memory limit handling.
MemlimitRatio float64
}

// DecryptFunc is custom function to parse ca file
Expand Down Expand Up @@ -146,6 +151,7 @@ func (s *ServerOption) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.CacheDumpFileDir, "cache-dump-dir", "/tmp", "The target dir where the json file put at when dump cache info to json file")
fs.Uint32Var(&s.NodeWorkerThreads, "node-worker-threads", defaultNodeWorkers, "The number of threads syncing node operations.")
fs.StringSliceVar(&s.IgnoredCSIProvisioners, "ignored-provisioners", nil, "The provisioners that will be ignored during pod pvc request computation and preemption.")
fs.Float64Var(&s.MemlimitRatio, "auto-gomemlimit-ratio", defaultMemlimitRatio, "The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory. The value should be greater than 0.0 and less than 1.0. Default: 0.0 (disabled).")
}

// CheckOptionOrDie check leader election flag when LeaderElection is enabled.
Expand Down
8 changes: 4 additions & 4 deletions cmd/scheduler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,9 @@ package main
import (
"fmt"
"os"
"runtime"
"time"

"github.com/spf13/pflag"
_ "go.uber.org/automaxprocs"
utilfeature "k8s.io/apiserver/pkg/util/feature"
_ "k8s.io/client-go/plugin/pkg/client/auth"
cliflag "k8s.io/component-base/cli/flag"
Expand All @@ -34,6 +32,7 @@ import (

"volcano.sh/volcano/cmd/scheduler/app"
"volcano.sh/volcano/cmd/scheduler/app/options"
"volcano.sh/volcano/internal/goruntime"
commonutil "volcano.sh/volcano/pkg/util"
"volcano.sh/volcano/pkg/version"

Expand All @@ -48,8 +47,6 @@ import (
var logFlushFreq = pflag.Duration("log-flush-frequency", 5*time.Second, "Maximum number of seconds between log flushes")

func main() {
runtime.GOMAXPROCS(runtime.NumCPU())

klog.InitFlags(nil)

fs := pflag.CommandLine
Expand Down Expand Up @@ -83,6 +80,9 @@ func main() {
klog.StartFlushDaemon(*logFlushFreq)
defer klog.Flush()

goruntime.SetMaxProcs()
goruntime.SetMemLimit(s.MemlimitRatio)

if err := app.Run(s); err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
os.Exit(1)
Expand Down
5 changes: 5 additions & 0 deletions cmd/webhook-manager/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const (
defaultBurst = 100
defaultEnabledAdmission = "/jobs/mutate,/jobs/validate,/podgroups/mutate,/pods/validate,/pods/mutate,/queues/mutate,/queues/validate"
defaultHealthzAddress = ":11251"
defaultMemlimitRatio = 0.0
)

// Config admission-controller server config.
Expand All @@ -56,6 +57,9 @@ type Config struct {
// HealthzBindAddress is the IP address and port for the health check server to serve on
// defaulting to :11251
HealthzBindAddress string

// MemlimitRatio is the fraction of the detected container or system memory limit to use as GOMEMLIMIT. A value of 0.0 disables automatic memory limit handling.
MemlimitRatio float64
}

type DecryptFunc func(c *Config) error
Expand Down Expand Up @@ -88,6 +92,7 @@ func (c *Config) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&c.ConfigPath, "admission-conf", "", "The configmap file of this webhook")
fs.BoolVar(&c.EnableHealthz, "enable-healthz", false, "Enable the health check; it is false by default")
fs.StringVar(&c.HealthzBindAddress, "healthz-address", defaultHealthzAddress, "The address to listen on for the health check server.")
fs.Float64Var(&c.MemlimitRatio, "auto-gomemlimit-ratio", defaultMemlimitRatio, "The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory. The value should be greater than 0.0 and less than 1.0. Default: 0.0 (disabled).")
}

// CheckPortOrDie check valid port range.
Expand Down
7 changes: 4 additions & 3 deletions cmd/webhook-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@ package main
import (
"fmt"
"os"
"runtime"
"time"

"github.com/spf13/pflag"
_ "go.uber.org/automaxprocs"
_ "k8s.io/client-go/plugin/pkg/client/auth"

cliflag "k8s.io/component-base/cli/flag"
"k8s.io/klog/v2"

"volcano.sh/volcano/cmd/webhook-manager/app"
"volcano.sh/volcano/cmd/webhook-manager/app/options"
"volcano.sh/volcano/internal/goruntime"
"volcano.sh/volcano/pkg/version"
_ "volcano.sh/volcano/pkg/webhooks/admission/jobs/mutate"
_ "volcano.sh/volcano/pkg/webhooks/admission/jobs/validate"
Expand All @@ -43,7 +42,6 @@ import (
var logFlushFreq = pflag.Duration("log-flush-frequency", 5*time.Second, "Maximum number of seconds between log flushes")

func main() {
runtime.GOMAXPROCS(runtime.NumCPU())
klog.InitFlags(nil)

config := options.NewConfig()
Expand All @@ -59,6 +57,9 @@ func main() {
klog.StartFlushDaemon(*logFlushFreq)
defer klog.Flush()

goruntime.SetMaxProcs()
goruntime.SetMemLimit(config.MemlimitRatio)

if err := config.CheckPortOrDie(); err != nil {
klog.Fatalf("Configured port is invalid: %v", err)
}
Expand Down
6 changes: 5 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module volcano.sh/volcano
go 1.22.0

require (
github.com/KimMachineGun/automemlimit v0.6.1
github.com/agiledragon/gomonkey/v2 v2.11.0
github.com/cilium/ebpf v0.9.3
github.com/containernetworking/cni v1.1.2
Expand Down Expand Up @@ -35,7 +36,7 @@ require (
k8s.io/apimachinery v0.31.1
k8s.io/apiserver v0.31.1
k8s.io/client-go v0.31.1
k8s.io/code-generator v0.31.1
k8s.io/code-generator v0.31.2
k8s.io/component-base v0.31.1
k8s.io/component-helpers v0.31.1
k8s.io/csi-translation-lib v0.31.1
Expand All @@ -53,10 +54,13 @@ require (
github.com/Microsoft/go-winio v0.6.0 // indirect
github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
github.com/bits-and-blooms/bitset v1.2.0 // indirect
github.com/containerd/cgroups/v3 v3.0.1 // indirect
github.com/cyphar/filepath-securejoin v0.2.4 // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/godbus/dbus/v5 v5.1.0 // indirect
github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78 // indirect
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/vishvananda/netns v0.0.4 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 // indirect
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/KimMachineGun/automemlimit v0.6.1 h1:ILa9j1onAAMadBsyyUJv5cack8Y1WT26yLj/V+ulKp8=
github.com/KimMachineGun/automemlimit v0.6.1/go.mod h1:T7xYht7B8r6AG/AqFcUdc7fzd2bIdBKmepfP2S1svPY=
github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2yDvg=
github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE=
github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I=
Expand Down Expand Up @@ -26,6 +28,8 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn
github.com/cilium/ebpf v0.6.2/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/cilium/ebpf v0.9.3 h1:5KtxXZU+scyERvkJMEm16TbScVvuuMrlhPly78ZMbSc=
github.com/cilium/ebpf v0.9.3/go.mod h1:w27N4UjpaQ9X/DGrSugxUG+H+NhgntDuPb5lCzxCn8A=
github.com/containerd/cgroups/v3 v3.0.1 h1:4hfGvu8rfGIwVIDd+nLzn/B9ZXx4BcCjzt5ToenJRaE=
github.com/containerd/cgroups/v3 v3.0.1/go.mod h1:/vtwk1VXrtoa5AaZLkypuOJgA/6DyPMZHJPGQNtlHnw=
github.com/containerd/console v1.0.2/go.mod h1:ytZPjGgY2oeTkAONYafi2kSj0aYggsf8acV1PGKCbzQ=
github.com/containernetworking/cni v1.1.2 h1:wtRGZVv7olUHMOqouPpn3cXJWpJgM6+EUl31EQbXALQ=
github.com/containernetworking/cni v1.1.2/go.mod h1:sDpYKmGVENF3s6uvMvGgldDWeG8dMxakj/u+i9ht9vw=
Expand All @@ -49,6 +53,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8Yc
github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0=
github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elastic/go-elasticsearch/v7 v7.17.7 h1:pcYNfITNPusl+cLwLN6OLmVT+F73Els0nbaWOmYachs=
Expand Down Expand Up @@ -218,6 +224,8 @@ github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78/go.m
github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xAPP8dBsCoU0KuF8=
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
Expand Down
34 changes: 34 additions & 0 deletions internal/goruntime/cpu.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
Copyright 2024 The Volcano Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package goruntime

import (
"strings"

"go.uber.org/automaxprocs/maxprocs"
"k8s.io/klog/v2"
)

// SetMaxProcs adjusts GOMAXPROCS through go.uber.org/automaxprocs.
//
// Messages emitted by the library are forwarded to klog at info level with
// their "maxprocs: " prefix removed. If the adjustment fails, a warning is
// logged and the process continues with whatever GOMAXPROCS was in effect.
func SetMaxProcs() {
	// Adapter that lets automaxprocs log through klog without its own prefix.
	logf := func(msg string, args ...any) {
		klog.Infof(strings.TrimPrefix(msg, "maxprocs: "), args...)
	}

	// The returned undo function is intentionally discarded: the setting is
	// meant to last for the lifetime of the process.
	_, err := maxprocs.Set(maxprocs.Logger(logf))
	if err != nil {
		klog.Warningf("Failed to set GOMAXPROCS automatically. err: %v", err)
	}
}
53 changes: 53 additions & 0 deletions internal/goruntime/memory.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
dongjiang1989 marked this conversation as resolved.
Show resolved Hide resolved
Copyright 2024 The Volcano Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package goruntime

import (
"runtime/debug"

"github.com/KimMachineGun/automemlimit/memlimit"
"k8s.io/klog/v2"
)

// DefaultMemlimitRatio is the ratio at which SetMemLimit does nothing, i.e.
// automatic GOMEMLIMIT handling is disabled.
const DefaultMemlimitRatio = 0.0

// SetMemLimit configures the Go runtime soft memory limit (GOMEMLIMIT) as a
// fraction of the detected memory limit. The limit is looked up with
// memlimit.FromCgroup, using memlimit.FromSystem as the fallback provider.
//
// memlimitRatio is the fraction to apply. Any value that is not greater than
// 0.0 (including NaN) disables the feature and leaves the runtime untouched;
// values above 1.0 are clamped to 1.0. A failure to apply the limit is logged
// as a warning and is not fatal.
func SetMemLimit(memlimitRatio float64) {
	// Written as a negated comparison on purpose: NaN compares false against
	// everything, so this treats NaN as "disabled" instead of handing it to
	// the memlimit library.
	if !(memlimitRatio > 0.0) {
		return
	}
	memlimitRatio = min(memlimitRatio, 1.0)

	if _, err := memlimit.SetGoMemLimitWithOpts(
		memlimit.WithRatio(memlimitRatio),
		memlimit.WithProvider(
			memlimit.ApplyFallback(
				memlimit.FromCgroup,
				memlimit.FromSystem,
			),
		),
	); err != nil {
		klog.Warningf("Failed to set GOMEMLIMIT automatically. err: %v", err)
		// The library reported a failure, so do not claim a limit was set.
		return
	}

	// A negative argument queries the current limit without modifying it.
	klog.Infof("GOMEMLIMIT set to %d", debug.SetMemoryLimit(-1))
}
21 changes: 21 additions & 0 deletions licenses/github.com/KimMachineGun/automemlimit/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2022 Geon Kim

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Loading
Loading