Skip to content

Commit

Permalink
fix(*): empty cpuset.mems will lead to failure of tmo
Browse files Browse the repository at this point in the history
Signed-off-by: linzhecheng <[email protected]>
  • Loading branch information
cheney-lin committed Nov 15, 2024
1 parent 3269ac6 commit 684f227
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -736,11 +736,7 @@ func (p *DynamicPolicy) handleAdvisorMemoryOffloading(_ *config.Configuration,
}
}

cpuSetStats, err := cgroupmgr.GetCPUSetWithAbsolutePath(absCGPath)
if err != nil {
return fmt.Errorf("GetCPUSetWithAbsolutePath failed with error: %v", err)
}
mems, err := machine.Parse(cpuSetStats.Mems)
_, mems, err := cgroupmgr.GetEffectiveCPUSetWithAbsolutePath(absCGPath)
if err != nil {
return fmt.Errorf("parse cpuSetStats failed with error: %v", err)
}
Expand Down
6 changes: 4 additions & 2 deletions pkg/util/cgroup/common/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,10 @@ type CPUStats struct {

// CPUSetStats holds the cpuset data read from a cgroup.
type CPUSetStats struct {
CPUs string
Mems string
CPUs string
EffectiveCPUs string
Mems string
EffectiveMems string
}

// MemoryMetrics get memory cgroup metrics
Expand Down
39 changes: 39 additions & 0 deletions pkg/util/cgroup/manager/cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"io/fs"
"math"
"os"
"os/exec"
"path/filepath"
"strconv"
Expand Down Expand Up @@ -514,3 +515,41 @@ func MemoryOffloadingWithAbsolutePath(ctx context.Context, absCgroupPath string,

return err
}

// GetEffectiveCPUSetWithAbsolutePath returns the effective cpus and mems, in
// that order, of the cgroup directory at absCgroupPath.
//
// When the cpuset files cannot be found in absCgroupPath (e.g. the controller
// is disabled there), the parent directory is consulted instead. When an
// effective mask is present but empty, that mask is inherited from the parent.
// The walk stops at the top of the directory tree: if nothing usable has been
// found by then an error is returned rather than recursing forever.
func GetEffectiveCPUSetWithAbsolutePath(absCgroupPath string) (machine.CPUSet, machine.CPUSet, error) {
	if _, err := os.Stat(absCgroupPath); err != nil {
		return machine.CPUSet{}, machine.CPUSet{}, err
	}

	// filepath.Dir is a fixed point at the top of the tree ("/" or "."), so a
	// path that equals its own parent has nothing left to inherit from.
	parentPath := filepath.Dir(absCgroupPath)
	hasParent := parentPath != absCgroupPath

	cpusetStat, err := GetCPUSetWithAbsolutePath(absCgroupPath)
	if err != nil {
		// if controller is disabled, we should walk the parent's dir.
		if os.IsNotExist(err) && hasParent {
			return GetEffectiveCPUSetWithAbsolutePath(parentPath)
		}
		return machine.CPUSet{}, machine.CPUSet{}, err
	}

	cpus, err := machine.Parse(cpusetStat.EffectiveCPUs)
	if err != nil {
		return machine.CPUSet{}, machine.CPUSet{}, err
	}
	mems, err := machine.Parse(cpusetStat.EffectiveMems)
	if err != nil {
		return machine.CPUSet{}, machine.CPUSet{}, err
	}
	if !cpus.IsEmpty() && !mems.IsEmpty() {
		return cpus, mems, nil
	}

	// if the cpus or mems is empty, they will inherit the parent's mask.
	if !hasParent {
		return machine.CPUSet{}, machine.CPUSet{}, fmt.Errorf(
			"effective cpuset of %q is empty and it has no parent to inherit from", absCgroupPath)
	}
	// Resolve the parent once and reuse the result for whichever mask is empty.
	parentCPUs, parentMems, err := GetEffectiveCPUSetWithAbsolutePath(parentPath)
	if err != nil {
		return machine.CPUSet{}, machine.CPUSet{}, err
	}
	if cpus.IsEmpty() {
		cpus = parentCPUs
	}
	if mems.IsEmpty() {
		mems = parentMems
	}
	return cpus, mems, nil
}
81 changes: 81 additions & 0 deletions pkg/util/cgroup/manager/cgroup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ func testV2Manager(t *testing.T) {
testSwapMax(t)
testMemPressure(t)
testMemoryOffloadingWithAbsolutePath(t)
testGetEffectiveCPUSetWithAbsolutePath(t)
}

func testManager(t *testing.T, version string) {
Expand Down Expand Up @@ -274,3 +275,83 @@ func testMemoryOffloadingWithAbsolutePath(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, fmt.Sprintf("%v\n", 100), string(s))
}

// testGetEffectiveCPUSetWithAbsolutePath builds a fake two-level cgroup v2
// hierarchy on disk and checks GetEffectiveCPUSetWithAbsolutePath against it:
// the root cgroup reports its own effective masks, and the sub cgroup keeps
// its non-empty effective cpus while inheriting the root's mems because its
// own cpuset.mems.effective is empty.
func testGetEffectiveCPUSetWithAbsolutePath(t *testing.T) {
	defer monkey.UnpatchAll()
	monkey.Patch(common.CheckCgroup2UnifiedMode, func() bool { return true })
	monkey.Patch(GetManager, func() Manager { return v2.NewManager() })
	// Redirect cgroup file access to plain file reads/writes so the fake
	// hierarchy below can be used.
	monkey.Patch(cgroups.ReadFile, func(dir, file string) (string, error) {
		f := filepath.Join(dir, file)
		tmp, err := ioutil.ReadFile(f)
		if err != nil {
			return "", err
		}
		return string(tmp), nil
	})
	monkey.Patch(cgroups.WriteFile, func(dir, file, data string) error {
		f := filepath.Join(dir, file)
		return ioutil.WriteFile(f, []byte(data), 0o700)
	})

	rootDir := os.TempDir()
	dir := filepath.Join(rootDir, "tmp")
	err := os.Mkdir(dir, 0o700)
	assert.NoError(t, err)
	// Register cleanup immediately so the directory is removed even if a
	// later setup step fails.
	defer os.RemoveAll(dir)

	tmpDir, err := ioutil.TempDir(dir, "fake-cgroup")
	assert.NoError(t, err)

	monkey.Patch(common.GetCgroupRootPath, func(s string) string {
		t.Logf("rootDir=%v", rootDir)
		return rootDir
	})

	// writeCgroupFile creates one cpuset interface file inside cgDir.
	writeCgroupFile := func(cgDir, name, content string) {
		err := ioutil.WriteFile(filepath.Join(cgDir, name), []byte(content), 0o644)
		assert.NoError(t, err)
	}

	// tmpDir is root cgroup
	writeCgroupFile(tmpDir, "cpuset.cpus", "0-1")
	writeCgroupFile(tmpDir, "cpuset.cpus.effective", "0-1")
	writeCgroupFile(tmpDir, "cpuset.mems", "0-1")
	writeCgroupFile(tmpDir, "cpuset.mems.effective", "0-1")

	cpus, mems, err := GetEffectiveCPUSetWithAbsolutePath(tmpDir)
	assert.NoError(t, err)
	assert.Equal(t, "0-1", cpus.String())
	assert.Equal(t, "0-1", mems.String())

	// kubeDir is sub cgroup
	kubeDir := filepath.Join(tmpDir, "kubepods")
	err = os.Mkdir(kubeDir, 0o700)
	assert.NoError(t, err)

	// The sub cgroup's files must live in kubeDir; writing them into tmpDir
	// would overwrite the root fixture and leave nothing to inherit from.
	writeCgroupFile(kubeDir, "cpuset.cpus", "")
	writeCgroupFile(kubeDir, "cpuset.cpus.effective", "0")
	writeCgroupFile(kubeDir, "cpuset.mems", "")
	writeCgroupFile(kubeDir, "cpuset.mems.effective", "")

	cpus, mems, err = GetEffectiveCPUSetWithAbsolutePath(kubeDir)
	assert.NoError(t, err)
	assert.Equal(t, "0", cpus.String())
	assert.Equal(t, "0-1", mems.String())
}
12 changes: 10 additions & 2 deletions pkg/util/cgroup/manager/v1/fs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,12 +308,20 @@ func (m *manager) GetCPUSet(absCgroupPath string) (*common.CPUSetStats, error) {
var err error
cpusetStats.CPUs, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.cpus")
if err != nil {
return nil, fmt.Errorf("read cpuset.cpus failed with error: %v", err)
return nil, err
}
cpusetStats.EffectiveCPUs, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.effective_cpus")
if err != nil {
return nil, err
}

cpusetStats.Mems, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.mems")
if err != nil {
return nil, fmt.Errorf("read cpuset.mems failed with error: %v", err)
return nil, err
}
cpusetStats.EffectiveMems, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.effective_mems")
if err != nil {
return nil, err
}

return cpusetStats, nil
Expand Down
12 changes: 10 additions & 2 deletions pkg/util/cgroup/manager/v2/fs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -432,12 +432,20 @@ func (m *manager) GetCPUSet(absCgroupPath string) (*common.CPUSetStats, error) {
var err error
cpusetStats.CPUs, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.cpus")
if err != nil {
return nil, fmt.Errorf("read cpuset.cpus failed with error: %v", err)
return nil, err
}
cpusetStats.EffectiveCPUs, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.cpus.effective")
if err != nil {
return nil, err
}

cpusetStats.Mems, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.mems")
if err != nil {
return nil, fmt.Errorf("read cpuset.mems failed with error: %v", err)
return nil, err
}
cpusetStats.EffectiveMems, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.mems.effective")
if err != nil {
return nil, err
}

return cpusetStats, nil
Expand Down

0 comments on commit 684f227

Please sign in to comment.