Skip to content

Commit

Permalink
fix(*): empty cpuset.mems will lead to failure of tmo
Browse files Browse the repository at this point in the history
Signed-off-by: linzhecheng <[email protected]>
  • Loading branch information
cheney-lin committed Nov 14, 2024
1 parent 3269ac6 commit e7597fa
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -736,11 +736,7 @@ func (p *DynamicPolicy) handleAdvisorMemoryOffloading(_ *config.Configuration,
}
}

cpuSetStats, err := cgroupmgr.GetCPUSetWithAbsolutePath(absCGPath)
if err != nil {
return fmt.Errorf("GetCPUSetWithAbsolutePath failed with error: %v", err)
}
mems, err := machine.Parse(cpuSetStats.Mems)
_, mems, err := cgroupmgr.GetEffectiveCPUSetWithAbsolutePath(absCGPath)
if err != nil {
return fmt.Errorf("parse cpuSetStats failed with error: %v", err)
}
Expand Down
6 changes: 4 additions & 2 deletions pkg/util/cgroup/common/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,10 @@ type CPUStats struct {

// CPUSetStats get cgroup cpuset data
type CPUSetStats struct {
CPUs string
Mems string
CPUs string
EffectiveCPUs string
Mems string
EffectiveMems string
}

// MemoryMetrics get memory cgroup metrics
Expand Down
39 changes: 39 additions & 0 deletions pkg/util/cgroup/manager/cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"io/fs"
"math"
"os"
"os/exec"
"path/filepath"
"strconv"
Expand Down Expand Up @@ -514,3 +515,41 @@ func MemoryOffloadingWithAbsolutePath(ctx context.Context, absCgroupPath string,

return err
}

// GetEffectiveCPUSetWithAbsolutePath returns the effective cpus and mems of the
// cgroup located at absCgroupPath, in that order.
//
// The effective masks are read through GetCPUSetWithAbsolutePath. When the
// cpuset files are missing (the controller is disabled for this cgroup), or
// when an effective mask is empty, the value is taken from the nearest
// ancestor directory that provides one.
//
// The upward walk stops at the top of the path (where filepath.Dir returns its
// input unchanged): at that point a missing-file error is returned as is, and
// an empty mask is returned empty, instead of recursing forever.
//
// An error is returned when absCgroupPath cannot be stat'ed, when the cpuset
// files cannot be read for a reason other than non-existence, or when a mask
// cannot be parsed. Errors are returned unwrapped so callers can still
// classify them with os.IsNotExist.
func GetEffectiveCPUSetWithAbsolutePath(absCgroupPath string) (machine.CPUSet, machine.CPUSet, error) {
	if _, err := os.Stat(absCgroupPath); err != nil {
		return machine.CPUSet{}, machine.CPUSet{}, err
	}

	// filepath.Dir is a fixed point on "/" and ".", so equality means there is
	// no ancestor left to inherit from.
	parentPath := filepath.Dir(absCgroupPath)
	hasParent := parentPath != absCgroupPath

	cpusetStat, err := GetCPUSetWithAbsolutePath(absCgroupPath)
	if err != nil {
		// if controller is disabled, we should walk the parent's dir.
		if os.IsNotExist(err) && hasParent {
			return GetEffectiveCPUSetWithAbsolutePath(parentPath)
		}
		return machine.CPUSet{}, machine.CPUSet{}, err
	}

	cpus, err := machine.Parse(cpusetStat.EffectiveCPUs)
	if err != nil {
		return machine.CPUSet{}, machine.CPUSet{}, err
	}
	mems, err := machine.Parse(cpusetStat.EffectiveMems)
	if err != nil {
		return machine.CPUSet{}, machine.CPUSet{}, err
	}

	// Nothing to inherit, or nowhere to inherit it from.
	if !hasParent || (!cpus.IsEmpty() && !mems.IsEmpty()) {
		return cpus, mems, nil
	}

	// if the cpus or mems is empty, they will inherit the parent's mask.
	// The parent is resolved once and reused for both masks so they come from
	// the same snapshot of the hierarchy.
	parentCPUs, parentMems, err := GetEffectiveCPUSetWithAbsolutePath(parentPath)
	if err != nil {
		return machine.CPUSet{}, machine.CPUSet{}, err
	}
	if cpus.IsEmpty() {
		cpus = parentCPUs
	}
	if mems.IsEmpty() {
		mems = parentMems
	}
	return cpus, mems, nil
}
12 changes: 10 additions & 2 deletions pkg/util/cgroup/manager/v1/fs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,12 +308,20 @@ func (m *manager) GetCPUSet(absCgroupPath string) (*common.CPUSetStats, error) {
var err error
cpusetStats.CPUs, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.cpus")
if err != nil {
return nil, fmt.Errorf("read cpuset.cpus failed with error: %v", err)
return nil, err
}
cpusetStats.EffectiveCPUs, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.effective_cpus")
if err != nil {
return nil, err
}

cpusetStats.Mems, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.mems")
if err != nil {
return nil, fmt.Errorf("read cpuset.mems failed with error: %v", err)
return nil, err
}
cpusetStats.EffectiveMems, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.effective_mems")
if err != nil {
return nil, err
}

return cpusetStats, nil
Expand Down
8 changes: 8 additions & 0 deletions pkg/util/cgroup/manager/v2/fs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,11 +434,19 @@ func (m *manager) GetCPUSet(absCgroupPath string) (*common.CPUSetStats, error) {
if err != nil {
return nil, fmt.Errorf("read cpuset.cpus failed with error: %v", err)
}
cpusetStats.EffectiveCPUs, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.cpus.effective")
if err != nil {
return nil, fmt.Errorf("read cpuset.cpus.effective failed with error: %v", err)
}

cpusetStats.Mems, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.mems")
if err != nil {
return nil, fmt.Errorf("read cpuset.mems failed with error: %v", err)
}
cpusetStats.EffectiveMems, err = fscommon.GetCgroupParamString(absCgroupPath, "cpuset.mems.effective")
if err != nil {
return nil, fmt.Errorf("read cpuset.mems.effective failed with error: %v", err)
}

return cpusetStats, nil
}
Expand Down

0 comments on commit e7597fa

Please sign in to comment.