From bb5ed1b79709c865d9aa86008048f19331530041 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Mon, 1 Jul 2019 17:16:06 +0200 Subject: [PATCH] kubelet: add initial support for cgroupv2 do a conversion from the cgroups v1 limits to cgroups v2. e.g. cpu.shares on cgroups v1 has a range of [2-262144] while the equivalent on cgroups v2 is cpu.weight that uses a range [1-10000]. Signed-off-by: Giuseppe Scrivano --- pkg/kubelet/cm/cgroup_manager_linux.go | 44 ++++++++++++++++------- pkg/kubelet/cm/container_manager_linux.go | 9 +++++ pkg/kubelet/cm/helpers_linux.go | 42 ++++++++++++++++++++-- pkg/kubelet/cm/util/cgroups_linux.go | 32 ++++++++++++----- 4 files changed, 105 insertions(+), 22 deletions(-) diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go index 41dbed9567a..b13006e1539 100644 --- a/pkg/kubelet/cm/cgroup_manager_linux.go +++ b/pkg/kubelet/cm/cgroup_manager_linux.go @@ -21,6 +21,7 @@ import ( "os" "path" "path/filepath" + "strconv" "strings" "time" @@ -153,10 +154,11 @@ func (l *libcontainerAdapter) newManager(cgroups *libcontainerconfigs.Cgroup, pa if !cgroupsystemd.UseSystemd() { panic("systemd cgroup manager not available") } - return &cgroupsystemd.LegacyManager{ - Cgroups: cgroups, - Paths: paths, - }, nil + f, err := cgroupsystemd.NewSystemdCgroupsManager() + if err != nil { + return nil, err + } + return f(cgroups, paths), nil } return nil, fmt.Errorf("invalid cgroup manager configuration") } @@ -254,6 +256,7 @@ func (m *cgroupManagerImpl) Exists(name CgroupName) bool { // in https://github.com/opencontainers/runc/issues/1440 // once resolved, we can remove this code. whitelistControllers := sets.NewString("cpu", "cpuacct", "cpuset", "memory", "systemd") + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportNodePidsLimit) { whitelistControllers.Insert("pids") } @@ -369,14 +372,31 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcont if resourceConfig.Memory != nil { resources.Memory = *resourceConfig.Memory } - if resourceConfig.CpuShares != nil { - resources.CpuShares = *resourceConfig.CpuShares - } - if resourceConfig.CpuQuota != nil { - resources.CpuQuota = *resourceConfig.CpuQuota - } - if resourceConfig.CpuPeriod != nil { - resources.CpuPeriod = *resourceConfig.CpuPeriod + if libcontainercgroups.IsCgroup2UnifiedMode() { + if resourceConfig.CpuShares != nil { + // Convert from the range [2-262144] to [1-10000] + resources.CpuWeight = (1 + ((*resourceConfig.CpuShares-2)*9999)/262142) + } + + quota := "max" + period := "100000" + if resourceConfig.CpuQuota != nil { + quota = strconv.FormatInt(*resourceConfig.CpuQuota, 10) + } + if resourceConfig.CpuPeriod != nil { + period = strconv.FormatUint(*resourceConfig.CpuPeriod, 10) + } + resources.CpuMax = fmt.Sprintf("%s %s", quota, period) + } else { + if resourceConfig.CpuShares != nil { + resources.CpuShares = *resourceConfig.CpuShares + } + if resourceConfig.CpuQuota != nil { + resources.CpuQuota = *resourceConfig.CpuQuota + } + if resourceConfig.CpuPeriod != nil { + resources.CpuPeriod = *resourceConfig.CpuPeriod + } } if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportNodePidsLimit) { if resourceConfig.PidsLimit != nil { diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 13c7176bdc2..be7530be7ab 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -161,6 +161,10 @@ func validateSystemRequirements(mountUtil mount.Interface) (features, error) { return f, fmt.Errorf("%s - %v", localErr, err) } + if cgroups.IsCgroup2UnifiedMode() { + return f, nil + } + expectedCgroups := sets.NewString("cpu", "cpuacct", "cpuset", "memory") for _, mountPoint := range mountPoints { if mountPoint.Type == cgroupMountType { @@ -884,6 +888,11 @@ func getContainer(pid int) (string, error) { return "", err } + if cgroups.IsCgroup2UnifiedMode() { + c, _ := cgs[""] + return c, nil + } + cpu, found := cgs["cpu"] if !found { return "", cgroups.NewNotFoundError("cpu") diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go index f6a1d519026..1fc784f423c 100644 --- a/pkg/kubelet/cm/helpers_linux.go +++ b/pkg/kubelet/cm/helpers_linux.go @@ -19,9 +19,11 @@ package cm import ( "bufio" "fmt" + "io/ioutil" "os" "path/filepath" "strconv" + "strings" libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups" @@ -32,6 +34,7 @@ import ( v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" kubefeatures "k8s.io/kubernetes/pkg/features" + "k8s.io/kubernetes/pkg/kubelet/cm/util" ) const ( @@ -181,8 +184,8 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64) return result } -// GetCgroupSubsystems returns information about the mounted cgroup subsystems -func GetCgroupSubsystems() (*CgroupSubsystems, error) { +// getCgroupSubsystemsV1 returns information about the mounted cgroup v1 subsystems +func getCgroupSubsystemsV1() (*CgroupSubsystems, error) { // get all cgroup mounts. allCgroups, err := libcontainercgroups.GetCgroupMounts(true) if err != nil { @@ -203,6 +206,41 @@ func GetCgroupSubsystems() (*CgroupSubsystems, error) { }, nil } +// getCgroupSubsystemsV2 returns information about the enabled cgroup v2 subsystems +func getCgroupSubsystemsV2() (*CgroupSubsystems, error) { + content, err := ioutil.ReadFile(filepath.Join(util.CgroupRoot, "cgroup.controllers")) + if err != nil { + return nil, err + } + + mounts := []libcontainercgroups.Mount{} + controllers := strings.Fields(string(content)) + mountPoints := make(map[string]string, len(controllers)) + for _, controller := range controllers { + mountPoints[controller] = util.CgroupRoot + m := libcontainercgroups.Mount{ + Mountpoint: util.CgroupRoot, + Root: util.CgroupRoot, + Subsystems: []string{controller}, + } + mounts = append(mounts, m) + } + + return &CgroupSubsystems{ + Mounts: mounts, + MountPoints: mountPoints, + }, nil +} + +// GetCgroupSubsystems returns information about the mounted cgroup subsystems +func GetCgroupSubsystems() (*CgroupSubsystems, error) { + if libcontainercgroups.IsCgroup2UnifiedMode() { + return getCgroupSubsystemsV2() + } + + return getCgroupSubsystemsV1() +} + // getCgroupProcs takes a cgroup directory name as an argument // reads through the cgroup's procs file and returns a list of tgid's. // It returns an empty list if a procs file doesn't exists diff --git a/pkg/kubelet/cm/util/cgroups_linux.go b/pkg/kubelet/cm/util/cgroups_linux.go index ceeada7aa9f..b7019a95088 100644 --- a/pkg/kubelet/cm/util/cgroups_linux.go +++ b/pkg/kubelet/cm/util/cgroups_linux.go @@ -23,19 +23,35 @@ import ( libcontainerutils "github.com/opencontainers/runc/libcontainer/utils" ) +const ( + // CgroupRoot is the base path where cgroups are mounted + CgroupRoot = "/sys/fs/cgroup" +) + // GetPids gets pids of the desired cgroup // Forked from opencontainers/runc/libcontainer/cgroup/fs.Manager.GetPids() func GetPids(cgroupPath string) ([]int, error) { - dir, err := getCgroupPath(cgroupPath) - if err != nil { - return nil, err + dir := "" + + if libcontainercgroups.IsCgroup2UnifiedMode() { + path, err := filepath.Rel("/", cgroupPath) + if err != nil { + return nil, err + } + dir = filepath.Join(CgroupRoot, path) + } else { + var err error + dir, err = getCgroupV1Path(cgroupPath) + if err != nil { + return nil, err + } } return libcontainercgroups.GetPids(dir) } -// getCgroupPath gets the file path to the "devices" subsystem of the desired cgroup. +// getCgroupV1Path gets the file path to the "devices" subsystem of the desired cgroup. // cgroupPath is the path in the cgroup hierarchy. -func getCgroupPath(cgroupPath string) (string, error) { +func getCgroupV1Path(cgroupPath string) (string, error) { cgroupPath = libcontainerutils.CleanPath(cgroupPath) mnt, root, err := libcontainercgroups.FindCgroupMountpointAndRoot(cgroupPath, "devices") @@ -50,7 +66,7 @@ func getCgroupPath(cgroupPath string) (string, error) { return filepath.Join(root, mnt, cgroupPath), nil } - parentPath, err := getCgroupParentPath(mnt, root) + parentPath, err := getCgroupV1ParentPath(mnt, root) if err != nil { return "", err } @@ -58,8 +74,8 @@ func getCgroupPath(cgroupPath string) (string, error) { return filepath.Join(parentPath, cgroupPath), nil } -// getCgroupParentPath gets the parent filepath to this cgroup, for resolving relative cgroup paths. -func getCgroupParentPath(mountpoint, root string) (string, error) { +// getCgroupV1ParentPath gets the parent filepath to this cgroup, for resolving relative cgroup paths. +func getCgroupV1ParentPath(mountpoint, root string) (string, error) { // Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating // process could in container and shared pid namespace with host, and // /proc/1/cgroup could point to whole other world of cgroups.