diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go
index 04b88a22de8..ad870baa278 100644
--- a/pkg/kubelet/cm/cgroup_manager_linux.go
+++ b/pkg/kubelet/cm/cgroup_manager_linux.go
@@ -237,20 +237,41 @@ func (m *cgroupManagerImpl) buildCgroupUnifiedPath(name CgroupName) string {
 	return path.Join(cmutil.CgroupRoot, cgroupFsAdaptedName)
 }
 
-// TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead.
-// It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent)
-// and split it appropriately, using essentially the logic below.
-// This was done for cgroupfs in opencontainers/runc#497 but a counterpart
-// for systemd was never introduced.
-func updateSystemdCgroupInfo(cgroupConfig *libcontainerconfigs.Cgroup, cgroupName CgroupName) {
-	dir, base := path.Split(cgroupName.ToSystemd())
+// libctCgroupConfig converts CgroupConfig to libcontainer's Cgroup config.
+func (m *cgroupManagerImpl) libctCgroupConfig(in *CgroupConfig, needResources bool) *libcontainerconfigs.Cgroup {
+	config := &libcontainerconfigs.Cgroup{}
+	if needResources {
+		config.Resources = m.toResources(in.ResourceParameters)
+	} else {
+		config.Resources = &libcontainerconfigs.Resources{}
+	}
+
+	if m.adapter.cgroupManagerType == libcontainerCgroupfs {
+		// For fs cgroup manager, we can either set Path or Name and Parent.
+		// Setting Path is easier.
+		config.Path = in.Name.ToCgroupfs()
+
+		return config
+	}
+
+	// For systemd, we have to set Name and Parent, as they are needed to talk to systemd.
+	// Setting Path is optional as it can be deduced from Name and Parent.
+
+	// TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead.
+	// It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent)
+	// and split it appropriately, using essentially the logic below.
+	// This was done for cgroupfs in opencontainers/runc#497 but a counterpart
+	// for systemd was never introduced.
+	dir, base := path.Split(in.Name.ToSystemd())
 	if dir == "/" {
 		dir = "-.slice"
 	} else {
 		dir = path.Base(dir)
 	}
-	cgroupConfig.Parent = dir
-	cgroupConfig.Name = base
+	config.Parent = dir
+	config.Name = base
+
+	return config
 }
 
 // Validate checks if all subsystem cgroups already exist
@@ -316,14 +337,7 @@ func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error {
 
 	cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name)
 
-	libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{}
-	// libcontainer consumes a different field and expects a different syntax
-	// depending on the cgroup driver in use, so we need this conditional here.
-	if m.adapter.cgroupManagerType == libcontainerSystemd {
-		updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name)
-	} else {
-		libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs()
-	}
+	libcontainerCgroupConfig := m.libctCgroupConfig(cgroupConfig, false)
 
 	manager, err := m.adapter.newManager(libcontainerCgroupConfig, cgroupPaths)
 	if err != nil {
@@ -407,8 +421,34 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcont
 	if resourceConfig.PidsLimit != nil {
 		resources.PidsLimit = *resourceConfig.PidsLimit
 	}
-	// if huge pages are enabled, we set them in libcontainer
-	// for each page size enumerated, set that value
+
+	m.maybeSetHugetlb(resourceConfig, resources)
+
+	// Ideally unified is used for all the resources when running on cgroup v2.
+	// It doesn't make difference for the memory.max limit, but for e.g. the cpu controller
+	// you can specify the correct setting without relying on the conversions performed by the OCI runtime.
+	if resourceConfig.Unified != nil && libcontainercgroups.IsCgroup2UnifiedMode() {
+		resources.Unified = make(map[string]string)
+		for k, v := range resourceConfig.Unified {
+			resources.Unified[k] = v
+		}
+	}
+	return resources
+}
+
+func (m *cgroupManagerImpl) maybeSetHugetlb(resourceConfig *ResourceConfig, resources *libcontainerconfigs.Resources) {
+	// Check if hugetlb is supported.
+	if libcontainercgroups.IsCgroup2UnifiedMode() {
+		if !getSupportedUnifiedControllers().Has("hugetlb") {
+			klog.V(6).InfoS("Optional subsystem not supported: hugetlb")
+			return
+		}
+	} else if _, ok := m.subsystems.MountPoints["hugetlb"]; !ok {
+		klog.V(6).InfoS("Optional subsystem not supported: hugetlb")
+		return
+	}
+
+	// For each page size enumerated, set that value.
 	pageSizes := sets.NewString()
 	for pageSize, limit := range resourceConfig.HugePageLimit {
 		sizeString, err := v1helper.HugePageUnitSizeFromByteSize(pageSize)
@@ -432,16 +472,6 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcont
 			Limit: uint64(0),
 		})
 	}
-	// Ideally unified is used for all the resources when running on cgroup v2.
-	// It doesn't make difference for the memory.max limit, but for e.g. the cpu controller
-	// you can specify the correct setting without relying on the conversions performed by the OCI runtime.
-	if resourceConfig.Unified != nil && libcontainercgroups.IsCgroup2UnifiedMode() {
-		resources.Unified = make(map[string]string)
-		for k, v := range resourceConfig.Unified {
-			resources.Unified[k] = v
-		}
-	}
-	return resources
 }
 
 // Update updates the cgroup with the specified Cgroup Configuration
@@ -451,13 +481,7 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
 		metrics.CgroupManagerDuration.WithLabelValues("update").Observe(metrics.SinceInSeconds(start))
 	}()
 
-	// Extract the cgroup resource parameters
-	resourceConfig := cgroupConfig.ResourceParameters
-	resources := m.toResources(resourceConfig)
-
-	libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
-		Resources: resources,
-	}
+	libcontainerCgroupConfig := m.libctCgroupConfig(cgroupConfig, true)
 
 	unified := libcontainercgroups.IsCgroup2UnifiedMode()
 	var paths map[string]string
@@ -467,32 +491,11 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
 		paths = m.buildCgroupPaths(cgroupConfig.Name)
 	}
 
-	// libcontainer consumes a different field and expects a different syntax
-	// depending on the cgroup driver in use, so we need this conditional here.
-	if m.adapter.cgroupManagerType == libcontainerSystemd {
-		updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name)
-	}
-
-	if cgroupConfig.ResourceParameters != nil && cgroupConfig.ResourceParameters.PidsLimit != nil {
-		resources.PidsLimit = *cgroupConfig.ResourceParameters.PidsLimit
-	}
-
-	if unified {
-		supportedControllers := getSupportedUnifiedControllers()
-		if !supportedControllers.Has("hugetlb") {
-			resources.HugetlbLimit = nil
-			klog.V(6).InfoS("Optional subsystem not supported: hugetlb")
-		}
-	} else if _, ok := m.subsystems.MountPoints["hugetlb"]; !ok {
-		resources.HugetlbLimit = nil
-		klog.V(6).InfoS("Optional subsystem not supported: hugetlb")
-	}
-
 	manager, err := m.adapter.newManager(libcontainerCgroupConfig, paths)
 	if err != nil {
 		return fmt.Errorf("failed to create cgroup manager: %v", err)
 	}
-	return manager.Set(resources)
+	return manager.Set(libcontainerCgroupConfig.Resources)
 }
 
 // Create creates the specified cgroup
@@ -502,22 +505,7 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
 		metrics.CgroupManagerDuration.WithLabelValues("create").Observe(metrics.SinceInSeconds(start))
 	}()
 
-	resources := m.toResources(cgroupConfig.ResourceParameters)
-
-	libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
-		Resources: resources,
-	}
-	// libcontainer consumes a different field and expects a different syntax
-	// depending on the cgroup driver in use, so we need this conditional here.
-	if m.adapter.cgroupManagerType == libcontainerSystemd {
-		updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name)
-	} else {
-		libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs()
-	}
-
-	if cgroupConfig.ResourceParameters != nil && cgroupConfig.ResourceParameters.PidsLimit != nil {
-		libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PidsLimit
-	}
+	libcontainerCgroupConfig := m.libctCgroupConfig(cgroupConfig, true)
 
 	// get the manager with the specified cgroup configuration
 	manager, err := m.adapter.newManager(libcontainerCgroupConfig, nil)
@@ -537,8 +525,8 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
 
 	// it may confuse why we call set after we do apply, but the issue is that runc
 	// follows a similar pattern. it's needed to ensure cpu quota is set properly.
-	if err := m.Update(cgroupConfig); err != nil {
-		utilruntime.HandleError(fmt.Errorf("cgroup update failed %v", err))
+	if err := manager.Set(libcontainerCgroupConfig.Resources); err != nil {
+		utilruntime.HandleError(fmt.Errorf("cgroup manager.Set failed: %w", err))
 	}
 
 	return nil
diff --git a/pkg/kubelet/cm/node_container_manager_linux.go b/pkg/kubelet/cm/node_container_manager_linux.go
index e2777645da1..4b35d3c4fe3 100644
--- a/pkg/kubelet/cm/node_container_manager_linux.go
+++ b/pkg/kubelet/cm/node_container_manager_linux.go
@@ -136,6 +136,9 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
 // enforceExistingCgroup updates the limits `rl` on existing cgroup `cName` using `cgroupManager` interface.
 func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.ResourceList) error {
 	rp := getCgroupConfig(rl)
+	if rp == nil {
+		return fmt.Errorf("%q cgroup is not configured properly", cName)
+	}
 
 	// Enforce MemoryQoS for cgroups of kube-reserved/system-reserved. For more information,
 	// see https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2570-memory-qos
@@ -152,9 +155,6 @@ func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.
 		Name:               cName,
 		ResourceParameters: rp,
 	}
-	if cgroupConfig.ResourceParameters == nil {
-		return fmt.Errorf("%q cgroup is not config properly", cgroupConfig.Name)
-	}
 	klog.V(4).InfoS("Enforcing limits on cgroup", "cgroupName", cName, "cpuShares", cgroupConfig.ResourceParameters.CpuShares, "memory", cgroupConfig.ResourceParameters.Memory, "pidsLimit", cgroupConfig.ResourceParameters.PidsLimit)
 	if err := cgroupManager.Validate(cgroupConfig.Name); err != nil {
 		return err
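
Reviewer note (not part of the patch): the Parent/Name split that the new libctCgroupConfig helper performs for the systemd driver can be non-obvious. Here is a minimal, standalone Go sketch of just that split; splitSystemdCgroup and the example slice paths are illustrative assumptions, not code from this change:

package main

import (
	"fmt"
	"path"
)

// splitSystemdCgroup mirrors the dir/base logic in libctCgroupConfig:
// it derives the Parent and Name fields that libcontainer's systemd
// driver expects from a full systemd-style cgroup path.
func splitSystemdCgroup(systemdPath string) (parent, name string) {
	dir, base := path.Split(systemdPath)
	if dir == "/" {
		// A top-level slice is parented to systemd's root slice, "-.slice".
		dir = "-.slice"
	} else {
		// Otherwise the parent is the last path component of the directory.
		dir = path.Base(dir)
	}
	return dir, base
}

func main() {
	// Example values only: "/kubepods.slice/kubepods-burstable.slice" is the
	// systemd form of a cgroup such as /kubepods/burstable.
	fmt.Println(splitSystemdCgroup("/kubepods.slice/kubepods-burstable.slice"))
	// Output: kubepods.slice kubepods-burstable.slice

	fmt.Println(splitSystemdCgroup("/kubepods.slice"))
	// Output: -.slice kubepods.slice
}

The design point the patch leans on is that the cgroupfs driver only needs Path, while the systemd driver needs Name and Parent to talk to systemd; centralizing the conversion in libctCgroupConfig removes the driver conditionals previously duplicated across Create, Update, and Destroy.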