Merge pull request #108597 from kolyshkin/prepare-for-runc-1.1

kubelet/cm: refactor, prepare for runc 1.1 bump
Authored by Kubernetes Prow Robot on 2022-03-23 11:20:30 -07:00; committed by GitHub
commit 75b19b242c
2 changed files with 67 additions and 79 deletions

pkg/kubelet/cm/cgroup_manager_linux.go

@@ -237,20 +237,41 @@ func (m *cgroupManagerImpl) buildCgroupUnifiedPath(name CgroupName) string {
 	return path.Join(cmutil.CgroupRoot, cgroupFsAdaptedName)
 }
 
-// TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead.
-// It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent)
-// and split it appropriately, using essentially the logic below.
-// This was done for cgroupfs in opencontainers/runc#497 but a counterpart
-// for systemd was never introduced.
-func updateSystemdCgroupInfo(cgroupConfig *libcontainerconfigs.Cgroup, cgroupName CgroupName) {
-	dir, base := path.Split(cgroupName.ToSystemd())
+// libctCgroupConfig converts CgroupConfig to libcontainer's Cgroup config.
+func (m *cgroupManagerImpl) libctCgroupConfig(in *CgroupConfig, needResources bool) *libcontainerconfigs.Cgroup {
+	config := &libcontainerconfigs.Cgroup{}
+	if needResources {
+		config.Resources = m.toResources(in.ResourceParameters)
+	} else {
+		config.Resources = &libcontainerconfigs.Resources{}
+	}
+
+	if m.adapter.cgroupManagerType == libcontainerCgroupfs {
+		// For fs cgroup manager, we can either set Path or Name and Parent.
+		// Setting Path is easier.
+		config.Path = in.Name.ToCgroupfs()
+
+		return config
+	}
+
+	// For systemd, we have to set Name and Parent, as they are needed to talk to systemd.
+	// Setting Path is optional as it can be deduced from Name and Parent.
+
+	// TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead.
+	// It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent)
+	// and split it appropriately, using essentially the logic below.
+	// This was done for cgroupfs in opencontainers/runc#497 but a counterpart
+	// for systemd was never introduced.
+	dir, base := path.Split(in.Name.ToSystemd())
 	if dir == "/" {
 		dir = "-.slice"
 	} else {
 		dir = path.Base(dir)
 	}
-	cgroupConfig.Parent = dir
-	cgroupConfig.Name = base
+	config.Parent = dir
+	config.Name = base
+
+	return config
 }
 
 // Validate checks if all subsystem cgroups already exist
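
For readers unfamiliar with the systemd naming scheme the new helper inherits, here is a minimal, self-contained Go sketch of the Parent/Name split performed above; the slice names in main are illustrative only:

package main

import (
	"fmt"
	"path"
)

// splitSystemdName reproduces the Parent/Name split from libctCgroupConfig
// for systemd cgroup names.
func splitSystemdName(systemdName string) (parent, name string) {
	dir, base := path.Split(systemdName)
	if dir == "/" {
		// systemd calls its root slice "-.slice"; top-level units hang off it.
		dir = "-.slice"
	} else {
		dir = path.Base(dir)
	}
	return dir, base
}

func main() {
	fmt.Println(splitSystemdName("/kubepods.slice"))
	// -> parent "-.slice", name "kubepods.slice"
	fmt.Println(splitSystemdName("/kubepods.slice/kubepods-burstable.slice"))
	// -> parent "kubepods.slice", name "kubepods-burstable.slice"
}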
@@ -316,14 +337,7 @@ func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error {
 
 	cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name)
 
-	libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{}
-	// libcontainer consumes a different field and expects a different syntax
-	// depending on the cgroup driver in use, so we need this conditional here.
-	if m.adapter.cgroupManagerType == libcontainerSystemd {
-		updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name)
-	} else {
-		libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs()
-	}
+	libcontainerCgroupConfig := m.libctCgroupConfig(cgroupConfig, false)
 
 	manager, err := m.adapter.newManager(libcontainerCgroupConfig, cgroupPaths)
 	if err != nil {
@@ -407,8 +421,34 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources {
 	if resourceConfig.PidsLimit != nil {
 		resources.PidsLimit = *resourceConfig.PidsLimit
 	}
-	// if huge pages are enabled, we set them in libcontainer
-	// for each page size enumerated, set that value
+
+	m.maybeSetHugetlb(resourceConfig, resources)
+
+	// Ideally unified is used for all the resources when running on cgroup v2.
+	// It doesn't make difference for the memory.max limit, but for e.g. the cpu controller
+	// you can specify the correct setting without relying on the conversions performed by the OCI runtime.
+	if resourceConfig.Unified != nil && libcontainercgroups.IsCgroup2UnifiedMode() {
+		resources.Unified = make(map[string]string)
+		for k, v := range resourceConfig.Unified {
+			resources.Unified[k] = v
+		}
+	}
+
+	return resources
+}
+
+func (m *cgroupManagerImpl) maybeSetHugetlb(resourceConfig *ResourceConfig, resources *libcontainerconfigs.Resources) {
+	// Check if hugetlb is supported.
+	if libcontainercgroups.IsCgroup2UnifiedMode() {
+		if !getSupportedUnifiedControllers().Has("hugetlb") {
+			klog.V(6).InfoS("Optional subsystem not supported: hugetlb")
+			return
+		}
+	} else if _, ok := m.subsystems.MountPoints["hugetlb"]; !ok {
+		klog.V(6).InfoS("Optional subsystem not supported: hugetlb")
+		return
+	}
+	// For each page size enumerated, set that value.
 	pageSizes := sets.NewString()
 	for pageSize, limit := range resourceConfig.HugePageLimit {
 		sizeString, err := v1helper.HugePageUnitSizeFromByteSize(pageSize)
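
Both new code paths above gate on libcontainercgroups.IsCgroup2UnifiedMode(). A minimal sketch of the conventional probe behind that call, assuming Linux and golang.org/x/sys/unix: on a unified (v2-only) hierarchy, /sys/fs/cgroup itself is a cgroup2 mount, so a statfs and a magic-number check suffice:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// isCgroup2UnifiedMode sketches the usual detection: check whether the
// filesystem mounted at /sys/fs/cgroup reports the cgroup2 magic.
func isCgroup2UnifiedMode() (bool, error) {
	var st unix.Statfs_t
	if err := unix.Statfs("/sys/fs/cgroup", &st); err != nil {
		return false, err
	}
	return st.Type == unix.CGROUP2_SUPER_MAGIC, nil
}

func main() {
	unified, err := isCgroup2UnifiedMode()
	fmt.Println("cgroup v2 unified:", unified, err)
}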
@@ -432,16 +472,6 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources {
 			Limit: uint64(0),
 		})
 	}
-	// Ideally unified is used for all the resources when running on cgroup v2.
-	// It doesn't make difference for the memory.max limit, but for e.g. the cpu controller
-	// you can specify the correct setting without relying on the conversions performed by the OCI runtime.
-	if resourceConfig.Unified != nil && libcontainercgroups.IsCgroup2UnifiedMode() {
-		resources.Unified = make(map[string]string)
-		for k, v := range resourceConfig.Unified {
-			resources.Unified[k] = v
-		}
-	}
-	return resources
 }
 
 // Update updates the cgroup with the specified Cgroup Configuration
@@ -451,13 +481,7 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
 		metrics.CgroupManagerDuration.WithLabelValues("update").Observe(metrics.SinceInSeconds(start))
 	}()
 
-	// Extract the cgroup resource parameters
-	resourceConfig := cgroupConfig.ResourceParameters
-	resources := m.toResources(resourceConfig)
-
-	libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
-		Resources: resources,
-	}
+	libcontainerCgroupConfig := m.libctCgroupConfig(cgroupConfig, true)
 
 	unified := libcontainercgroups.IsCgroup2UnifiedMode()
 	var paths map[string]string
@@ -467,32 +491,11 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
 		paths = m.buildCgroupPaths(cgroupConfig.Name)
 	}
 
-	// libcontainer consumes a different field and expects a different syntax
-	// depending on the cgroup driver in use, so we need this conditional here.
-	if m.adapter.cgroupManagerType == libcontainerSystemd {
-		updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name)
-	}
-
-	if cgroupConfig.ResourceParameters != nil && cgroupConfig.ResourceParameters.PidsLimit != nil {
-		resources.PidsLimit = *cgroupConfig.ResourceParameters.PidsLimit
-	}
-
-	if unified {
-		supportedControllers := getSupportedUnifiedControllers()
-		if !supportedControllers.Has("hugetlb") {
-			resources.HugetlbLimit = nil
-			klog.V(6).InfoS("Optional subsystem not supported: hugetlb")
-		}
-	} else if _, ok := m.subsystems.MountPoints["hugetlb"]; !ok {
-		resources.HugetlbLimit = nil
-		klog.V(6).InfoS("Optional subsystem not supported: hugetlb")
-	}
-
 	manager, err := m.adapter.newManager(libcontainerCgroupConfig, paths)
 	if err != nil {
 		return fmt.Errorf("failed to create cgroup manager: %v", err)
 	}
-	return manager.Set(resources)
+	return manager.Set(libcontainerCgroupConfig.Resources)
 }
 
 // Create creates the specified cgroup
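
Why Set now receives libcontainerCgroupConfig.Resources: runc 1.1 changed libcontainer's cgroups Manager.Set to take the Resources struct directly, where runc 1.0 took the enclosing config, so passing the Resources field lines kubelet up with the new signature. A trimmed sketch of the interface shape this code targets, reduced to the methods used in this file (see github.com/opencontainers/runc/libcontainer/cgroups for the full definition):

package cgroups

import (
	"github.com/opencontainers/runc/libcontainer/configs"
)

// Manager is trimmed here to the methods exercised by the diff above.
type Manager interface {
	// Apply creates the cgroup(s) and, for a non-negative pid, places
	// that process into them.
	Apply(pid int) error
	// Set writes resource limits. As of runc 1.1 it takes *configs.Resources
	// directly, where runc 1.0 took a whole *configs.Config.
	Set(r *configs.Resources) error
	// Destroy removes the cgroup(s).
	Destroy() error
}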
@@ -502,22 +505,7 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
 		metrics.CgroupManagerDuration.WithLabelValues("create").Observe(metrics.SinceInSeconds(start))
 	}()
 
-	resources := m.toResources(cgroupConfig.ResourceParameters)
-
-	libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
-		Resources: resources,
-	}
-	// libcontainer consumes a different field and expects a different syntax
-	// depending on the cgroup driver in use, so we need this conditional here.
-	if m.adapter.cgroupManagerType == libcontainerSystemd {
-		updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name)
-	} else {
-		libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs()
-	}
-
-	if cgroupConfig.ResourceParameters != nil && cgroupConfig.ResourceParameters.PidsLimit != nil {
-		libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PidsLimit
-	}
+	libcontainerCgroupConfig := m.libctCgroupConfig(cgroupConfig, true)
 
 	// get the manager with the specified cgroup configuration
 	manager, err := m.adapter.newManager(libcontainerCgroupConfig, nil)
@@ -537,8 +525,8 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
 
 	// it may confuse why we call set after we do apply, but the issue is that runc
 	// follows a similar pattern. it's needed to ensure cpu quota is set properly.
-	if err := m.Update(cgroupConfig); err != nil {
-		utilruntime.HandleError(fmt.Errorf("cgroup update failed %v", err))
+	if err := manager.Set(libcontainerCgroupConfig.Resources); err != nil {
+		utilruntime.HandleError(fmt.Errorf("cgroup manager.Set failed: %w", err))
 	}
 
 	return nil
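
Design note on this last hunk: Create previously round-tripped through m.Update, which built a second libcontainer manager just to call Set. Since Create now already holds both a manager and a fully populated config (libctCgroupConfig with needResources true), it calls Set on that manager directly, keeping the apply-then-set ordering the comment describes while avoiding the redundant manager construction.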

pkg/kubelet/cm/node_container_manager_linux.go

@@ -136,6 +136,9 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
 // enforceExistingCgroup updates the limits `rl` on existing cgroup `cName` using `cgroupManager` interface.
 func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.ResourceList) error {
 	rp := getCgroupConfig(rl)
+	if rp == nil {
+		return fmt.Errorf("%q cgroup is not configured properly", cName)
+	}
 
 	// Enforce MemoryQoS for cgroups of kube-reserved/system-reserved. For more information,
 	// see https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2570-memory-qos
@@ -152,9 +155,6 @@ func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.ResourceList) error {
 		Name:               cName,
 		ResourceParameters: rp,
 	}
-	if cgroupConfig.ResourceParameters == nil {
-		return fmt.Errorf("%q cgroup is not config properly", cgroupConfig.Name)
-	}
 	klog.V(4).InfoS("Enforcing limits on cgroup", "cgroupName", cName, "cpuShares", cgroupConfig.ResourceParameters.CpuShares, "memory", cgroupConfig.ResourceParameters.Memory, "pidsLimit", cgroupConfig.ResourceParameters.PidsLimit)
 	if err := cgroupManager.Validate(cgroupConfig.Name); err != nil {
 		return err
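
Hoisting the nil check is not just cosmetic: the MemoryQoS block that now follows the guard reads fields of rp, so validating up front returns an error instead of risking a nil-pointer panic. A self-contained sketch of the fail-fast pattern, with a stand-in type rather than kubelet's real ResourceConfig:

package main

import "fmt"

// resourceConfig stands in for kubelet's ResourceConfig, reduced to one field.
type resourceConfig struct {
	Memory *int64
}

// enforce mirrors the fixed ordering: guard against nil before any code
// that dereferences the config.
func enforce(rp *resourceConfig, name string) error {
	if rp == nil {
		return fmt.Errorf("%q cgroup is not configured properly", name)
	}
	if rp.Memory != nil {
		// MemoryQoS-style tuning that reads *rp.Memory would go here.
		_ = *rp.Memory
	}
	return nil
}

func main() {
	// Fails fast with an error instead of panicking later.
	fmt.Println(enforce(nil, "/kubereserved"))
}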