From 8f2bc39f7224601a72ed735008f2a36659016579 Mon Sep 17 00:00:00 2001 From: Steve Kuznetsov Date: Mon, 7 Mar 2022 07:23:09 -0800 Subject: [PATCH] kubelet: cgroups: be verbose about validation Previously, callers of `Exists()` would not know why the cGroup was or was not existing. In one call-site in particular, the `kubelet` would entirely fail to start if the cGroup validation did not succeed. In these cases we MUST explain what went wrong and pass that information clearly to the caller. Previously, some but not all of the reasons for invalidation were logged at a low log-level instead. This led to poor UX. The original method was retained on the interface so as to make this diff small. Signed-off-by: Steve Kuznetsov --- pkg/kubelet/cm/cgroup_manager_linux.go | 21 +++++++++++-------- pkg/kubelet/cm/cgroup_manager_unsupported.go | 4 ++++ pkg/kubelet/cm/container_manager_linux.go | 4 ++-- .../cm/node_container_manager_linux.go | 4 ++-- pkg/kubelet/cm/types.go | 2 ++ 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go index 230173690d5..04b88a22de8 100644 --- a/pkg/kubelet/cm/cgroup_manager_linux.go +++ b/pkg/kubelet/cm/cgroup_manager_linux.go @@ -253,21 +253,20 @@ func updateSystemdCgroupInfo(cgroupConfig *libcontainerconfigs.Cgroup, cgroupNam cgroupConfig.Name = base } -// Exists checks if all subsystem cgroups already exist -func (m *cgroupManagerImpl) Exists(name CgroupName) bool { +// Validate checks if all subsystem cgroups already exist +func (m *cgroupManagerImpl) Validate(name CgroupName) error { if libcontainercgroups.IsCgroup2UnifiedMode() { cgroupPath := m.buildCgroupUnifiedPath(name) neededControllers := getSupportedUnifiedControllers() enabledControllers, err := readUnifiedControllers(cgroupPath) if err != nil { - return false + return fmt.Errorf("could not read controllers for cgroup %q: %w", name, err) } difference := neededControllers.Difference(enabledControllers) if difference.Len() > 0 { - klog.V(4).InfoS("The cgroup has some missing controllers", "cgroupName", name, "controllers", difference) - return false + return fmt.Errorf("cgroup %q has some missing controllers: %v", name, strings.Join(difference.List(), ", ")) } - return true + return nil // valid V2 cgroup } // Get map of all cgroup paths on the system for the particular cgroup @@ -297,11 +296,15 @@ func (m *cgroupManagerImpl) Exists(name CgroupName) bool { } if len(missingPaths) > 0 { - klog.V(4).InfoS("The cgroup has some missing paths", "cgroupName", name, "paths", missingPaths) - return false + return fmt.Errorf("cgroup %q has some missing paths: %v", name, strings.Join(missingPaths, ", ")) } - return true + return nil // valid V1 cgroup +} + +// Exists checks if all subsystem cgroups already exist +func (m *cgroupManagerImpl) Exists(name CgroupName) bool { + return m.Validate(name) == nil } // Destroy destroys the specified cgroup diff --git a/pkg/kubelet/cm/cgroup_manager_unsupported.go b/pkg/kubelet/cm/cgroup_manager_unsupported.go index 527ddfc444a..684a1f12f29 100644 --- a/pkg/kubelet/cm/cgroup_manager_unsupported.go +++ b/pkg/kubelet/cm/cgroup_manager_unsupported.go @@ -41,6 +41,10 @@ func (m *unsupportedCgroupManager) Name(_ CgroupName) string { return "" } +func (m *unsupportedCgroupManager) Validate(_ CgroupName) error { + return errNotSupported +} + func (m *unsupportedCgroupManager) Exists(_ CgroupName) bool { return false } diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 74257cd557b..6ef9721a4ae 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -257,8 +257,8 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I // of note, we always use the cgroupfs driver when performing this check since // the input is provided in that format. // this is important because we do not want any name conversion to occur. - if !cgroupManager.Exists(cgroupRoot) { - return nil, fmt.Errorf("invalid configuration: cgroup-root %q doesn't exist", cgroupRoot) + if err := cgroupManager.Validate(cgroupRoot); err != nil { + return nil, fmt.Errorf("invalid configuration: %w", err) } klog.InfoS("Container manager verified user specified cgroup-root exists", "cgroupRoot", cgroupRoot) // Include the top level cgroup for enforcing node allocatable into cgroup-root. diff --git a/pkg/kubelet/cm/node_container_manager_linux.go b/pkg/kubelet/cm/node_container_manager_linux.go index 3217be59873..e2777645da1 100644 --- a/pkg/kubelet/cm/node_container_manager_linux.go +++ b/pkg/kubelet/cm/node_container_manager_linux.go @@ -156,8 +156,8 @@ func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1. return fmt.Errorf("%q cgroup is not config properly", cgroupConfig.Name) } klog.V(4).InfoS("Enforcing limits on cgroup", "cgroupName", cName, "cpuShares", cgroupConfig.ResourceParameters.CpuShares, "memory", cgroupConfig.ResourceParameters.Memory, "pidsLimit", cgroupConfig.ResourceParameters.PidsLimit) - if !cgroupManager.Exists(cgroupConfig.Name) { - return fmt.Errorf("%q cgroup does not exist", cgroupConfig.Name) + if err := cgroupManager.Validate(cgroupConfig.Name); err != nil { + return err } if err := cgroupManager.Update(cgroupConfig); err != nil { return err diff --git a/pkg/kubelet/cm/types.go b/pkg/kubelet/cm/types.go index 83dd8fcd205..edf97334781 100644 --- a/pkg/kubelet/cm/types.go +++ b/pkg/kubelet/cm/types.go @@ -66,6 +66,8 @@ type CgroupManager interface { Destroy(*CgroupConfig) error // Update cgroup configuration. Update(*CgroupConfig) error + // Validate checks if the cgroup is valid + Validate(name CgroupName) error // Exists checks if the cgroup already exists Exists(name CgroupName) bool // Name returns the literal cgroupfs name on the host after any driver specific conversions.