Add a warning log, event and metric for cgroup version 1

Signed-off-by: Harshal Patil <harpatil@redhat.com>
Harshal Patil 2024-07-02 13:09:25 -04:00
parent 5c9d4fa23d
commit 68d317a8d1
9 changed files with 46 additions and 0 deletions


@@ -41,6 +41,10 @@ func NewCgroupManager(_ interface{}) CgroupManager {
	return &unsupportedCgroupManager{}
}

func (m *unsupportedCgroupManager) Version() int {
	return 0
}

func (m *unsupportedCgroupManager) Name(_ CgroupName) string {
	return ""
}


@@ -48,6 +48,11 @@ func NewCgroupV1Manager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager
	}
}

// Version of the cgroup implementation on the host
func (c *cgroupV1impl) Version() int {
	return 1
}

// Validate checks if all subsystem cgroups are valid
func (c *cgroupV1impl) Validate(name CgroupName) error {
	// Get map of all cgroup paths on the system for the particular cgroup


@@ -47,6 +47,11 @@ func NewCgroupV2Manager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager
	}
}

// Version of the cgroup implementation on the host
func (c *cgroupV2impl) Version() int {
	return 2
}

// Validate checks if all subsystem cgroups are valid
func (c *cgroupV2impl) Validate(name CgroupName) error {
	cgroupPath := c.buildCgroupUnifiedPath(name)
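For reference, the values returned above correspond to how the host mounts /sys/fs/cgroup. A minimal standalone sketch of probing that (not part of this commit; the cgroupVersion helper is hypothetical, and the statfs check mirrors the approach libcontainer uses for its unified-mode detection):

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// cgroupVersion reports 2 when /sys/fs/cgroup is a cgroup2 (unified) mount,
// and falls back to 1 otherwise.
func cgroupVersion() (int, error) {
	var st unix.Statfs_t
	if err := unix.Statfs("/sys/fs/cgroup", &st); err != nil {
		return 0, err
	}
	if st.Type == unix.CGROUP2_SUPER_MAGIC {
		return 2, nil
	}
	return 1, nil
}

func main() {
	v, err := cgroupVersion()
	if err != nil {
		fmt.Println("cannot determine cgroup version:", err)
		return
	}
	fmt.Println("cgroup version:", v) // 1 or 2, matching Version() above
}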


@@ -42,6 +42,11 @@ import (
	"k8s.io/utils/cpuset"
)

const (
	// Warning message for the users still using cgroup v1
	CgroupV1MaintenanceModeWarning = "Cgroup v1 support is in maintenance mode, please migrate to Cgroup v2."
)

type ActivePodsFunc func() []*v1.Pod

// Manages the containers running on a machine.
@@ -159,6 +164,7 @@ type NodeConfig struct {
	CPUCFSQuotaPeriod time.Duration
	TopologyManagerPolicy string
	TopologyManagerPolicyOptions map[string]string
	CgroupVersion int
}

type NodeAllocatableConfig struct {


@@ -245,6 +245,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
	// Turn CgroupRoot from a string (in cgroupfs path format) to internal CgroupName
	cgroupRoot := ParseCgroupfsToCgroupName(nodeConfig.CgroupRoot)
	cgroupManager := NewCgroupManager(subsystems, nodeConfig.CgroupDriver)
	nodeConfig.CgroupVersion = cgroupManager.Version()

	// Check if Cgroup-root actually exists on the node
	if nodeConfig.CgroupsPerQOS {
		// this does default to / when enabled, but this tests against regressions.
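The one added line here is the hand-off point: the version is read from the cgroup manager once at construction time and carried in NodeConfig, so later consumers never need the manager itself. A trimmed sketch of that flow (types reduced to the relevant field for illustration; not the real NodeConfig):

package main

import "fmt"

type NodeConfig struct {
	CgroupVersion int
}

type containerManager struct {
	nodeConfig NodeConfig
}

// newContainerManager captures the version once, mirroring
// nodeConfig.CgroupVersion = cgroupManager.Version() above.
func newContainerManager(cgroupVersion int) *containerManager {
	nc := NodeConfig{CgroupVersion: cgroupVersion}
	return &containerManager{nodeConfig: nc}
}

func (cm *containerManager) GetNodeConfig() NodeConfig {
	return cm.nodeConfig
}

func main() {
	cm := newContainerManager(1)
	fmt.Println(cm.GetNodeConfig().CgroupVersion) // 1
}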


@@ -88,6 +88,8 @@ type CgroupManager interface {
	GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error)
	// Set resource config for the specified resource type on the cgroup
	SetCgroupConfig(name CgroupName, resource v1.ResourceName, resourceConfig *ResourceConfig) error
	// Version of the cgroup implementation on the host
	Version() int
}

// QOSContainersInfo stores the names of containers per qos
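With Version() on the CgroupManager interface, callers can be exercised against a stub. A minimal sketch, assuming a narrowed one-method view (versioner and fakeCgroupManager are hypothetical; the real interface carries many more methods):

package main

import "fmt"

// versioner is a hypothetical one-method view of CgroupManager.
type versioner interface {
	Version() int
}

// fakeCgroupManager is an illustrative stub for tests.
type fakeCgroupManager struct{ version int }

func (f *fakeCgroupManager) Version() int { return f.version }

// warnIfV1 depends only on the narrow interface, so it is trivial to test.
func warnIfV1(m versioner) {
	if m.Version() == 1 {
		fmt.Println("Warning: cgroup v1 support is in maintenance mode")
	}
}

func main() {
	warnIfV1(&fakeCgroupManager{version: 1})
}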


@@ -76,6 +76,7 @@ const (
	FailedMountOnFilesystemMismatch = "FailedMountOnFilesystemMismatch"
	FailedPrepareDynamicResources = "FailedPrepareDynamicResources"
	PossibleMemoryBackedVolumesOnDisk = "PossibleMemoryBackedVolumesOnDisk"
	CgroupV1 = "CgroupV1"
)

// Image manager event reason list


@@ -1636,6 +1636,8 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
		os.Exit(1)
	}

	kl.warnCgroupV1Usage()

	// Start volume manager
	go kl.volumeManager.Run(kl.sourcesReady, wait.NeverStop)
@@ -3021,3 +3023,12 @@ func (kl *Kubelet) PrepareDynamicResources(pod *v1.Pod) error {
func (kl *Kubelet) UnprepareDynamicResources(pod *v1.Pod) error {
	return kl.containerManager.UnprepareDynamicResources(pod)
}

func (kl *Kubelet) warnCgroupV1Usage() {
	cgroupVersion := kl.containerManager.GetNodeConfig().CgroupVersion
	if cgroupVersion == 1 {
		kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.CgroupV1, cm.CgroupV1MaintenanceModeWarning)
		klog.V(2).InfoS("Warning: cgroup v1", "message", cm.CgroupV1MaintenanceModeWarning)
	}
	metrics.CgroupVersion.Set(float64(cgroupVersion))
}
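Because warnCgroupV1Usage goes through the kubelet's event recorder with the CgroupV1 reason added above, the behavior is easy to exercise with client-go's FakeRecorder. A self-contained sketch (warnIfCgroupV1 is a hypothetical extraction of the conditional above):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/tools/record"
)

const cgroupV1Warning = "Cgroup v1 support is in maintenance mode, please migrate to Cgroup v2."

// warnIfCgroupV1 mirrors the conditional inside warnCgroupV1Usage.
func warnIfCgroupV1(recorder record.EventRecorder, ref runtime.Object, version int) {
	if version == 1 {
		recorder.Eventf(ref, v1.EventTypeWarning, "CgroupV1", cgroupV1Warning)
	}
}

func main() {
	rec := record.NewFakeRecorder(1) // buffered channel of rendered events
	warnIfCgroupV1(rec, &v1.Node{}, 1)
	fmt.Println(<-rec.Events) // Warning CgroupV1 Cgroup v1 support is in maintenance mode, ...
}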


@@ -72,6 +72,7 @@ const (
	OrphanedRuntimePodTotalKey = "orphaned_runtime_pods_total"
	RestartedPodTotalKey = "restarted_pods_total"
	ImagePullDurationKey = "image_pull_duration_seconds"
	CgroupVersionKey = "cgroup_version"

	// Metrics keys of remote runtime operations
	RuntimeOperationsKey = "runtime_operations_total"
@@ -907,6 +908,15 @@ var (
			StabilityLevel: metrics.ALPHA,
		},
	)

	CgroupVersion = metrics.NewGauge(
		&metrics.GaugeOpts{
			Subsystem:      KubeletSubsystem,
			Name:           CgroupVersionKey,
			Help:           "cgroup version on the hosts.",
			StabilityLevel: metrics.ALPHA,
		},
	)
)

var registerMetrics sync.Once
@@ -996,6 +1006,7 @@ func Register(collectors ...metrics.StableCollector) {
		legacyregistry.MustRegister(LifecycleHandlerHTTPFallbacks)
		legacyregistry.MustRegister(LifecycleHandlerSleepTerminated)
		legacyregistry.MustRegister(CgroupVersion)
	})
}
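Once registered, the gauge appears in the kubelet's Prometheus exposition as kubelet_cgroup_version with the value 0, 1, or 2. A runnable sketch of the same component-base plumbing outside the kubelet (the port and the Set value here are arbitrary):

package main

import (
	"net/http"

	"k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/legacyregistry"
)

var cgroupVersion = metrics.NewGauge(
	&metrics.GaugeOpts{
		Subsystem:      "kubelet",
		Name:           "cgroup_version",
		Help:           "cgroup version on the hosts.",
		StabilityLevel: metrics.ALPHA,
	},
)

func main() {
	legacyregistry.MustRegister(cgroupVersion)
	cgroupVersion.Set(1) // exposed as: kubelet_cgroup_version 1
	http.Handle("/metrics", legacyregistry.Handler())
	_ = http.ListenAndServe("localhost:8080", nil) // curl localhost:8080/metrics
}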