From f5140d3145d7dd1eb5d4c9220772248ae22c54ec Mon Sep 17 00:00:00 2001 From: David Porter Date: Wed, 10 Nov 2021 15:40:59 -0800 Subject: [PATCH] kubelet: cgroupv2 disable memcg notifications The current memory notifier on cgroupv2 relies on reading `cgroup.event_control` which is unsupported on cgroupv2. For now, let's disable the feature on cgroupv2. --- pkg/kubelet/eviction/threshold_notifier_linux.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pkg/kubelet/eviction/threshold_notifier_linux.go b/pkg/kubelet/eviction/threshold_notifier_linux.go index 891bc4d001c..9ab907a9cd2 100644 --- a/pkg/kubelet/eviction/threshold_notifier_linux.go +++ b/pkg/kubelet/eviction/threshold_notifier_linux.go @@ -21,6 +21,7 @@ import ( "sync" "time" + libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups" "golang.org/x/sys/unix" "k8s.io/klog/v2" ) @@ -46,6 +47,14 @@ var _ CgroupNotifier = &linuxCgroupNotifier{} // NewCgroupNotifier returns a linuxCgroupNotifier, which performs cgroup control operations required // to receive notifications from the cgroup when the threshold is crossed in either direction. func NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error) { + // cgroupv2 does not support monitoring cgroup memory thresholds using cgroup.event_control. + // Instead, long term, on cgroupv2 kubelet should rely on combining usage of memory.low on root pods cgroup with inotify notifications on memory.events and/or PSI pressure. + // For now, let's return a fake "disabled" cgroup notifier on cgroupv2. 
+ // https://github.com/kubernetes/kubernetes/issues/106331 + if libcontainercgroups.IsCgroup2UnifiedMode() { + return &disabledThresholdNotifier{}, nil + } + var watchfd, eventfd, epfd, controlfd int var err error watchfd, err = unix.Open(fmt.Sprintf("%s/%s", path, attribute), unix.O_RDONLY|unix.O_CLOEXEC, 0) @@ -183,3 +192,9 @@ func (n *linuxCgroupNotifier) Stop() { unix.Close(n.epfd) close(n.stop) } + +// disabledThresholdNotifier is a fake disabled threshold notifier that performs no-ops. +type disabledThresholdNotifier struct{} + +func (*disabledThresholdNotifier) Start(_ chan<- struct{}) {} +func (*disabledThresholdNotifier) Stop() {}