From 2294f09e4e1e2f57be26267bfe464da3204cb63b Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Mon, 7 May 2018 17:15:23 -0700 Subject: [PATCH] add memcg notifications for allocatable cgroup --- pkg/kubelet/eviction/eviction_manager.go | 38 +++++++++++++++++++----- pkg/kubelet/eviction/types.go | 2 ++ pkg/kubelet/kubelet.go | 1 + 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go index f580e2951b6..5fe729bdb5f 100644 --- a/pkg/kubelet/eviction/eviction_manager.go +++ b/pkg/kubelet/eviction/eviction_manager.go @@ -184,7 +184,7 @@ func (m *managerImpl) IsUnderPIDPressure() bool { return hasNodeCondition(m.nodeConditions, v1.NodePIDPressure) } -func (m *managerImpl) startMemoryThresholdNotifier(summary *statsapi.Summary, hard bool, handler thresholdNotifierHandlerFunc) error { +func (m *managerImpl) startMemoryThresholdNotifier(summary *statsapi.Summary, hard, allocatable bool, handler thresholdNotifierHandlerFunc) error { for _, threshold := range m.config.Thresholds { if threshold.Signal != evictionapi.SignalMemoryAvailable || hard != isHardEvictionThreshold(threshold) { continue @@ -193,19 +193,27 @@ func (m *managerImpl) startMemoryThresholdNotifier(summary *statsapi.Summary, ha if err != nil { return err } - // TODO add support for eviction from --cgroup-root cgpath, found := cgroups.MountPoints["memory"] if !found || len(cgpath) == 0 { return fmt.Errorf("memory cgroup mount point not found") } attribute := "memory.usage_in_bytes" - if summary.Node.Memory == nil || summary.Node.Memory.UsageBytes == nil || summary.Node.Memory.WorkingSetBytes == nil { + memoryStats := summary.Node.Memory + if allocatable { + cgpath += m.config.PodCgroupRoot + allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods) + if err != nil { + return err + } + memoryStats = allocatableContainer.Memory + } + if memoryStats == nil || memoryStats.UsageBytes == nil || memoryStats.WorkingSetBytes == nil || memoryStats.AvailableBytes == nil { return fmt.Errorf("summary was incomplete") } // Set threshold on usage to capacity - eviction_hard + inactive_file, // since we want to be notified when working_set = capacity - eviction_hard - inactiveFile := resource.NewQuantity(int64(*summary.Node.Memory.UsageBytes-*summary.Node.Memory.WorkingSetBytes), resource.BinarySI) - capacity := resource.NewQuantity(int64(*summary.Node.Memory.AvailableBytes+*summary.Node.Memory.WorkingSetBytes), resource.BinarySI) + inactiveFile := resource.NewQuantity(int64(*memoryStats.UsageBytes-*memoryStats.WorkingSetBytes), resource.BinarySI) + capacity := resource.NewQuantity(int64(*memoryStats.AvailableBytes+*memoryStats.WorkingSetBytes), resource.BinarySI) evictionThresholdQuantity := evictionapi.GetThresholdQuantity(threshold.Value, capacity) memcgThreshold := capacity.DeepCopy() memcgThreshold.Sub(*evictionThresholdQuantity) @@ -255,22 +263,38 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act if m.config.KernelMemcgNotification && m.notifierStopCh.Reset() { glog.V(4).Infof("eviction manager attempting to integrate with kernel memcg notification api") // start soft memory notification - err = m.startMemoryThresholdNotifier(summary, false, func(desc string) { + err = m.startMemoryThresholdNotifier(summary, false, false, func(desc string) { glog.Infof("soft memory eviction threshold crossed at %s", desc) // TODO wait grace period for soft memory limit m.synchronize(diskInfoProvider, podFunc) }) if err != nil { glog.Warningf("eviction manager: failed to create soft memory threshold notifier: %v", err) + } // start soft memory notification + err = m.startMemoryThresholdNotifier(summary, false, true, func(desc string) { + glog.Infof("soft allocatable memory eviction threshold crossed at %s", desc) + // TODO wait grace period for soft memory limit + m.synchronize(diskInfoProvider, podFunc) + }) + if err != nil { + glog.Warningf("eviction manager: failed to create allocatable soft memory threshold notifier: %v", err) } // start hard memory notification - err = m.startMemoryThresholdNotifier(summary, true, func(desc string) { + err = m.startMemoryThresholdNotifier(summary, true, false, func(desc string) { glog.Infof("hard memory eviction threshold crossed at %s", desc) m.synchronize(diskInfoProvider, podFunc) }) if err != nil { glog.Warningf("eviction manager: failed to create hard memory threshold notifier: %v", err) } + // start hard memory notification + err = m.startMemoryThresholdNotifier(summary, true, true, func(desc string) { + glog.Infof("hard allocatable memory eviction threshold crossed at %s", desc) + m.synchronize(diskInfoProvider, podFunc) + }) + if err != nil { + glog.Warningf("eviction manager: failed to create hard allocatable memory threshold notifier: %v", err) + } } // make observations and get a function to derive pod usage stats relative to those observations. diff --git a/pkg/kubelet/eviction/types.go b/pkg/kubelet/eviction/types.go index cc192b8b5e7..26e0432fd9e 100644 --- a/pkg/kubelet/eviction/types.go +++ b/pkg/kubelet/eviction/types.go @@ -48,6 +48,8 @@ type Config struct { Thresholds []evictionapi.Threshold // KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed. KernelMemcgNotification bool + // PodCgroupRoot is the cgroup which contains all pods. + PodCgroupRoot string } // Manager evaluates when an eviction threshold for node stability has been met on the node. diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 05a5064c603..b0cbdc410b8 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -438,6 +438,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration, MaxPodGracePeriodSeconds: int64(kubeCfg.EvictionMaxPodGracePeriod), Thresholds: thresholds, KernelMemcgNotification: experimentalKernelMemcgNotification, + PodCgroupRoot: kubeDeps.ContainerManager.GetPodCgroupRoot(), } serviceIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc})