mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-31 15:25:57 +00:00
Merge pull request #63521 from dashpole/allocatable_memcg
Automatic merge from submit-queue (batch tested with PRs 63314, 63884, 63799, 63521, 62242). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Add memcg notifications for allocatable cgroup **What this PR does / why we need it**: Use memory cgroup notifications to trigger the eviction manager when the allocatable eviction threshold is crossed. This allows the eviction manager to respond more quickly when the allocatable cgroup's available memory becomes low. Evictions are preferable to OOMs in the cgroup since the kubelet can enforce its priorities on which pod is killed. **Which issue(s) this PR fixes**: Fixes https://github.com/kubernetes/kubernetes/issues/57901 **Special notes for your reviewer**: This adds the allocatable cgroup from the container manager to the eviction config. **Release note**: ```release-note NONE ``` /sig node /priority important-soon /kind feature I would like this to be included in the 1.11 release.
This commit is contained in:
commit
6934c4f599
@ -196,7 +196,7 @@ func (m *managerImpl) IsUnderPIDPressure() bool {
|
||||
return hasNodeCondition(m.nodeConditions, v1.NodePIDPressure)
|
||||
}
|
||||
|
||||
func (m *managerImpl) startMemoryThresholdNotifier(summary *statsapi.Summary, hard bool, handler thresholdNotifierHandlerFunc) error {
|
||||
func (m *managerImpl) startMemoryThresholdNotifier(summary *statsapi.Summary, hard, allocatable bool, handler thresholdNotifierHandlerFunc) error {
|
||||
for _, threshold := range m.config.Thresholds {
|
||||
if threshold.Signal != evictionapi.SignalMemoryAvailable || hard != isHardEvictionThreshold(threshold) {
|
||||
continue
|
||||
@ -205,19 +205,27 @@ func (m *managerImpl) startMemoryThresholdNotifier(summary *statsapi.Summary, ha
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// TODO add support for eviction from --cgroup-root
|
||||
cgpath, found := cgroups.MountPoints["memory"]
|
||||
if !found || len(cgpath) == 0 {
|
||||
return fmt.Errorf("memory cgroup mount point not found")
|
||||
}
|
||||
attribute := "memory.usage_in_bytes"
|
||||
if summary.Node.Memory == nil || summary.Node.Memory.UsageBytes == nil || summary.Node.Memory.WorkingSetBytes == nil {
|
||||
memoryStats := summary.Node.Memory
|
||||
if allocatable {
|
||||
cgpath += m.config.PodCgroupRoot
|
||||
allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
memoryStats = allocatableContainer.Memory
|
||||
}
|
||||
if memoryStats == nil || memoryStats.UsageBytes == nil || memoryStats.WorkingSetBytes == nil || memoryStats.AvailableBytes == nil {
|
||||
return fmt.Errorf("summary was incomplete")
|
||||
}
|
||||
// Set threshold on usage to capacity - eviction_hard + inactive_file,
|
||||
// since we want to be notified when working_set = capacity - eviction_hard
|
||||
inactiveFile := resource.NewQuantity(int64(*summary.Node.Memory.UsageBytes-*summary.Node.Memory.WorkingSetBytes), resource.BinarySI)
|
||||
capacity := resource.NewQuantity(int64(*summary.Node.Memory.AvailableBytes+*summary.Node.Memory.WorkingSetBytes), resource.BinarySI)
|
||||
inactiveFile := resource.NewQuantity(int64(*memoryStats.UsageBytes-*memoryStats.WorkingSetBytes), resource.BinarySI)
|
||||
capacity := resource.NewQuantity(int64(*memoryStats.AvailableBytes+*memoryStats.WorkingSetBytes), resource.BinarySI)
|
||||
evictionThresholdQuantity := evictionapi.GetThresholdQuantity(threshold.Value, capacity)
|
||||
memcgThreshold := capacity.DeepCopy()
|
||||
memcgThreshold.Sub(*evictionThresholdQuantity)
|
||||
@ -267,22 +275,38 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
|
||||
if m.config.KernelMemcgNotification && m.notifierStopCh.Reset() {
|
||||
glog.V(4).Infof("eviction manager attempting to integrate with kernel memcg notification api")
|
||||
// start soft memory notification
|
||||
err = m.startMemoryThresholdNotifier(summary, false, func(desc string) {
|
||||
err = m.startMemoryThresholdNotifier(summary, false, false, func(desc string) {
|
||||
glog.Infof("soft memory eviction threshold crossed at %s", desc)
|
||||
// TODO wait grace period for soft memory limit
|
||||
m.synchronize(diskInfoProvider, podFunc)
|
||||
})
|
||||
if err != nil {
|
||||
glog.Warningf("eviction manager: failed to create soft memory threshold notifier: %v", err)
|
||||
} // start soft allocatable memory notification
|
||||
err = m.startMemoryThresholdNotifier(summary, false, true, func(desc string) {
|
||||
glog.Infof("soft allocatable memory eviction threshold crossed at %s", desc)
|
||||
// TODO wait grace period for soft memory limit
|
||||
m.synchronize(diskInfoProvider, podFunc)
|
||||
})
|
||||
if err != nil {
|
||||
glog.Warningf("eviction manager: failed to create allocatable soft memory threshold notifier: %v", err)
|
||||
}
|
||||
// start hard memory notification
|
||||
err = m.startMemoryThresholdNotifier(summary, true, func(desc string) {
|
||||
err = m.startMemoryThresholdNotifier(summary, true, false, func(desc string) {
|
||||
glog.Infof("hard memory eviction threshold crossed at %s", desc)
|
||||
m.synchronize(diskInfoProvider, podFunc)
|
||||
})
|
||||
if err != nil {
|
||||
glog.Warningf("eviction manager: failed to create hard memory threshold notifier: %v", err)
|
||||
}
|
||||
// start hard allocatable memory notification
|
||||
err = m.startMemoryThresholdNotifier(summary, true, true, func(desc string) {
|
||||
glog.Infof("hard allocatable memory eviction threshold crossed at %s", desc)
|
||||
m.synchronize(diskInfoProvider, podFunc)
|
||||
})
|
||||
if err != nil {
|
||||
glog.Warningf("eviction manager: failed to create hard allocatable memory threshold notifier: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// make observations and get a function to derive pod usage stats relative to those observations.
|
||||
|
@ -48,6 +48,8 @@ type Config struct {
|
||||
Thresholds []evictionapi.Threshold
|
||||
// KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed.
|
||||
KernelMemcgNotification bool
|
||||
// PodCgroupRoot is the cgroup which contains all pods.
|
||||
PodCgroupRoot string
|
||||
}
|
||||
|
||||
// Manager evaluates when an eviction threshold for node stability has been met on the node.
|
||||
|
@ -439,6 +439,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
|
||||
MaxPodGracePeriodSeconds: int64(kubeCfg.EvictionMaxPodGracePeriod),
|
||||
Thresholds: thresholds,
|
||||
KernelMemcgNotification: experimentalKernelMemcgNotification,
|
||||
PodCgroupRoot: kubeDeps.ContainerManager.GetPodCgroupRoot(),
|
||||
}
|
||||
|
||||
serviceIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc})
|
||||
|
Loading…
Reference in New Issue
Block a user