From 7e976010238fe63fc82409c5e5ff9446ee005c9c Mon Sep 17 00:00:00 2001
From: Maxime Lagresle
Date: Mon, 15 Feb 2021 14:29:29 +0100
Subject: [PATCH 1/3] Prevent Kubelet stuck in DiskPressure when imagefs
 minReclaim is set

Fixes https://github.com/kubernetes/kubernetes/issues/99094
---
 pkg/kubelet/eviction/eviction_manager.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go
index 10d4a2c852a..a99e7d35615 100644
--- a/pkg/kubelet/eviction/eviction_manager.go
+++ b/pkg/kubelet/eviction/eviction_manager.go
@@ -430,7 +430,7 @@ func (m *managerImpl) reclaimNodeLevelResources(signalToReclaim evictionapi.Sign
 		debugLogObservations("observations after resource reclaim", observations)
 
 		// determine the set of thresholds met independent of grace period
-		thresholds := thresholdsMet(m.config.Thresholds, observations, false)
+		thresholds := thresholdsMet(m.config.Thresholds, observations, true)
 		debugLogThresholdsWithObservation("thresholds after resource reclaim - ignoring grace period", thresholds, observations)
 
 		if len(thresholds) == 0 {

From 848dc9e65f2634f2a17fe7e07cf468314f53c6f5 Mon Sep 17 00:00:00 2001
From: Maxime Lagresle
Date: Mon, 22 Feb 2021 11:57:22 +0100
Subject: [PATCH 2/3] Check that minReclaim is taken into account in
 reclaimNodeLevelResources()

---
 pkg/kubelet/eviction/eviction_manager_test.go | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go
index fe30006e8af..3dd07b0f77f 100644
--- a/pkg/kubelet/eviction/eviction_manager_test.go
+++ b/pkg/kubelet/eviction/eviction_manager_test.go
@@ -886,6 +886,51 @@ func TestNodeReclaimFuncs(t *testing.T) {
 		t.Errorf("Manager should not report disk pressure")
 	}
 
+	// synchronize
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should not have disk pressure
+	if manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should not report disk pressure")
+	}
+
+	// induce hard threshold
+	fakeClock.Step(1 * time.Minute)
+	summaryProvider.result = summaryStatsMaker(".9Gi", "200Gi", podStats)
+	// make GC return disk usage below the threshold, but not satisfying minReclaim
+	diskGC.summaryAfterGC = summaryStatsMaker("1.1Gi", "200Gi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should have disk pressure
+	if !manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should report disk pressure since hard threshold was met")
+	}
+
+	// verify image gc was invoked
+	if !diskGC.imageGCInvoked || !diskGC.containerGCInvoked {
+		t.Errorf("Manager should have invoked image gc")
+	}
+
+	// verify a pod was killed because image gc was not enough to satisfy minReclaim
+	if podKiller.pod == nil {
+		t.Errorf("Manager should have killed a pod, but didn't")
+	}
+
+	// reset state
+	diskGC.imageGCInvoked = false
+	diskGC.containerGCInvoked = false
+	podKiller.pod = nil
+
+	// remove disk pressure
+	fakeClock.Step(20 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should not have disk pressure
+	if manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should not report disk pressure")
+	}
+
 	// induce disk pressure!
 	fakeClock.Step(1 * time.Minute)
 	summaryProvider.result = summaryStatsMaker("400Mi", "200Gi", podStats)
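The one-word change in PATCH 1/3 flips the enforceMinReclaim argument of thresholdsMet. Before it, reclaimNodeLevelResources reported success as soon as image/container GC pushed observed usage back under the raw threshold, even when the --eviction-minimum-reclaim slack had not been recovered; synchronize() then returned early on every pass, no pod was ever evicted, and the node sat in DiskPressure indefinitely (issue 99094). The standalone Go sketch below models only that predicate; the threshold struct, the met helper, and the byte values (sized to line up with the test above) are illustrative stand-ins, not the real helpers in pkg/kubelet/eviction/helpers.go.

package main

import "fmt"

// threshold is an illustrative stand-in for an eviction threshold on an
// "available bytes" signal such as imagefs.available; the real code works
// on resource.Quantity values.
type threshold struct {
	value      int64 // pressure when available bytes drop below this
	minReclaim int64 // reclaim until available exceeds value+minReclaim
}

// met models the decision the enforceMinReclaim flag controls: with it set,
// sitting barely above the raw threshold still counts as "met", which is
// what forces the manager on to pod eviction instead of returning early.
func met(t threshold, availableBytes int64, enforceMinReclaim bool) bool {
	target := t.value
	if enforceMinReclaim {
		target += t.minReclaim
	}
	return availableBytes < target
}

func main() {
	t := threshold{value: 1 << 30, minReclaim: 500 << 20} // assumed 1Gi threshold, 500Mi minReclaim
	afterGC := int64(1100 << 20)                          // GC left ~1.1Gi available, as in the test

	fmt.Println(met(t, afterGC, false)) // false: pre-patch check, reclaim looks successful
	fmt.Println(met(t, afterGC, true))  // true: post-patch check, minReclaim not satisfied
}

With the old false argument, the .9Gi -> 1.1Gi recovery exercised by the new test would count as a successful reclaim and no pod would be killed, which is exactly the outcome the added assertions guard against.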
From 63cba062eb8ce3927e2b44f4ba75ed881c30e299 Mon Sep 17 00:00:00 2001
From: Maxime Lagresle
Date: Mon, 12 Apr 2021 21:20:20 +0200
Subject: [PATCH 3/3] more sensible comment

---
 pkg/kubelet/eviction/eviction_manager.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go
index a99e7d35615..f1b81e98b64 100644
--- a/pkg/kubelet/eviction/eviction_manager.go
+++ b/pkg/kubelet/eviction/eviction_manager.go
@@ -429,7 +429,8 @@ func (m *managerImpl) reclaimNodeLevelResources(signalToReclaim evictionapi.Sign
 		observations, _ := makeSignalObservations(summary)
 		debugLogObservations("observations after resource reclaim", observations)
 
-		// determine the set of thresholds met independent of grace period
+		// evaluate all thresholds independently of their grace period to see if with
+		// the new observations, we think we have met min reclaim goals
 		thresholds := thresholdsMet(m.config.Thresholds, observations, true)
 		debugLogThresholdsWithObservation("thresholds after resource reclaim - ignoring grace period", thresholds, observations)
 
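To see why enforcing min reclaim here unsticks the kubelet, the toy loop below contrasts both settings of the flag over a few synchronize passes. Everything in it is a simplification under assumed values (a single 1Gi hard threshold with 500Mi minReclaim, GC that can only recover to ~1.1Gi); syncPass compresses the real control flow in eviction_manager.go and is not its API.

package main

import "fmt"

const (
	mi         = int64(1) << 20
	gi         = int64(1) << 30
	value      = 1 * gi   // assumed hard threshold: pressure below 1Gi available
	minReclaim = 500 * mi // assumed eviction-minimum-reclaim
)

// thresholdMet is the same predicate as in the previous sketch.
func thresholdMet(available int64, enforceMinReclaim bool) bool {
	target := value
	if enforceMinReclaim {
		target += minReclaim
	}
	return available < target
}

// syncPass is a toy single pass of the eviction manager: under pressure it
// first tries node-level GC and only evicts a pod if the signal is still
// considered unresolved. gcEnforcesMinReclaim is the boolean flipped in
// PATCH 1/3.
func syncPass(available int64, gcEnforcesMinReclaim bool) (int64, bool) {
	if !thresholdMet(available, false) {
		return available, false // no raw pressure: nothing to do this pass
	}
	available = 1100 * mi // pretend image+container GC can only free up to ~1.1Gi
	if !thresholdMet(available, gcEnforcesMinReclaim) {
		return available, false // reclaim deemed sufficient: return without evicting
	}
	return 16 * gi, true // evict a pod, which frees plenty of space
}

func main() {
	for _, enforce := range []bool{false, true} {
		available, evicted := 900*mi, false
		for pass := 0; pass < 3 && !evicted; pass++ {
			available, evicted = syncPass(available, enforce)
			// the real manager re-checks previously met thresholds WITH min
			// reclaim enforced, so 1.1Gi still reads as DiskPressure
			fmt.Printf("enforce=%v pass=%d available=%dMi evicted=%v underPressure=%v\n",
				enforce, pass, available/mi, evicted, thresholdMet(available, true))
		}
	}
}

With enforce=false the loop never evicts and never leaves pressure, the stuck state reported in issue 99094; with enforce=true the first pass falls through to pod eviction and the pressure clears.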