Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-23 19:56:01 +00:00
Merge pull request #99095 from maxlaverse/fix_kubelet_stuck_in_diskpressure
Prevent Kubelet from getting stuck in DiskPressure when imagefs minReclaim is set
commit 8b057cdfa4
@@ -437,8 +437,9 @@ func (m *managerImpl) reclaimNodeLevelResources(signalToReclaim evictionapi.Signal
 		observations, _ := makeSignalObservations(summary)
 		debugLogObservations("observations after resource reclaim", observations)
 
-		// determine the set of thresholds met independent of grace period
-		thresholds := thresholdsMet(m.config.Thresholds, observations, false)
+		// evaluate all thresholds independently of their grace period to see if with
+		// the new observations, we think we have met min reclaim goals
+		thresholds := thresholdsMet(m.config.Thresholds, observations, true)
 		debugLogThresholdsWithObservation("thresholds after resource reclaim - ignoring grace period", thresholds, observations)
 
 		if len(thresholds) == 0 {
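Why flipping the last argument of thresholdsMet from false to true matters: after image and container GC, the eviction manager re-checks the configured thresholds against fresh stats. With false, a GC pass that dips just below the threshold value is treated as sufficient even when the configured minReclaim has not been satisfied, so no pod is evicted, yet the manager keeps treating the threshold as unresolved until minReclaim is met, which is how the kubelet could stay stuck in DiskPressure. The sketch below is a minimal, self-contained illustration of that comparison, not the kubelet's actual helper; the 1Gi threshold and 500Mi minReclaim are assumed values chosen to line up with the numbers used in the test hunk further down.

// sketch.go: illustrative only; not the kubelet's implementation.
package main

import "fmt"

// stillMet reports whether an eviction threshold should still be considered
// met at the given availability. With enforceMinReclaim, availability must
// rise past threshold+minReclaim before the signal counts as resolved.
func stillMet(availableGi, thresholdGi, minReclaimGi float64, enforceMinReclaim bool) bool {
	bar := thresholdGi
	if enforceMinReclaim {
		bar += minReclaimGi
	}
	return availableGi < bar
}

func main() {
	// GC raised availability from 0.9Gi to 1.1Gi; threshold 1Gi, minReclaim 0.5Gi (assumed).
	fmt.Println(stillMet(1.1, 1.0, 0.5, false)) // false: old behaviour, reclaim looks sufficient, no pod is evicted
	fmt.Println(stillMet(1.1, 1.0, 0.5, true))  // true: min reclaim goal not met, so the manager falls back to evicting pods
}

With the fix, the same scenario in the added test below ends with podKiller.pod being set, confirming that a pod eviction follows when GC alone cannot satisfy minReclaim.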
@@ -886,6 +886,51 @@ func TestNodeReclaimFuncs(t *testing.T) {
 		t.Errorf("Manager should not report disk pressure")
 	}
 
+	// synchronize
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should not have disk pressure
+	if manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should not report disk pressure")
+	}
+
+	// induce hard threshold
+	fakeClock.Step(1 * time.Minute)
+	summaryProvider.result = summaryStatsMaker(".9Gi", "200Gi", podStats)
+	// make GC return disk usage below the threshold, but not satisfying minReclaim
+	diskGC.summaryAfterGC = summaryStatsMaker("1.1Gi", "200Gi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should have disk pressure
+	if !manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should report disk pressure since soft threshold was met")
+	}
+
+	// verify image gc was invoked
+	if !diskGC.imageGCInvoked || !diskGC.containerGCInvoked {
+		t.Errorf("Manager should have invoked image gc")
+	}
+
+	// verify a pod was killed because image gc was not enough to satisfy minReclaim
+	if podKiller.pod == nil {
+		t.Errorf("Manager should have killed a pod, but didn't")
+	}
+
+	// reset state
+	diskGC.imageGCInvoked = false
+	diskGC.containerGCInvoked = false
+	podKiller.pod = nil
+
+	// remove disk pressure
+	fakeClock.Step(20 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should not have disk pressure
+	if manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should not report disk pressure")
+	}
+
 	// induce disk pressure!
 	fakeClock.Step(1 * time.Minute)
 	summaryProvider.result = summaryStatsMaker("400Mi", "200Gi", podStats)