Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-08-07 19:23:40 +00:00)
Merge pull request #25772 from derekwaynecarr/eviction-max-grace
Automatic merge from submit-queue

Add support for limiting grace period during soft eviction

Adds eviction manager support in the kubelet for a maximum pod graceful termination period to use when a soft eviction threshold is met.

```release-note
Kubelet evicts pods when available memory falls below configured eviction thresholds
```

/cc @vishh
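For orientation, the decision this change introduces can be summarized with a minimal, self-contained Go sketch. The Threshold and Config types below are simplified stand-ins that mirror the names in the diff; they are not the real kubelet eviction package API.

```go
package main

import (
	"fmt"
	"time"
)

// Simplified stand-ins for the eviction package types (illustrative only).
type Threshold struct {
	Signal      string
	GracePeriod time.Duration
}

type Config struct {
	MaxPodGracePeriodSeconds int64
	Thresholds               []Threshold
}

// isSoftEviction mirrors the new helper: an eviction is "soft" only if every
// threshold met for the starved resource carries a non-zero grace period.
func isSoftEviction(thresholds []Threshold, starvedSignal string) bool {
	for _, t := range thresholds {
		if t.Signal != starvedSignal {
			continue
		}
		if t.GracePeriod == 0 {
			return false
		}
	}
	return true
}

func main() {
	cfg := Config{
		MaxPodGracePeriodSeconds: 30,
		Thresholds: []Threshold{
			// A soft memory threshold: it has a grace period attached.
			{Signal: "memory.available", GracePeriod: 2 * time.Minute},
		},
	}

	// As in synchronize(): default to an immediate kill (override 0), but
	// allow up to the configured maximum grace period for soft evictions.
	gracePeriodOverride := int64(0)
	if isSoftEviction(cfg.Thresholds, "memory.available") {
		gracePeriodOverride = cfg.MaxPodGracePeriodSeconds
	}
	fmt.Printf("pod kill grace period override: %ds\n", gracePeriodOverride)
}
```

A hard threshold (grace period of zero) for the same signal keeps the override at zero and forces an immediate kill, which is exactly the distinction isSoftEviction draws in the diff below.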
Commit: 4bb085c927
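The setting is wired in at kubelet startup: UnsecuredKubeletConfig passes the configured maximum pod grace period into the eviction manager's Config.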
@@ -191,6 +191,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
     }
     evictionConfig := eviction.Config{
         PressureTransitionPeriod: s.EvictionPressureTransitionPeriod.Duration,
+        MaxPodGracePeriodSeconds: int64(s.EvictionMaxPodGracePeriod),
         Thresholds: thresholds,
     }
 
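A new helper, isSoftEviction, reports whether the thresholds met for the starved resource are all soft thresholds, i.e. each one carries a grace period.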
@@ -551,3 +551,16 @@ func reclaimResources(thresholds []Threshold) []api.ResourceName {
     }
     return results
 }
+
+// isSoftEviction returns true if the thresholds met for the starved resource are only soft thresholds
+func isSoftEviction(thresholds []Threshold, starvedResource api.ResourceName) bool {
+    for _, threshold := range thresholds {
+        if resourceToCheck := signalToResource[threshold.Signal]; resourceToCheck != starvedResource {
+            continue
+        }
+        if threshold.GracePeriod == time.Duration(0) {
+            return false
+        }
+    }
+    return true
+}
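In the eviction manager's synchronize loop, the "no resources are starved" message is demoted to log verbosity 3.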
@@ -158,7 +158,7 @@ func (m *managerImpl) synchronize(podFunc ActivePodsFunc) {
     // determine the set of resources under starvation
     starvedResources := reclaimResources(thresholds)
     if len(starvedResources) == 0 {
-        glog.Infof("eviction manager: no resources are starved")
+        glog.V(3).Infof("eviction manager: no resources are starved")
         return
     }
 
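When a starved resource is selected for reclaim, synchronize now records whether the associated eviction is soft or hard.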
@@ -167,6 +167,9 @@ func (m *managerImpl) synchronize(podFunc ActivePodsFunc) {
     resourceToReclaim := starvedResources[0]
     glog.Warningf("eviction manager: attempting to reclaim %v", resourceToReclaim)
 
+    // determine if this is a soft or hard eviction associated with the resource
+    softEviction := isSoftEviction(thresholds, resourceToReclaim)
+
     // record an event about the resources we are now attempting to reclaim via eviction
     m.recorder.Eventf(m.nodeRef, api.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)
 
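The grace period override handed to the pod killer stays at zero for hard evictions and is raised to the configured maximum only for soft evictions, replacing the earlier TODO.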
@@ -199,8 +202,10 @@ func (m *managerImpl) synchronize(podFunc ActivePodsFunc) {
         }
         // record that we are evicting the pod
         m.recorder.Eventf(pod, api.EventTypeWarning, reason, message)
-        // TODO this needs to be based on soft or hard eviction threshold being met, soft eviction will allow a configured value.
         gracePeriodOverride := int64(0)
+        if softEviction {
+            gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
+        }
         // this is a blocking call and should only return when the pod and its containers are killed.
         err := m.killPodFunc(pod, status, &gracePeriodOverride)
         if err != nil {
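TestMemoryPressure configures MaxPodGracePeriodSeconds and, in the following two hunks, adds a second soft memory threshold (2Gi with a two-minute grace period) alongside the existing hard 1Gi threshold.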
@@ -98,6 +98,7 @@ func TestMemoryPressure(t *testing.T) {
     nodeRef := &api.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
     config := Config{
+        MaxPodGracePeriodSeconds: 5,
         PressureTransitionPeriod: time.Minute * 5,
         Thresholds: []Threshold{
             {
@@ -105,6 +106,12 @@ func TestMemoryPressure(t *testing.T) {
                 Operator: OpLessThan,
                 Value: quantityMustParse("1Gi"),
             },
+            {
+                Signal: SignalMemoryAvailable,
+                Operator: OpLessThan,
+                Value: quantityMustParse("2Gi"),
+                GracePeriod: time.Minute * 2,
+            },
         },
     }
     summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
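The test then drives the soft-eviction path: it crosses the soft threshold, verifies nothing is killed before the grace period elapses, advances the clock past the grace period, and checks that the evicted pod was killed with the configured maximum grace period before pressure is relieved.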
@@ -139,6 +146,56 @@ func TestMemoryPressure(t *testing.T) {
         }
     }
 
+    // induce soft threshold
+    fakeClock.Step(1 * time.Minute)
+    summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
+    manager.synchronize(activePodsFunc)
+
+    // we should have memory pressure
+    if !manager.IsUnderMemoryPressure() {
+        t.Errorf("Manager should report memory pressure since soft threshold was met")
+    }
+
+    // verify no pod was yet killed because there has not yet been enough time passed.
+    if podKiller.pod != nil {
+        t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod)
+    }
+
+    // step forward in time pass the grace period
+    fakeClock.Step(3 * time.Minute)
+    summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
+    manager.synchronize(activePodsFunc)
+
+    // we should have memory pressure
+    if !manager.IsUnderMemoryPressure() {
+        t.Errorf("Manager should report memory pressure since soft threshold was met")
+    }
+
+    // verify the right pod was killed with the right grace period.
+    if podKiller.pod != pods[0] {
+        t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0])
+    }
+    if podKiller.gracePeriodOverride == nil {
+        t.Errorf("Manager chose to kill pod but should have had a grace period override.")
+    }
+    observedGracePeriod := *podKiller.gracePeriodOverride
+    if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
+        t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
+    }
+    // reset state
+    podKiller.pod = nil
+    podKiller.gracePeriodOverride = nil
+
+    // remove memory pressure
+    fakeClock.Step(20 * time.Minute)
+    summaryProvider.result = summaryStatsMaker("3Gi", podStats)
+    manager.synchronize(activePodsFunc)
+
+    // we should not have memory pressure
+    if manager.IsUnderMemoryPressure() {
+        t.Errorf("Manager should not report memory pressure")
+    }
+
     // induce memory pressure!
     fakeClock.Step(1 * time.Minute)
     summaryProvider.result = summaryStatsMaker("500Mi", podStats)
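For the later hard-eviction kill, the test additionally asserts that the grace period override is zero (immediate termination).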
@@ -153,6 +210,10 @@ func TestMemoryPressure(t *testing.T) {
     if podKiller.pod != pods[0] {
         t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0])
     }
+    observedGracePeriod = *podKiller.gracePeriodOverride
+    if observedGracePeriod != int64(0) {
+        t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
+    }
 
     // the best-effort pod should not admit, burstable should
     expected = []bool{false, true}
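Finally, the eviction Config type documents the new setting.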
@@ -44,6 +44,8 @@ const (
 type Config struct {
     // PressureTransitionPeriod is duration the kubelet has to wait before transititioning out of a pressure condition.
     PressureTransitionPeriod time.Duration
+    // Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met.
+    MaxPodGracePeriodSeconds int64
     // Thresholds define the set of conditions monitored to trigger eviction.
     Thresholds []Threshold
 }