diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 85cf4584161..3a274e2839f 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -191,6 +191,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) { } evictionConfig := eviction.Config{ PressureTransitionPeriod: s.EvictionPressureTransitionPeriod.Duration, + MaxPodGracePeriodSeconds: int64(s.EvictionMaxPodGracePeriod), Thresholds: thresholds, } diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go index 7a0e3b2f4c0..d5b95ab1975 100644 --- a/pkg/kubelet/eviction/helpers.go +++ b/pkg/kubelet/eviction/helpers.go @@ -551,3 +551,16 @@ func reclaimResources(thresholds []Threshold) []api.ResourceName { } return results } + +// isSoftEviction returns true if the thresholds met for the starved resource are only soft thresholds +func isSoftEviction(thresholds []Threshold, starvedResource api.ResourceName) bool { + for _, threshold := range thresholds { + if resourceToCheck := signalToResource[threshold.Signal]; resourceToCheck != starvedResource { + continue + } + if threshold.GracePeriod == time.Duration(0) { + return false + } + } + return true +} diff --git a/pkg/kubelet/eviction/manager.go b/pkg/kubelet/eviction/manager.go index 8a6afbc3cfb..a672ebb9b88 100644 --- a/pkg/kubelet/eviction/manager.go +++ b/pkg/kubelet/eviction/manager.go @@ -158,7 +158,7 @@ func (m *managerImpl) synchronize(podFunc ActivePodsFunc) { // determine the set of resources under starvation starvedResources := reclaimResources(thresholds) if len(starvedResources) == 0 { - glog.Infof("eviction manager: no resources are starved") + glog.V(3).Infof("eviction manager: no resources are starved") return } @@ -167,6 +167,9 @@ func (m *managerImpl) synchronize(podFunc ActivePodsFunc) { resourceToReclaim := starvedResources[0] glog.Warningf("eviction manager: attempting to reclaim %v", resourceToReclaim) + // determine if this is a soft or hard eviction associated 
with the resource + softEviction := isSoftEviction(thresholds, resourceToReclaim) + // record an event about the resources we are now attempting to reclaim via eviction m.recorder.Eventf(m.nodeRef, api.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim) @@ -199,8 +202,10 @@ func (m *managerImpl) synchronize(podFunc ActivePodsFunc) { } // record that we are evicting the pod m.recorder.Eventf(pod, api.EventTypeWarning, reason, message) - // TODO this needs to be based on soft or hard eviction threshold being met, soft eviction will allow a configured value. gracePeriodOverride := int64(0) + if softEviction { + gracePeriodOverride = m.config.MaxPodGracePeriodSeconds + } // this is a blocking call and should only return when the pod and its containers are killed. err := m.killPodFunc(pod, status, &gracePeriodOverride) if err != nil { diff --git a/pkg/kubelet/eviction/manager_test.go b/pkg/kubelet/eviction/manager_test.go index a936a2f18d8..c51dbd8200d 100644 --- a/pkg/kubelet/eviction/manager_test.go +++ b/pkg/kubelet/eviction/manager_test.go @@ -98,6 +98,7 @@ func TestMemoryPressure(t *testing.T) { nodeRef := &api.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} config := Config{ + MaxPodGracePeriodSeconds: 5, PressureTransitionPeriod: time.Minute * 5, Thresholds: []Threshold{ { @@ -105,6 +106,12 @@ func TestMemoryPressure(t *testing.T) { Operator: OpLessThan, Value: quantityMustParse("1Gi"), }, + { + Signal: SignalMemoryAvailable, + Operator: OpLessThan, + Value: quantityMustParse("2Gi"), + GracePeriod: time.Minute * 2, + }, }, } summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)} @@ -139,6 +146,56 @@ func TestMemoryPressure(t *testing.T) { } } + // induce soft threshold + fakeClock.Step(1 * time.Minute) + summaryProvider.result = summaryStatsMaker("1500Mi", podStats) + manager.synchronize(activePodsFunc) + + // we should have memory pressure + if 
!manager.IsUnderMemoryPressure() { + t.Errorf("Manager should report memory pressure since soft threshold was met") + } + + // verify no pod was yet killed because there has not yet been enough time passed. + if podKiller.pod != nil { + t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod) + } + + // step forward in time pass the grace period + fakeClock.Step(3 * time.Minute) + summaryProvider.result = summaryStatsMaker("1500Mi", podStats) + manager.synchronize(activePodsFunc) + + // we should have memory pressure + if !manager.IsUnderMemoryPressure() { + t.Errorf("Manager should report memory pressure since soft threshold was met") + } + + // verify the right pod was killed with the right grace period. + if podKiller.pod != pods[0] { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0]) + } + if podKiller.gracePeriodOverride == nil { + t.Errorf("Manager chose to kill pod but should have had a grace period override.") + } + observedGracePeriod := *podKiller.gracePeriodOverride + if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds { + t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod) + } + // reset state + podKiller.pod = nil + podKiller.gracePeriodOverride = nil + + // remove memory pressure + fakeClock.Step(20 * time.Minute) + summaryProvider.result = summaryStatsMaker("3Gi", podStats) + manager.synchronize(activePodsFunc) + + // we should not have memory pressure + if manager.IsUnderMemoryPressure() { + t.Errorf("Manager should not report memory pressure") + } + // induce memory pressure! 
fakeClock.Step(1 * time.Minute) summaryProvider.result = summaryStatsMaker("500Mi", podStats) @@ -153,6 +210,10 @@ func TestMemoryPressure(t *testing.T) { if podKiller.pod != pods[0] { t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0]) } + observedGracePeriod = *podKiller.gracePeriodOverride + if observedGracePeriod != int64(0) { + t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod) + } // the best-effort pod should not admit, burstable should expected = []bool{false, true} diff --git a/pkg/kubelet/eviction/types.go b/pkg/kubelet/eviction/types.go index d75406d1b45..3b4470d7c7b 100644 --- a/pkg/kubelet/eviction/types.go +++ b/pkg/kubelet/eviction/types.go @@ -44,6 +44,8 @@ const ( type Config struct { // PressureTransitionPeriod is duration the kubelet has to wait before transititioning out of a pressure condition. PressureTransitionPeriod time.Duration + // MaxPodGracePeriodSeconds is the maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met. + MaxPodGracePeriodSeconds int64 // Thresholds define the set of conditions monitored to trigger eviction. Thresholds []Threshold }