Merge pull request #32213 from derekwaynecarr/log-an-event-when-eviction-cannot-kill-fast-enough

Automatic merge from submit-queue

Log an event when container runtime exceeds grace-period during eviction

While debugging flakes in eviction, I encountered scenarios where the container runtime did not evict a pod within the allowed grace period. This could result in situations where a BestEffort (BE) pod would not get killed fast enough, and therefore a Burstable (Bu) pod was killed next (assuming there were no other BE pods).

/cc @mtaufen @vishh
This commit is contained in:
Kubernetes Submit Queue 2016-09-07 14:11:06 -07:00 committed by GitHub
commit 4c9ab4e856
5 changed files with 7 additions and 4 deletions

View File

@ -24,6 +24,7 @@ const (
FailedToStartContainer = "Failed"
KillingContainer = "Killing"
BackOffStartContainer = "BackOff"
ExceededGracePeriod = "ExceededGracePeriod"
// Image event reason list
PullingImage = "Pulling"

View File

@ -701,7 +701,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
klet.setNodeStatusFuncs = klet.defaultNodeStatusFuncs()
// setup eviction manager
evictionManager, evictionAdmitHandler, err := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers), klet.imageManager, kubeDeps.Recorder, nodeRef, klet.clock)
evictionManager, evictionAdmitHandler, err := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.imageManager, kubeDeps.Recorder, nodeRef, klet.clock)
if err != nil {
return nil, fmt.Errorf("failed to initialize eviction manager: %v", err)

View File

@ -226,7 +226,7 @@ func newTestKubeletWithImageList(
Namespace: "",
}
// setup eviction manager
evictionManager, evictionAdmitHandler, err := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, killPodNow(kubelet.podWorkers), kubelet.imageManager, fakeRecorder, nodeRef, kubelet.clock)
evictionManager, evictionAdmitHandler, err := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.imageManager, fakeRecorder, nodeRef, kubelet.clock)
require.NoError(t, err, "Failed to initialize eviction manager")
kubelet.evictionManager = evictionManager

View File

@ -282,7 +282,7 @@ func (p *podWorkers) checkForUpdates(uid types.UID) {
// killPodNow returns a KillPodFunc that can be used to kill a pod.
// It is intended to be injected into other modules that need to kill a pod.
func killPodNow(podWorkers PodWorkers) eviction.KillPodFunc {
func killPodNow(podWorkers PodWorkers, recorder record.EventRecorder) eviction.KillPodFunc {
return func(pod *api.Pod, status api.PodStatus, gracePeriodOverride *int64) error {
// determine the grace period to use when killing the pod
gracePeriod := int64(0)
@ -325,6 +325,7 @@ func killPodNow(podWorkers PodWorkers) eviction.KillPodFunc {
case r := <-ch:
return r.err
case <-time.After(timeoutDuration):
recorder.Eventf(pod, api.EventTypeWarning, events.ExceededGracePeriod, "Container runtime did not kill the pod within specified grace period.")
return fmt.Errorf("timeout waiting to kill pod")
}
}

View File

@ -331,8 +331,9 @@ func TestFakePodWorkers(t *testing.T) {
// TestKillPodNowFunc tests the blocking kill pod function works with pod workers as expected.
func TestKillPodNowFunc(t *testing.T) {
fakeRecorder := &record.FakeRecorder{}
podWorkers, processed := createPodWorkers()
killPodFunc := killPodNow(podWorkers)
killPodFunc := killPodNow(podWorkers, fakeRecorder)
pod := newPod("test", "test")
gracePeriodOverride := int64(0)
err := killPodFunc(pod, api.PodStatus{Phase: api.PodFailed, Reason: "reason", Message: "message"}, &gracePeriodOverride)