From 2b81e751e107cbd6925d93a8acb54795d8bfe113 Mon Sep 17 00:00:00 2001 From: Janet Kuo Date: Fri, 26 Feb 2016 12:02:05 -0800 Subject: [PATCH 1/2] Better choices of what pods to kill --- pkg/controller/controller_utils.go | 52 +++++++++++++++++++++---- pkg/controller/controller_utils_test.go | 24 +++++++++++- 2 files changed, 67 insertions(+), 9 deletions(-) diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index fc8d2520f8b..371835ebdad 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -34,6 +34,7 @@ import ( "k8s.io/kubernetes/pkg/labels" "k8s.io/kubernetes/pkg/runtime" "k8s.io/kubernetes/pkg/util" + "k8s.io/kubernetes/pkg/util/integer" ) const CreatedByAnnotation = "kubernetes.io/created-by" @@ -409,22 +410,59 @@ func (s ActivePods) Len() int { return len(s) } func (s ActivePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] } func (s ActivePods) Less(i, j int) bool { - // Unassigned < assigned - if s[i].Spec.NodeName == "" && s[j].Spec.NodeName != "" { - return true + // 1. Unassigned < assigned + // If only one of the pods is unassigned, the unassigned one is smaller + if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) { + return len(s[i].Spec.NodeName) == 0 } - // PodPending < PodUnknown < PodRunning + // 2. PodPending < PodUnknown < PodRunning m := map[api.PodPhase]int{api.PodPending: 0, api.PodUnknown: 1, api.PodRunning: 2} if m[s[i].Status.Phase] != m[s[j].Status.Phase] { return m[s[i].Status.Phase] < m[s[j].Status.Phase] } - // Not ready < ready - if !api.IsPodReady(s[i]) && api.IsPodReady(s[j]) { - return true + // 3. Not ready < ready + // If only one of the pods is not ready, the not ready one is smaller + if api.IsPodReady(s[i]) != api.IsPodReady(s[j]) { + return !api.IsPodReady(s[i]) + } + // TODO: take availability into account when we push minReadySeconds information from deployment into pods, + // see https://github.com/kubernetes/kubernetes/issues/22065 + // 4. Been ready for less time < more time + // If both pods are ready, the latest ready one is smaller + if api.IsPodReady(s[i]) && api.IsPodReady(s[j]) && !podReadyTime(s[i]).Equal(podReadyTime(s[j])) { + return podReadyTime(s[i]).After(podReadyTime(s[j]).Time) + } + // 5. Pods with containers with higher restart counts < lower restart counts + if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) { + return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j]) + } + // 6. Newer pods < older pods + if !s[i].CreationTimestamp.Equal(s[j].CreationTimestamp) { + return s[i].CreationTimestamp.After(s[j].CreationTimestamp.Time) } return false } +func podReadyTime(pod *api.Pod) unversioned.Time { + if api.IsPodReady(pod) { + for _, c := range pod.Status.Conditions { + // we only care about pod ready conditions + if c.Type == api.PodReady && c.Status == api.ConditionTrue { + return c.LastTransitionTime + } + } + } + return unversioned.Time{} +} + +func maxContainerRestarts(pod *api.Pod) int { + maxRestarts := 0 + for _, c := range pod.Status.ContainerStatuses { + maxRestarts = integer.IntMax(maxRestarts, c.RestartCount) + } + return maxRestarts +} + // FilterActivePods returns pods that have not terminated. func FilterActivePods(pods []api.Pod) []*api.Pod { var result []*api.Pod diff --git a/pkg/controller/controller_utils_test.go b/pkg/controller/controller_utils_test.go index 9ad8ded5c85..bac720313fb 100644 --- a/pkg/controller/controller_utils_test.go +++ b/pkg/controller/controller_utils_test.go @@ -245,7 +245,7 @@ func TestActivePodFiltering(t *testing.T) { } func TestSortingActivePods(t *testing.T) { - numPods := 5 + numPods := 8 // This rc is not needed by the test, only the newPodList to give the pods labels/a namespace. rc := newReplicationController(0) podList := newPodList(nil, numPods, api.PodRunning, rc) @@ -267,9 +267,29 @@ func TestSortingActivePods(t *testing.T) { pods[3].Spec.NodeName = "foo" pods[3].Status.Phase = api.PodRunning // pods[4] is running and ready. + now := unversioned.Now() pods[4].Spec.NodeName = "foo" pods[4].Status.Phase = api.PodRunning - pods[4].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue}} + pods[4].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: now}} + pods[4].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 3}, {RestartCount: 0}} + // pods[5] is running ready for a longer time than pods[4]. + then := unversioned.Time{Time: now.AddDate(0, -1, 0)} + pods[5].Spec.NodeName = "foo" + pods[5].Status.Phase = api.PodRunning + pods[5].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: then}} + pods[5].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 3}, {RestartCount: 0}} + // pods[6] has lower container restart count than pods[5]. + pods[6].Spec.NodeName = "foo" + pods[6].Status.Phase = api.PodRunning + pods[6].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: then}} + pods[6].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 2}, {RestartCount: 1}} + pods[6].CreationTimestamp = now + // pods[7] is older than pods[6]. + pods[7].Spec.NodeName = "foo" + pods[7].Status.Phase = api.PodRunning + pods[7].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: then}} + pods[7].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 2}, {RestartCount: 1}} + pods[7].CreationTimestamp = then getOrder := func(pods []*api.Pod) []string { names := make([]string, len(pods)) From 9185c05a919106be011308e488a2088ad449a8ce Mon Sep 17 00:00:00 2001 From: Janet Kuo Date: Fri, 26 Feb 2016 15:32:49 -0800 Subject: [PATCH 2/2] When comparing timestamp of pods, kill pods with zero timestamps first --- pkg/controller/controller_utils.go | 17 ++++++++++++---- pkg/controller/controller_utils_test.go | 27 +++++++++++++++---------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index 371835ebdad..8a60fe35361 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -427,22 +427,31 @@ func (s ActivePods) Less(i, j int) bool { } // TODO: take availability into account when we push minReadySeconds information from deployment into pods, // see https://github.com/kubernetes/kubernetes/issues/22065 - // 4. Been ready for less time < more time + // 4. Been ready for empty time < less time < more time // If both pods are ready, the latest ready one is smaller if api.IsPodReady(s[i]) && api.IsPodReady(s[j]) && !podReadyTime(s[i]).Equal(podReadyTime(s[j])) { - return podReadyTime(s[i]).After(podReadyTime(s[j]).Time) + return afterOrZero(podReadyTime(s[i]), podReadyTime(s[j])) } // 5. Pods with containers with higher restart counts < lower restart counts if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) { return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j]) } - // 6. Newer pods < older pods + // 6. Empty creation time pods < newer pods < older pods if !s[i].CreationTimestamp.Equal(s[j].CreationTimestamp) { - return s[i].CreationTimestamp.After(s[j].CreationTimestamp.Time) + return afterOrZero(s[i].CreationTimestamp, s[j].CreationTimestamp) } return false } +// afterOrZero checks if time t1 is after time t2; if one of them +// is zero, the zero time is seen as after non-zero time. +func afterOrZero(t1, t2 unversioned.Time) bool { + if t1.Time.IsZero() || t2.Time.IsZero() { + return t1.Time.IsZero() + } + return t1.After(t2.Time) +} + func podReadyTime(pod *api.Pod) unversioned.Time { if api.IsPodReady(pod) { for _, c := range pod.Status.Conditions { diff --git a/pkg/controller/controller_utils_test.go b/pkg/controller/controller_utils_test.go index bac720313fb..7799985d9d7 100644 --- a/pkg/controller/controller_utils_test.go +++ b/pkg/controller/controller_utils_test.go @@ -245,7 +245,7 @@ func TestActivePodFiltering(t *testing.T) { } func TestSortingActivePods(t *testing.T) { - numPods := 8 + numPods := 9 // This rc is not needed by the test, only the newPodList to give the pods labels/a namespace. rc := newReplicationController(0) podList := newPodList(nil, numPods, api.PodRunning, rc) @@ -266,30 +266,35 @@ func TestSortingActivePods(t *testing.T) { // pods[3] is running but not ready. pods[3].Spec.NodeName = "foo" pods[3].Status.Phase = api.PodRunning - // pods[4] is running and ready. + // pods[4] is running and ready but without LastTransitionTime. now := unversioned.Now() pods[4].Spec.NodeName = "foo" pods[4].Status.Phase = api.PodRunning - pods[4].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: now}} + pods[4].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue}} pods[4].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 3}, {RestartCount: 0}} - // pods[5] is running ready for a longer time than pods[4]. - then := unversioned.Time{Time: now.AddDate(0, -1, 0)} + // pods[5] is running and ready and with LastTransitionTime. pods[5].Spec.NodeName = "foo" pods[5].Status.Phase = api.PodRunning - pods[5].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: then}} + pods[5].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: now}} pods[5].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 3}, {RestartCount: 0}} - // pods[6] has lower container restart count than pods[5]. + // pods[6] is running ready for a longer time than pods[5]. + then := unversioned.Time{Time: now.AddDate(0, -1, 0)} pods[6].Spec.NodeName = "foo" pods[6].Status.Phase = api.PodRunning pods[6].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: then}} - pods[6].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 2}, {RestartCount: 1}} - pods[6].CreationTimestamp = now - // pods[7] is older than pods[6]. + pods[6].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 3}, {RestartCount: 0}} + // pods[7] has lower container restart count than pods[6]. pods[7].Spec.NodeName = "foo" pods[7].Status.Phase = api.PodRunning pods[7].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: then}} pods[7].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 2}, {RestartCount: 1}} - pods[7].CreationTimestamp = then + pods[7].CreationTimestamp = now + // pods[8] is older than pods[7]. + pods[8].Spec.NodeName = "foo" + pods[8].Status.Phase = api.PodRunning + pods[8].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue, LastTransitionTime: then}} + pods[8].Status.ContainerStatuses = []api.ContainerStatus{{RestartCount: 2}, {RestartCount: 1}} + pods[8].CreationTimestamp = then getOrder := func(pods []*api.Pod) []string { names := make([]string, len(pods))