diff --git a/pkg/controller/podgc/gc_controller.go b/pkg/controller/podgc/gc_controller.go
index c863749ced8..2b16017e5a6 100644
--- a/pkg/controller/podgc/gc_controller.go
+++ b/pkg/controller/podgc/gc_controller.go
@@ -38,6 +38,7 @@ import (
 	"k8s.io/client-go/util/workqueue"
 	"k8s.io/klog/v2"
 	"k8s.io/kubernetes/pkg/features"
+	"k8s.io/kubernetes/pkg/kubelet/eviction"
 	nodeutil "k8s.io/kubernetes/pkg/util/node"
 	"k8s.io/kubernetes/pkg/util/taints"
 )
@@ -172,7 +173,7 @@ func (gcc *PodGCController) gcTerminating(ctx context.Context, pods []*v1.Pod) {
 	klog.V(4).Infof("Garbage collecting %v pods that are terminating on node tainted with node.kubernetes.io/out-of-service", deleteCount)
 	// sort only when necessary
-	sort.Sort(byCreationTimestamp(terminatingPods))
+	sort.Sort(byEvictionAndCreationTimestamp(terminatingPods))
 	var wait sync.WaitGroup
 	for i := 0; i < deleteCount; i++ {
 		wait.Add(1)
@@ -206,7 +207,7 @@ func (gcc *PodGCController) gcTerminated(ctx context.Context, pods []*v1.Pod) {
 	klog.InfoS("Garbage collecting pods", "numPods", deleteCount)
 	// sort only when necessary
-	sort.Sort(byCreationTimestamp(terminatedPods))
+	sort.Sort(byEvictionAndCreationTimestamp(terminatedPods))
 	var wait sync.WaitGroup
 	for i := 0; i < deleteCount; i++ {
 		wait.Add(1)
@@ -308,13 +309,20 @@ func (gcc *PodGCController) gcUnscheduledTerminating(ctx context.Context, pods [
 	}
 }
 
-// byCreationTimestamp sorts a list by creation timestamp, using their names as a tie breaker.
-type byCreationTimestamp []*v1.Pod
+// byEvictionAndCreationTimestamp sorts a list by Evicted status and then creation timestamp,
+// using their names as a tie breaker.
+// Evicted pods will be deleted first to avoid impact on terminated pods created by controllers.
+type byEvictionAndCreationTimestamp []*v1.Pod
 
-func (o byCreationTimestamp) Len() int      { return len(o) }
-func (o byCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
+func (o byEvictionAndCreationTimestamp) Len() int      { return len(o) }
+func (o byEvictionAndCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
 
-func (o byCreationTimestamp) Less(i, j int) bool {
+func (o byEvictionAndCreationTimestamp) Less(i, j int) bool {
+	iEvicted, jEvicted := eviction.PodIsEvicted(o[i].Status), eviction.PodIsEvicted(o[j].Status)
+	// Evicted pod is smaller
+	if iEvicted != jEvicted {
+		return iEvicted
+	}
 	if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
 		return o[i].Name < o[j].Name
 	}
diff --git a/pkg/controller/podgc/gc_controller_test.go b/pkg/controller/podgc/gc_controller_test.go
index e021a87fffe..d8802d53732 100644
--- a/pkg/controller/podgc/gc_controller_test.go
+++ b/pkg/controller/podgc/gc_controller_test.go
@@ -39,6 +39,7 @@ import (
 	"k8s.io/kubernetes/pkg/controller"
 	"k8s.io/kubernetes/pkg/controller/testutil"
 	"k8s.io/kubernetes/pkg/features"
+	"k8s.io/kubernetes/pkg/kubelet/eviction"
 	testingclock "k8s.io/utils/clock/testing"
 )
 
@@ -55,8 +56,9 @@ func NewFromClient(kubeClient clientset.Interface, terminatedPodThreshold int) (
 
 func TestGCTerminated(t *testing.T) {
 	type nameToPhase struct {
-		name  string
-		phase v1.PodPhase
+		name   string
+		phase  v1.PodPhase
+		reason string
 	}
 
 	testCases := []struct {
@@ -127,6 +129,24 @@ func TestGCTerminated(t *testing.T) {
 			threshold:       5,
 			deletedPodNames: sets.NewString(),
 		},
+		{
+			pods: []nameToPhase{
+				{name: "a", phase: v1.PodFailed},
+				{name: "b", phase: v1.PodSucceeded},
+				{name: "c", phase: v1.PodFailed, reason: eviction.Reason},
+			},
+			threshold:       1,
+			deletedPodNames: sets.NewString("c", "a"),
+		},
+		{
+			pods: []nameToPhase{
+				{name: "a", phase: v1.PodRunning},
+				{name: "b", phase: v1.PodSucceeded},
+				{name: "c", phase: v1.PodFailed, reason: eviction.Reason},
+			},
+			threshold:       1,
+			deletedPodNames: sets.NewString("c"),
+		},
 	}
 
 	for _, test := range testCases {
@@ -140,7 +160,7 @@ func TestGCTerminated(t *testing.T) {
 			creationTime = creationTime.Add(1 * time.Hour)
 			pods = append(pods, &v1.Pod{
 				ObjectMeta: metav1.ObjectMeta{Name: pod.name, CreationTimestamp: metav1.Time{Time: creationTime}},
-				Status:     v1.PodStatus{Phase: pod.phase, Reason: pod.reason},
+				Status:     v1.PodStatus{Phase: pod.phase, Reason: pod.reason},
 				Spec:       v1.PodSpec{NodeName: "node"},
 			})
 		}
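Reviewer note (not part of the patch): a minimal standalone sketch of the ordering the new
byEvictionAndCreationTimestamp comparator produces. It mirrors the Less logic above using a
plain struct in place of *v1.Pod; the fakePod type, names, and timestamps are made up for
illustration, and "evicted" stands in for eviction.PodIsEvicted(pod.Status).

package main

import (
	"fmt"
	"sort"
	"time"
)

// fakePod models only the fields the comparator looks at.
type fakePod struct {
	name    string
	created time.Time
	evicted bool // true when Status.Reason == eviction.Reason ("Evicted")
}

func main() {
	base := time.Date(2022, 1, 1, 0, 0, 0, 0, time.UTC)
	pods := []fakePod{
		{name: "a", created: base, evicted: false},
		{name: "b", created: base.Add(1 * time.Hour), evicted: false},
		{name: "c", created: base.Add(2 * time.Hour), evicted: true},
	}

	// Same ordering as byEvictionAndCreationTimestamp.Less:
	// evicted pods first, then older pods, then names as a tie breaker.
	sort.Slice(pods, func(i, j int) bool {
		if pods[i].evicted != pods[j].evicted {
			return pods[i].evicted // evicted pod is smaller
		}
		if pods[i].created.Equal(pods[j].created) {
			return pods[i].name < pods[j].name
		}
		return pods[i].created.Before(pods[j].created)
	})

	// With threshold=1 the controller deletes len(pods)-threshold = 2 pods
	// from the front of this order: "c" (evicted) then "a" (oldest),
	// matching the new deletedPodNames ("c", "a") test case.
	for _, p := range pods {
		fmt.Println(p.name)
	}
	// Prints: c, a, b
}

Sorting evicted pods to the front means the threshold-based GC reclaims pods that eviction has
already given up on before it touches terminated pods that controllers (e.g. Jobs) may still
need to inspect.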