From 3833c0c349b53f08b4e063065d848854837b743c Mon Sep 17 00:00:00 2001 From: Michal Wozniak Date: Fri, 13 Jan 2023 12:08:52 +0100 Subject: [PATCH] PodGC should not add DisruptionTarget condition for pods which are in terminal phase --- pkg/controller/podgc/gc_controller.go | 47 +++++----------------- pkg/controller/podgc/gc_controller_test.go | 5 ++- test/integration/podgc/podgc_test.go | 35 +++++++++++++--- 3 files changed, 42 insertions(+), 45 deletions(-) diff --git a/pkg/controller/podgc/gc_controller.go b/pkg/controller/podgc/gc_controller.go index f6efaf7aed3..c863749ced8 100644 --- a/pkg/controller/podgc/gc_controller.go +++ b/pkg/controller/podgc/gc_controller.go @@ -329,32 +329,20 @@ func (gcc *PodGCController) markFailedAndDeletePodWithCondition(ctx context.Cont klog.InfoS("PodGC is force deleting Pod", "pod", klog.KRef(pod.Namespace, pod.Name)) if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) { - // Extact the pod status as PodGC may or may not own the pod phase, if - // it owns the phase then we need to send the field back if the condition - // is added. - podApply, err := corev1apply.ExtractPodStatus(pod, fieldManager) - if err != nil { - return nil - } - - // Set the status in case PodGC does not own any status fields yet - if podApply.Status == nil { - podApply.WithStatus(corev1apply.PodStatus()) - } - - updated := false - if condition != nil { - updatePodCondition(podApply.Status, condition) - updated = true - } // Mark the pod as failed - this is especially important in case the pod // is orphaned, in which case the pod would remain in the Running phase // forever as there is no kubelet running to change the phase. if pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed { + podApply := corev1apply.Pod(pod.Name, pod.Namespace).WithStatus(corev1apply.PodStatus()) + // we don't need to extract the pod apply configuration and can send + // only phase and the DisruptionTarget condition as PodGC would not + // own other fields. If the DisruptionTarget condition is owned by + // PodGC it means that it is in the Failed phase, so sending the + // condition will not be re-attempted. podApply.Status.WithPhase(v1.PodFailed) - updated = true - } - if updated { + if condition != nil { + podApply.Status.WithConditions(condition) + } if _, err := gcc.kubeClient.CoreV1().Pods(pod.Namespace).ApplyStatus(ctx, podApply, metav1.ApplyOptions{FieldManager: fieldManager, Force: true}); err != nil { return err } @@ -362,20 +350,3 @@ func (gcc *PodGCController) markFailedAndDeletePodWithCondition(ctx context.Cont } return gcc.kubeClient.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, *metav1.NewDeleteOptions(0)) } - -func updatePodCondition(podStatusApply *corev1apply.PodStatusApplyConfiguration, condition *corev1apply.PodConditionApplyConfiguration) { - if conditionIndex, _ := findPodConditionApplyByType(podStatusApply.Conditions, *condition.Type); conditionIndex < 0 { - podStatusApply.WithConditions(condition) - } else { - podStatusApply.Conditions[conditionIndex] = *condition - } -} - -func findPodConditionApplyByType(conditionApplyList []corev1apply.PodConditionApplyConfiguration, cType v1.PodConditionType) (int, *corev1apply.PodConditionApplyConfiguration) { - for index, conditionApply := range conditionApplyList { - if *conditionApply.Type == cType { - return index, &conditionApply - } - } - return -1, nil -} diff --git a/pkg/controller/podgc/gc_controller_test.go b/pkg/controller/podgc/gc_controller_test.go index 1dddd4f1588..e021a87fffe 100644 --- a/pkg/controller/podgc/gc_controller_test.go +++ b/pkg/controller/podgc/gc_controller_test.go @@ -239,10 +239,11 @@ func TestGCOrphaned(t *testing.T) { pods: []*v1.Pod{ makePod("a", "deleted", v1.PodFailed), makePod("b", "deleted", v1.PodSucceeded), + makePod("c", "deleted", v1.PodRunning), }, itemsInQueue: 1, - deletedPodNames: sets.NewString("a", "b"), - patchedPodNames: sets.NewString("a", "b"), + deletedPodNames: sets.NewString("a", "b", "c"), + patchedPodNames: sets.NewString("c"), enablePodDisruptionConditions: true, }, { diff --git a/test/integration/podgc/podgc_test.go b/test/integration/podgc/podgc_test.go index e08a96a4400..c3e5d15ecf1 100644 --- a/test/integration/podgc/podgc_test.go +++ b/test/integration/podgc/podgc_test.go @@ -20,6 +20,8 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -39,16 +41,36 @@ import ( func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) { tests := map[string]struct { enablePodDisruptionConditions bool + phase v1.PodPhase wantPhase v1.PodPhase + wantDisruptionTarget *v1.PodCondition }{ "PodDisruptionConditions enabled": { enablePodDisruptionConditions: true, + phase: v1.PodPending, wantPhase: v1.PodFailed, + wantDisruptionTarget: &v1.PodCondition{ + Type: v1.DisruptionTarget, + Status: v1.ConditionTrue, + Reason: "DeletionByPodGC", + Message: "PodGC: node no longer exists", + }, }, "PodDisruptionConditions disabled": { enablePodDisruptionConditions: false, + phase: v1.PodPending, wantPhase: v1.PodPending, }, + "PodDisruptionConditions enabled; succeeded pod": { + enablePodDisruptionConditions: true, + phase: v1.PodSucceeded, + wantPhase: v1.PodSucceeded, + }, + "PodDisruptionConditions enabled; failed pod": { + enablePodDisruptionConditions: true, + phase: v1.PodFailed, + wantPhase: v1.PodFailed, + }, } for name, test := range tests { @@ -97,6 +119,11 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) { } defer testutils.RemovePodFinalizers(testCtx.ClientSet, t, []*v1.Pod{pod}) + pod.Status.Phase = test.phase + if _, err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).UpdateStatus(testCtx.Ctx, pod, metav1.UpdateOptions{}); err != nil { + t.Fatalf("Error %v, while setting phase %v for pod: %v", err, test.phase, klog.KObj(pod)) + } + // we delete the node to orphan the pod err = cs.CoreV1().Nodes().Delete(testCtx.Ctx, pod.Spec.NodeName, metav1.DeleteOptions{}) if err != nil { @@ -110,11 +137,9 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) { if err != nil { t.Fatalf("Error: '%v' while updating pod info: '%v'", err, klog.KObj(pod)) } - _, cond := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget) - if test.enablePodDisruptionConditions == true && cond == nil { - t.Errorf("Pod %q does not have the expected condition: %q", klog.KObj(pod), v1.DisruptionTarget) - } else if test.enablePodDisruptionConditions == false && cond != nil { - t.Errorf("Pod %q has an unexpected condition: %q", klog.KObj(pod), v1.DisruptionTarget) + _, gotDisruptionTarget := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget) + if diff := cmp.Diff(test.wantDisruptionTarget, gotDisruptionTarget, cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")); diff != "" { + t.Errorf("Pod %v has unexpected DisruptionTarget condition: %s", klog.KObj(pod), diff) } if pod.Status.Phase != test.wantPhase { t.Errorf("Unexpected phase for pod %q. Got: %q, want: %q", klog.KObj(pod), pod.Status.Phase, test.wantPhase)