From 850bc09e9b78e49ec2459515379ae042a142e5c9 Mon Sep 17 00:00:00 2001
From: carlory
Date: Wed, 10 Jul 2024 16:16:11 +0800
Subject: [PATCH] clean up codes after PodDisruptionConditions was promoted to
 GA and locked to default

---
 pkg/controller/job/job_controller.go          |  5 ++-
 pkg/controller/podgc/gc_controller.go         | 31 +++++++------------
 .../tainteviction/taint_eviction.go           | 30 ++++++++----------
 pkg/kubelet/eviction/eviction_manager.go      | 13 +++-----
 pkg/kubelet/kubelet_pods.go                   | 16 +++++-----
 .../nodeshutdown_manager_linux.go             | 14 ++++-----
 pkg/kubelet/preemption/preemption.go          | 16 ++++------
 pkg/kubelet/status/status_manager.go          |  6 ++--
 pkg/kubelet/types/pod_status.go               |  7 +----
 pkg/registry/core/pod/storage/eviction.go     |  4 +--
 .../defaultpreemption/default_preemption.go   | 11 ++-----
 .../default_preemption_test.go                |  1 -
 .../framework/plugins/feature/feature.go      |  1 -
 pkg/scheduler/framework/plugins/registry.go   |  1 -
 .../framework/preemption/preemption.go        | 30 ++++++++----------
 .../rbac/bootstrappolicy/controller_policy.go | 13 ++------
 .../testdata/controller-roles.yaml            | 21 +++++--------
 test/integration/podgc/podgc_test.go          | 19 ++----------
 18 files changed, 85 insertions(+), 154 deletions(-)

diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go
index dc694bd4772..ae2cb1ab975 100644
--- a/pkg/controller/job/job_controller.go
+++ b/pkg/controller/job/job_controller.go
@@ -1876,9 +1876,8 @@ func ensureJobConditionStatus(list []batch.JobCondition, cType batch.JobConditio
 }
 
 func isPodFailed(p *v1.Pod, job *batch.Job) bool {
-	if feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) && feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
-		// When PodDisruptionConditions is enabled, orphan Pods and unschedulable
-		// terminating Pods are marked as Failed. So we only need to check the phase.
+	if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
+		// Orphan Pods and unschedulable terminating Pods are marked as Failed. So we only need to check the phase.
 		return p.Status.Phase == v1.PodFailed
 	}
 	if p.Status.Phase == v1.PodFailed {
diff --git a/pkg/controller/podgc/gc_controller.go b/pkg/controller/podgc/gc_controller.go
index a856af96dcb..8739bd2fd8e 100644
--- a/pkg/controller/podgc/gc_controller.go
+++ b/pkg/controller/podgc/gc_controller.go
@@ -29,7 +29,6 @@ import (
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
-	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	coreinformers "k8s.io/client-go/informers/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
 	corelisters "k8s.io/client-go/listers/core/v1"
@@ -38,7 +37,6 @@ import (
 	"k8s.io/klog/v2"
 	apipod "k8s.io/kubernetes/pkg/api/v1/pod"
 	"k8s.io/kubernetes/pkg/controller/podgc/metrics"
-	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/pkg/kubelet/eviction"
 	nodeutil "k8s.io/kubernetes/pkg/util/node"
 	utilpod "k8s.io/kubernetes/pkg/util/pod"
@@ -343,23 +341,18 @@ func (gcc *PodGCController) markFailedAndDeletePodWithCondition(ctx context.Cont
 	logger := klog.FromContext(ctx)
 	logger.Info("PodGC is force deleting Pod", "pod", klog.KObj(pod))
 	// Patch the pod to make sure it is transitioned to the Failed phase before deletion.
-	// This is needed for the JobPodReplacementPolicy feature to make sure Job replacement pods are created.
-	// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-apps/3939-allow-replacement-when-fully-terminated#risks-and-mitigations
-	// for more details.
-	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) || utilfeature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) {
-
-		// Mark the pod as failed - this is especially important in case the pod
-		// is orphaned, in which case the pod would remain in the Running phase
-		// forever as there is no kubelet running to change the phase.
-		if pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed {
-			newStatus := pod.Status.DeepCopy()
-			newStatus.Phase = v1.PodFailed
-			if condition != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
-				apipod.UpdatePodCondition(newStatus, condition)
-			}
-			if _, _, _, err := utilpod.PatchPodStatus(ctx, gcc.kubeClient, pod.Namespace, pod.Name, pod.UID, pod.Status, *newStatus); err != nil {
-				return err
-			}
+	//
+	// Mark the pod as failed - this is especially important in case the pod
+	// is orphaned, in which case the pod would remain in the Running phase
+	// forever as there is no kubelet running to change the phase.
+	if pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed {
+		newStatus := pod.Status.DeepCopy()
+		newStatus.Phase = v1.PodFailed
+		if condition != nil {
+			apipod.UpdatePodCondition(newStatus, condition)
+		}
+		if _, _, _, err := utilpod.PatchPodStatus(ctx, gcc.kubeClient, pod.Namespace, pod.Name, pod.UID, pod.Status, *newStatus); err != nil {
+			return err
 		}
 	}
 	return gcc.kubeClient.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, *metav1.NewDeleteOptions(0))
diff --git a/pkg/controller/tainteviction/taint_eviction.go b/pkg/controller/tainteviction/taint_eviction.go
index 18a0bfdda7c..48ab6f0ec51 100644
--- a/pkg/controller/tainteviction/taint_eviction.go
+++ b/pkg/controller/tainteviction/taint_eviction.go
@@ -30,7 +30,6 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
-	"k8s.io/apiserver/pkg/util/feature"
 	corev1informers "k8s.io/client-go/informers/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/scheme"
@@ -45,7 +44,6 @@ import (
 	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
 	"k8s.io/kubernetes/pkg/controller/tainteviction/metrics"
 	controllerutil "k8s.io/kubernetes/pkg/controller/util/node"
-	"k8s.io/kubernetes/pkg/features"
 	utilpod "k8s.io/kubernetes/pkg/util/pod"
 )
 
@@ -129,23 +127,21 @@ func deletePodHandler(c clientset.Interface, emitEventFunc func(types.Namespaced
 }
 
 func addConditionAndDeletePod(ctx context.Context, c clientset.Interface, name, ns string) (err error) {
-	if feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
-		pod, err := c.CoreV1().Pods(ns).Get(ctx, name, metav1.GetOptions{})
-		if err != nil {
+	pod, err := c.CoreV1().Pods(ns).Get(ctx, name, metav1.GetOptions{})
+	if err != nil {
+		return err
+	}
+	newStatus := pod.Status.DeepCopy()
+	updated := apipod.UpdatePodCondition(newStatus, &v1.PodCondition{
+		Type:    v1.DisruptionTarget,
+		Status:  v1.ConditionTrue,
+		Reason:  "DeletionByTaintManager",
+		Message: "Taint manager: deleting due to NoExecute taint",
+	})
+	if updated {
+		if _, _, _, err := utilpod.PatchPodStatus(ctx, c, pod.Namespace, pod.Name, pod.UID, pod.Status, *newStatus); err != nil {
 			return err
 		}
-		newStatus := pod.Status.DeepCopy()
-		updated := apipod.UpdatePodCondition(newStatus, &v1.PodCondition{
-			Type:    v1.DisruptionTarget,
-			Status:  v1.ConditionTrue,
-			Reason:  "DeletionByTaintManager",
-			Message: "Taint manager: deleting due to NoExecute taint",
-		})
-		if updated {
-			if _, _, _, err := utilpod.PatchPodStatus(ctx, c, pod.Namespace, pod.Name, pod.UID, pod.Status, *newStatus); err != nil {
-				return err
-			}
-		}
 	}
 	return c.CoreV1().Pods(ns).Delete(ctx, name, metav1.DeleteOptions{})
 }
diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go
index 3aa0023b837..ed946a0e056 100644
--- a/pkg/kubelet/eviction/eviction_manager.go
+++ b/pkg/kubelet/eviction/eviction_manager.go
@@ -411,14 +411,11 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 			gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
 		}
 		message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc, thresholds, observations)
-		var condition *v1.PodCondition
-		if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
-			condition = &v1.PodCondition{
-				Type:    v1.DisruptionTarget,
-				Status:  v1.ConditionTrue,
-				Reason:  v1.PodReasonTerminationByKubelet,
-				Message: message,
-			}
+		condition := &v1.PodCondition{
+			Type:    v1.DisruptionTarget,
+			Status:  v1.ConditionTrue,
+			Reason:  v1.PodReasonTerminationByKubelet,
+			Message: message,
 		}
 		if m.evictPod(pod, gracePeriodOverride, message, annotations, condition) {
 			metrics.Evictions.WithLabelValues(string(thresholdToReclaim.Signal)).Inc()
diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go
index 5e56018da1a..e8da6206ec4 100644
--- a/pkg/kubelet/kubelet_pods.go
+++ b/pkg/kubelet/kubelet_pods.go
@@ -1835,15 +1835,13 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po
 		}
 	}
 
-	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
-		// copy over the pod disruption conditions from state which is already
-		// updated during the eviciton (due to either node resource pressure or
-		// node graceful shutdown). We do not re-generate the conditions based
-		// on the container statuses as they are added based on one-time events.
-		cType := v1.DisruptionTarget
-		if _, condition := podutil.GetPodConditionFromList(oldPodStatus.Conditions, cType); condition != nil {
-			s.Conditions = utilpod.ReplaceOrAppendPodCondition(s.Conditions, condition)
-		}
+	// copy over the pod disruption conditions from state which is already
+	// updated during the eviciton (due to either node resource pressure or
+	// node graceful shutdown). We do not re-generate the conditions based
+	// on the container statuses as they are added based on one-time events.
+	cType := v1.DisruptionTarget
+	if _, condition := podutil.GetPodConditionFromList(oldPodStatus.Conditions, cType); condition != nil {
+		s.Conditions = utilpod.ReplaceOrAppendPodCondition(s.Conditions, condition)
 	}
 
 	// set all Kubelet-owned conditions
diff --git a/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go b/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go
index 2eaf0f82dc1..a5fc6f9583f 100644
--- a/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go
+++ b/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go
@@ -381,14 +381,12 @@ func (m *managerImpl) processShutdownEvent() error {
 				}
 				status.Message = nodeShutdownMessage
 				status.Reason = nodeShutdownReason
-				if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
-					podutil.UpdatePodCondition(status, &v1.PodCondition{
-						Type:    v1.DisruptionTarget,
-						Status:  v1.ConditionTrue,
-						Reason:  v1.PodReasonTerminationByKubelet,
-						Message: nodeShutdownMessage,
-					})
-				}
+				podutil.UpdatePodCondition(status, &v1.PodCondition{
+					Type:    v1.DisruptionTarget,
+					Status:  v1.ConditionTrue,
+					Reason:  v1.PodReasonTerminationByKubelet,
+					Message: nodeShutdownMessage,
+				})
 			}); err != nil {
 				m.logger.V(1).Info("Shutdown manager failed killing pod", "pod", klog.KObj(pod), "err", err)
 			} else {
diff --git a/pkg/kubelet/preemption/preemption.go b/pkg/kubelet/preemption/preemption.go
index e4d0cbd931b..bfe2cb84507 100644
--- a/pkg/kubelet/preemption/preemption.go
+++ b/pkg/kubelet/preemption/preemption.go
@@ -21,13 +21,11 @@ import (
 	"math"
 
 	v1 "k8s.io/api/core/v1"
-	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/klog/v2"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	"k8s.io/kubernetes/pkg/api/v1/resource"
 	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
-	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/pkg/kubelet/events"
 	"k8s.io/kubernetes/pkg/kubelet/eviction"
 	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
@@ -106,14 +104,12 @@ func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(admitPod *v1.Pod,
 			status.Phase = v1.PodFailed
 			status.Reason = events.PreemptContainer
 			status.Message = message
-			if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
-				podutil.UpdatePodCondition(status, &v1.PodCondition{
-					Type:    v1.DisruptionTarget,
-					Status:  v1.ConditionTrue,
-					Reason:  v1.PodReasonTerminationByKubelet,
-					Message: "Pod was preempted by Kubelet to accommodate a critical pod.",
-				})
-			}
+			podutil.UpdatePodCondition(status, &v1.PodCondition{
+				Type:    v1.DisruptionTarget,
+				Status:  v1.ConditionTrue,
+				Reason:  v1.PodReasonTerminationByKubelet,
+				Message: "Pod was preempted by Kubelet to accommodate a critical pod.",
+			})
 		})
 		if err != nil {
 			klog.ErrorS(err, "Failed to evict pod", "pod", klog.KObj(pod))
diff --git a/pkg/kubelet/status/status_manager.go b/pkg/kubelet/status/status_manager.go
index cc60a56718a..5b6e7ea5a7a 100644
--- a/pkg/kubelet/status/status_manager.go
+++ b/pkg/kubelet/status/status_manager.go
@@ -639,10 +639,8 @@ func (m *manager) updateStatusInternal(pod *v1.Pod, status v1.PodStatus, forceUp
 	// Set PodScheduledCondition.LastTransitionTime.
 	updateLastTransitionTime(&status, &oldStatus, v1.PodScheduled)
 
-	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
-		// Set DisruptionTarget.LastTransitionTime.
-		updateLastTransitionTime(&status, &oldStatus, v1.DisruptionTarget)
-	}
+	// Set DisruptionTarget.LastTransitionTime.
+	updateLastTransitionTime(&status, &oldStatus, v1.DisruptionTarget)
 
 	// ensure that the start time does not change across updates.
 	if oldStatus.StartTime != nil && !oldStatus.StartTime.IsZero() {
diff --git a/pkg/kubelet/types/pod_status.go b/pkg/kubelet/types/pod_status.go
index 4a54d0ce3fa..593e9ce4ec6 100644
--- a/pkg/kubelet/types/pod_status.go
+++ b/pkg/kubelet/types/pod_status.go
@@ -47,10 +47,5 @@ func PodConditionByKubelet(conditionType v1.PodConditionType) bool {
 
 // PodConditionSharedByKubelet returns if the pod condition type is shared by kubelet
 func PodConditionSharedByKubelet(conditionType v1.PodConditionType) bool {
-	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
-		if conditionType == v1.DisruptionTarget {
-			return true
-		}
-	}
-	return false
+	return conditionType == v1.DisruptionTarget
 }
diff --git a/pkg/registry/core/pod/storage/eviction.go b/pkg/registry/core/pod/storage/eviction.go
index 4082c805c4f..411c7bb6e44 100644
--- a/pkg/registry/core/pod/storage/eviction.go
+++ b/pkg/registry/core/pod/storage/eviction.go
@@ -33,14 +33,12 @@ import (
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/apiserver/pkg/registry/rest"
 	"k8s.io/apiserver/pkg/util/dryrun"
-	"k8s.io/apiserver/pkg/util/feature"
 	policyclient "k8s.io/client-go/kubernetes/typed/policy/v1"
 	"k8s.io/client-go/util/retry"
 	pdbhelper "k8s.io/component-helpers/apps/poddisruptionbudget"
 	podutil "k8s.io/kubernetes/pkg/api/pod"
 	api "k8s.io/kubernetes/pkg/apis/core"
 	"k8s.io/kubernetes/pkg/apis/policy"
-	"k8s.io/kubernetes/pkg/features"
 )
 
 const (
@@ -307,7 +305,7 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
 }
 
 func addConditionAndDeletePod(r *EvictionREST, ctx context.Context, name string, validation rest.ValidateObjectFunc, options *metav1.DeleteOptions) error {
-	if !dryrun.IsDryRun(options.DryRun) && feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
+	if !dryrun.IsDryRun(options.DryRun) {
 		getLatestPod := func(_ context.Context, _, oldObj runtime.Object) (runtime.Object, error) {
 			// Throwaway the newObj. We care only about the latest pod obtained from etcd (oldObj).
 			// So we can add DisruptionTarget condition in conditionAppender without conflicts.
diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go
index 4ead255aca6..847bc409a70 100644
--- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go
+++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go
@@ -253,7 +253,7 @@ func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNo
 		if nodeInfo, _ := nodeInfos.Get(nomNodeName); nodeInfo != nil {
 			podPriority := corev1helpers.PodPriority(pod)
 			for _, p := range nodeInfo.Pods {
-				if corev1helpers.PodPriority(p.Pod) < podPriority && podTerminatingByPreemption(p.Pod, pl.fts.EnablePodDisruptionConditions) {
+				if corev1helpers.PodPriority(p.Pod) < podPriority && podTerminatingByPreemption(p.Pod) {
 					// There is a terminating pod on the nominated node.
 					return false, "not eligible due to a terminating pod on the nominated node."
 				}
@@ -268,17 +268,12 @@ func (pl *DefaultPreemption) OrderedScoreFuncs(ctx context.Context, nodesToVicti
 	return nil
 }
 
-// podTerminatingByPreemption returns the pod's terminating state if feature PodDisruptionConditions is not enabled.
-// Otherwise, it additionally checks if the termination state is caused by scheduler preemption.
-func podTerminatingByPreemption(p *v1.Pod, enablePodDisruptionConditions bool) bool {
+// podTerminatingByPreemption returns true if the pod is in the termination state caused by scheduler preemption.
+func podTerminatingByPreemption(p *v1.Pod) bool {
 	if p.DeletionTimestamp == nil {
 		return false
 	}
 
-	if !enablePodDisruptionConditions {
-		return true
-	}
-
 	for _, condition := range p.Status.Conditions {
 		if condition.Type == v1.DisruptionTarget {
 			return condition.Status == v1.ConditionTrue && condition.Reason == v1.PodReasonPreemptionByScheduler
diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go
index 6842dbe6bf7..91c8d18772a 100644
--- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go
+++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go
@@ -1466,7 +1466,6 @@ func TestPodEligibleToPreemptOthers(t *testing.T) {
 
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			test.fts.EnablePodDisruptionConditions = true
 			logger, ctx := ktesting.NewTestContext(t)
 			ctx, cancel := context.WithCancel(ctx)
 			defer cancel()
diff --git a/pkg/scheduler/framework/plugins/feature/feature.go b/pkg/scheduler/framework/plugins/feature/feature.go
index 77158c12e05..af28abfc901 100644
--- a/pkg/scheduler/framework/plugins/feature/feature.go
+++ b/pkg/scheduler/framework/plugins/feature/feature.go
@@ -24,7 +24,6 @@ type Features struct {
 	EnableVolumeCapacityPriority                 bool
 	EnableNodeInclusionPolicyInPodTopologySpread bool
 	EnableMatchLabelKeysInPodTopologySpread      bool
-	EnablePodDisruptionConditions                bool
 	EnableInPlacePodVerticalScaling              bool
 	EnableSidecarContainers                      bool
 	EnableSchedulingQueueHint                    bool
diff --git a/pkg/scheduler/framework/plugins/registry.go b/pkg/scheduler/framework/plugins/registry.go
index 27df67b1e75..3edbdde5c46 100644
--- a/pkg/scheduler/framework/plugins/registry.go
+++ b/pkg/scheduler/framework/plugins/registry.go
@@ -50,7 +50,6 @@ func NewInTreeRegistry() runtime.Registry {
 		EnableVolumeCapacityPriority:                 feature.DefaultFeatureGate.Enabled(features.VolumeCapacityPriority),
 		EnableNodeInclusionPolicyInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.NodeInclusionPolicyInPodTopologySpread),
 		EnableMatchLabelKeysInPodTopologySpread:      feature.DefaultFeatureGate.Enabled(features.MatchLabelKeysInPodTopologySpread),
-		EnablePodDisruptionConditions:                feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions),
 		EnableInPlacePodVerticalScaling:              feature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling),
 		EnableSidecarContainers:                      feature.DefaultFeatureGate.Enabled(features.SidecarContainers),
 		EnableSchedulingQueueHint:                    feature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints),
diff --git a/pkg/scheduler/framework/preemption/preemption.go b/pkg/scheduler/framework/preemption/preemption.go
index 29864adb52f..4bd99bb2be2 100644
--- a/pkg/scheduler/framework/preemption/preemption.go
+++ b/pkg/scheduler/framework/preemption/preemption.go
@@ -28,14 +28,12 @@ import (
 	policy "k8s.io/api/policy/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	utilerrors "k8s.io/apimachinery/pkg/util/errors"
-	"k8s.io/apiserver/pkg/util/feature"
 	corelisters "k8s.io/client-go/listers/core/v1"
 	policylisters "k8s.io/client-go/listers/policy/v1"
 	corev1helpers "k8s.io/component-helpers/scheduling/corev1"
 	"k8s.io/klog/v2"
 	extenderv1 "k8s.io/kube-scheduler/extender/v1"
 	apipod "k8s.io/kubernetes/pkg/api/v1/pod"
-	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/pkg/scheduler/framework"
 	"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
 	"k8s.io/kubernetes/pkg/scheduler/metrics"
@@ -362,21 +360,19 @@ func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1.
 			waitingPod.Reject(pluginName, "preempted")
 			logger.V(2).Info("Preemptor pod rejected a waiting pod", "preemptor", klog.KObj(pod), "waitingPod", klog.KObj(victim), "node", c.Name())
 		} else {
-			if feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
-				condition := &v1.PodCondition{
-					Type:    v1.DisruptionTarget,
-					Status:  v1.ConditionTrue,
-					Reason:  v1.PodReasonPreemptionByScheduler,
-					Message: fmt.Sprintf("%s: preempting to accommodate a higher priority pod", pod.Spec.SchedulerName),
-				}
-				newStatus := pod.Status.DeepCopy()
-				updated := apipod.UpdatePodCondition(newStatus, condition)
-				if updated {
-					if err := util.PatchPodStatus(ctx, cs, victim, newStatus); err != nil {
-						logger.Error(err, "Could not add DisruptionTarget condition due to preemption", "pod", klog.KObj(victim), "preemptor", klog.KObj(pod))
-						errCh.SendErrorWithCancel(err, cancel)
-						return
-					}
+			condition := &v1.PodCondition{
+				Type:    v1.DisruptionTarget,
+				Status:  v1.ConditionTrue,
+				Reason:  v1.PodReasonPreemptionByScheduler,
+				Message: fmt.Sprintf("%s: preempting to accommodate a higher priority pod", pod.Spec.SchedulerName),
+			}
+			newStatus := pod.Status.DeepCopy()
+			updated := apipod.UpdatePodCondition(newStatus, condition)
+			if updated {
+				if err := util.PatchPodStatus(ctx, cs, victim, newStatus); err != nil {
+					logger.Error(err, "Could not add DisruptionTarget condition due to preemption", "pod", klog.KObj(victim), "preemptor", klog.KObj(pod))
+					errCh.SendErrorWithCancel(err, cancel)
+					return
 				}
 			}
 			if err := util.DeletePod(ctx, cs, victim); err != nil {
diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go
index b39ac933193..c2c6ac442f4 100644
--- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go
+++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go
@@ -135,13 +135,11 @@ func buildControllerRoles() ([]rbacv1.ClusterRole, []rbacv1.ClusterRoleBinding)
 				rbacv1helpers.NewRule("get", "list", "watch").Groups(policyGroup).Resources("poddisruptionbudgets").RuleOrDie(),
 				rbacv1helpers.NewRule("get", "list", "watch").Groups(appsGroup).Resources("statefulsets").RuleOrDie(),
 				rbacv1helpers.NewRule("update").Groups(policyGroup).Resources("poddisruptionbudgets/status").RuleOrDie(),
+				rbacv1helpers.NewRule("patch", "update").Groups(legacyGroup).Resources("pods/status").RuleOrDie(),
 				rbacv1helpers.NewRule("get").Groups("*").Resources("*/scale").RuleOrDie(),
 				eventsRule(),
 			},
 		}
-		if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
-			role.Rules = append(role.Rules, rbacv1helpers.NewRule("patch", "update").Groups(legacyGroup).Resources("pods/status").RuleOrDie())
-		}
 		return role
 	}())
 	addControllerRole(&controllerRoles, &controllerRoleBindings, rbacv1.ClusterRole{
@@ -269,13 +267,10 @@ func buildControllerRoles() ([]rbacv1.ClusterRole, []rbacv1.ClusterRoleBinding)
 			rbacv1helpers.NewRule("patch", "update").Groups(legacyGroup).Resources("nodes/status").RuleOrDie(),
 			// used for pod deletion
 			rbacv1helpers.NewRule("patch", "update").Groups(legacyGroup).Resources("pods/status").RuleOrDie(),
-			rbacv1helpers.NewRule("list", "delete").Groups(legacyGroup).Resources("pods").RuleOrDie(),
+			rbacv1helpers.NewRule("list", "get", "delete").Groups(legacyGroup).Resources("pods").RuleOrDie(),
 			eventsRule(),
 		},
 	}
-	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
-		role.Rules = append(role.Rules, rbacv1helpers.NewRule("get").Groups(legacyGroup).Resources("pods").RuleOrDie())
-	}
 	return role
 }())
 addControllerRole(&controllerRoles, &controllerRoleBindings, rbacv1.ClusterRole{
@@ -307,11 +302,9 @@ func buildControllerRoles() ([]rbacv1.ClusterRole, []rbacv1.ClusterRoleBinding)
 		Rules: []rbacv1.PolicyRule{
 			rbacv1helpers.NewRule("list", "watch", "delete").Groups(legacyGroup).Resources("pods").RuleOrDie(),
 			rbacv1helpers.NewRule("get", "list").Groups(legacyGroup).Resources("nodes").RuleOrDie(),
+			rbacv1helpers.NewRule("patch").Groups(legacyGroup).Resources("pods/status").RuleOrDie(),
 		},
 	}
-	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
-		role.Rules = append(role.Rules, rbacv1helpers.NewRule("patch").Groups(legacyGroup).Resources("pods/status").RuleOrDie())
-	}
 	return role
 }())
 addControllerRole(&controllerRoles, &controllerRoleBindings, rbacv1.ClusterRole{
diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/controller-roles.yaml b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/controller-roles.yaml
index f17fc954f88..72471fad12e 100644
--- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/controller-roles.yaml
+++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/controller-roles.yaml
@@ -415,6 +415,13 @@ items:
     - poddisruptionbudgets/status
     verbs:
     - update
+  - apiGroups:
+    - ""
+    resources:
+    - pods/status
+    verbs:
+    - patch
+    - update
   - apiGroups:
     - '*'
     resources:
@@ -430,13 +437,6 @@ items:
     - create
     - patch
     - update
-  - apiGroups:
-    - ""
-    resources:
-    - pods/status
-    verbs:
-    - patch
-    - update
 - apiVersion: rbac.authorization.k8s.io/v1
   kind: ClusterRole
   metadata:
@@ -930,6 +930,7 @@ items:
     - pods
     verbs:
    - delete
+    - get
     - list
   - apiGroups:
    - ""
@@ -936,15 +937,9 @@ items:
     resources:
     - events
     verbs:
     - create
     - patch
     - update
-  - apiGroups:
-    - ""
-    resources:
-    - pods
-    verbs:
-    - get
 - apiVersion: rbac.authorization.k8s.io/v1
   kind: ClusterRole
   metadata:
diff --git a/test/integration/podgc/podgc_test.go b/test/integration/podgc/podgc_test.go
index c6f64066d30..3175e2ddc17 100644
--- a/test/integration/podgc/podgc_test.go
+++ b/test/integration/podgc/podgc_test.go
@@ -40,10 +40,9 @@
 // TestPodGcOrphanedPodsWithFinalizer tests deletion of orphaned pods
 func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
 	tests := map[string]struct {
-		enableJobPodReplacementPolicy bool
-		phase                         v1.PodPhase
-		wantPhase                     v1.PodPhase
-		wantDisruptionTarget          *v1.PodCondition
+		phase                v1.PodPhase
+		wantPhase            v1.PodPhase
+		wantDisruptionTarget *v1.PodCondition
 	}{
 		"pending pod": {
 			phase:     v1.PodPending,
@@ -50,23 +49,12 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
 			wantPhase: v1.PodFailed,
 			wantDisruptionTarget: &v1.PodCondition{
 				Type:    v1.DisruptionTarget,
 				Status:  v1.ConditionTrue,
 				Reason:  "DeletionByPodGC",
 				Message: "PodGC: node no longer exists",
 			},
 		},
-		"pending pod; PodReplacementPolicy enabled": {
-			enableJobPodReplacementPolicy: true,
-			phase:                         v1.PodPending,
-			wantPhase:                     v1.PodFailed,
-			wantDisruptionTarget: &v1.PodCondition{
-				Type:    v1.DisruptionTarget,
-				Status:  v1.ConditionTrue,
-				Reason:  "DeletionByPodGC",
-				Message: "PodGC: node no longer exists",
-			},
-		},
 		"succeeded pod": {
 			phase:     v1.PodSucceeded,
 			wantPhase: v1.PodSucceeded,
 		},
@@ -78,7 +66,6 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
 
 	for name, test := range tests {
 		t.Run(name, func(t *testing.T) {
-			featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)
 			testCtx := setup(t, "podgc-orphaned")
 			cs := testCtx.ClientSet