Merge pull request #56864 from juanvallejo/jvallejo/add-selector-kubectl-drain

Automatic merge from submit-queue (batch tested with PRs 55751, 57337, 56406, 56864, 57347). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Add --pod-selector option to kubectl drain

**Release note**:
```release-note
Added the ability to select pods on a chosen node to be drained, based on a given pod label selector
```

This patch adds the ability to select which pods on a chosen node are drained, based on a given pod label selector. Related downstream issue: https://github.com/openshift/origin/issues/17554
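To illustrate the selector semantics the new flag relies on, here is a minimal sketch (not part of the patch): `labels.Parse` and `Selector.Matches` are the real `k8s.io/apimachinery/pkg/labels` APIs, and the label sets mirror the two test pods added below.

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

func main() {
	// Same selector syntax accepted by `kubectl drain <node> --pod-selector`.
	selector, err := labels.Parse("e in (f)")
	if err != nil {
		panic(err)
	}

	// Label sets for two hypothetical pods on the drained node.
	for _, podLabels := range []labels.Set{
		{"e": "f"}, // matches: pod is considered for eviction
		{"c": "d"}, // no match: pod is left running
	} {
		fmt.Printf("%v matches: %v\n", podLabels, selector.Matches(podLabels))
	}
}
```

Pods whose labels do not match the selector are never considered for eviction, which is exactly what the new test case below exercises with `test-pod-1` and `test-pod-2`.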

Further, it removes the explicit, kind-specific pod-controller check. The `drain` command currently fails if a pod has a controller of a `kind` [not explicitly handled in the command itself](https://github.com/kubernetes/kubernetes/blob/master/pkg/kubectl/cmd/drain.go#L331). This makes `drain` unusable whenever a node contains pods managed by third-party or otherwise "unknown" controllers.
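To make that failure mode concrete, a simplified, self-contained reproduction (the real check, shown removed in the diff below, issues an API GET per known kind; `EtcdCluster` stands in for any third-party controller kind):

```go
package main

import "fmt"

// getController mimics the kind-specific lookup removed by this patch.
func getController(kind string) (interface{}, error) {
	switch kind {
	case "ReplicationController", "DaemonSet", "Job", "ReplicaSet", "StatefulSet":
		return struct{}{}, nil // known kinds resolve via the API server
	}
	return nil, fmt.Errorf("Unknown controller kind %q", kind)
}

func main() {
	// A pod owned by a CRD-backed controller hits the default branch,
	// and drain aborts for the whole node.
	if _, err := getController("EtcdCluster"); err != nil {
		fmt.Println("drain fails:", err)
	}
}
```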

Based on [this comment](https://github.com/kubernetes/kubernetes/blob/master/pkg/kubectl/cmd/drain.go#L353), the expectation was to fail whenever a pod's controller could not be found, for whatever reason. I believe the `drain` command should not care whether a pod's controller object still exists; it should only care whether the pod declares one, and act according to that controller's kind. This solves a downstream bug: https://github.com/openshift/origin/issues/17563
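A sketch of the check this patch moves to, using only the upstream `metav1.GetControllerOf` helper (the `isManaged` name is illustrative, not from the patch):

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// isManaged reports whether the pod declares a managing controller,
// without resolving (or even recognizing) the controller's kind.
func isManaged(pod corev1.Pod) bool {
	// GetControllerOf returns the OwnerReference with Controller=true,
	// or nil when the pod is unmanaged.
	return metav1.GetControllerOf(&pod) != nil
}

func main() {
	ctrl := true
	managed := corev1.Pod{ObjectMeta: metav1.ObjectMeta{
		OwnerReferences: []metav1.OwnerReference{
			// Any kind works here, including third-party controllers.
			{Kind: "EtcdCluster", Name: "example", Controller: &ctrl},
		},
	}}
	fmt.Println(isManaged(managed))      // true
	fmt.Println(isManaged(corev1.Pod{})) // false
}
```

Unmanaged pods still require `--force`, and the DaemonSet case keeps its existence check (see `daemonsetFilter` in the diff below), so orphaned DaemonSet pods behave as before.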

cc @fabianofranz @deads2k @kubernetes/sig-cli-misc
Commit 3ef674188d by Kubernetes Submit Queue, 2017-12-18 18:50:45 -08:00 (committed by GitHub).
2 changed files with 91 additions and 52 deletions


@@ -4300,6 +4300,51 @@ run_cluster_management_tests() {
kube::test::get_object_assert nodes "{{range.items}}{{$id_field}}:{{end}}" '127.0.0.1:'
# create test pods we can work with
kubectl create -f - "${kube_flags[@]}" << __EOF__
{
"kind": "Pod",
"apiVersion": "v1",
"metadata": {
"name": "test-pod-1",
"labels": {
"e": "f"
}
},
"spec": {
"containers": [
{
"name": "container-1",
"resources": {},
"image": "test-image"
}
]
}
}
__EOF__
kubectl create -f - "${kube_flags[@]}" << __EOF__
{
"kind": "Pod",
"apiVersion": "v1",
"metadata": {
"name": "test-pod-2",
"labels": {
"c": "d"
}
},
"spec": {
"containers": [
{
"name": "container-1",
"resources": {},
"image": "test-image"
}
]
}
}
__EOF__
### kubectl cordon update with --dry-run does not mark node unschedulable
# Pre-condition: node is schedulable
kube::test::get_object_assert "nodes 127.0.0.1" "{{.spec.unschedulable}}" '<no value>'
@@ -4314,6 +4359,20 @@ run_cluster_management_tests() {
kube::test::get_object_assert nodes "{{range.items}}{{$id_field}}:{{end}}" '127.0.0.1:'
kube::test::get_object_assert "nodes 127.0.0.1" "{{.spec.unschedulable}}" '<no value>'
### kubectl drain with --pod-selector only evicts pods that match the given selector
# Pre-condition: node is schedulable
kube::test::get_object_assert "nodes 127.0.0.1" "{{.spec.unschedulable}}" '<no value>'
# Pre-condition: test-pod-1 and test-pod-2 exist
kube::test::get_object_assert "pods" "{{range .items}}{{.metadata.name}},{{end}}" 'test-pod-1,test-pod-2,'
kubectl drain "127.0.0.1" --pod-selector 'e in (f)'
# only "test-pod-1" should have been matched and deleted - test-pod-2 should still exist
kube::test::get_object_assert "pods/test-pod-2" "{{.metadata.name}}" 'test-pod-2'
# delete pod no longer in use
kubectl delete pod/test-pod-2
# Post-condition: node is schedulable
kubectl uncordon "127.0.0.1"
kube::test::get_object_assert "nodes 127.0.0.1" "{{.spec.unschedulable}}" '<no value>'
### kubectl uncordon update with --dry-run is a no-op
# Pre-condition: node is already schedulable
kube::test::get_object_assert "nodes 127.0.0.1" "{{.spec.unschedulable}}" '<no value>'

pkg/kubectl/cmd/drain.go

@@ -33,6 +33,7 @@ import (
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/json"
@@ -61,6 +62,7 @@ type DrainOptions struct {
backOff clockwork.Clock
DeleteLocalData bool
Selector string
PodSelector string
mapper meta.RESTMapper
nodeInfos []*resource.Info
Out io.Writer
@@ -197,6 +199,8 @@ func NewCmdDrain(f cmdutil.Factory, out, errOut io.Writer) *cobra.Command {
cmd.Flags().IntVar(&options.GracePeriodSeconds, "grace-period", -1, "Period of time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used.")
cmd.Flags().DurationVar(&options.Timeout, "timeout", 0, "The length of time to wait before giving up, zero means infinite")
cmd.Flags().StringVarP(&options.Selector, "selector", "l", options.Selector, "Selector (label query) to filter on")
cmd.Flags().StringVarP(&options.PodSelector, "pod-selector", "", options.PodSelector, "Label selector to filter pods on the node")
cmdutil.AddDryRunFlag(cmd)
return cmd
}
@@ -223,6 +227,12 @@ func (o *DrainOptions) SetupDrain(cmd *cobra.Command, args []string) error {
return err
}
if len(o.PodSelector) > 0 {
if _, err := labels.Parse(o.PodSelector); err != nil {
return errors.New("--pod-selector=<pod_selector> must be a valid label selector")
}
}
o.restClient, err = o.Factory.RESTClient()
if err != nil {
return err
@@ -328,38 +338,8 @@ func (o *DrainOptions) deleteOrEvictPodsSimple(nodeInfo *resource.Info) error {
return err
}
func (o *DrainOptions) getController(namespace string, controllerRef *metav1.OwnerReference) (interface{}, error) {
switch controllerRef.Kind {
case "ReplicationController":
return o.client.CoreV1().ReplicationControllers(namespace).Get(controllerRef.Name, metav1.GetOptions{})
case "DaemonSet":
return o.client.ExtensionsV1beta1().DaemonSets(namespace).Get(controllerRef.Name, metav1.GetOptions{})
case "Job":
return o.client.BatchV1().Jobs(namespace).Get(controllerRef.Name, metav1.GetOptions{})
case "ReplicaSet":
return o.client.ExtensionsV1beta1().ReplicaSets(namespace).Get(controllerRef.Name, metav1.GetOptions{})
case "StatefulSet":
return o.client.AppsV1beta1().StatefulSets(namespace).Get(controllerRef.Name, metav1.GetOptions{})
}
return nil, fmt.Errorf("Unknown controller kind %q", controllerRef.Kind)
}
func (o *DrainOptions) getPodController(pod corev1.Pod) (*metav1.OwnerReference, error) {
controllerRef := metav1.GetControllerOf(&pod)
if controllerRef == nil {
return nil, nil
}
// We assume the only reason for an error is because the controller is
// gone/missing, not for any other cause.
// TODO(mml): something more sophisticated than this
// TODO(juntee): determine if it's safe to remove getController(),
// so that drain can work for controller types that we don't know about
_, err := o.getController(pod.Namespace, controllerRef)
if err != nil {
return nil, err
}
return controllerRef, nil
func (o *DrainOptions) getPodController(pod corev1.Pod) *metav1.OwnerReference {
return metav1.GetControllerOf(&pod)
}
func (o *DrainOptions) unreplicatedFilter(pod corev1.Pod) (bool, *warning, *fatal) {
@@ -368,21 +348,15 @@ func (o *DrainOptions) unreplicatedFilter(pod corev1.Pod) (bool, *warning, *fatal) {
return true, nil, nil
}
controllerRef, err := o.getPodController(pod)
if err != nil {
// if we're forcing, remove orphaned pods with a warning
if apierrors.IsNotFound(err) && o.Force {
return true, &warning{err.Error()}, nil
}
return false, nil, &fatal{err.Error()}
}
controllerRef := o.getPodController(pod)
if controllerRef != nil {
return true, nil, nil
}
if !o.Force {
return false, nil, &fatal{kUnmanagedFatal}
if o.Force {
return true, &warning{kUnmanagedWarning}, nil
}
return true, &warning{kUnmanagedWarning}, nil
return false, nil, &fatal{kUnmanagedFatal}
}
func (o *DrainOptions) daemonsetFilter(pod corev1.Pod) (bool, *warning, *fatal) {
@@ -393,23 +367,23 @@ func (o *DrainOptions) daemonsetFilter(pod corev1.Pod) (bool, *warning, *fatal) {
// The exception is for pods that are orphaned (the referencing
// management resource - including DaemonSet - is not found).
// Such pods will be deleted if --force is used.
controllerRef, err := o.getPodController(pod)
if err != nil {
// if we're forcing, remove orphaned pods with a warning
controllerRef := o.getPodController(pod)
if controllerRef == nil || controllerRef.Kind != "DaemonSet" {
return true, nil, nil
}
if _, err := o.client.ExtensionsV1beta1().DaemonSets(pod.Namespace).Get(controllerRef.Name, metav1.GetOptions{}); err != nil {
// remove orphaned pods with a warning if --force is used
if apierrors.IsNotFound(err) && o.Force {
return true, &warning{err.Error()}, nil
}
return false, nil, &fatal{err.Error()}
}
if controllerRef == nil || controllerRef.Kind != "DaemonSet" {
return true, nil, nil
}
if _, err := o.client.ExtensionsV1beta1().DaemonSets(pod.Namespace).Get(controllerRef.Name, metav1.GetOptions{}); err != nil {
return false, nil, &fatal{err.Error()}
}
if !o.IgnoreDaemonsets {
return false, nil, &fatal{kDaemonsetFatal}
}
return false, &warning{kDaemonsetWarning}, nil
}
@@ -455,7 +429,13 @@ func (ps podStatuses) Message() string {
// getPodsForDeletion receives resource info for a node, and returns all the pods from the given node that we
// are planning on deleting. If there are any pods preventing us from deleting, we return that list in an error.
func (o *DrainOptions) getPodsForDeletion(nodeInfo *resource.Info) (pods []corev1.Pod, err error) {
labelSelector, err := labels.Parse(o.PodSelector)
if err != nil {
return pods, err
}
podList, err := o.client.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{
LabelSelector: labelSelector.String(),
FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeInfo.Name}).String()})
if err != nil {
return pods, err