diff --git a/contrib/completions/bash/kubectl b/contrib/completions/bash/kubectl
index 8be1c80483a..6902e6e3c40 100644
--- a/contrib/completions/bash/kubectl
+++ b/contrib/completions/bash/kubectl
@@ -1204,6 +1204,7 @@ _kubectl_drain()
 
     flags+=("--force")
     flags+=("--grace-period=")
+    flags+=("--ignore-daemonsets")
     flags+=("--alsologtostderr")
     flags+=("--api-version=")
     flags+=("--certificate-authority=")
diff --git a/docs/man/man1/kubectl-drain.1 b/docs/man/man1/kubectl-drain.1
index 512969f2e2f..d8514b45278 100644
--- a/docs/man/man1/kubectl-drain.1
+++ b/docs/man/man1/kubectl-drain.1
@@ -18,9 +18,12 @@ Drain node in preparation for maintenance.
 .PP
 The given node will be marked unschedulable to prevent new pods from arriving.
 Then drain deletes all pods except mirror pods (which cannot be deleted through
-the API server). If there are any pods that are neither mirror pods nor
-managed by a ReplicationController, Job, or DaemonSet, then drain will not
-delete any pods unless you use \-\-force.
+the API server). If there are DaemonSet\-managed pods, drain will not proceed
+without \-\-ignore\-daemonsets, and in any case it will not delete any
+DaemonSet\-managed pods, because those pods would be immediately replaced by the
+DaemonSet controller, which ignores unschedulable markings. If there are any
+pods that are neither mirror pods nor managed by a ReplicationController,
+DaemonSet, or Job, then drain will not delete any pods unless you use \-\-force.
 
 .PP
 When you are ready to put the node back into service, use kubectl uncordon, which
@@ -36,6 +39,10 @@ will make the node schedulable again.
 \fB\-\-grace\-period\fP=\-1
     Period of time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used.
 
+.PP
+\fB\-\-ignore\-daemonsets\fP=false
+    Ignore DaemonSet\-managed pods.
+
 .SH OPTIONS INHERITED FROM PARENT COMMANDS
 .PP
diff --git a/docs/user-guide/kubectl/kubectl_drain.md b/docs/user-guide/kubectl/kubectl_drain.md
index 6a586aa1f22..1b4199c9106 100644
--- a/docs/user-guide/kubectl/kubectl_drain.md
+++ b/docs/user-guide/kubectl/kubectl_drain.md
@@ -38,9 +38,12 @@ Drain node in preparation for maintenance.
 
 The given node will be marked unschedulable to prevent new pods from arriving.
 Then drain deletes all pods except mirror pods (which cannot be deleted through
-the API server). If there are any pods that are neither mirror pods nor
-managed by a ReplicationController, Job, or DaemonSet, then drain will not
-delete any pods unless you use --force.
+the API server). If there are DaemonSet-managed pods, drain will not proceed
+without --ignore-daemonsets, and in any case it will not delete any
+DaemonSet-managed pods, because those pods would be immediately replaced by the
+DaemonSet controller, which ignores unschedulable markings. If there are any
+pods that are neither mirror pods nor managed by a ReplicationController,
+DaemonSet, or Job, then drain will not delete any pods unless you use --force.
 
 When you are ready to put the node back into service, use kubectl uncordon, which
 will make the node schedulable again.
@@ -66,6 +69,7 @@ $ kubectl drain foo --grace-period=900
 ```
       --force[=false]: Continue even if there are pods not managed by a ReplicationController, Job, or DaemonSet.
       --grace-period=-1: Period of time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used.
+      --ignore-daemonsets[=false]: Ignore DaemonSet-managed pods.
 ```
 
 ### Options inherited from parent commands
@@ -100,7 +104,7 @@ $ kubectl drain foo --grace-period=900
 
 * [kubectl](kubectl.md) - kubectl controls the Kubernetes cluster manager
 
-###### Auto generated by spf13/cobra on 28-Jan-2016
+###### Auto generated by spf13/cobra on 2-Feb-2016
 
 [![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/docs/user-guide/kubectl/kubectl_drain.md?pixel)]()
diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt
index d3afecf3d7c..8a96260b77c 100644
--- a/hack/verify-flags/known-flags.txt
+++ b/hack/verify-flags/known-flags.txt
@@ -140,6 +140,7 @@ host-pid-sources
 hostname-override
 http-check-frequency
 http-port
+ignore-daemonsets
 ignore-not-found
 image-gc-high-threshold
 image-gc-low-threshold
diff --git a/pkg/kubectl/cmd/drain.go b/pkg/kubectl/cmd/drain.go
index e05985d74a6..89cba686db3 100644
--- a/pkg/kubectl/cmd/drain.go
+++ b/pkg/kubectl/cmd/drain.go
@@ -17,6 +17,7 @@ limitations under the License.
 package cmd
 
 import (
+    "errors"
     "fmt"
     "io"
     "reflect"
@@ -41,6 +42,7 @@ type DrainOptions struct {
     factory            *cmdutil.Factory
     Force              bool
     GracePeriodSeconds int
+    IgnoreDaemonsets   bool
     mapper             meta.RESTMapper
     nodeInfo           *resource.Info
     out                io.Writer
@@ -98,9 +100,12 @@ const (
 
 The given node will be marked unschedulable to prevent new pods from arriving.
 Then drain deletes all pods except mirror pods (which cannot be deleted through
-the API server). If there are any pods that are neither mirror pods nor
-managed by a ReplicationController, Job, or DaemonSet, then drain will not
-delete any pods unless you use --force.
+the API server). If there are DaemonSet-managed pods, drain will not proceed
+without --ignore-daemonsets, and in any case it will not delete any
+DaemonSet-managed pods, because those pods would be immediately replaced by the
+DaemonSet controller, which ignores unschedulable markings. If there are any
+pods that are neither mirror pods nor managed by a ReplicationController,
+DaemonSet, or Job, then drain will not delete any pods unless you use --force.
 
 When you are ready to put the node back into service, use kubectl uncordon, which
 will make the node schedulable again.
@@ -127,6 +132,7 @@ func NewCmdDrain(f *cmdutil.Factory, out io.Writer) *cobra.Command {
         },
     }
     cmd.Flags().BoolVar(&options.Force, "force", false, "Continue even if there are pods not managed by a ReplicationController, Job, or DaemonSet.")
+    cmd.Flags().BoolVar(&options.IgnoreDaemonsets, "ignore-daemonsets", false, "Ignore DaemonSet-managed pods.")
     cmd.Flags().IntVar(&options.GracePeriodSeconds, "grace-period", -1, "Period of time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used.")
     return cmd
 }
@@ -196,6 +202,7 @@ func (o *DrainOptions) getPodsForDeletion() ([]api.Pod, error) {
         return pods, err
     }
     unreplicatedPodNames := []string{}
+    daemonSetPodNames := []string{}
 
     for _, pod := range podList.Items {
         _, found := pod.ObjectMeta.Annotations[types.ConfigMirrorAnnotationKey]
@@ -204,6 +211,7 @@ func (o *DrainOptions) getPodsForDeletion() ([]api.Pod, error) {
             continue
         }
         replicated := false
+        daemonset_pod := false
 
         creatorRef, found := pod.ObjectMeta.Annotations[controller.CreatedByAnnotation]
         if found {
@@ -227,7 +235,11 @@ func (o *DrainOptions) getPodsForDeletion() ([]api.Pod, error) {
                 // gone/missing, not for any other cause.  TODO(mml): something more
                 // sophisticated than this
                 if err == nil && ds != nil {
-                    replicated = true
+                    // Track DaemonSet-managed pods separately, since the DaemonSet
+                    // controller currently ignores the unschedulable bit.
+                    // FIXME(mml): Add link to the issue concerning a proper way to drain
+                    // daemonset pods, probably using taints.
+                    daemonset_pod = true
                 }
             } else if sr.Reference.Kind == "Job" {
                 job, err := o.client.Jobs(sr.Reference.Namespace).Get(sr.Reference.Name)
@@ -240,24 +252,63 @@ func (o *DrainOptions) getPodsForDeletion() ([]api.Pod, error) {
                 }
             }
         }
-        if replicated || o.Force {
-            pods = append(pods, pod)
-        }
-        if !replicated {
+
+        switch {
+        case daemonset_pod:
+            daemonSetPodNames = append(daemonSetPodNames, pod.Name)
+        case !replicated:
             unreplicatedPodNames = append(unreplicatedPodNames, pod.Name)
+            if o.Force {
+                pods = append(pods, pod)
+            }
+        default:
+            pods = append(pods, pod)
         }
     }
 
-    if len(unreplicatedPodNames) > 0 {
-        joined := strings.Join(unreplicatedPodNames, ", ")
-        if !o.Force {
-            return pods, fmt.Errorf("refusing to continue due to pods managed by neither a ReplicationController, nor a Job, nor a DaemonSet: %s (use --force to override)", joined)
-        }
-        fmt.Fprintf(o.out, "WARNING: About to delete these pods managed by neither a ReplicationController, nor a Job, nor a DaemonSet: %s\n", joined)
+    daemonSetErrors := !o.IgnoreDaemonsets && len(daemonSetPodNames) > 0
+    unreplicatedErrors := !o.Force && len(unreplicatedPodNames) > 0
+
+    switch {
+    case daemonSetErrors && unreplicatedErrors:
+        return []api.Pod{}, errors.New(unmanagedMsg(unreplicatedPodNames, daemonSetPodNames, true))
+    case daemonSetErrors && !unreplicatedErrors:
+        return []api.Pod{}, errors.New(unmanagedMsg([]string{}, daemonSetPodNames, true))
+    case unreplicatedErrors && !daemonSetErrors:
+        return []api.Pod{}, errors.New(unmanagedMsg(unreplicatedPodNames, []string{}, true))
     }
+
+    if len(unreplicatedPodNames) > 0 {
+        fmt.Fprintf(o.out, "WARNING: About to delete these %s\n", unmanagedMsg(unreplicatedPodNames, []string{}, false))
+    }
+    if len(daemonSetPodNames) > 0 {
+        fmt.Fprintf(o.out, "WARNING: Skipping %s\n", unmanagedMsg([]string{}, daemonSetPodNames, false))
+    }
+
     return pods, nil
 }
 
+// Helper for generating errors or warnings about unmanaged pods.
+func unmanagedMsg(unreplicatedNames []string, daemonSetNames []string, include_guidance bool) string {
+    msgs := []string{}
+    if len(unreplicatedNames) > 0 {
+        msg := fmt.Sprintf("pods not managed by ReplicationController, Job, or DaemonSet: %s", strings.Join(unreplicatedNames, ","))
+        if include_guidance {
+            msg += " (use --force to override)"
+        }
+        msgs = append(msgs, msg)
+    }
+    if len(daemonSetNames) > 0 {
+        msg := fmt.Sprintf("DaemonSet-managed pods: %s", strings.Join(daemonSetNames, ","))
+        if include_guidance {
+            msg += " (use --ignore-daemonsets to ignore)"
+        }
+        msgs = append(msgs, msg)
+    }
+
+    return strings.Join(msgs, " and ")
+}
+
 // deletePods deletes the pods on the api server
 func (o *DrainOptions) deletePods(pods []api.Pod) error {
     deleteOptions := api.DeleteOptions{}
diff --git a/pkg/kubectl/cmd/drain_test.go b/pkg/kubectl/cmd/drain_test.go
index 57c5b10ebad..b0aa36649ae 100644
--- a/pkg/kubectl/cmd/drain_test.go
+++ b/pkg/kubectl/cmd/drain_test.go
@@ -323,8 +323,18 @@ func TestDrain(t *testing.T) {
             pods:         []api.Pod{ds_pod},
             rcs:          []api.ReplicationController{rc},
             args:         []string{"node"},
+            expectFatal:  true,
+            expectDelete: false,
+        },
+        {
+            description:  "DS-managed pod with --ignore-daemonsets",
+            node:         node,
+            expected:     cordoned_node,
+            pods:         []api.Pod{ds_pod},
+            rcs:          []api.ReplicationController{rc},
+            args:         []string{"node", "--ignore-daemonsets"},
             expectFatal:  false,
-            expectDelete: true,
+            expectDelete: false,
         },
         {
             description: "Job-managed pod",
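The selection logic this patch reworks in `getPodsForDeletion` can be summarized apart from the kubectl plumbing. The sketch below is illustrative only: `podInfo` and `selectPodsForDeletion` are hypothetical stand-ins that collapse the creator-annotation decoding and API lookups into plain booleans, but the `switch` mirrors the one added above, with DaemonSet-managed pods only recorded (never deleted), pods without a manager deleted only when force is set, and everything else queued for deletion.

```go
package main

import "fmt"

// podInfo is a hypothetical stand-in for the handful of facts the new drain
// logic derives from each api.Pod (mirror annotation, creator reference, and
// whether that creator still exists).
type podInfo struct {
	name       string
	mirror     bool // has the config mirror annotation; always skipped
	replicated bool // managed by a live ReplicationController or Job
	daemonSet  bool // managed by a live DaemonSet
}

// selectPodsForDeletion mirrors the switch introduced in getPodsForDeletion:
// DaemonSet pods are only recorded, unreplicated pods are deleted only under
// force, and replicated pods are always deleted.
func selectPodsForDeletion(pods []podInfo, force bool) (toDelete, unreplicated, daemonSet []string) {
	for _, p := range pods {
		if p.mirror {
			continue
		}
		switch {
		case p.daemonSet:
			daemonSet = append(daemonSet, p.name)
		case !p.replicated:
			unreplicated = append(unreplicated, p.name)
			if force {
				toDelete = append(toDelete, p.name)
			}
		default:
			toDelete = append(toDelete, p.name)
		}
	}
	return
}

func main() {
	pods := []podInfo{
		{name: "rc-pod", replicated: true},
		{name: "bare-pod"},
		{name: "fluentd-abc12", daemonSet: true},
	}
	del, unrepl, ds := selectPodsForDeletion(pods, true)
	fmt.Println("delete:", del, "unreplicated:", unrepl, "daemonset:", ds)
}
```

Whether the recorded DaemonSet and unreplicated names then become a fatal error or only a warning is decided afterwards by the `daemonSetErrors` and `unreplicatedErrors` booleans, that is, by `--ignore-daemonsets` and `--force`.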
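The error and warning text is composed by the new `unmanagedMsg` helper. Below is a minimal standalone copy of that helper (with the parameter renamed to camelCase) so its output can be checked outside the kubectl codebase; the pod names in `main` are made up for illustration.

```go
package main

import (
	"fmt"
	"strings"
)

// unmanagedMsg is copied from the helper added in drain.go: it joins the two
// pod categories into one message and, when asked, appends the flag that would
// let drain proceed past each category.
func unmanagedMsg(unreplicatedNames []string, daemonSetNames []string, includeGuidance bool) string {
	msgs := []string{}
	if len(unreplicatedNames) > 0 {
		msg := fmt.Sprintf("pods not managed by ReplicationController, Job, or DaemonSet: %s", strings.Join(unreplicatedNames, ","))
		if includeGuidance {
			msg += " (use --force to override)"
		}
		msgs = append(msgs, msg)
	}
	if len(daemonSetNames) > 0 {
		msg := fmt.Sprintf("DaemonSet-managed pods: %s", strings.Join(daemonSetNames, ","))
		if includeGuidance {
			msg += " (use --ignore-daemonsets to ignore)"
		}
		msgs = append(msgs, msg)
	}
	return strings.Join(msgs, " and ")
}

func main() {
	// With guidance: the string that getPodsForDeletion wraps in errors.New
	// when it refuses to continue.
	fmt.Println(unmanagedMsg([]string{"bare-pod"}, []string{"fluentd-abc12"}, true))
	// Without guidance: the tail of the "WARNING: Skipping ..." message.
	fmt.Println(unmanagedMsg(nil, []string{"fluentd-abc12"}, false))
}
```

The first call prints both categories joined with " and ", which is the refusal error; the second prints only the DaemonSet part, which is what the Skipping warning shows.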