diff --git a/pkg/controller/daemon/daemoncontroller.go b/pkg/controller/daemon/daemoncontroller.go index c45e17ab30a..9641cbde14c 100644 --- a/pkg/controller/daemon/daemoncontroller.go +++ b/pkg/controller/daemon/daemoncontroller.go @@ -1022,30 +1022,6 @@ func (dsc *DaemonSetsController) syncDaemonSet(key string) error { return dsc.updateDaemonSetStatus(ds, hash) } -// hasIntentionalPredicatesReasons checks if any of the given predicate failure reasons -// is intentional. -func hasIntentionalPredicatesReasons(reasons []algorithm.PredicateFailureReason) bool { - for _, r := range reasons { - switch reason := r.(type) { - case *predicates.PredicateFailureError: - switch reason { - // intentional - case - predicates.ErrNodeSelectorNotMatch, - predicates.ErrPodNotMatchHostName, - predicates.ErrNodeLabelPresenceViolated, - // this one is probably intentional since it's a workaround for not having - // pod hard anti affinity. - predicates.ErrPodNotFitsHostPorts, - // DaemonSet is expected to respect taints and tolerations - predicates.ErrTaintsTolerationsNotMatch: - return true - } - } - } - return false -} - // nodeShouldRunDaemonPod checks a set of preconditions against a (node,daemonset) and returns a // summary. Returned booleans are: // * wantToRun: @@ -1064,7 +1040,14 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten // Because these bools require an && of all their required conditions, we start // with all bools set to true and set a bool to false if a condition is not met. - // A bool should probably not be set to true after this line. + // A bool should probably not be set to true after this line. We can + // return early if we are: + // + // 1. return false, false, false, err + // 2. return false, false, false, nil + // + // Otherwise if a condition is not met, we should set one of these + // bools to false. wantToRun, shouldSchedule, shouldContinueRunning = true, true, true // If the daemon set specifies a node name, check that it matches with node.Name. if !(ds.Spec.Template.Spec.NodeName == "" || ds.Spec.Template.Spec.NodeName == node.Name) { @@ -1135,22 +1118,37 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten return false, false, false, err } - // Return directly if there is any intentional predicate failure reason, so that daemonset controller skips - // checking other predicate failures, such as InsufficientResourceError and unintentional errors. - if hasIntentionalPredicatesReasons(reasons) { - return false, false, false, nil - } + var insufficientResourceErr error for _, r := range reasons { glog.V(4).Infof("DaemonSet Predicates failed on node %s for ds '%s/%s' for reason: %v", node.Name, ds.ObjectMeta.Namespace, ds.ObjectMeta.Name, r.GetReason()) switch reason := r.(type) { case *predicates.InsufficientResourceError: - dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, FailedPlacementReason, "failed to place pod on %q: %s", node.ObjectMeta.Name, reason.Error()) - shouldSchedule = false + insufficientResourceErr = reason case *predicates.PredicateFailureError: var emitEvent bool + // we try to partition predicates into two partitions here: intentional on the part of the operator and not. switch reason { - // unintentional predicates reasons need to be fired out to event. + // intentional + case + predicates.ErrNodeSelectorNotMatch, + predicates.ErrPodNotMatchHostName, + predicates.ErrNodeLabelPresenceViolated, + // this one is probably intentional since it's a workaround for not having + // pod hard anti affinity. + predicates.ErrPodNotFitsHostPorts: + return false, false, false, nil + case predicates.ErrTaintsTolerationsNotMatch: + // DaemonSet is expected to respect taints and tolerations + fitsNoExecute, _, err := predicates.PodToleratesNodeNoExecuteTaints(newPod, nil, nodeInfo) + if err != nil { + return false, false, false, err + } + if !fitsNoExecute { + return false, false, false, nil + } + wantToRun, shouldSchedule = false, false + // unintentional case predicates.ErrDiskConflict, predicates.ErrVolumeZoneConflict, @@ -1178,6 +1176,12 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten } } } + // only emit this event if insufficient resource is the only thing + // preventing the daemon pod from scheduling + if shouldSchedule && insufficientResourceErr != nil { + dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, FailedPlacementReason, "failed to place pod on %q: %s", node.ObjectMeta.Name, insufficientResourceErr.Error()) + shouldSchedule = false + } return } diff --git a/pkg/controller/daemon/daemoncontroller_test.go b/pkg/controller/daemon/daemoncontroller_test.go index 5a853ccd575..e65e40c94fb 100644 --- a/pkg/controller/daemon/daemoncontroller_test.go +++ b/pkg/controller/daemon/daemoncontroller_test.go @@ -60,6 +60,7 @@ var ( var ( noScheduleTolerations = []v1.Toleration{{Key: "dedicated", Value: "user1", Effect: "NoSchedule"}} noScheduleTaints = []v1.Taint{{Key: "dedicated", Value: "user1", Effect: "NoSchedule"}} + noExecuteTaints = []v1.Taint{{Key: "dedicated", Value: "user1", Effect: "NoExecute"}} ) var ( @@ -1079,10 +1080,46 @@ func TestDaemonKillFailedPods(t *testing.T) { } } -// DaemonSet should not launch a pod on a tainted node when the pod doesn't tolerate that taint. -func TestTaintedNodeDaemonDoesNotLaunchUntoleratePod(t *testing.T) { +// Daemonset should not remove a running pod from a node if the pod doesn't +// tolerate the nodes NoSchedule taint +func TestNoScheduleTaintedDoesntEvicitRunningIntolerantPod(t *testing.T) { for _, strategy := range updateStrategies() { - ds := newDaemonSet("untolerate") + ds := newDaemonSet("intolerant") + ds.Spec.UpdateStrategy = *strategy + manager, podControl, _ := newTestController(ds) + + node := newNode("tainted", nil) + manager.nodeStore.Add(node) + setNodeTaint(node, noScheduleTaints) + manager.podStore.Add(newPod("keep-running-me", "tainted", simpleDaemonSetLabel, ds)) + manager.dsStore.Add(ds) + + syncAndValidateDaemonSets(t, manager, ds, podControl, 0, 0) + } +} + +// Daemonset should remove a running pod from a node if the pod doesn't +// tolerate the nodes NoExecute taint +func TestNoExecuteTaintedDoesEvicitRunningIntolerantPod(t *testing.T) { + for _, strategy := range updateStrategies() { + ds := newDaemonSet("intolerant") + ds.Spec.UpdateStrategy = *strategy + manager, podControl, _ := newTestController(ds) + + node := newNode("tainted", nil) + manager.nodeStore.Add(node) + setNodeTaint(node, noExecuteTaints) + manager.podStore.Add(newPod("stop-running-me", "tainted", simpleDaemonSetLabel, ds)) + manager.dsStore.Add(ds) + + syncAndValidateDaemonSets(t, manager, ds, podControl, 0, 1) + } +} + +// DaemonSet should not launch a pod on a tainted node when the pod doesn't tolerate that taint. +func TestTaintedNodeDaemonDoesNotLaunchIntolerantPod(t *testing.T) { + for _, strategy := range updateStrategies() { + ds := newDaemonSet("intolerant") ds.Spec.UpdateStrategy = *strategy manager, podControl, _ := newTestController(ds) diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/plugin/pkg/scheduler/algorithm/predicates/predicates.go index dbb214c943d..99b7482d5cf 100644 --- a/plugin/pkg/scheduler/algorithm/predicates/predicates.go +++ b/plugin/pkg/scheduler/algorithm/predicates/predicates.go @@ -1247,15 +1247,26 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, node // PodToleratesNodeTaints checks if a pod tolertaions can tolerate the node taints func PodToleratesNodeTaints(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) { + return podToleratesNodeTaints(pod, nodeInfo, func(t *v1.Taint) bool { + // PodToleratesNodeTaints is only interested in NoSchedule and NoExecute taints. + return t.Effect == v1.TaintEffectNoSchedule || t.Effect == v1.TaintEffectNoExecute + }) +} + +// PodToleratesNodeNoExecuteTaints checks if a pod tolertaions can tolerate the node's NoExecute taints +func PodToleratesNodeNoExecuteTaints(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) { + return podToleratesNodeTaints(pod, nodeInfo, func(t *v1.Taint) bool { + return t.Effect == v1.TaintEffectNoExecute + }) +} + +func podToleratesNodeTaints(pod *v1.Pod, nodeInfo *schedulercache.NodeInfo, filter func(t *v1.Taint) bool) (bool, []algorithm.PredicateFailureReason, error) { taints, err := nodeInfo.Taints() if err != nil { return false, nil, err } - if v1helper.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, taints, func(t *v1.Taint) bool { - // PodToleratesNodeTaints is only interested in NoSchedule and NoExecute taints. - return t.Effect == v1.TaintEffectNoSchedule || t.Effect == v1.TaintEffectNoExecute - }) { + if v1helper.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, taints, filter) { return true, nil, nil } return false, []algorithm.PredicateFailureReason{ErrTaintsTolerationsNotMatch}, nil