From d462b4cbc83c71a0c2020d18023751ebc2dced31 Mon Sep 17 00:00:00 2001 From: Mike Danese Date: Wed, 28 Jun 2017 11:29:54 +0200 Subject: [PATCH 1/3] Partially revert "Do not fire InsufficientResourceError when there are intentional reasons." This partially reverts commit 2b311fefba9cc106ed1a8cebc6c61e32814fd9e5. We drop the changes to the DaemonSet controller but leave the test. By reverting the changes, we make it easier to return different values of shouldContinueRunning for intentional predicate failures, rather than lumping all intentional predicate failures together. The test should continue to pass after the fix. --- pkg/controller/daemon/daemoncontroller.go | 44 +++++++---------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/pkg/controller/daemon/daemoncontroller.go b/pkg/controller/daemon/daemoncontroller.go index c45e17ab30a..5f9cc6b2867 100644 --- a/pkg/controller/daemon/daemoncontroller.go +++ b/pkg/controller/daemon/daemoncontroller.go @@ -1022,30 +1022,6 @@ func (dsc *DaemonSetsController) syncDaemonSet(key string) error { return dsc.updateDaemonSetStatus(ds, hash) } -// hasIntentionalPredicatesReasons checks if any of the given predicate failure reasons -// is intentional. -func hasIntentionalPredicatesReasons(reasons []algorithm.PredicateFailureReason) bool { - for _, r := range reasons { - switch reason := r.(type) { - case *predicates.PredicateFailureError: - switch reason { - // intentional - case - predicates.ErrNodeSelectorNotMatch, - predicates.ErrPodNotMatchHostName, - predicates.ErrNodeLabelPresenceViolated, - // this one is probably intentional since it's a workaround for not having - // pod hard anti affinity. - predicates.ErrPodNotFitsHostPorts, - // DaemonSet is expected to respect taints and tolerations - predicates.ErrTaintsTolerationsNotMatch: - return true - } - } - } - return false -} - // nodeShouldRunDaemonPod checks a set of preconditions against a (node,daemonset) and returns a // summary. 
Returned booleans are: // * wantToRun: @@ -1135,12 +1111,6 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten return false, false, false, err } - // Return directly if there is any intentional predicate failure reason, so that daemonset controller skips - // checking other predicate failures, such as InsufficientResourceError and unintentional errors. - if hasIntentionalPredicatesReasons(reasons) { - return false, false, false, nil - } - for _, r := range reasons { glog.V(4).Infof("DaemonSet Predicates failed on node %s for ds '%s/%s' for reason: %v", node.Name, ds.ObjectMeta.Namespace, ds.ObjectMeta.Name, r.GetReason()) switch reason := r.(type) { @@ -1149,8 +1119,20 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten shouldSchedule = false case *predicates.PredicateFailureError: var emitEvent bool + // we try to partition predicates into two partitions here: intentional on the part of the operator and not. switch reason { - // unintentional predicates reasons need to be fired out to event. + // intentional + case + predicates.ErrNodeSelectorNotMatch, + predicates.ErrPodNotMatchHostName, + predicates.ErrNodeLabelPresenceViolated, + // this one is probably intentional since it's a workaround for not having + // pod hard anti affinity. 
+ predicates.ErrPodNotFitsHostPorts, + // DaemonSet is expected to respect taints and tolerations + predicates.ErrTaintsTolerationsNotMatch: + wantToRun, shouldSchedule, shouldContinueRunning = false, false, false + // unintentional case predicates.ErrDiskConflict, predicates.ErrVolumeZoneConflict, From 1aede99abaecf175375430dd1054dd1340553cf0 Mon Sep 17 00:00:00 2001 From: Mike Danese Date: Wed, 28 Jun 2017 11:42:49 +0200 Subject: [PATCH 2/3] fix #45780 slightly differently --- pkg/controller/daemon/daemoncontroller.go | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/pkg/controller/daemon/daemoncontroller.go b/pkg/controller/daemon/daemoncontroller.go index 5f9cc6b2867..9b154990d7f 100644 --- a/pkg/controller/daemon/daemoncontroller.go +++ b/pkg/controller/daemon/daemoncontroller.go @@ -1111,12 +1111,13 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten return false, false, false, err } + var insufficientResourceErr error + for _, r := range reasons { glog.V(4).Infof("DaemonSet Predicates failed on node %s for ds '%s/%s' for reason: %v", node.Name, ds.ObjectMeta.Namespace, ds.ObjectMeta.Name, r.GetReason()) switch reason := r.(type) { case *predicates.InsufficientResourceError: - dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, FailedPlacementReason, "failed to place pod on %q: %s", node.ObjectMeta.Name, reason.Error()) - shouldSchedule = false + insufficientResourceErr = reason case *predicates.PredicateFailureError: var emitEvent bool // we try to partition predicates into two partitions here: intentional on the part of the operator and not. @@ -1128,10 +1129,11 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten predicates.ErrNodeLabelPresenceViolated, // this one is probably intentional since it's a workaround for not having // pod hard anti affinity. 
- predicates.ErrPodNotFitsHostPorts, - // DaemonSet is expected to respect taints and tolerations - predicates.ErrTaintsTolerationsNotMatch: + predicates.ErrPodNotFitsHostPorts: wantToRun, shouldSchedule, shouldContinueRunning = false, false, false + case predicates.ErrTaintsTolerationsNotMatch: + // DaemonSet is expected to respect taints and tolerations + wantToRun, shouldSchedule, shouldContinueRunning = false, false, true // unintentional case predicates.ErrDiskConflict, @@ -1160,6 +1162,12 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten } } } + // only emit this event if insufficient resource is the only thing + // preventing the daemon pod from scheduling + if shouldSchedule && insufficientResourceErr != nil { + dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, FailedPlacementReason, "failed to place pod on %q: %s", node.ObjectMeta.Name, insufficientResourceErr.Error()) + shouldSchedule = false + } return } From 8e6c2ea4d055b53f5cf1886af1c7e5a224d6afd1 Mon Sep 17 00:00:00 2001 From: Mike Danese Date: Wed, 28 Jun 2017 11:27:24 +0200 Subject: [PATCH 3/3] support NoExecute and NoSchedule taints correctly in DaemonSet controller And add some unit tests. --- pkg/controller/daemon/daemoncontroller.go | 20 +++++++-- .../daemon/daemoncontroller_test.go | 43 +++++++++++++++++-- .../algorithm/predicates/predicates.go | 19 ++++++-- 3 files changed, 72 insertions(+), 10 deletions(-) diff --git a/pkg/controller/daemon/daemoncontroller.go b/pkg/controller/daemon/daemoncontroller.go index 9b154990d7f..9641cbde14c 100644 --- a/pkg/controller/daemon/daemoncontroller.go +++ b/pkg/controller/daemon/daemoncontroller.go @@ -1040,7 +1040,14 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten // Because these bools require an && of all their required conditions, we start // with all bools set to true and set a bool to false if a condition is not met. - // A bool should probably not be set to true after this line. 
+ // A bool should probably not be set to true after this line. We can + // return early if we are: + // + // 1. return false, false, false, err + // 2. return false, false, false, nil + // + // Otherwise if a condition is not met, we should set one of these + // bools to false. wantToRun, shouldSchedule, shouldContinueRunning = true, true, true // If the daemon set specifies a node name, check that it matches with node.Name. if !(ds.Spec.Template.Spec.NodeName == "" || ds.Spec.Template.Spec.NodeName == node.Name) { @@ -1130,10 +1137,17 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten // this one is probably intentional since it's a workaround for not having // pod hard anti affinity. predicates.ErrPodNotFitsHostPorts: - wantToRun, shouldSchedule, shouldContinueRunning = false, false, false + return false, false, false, nil case predicates.ErrTaintsTolerationsNotMatch: // DaemonSet is expected to respect taints and tolerations - wantToRun, shouldSchedule, shouldContinueRunning = false, false, true + fitsNoExecute, _, err := predicates.PodToleratesNodeNoExecuteTaints(newPod, nil, nodeInfo) + if err != nil { + return false, false, false, err + } + if !fitsNoExecute { + return false, false, false, nil + } + wantToRun, shouldSchedule = false, false // unintentional case predicates.ErrDiskConflict, diff --git a/pkg/controller/daemon/daemoncontroller_test.go b/pkg/controller/daemon/daemoncontroller_test.go index 5a853ccd575..e65e40c94fb 100644 --- a/pkg/controller/daemon/daemoncontroller_test.go +++ b/pkg/controller/daemon/daemoncontroller_test.go @@ -60,6 +60,7 @@ var ( var ( noScheduleTolerations = []v1.Toleration{{Key: "dedicated", Value: "user1", Effect: "NoSchedule"}} noScheduleTaints = []v1.Taint{{Key: "dedicated", Value: "user1", Effect: "NoSchedule"}} + noExecuteTaints = []v1.Taint{{Key: "dedicated", Value: "user1", Effect: "NoExecute"}} ) var ( @@ -1079,10 +1080,46 @@ func TestDaemonKillFailedPods(t *testing.T) { } } -// 
DaemonSet should not launch a pod on a tainted node when the pod doesn't tolerate that taint. -func TestTaintedNodeDaemonDoesNotLaunchUntoleratePod(t *testing.T) { +// Daemonset should not remove a running pod from a node if the pod doesn't +// tolerate the nodes NoSchedule taint +func TestNoScheduleTaintedDoesntEvicitRunningIntolerantPod(t *testing.T) { for _, strategy := range updateStrategies() { - ds := newDaemonSet("untolerate") + ds := newDaemonSet("intolerant") + ds.Spec.UpdateStrategy = *strategy + manager, podControl, _ := newTestController(ds) + + node := newNode("tainted", nil) + manager.nodeStore.Add(node) + setNodeTaint(node, noScheduleTaints) + manager.podStore.Add(newPod("keep-running-me", "tainted", simpleDaemonSetLabel, ds)) + manager.dsStore.Add(ds) + + syncAndValidateDaemonSets(t, manager, ds, podControl, 0, 0) + } +} + +// Daemonset should remove a running pod from a node if the pod doesn't +// tolerate the nodes NoExecute taint +func TestNoExecuteTaintedDoesEvicitRunningIntolerantPod(t *testing.T) { + for _, strategy := range updateStrategies() { + ds := newDaemonSet("intolerant") + ds.Spec.UpdateStrategy = *strategy + manager, podControl, _ := newTestController(ds) + + node := newNode("tainted", nil) + manager.nodeStore.Add(node) + setNodeTaint(node, noExecuteTaints) + manager.podStore.Add(newPod("stop-running-me", "tainted", simpleDaemonSetLabel, ds)) + manager.dsStore.Add(ds) + + syncAndValidateDaemonSets(t, manager, ds, podControl, 0, 1) + } +} + +// DaemonSet should not launch a pod on a tainted node when the pod doesn't tolerate that taint. 
+func TestTaintedNodeDaemonDoesNotLaunchIntolerantPod(t *testing.T) { + for _, strategy := range updateStrategies() { + ds := newDaemonSet("intolerant") ds.Spec.UpdateStrategy = *strategy manager, podControl, _ := newTestController(ds) diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/plugin/pkg/scheduler/algorithm/predicates/predicates.go index dbb214c943d..99b7482d5cf 100644 --- a/plugin/pkg/scheduler/algorithm/predicates/predicates.go +++ b/plugin/pkg/scheduler/algorithm/predicates/predicates.go @@ -1247,15 +1247,26 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, node // PodToleratesNodeTaints checks if a pod tolertaions can tolerate the node taints func PodToleratesNodeTaints(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) { + return podToleratesNodeTaints(pod, nodeInfo, func(t *v1.Taint) bool { + // PodToleratesNodeTaints is only interested in NoSchedule and NoExecute taints. + return t.Effect == v1.TaintEffectNoSchedule || t.Effect == v1.TaintEffectNoExecute + }) +} + +// PodToleratesNodeNoExecuteTaints checks if a pod tolertaions can tolerate the node's NoExecute taints +func PodToleratesNodeNoExecuteTaints(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) { + return podToleratesNodeTaints(pod, nodeInfo, func(t *v1.Taint) bool { + return t.Effect == v1.TaintEffectNoExecute + }) +} + +func podToleratesNodeTaints(pod *v1.Pod, nodeInfo *schedulercache.NodeInfo, filter func(t *v1.Taint) bool) (bool, []algorithm.PredicateFailureReason, error) { taints, err := nodeInfo.Taints() if err != nil { return false, nil, err } - if v1helper.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, taints, func(t *v1.Taint) bool { - // PodToleratesNodeTaints is only interested in NoSchedule and NoExecute taints. 
- return t.Effect == v1.TaintEffectNoSchedule || t.Effect == v1.TaintEffectNoExecute - }) { + if v1helper.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, taints, filter) { return true, nil, nil } return false, []algorithm.PredicateFailureReason{ErrTaintsTolerationsNotMatch}, nil