diff --git a/pkg/controller/daemon/daemon_controller_test.go b/pkg/controller/daemon/daemon_controller_test.go index 881586c9bbf..ce4c6c5e7fb 100644 --- a/pkg/controller/daemon/daemon_controller_test.go +++ b/pkg/controller/daemon/daemon_controller_test.go @@ -1545,6 +1545,7 @@ func TestNodeShouldRunDaemonPod(t *testing.T) { predicateName string podsOnNode []*v1.Pod nodeCondition []v1.NodeCondition + nodeUnschedulable bool ds *apps.DaemonSet wantToRun, shouldSchedule, shouldContinueRunning bool err error @@ -1800,6 +1801,24 @@ func TestNodeShouldRunDaemonPod(t *testing.T) { shouldSchedule: true, shouldContinueRunning: true, }, + { + predicateName: "ShouldRunDaemonPodOnUnscheduableNode", + ds: &apps.DaemonSet{ + Spec: apps.DaemonSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: simpleDaemonSetLabel}, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: simpleDaemonSetLabel, + }, + Spec: resourcePodSpec("", "50M", "0.5"), + }, + }, + }, + nodeUnschedulable: true, + wantToRun: true, + shouldSchedule: true, + shouldContinueRunning: true, + }, } for i, c := range cases { @@ -1807,6 +1826,7 @@ func TestNodeShouldRunDaemonPod(t *testing.T) { node := newNode("test-node", simpleDaemonSetLabel) node.Status.Conditions = append(node.Status.Conditions, c.nodeCondition...) node.Status.Allocatable = allocatableResources("100M", "1") + node.Spec.Unschedulable = c.nodeUnschedulable manager, _, _, err := newTestController() if err != nil { t.Fatalf("error creating DaemonSets controller: %v", err) diff --git a/pkg/scheduler/algorithm/predicates/predicates.go b/pkg/scheduler/algorithm/predicates/predicates.go index d5a6c602623..23c31e53754 100644 --- a/pkg/scheduler/algorithm/predicates/predicates.go +++ b/pkg/scheduler/algorithm/predicates/predicates.go @@ -69,6 +69,8 @@ const ( NoDiskConflictPred = "NoDiskConflict" // PodToleratesNodeTaintsPred defines the name of predicate PodToleratesNodeTaints. 
PodToleratesNodeTaintsPred = "PodToleratesNodeTaints" + // CheckNodeUnschedulablePred defines the name of predicate CheckNodeUnschedulablePredicate. + CheckNodeUnschedulablePred = "CheckNodeUnschedulable" // PodToleratesNodeNoExecuteTaintsPred defines the name of predicate PodToleratesNodeNoExecuteTaints. PodToleratesNodeNoExecuteTaintsPred = "PodToleratesNodeNoExecuteTaints" // CheckNodeLabelPresencePred defines the name of predicate CheckNodeLabelPresence. @@ -125,7 +127,7 @@ const ( // The order is based on the restrictiveness & complexity of predicates. // Design doc: https://github.com/kubernetes/community/blob/master/contributors/design-proposals/scheduling/predicates-ordering.md var ( - predicatesOrdering = []string{CheckNodeConditionPred, + predicatesOrdering = []string{CheckNodeConditionPred, CheckNodeUnschedulablePred, GeneralPred, HostNamePred, PodFitsHostPortsPred, MatchNodeSelectorPred, PodFitsResourcesPred, NoDiskConflictPred, PodToleratesNodeTaintsPred, PodToleratesNodeNoExecuteTaintsPred, CheckNodeLabelPresencePred, @@ -1446,8 +1448,8 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, node return nil, nil } -// PodToleratesNodeTaints checks if a pod tolerations can tolerate the node taints -func PodToleratesNodeTaints(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) { +// CheckNodeUnschedulablePredicate checks if a pod can be scheduled on a node with Unschedulable spec. 
+func CheckNodeUnschedulablePredicate(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) { if nodeInfo == nil || nodeInfo.Node() == nil { return false, []algorithm.PredicateFailureReason{ErrNodeUnknownCondition}, nil } @@ -1456,6 +1458,15 @@ func PodToleratesNodeTaints(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeI return false, []algorithm.PredicateFailureReason{ErrNodeUnschedulable}, nil } + return true, nil, nil +} + +// PodToleratesNodeTaints checks if a pod's tolerations can tolerate the node's taints. +func PodToleratesNodeTaints(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) { + if nodeInfo == nil || nodeInfo.Node() == nil { + return false, []algorithm.PredicateFailureReason{ErrNodeUnknownCondition}, nil + } + return podToleratesNodeTaints(pod, nodeInfo, func(t *v1.Taint) bool { // PodToleratesNodeTaints is only interested in NoSchedule and NoExecute taints. 
return t.Effect == v1.TaintEffectNoSchedule || t.Effect == v1.TaintEffectNoExecute diff --git a/pkg/scheduler/algorithmprovider/defaults/defaults.go b/pkg/scheduler/algorithmprovider/defaults/defaults.go index 35e044254be..6ac2f1849ad 100644 --- a/pkg/scheduler/algorithmprovider/defaults/defaults.go +++ b/pkg/scheduler/algorithmprovider/defaults/defaults.go @@ -186,12 +186,15 @@ func ApplyFeatureGates() { // if you just want remove specific provider, call func RemovePredicateKeyFromAlgoProvider() factory.RemovePredicateKeyFromAlgorithmProviderMap(predicates.CheckNodeConditionPred) + // Fit is determined based on whether the node's Spec.Unschedulable is set + factory.RegisterMandatoryFitPredicate(predicates.CheckNodeUnschedulablePred, predicates.CheckNodeUnschedulablePredicate) // Fit is determined based on whether a pod can tolerate all of the node's taints factory.RegisterMandatoryFitPredicate(predicates.PodToleratesNodeTaintsPred, predicates.PodToleratesNodeTaints) - // Insert Key "PodToleratesNodeTaints" To All Algorithm Provider + // Insert keys "PodToleratesNodeTaints" and "CheckNodeUnschedulable" into all algorithm providers // The key will insert to all providers which in algorithmProviderMap[] // if you just want insert to specific provider, call func InsertPredicateKeyToAlgoProvider() factory.InsertPredicateKeyToAlgorithmProviderMap(predicates.PodToleratesNodeTaintsPred) + factory.InsertPredicateKeyToAlgorithmProviderMap(predicates.CheckNodeUnschedulablePred) glog.Warningf("TaintNodesByCondition is enabled, PodToleratesNodeTaints predicate is mandatory") }