From 877218768019e104f64fbac13307b66ab8c0ba3e Mon Sep 17 00:00:00 2001 From: John Schnake Date: Tue, 6 Aug 2019 15:08:29 -0500 Subject: [PATCH 1/2] Add new flag for whitelisting node taints Adds a new flag which allows users to specify a regexp which will effectively whitelist certain taints and allow the test framework to startup tests despite having tainted nodes. Fixes an issue where e2e tests were unable to be run in tainted environments due to the framework waiting for all the nodes to be schedulable without any tolerations. --- test/e2e/framework/BUILD | 6 +- test/e2e/framework/node/BUILD | 16 +- test/e2e/framework/node/resource.go | 66 +++++-- test/e2e/framework/node/wait.go | 77 ++++++++ test/e2e/framework/node/wait_test.go | 251 +++++++++++++++++++++++++++ test/e2e/framework/test_context.go | 15 ++ test/e2e/framework/util.go | 102 +---------- 7 files changed, 421 insertions(+), 112 deletions(-) create mode 100644 test/e2e/framework/node/wait_test.go diff --git a/test/e2e/framework/BUILD b/test/e2e/framework/BUILD index 4e826b2bdca..76535a44eb6 100644 --- a/test/e2e/framework/BUILD +++ b/test/e2e/framework/BUILD @@ -1,5 +1,3 @@ -package(default_visibility = ["//visibility:public"]) - load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( @@ -28,6 +26,7 @@ go_library( "util.go", ], importpath = "k8s.io/kubernetes/test/e2e/framework", + visibility = ["//visibility:public"], deps = [ "//pkg/api/v1/pod:go_default_library", "//pkg/apis/core:go_default_library", @@ -39,8 +38,6 @@ go_library( "//pkg/kubelet/events:go_default_library", "//pkg/kubelet/sysctl:go_default_library", "//pkg/master/ports:go_default_library", - "//pkg/scheduler/algorithm/predicates:go_default_library", - "//pkg/scheduler/nodeinfo:go_default_library", "//pkg/util/taints:go_default_library", "//pkg/version:go_default_library", "//pkg/volume/util:go_default_library", @@ -155,4 +152,5 @@ filegroup( "//test/e2e/framework/volume:all-srcs", ], tags = ["automanaged"], + visibility = ["//visibility:public"], ) diff --git a/test/e2e/framework/node/BUILD b/test/e2e/framework/node/BUILD index 87c35cb4b62..bd1e3a5b893 100644 --- a/test/e2e/framework/node/BUILD +++ b/test/e2e/framework/node/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "go_default_library", @@ -37,3 +37,17 @@ filegroup( tags = ["automanaged"], visibility = ["//visibility:public"], ) + +go_test( + name = "go_default_test", + srcs = ["wait_test.go"], + embed = [":go_default_library"], + deps = [ + "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library", + "//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library", + "//staging/src/k8s.io/client-go/testing:go_default_library", + ], +) diff --git a/test/e2e/framework/node/resource.go b/test/e2e/framework/node/resource.go index 3531e2462db..c5fddeba776 100644 --- a/test/e2e/framework/node/resource.go +++ b/test/e2e/framework/node/resource.go @@ -19,6 +19,7 @@ package node import ( "fmt" "net" + "regexp" "time" v1 "k8s.io/api/core/v1" @@ -342,7 +343,7 @@ func GetReadySchedulableNodesOrDie(c clientset.Interface) (nodes *v1.NodeList, e // previous tests may have cause failures of some nodes. Let's skip // 'Not Ready' nodes, just in case (there is no need to fail the test). Filter(nodes, func(node v1.Node) bool { - return isNodeSchedulable(&node) && isNodeUntainted(&node) + return IsNodeSchedulable(&node) && IsNodeUntainted(&node) }) return nodes, nil } @@ -357,7 +358,7 @@ func GetReadyNodesIncludingTainted(c clientset.Interface) (nodes *v1.NodeList, e return nil, fmt.Errorf("listing schedulable nodes error: %s", err) } Filter(nodes, func(node v1.Node) bool { - return isNodeSchedulable(&node) + return IsNodeSchedulable(&node) }) return nodes, nil } @@ -373,16 +374,22 @@ func GetMasterAndWorkerNodes(c clientset.Interface) (sets.String, *v1.NodeList, for _, n := range all.Items { if system.DeprecatedMightBeMasterNode(n.Name) { masters.Insert(n.Name) - } else if isNodeSchedulable(&n) && isNodeUntainted(&n) { + } else if IsNodeSchedulable(&n) && IsNodeUntainted(&n) { nodes.Items = append(nodes.Items, n) } } return masters, nodes, nil } -// Test whether a fake pod can be scheduled on "node", given its current taints. +// IsNodeUntainted tests whether a fake pod can be scheduled on "node", given its current taints. // TODO: need to discuss wether to return bool and error type -func isNodeUntainted(node *v1.Node) bool { +func IsNodeUntainted(node *v1.Node) bool { + return isNodeUntaintedWhitelist(node, nil) +} + +// isNodeUntaintedWhitelist tests whether a fake pod can be scheduled on "node" +// but allows for taints matching the regexp of whitelisted values. +func isNodeUntaintedWhitelist(node *v1.Node, ignoreTaints *regexp.Regexp) bool { fakePod := &v1.Pod{ TypeMeta: metav1.TypeMeta{ Kind: "Pod", @@ -401,8 +408,22 @@ func isNodeUntainted(node *v1.Node) bool { }, }, } + nodeInfo := schedulernodeinfo.NewNodeInfo() - nodeInfo.SetNode(node) + + if ignoreTaints != nil { + nodeCopy := node.DeepCopy() + nodeCopy.Spec.Taints = []v1.Taint{} + for _, v := range node.Spec.Taints { + if !ignoreTaints.MatchString(v.Key) { + nodeCopy.Spec.Taints = append(nodeCopy.Spec.Taints, v) + } + } + nodeInfo.SetNode(nodeCopy) + } else { + nodeInfo.SetNode(node) + } + fit, _, err := predicates.PodToleratesNodeTaints(fakePod, nil, nodeInfo) if err != nil { e2elog.Failf("Can't test predicates for node %s: %v", node.Name, err) @@ -411,15 +432,38 @@ func isNodeUntainted(node *v1.Node) bool { return fit } -// Node is schedulable if: +// IsNodeSchedulable returns true if: // 1) doesn't have "unschedulable" field set -// 2) it's Ready condition is set to true -// 3) doesn't have NetworkUnavailable condition set to true -func isNodeSchedulable(node *v1.Node) bool { +// 2) it also returns true from IsNodeReady +func IsNodeSchedulable(node *v1.Node) bool { + if node == nil { + return false + } + return !node.Spec.Unschedulable && IsNodeReady(node) +} + +// IsNodeReady returns true if: +// 1) it's Ready condition is set to true +// 2) doesn't have NetworkUnavailable condition set to true +func IsNodeReady(node *v1.Node) bool { nodeReady := IsConditionSetAsExpected(node, v1.NodeReady, true) networkReady := IsConditionUnset(node, v1.NodeNetworkUnavailable) || IsConditionSetAsExpectedSilent(node, v1.NodeNetworkUnavailable, false) - return !node.Spec.Unschedulable && nodeReady && networkReady + return nodeReady && networkReady +} + +// hasWhitelistedTaint returns true if the node contains at least +// one taint with a key matching the regexp. +func hasWhitelistedTaint(node *v1.Node, whitelist *regexp.Regexp) bool { + if whitelist == nil { + return false + } + for _, v := range node.Spec.Taints { + if whitelist.MatchString(v.Key) { + return true + } + } + return false } // PodNodePairs return podNode pairs for all pods in a namespace diff --git a/test/e2e/framework/node/wait.go b/test/e2e/framework/node/wait.go index 9934c24687e..b4882f52b6a 100644 --- a/test/e2e/framework/node/wait.go +++ b/test/e2e/framework/node/wait.go @@ -206,3 +206,80 @@ func checkWaitListSchedulableNodes(c clientset.Interface) (*v1.NodeList, error) } return nodes, nil } + +// CheckReadyForTests returns a method usable in polling methods which will check that the nodes are +// in a testable state based on schedulability. +func CheckReadyForTests(c clientset.Interface, whitelistedTaints *regexp.Regexp, allowedNotReadyNodes, largeClusterThreshold int) func() (bool, error) { + attempt := 0 + var notSchedulable []*v1.Node + return func() (bool, error) { + attempt++ + notSchedulable = nil + opts := metav1.ListOptions{ + ResourceVersion: "0", + FieldSelector: fields.Set{"spec.unschedulable": "false"}.AsSelector().String(), + } + nodes, err := c.CoreV1().Nodes().List(opts) + if err != nil { + e2elog.Logf("Unexpected error listing nodes: %v", err) + if testutils.IsRetryableAPIError(err) { + return false, nil + } + return false, err + } + for i := range nodes.Items { + node := &nodes.Items[i] + if !readyForTests(node, whitelistedTaints) { + notSchedulable = append(notSchedulable, node) + } + } + // Framework allows for nodes to be non-ready, + // to make it possible e.g. for incorrect deployment of some small percentage + // of nodes (which we allow in cluster validation). Some nodes that are not + // provisioned correctly at startup will never become ready (e.g. when something + // won't install correctly), so we can't expect them to be ready at any point. + // + // However, we only allow non-ready nodes with some specific reasons. + if len(notSchedulable) > 0 { + // In large clusters, log them only every 10th pass. + if len(nodes.Items) < largeClusterThreshold || attempt%10 == 0 { + e2elog.Logf("Unschedulable nodes:") + for i := range notSchedulable { + whitelist := "" + if whitelistedTaints != nil { + whitelist = whitelistedTaints.String() + } + e2elog.Logf("-> %s Ready=%t Network=%t Taints=%v WhitelistedTaints:%v", + notSchedulable[i].Name, + IsConditionSetAsExpectedSilent(notSchedulable[i], v1.NodeReady, true), + IsConditionSetAsExpectedSilent(notSchedulable[i], v1.NodeNetworkUnavailable, false), + notSchedulable[i].Spec.Taints, + whitelist, + ) + + } + e2elog.Logf("================================") + } + } + return len(notSchedulable) <= allowedNotReadyNodes, nil + } +} + +// readyForTests determines whether or not we should continue waiting for the nodes +// to enter a testable state. By default this means it is schedulable, NodeReady, and untainted. +// Nodes with taints matching the whitelistedTaintRegexp are permitted to have that taint and +// also have their node.Spec.Unschedulable field ignored for the purposes of this function. +func readyForTests(node *v1.Node, whitelistedTaints *regexp.Regexp) bool { + if hasWhitelistedTaint(node, whitelistedTaints) { + // If the node has one of the whitelisted taints; just check that it is ready + // and don't require node.Spec.Unschedulable to be set either way. + if !IsNodeReady(node) || !isNodeUntaintedWhitelist(node, whitelistedTaints) { + return false + } + } else { + if !IsNodeSchedulable(node) || !IsNodeUntainted(node) { + return false + } + } + return true +} diff --git a/test/e2e/framework/node/wait_test.go b/test/e2e/framework/node/wait_test.go new file mode 100644 index 00000000000..9e2089963e5 --- /dev/null +++ b/test/e2e/framework/node/wait_test.go @@ -0,0 +1,251 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package node + +import ( + "errors" + "regexp" + "testing" + + v1 "k8s.io/api/core/v1" + apierrs "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" +) + +// TestCheckReadyForTests specifically is concerned about the multi-node logic +// since single node checks are in TestReadyForTests. +func TestCheckReadyForTests(t *testing.T) { + // This is a duplicate definition of the constant in pkg/controller/service/service_controller.go + labelNodeRoleMaster := "node-role.kubernetes.io/master" + + fromVanillaNode := func(f func(*v1.Node)) v1.Node { + vanillaNode := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + {Type: v1.NodeReady, Status: v1.ConditionTrue}, + }, + }, + } + f(vanillaNode) + return *vanillaNode + } + + tcs := []struct { + desc string + whitelist *regexp.Regexp + allowedNotReadyNodes int + nodes []v1.Node + nodeListErr error + expected bool + expectedErr string + }{ + { + desc: "Vanilla node should pass", + nodes: []v1.Node{ + fromVanillaNode(func(n *v1.Node) {}), + }, + expected: true, + }, { + desc: "Default value for whitelist regexp tolerates master taint", + whitelist: regexp.MustCompile(`^node-role\.kubernetes\.io/master$`), + nodes: []v1.Node{ + fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: labelNodeRoleMaster, Effect: v1.TaintEffectNoSchedule}} + }), + }, + expected: true, + }, { + desc: "Tainted node should fail if effect is TaintEffectNoExecute", + whitelist: regexp.MustCompile("bar"), + nodes: []v1.Node{ + fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoExecute}} + })}, + expected: false, + }, { + desc: "Tainted node can be allowed via allowedNotReadyNodes", + whitelist: regexp.MustCompile("bar"), + allowedNotReadyNodes: 1, + nodes: []v1.Node{ + fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoExecute}} + })}, + expected: true, + }, { + desc: "Multi-node, all OK", + nodes: []v1.Node{ + fromVanillaNode(func(n *v1.Node) {}), + fromVanillaNode(func(n *v1.Node) {}), + }, + expected: true, + }, { + desc: "Multi-node, single blocking node blocks", + nodes: []v1.Node{ + fromVanillaNode(func(n *v1.Node) {}), + fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoSchedule}} + }), + }, + expected: false, + }, { + desc: "Multi-node, single blocking node allowed via allowedNotReadyNodes", + allowedNotReadyNodes: 1, + nodes: []v1.Node{ + fromVanillaNode(func(n *v1.Node) {}), + fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoSchedule}} + }), + }, + expected: true, + }, { + desc: "Multi-node, single blocking node allowed via whitelisted taint", + whitelist: regexp.MustCompile("foo"), + nodes: []v1.Node{ + fromVanillaNode(func(n *v1.Node) {}), + fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoSchedule}} + }), + }, + expected: true, + }, { + desc: "Errors from node list are reported", + nodeListErr: errors.New("Forced error"), + expected: false, + expectedErr: "Forced error", + }, { + desc: "Retryable errors from node list are reported but still return false", + nodeListErr: apierrs.NewTimeoutError("Retryable error", 10), + expected: false, + }, + } + + // Only determines some logging functionality; not relevant so set to a large value. + testLargeClusterThreshold := 1000 + + for _, tc := range tcs { + t.Run(tc.desc, func(t *testing.T) { + c := fake.NewSimpleClientset() + c.PrependReactor("list", "nodes", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + nodeList := &v1.NodeList{Items: tc.nodes} + return true, nodeList, tc.nodeListErr + }) + checkFunc := CheckReadyForTests(c, tc.whitelist, tc.allowedNotReadyNodes, testLargeClusterThreshold) + out, err := checkFunc() + if out != tc.expected { + t.Errorf("Expected %v but got %v", tc.expected, out) + } + switch { + case err == nil && len(tc.expectedErr) > 0: + t.Errorf("Expected error %q nil", tc.expectedErr) + case err != nil && err.Error() != tc.expectedErr: + t.Errorf("Expected error %q but got %q", tc.expectedErr, err.Error()) + } + }) + } +} + +func TestReadyForTests(t *testing.T) { + fromVanillaNode := func(f func(*v1.Node)) *v1.Node { + vanillaNode := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + {Type: v1.NodeReady, Status: v1.ConditionTrue}, + }, + }, + } + f(vanillaNode) + return vanillaNode + } + _ = fromVanillaNode + tcs := []struct { + desc string + node *v1.Node + whitelist *regexp.Regexp + expected bool + }{ + { + desc: "Vanilla node should pass", + node: fromVanillaNode(func(n *v1.Node) { + }), + expected: true, + }, { + desc: "Vanilla node should pass with non-applicable whitelist", + whitelist: regexp.MustCompile("foo"), + node: fromVanillaNode(func(n *v1.Node) { + }), + expected: true, + }, { + desc: "Tainted node should pass if effect is TaintEffectPreferNoSchedule", + whitelist: regexp.MustCompile("bar"), + node: fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectPreferNoSchedule}} + }), + expected: true, + }, { + desc: "Tainted node should fail if effect is TaintEffectNoExecute", + whitelist: regexp.MustCompile("bar"), + node: fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoExecute}} + }), + expected: false, + }, { + desc: "Tainted node should fail", + whitelist: regexp.MustCompile("bar"), + node: fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoSchedule}} + }), + expected: false, + }, { + desc: "Tainted node should pass if whitelisted", + whitelist: regexp.MustCompile("foo"), + node: fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoSchedule}} + }), + expected: true, + }, { + desc: "Node with network not ready fails", + whitelist: regexp.MustCompile("foo"), + node: fromVanillaNode(func(n *v1.Node) { + n.Status.Conditions = append(n.Status.Conditions, + v1.NodeCondition{Type: v1.NodeNetworkUnavailable, Status: v1.ConditionTrue}, + ) + }), + expected: false, + }, { + desc: "Node fails unless NodeReady status", + whitelist: regexp.MustCompile("foo"), + node: fromVanillaNode(func(n *v1.Node) { + n.Status.Conditions = []v1.NodeCondition{} + }), + expected: false, + }, + } + + for _, tc := range tcs { + t.Run(tc.desc, func(t *testing.T) { + out := readyForTests(tc.node, tc.whitelist) + if out != tc.expected { + t.Errorf("Expected %v but got %v", tc.expected, out) + } + }) + } +} diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go index 7757635968f..2cd7fd2a7d7 100644 --- a/test/e2e/framework/test_context.go +++ b/test/e2e/framework/test_context.go @@ -21,6 +21,7 @@ import ( "fmt" "io/ioutil" "os" + "regexp" "sort" "strings" "time" @@ -166,6 +167,12 @@ type TestContextType struct { // The Default IP Family of the cluster ("ipv4" or "ipv6") IPFamily string + + // WhitelistedTaint is the string given by the user to specify taints which should not stop the test framework from running tests. + WhitelistedTaint string + + // WhitelistedTaintRegexp is the *regexp.Regexp parsed from the WhitelistedTaint. + WhitelistedTaintRegexp *regexp.Regexp } // NodeKillerConfig describes configuration of NodeKiller -- a utility to @@ -283,6 +290,7 @@ func RegisterCommonFlags(flags *flag.FlagSet) { flags.StringVar(&TestContext.ImageServiceEndpoint, "image-service-endpoint", "", "The image service endpoint of cluster VM instances.") flags.StringVar(&TestContext.DockershimCheckpointDir, "dockershim-checkpoint-dir", "/var/lib/dockershim/sandbox", "The directory for dockershim to store sandbox checkpoints.") flags.StringVar(&TestContext.KubernetesAnywherePath, "kubernetes-anywhere-path", "/workspace/k8s.io/kubernetes-anywhere", "Which directory kubernetes-anywhere is installed to.") + flags.StringVar(&TestContext.WhitelistedTaint, "whitelist-taint-regexp", `^node-role\.kubernetes\.io/master$`, "Nodes with taints which match this regexp will not block the test framework from starting tests.") flags.BoolVar(&TestContext.ListImages, "list-images", false, "If true, will show list of images used for runnning tests.") } @@ -424,6 +432,13 @@ func AfterReadingAllFlags(t *TestContextType) { t.AllowedNotReadyNodes = t.CloudConfig.NumNodes / 100 } + if len(TestContext.WhitelistedTaint) > 0 { + TestContext.WhitelistedTaintRegexp = regexp.MustCompile(TestContext.WhitelistedTaint) + klog.Infof("Tolerating taints matching regexp %q when considering if nodes are ready", TestContext.WhitelistedTaintRegexp.String()) + } else { + klog.Info("No taints will be tolerated when considering if nodes are ready") + } + // Make sure that all test runs have a valid TestContext.CloudConfig.Provider. // TODO: whether and how long this code is needed is getting discussed // in https://github.com/kubernetes/kubernetes/issues/70194. diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index b825aae8d4c..086d7571a98 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -76,8 +76,6 @@ import ( "k8s.io/kubernetes/pkg/controller" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/master/ports" - "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates" - schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" taintutils "k8s.io/kubernetes/pkg/util/taints" "k8s.io/kubernetes/test/e2e/framework/ginkgowrapper" e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet" @@ -1893,47 +1891,6 @@ func waitListSchedulableNodesOrDie(c clientset.Interface) *v1.NodeList { return nodes } -// Node is schedulable if: -// 1) doesn't have "unschedulable" field set -// 2) it's Ready condition is set to true -// 3) doesn't have NetworkUnavailable condition set to true -func isNodeSchedulable(node *v1.Node) bool { - nodeReady := e2enode.IsConditionSetAsExpected(node, v1.NodeReady, true) - networkReady := e2enode.IsConditionUnset(node, v1.NodeNetworkUnavailable) || - e2enode.IsConditionSetAsExpectedSilent(node, v1.NodeNetworkUnavailable, false) - return !node.Spec.Unschedulable && nodeReady && networkReady -} - -// Test whether a fake pod can be scheduled on "node", given its current taints. -func isNodeUntainted(node *v1.Node) bool { - fakePod := &v1.Pod{ - TypeMeta: metav1.TypeMeta{ - Kind: "Pod", - APIVersion: "v1", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "fake-not-scheduled", - Namespace: "fake-not-scheduled", - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "fake-not-scheduled", - Image: "fake-not-scheduled", - }, - }, - }, - } - nodeInfo := schedulernodeinfo.NewNodeInfo() - nodeInfo.SetNode(node) - fit, _, err := predicates.PodToleratesNodeTaints(fakePod, nil, nodeInfo) - if err != nil { - e2elog.Failf("Can't test predicates for node %s: %v", node.Name, err) - return false - } - return fit -} - // GetReadySchedulableNodesOrDie addresses the common use case of getting nodes you can do work on. // 1) Needs to be schedulable. // 2) Needs to be ready. @@ -1944,7 +1901,7 @@ func GetReadySchedulableNodesOrDie(c clientset.Interface) (nodes *v1.NodeList) { // previous tests may have cause failures of some nodes. Let's skip // 'Not Ready' nodes, just in case (there is no need to fail the test). e2enode.Filter(nodes, func(node v1.Node) bool { - return isNodeSchedulable(&node) && isNodeUntainted(&node) + return e2enode.IsNodeSchedulable(&node) && e2enode.IsNodeUntainted(&node) }) return nodes } @@ -1954,58 +1911,11 @@ func GetReadySchedulableNodesOrDie(c clientset.Interface) (nodes *v1.NodeList) { func WaitForAllNodesSchedulable(c clientset.Interface, timeout time.Duration) error { e2elog.Logf("Waiting up to %v for all (but %d) nodes to be schedulable", timeout, TestContext.AllowedNotReadyNodes) - var notSchedulable []*v1.Node - attempt := 0 - return wait.PollImmediate(30*time.Second, timeout, func() (bool, error) { - attempt++ - notSchedulable = nil - opts := metav1.ListOptions{ - ResourceVersion: "0", - FieldSelector: fields.Set{"spec.unschedulable": "false"}.AsSelector().String(), - } - nodes, err := c.CoreV1().Nodes().List(opts) - if err != nil { - e2elog.Logf("Unexpected error listing nodes: %v", err) - if testutils.IsRetryableAPIError(err) { - return false, nil - } - return false, err - } - for i := range nodes.Items { - node := &nodes.Items[i] - if _, hasMasterRoleLabel := node.ObjectMeta.Labels["node-role.kubernetes.io/master"]; hasMasterRoleLabel { - // Kops clusters have masters with spec.unscheduable = false and - // node-role.kubernetes.io/master NoSchedule taint. - // Don't wait for them. - continue - } - if !isNodeSchedulable(node) || !isNodeUntainted(node) { - notSchedulable = append(notSchedulable, node) - } - } - // Framework allows for nodes to be non-ready, - // to make it possible e.g. for incorrect deployment of some small percentage - // of nodes (which we allow in cluster validation). Some nodes that are not - // provisioned correctly at startup will never become ready (e.g. when something - // won't install correctly), so we can't expect them to be ready at any point. - // - // However, we only allow non-ready nodes with some specific reasons. - if len(notSchedulable) > 0 { - // In large clusters, log them only every 10th pass. - if len(nodes.Items) < largeClusterThreshold || attempt%10 == 0 { - e2elog.Logf("Unschedulable nodes:") - for i := range notSchedulable { - e2elog.Logf("-> %s Ready=%t Network=%t Taints=%v", - notSchedulable[i].Name, - e2enode.IsConditionSetAsExpectedSilent(notSchedulable[i], v1.NodeReady, true), - e2enode.IsConditionSetAsExpectedSilent(notSchedulable[i], v1.NodeNetworkUnavailable, false), - notSchedulable[i].Spec.Taints) - } - e2elog.Logf("================================") - } - } - return len(notSchedulable) <= TestContext.AllowedNotReadyNodes, nil - }) + return wait.PollImmediate( + 30*time.Second, + timeout, + e2enode.CheckReadyForTests(c, TestContext.WhitelistedTaintRegexp, TestContext.AllowedNotReadyNodes, largeClusterThreshold), + ) } // GetPodSecretUpdateTimeout reuturns the timeout duration for updating pod secret. From 3c53481d5cf2a779981c83f88e42fd08ed42160f Mon Sep 17 00:00:00 2001 From: John Schnake Date: Thu, 5 Sep 2019 11:48:41 -0500 Subject: [PATCH 2/2] Move from regexp to csv string --- test/e2e/framework/node/resource.go | 42 ++++++++++----- test/e2e/framework/node/wait.go | 22 ++++---- test/e2e/framework/node/wait_test.go | 78 +++++++++++++++++----------- test/e2e/framework/test_context.go | 17 ++---- test/e2e/framework/util.go | 2 +- 5 files changed, 93 insertions(+), 68 deletions(-) diff --git a/test/e2e/framework/node/resource.go b/test/e2e/framework/node/resource.go index c5fddeba776..adf1cbaf9e1 100644 --- a/test/e2e/framework/node/resource.go +++ b/test/e2e/framework/node/resource.go @@ -19,7 +19,7 @@ package node import ( "fmt" "net" - "regexp" + "strings" "time" v1 "k8s.io/api/core/v1" @@ -384,12 +384,12 @@ func GetMasterAndWorkerNodes(c clientset.Interface) (sets.String, *v1.NodeList, // IsNodeUntainted tests whether a fake pod can be scheduled on "node", given its current taints. // TODO: need to discuss wether to return bool and error type func IsNodeUntainted(node *v1.Node) bool { - return isNodeUntaintedWhitelist(node, nil) + return isNodeUntaintedWithNonblocking(node, "") } -// isNodeUntaintedWhitelist tests whether a fake pod can be scheduled on "node" -// but allows for taints matching the regexp of whitelisted values. -func isNodeUntaintedWhitelist(node *v1.Node, ignoreTaints *regexp.Regexp) bool { +// isNodeUntaintedWithNonblocking tests whether a fake pod can be scheduled on "node" +// but allows for taints in the list of non-blocking taints. +func isNodeUntaintedWithNonblocking(node *v1.Node, nonblockingTaints string) bool { fakePod := &v1.Pod{ TypeMeta: metav1.TypeMeta{ Kind: "Pod", @@ -411,11 +411,19 @@ func isNodeUntaintedWhitelist(node *v1.Node, ignoreTaints *regexp.Regexp) bool { nodeInfo := schedulernodeinfo.NewNodeInfo() - if ignoreTaints != nil { + // Simple lookup for nonblocking taints based on comma-delimited list. + nonblockingTaintsMap := map[string]struct{}{} + for _, t := range strings.Split(nonblockingTaints, ",") { + if strings.TrimSpace(t) != "" { + nonblockingTaintsMap[strings.TrimSpace(t)] = struct{}{} + } + } + + if len(nonblockingTaintsMap) > 0 { nodeCopy := node.DeepCopy() nodeCopy.Spec.Taints = []v1.Taint{} for _, v := range node.Spec.Taints { - if !ignoreTaints.MatchString(v.Key) { + if _, isNonblockingTaint := nonblockingTaintsMap[v.Key]; !isNonblockingTaint { nodeCopy.Spec.Taints = append(nodeCopy.Spec.Taints, v) } } @@ -452,17 +460,27 @@ func IsNodeReady(node *v1.Node) bool { return nodeReady && networkReady } -// hasWhitelistedTaint returns true if the node contains at least +// hasNonblockingTaint returns true if the node contains at least // one taint with a key matching the regexp. -func hasWhitelistedTaint(node *v1.Node, whitelist *regexp.Regexp) bool { - if whitelist == nil { +func hasNonblockingTaint(node *v1.Node, nonblockingTaints string) bool { + if node == nil { return false } - for _, v := range node.Spec.Taints { - if whitelist.MatchString(v.Key) { + + // Simple lookup for nonblocking taints based on comma-delimited list. + nonblockingTaintsMap := map[string]struct{}{} + for _, t := range strings.Split(nonblockingTaints, ",") { + if strings.TrimSpace(t) != "" { + nonblockingTaintsMap[strings.TrimSpace(t)] = struct{}{} + } + } + + for _, taint := range node.Spec.Taints { + if _, hasNonblockingTaint := nonblockingTaintsMap[taint.Key]; hasNonblockingTaint { return true } } + return false } diff --git a/test/e2e/framework/node/wait.go b/test/e2e/framework/node/wait.go index b4882f52b6a..0d4c9c8d74e 100644 --- a/test/e2e/framework/node/wait.go +++ b/test/e2e/framework/node/wait.go @@ -209,7 +209,7 @@ func checkWaitListSchedulableNodes(c clientset.Interface) (*v1.NodeList, error) // CheckReadyForTests returns a method usable in polling methods which will check that the nodes are // in a testable state based on schedulability. -func CheckReadyForTests(c clientset.Interface, whitelistedTaints *regexp.Regexp, allowedNotReadyNodes, largeClusterThreshold int) func() (bool, error) { +func CheckReadyForTests(c clientset.Interface, nonblockingTaints string, allowedNotReadyNodes, largeClusterThreshold int) func() (bool, error) { attempt := 0 var notSchedulable []*v1.Node return func() (bool, error) { @@ -229,7 +229,7 @@ func CheckReadyForTests(c clientset.Interface, whitelistedTaints *regexp.Regexp, } for i := range nodes.Items { node := &nodes.Items[i] - if !readyForTests(node, whitelistedTaints) { + if !readyForTests(node, nonblockingTaints) { notSchedulable = append(notSchedulable, node) } } @@ -245,16 +245,12 @@ func CheckReadyForTests(c clientset.Interface, whitelistedTaints *regexp.Regexp, if len(nodes.Items) < largeClusterThreshold || attempt%10 == 0 { e2elog.Logf("Unschedulable nodes:") for i := range notSchedulable { - whitelist := "" - if whitelistedTaints != nil { - whitelist = whitelistedTaints.String() - } - e2elog.Logf("-> %s Ready=%t Network=%t Taints=%v WhitelistedTaints:%v", + e2elog.Logf("-> %s Ready=%t Network=%t Taints=%v NonblockingTaints:%v", notSchedulable[i].Name, IsConditionSetAsExpectedSilent(notSchedulable[i], v1.NodeReady, true), IsConditionSetAsExpectedSilent(notSchedulable[i], v1.NodeNetworkUnavailable, false), notSchedulable[i].Spec.Taints, - whitelist, + nonblockingTaints, ) } @@ -267,13 +263,13 @@ func CheckReadyForTests(c clientset.Interface, whitelistedTaints *regexp.Regexp, // readyForTests determines whether or not we should continue waiting for the nodes // to enter a testable state. By default this means it is schedulable, NodeReady, and untainted. -// Nodes with taints matching the whitelistedTaintRegexp are permitted to have that taint and +// Nodes with taints nonblocking taints are permitted to have that taint and // also have their node.Spec.Unschedulable field ignored for the purposes of this function. -func readyForTests(node *v1.Node, whitelistedTaints *regexp.Regexp) bool { - if hasWhitelistedTaint(node, whitelistedTaints) { - // If the node has one of the whitelisted taints; just check that it is ready +func readyForTests(node *v1.Node, nonblockingTaints string) bool { + if hasNonblockingTaint(node, nonblockingTaints) { + // If the node has one of the nonblockingTaints taints; just check that it is ready // and don't require node.Spec.Unschedulable to be set either way. - if !IsNodeReady(node) || !isNodeUntaintedWhitelist(node, whitelistedTaints) { + if !IsNodeReady(node) || !isNodeUntaintedWithNonblocking(node, nonblockingTaints) { return false } } else { diff --git a/test/e2e/framework/node/wait_test.go b/test/e2e/framework/node/wait_test.go index 9e2089963e5..f039017a049 100644 --- a/test/e2e/framework/node/wait_test.go +++ b/test/e2e/framework/node/wait_test.go @@ -18,7 +18,6 @@ package node import ( "errors" - "regexp" "testing" v1 "k8s.io/api/core/v1" @@ -50,7 +49,7 @@ func TestCheckReadyForTests(t *testing.T) { tcs := []struct { desc string - whitelist *regexp.Regexp + nonblockingTaints string allowedNotReadyNodes int nodes []v1.Node nodeListErr error @@ -64,8 +63,8 @@ func TestCheckReadyForTests(t *testing.T) { }, expected: true, }, { - desc: "Default value for whitelist regexp tolerates master taint", - whitelist: regexp.MustCompile(`^node-role\.kubernetes\.io/master$`), + desc: "Default value for nonblocking taints tolerates master taint", + nonblockingTaints: `node-role.kubernetes.io/master`, nodes: []v1.Node{ fromVanillaNode(func(n *v1.Node) { n.Spec.Taints = []v1.Taint{{Key: labelNodeRoleMaster, Effect: v1.TaintEffectNoSchedule}} @@ -73,8 +72,8 @@ func TestCheckReadyForTests(t *testing.T) { }, expected: true, }, { - desc: "Tainted node should fail if effect is TaintEffectNoExecute", - whitelist: regexp.MustCompile("bar"), + desc: "Tainted node should fail if effect is TaintEffectNoExecute", + nonblockingTaints: "bar", nodes: []v1.Node{ fromVanillaNode(func(n *v1.Node) { n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoExecute}} @@ -82,7 +81,7 @@ func TestCheckReadyForTests(t *testing.T) { expected: false, }, { desc: "Tainted node can be allowed via allowedNotReadyNodes", - whitelist: regexp.MustCompile("bar"), + nonblockingTaints: "bar", allowedNotReadyNodes: 1, nodes: []v1.Node{ fromVanillaNode(func(n *v1.Node) { @@ -116,8 +115,8 @@ func TestCheckReadyForTests(t *testing.T) { }, expected: true, }, { - desc: "Multi-node, single blocking node allowed via whitelisted taint", - whitelist: regexp.MustCompile("foo"), + desc: "Multi-node, single blocking node allowed via nonblocking taint", + nonblockingTaints: "foo", nodes: []v1.Node{ fromVanillaNode(func(n *v1.Node) {}), fromVanillaNode(func(n *v1.Node) { @@ -125,6 +124,32 @@ func TestCheckReadyForTests(t *testing.T) { }), }, expected: true, + }, { + desc: "Multi-node, both blocking nodes allowed via separate nonblocking taints", + nonblockingTaints: "foo,bar", + nodes: []v1.Node{ + fromVanillaNode(func(n *v1.Node) {}), + fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoSchedule}} + }), + fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "bar", Effect: v1.TaintEffectNoSchedule}} + }), + }, + expected: true, + }, { + desc: "Multi-node, one blocking node allowed via nonblocking taints still blocked", + nonblockingTaints: "foo,notbar", + nodes: []v1.Node{ + fromVanillaNode(func(n *v1.Node) {}), + fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoSchedule}} + }), + fromVanillaNode(func(n *v1.Node) { + n.Spec.Taints = []v1.Taint{{Key: "bar", Effect: v1.TaintEffectNoSchedule}} + }), + }, + expected: false, }, { desc: "Errors from node list are reported", nodeListErr: errors.New("Forced error"), @@ -147,7 +172,7 @@ func TestCheckReadyForTests(t *testing.T) { nodeList := &v1.NodeList{Items: tc.nodes} return true, nodeList, tc.nodeListErr }) - checkFunc := CheckReadyForTests(c, tc.whitelist, tc.allowedNotReadyNodes, testLargeClusterThreshold) + checkFunc := CheckReadyForTests(c, tc.nonblockingTaints, tc.allowedNotReadyNodes, testLargeClusterThreshold) out, err := checkFunc() if out != tc.expected { t.Errorf("Expected %v but got %v", tc.expected, out) @@ -177,10 +202,10 @@ func TestReadyForTests(t *testing.T) { } _ = fromVanillaNode tcs := []struct { - desc string - node *v1.Node - whitelist *regexp.Regexp - expected bool + desc string + node *v1.Node + nonblockingTaints string + expected bool }{ { desc: "Vanilla node should pass", @@ -188,42 +213,38 @@ func TestReadyForTests(t *testing.T) { }), expected: true, }, { - desc: "Vanilla node should pass with non-applicable whitelist", - whitelist: regexp.MustCompile("foo"), + desc: "Vanilla node should pass with non-applicable nonblocking taint", + nonblockingTaints: "foo", node: fromVanillaNode(func(n *v1.Node) { }), expected: true, }, { - desc: "Tainted node should pass if effect is TaintEffectPreferNoSchedule", - whitelist: regexp.MustCompile("bar"), + desc: "Tainted node should pass if effect is TaintEffectPreferNoSchedule", node: fromVanillaNode(func(n *v1.Node) { n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectPreferNoSchedule}} }), expected: true, }, { - desc: "Tainted node should fail if effect is TaintEffectNoExecute", - whitelist: regexp.MustCompile("bar"), + desc: "Tainted node should fail if effect is TaintEffectNoExecute", node: fromVanillaNode(func(n *v1.Node) { n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoExecute}} }), expected: false, }, { - desc: "Tainted node should fail", - whitelist: regexp.MustCompile("bar"), + desc: "Tainted node should fail", node: fromVanillaNode(func(n *v1.Node) { n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoSchedule}} }), expected: false, }, { - desc: "Tainted node should pass if whitelisted", - whitelist: regexp.MustCompile("foo"), + desc: "Tainted node should pass if nonblocking", + nonblockingTaints: "foo", node: fromVanillaNode(func(n *v1.Node) { n.Spec.Taints = []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoSchedule}} }), expected: true, }, { - desc: "Node with network not ready fails", - whitelist: regexp.MustCompile("foo"), + desc: "Node with network not ready fails", node: fromVanillaNode(func(n *v1.Node) { n.Status.Conditions = append(n.Status.Conditions, v1.NodeCondition{Type: v1.NodeNetworkUnavailable, Status: v1.ConditionTrue}, @@ -231,8 +252,7 @@ func TestReadyForTests(t *testing.T) { }), expected: false, }, { - desc: "Node fails unless NodeReady status", - whitelist: regexp.MustCompile("foo"), + desc: "Node fails unless NodeReady status", node: fromVanillaNode(func(n *v1.Node) { n.Status.Conditions = []v1.NodeCondition{} }), @@ -242,7 +262,7 @@ func TestReadyForTests(t *testing.T) { for _, tc := range tcs { t.Run(tc.desc, func(t *testing.T) { - out := readyForTests(tc.node, tc.whitelist) + out := readyForTests(tc.node, tc.nonblockingTaints) if out != tc.expected { t.Errorf("Expected %v but got %v", tc.expected, out) } diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go index 2cd7fd2a7d7..688a72de346 100644 --- a/test/e2e/framework/test_context.go +++ b/test/e2e/framework/test_context.go @@ -21,7 +21,6 @@ import ( "fmt" "io/ioutil" "os" - "regexp" "sort" "strings" "time" @@ -168,11 +167,8 @@ type TestContextType struct { // The Default IP Family of the cluster ("ipv4" or "ipv6") IPFamily string - // WhitelistedTaint is the string given by the user to specify taints which should not stop the test framework from running tests. - WhitelistedTaint string - - // WhitelistedTaintRegexp is the *regexp.Regexp parsed from the WhitelistedTaint. - WhitelistedTaintRegexp *regexp.Regexp + // NonblockingTaints is the comma-delimeted string given by the user to specify taints which should not stop the test framework from running tests. + NonblockingTaints string } // NodeKillerConfig describes configuration of NodeKiller -- a utility to @@ -290,7 +286,7 @@ func RegisterCommonFlags(flags *flag.FlagSet) { flags.StringVar(&TestContext.ImageServiceEndpoint, "image-service-endpoint", "", "The image service endpoint of cluster VM instances.") flags.StringVar(&TestContext.DockershimCheckpointDir, "dockershim-checkpoint-dir", "/var/lib/dockershim/sandbox", "The directory for dockershim to store sandbox checkpoints.") flags.StringVar(&TestContext.KubernetesAnywherePath, "kubernetes-anywhere-path", "/workspace/k8s.io/kubernetes-anywhere", "Which directory kubernetes-anywhere is installed to.") - flags.StringVar(&TestContext.WhitelistedTaint, "whitelist-taint-regexp", `^node-role\.kubernetes\.io/master$`, "Nodes with taints which match this regexp will not block the test framework from starting tests.") + flags.StringVar(&TestContext.NonblockingTaints, "non-blocking-taints", `node-role.kubernetes.io/master`, "Nodes with taints in this comma-delimited list will not block the test framework from starting tests.") flags.BoolVar(&TestContext.ListImages, "list-images", false, "If true, will show list of images used for runnning tests.") } @@ -432,12 +428,7 @@ func AfterReadingAllFlags(t *TestContextType) { t.AllowedNotReadyNodes = t.CloudConfig.NumNodes / 100 } - if len(TestContext.WhitelistedTaint) > 0 { - TestContext.WhitelistedTaintRegexp = regexp.MustCompile(TestContext.WhitelistedTaint) - klog.Infof("Tolerating taints matching regexp %q when considering if nodes are ready", TestContext.WhitelistedTaintRegexp.String()) - } else { - klog.Info("No taints will be tolerated when considering if nodes are ready") - } + klog.Infof("Tolerating taints %q when considering if nodes are ready", TestContext.NonblockingTaints) // Make sure that all test runs have a valid TestContext.CloudConfig.Provider. // TODO: whether and how long this code is needed is getting discussed diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index 086d7571a98..c6f734fd645 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -1914,7 +1914,7 @@ func WaitForAllNodesSchedulable(c clientset.Interface, timeout time.Duration) er return wait.PollImmediate( 30*time.Second, timeout, - e2enode.CheckReadyForTests(c, TestContext.WhitelistedTaintRegexp, TestContext.AllowedNotReadyNodes, largeClusterThreshold), + e2enode.CheckReadyForTests(c, TestContext.NonblockingTaints, TestContext.AllowedNotReadyNodes, largeClusterThreshold), ) }