Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-24 12:15:52 +00:00

Merge pull request #88152 from fengzixu/master

cleanup: move the test of TaintBasedEvictions features to sig-node

Commit: 34c4407d34

test/conformance/testdata/conformance.yaml (vendored; 28 changes)
@@ -420,6 +420,20 @@
     send events when scheduling and running a Pod.
   release: v1.9
   file: test/e2e/node/events.go
+- testname: Pod Eviction, Toleration limits
+  codename: '[k8s.io] [sig-node] NoExecuteTaintManager Multiple Pods [Serial] evicts
+    pods with minTolerationSeconds [Disruptive] [Conformance]'
+  description: In a multi-pods scenario with tolerationSeconds, the pods MUST be evicted
+    as per the toleration time limit.
+  release: v1.16
+  file: test/e2e/node/taints.go
+- testname: Taint, Pod Eviction on taint removal
+  codename: '[k8s.io] [sig-node] NoExecuteTaintManager Single Pod [Serial] removing
+    taint cancels eviction [Disruptive] [Conformance]'
+  description: The Pod with toleration timeout scheduled on a tainted Node MUST not
+    be evicted if the taint is removed before toleration time ends.
+  release: v1.16
+  file: test/e2e/node/taints.go
 - testname: Pods, QOS
   codename: '[k8s.io] [sig-node] Pods Extended [k8s.io] Pods Set QOS Class should
     be set on Pods with matching resource requests and limits for memory and cpu [Conformance]'

@@ -1694,20 +1708,6 @@
     validate the pod resources are applied to the Limitrange
   release: v1.18
   file: test/e2e/scheduling/limit_range.go
-- testname: Pod Eviction, Toleration limits
-  codename: '[sig-scheduling] NoExecuteTaintManager Multiple Pods [Serial] evicts
-    pods with minTolerationSeconds [Disruptive] [Conformance]'
-  description: In a multi-pods scenario with tolerationSeconds, the pods MUST be evicted
-    as per the toleration time limit.
-  release: v1.16
-  file: test/e2e/scheduling/taints.go
-- testname: Taint, Pod Eviction on taint removal
-  codename: '[sig-scheduling] NoExecuteTaintManager Single Pod [Serial] removing taint
-    cancels eviction [Disruptive] [Conformance]'
-  description: The Pod with toleration timeout scheduled on a tainted Node MUST not
-    be evicted if the taint is removed before toleration time ends.
-  release: v1.16
-  file: test/e2e/scheduling/taints.go
 - testname: Scheduler, resource limits
   codename: '[sig-scheduling] SchedulerPredicates [Serial] validates resource limits
     of pods that are allowed to run [Conformance]'
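Both relocated conformance entries exercise the same API construct: a NoExecute toleration with a tolerationSeconds bound. As an illustrative sketch (not code from this PR), building one against the core/v1 types used throughout this diff looks like this:

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func main() {
	// A pod carrying this toleration may remain bound to a NotReady node for
	// 200 seconds; once that window lapses, the NoExecute taint manager
	// evicts it, which is what the tests above assert.
	seconds := int64(200)
	toleration := v1.Toleration{
		Key:               v1.TaintNodeNotReady, // "node.kubernetes.io/not-ready"
		Operator:          v1.TolerationOpExists,
		Effect:            v1.TaintEffectNoExecute,
		TolerationSeconds: &seconds,
	}
	fmt.Printf("%+v\n", toleration)
}
```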
test/e2e/node/BUILD

@@ -17,6 +17,7 @@ go_library(
         "runtimeclass.go",
         "security_context.go",
         "ssh.go",
+        "taints.go",
         "ttlafterfinished.go",
     ],
     importpath = "k8s.io/kubernetes/test/e2e/node",
@@ -34,11 +35,13 @@ go_library(
        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/watch:go_default_library",
        "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
        "//staging/src/k8s.io/client-go/tools/cache:go_default_library",
        "//test/e2e/framework:go_default_library",
        "//test/e2e/framework/job:go_default_library",
        "//test/e2e/framework/kubectl:go_default_library",
@@ -58,6 +61,7 @@ go_library(
        "//test/utils/image:go_default_library",
        "//vendor/github.com/onsi/ginkgo:go_default_library",
        "//vendor/github.com/onsi/gomega:go_default_library",
        "//vendor/github.com/stretchr/testify/assert:go_default_library",
    ],
)
test/e2e/node/taints.go (moved from test/e2e/scheduling/taints.go)

@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */

-package scheduling
+package node

 import (
     "context"
test/e2e/scheduling/BUILD

@@ -10,7 +10,6 @@ go_library(
         "predicates.go",
         "preemption.go",
         "priorities.go",
-        "taints.go",
         "ubernetes_lite.go",
         "ubernetes_lite_volumes.go",
     ],
test/integration/BUILD

@@ -61,6 +61,7 @@ filegroup(
         "//test/integration/master:all-srcs",
         "//test/integration/metrics:all-srcs",
         "//test/integration/namespace:all-srcs",
+        "//test/integration/node:all-srcs",
         "//test/integration/objectmeta:all-srcs",
         "//test/integration/openshift:all-srcs",
         "//test/integration/pods:all-srcs",

test/integration/node/BUILD (new file, 51 lines)
@@ -0,0 +1,51 @@
package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_test",
)

go_test(
    name = "go_default_test",
    size = "large",
    srcs = [
        "lifecycle_test.go",
        "main_test.go",
    ],
    tags = ["integration"],
    deps = [
        "//pkg/controller/nodelifecycle:go_default_library",
        "//pkg/features:go_default_library",
        "//plugin/pkg/admission/defaulttolerationseconds:go_default_library",
        "//plugin/pkg/admission/podtolerationrestriction:go_default_library",
        "//plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction:go_default_library",
        "//staging/src/k8s.io/api/core/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
        "//staging/src/k8s.io/apiserver/pkg/admission:go_default_library",
        "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
        "//staging/src/k8s.io/client-go/informers:go_default_library",
        "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
        "//staging/src/k8s.io/client-go/rest:go_default_library",
        "//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
        "//test/e2e/framework/pod:go_default_library",
        "//test/integration/framework:go_default_library",
        "//test/integration/util:go_default_library",
        "//test/utils/image:go_default_library",
    ],
)

filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
)
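With this target in place the suite can be run directly; assuming the standard Kubernetes developer workflow of this era (commands illustrative, not taken from the PR), that would be something like `make test-integration WHAT=./test/integration/node` from the repo root, or `bazel test //test/integration/node:go_default_test` through Bazel.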
test/integration/node/OWNERS (new file, 6 lines)

@@ -0,0 +1,6 @@
# See the OWNERS docs at https://go.k8s.io/owners

approvers:
- sig-node-approvers
reviewers:
- sig-node-reviewers
test/integration/node/lifecycle_test.go (new file, 306 lines)

@@ -0,0 +1,306 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
    "context"
    "fmt"
    "testing"
    "time"

    v1 "k8s.io/api/core/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/runtime/schema"
    "k8s.io/apiserver/pkg/admission"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    "k8s.io/client-go/informers"
    "k8s.io/client-go/kubernetes"
    restclient "k8s.io/client-go/rest"
    featuregatetesting "k8s.io/component-base/featuregate/testing"
    "k8s.io/kubernetes/pkg/controller/nodelifecycle"
    "k8s.io/kubernetes/pkg/features"
    "k8s.io/kubernetes/plugin/pkg/admission/defaulttolerationseconds"
    "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
    pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction"
    "k8s.io/kubernetes/test/e2e/framework/pod"
    testutils "k8s.io/kubernetes/test/integration/util"
    imageutils "k8s.io/kubernetes/test/utils/image"
)

// TestTaintBasedEvictions tests related cases for the TaintBasedEvictions feature
func TestTaintBasedEvictions(t *testing.T) {
    // we need at least 2 nodes to prevent lifecycle manager from entering "fully-disrupted" mode
    nodeCount := 3
    zero := int64(0)
    gracePeriod := int64(1)
    heartbeatInternal := time.Second * 2
    testPod := &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: "testpod1", DeletionGracePeriodSeconds: &zero},
        Spec: v1.PodSpec{
            Containers: []v1.Container{
                {Name: "container", Image: imageutils.GetPauseImageName()},
            },
            Tolerations: []v1.Toleration{
                {
                    Key:      v1.TaintNodeNotReady,
                    Operator: v1.TolerationOpExists,
                    Effect:   v1.TaintEffectNoExecute,
                },
            },
            TerminationGracePeriodSeconds: &gracePeriod,
        },
    }
    tolerationSeconds := []int64{200, 300, 0}
    tests := []struct {
        name                string
        nodeTaints          []v1.Taint
        nodeConditions      []v1.NodeCondition
        pod                 *v1.Pod
        waitForPodCondition string
    }{
        {
            name:                "Taint based evictions for NodeNotReady and 200 tolerationseconds",
            nodeTaints:          []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
            nodeConditions:      []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
            pod:                 testPod,
            waitForPodCondition: "updated with tolerationSeconds of 200",
        },
        {
            name:           "Taint based evictions for NodeNotReady with no pod tolerations",
            nodeTaints:     []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
            nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
            pod: &v1.Pod{
                ObjectMeta: metav1.ObjectMeta{Name: "testpod1"},
                Spec: v1.PodSpec{
                    Containers: []v1.Container{
                        {Name: "container", Image: imageutils.GetPauseImageName()},
                    },
                },
            },
            waitForPodCondition: "updated with tolerationSeconds=300",
        },
        {
            name:                "Taint based evictions for NodeNotReady and 0 tolerationseconds",
            nodeTaints:          []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
            nodeConditions:      []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
            pod:                 testPod,
            waitForPodCondition: "terminating",
        },
        {
            name:           "Taint based evictions for NodeUnreachable",
            nodeTaints:     []v1.Taint{{Key: v1.TaintNodeUnreachable, Effect: v1.TaintEffectNoExecute}},
            nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionUnknown}},
        },
    }
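    // Note: the NodeUnreachable case defines no pod; it exercises only the
    // node-side behavior, verifying that the unreachable NoExecute taint is
    // applied to the node.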

    // Enable TaintBasedEvictions
    defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.TaintBasedEvictions, true)()

    // Build admission chain handler.
    podTolerations := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})
    admission := admission.NewChainHandler(
        podTolerations,
        defaulttolerationseconds.NewDefaultTolerationSeconds(),
    )
    for i, test := range tests {
        t.Run(test.name, func(t *testing.T) {
            testCtx := testutils.InitTestMaster(t, "taint-based-evictions", admission)

            // Build clientset and informers for controllers.
            externalClientset := kubernetes.NewForConfigOrDie(&restclient.Config{
                QPS:           -1,
                Host:          testCtx.HTTPServer.URL,
                ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
            externalInformers := informers.NewSharedInformerFactory(externalClientset, time.Second)
            podTolerations.SetExternalKubeClientSet(externalClientset)
            podTolerations.SetExternalKubeInformerFactory(externalInformers)

            testCtx = testutils.InitTestScheduler(t, testCtx, true, nil)
            defer testutils.CleanupTest(t, testCtx)
            cs := testCtx.ClientSet
            informers := testCtx.InformerFactory
            _, err := cs.CoreV1().Namespaces().Create(context.TODO(), testCtx.NS, metav1.CreateOptions{})
            if err != nil {
                t.Errorf("Failed to create namespace %+v", err)
            }

            // Start NodeLifecycleController for taint.
            nc, err := nodelifecycle.NewNodeLifecycleController(
                informers.Coordination().V1().Leases(),
                informers.Core().V1().Pods(),
                informers.Core().V1().Nodes(),
                informers.Apps().V1().DaemonSets(),
                cs,
                5*time.Second,    // Node monitor grace period
                time.Minute,      // Node startup grace period
                time.Millisecond, // Node monitor period
                time.Second,      // Pod eviction timeout
                100,              // Eviction limiter QPS
                100,              // Secondary eviction limiter QPS
                50,               // Large cluster threshold
                0.55,             // Unhealthy zone threshold
                true,             // Run taint manager
                true,             // Use taint based evictions
            )
            if err != nil {
                t.Errorf("Failed to create node controller: %v", err)
                return
            }

            go nc.Run(testCtx.Ctx.Done())

            // Wait for all controllers to sync.
            externalInformers.Start(testCtx.Ctx.Done())
            externalInformers.WaitForCacheSync(testCtx.Ctx.Done())
            informers.Start(testCtx.Ctx.Done())
            informers.WaitForCacheSync(testCtx.Ctx.Done())

            nodeRes := v1.ResourceList{
                v1.ResourceCPU:    resource.MustParse("4000m"),
                v1.ResourceMemory: resource.MustParse("16Gi"),
                v1.ResourcePods:   resource.MustParse("110"),
            }

            var nodes []*v1.Node
            for i := 0; i < nodeCount; i++ {
                nodes = append(nodes, &v1.Node{
                    ObjectMeta: metav1.ObjectMeta{
                        Name:   fmt.Sprintf("node-%d", i),
                        Labels: map[string]string{v1.LabelZoneRegion: "region1", v1.LabelZoneFailureDomain: "zone1"},
                    },
                    Spec: v1.NodeSpec{},
                    Status: v1.NodeStatus{
                        Capacity:    nodeRes,
                        Allocatable: nodeRes,
                        Conditions: []v1.NodeCondition{
                            {
                                Type:              v1.NodeReady,
                                Status:            v1.ConditionTrue,
                                LastHeartbeatTime: metav1.Now(),
                            },
                        },
                    },
                })
                if _, err := cs.CoreV1().Nodes().Create(context.TODO(), nodes[i], metav1.CreateOptions{}); err != nil {
                    t.Errorf("Failed to create node, err: %v", err)
                }
            }

            neededNode := nodes[1]
            if test.pod != nil {
                test.pod.Name = fmt.Sprintf("testpod-%d", i)
                if len(test.pod.Spec.Tolerations) > 0 {
                    test.pod.Spec.Tolerations[0].TolerationSeconds = &tolerationSeconds[i]
                }

                test.pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(context.TODO(), test.pod, metav1.CreateOptions{})
                if err != nil {
                    t.Fatalf("Test Failed: error: %v, while creating pod", err)
                }

                if err := testutils.WaitForPodToSchedule(cs, test.pod); err != nil {
                    t.Errorf("Failed to schedule pod %s/%s on the node, err: %v",
                        test.pod.Namespace, test.pod.Name, err)
                }
                test.pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Get(context.TODO(), test.pod.Name, metav1.GetOptions{})
                if err != nil {
                    t.Fatalf("Test Failed: error: %v, while getting pod", err)
                }
                neededNode, err = cs.CoreV1().Nodes().Get(context.TODO(), test.pod.Spec.NodeName, metav1.GetOptions{})
                if err != nil {
                    t.Fatalf("Error while getting node associated with pod %v with err %v", test.pod.Name, err)
                }
            }

            // Regularly send heartbeat events to the APIServer so that the cluster doesn't enter fullyDisrupted mode.
            // TODO(Huang-Wei): use "NodeDisruptionExclusion" feature to simplify the logic below when it's beta.
            for i := 0; i < nodeCount; i++ {
                var conditions []v1.NodeCondition
                // If current node is not <neededNode>
                if neededNode.Name != nodes[i].Name {
                    conditions = []v1.NodeCondition{
                        {
                            Type:   v1.NodeReady,
                            Status: v1.ConditionTrue,
                        },
                    }
                } else {
                    c, err := testutils.NodeReadyStatus(test.nodeConditions)
                    if err != nil {
                        t.Error(err)
                    }
                    // Need to distinguish NodeReady/False and NodeReady/Unknown.
                    // If we try to update the node with condition NotReady/False, i.e. expect a NotReady:NoExecute taint,
                    // we need to keep sending the update event to keep it alive, rather than just sending once.
                    if c == v1.ConditionFalse {
                        conditions = test.nodeConditions
                    } else if c == v1.ConditionUnknown {
                        // If it's expected to update the node with condition NotReady/Unknown,
                        // i.e. expect an Unreachable:NoExecute taint,
                        // we need to send the update event only once to simulate the network-unreachable scenario.
                        nodeCopy := testutils.NodeCopyWithConditions(nodes[i], test.nodeConditions)
                        if err := testutils.UpdateNodeStatus(cs, nodeCopy); err != nil && !apierrors.IsNotFound(err) {
                            t.Errorf("Cannot update node: %v", err)
                        }
                        continue
                    }
                }
                // Keep sending NodeReady/True or NodeReady/False events.
                go func(i int) {
                    for {
                        select {
                        case <-testCtx.Ctx.Done():
                            return
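                        // Note: time.Tick's underlying Ticker is never stopped; the
                        // leak is confined to the test binary, as this goroutine exits
                        // once the test context is cancelled.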
                        case <-time.Tick(heartbeatInternal):
                            nodeCopy := testutils.NodeCopyWithConditions(nodes[i], conditions)
                            if err := testutils.UpdateNodeStatus(cs, nodeCopy); err != nil && !apierrors.IsNotFound(err) {
                                t.Errorf("Cannot update node: %v", err)
                            }
                        }
                    }
                }(i)
            }

            if err := testutils.WaitForNodeTaints(cs, neededNode, test.nodeTaints); err != nil {
                t.Errorf("Failed to taint node in test %d <%s>, err: %v", i, neededNode.Name, err)
            }

            if test.pod != nil {
                err = pod.WaitForPodCondition(cs, testCtx.NS.Name, test.pod.Name, test.waitForPodCondition, time.Second*15, func(pod *v1.Pod) (bool, error) {
                    // As the node is unreachable, pod0 is expected to be in Terminating status
                    // rather than getting deleted.
                    if tolerationSeconds[i] == 0 {
                        return pod.DeletionTimestamp != nil, nil
                    }
                    if seconds, err := testutils.GetTolerationSeconds(pod.Spec.Tolerations); err == nil {
                        return seconds == tolerationSeconds[i], nil
                    }
                    return false, nil
                })
                if err != nil {
                    pod, _ := cs.CoreV1().Pods(testCtx.NS.Name).Get(context.TODO(), test.pod.Name, metav1.GetOptions{})
                    t.Fatalf("Error: %v, Expected test pod to be %s but it's %v", err, test.waitForPodCondition, pod)
                }
                testutils.CleanupPods(cs, t, []*v1.Pod{test.pod})
            }
            testutils.CleanupNodes(cs, t)
            testutils.WaitForSchedulerCacheCleanup(testCtx.Scheduler, t)
        })
    }
}
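The table-driven cases above pair each injected NodeReady condition with the NoExecute taint the node lifecycle controller is expected to apply. A minimal sketch of that pairing (illustrative, not part of this change), using the same core/v1 constants:

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// expectedTaintKey mirrors the condition-to-taint pairing in the test cases.
func expectedTaintKey(ready v1.ConditionStatus) string {
	switch ready {
	case v1.ConditionFalse: // kubelet reports NotReady
		return v1.TaintNodeNotReady // "node.kubernetes.io/not-ready"
	case v1.ConditionUnknown: // heartbeats stopped: node unreachable
		return v1.TaintNodeUnreachable // "node.kubernetes.io/unreachable"
	}
	return ""
}

func main() {
	fmt.Println(expectedTaintKey(v1.ConditionUnknown))
}
```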
test/integration/node/main_test.go (new file, 27 lines)

@@ -0,0 +1,27 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
    "testing"

    "k8s.io/kubernetes/test/integration/framework"
)

func TestMain(m *testing.M) {
    framework.EtcdMain(m.Run)
}
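framework.EtcdMain wraps the package's test run with setup and teardown of a local etcd instance backing the integration API server, so running this suite requires an etcd binary on the machine (in the Kubernetes repo that is typically provisioned via hack/install-etcd.sh).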
test/integration/scheduler/BUILD

@@ -34,7 +34,6 @@ go_test(
        "//pkg/scheduler/nodeinfo:go_default_library",
        "//pkg/scheduler/profile:go_default_library",
        "//pkg/scheduler/testing:go_default_library",
        "//plugin/pkg/admission/defaulttolerationseconds:go_default_library",
        "//plugin/pkg/admission/podtolerationrestriction:go_default_library",
        "//plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction:go_default_library",
        "//plugin/pkg/admission/priority:go_default_library",
@@ -49,7 +48,6 @@ go_test(
        "//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/watch:go_default_library",
        "//staging/src/k8s.io/apiserver/pkg/admission:go_default_library",
        "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
        "//staging/src/k8s.io/client-go/informers:go_default_library",
        "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
@@ -59,7 +57,6 @@ go_test(
        "//staging/src/k8s.io/client-go/tools/events:go_default_library",
        "//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
        "//staging/src/k8s.io/kube-scheduler/extender/v1:go_default_library",
        "//test/e2e/framework/pod:go_default_library",
        "//test/integration/framework:go_default_library",
        "//test/integration/util:go_default_library",
        "//test/utils:go_default_library",
test/integration/scheduler/taint_test.go

@@ -20,30 +20,21 @@ package scheduler

import (
    "context"
    "errors"
    "fmt"
    "testing"
    "time"

    v1 "k8s.io/api/core/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/runtime/schema"
    "k8s.io/apiserver/pkg/admission"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    "k8s.io/client-go/informers"
    "k8s.io/client-go/kubernetes"
    restclient "k8s.io/client-go/rest"
    featuregatetesting "k8s.io/component-base/featuregate/testing"
    "k8s.io/kubernetes/pkg/controller/nodelifecycle"
    "k8s.io/kubernetes/pkg/features"
    "k8s.io/kubernetes/plugin/pkg/admission/defaulttolerationseconds"
    "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
    pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction"
    "k8s.io/kubernetes/test/e2e/framework/pod"
    testutils "k8s.io/kubernetes/test/integration/util"
    imageutils "k8s.io/kubernetes/test/utils/image"
)

func newPod(nsName, name string, req, limit v1.ResourceList) *v1.Pod {
@@ -577,297 +568,3 @@ func TestTaintNodeByCondition(t *testing.T) {
        })
    }
}

// TestTaintBasedEvictions tests related cases for the TaintBasedEvictions feature
func TestTaintBasedEvictions(t *testing.T) {
    // we need at least 2 nodes to prevent lifecycle manager from entering "fully-disrupted" mode
    nodeCount := 3
    zero := int64(0)
    gracePeriod := int64(1)
    heartbeatInternal := time.Second * 2
    testPod := &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: "testpod1", DeletionGracePeriodSeconds: &zero},
        Spec: v1.PodSpec{
            Containers: []v1.Container{
                {Name: "container", Image: imageutils.GetPauseImageName()},
            },
            Tolerations: []v1.Toleration{
                {
                    Key:      v1.TaintNodeNotReady,
                    Operator: v1.TolerationOpExists,
                    Effect:   v1.TaintEffectNoExecute,
                },
            },
            TerminationGracePeriodSeconds: &gracePeriod,
        },
    }
    tolerationSeconds := []int64{200, 300, 0}
    tests := []struct {
        name                string
        nodeTaints          []v1.Taint
        nodeConditions      []v1.NodeCondition
        pod                 *v1.Pod
        waitForPodCondition string
    }{
        {
            name:                "Taint based evictions for NodeNotReady and 200 tolerationseconds",
            nodeTaints:          []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
            nodeConditions:      []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
            pod:                 testPod,
            waitForPodCondition: "updated with tolerationSeconds of 200",
        },
        {
            name:           "Taint based evictions for NodeNotReady with no pod tolerations",
            nodeTaints:     []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
            nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
            pod: &v1.Pod{
                ObjectMeta: metav1.ObjectMeta{Name: "testpod1"},
                Spec: v1.PodSpec{
                    Containers: []v1.Container{
                        {Name: "container", Image: imageutils.GetPauseImageName()},
                    },
                },
            },
            waitForPodCondition: "updated with tolerationSeconds=300",
        },
        {
            name:                "Taint based evictions for NodeNotReady and 0 tolerationseconds",
            nodeTaints:          []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
            nodeConditions:      []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
            pod:                 testPod,
            waitForPodCondition: "terminating",
        },
        {
            name:           "Taint based evictions for NodeUnreachable",
            nodeTaints:     []v1.Taint{{Key: v1.TaintNodeUnreachable, Effect: v1.TaintEffectNoExecute}},
            nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionUnknown}},
        },
    }

    // Enable TaintBasedEvictions
    defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.TaintBasedEvictions, true)()

    // Build admission chain handler.
    podTolerations := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})
    admission := admission.NewChainHandler(
        podTolerations,
        defaulttolerationseconds.NewDefaultTolerationSeconds(),
    )
    for i, test := range tests {
        t.Run(test.name, func(t *testing.T) {
            testCtx := testutils.InitTestMaster(t, "taint-based-evictions", admission)

            // Build clientset and informers for controllers.
            externalClientset := kubernetes.NewForConfigOrDie(&restclient.Config{
                QPS:           -1,
                Host:          testCtx.HTTPServer.URL,
                ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
            externalInformers := informers.NewSharedInformerFactory(externalClientset, time.Second)
            podTolerations.SetExternalKubeClientSet(externalClientset)
            podTolerations.SetExternalKubeInformerFactory(externalInformers)

            testCtx = testutils.InitTestScheduler(t, testCtx, true, nil)
            defer testutils.CleanupTest(t, testCtx)
            cs := testCtx.ClientSet
            informers := testCtx.InformerFactory
            _, err := cs.CoreV1().Namespaces().Create(context.TODO(), testCtx.NS, metav1.CreateOptions{})
            if err != nil {
                t.Errorf("Failed to create namespace %+v", err)
            }

            // Start NodeLifecycleController for taint.
            nc, err := nodelifecycle.NewNodeLifecycleController(
                informers.Coordination().V1().Leases(),
                informers.Core().V1().Pods(),
                informers.Core().V1().Nodes(),
                informers.Apps().V1().DaemonSets(),
                cs,
                5*time.Second,    // Node monitor grace period
                time.Minute,      // Node startup grace period
                time.Millisecond, // Node monitor period
                time.Second,      // Pod eviction timeout
                100,              // Eviction limiter QPS
                100,              // Secondary eviction limiter QPS
                50,               // Large cluster threshold
                0.55,             // Unhealthy zone threshold
                true,             // Run taint manager
                true,             // Use taint based evictions
            )
            if err != nil {
                t.Errorf("Failed to create node controller: %v", err)
                return
            }

            go nc.Run(testCtx.Ctx.Done())

            // Waiting for all controller sync.
            externalInformers.Start(testCtx.Ctx.Done())
            externalInformers.WaitForCacheSync(testCtx.Ctx.Done())
            informers.Start(testCtx.Ctx.Done())
            informers.WaitForCacheSync(testCtx.Ctx.Done())

            nodeRes := v1.ResourceList{
                v1.ResourceCPU:    resource.MustParse("4000m"),
                v1.ResourceMemory: resource.MustParse("16Gi"),
                v1.ResourcePods:   resource.MustParse("110"),
            }

            var nodes []*v1.Node
            for i := 0; i < nodeCount; i++ {
                nodes = append(nodes, &v1.Node{
                    ObjectMeta: metav1.ObjectMeta{
                        Name:   fmt.Sprintf("node-%d", i),
                        Labels: map[string]string{v1.LabelZoneRegion: "region1", v1.LabelZoneFailureDomain: "zone1"},
                    },
                    Spec: v1.NodeSpec{},
                    Status: v1.NodeStatus{
                        Capacity:    nodeRes,
                        Allocatable: nodeRes,
                        Conditions: []v1.NodeCondition{
                            {
                                Type:              v1.NodeReady,
                                Status:            v1.ConditionTrue,
                                LastHeartbeatTime: metav1.Now(),
                            },
                        },
                    },
                })
                if _, err := cs.CoreV1().Nodes().Create(context.TODO(), nodes[i], metav1.CreateOptions{}); err != nil {
                    t.Errorf("Failed to create node, err: %v", err)
                }
            }

            neededNode := nodes[1]
            if test.pod != nil {
                test.pod.Name = fmt.Sprintf("testpod-%d", i)
                if len(test.pod.Spec.Tolerations) > 0 {
                    test.pod.Spec.Tolerations[0].TolerationSeconds = &tolerationSeconds[i]
                }

                test.pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(context.TODO(), test.pod, metav1.CreateOptions{})
                if err != nil {
                    t.Fatalf("Test Failed: error: %v, while creating pod", err)
                }

                if err := testutils.WaitForPodToSchedule(cs, test.pod); err != nil {
                    t.Errorf("Failed to schedule pod %s/%s on the node, err: %v",
                        test.pod.Namespace, test.pod.Name, err)
                }
                test.pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Get(context.TODO(), test.pod.Name, metav1.GetOptions{})
                if err != nil {
                    t.Fatalf("Test Failed: error: %v, while creating pod", err)
                }
                neededNode, err = cs.CoreV1().Nodes().Get(context.TODO(), test.pod.Spec.NodeName, metav1.GetOptions{})
                if err != nil {
                    t.Fatalf("Error while getting node associated with pod %v with err %v", test.pod.Name, err)
                }
            }

            // Regularly send heartbeat event to APIServer so that the cluster doesn't enter fullyDisruption mode.
            // TODO(Huang-Wei): use "NodeDisruptionExclusion" feature to simply the below logic when it's beta.
            for i := 0; i < nodeCount; i++ {
                var conditions []v1.NodeCondition
                // If current node is not <neededNode>
                if neededNode.Name != nodes[i].Name {
                    conditions = []v1.NodeCondition{
                        {
                            Type:   v1.NodeReady,
                            Status: v1.ConditionTrue,
                        },
                    }
                } else {
                    c, err := nodeReadyStatus(test.nodeConditions)
                    if err != nil {
                        t.Error(err)
                    }
                    // Need to distinguish NodeReady/False and NodeReady/Unknown.
                    // If we try to update the node with condition NotReady/False, i.e. expect a NotReady:NoExecute taint
                    // we need to keep sending the update event to keep it alive, rather than just sending once.
                    if c == v1.ConditionFalse {
                        conditions = test.nodeConditions
                    } else if c == v1.ConditionUnknown {
                        // If it's expected to update the node with condition NotReady/Unknown,
                        // i.e. expect a Unreachable:NoExecute taint,
                        // we need to only send the update event once to simulate the network unreachable scenario.
                        nodeCopy := nodeCopyWithConditions(nodes[i], test.nodeConditions)
                        if err := testutils.UpdateNodeStatus(cs, nodeCopy); err != nil && !apierrors.IsNotFound(err) {
                            t.Errorf("Cannot update node: %v", err)
                        }
                        continue
                    }
                }
                // Keeping sending NodeReady/True or NodeReady/False events.
                go func(i int) {
                    for {
                        select {
                        case <-testCtx.Ctx.Done():
                            return
                        case <-time.Tick(heartbeatInternal):
                            nodeCopy := nodeCopyWithConditions(nodes[i], conditions)
                            if err := testutils.UpdateNodeStatus(cs, nodeCopy); err != nil && !apierrors.IsNotFound(err) {
                                t.Errorf("Cannot update node: %v", err)
                            }
                        }
                    }
                }(i)
            }

            if err := testutils.WaitForNodeTaints(cs, neededNode, test.nodeTaints); err != nil {
                t.Errorf("Failed to taint node in test %d <%s>, err: %v", i, neededNode.Name, err)
            }

            if test.pod != nil {
                err = pod.WaitForPodCondition(cs, testCtx.NS.Name, test.pod.Name, test.waitForPodCondition, time.Second*15, func(pod *v1.Pod) (bool, error) {
                    // as node is unreachable, pod0 is expected to be in Terminating status
                    // rather than getting deleted
                    if tolerationSeconds[i] == 0 {
                        return pod.DeletionTimestamp != nil, nil
                    }
                    if seconds, err := getTolerationSeconds(pod.Spec.Tolerations); err == nil {
                        return seconds == tolerationSeconds[i], nil
                    }
                    return false, nil
                })
                if err != nil {
                    pod, _ := cs.CoreV1().Pods(testCtx.NS.Name).Get(context.TODO(), test.pod.Name, metav1.GetOptions{})
                    t.Fatalf("Error: %v, Expected test pod to be %s but it's %v", err, test.waitForPodCondition, pod)
                }
                testutils.CleanupPods(cs, t, []*v1.Pod{test.pod})
            }
            testutils.CleanupNodes(cs, t)
            testutils.WaitForSchedulerCacheCleanup(testCtx.Scheduler, t)
        })
    }
}

func getTolerationSeconds(tolerations []v1.Toleration) (int64, error) {
    for _, t := range tolerations {
        if t.Key == v1.TaintNodeNotReady && t.Effect == v1.TaintEffectNoExecute && t.Operator == v1.TolerationOpExists {
            return *t.TolerationSeconds, nil
        }
    }
    return 0, fmt.Errorf("cannot find toleration")
}

// nodeReadyStatus returns the status of first condition with type NodeReady.
// If none of the condition is of type NodeReady, returns an error.
func nodeReadyStatus(conditions []v1.NodeCondition) (v1.ConditionStatus, error) {
    for _, c := range conditions {
        if c.Type != v1.NodeReady {
            continue
        }
        // Just return the first condition with type NodeReady
        return c.Status, nil
    }
    return v1.ConditionFalse, errors.New("None of the conditions is of type NodeReady")
}

func nodeCopyWithConditions(node *v1.Node, conditions []v1.NodeCondition) *v1.Node {
    copy := node.DeepCopy()
    copy.ResourceVersion = "0"
    copy.Status.Conditions = conditions
    for i := range copy.Status.Conditions {
        copy.Status.Conditions[i].LastHeartbeatTime = metav1.Now()
    }
    return copy
}