Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #69796 from Huang-Wei/e2e-TaintBasedEvictions
add e2e tests for TaintBasedEvictions
commit 6b2a01709c
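For orientation: with the TaintBasedEvictions feature gate on, an unreachable node is tainted with node.kubernetes.io/unreachable:NoExecute, and pods that declare no toleration of their own are given a default one. The sketch below shows that toleration as the new test expects to observe it; the field values are taken from the assertions in the test added by this PR, so treat it as illustrative rather than authoritative.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func main() {
	// The default toleration the eviction machinery is expected to attach to pods
	// that carry no unreachable toleration of their own (pod2 in the new test waits
	// until it sees tolerationSeconds == 300).
	seconds := int64(300)
	tol := v1.Toleration{
		Key:               "node.kubernetes.io/unreachable",
		Operator:          v1.TolerationOpExists,
		Effect:            v1.TaintEffectNoExecute,
		TolerationSeconds: &seconds,
	}
	fmt.Printf("%+v\n", tol)
}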
@@ -394,6 +394,12 @@ func SkipUnlessSecretExistsAfterWait(c clientset.Interface, name, namespace stri
 	Logf("Secret %v in namespace %v found after duration %v", name, namespace, time.Since(start))
 }
 
+func SkipUnlessTaintBasedEvictionsEnabled() {
+	if !utilfeature.DefaultFeatureGate.Enabled(features.TaintBasedEvictions) {
+		Skipf("Only supported when %v feature is enabled", features.TaintBasedEvictions)
+	}
+}
+
 func SkipIfContainerRuntimeIs(runtimes ...string) {
 	for _, runtime := range runtimes {
 		if runtime == TestContext.ContainerRuntime {
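A short sketch of how a spec opts in via the new skip helper; the taint_based_evictions.go test added below does exactly this in its BeforeEach (package and imports assumed to match the scheduling e2e suite):

package scheduling

import (
	. "github.com/onsi/ginkgo"

	"k8s.io/kubernetes/test/e2e/framework"
)

var _ = SIGDescribe("TaintBasedEvictions [Serial]", func() {
	// NewDefaultFramework registers the setup/teardown that builds the test namespace.
	f := framework.NewDefaultFramework("sched-taint-based-evictions")

	BeforeEach(func() {
		// Skip the spec entirely unless the TaintBasedEvictions feature gate is
		// enabled on the cluster under test.
		framework.SkipUnlessTaintBasedEvictionsEnabled()
	})

	It("runs only when the gate is on", func() {
		framework.Logf("running against namespace %v", f.Namespace.Name)
	})
})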
@@ -5197,3 +5203,17 @@ func GetClusterZones(c clientset.Interface) (sets.String, error) {
 	}
 	return zones, nil
 }
+
+// WaitForNodeHasTaintOrNot waits for a taint to be added to or removed from the node until timeout occurs, whichever comes first.
+func WaitForNodeHasTaintOrNot(c clientset.Interface, nodeName string, taint *v1.Taint, wantTrue bool, timeout time.Duration) error {
+	if err := wait.PollImmediate(Poll, timeout, func() (bool, error) {
+		has, err := NodeHasTaint(c, nodeName, taint)
+		if err != nil {
+			return false, fmt.Errorf("failed to check whether node %s has taint %s", nodeName, taint.ToString())
+		}
+		return has == wantTrue, nil
+	}); err != nil {
+		return fmt.Errorf("expect node %v to have taint = %v within %v: %v", nodeName, wantTrue, timeout, err)
+	}
+	return nil
+}
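For context, a minimal sketch of how the new poller is meant to be driven, assuming a clientset and a node name; the test file added below calls it with both wantTrue=true and wantTrue=false. The wrapper function here is hypothetical and only exists for illustration.

package scheduling

import (
	"time"

	"k8s.io/api/core/v1"
	clientset "k8s.io/client-go/kubernetes"
	schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
	"k8s.io/kubernetes/test/e2e/framework"
)

// waitForUnreachableTaint waits up to 30s for the unreachable:NoExecute taint
// to appear on (or disappear from) the given node, depending on expectPresent.
func waitForUnreachableTaint(cs clientset.Interface, nodeName string, expectPresent bool) error {
	taint := &v1.Taint{
		Key:    schedulerapi.TaintNodeUnreachable,
		Effect: v1.TaintEffectNoExecute,
	}
	return framework.WaitForNodeHasTaintOrNot(cs, nodeName, taint, expectPresent, 30*time.Second)
}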
@@ -12,6 +12,7 @@ go_library(
         "preemption.go",
         "priorities.go",
         "resource_quota.go",
+        "taint_based_evictions.go",
         "taints.go",
         "ubernetes_lite.go",
         "ubernetes_lite_volumes.go",
@@ -25,12 +26,14 @@ go_library(
         "//pkg/kubelet/apis:go_default_library",
         "//pkg/quota/v1/evaluator/core:go_default_library",
         "//pkg/scheduler/algorithm/priorities/util:go_default_library",
+        "//pkg/scheduler/api:go_default_library",
         "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/api/extensions/v1beta1:go_default_library",
         "//staging/src/k8s.io/api/scheduling/v1beta1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
@@ -56,6 +56,7 @@ type pausePodConfig struct {
 	Ports             []v1.ContainerPort
 	OwnerReferences   []metav1.OwnerReference
 	PriorityClassName string
+	DeletionGracePeriodSeconds *int64
 }
 
 var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
@@ -631,6 +632,9 @@ func initPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
 	if conf.Resources != nil {
 		pod.Spec.Containers[0].Resources = *conf.Resources
 	}
+	if conf.DeletionGracePeriodSeconds != nil {
+		pod.ObjectMeta.DeletionGracePeriodSeconds = conf.DeletionGracePeriodSeconds
+	}
 	return pod
 }
 
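The new field gives tests a way to create pause pods with a deletion grace period of zero, which initPausePod now copies into pod.ObjectMeta. A fragmentary sketch of the call site follows; createPausePod, pausePodConfig, f, and nodeName are the scheduling suite's existing helpers and variables, used the same way by the new test below.

// Request a zero deletion grace period on the pause pod, matching what the
// new test does for pod0 and pod1.
zero := int64(0)
pod := createPausePod(f, pausePodConfig{
	Name:                       "taint-based-eviction-0",
	NodeName:                   nodeName,
	DeletionGracePeriodSeconds: &zero,
})
framework.ExpectNoError(framework.WaitForPodRunningInNamespace(f.ClientSet, pod))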
test/e2e/scheduling/taint_based_evictions.go (new file, 189 lines)
@@ -0,0 +1,189 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduling

import (
	"errors"
	"fmt"
	"time"

	"k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	clientset "k8s.io/client-go/kubernetes"
	schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
	"k8s.io/kubernetes/test/e2e/framework"

	. "github.com/onsi/ginkgo"
)

func newUnreachableNoExecuteTaint() *v1.Taint {
	return &v1.Taint{
		Key:    schedulerapi.TaintNodeUnreachable,
		Effect: v1.TaintEffectNoExecute,
	}
}

func getTolerationSeconds(tolerations []v1.Toleration) (int64, error) {
	for _, t := range tolerations {
		if t.Key == schedulerapi.TaintNodeUnreachable && t.Effect == v1.TaintEffectNoExecute && t.Operator == v1.TolerationOpExists {
			return *t.TolerationSeconds, nil
		}
	}
	return 0, errors.New("cannot find toleration")
}

var _ = SIGDescribe("TaintBasedEvictions [Serial]", func() {
	f := framework.NewDefaultFramework("sched-taint-based-evictions")
	var cs clientset.Interface
	var ns string

	BeforeEach(func() {
		cs = f.ClientSet
		ns = f.Namespace.Name
		// skip if TaintBasedEvictions is not enabled
		// TODO(Huang-Wei): remove this when TaintBasedEvictions is GAed
		framework.SkipUnlessTaintBasedEvictionsEnabled()
		// the test requires a cluster with more than 1 node,
		// otherwise the node lifecycle manager enters full-disruption mode
		framework.SkipUnlessNodeCountIsAtLeast(2)
	})

	// This test verifies that when a node becomes unreachable:
	// 1. the node lifecycle manager generates a status change: [NodeReady=true, status=ConditionUnknown],
	//    and the node.kubernetes.io/unreachable=:NoExecute taint is applied to the node
	// 2. pods without a toleration get a toleration with tolerationSeconds=300 added
	// 3. pods with a toleration but without tolerationSeconds won't be modified, and won't be evicted
	// 4. pods with a toleration and with tolerationSeconds won't be modified, and will be evicted after tolerationSeconds
	// When the network issue recovers, it's expected to see:
	// 5. the node lifecycle manager generates a status change: [NodeReady=true, status=ConditionTrue]
	// 6. the node.kubernetes.io/unreachable=:NoExecute taint is taken off the node
	It("Checks that the node becomes unreachable", func() {
		// find an available node
		nodeName := GetNodeThatCanRunPod(f)
		By("Finding an available node " + nodeName)

		// pod0 is a pod with an unreachable=:NoExecute toleration and tolerationSeconds=0s
		// pod1 is a pod with an unreachable=:NoExecute toleration and tolerationSeconds=200s
		// pod2 is a pod without any toleration
		base := "taint-based-eviction"
		tolerationSeconds := []int64{0, 200}
		numPods := len(tolerationSeconds) + 1
		By(fmt.Sprintf("Preparing %v pods", numPods))
		pods := make([]*v1.Pod, numPods)
		zero := int64(0)
		// build pod0, pod1
		for i := 0; i < numPods-1; i++ {
			pods[i] = createPausePod(f, pausePodConfig{
				Name:     fmt.Sprintf("%v-%v", base, i),
				NodeName: nodeName,
				Tolerations: []v1.Toleration{
					{
						Key:               schedulerapi.TaintNodeUnreachable,
						Operator:          v1.TolerationOpExists,
						Effect:            v1.TaintEffectNoExecute,
						TolerationSeconds: &tolerationSeconds[i],
					},
				},
				DeletionGracePeriodSeconds: &zero,
			})
		}
		// build pod2
		pods[numPods-1] = createPausePod(f, pausePodConfig{
			Name:     fmt.Sprintf("%v-%v", base, numPods-1),
			NodeName: nodeName,
		})

		By("Verifying all pods are running properly")
		for _, pod := range pods {
			framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
		}

		// get the node API object
		nodeSelector := fields.OneTermEqualSelector("metadata.name", nodeName)
		nodeList, err := cs.CoreV1().Nodes().List(metav1.ListOptions{FieldSelector: nodeSelector.String()})
		if err != nil || len(nodeList.Items) != 1 {
			framework.Failf("expected no err, got %v; expected len(nodes) = 1, got %v", err, len(nodeList.Items))
		}
		node := nodeList.Items[0]

		By(fmt.Sprintf("Blocking traffic from node %s to the master", nodeName))
		host, err := framework.GetNodeExternalIP(&node)
		// TODO(Huang-Wei): make this case work for local provider
		// if err != nil {
		// 	host, err = framework.GetNodeInternalIP(&node)
		// }
		framework.ExpectNoError(err)
		master := framework.GetMasterAddress(cs)
		taint := newUnreachableNoExecuteTaint()

		defer func() {
			By(fmt.Sprintf("Unblocking traffic from node %s to the master", node.Name))
			framework.UnblockNetwork(host, master)

			if CurrentGinkgoTestDescription().Failed {
				framework.Failf("Current e2e test has failed, so return from here.")
				return
			}

			By(fmt.Sprintf("Expecting to see node %q becomes Ready", nodeName))
			framework.WaitForNodeToBeReady(cs, nodeName, time.Minute*1)
			By("Expecting to see unreachable=:NoExecute taint is taken off")
			err := framework.WaitForNodeHasTaintOrNot(cs, nodeName, taint, false, time.Second*30)
			framework.ExpectNoError(err)
		}()

		framework.BlockNetwork(host, master)

		By(fmt.Sprintf("Expecting to see node %q becomes NotReady", nodeName))
		if !framework.WaitForNodeToBeNotReady(cs, nodeName, time.Minute*3) {
			framework.Failf("node %q doesn't turn to NotReady after 3 minutes", nodeName)
		}
		By("Expecting to see unreachable=:NoExecute taint is applied")
		err = framework.WaitForNodeHasTaintOrNot(cs, nodeName, taint, true, time.Second*30)
		framework.ExpectNoError(err)

		By("Expecting pod0 to be evicted immediately")
		err = framework.WaitForPodCondition(cs, ns, pods[0].Name, "pod0 terminating", time.Second*15, func(pod *v1.Pod) (bool, error) {
			// as the node is unreachable, pod0 is expected to end up in Terminating status
			// rather than getting deleted
			if pod.DeletionTimestamp != nil {
				return true, nil
			}
			return false, nil
		})
		framework.ExpectNoError(err)

		By("Expecting pod2 to be updated with a toleration with tolerationSeconds=300")
		err = framework.WaitForPodCondition(cs, ns, pods[2].Name, "pod2 updated with tolerationSeconds=300", time.Second*15, func(pod *v1.Pod) (bool, error) {
			if seconds, err := getTolerationSeconds(pod.Spec.Tolerations); err == nil {
				return seconds == 300, nil
			}
			return false, nil
		})
		framework.ExpectNoError(err)

		By("Expecting pod1 to be unchanged")
		livePod1, err := cs.CoreV1().Pods(pods[1].Namespace).Get(pods[1].Name, metav1.GetOptions{})
		framework.ExpectNoError(err)
		seconds, err := getTolerationSeconds(livePod1.Spec.Tolerations)
		framework.ExpectNoError(err)
		if seconds != 200 {
			framework.Failf("expect tolerationSeconds of pod1 is 200, but got %v", seconds)
		}
	})
})