Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #69796 from Huang-Wei/e2e-TaintBasedEvictions
add e2e tests for TaintBasedEvictions
Commit 6b2a01709c
@@ -394,6 +394,12 @@ func SkipUnlessSecretExistsAfterWait(c clientset.Interface, name, namespace stri
    Logf("Secret %v in namespace %v found after duration %v", name, namespace, time.Since(start))
}

func SkipUnlessTaintBasedEvictionsEnabled() {
    if !utilfeature.DefaultFeatureGate.Enabled(features.TaintBasedEvictions) {
        Skipf("Only supported when %v feature is enabled", features.TaintBasedEvictions)
    }
}

func SkipIfContainerRuntimeIs(runtimes ...string) {
    for _, runtime := range runtimes {
        if runtime == TestContext.ContainerRuntime {
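Note: the skip above passes only when the cluster under test was brought up with the TaintBasedEvictions feature gate turned on; whether that requires explicitly passing --feature-gates=TaintBasedEvictions=true to kube-controller-manager depends on the release's default for the gate (an assumption about the test environment, not something this PR configures).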
@@ -5197,3 +5203,17 @@ func GetClusterZones(c clientset.Interface) (sets.String, error) {
    }
    return zones, nil
}

// WaitForNodeHasTaintOrNot waits for a taint to be added to or removed from
// the node, until the timeout expires, whichever comes first.
func WaitForNodeHasTaintOrNot(c clientset.Interface, nodeName string, taint *v1.Taint, wantTrue bool, timeout time.Duration) error {
    if err := wait.PollImmediate(Poll, timeout, func() (bool, error) {
        has, err := NodeHasTaint(c, nodeName, taint)
        if err != nil {
            return false, fmt.Errorf("failed to check whether node %s has taint %s: %v", nodeName, taint.ToString(), err)
        }
        return has == wantTrue, nil
    }); err != nil {
        return fmt.Errorf("expected node %v to have taint=%v within %v: %v", nodeName, wantTrue, timeout, err)
    }
    return nil
}
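For illustration, a minimal usage sketch of the new helper (node name and timeout are hypothetical; assumes the same framework, v1 and schedulerapi imports used elsewhere in this PR):

    // Wait up to two minutes for the unreachable taint to appear on "node-1".
    taint := &v1.Taint{Key: schedulerapi.TaintNodeUnreachable, Effect: v1.TaintEffectNoExecute}
    if err := framework.WaitForNodeHasTaintOrNot(cs, "node-1", taint, true, 2*time.Minute); err != nil {
        framework.Failf("taint was never applied: %v", err)
    }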
@@ -12,6 +12,7 @@ go_library(
        "preemption.go",
        "priorities.go",
        "resource_quota.go",
        "taint_based_evictions.go",
        "taints.go",
        "ubernetes_lite.go",
        "ubernetes_lite_volumes.go",
@@ -25,12 +26,14 @@ go_library(
        "//pkg/kubelet/apis:go_default_library",
        "//pkg/quota/v1/evaluator/core:go_default_library",
        "//pkg/scheduler/algorithm/priorities/util:go_default_library",
        "//pkg/scheduler/api:go_default_library",
        "//staging/src/k8s.io/api/core/v1:go_default_library",
        "//staging/src/k8s.io/api/extensions/v1beta1:go_default_library",
        "//staging/src/k8s.io/api/scheduling/v1beta1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
@@ -56,6 +56,7 @@ type pausePodConfig struct {
    Ports                      []v1.ContainerPort
    OwnerReferences            []metav1.OwnerReference
    PriorityClassName          string
    DeletionGracePeriodSeconds *int64
}

var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
@@ -631,6 +632,9 @@ func initPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
    if conf.Resources != nil {
        pod.Spec.Containers[0].Resources = *conf.Resources
    }
    if conf.DeletionGracePeriodSeconds != nil {
        pod.ObjectMeta.DeletionGracePeriodSeconds = conf.DeletionGracePeriodSeconds
    }
    return pod
}
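The new DeletionGracePeriodSeconds field lets a test pin the deletion grace period on the pod object itself; the TaintBasedEvictions test below sets it to 0 so a pod's eviction becomes observable quickly. A minimal sketch (pod name is illustrative; createPausePod is the existing helper in this package that wraps initPausePod):

    zero := int64(0)
    // pause pod whose deletion does not wait out a grace period
    pod := createPausePod(f, pausePodConfig{
        Name:                       "short-grace",
        DeletionGracePeriodSeconds: &zero,
    })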
test/e2e/scheduling/taint_based_evictions.go (new file, 189 lines)
@@ -0,0 +1,189 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scheduling

import (
    "errors"
    "fmt"
    "time"

    "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/fields"
    clientset "k8s.io/client-go/kubernetes"
    schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
    "k8s.io/kubernetes/test/e2e/framework"

    . "github.com/onsi/ginkgo"
)

// newUnreachableNoExecuteTaint returns the taint that the node lifecycle
// manager places on a node it considers unreachable.
func newUnreachableNoExecuteTaint() *v1.Taint {
    return &v1.Taint{
        Key:    schedulerapi.TaintNodeUnreachable,
        Effect: v1.TaintEffectNoExecute,
    }
}

// getTolerationSeconds returns the TolerationSeconds of the first
// unreachable/NoExecute/Exists toleration found. It assumes TolerationSeconds
// is non-nil on such a toleration, which holds for every pod in this test.
func getTolerationSeconds(tolerations []v1.Toleration) (int64, error) {
    for _, t := range tolerations {
        if t.Key == schedulerapi.TaintNodeUnreachable && t.Effect == v1.TaintEffectNoExecute && t.Operator == v1.TolerationOpExists {
            return *t.TolerationSeconds, nil
        }
    }
    return 0, errors.New("cannot find toleration")
}
var _ = SIGDescribe("TaintBasedEvictions [Serial]", func() {
    f := framework.NewDefaultFramework("sched-taint-based-evictions")
    var cs clientset.Interface
    var ns string

    BeforeEach(func() {
        cs = f.ClientSet
        ns = f.Namespace.Name
        // skip if TaintBasedEvictions is not enabled
        // TODO(Huang-Wei): remove this when TaintBasedEvictions is GAed
        framework.SkipUnlessTaintBasedEvictionsEnabled()
        // this test must run on a cluster with more than 1 node; otherwise
        // the node lifecycle manager enters full disruption mode
        framework.SkipUnlessNodeCountIsAtLeast(2)
    })

    // This test verifies that when a node becomes unreachable:
    // 1. the node lifecycle manager generates a status change (the NodeReady condition turns status=ConditionUnknown)
    //    and applies the node.kubernetes.io/unreachable=:NoExecute taint to the node
    // 2. pods without a toleration get a toleration with tolerationSeconds=300 added
    // 3. pods with a toleration but without tolerationSeconds won't be modified, and won't be evicted
    // 4. pods with a toleration and with tolerationSeconds won't be modified, and will be evicted after tolerationSeconds
    // When the network issue recovers, it's expected that:
    // 5. the node lifecycle manager generates a status change (the NodeReady condition turns status=ConditionTrue)
    // 6. the node.kubernetes.io/unreachable=:NoExecute taint is taken off the node
    It("Checks that the node becomes unreachable", func() {
        // find an available node
        nodeName := GetNodeThatCanRunPod(f)
        By("Finding an available node " + nodeName)

        // pod0 is a pod with an unreachable=:NoExecute toleration and tolerationSeconds=0s
        // pod1 is a pod with an unreachable=:NoExecute toleration and tolerationSeconds=200s
        // pod2 is a pod without any toleration
        base := "taint-based-eviction"
        tolerationSeconds := []int64{0, 200}
        numPods := len(tolerationSeconds) + 1
        By(fmt.Sprintf("Preparing %v pods", numPods))
        pods := make([]*v1.Pod, numPods)
        zero := int64(0)
        // build pod0, pod1
        for i := 0; i < numPods-1; i++ {
            pods[i] = createPausePod(f, pausePodConfig{
                Name:     fmt.Sprintf("%v-%v", base, i),
                NodeName: nodeName,
                Tolerations: []v1.Toleration{
                    {
                        Key:               schedulerapi.TaintNodeUnreachable,
                        Operator:          v1.TolerationOpExists,
                        Effect:            v1.TaintEffectNoExecute,
                        TolerationSeconds: &tolerationSeconds[i],
                    },
                },
                DeletionGracePeriodSeconds: &zero,
            })
        }
        // build pod2
        pods[numPods-1] = createPausePod(f, pausePodConfig{
            Name:     fmt.Sprintf("%v-%v", base, numPods-1),
            NodeName: nodeName,
        })

        By("Verifying all pods are running properly")
        for _, pod := range pods {
            framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
        }

        // get the node API object
        nodeSelector := fields.OneTermEqualSelector("metadata.name", nodeName)
        nodeList, err := cs.CoreV1().Nodes().List(metav1.ListOptions{FieldSelector: nodeSelector.String()})
        if err != nil || len(nodeList.Items) != 1 {
            framework.Failf("expected no error and exactly 1 node, got error %v and %v node(s)", err, len(nodeList.Items))
        }
        node := nodeList.Items[0]

        By(fmt.Sprintf("Blocking traffic from node %s to the master", nodeName))
        host, err := framework.GetNodeExternalIP(&node)
        // TODO(Huang-Wei): make this case work for local provider
        // if err != nil {
        //     host, err = framework.GetNodeInternalIP(&node)
        // }
        framework.ExpectNoError(err)
        master := framework.GetMasterAddress(cs)
        taint := newUnreachableNoExecuteTaint()

        defer func() {
            By(fmt.Sprintf("Unblocking traffic from node %s to the master", node.Name))
            framework.UnblockNetwork(host, master)

            if CurrentGinkgoTestDescription().Failed {
                // the test has already failed; skip the recovery checks
                framework.Logf("Current e2e test has failed, so returning from here.")
                return
            }

            By(fmt.Sprintf("Expecting to see node %q become Ready", nodeName))
            framework.WaitForNodeToBeReady(cs, nodeName, time.Minute*1)
            By("Expecting to see the unreachable=:NoExecute taint taken off")
            err := framework.WaitForNodeHasTaintOrNot(cs, nodeName, taint, false, time.Second*30)
            framework.ExpectNoError(err)
        }()

        framework.BlockNetwork(host, master)

        By(fmt.Sprintf("Expecting to see node %q become NotReady", nodeName))
        if !framework.WaitForNodeToBeNotReady(cs, nodeName, time.Minute*3) {
            framework.Failf("node %q did not become NotReady within 3 minutes", nodeName)
        }
        By("Expecting to see the unreachable=:NoExecute taint applied")
        err = framework.WaitForNodeHasTaintOrNot(cs, nodeName, taint, true, time.Second*30)
        framework.ExpectNoError(err)

        By("Expecting pod0 to be evicted immediately")
        err = framework.WaitForPodCondition(cs, ns, pods[0].Name, "pod0 terminating", time.Second*15, func(pod *v1.Pod) (bool, error) {
            // as the node is unreachable, pod0 is expected to stay in Terminating status
            // rather than being fully deleted
            if pod.DeletionTimestamp != nil {
                return true, nil
            }
            return false, nil
        })
        framework.ExpectNoError(err)

        By("Expecting pod2 to be updated with a toleration with tolerationSeconds=300")
        err = framework.WaitForPodCondition(cs, ns, pods[2].Name, "pod2 updated with tolerationSeconds=300", time.Second*15, func(pod *v1.Pod) (bool, error) {
            if seconds, err := getTolerationSeconds(pod.Spec.Tolerations); err == nil {
                return seconds == 300, nil
            }
            return false, nil
        })
        framework.ExpectNoError(err)

        By("Expecting pod1 to be unchanged")
        livePod1, err := cs.CoreV1().Pods(pods[1].Namespace).Get(pods[1].Name, metav1.GetOptions{})
        framework.ExpectNoError(err)
        seconds, err := getTolerationSeconds(livePod1.Spec.Tolerations)
        framework.ExpectNoError(err)
        if seconds != 200 {
            framework.Failf("expected tolerationSeconds of pod1 to be 200, but got %v", seconds)
        }
    })
})
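To exercise just this suite against a live cluster, the standard e2e runner invocation applies, for example: go run hack/e2e.go -- --test --test_args="--ginkgo.focus=TaintBasedEvictions" (the runner and its flags predate this PR; the cluster must also have the feature gate enabled, as noted above).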