Convert the e2e to integration test

This commit is contained in:
ravisantoshgudimetla 2019-07-31 10:05:58 -07:00 committed by Mike Dame
parent 001f2cd2b5
commit 7e1794dcb1
4 changed files with 234 additions and 199 deletions

View File

@@ -10,7 +10,6 @@ go_library(
"predicates.go",
"preemption.go",
"priorities.go",
"taint_based_evictions.go",
"taints.go",
"ubernetes_lite.go",
"ubernetes_lite_volumes.go",
@@ -22,14 +21,12 @@ go_library(
"//pkg/apis/extensions:go_default_library",
"//pkg/apis/scheduling:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//staging/src/k8s.io/api/apps/v1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/scheduling/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",

View File

@@ -1,196 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"errors"
"fmt"
"time"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
clientset "k8s.io/client-go/kubernetes"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/test/e2e/framework"
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
"github.com/onsi/ginkgo"
)
func newUnreachableNoExecuteTaint() *v1.Taint {
return &v1.Taint{
Key: schedulerapi.TaintNodeUnreachable,
Effect: v1.TaintEffectNoExecute,
}
}
func getTolerationSeconds(tolerations []v1.Toleration) (int64, error) {
for _, t := range tolerations {
if t.Key == schedulerapi.TaintNodeUnreachable && t.Effect == v1.TaintEffectNoExecute && t.Operator == v1.TolerationOpExists {
return *t.TolerationSeconds, nil
}
}
return 0, errors.New("cannot find toleration")
}
var _ = SIGDescribe("TaintBasedEvictions [Serial]", func() {
f := framework.NewDefaultFramework("sched-taint-based-evictions")
var cs clientset.Interface
var ns string
ginkgo.BeforeEach(func() {
cs = f.ClientSet
ns = f.Namespace.Name
// skip if TaintBasedEvictions is not enabled
// TODO(Huang-Wei): remove this when TaintBasedEvictions is GAed
framework.SkipUnlessTaintBasedEvictionsEnabled()
// this test requires a cluster with more than one node,
// otherwise the node lifecycle manager enters fully-disrupted mode
framework.SkipUnlessNodeCountIsAtLeast(2)
})
// This test verifies that when a node becomes unreachable:
// 1. the node lifecycle manager changes the NodeReady condition status to ConditionUnknown
// 2. the node.kubernetes.io/unreachable=:NoExecute taint is applied to the node
// 3. pods without a toleration get a toleration with tolerationSeconds=300 added
// 4. pods with a toleration but without tolerationSeconds won't be modified, and won't be evicted
// 5. pods with a toleration and with tolerationSeconds won't be modified, and will be evicted after tolerationSeconds
// When the network issue recovers, it's expected that:
// 6. the node lifecycle manager changes the NodeReady condition status back to ConditionTrue
// 7. the node.kubernetes.io/unreachable=:NoExecute taint is removed from the node
ginkgo.It("Checks that the node becomes unreachable", func() {
framework.SkipUnlessSSHKeyPresent()
// find an available node
nodeName := GetNodeThatCanRunPod(f)
ginkgo.By("Finding an available node " + nodeName)
// pod0 is a pod with an unreachable=:NoExecute toleration and tolerationSeconds=0s
// pod1 is a pod with an unreachable=:NoExecute toleration and tolerationSeconds=200s
// pod2 is a pod without any toleration
base := "taint-based-eviction"
tolerationSeconds := []int64{0, 200}
numPods := len(tolerationSeconds) + 1
ginkgo.By(fmt.Sprintf("Preparing %v pods", numPods))
pods := make([]*v1.Pod, numPods)
zero := int64(0)
// build pod0, pod1
for i := 0; i < numPods-1; i++ {
pods[i] = createPausePod(f, pausePodConfig{
Name: fmt.Sprintf("%v-%v", base, i),
NodeName: nodeName,
Tolerations: []v1.Toleration{
{
Key: schedulerapi.TaintNodeUnreachable,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
TolerationSeconds: &tolerationSeconds[i],
},
},
DeletionGracePeriodSeconds: &zero,
})
}
// build pod2
pods[numPods-1] = createPausePod(f, pausePodConfig{
Name: fmt.Sprintf("%v-%v", base, numPods-1),
NodeName: nodeName,
})
ginkgo.By("Verifying all pods are running properly")
for _, pod := range pods {
framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(cs, pod))
}
// get the node API object
nodeSelector := fields.OneTermEqualSelector("metadata.name", nodeName)
nodeList, err := cs.CoreV1().Nodes().List(metav1.ListOptions{FieldSelector: nodeSelector.String()})
if err != nil || len(nodeList.Items) != 1 {
framework.Failf("expected no err, got %v; expected len(nodes) = 1, got %v", err, len(nodeList.Items))
}
node := nodeList.Items[0]
ginkgo.By(fmt.Sprintf("Blocking traffic from node %s to the master", nodeName))
host, err := e2enode.GetExternalIP(&node)
if err != nil {
host, err = e2enode.GetInternalIP(&node)
}
framework.ExpectNoError(err)
masterAddresses := framework.GetAllMasterAddresses(cs)
taint := newUnreachableNoExecuteTaint()
defer func() {
ginkgo.By(fmt.Sprintf("Unblocking traffic from node %s to the master", node.Name))
for _, masterAddress := range masterAddresses {
framework.UnblockNetwork(host, masterAddress)
}
if ginkgo.CurrentGinkgoTestDescription().Failed {
framework.Failf("Current e2e test has failed, so return from here.")
return
}
ginkgo.By(fmt.Sprintf("Expecting to see node %q becomes Ready", nodeName))
e2enode.WaitForNodeToBeReady(cs, nodeName, time.Minute*1)
ginkgo.By("Expecting to see unreachable=:NoExecute taint is taken off")
err := framework.WaitForNodeHasTaintOrNot(cs, nodeName, taint, false, time.Second*30)
framework.ExpectNoError(err)
}()
for _, masterAddress := range masterAddresses {
framework.BlockNetwork(host, masterAddress)
}
ginkgo.By(fmt.Sprintf("Expecting to see node %q becomes NotReady", nodeName))
if !e2enode.WaitForNodeToBeNotReady(cs, nodeName, time.Minute*3) {
framework.Failf("node %q doesn't turn to NotReady after 3 minutes", nodeName)
}
ginkgo.By("Expecting to see unreachable=:NoExecute taint is applied")
err = framework.WaitForNodeHasTaintOrNot(cs, nodeName, taint, true, time.Second*30)
framework.ExpectNoError(err)
ginkgo.By("Expecting pod0 to be evicted immediately")
err = e2epod.WaitForPodCondition(cs, ns, pods[0].Name, "pod0 terminating", time.Second*15, func(pod *v1.Pod) (bool, error) {
// as the node is unreachable, pod0 is expected to remain in Terminating status
// rather than being deleted
if pod.DeletionTimestamp != nil {
return true, nil
}
return false, nil
})
framework.ExpectNoError(err)
ginkgo.By("Expecting pod2 to be updated with a toleration with tolerationSeconds=300")
err = e2epod.WaitForPodCondition(cs, ns, pods[2].Name, "pod2 updated with tolerationSeconds=300", time.Second*15, func(pod *v1.Pod) (bool, error) {
if seconds, err := getTolerationSeconds(pod.Spec.Tolerations); err == nil {
return seconds == 300, nil
}
return false, nil
})
framework.ExpectNoError(err)
ginkgo.By("Expecting pod1 to be unchanged")
livePod1, err := cs.CoreV1().Pods(pods[1].Namespace).Get(pods[1].Name, metav1.GetOptions{})
framework.ExpectNoError(err)
seconds, err := getTolerationSeconds(livePod1.Spec.Tolerations)
framework.ExpectNoError(err)
if seconds != 200 {
framework.Failf("expect tolerationSeconds of pod1 is 200, but got %v", seconds)
}
})
})

View File

@@ -35,6 +35,7 @@ go_test(
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//plugin/pkg/admission/defaulttolerationseconds:go_default_library",
"//plugin/pkg/admission/podtolerationrestriction:go_default_library",
"//plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
@@ -48,6 +49,7 @@ go_test(
"//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/admission:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
@@ -56,6 +58,7 @@ go_test(
"//staging/src/k8s.io/client-go/tools/cache:go_default_library",
"//staging/src/k8s.io/client-go/tools/events:go_default_library",
"//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
"//test/e2e/framework/pod:go_default_library",
"//test/integration/framework:go_default_library",
"//test/utils:go_default_library",
"//test/utils/image:go_default_library",

View File

@@ -27,6 +27,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apiserver/pkg/admission"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
@@ -36,8 +37,11 @@ import (
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/algorithmprovider"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/admission/defaulttolerationseconds"
"k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction"
"k8s.io/kubernetes/test/e2e/framework/pod"
imageutils "k8s.io/kubernetes/test/utils/image"
)
func newPod(nsName, name string, req, limit v1.ResourceList) *v1.Pod {
@@ -571,3 +575,230 @@ func TestTaintNodeByCondition(t *testing.T) {
})
}
}
// TestTaintBasedEvictions tests related cases for the TaintBasedEvictions feature
func TestTaintBasedEvictions(t *testing.T) {
// we need at least 2 nodes to prevent lifecycle manager from entering "fully-disrupted" mode
nodeCount := 3
zero := int64(0)
gracePeriod := int64(1)
testPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "testpod1", DeletionGracePeriodSeconds: &zero},
Spec: v1.PodSpec{
Containers: []v1.Container{
{Name: "container", Image: imageutils.GetPauseImageName()},
},
Tolerations: []v1.Toleration{
{
Key: schedulerapi.TaintNodeNotReady,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
},
},
TerminationGracePeriodSeconds: &gracePeriod,
},
}
tolerationSeconds := []int64{200, 300, 0}
tests := []struct {
name string
nodeTaints []v1.Taint
nodeConditions []v1.NodeCondition
pod *v1.Pod
waitForPodCondition string
}{
{
name: "Taint based evictions for NodeNotReady and 200 tolerationSeconds",
nodeTaints: []v1.Taint{{Key: schedulerapi.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
pod: testPod,
waitForPodCondition: "updated with tolerationSeconds of 200",
},
{
name: "Taint based evictions for NodeNotReady with no pod tolerations",
nodeTaints: []v1.Taint{{Key: schedulerapi.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "testpod1"},
Spec: v1.PodSpec{
Containers: []v1.Container{
{Name: "container", Image: imageutils.GetPauseImageName()},
},
},
},
waitForPodCondition: "updated with tolerationSeconds=300",
},
{
name: "Taint based evictions for NodeNotReady and 0 tolerationSeconds",
nodeTaints: []v1.Taint{{Key: schedulerapi.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
pod: testPod,
waitForPodCondition: "terminating",
},
{
name: "Taint based evictions for NodeUnreachable",
nodeTaints: []v1.Taint{{Key: schedulerapi.TaintNodeUnreachable, Effect: v1.TaintEffectNoExecute}},
nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionUnknown}},
},
}
// Enable TaintBasedEvictions
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.TaintBasedEvictions, true)()
// ApplyFeatureGates() is called to ensure TaintNodesByCondition related logic is applied/restored properly.
defer algorithmprovider.ApplyFeatureGates()()
// Build admission chain handler.
podTolerations := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})
admission := admission.NewChainHandler(
podTolerations,
defaulttolerationseconds.NewDefaultTolerationSeconds(),
)
for i, test := range tests {
t.Run(test.name, func(t *testing.T) {
context := initTestMaster(t, "taint-based-evictions", admission)
// Build clientset and informers for controllers.
externalClientset := kubernetes.NewForConfigOrDie(&restclient.Config{
QPS: -1,
Host: context.httpServer.URL,
ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
externalInformers := informers.NewSharedInformerFactory(externalClientset, time.Second)
podTolerations.SetExternalKubeClientSet(externalClientset)
podTolerations.SetExternalKubeInformerFactory(externalInformers)
context = initTestScheduler(t, context, true, nil)
cs := context.clientSet
informers := context.informerFactory
_, err := cs.CoreV1().Namespaces().Create(context.ns)
if err != nil {
t.Errorf("Failed to create namespace %+v", err)
}
// Start NodeLifecycleController for taint.
nc, err := nodelifecycle.NewNodeLifecycleController(
informers.Coordination().V1beta1().Leases(),
informers.Core().V1().Pods(),
informers.Core().V1().Nodes(),
informers.Apps().V1().DaemonSets(),
cs,
5*time.Second, // Node monitor grace period
time.Minute, // Node startup grace period
time.Millisecond, // Node monitor period
time.Second, // Pod eviction timeout
100, // Eviction limiter QPS
100, // Secondary eviction limiter QPS
50, // Large cluster threshold
0.55, // Unhealthy zone threshold
true, // Run taint manager
true, // Use taint based evictions
false, // Enable TaintNodeByCondition feature
)
if err != nil {
t.Errorf("Failed to create node controller: %v", err)
return
}
go nc.Run(context.stopCh)
// Waiting for all controller sync.
externalInformers.Start(context.stopCh)
externalInformers.WaitForCacheSync(context.stopCh)
informers.Start(context.stopCh)
informers.WaitForCacheSync(context.stopCh)
nodeRes := v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4000m"),
v1.ResourceMemory: resource.MustParse("16Gi"),
v1.ResourcePods: resource.MustParse("110"),
}
var nodes []*v1.Node
for i := 0; i < nodeCount; i++ {
nodes = append(nodes, &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("node-%d", i),
Labels: map[string]string{v1.LabelZoneRegion: "region1", v1.LabelZoneFailureDomain: "zone1"},
},
Spec: v1.NodeSpec{},
Status: v1.NodeStatus{
Capacity: nodeRes,
Allocatable: nodeRes,
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
},
},
},
})
if _, err := cs.CoreV1().Nodes().Create(nodes[i]); err != nil {
t.Errorf("Failed to create node, err: %v", err)
}
}
neededNode := nodes[1]
if test.pod != nil {
test.pod.Name = fmt.Sprintf("testpod-%d", i)
if len(test.pod.Spec.Tolerations) > 0 {
test.pod.Spec.Tolerations[0].TolerationSeconds = &tolerationSeconds[i]
}
test.pod, err = cs.CoreV1().Pods(context.ns.Name).Create(test.pod)
if err != nil {
t.Fatalf("Test Failed: error: %v, while creating pod", err)
}
if err := waitForPodToSchedule(cs, test.pod); err != nil {
t.Errorf("Failed to schedule pod %s/%s on the node, err: %v",
test.pod.Namespace, test.pod.Name, err)
}
test.pod, err = cs.CoreV1().Pods(context.ns.Name).Get(test.pod.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("Test Failed: error: %v, while creating pod", err)
}
neededNode, err = cs.CoreV1().Nodes().Get(test.pod.Spec.NodeName, metav1.GetOptions{})
if err != nil {
t.Fatalf("Error while getting node associated with pod %v with err %v", test.pod.Name, err)
}
}
neededNode.Status.Conditions = test.nodeConditions
// Update node condition.
err = updateNodeStatus(cs, neededNode)
if err != nil {
t.Fatalf("Cannot update node: %v", err)
}
if err := waitForNodeTaints(cs, neededNode, test.nodeTaints); err != nil {
t.Errorf("Failed to taint node in test %d <%s>, err: %v", i, neededNode.Name, err)
}
if test.pod != nil {
err = pod.WaitForPodCondition(cs, context.ns.Name, test.pod.Name, test.waitForPodCondition, time.Second*15, func(pod *v1.Pod) (bool, error) {
// with tolerationSeconds=0 the pod is expected to be Terminating (DeletionTimestamp set)
// rather than already deleted; otherwise check the injected tolerationSeconds
if tolerationSeconds[i] == 0 {
return pod.DeletionTimestamp != nil, nil
}
if seconds, err := getTolerationSeconds(pod.Spec.Tolerations); err == nil {
return seconds == tolerationSeconds[i], nil
}
return false, nil
})
if err != nil {
pod, _ := cs.CoreV1().Pods(context.ns.Name).Get(test.pod.Name, metav1.GetOptions{})
t.Fatalf("Error: %v, Expected test pod to be %s but it's %v", err, test.waitForPodCondition, pod)
}
cleanupPods(cs, t, []*v1.Pod{test.pod})
}
cleanupNodes(cs, t)
waitForSchedulerCacheCleanup(context.scheduler, t)
})
}
}
func getTolerationSeconds(tolerations []v1.Toleration) (int64, error) {
for _, t := range tolerations {
if t.Key == schedulerapi.TaintNodeNotReady && t.Effect == v1.TaintEffectNoExecute && t.Operator == v1.TolerationOpExists {
return *t.TolerationSeconds, nil
}
}
return 0, fmt.Errorf("cannot find toleration")
}
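
For reference, a minimal standalone sketch (not part of the diff above) of the toleration lookup that both the removed e2e test and the new integration test rely on: given a pod's tolerations, find the tolerationSeconds attached to the Exists/NoExecute toleration for a taint key. The literal key "node.kubernetes.io/not-ready" is assumed to match the schedulerapi.TaintNodeNotReady constant used in the tests, and the 300s value mirrors the default that the DefaultTolerationSeconds admission plugin injects for pods without explicit tolerations.

// tolerations_sketch.go — illustrative only, not part of this commit.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// tolerationSecondsFor returns the tolerationSeconds of the Exists/NoExecute
// toleration matching the given taint key, if one is present.
func tolerationSecondsFor(tolerations []v1.Toleration, key string) (int64, bool) {
	for _, t := range tolerations {
		if t.Key == key && t.Effect == v1.TaintEffectNoExecute &&
			t.Operator == v1.TolerationOpExists && t.TolerationSeconds != nil {
			return *t.TolerationSeconds, true
		}
	}
	return 0, false
}

func main() {
	// A pod created without explicit tolerations is expected to end up with a
	// toleration like this one, added by the DefaultTolerationSeconds admission
	// plugin wired into TestTaintBasedEvictions above (300s is the plugin default).
	defaultSeconds := int64(300)
	tolerations := []v1.Toleration{{
		Key:               "node.kubernetes.io/not-ready", // schedulerapi.TaintNodeNotReady
		Operator:          v1.TolerationOpExists,
		Effect:            v1.TaintEffectNoExecute,
		TolerationSeconds: &defaultSeconds,
	}}
	if seconds, ok := tolerationSecondsFor(tolerations, "node.kubernetes.io/not-ready"); ok {
		fmt.Printf("pod tolerates NotReady for %ds before eviction\n", seconds)
	}
}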