From c1e7ab07bde02930895b274022e804ca9d38bd7d Mon Sep 17 00:00:00 2001
From: Wei Huang
Date: Thu, 15 Nov 2018 16:17:39 -0800
Subject: [PATCH 1/3] add an e2e test to verify preemption running path

---
 test/e2e/scheduling/BUILD         |   1 +
 test/e2e/scheduling/preemption.go | 220 ++++++++++++++++++++++++++++++
 2 files changed, 221 insertions(+)

diff --git a/test/e2e/scheduling/BUILD b/test/e2e/scheduling/BUILD
index 9da465bf82c..33eaa147b54 100644
--- a/test/e2e/scheduling/BUILD
+++ b/test/e2e/scheduling/BUILD
@@ -27,6 +27,7 @@ go_library(
         "//pkg/quota/v1/evaluator/core:go_default_library",
         "//pkg/scheduler/algorithm/priorities/util:go_default_library",
         "//pkg/scheduler/api:go_default_library",
+        "//staging/src/k8s.io/api/apps/v1:go_default_library",
         "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/api/extensions/v1beta1:go_default_library",
         "//staging/src/k8s.io/api/scheduling/v1beta1:go_default_library",
diff --git a/test/e2e/scheduling/preemption.go b/test/e2e/scheduling/preemption.go
index f4d57dde116..6de8f8ed92e 100644
--- a/test/e2e/scheduling/preemption.go
+++ b/test/e2e/scheduling/preemption.go
@@ -18,13 +18,19 @@ package scheduling
 
 import (
 	"fmt"
+	"strings"
 	"time"
 
+	"k8s.io/client-go/tools/cache"
+
+	appsv1 "k8s.io/api/apps/v1"
 	"k8s.io/api/core/v1"
 	schedulerapi "k8s.io/api/scheduling/v1beta1"
 	"k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/watch"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/kubernetes/pkg/apis/scheduling"
 	"k8s.io/kubernetes/test/e2e/framework"
@@ -353,3 +359,217 @@ var _ = SIGDescribe("PodPriorityResolution [Serial]", func() {
 		}
 	})
 })
+
+// construct a fakecpu so as to set it to status of Node object
+// otherwise if we update CPU/Memory/etc, those values will be corrected back by kubelet
+var fakecpu v1.ResourceName = "example.com/fakecpu"
+
+var _ = SIGDescribe("PreemptionExecutionPath", func() {
+	var cs clientset.Interface
+	var node *v1.Node
+	var ns string
+	f := framework.NewDefaultFramework("sched-preemption-path")
+
+	AfterEach(func() {
+		if node != nil {
+			nodeCopy := node.DeepCopy()
+			// force it to update
+			nodeCopy.ResourceVersion = "0"
+			delete(nodeCopy.Status.Capacity, fakecpu)
+			_, err := cs.CoreV1().Nodes().UpdateStatus(nodeCopy)
+			framework.ExpectNoError(err)
+		}
+	})
+
+	BeforeEach(func() {
+		cs = f.ClientSet
+		ns = f.Namespace.Name
+
+		// find an available node
+		By("Finding an available node")
+		nodeName := GetNodeThatCanRunPod(f)
+		framework.Logf("found a healthy node: %s", nodeName)
+
+		// get the node API object
+		var err error
+		node, err = cs.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
+		if err != nil {
+			framework.Failf("error getting node %q: %v", nodeName, err)
+		}
+
+		// update Node API object with a fake resource
+		nodeCopy := node.DeepCopy()
+		// force it to update
+		nodeCopy.ResourceVersion = "0"
+		nodeCopy.Status.Capacity[fakecpu] = resource.MustParse("800")
+		node, err = cs.CoreV1().Nodes().UpdateStatus(nodeCopy)
+		framework.ExpectNoError(err)
+
+		// create four PriorityClass: p1, p2, p3, p4
+		for i := 1; i <= 4; i++ {
+			priorityName := fmt.Sprintf("p%d", i)
+			_, err := f.ClientSet.SchedulingV1beta1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: priorityName}, Value: int32(i)})
+			Expect(err == nil || errors.IsAlreadyExists(err)).To(Equal(true))
+		}
+	})
+
+	It("runs ReplicaSets to verify preemption running path", func() {
+		podNamesSeen := make(map[string]struct{})
+		stopCh := make(chan struct{})
+
+		// create an pod controller to list/watch pod events from the test framework namespace
+		_, podController := cache.NewInformer(
+			&cache.ListWatch{
+				ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
+					obj, err := f.ClientSet.CoreV1().Pods(ns).List(options)
+					return runtime.Object(obj), err
+				},
+				WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
+					return f.ClientSet.CoreV1().Pods(ns).Watch(options)
+				},
+			},
+			&v1.Pod{},
+			0,
+			cache.ResourceEventHandlerFuncs{
+				AddFunc: func(obj interface{}) {
+					if pod, ok := obj.(*v1.Pod); ok {
+						podNamesSeen[pod.Name] = struct{}{}
+					}
+				},
+			},
+		)
+		go podController.Run(stopCh)
+		defer close(stopCh)
+
+		// prepare four ReplicaSet
+		rsConfs := []pauseRSConfig{
+			{
+				Replicas: int32(5),
+				PodConfig: pausePodConfig{
+					Name:              fmt.Sprintf("pod1"),
+					Namespace:         ns,
+					Labels:            map[string]string{"name": "pod1"},
+					PriorityClassName: "p1",
+					NodeSelector:      map[string]string{"kubernetes.io/hostname": node.Name},
+					Resources: &v1.ResourceRequirements{
+						Requests: v1.ResourceList{fakecpu: resource.MustParse("40")},
+						Limits:   v1.ResourceList{fakecpu: resource.MustParse("40")},
+					},
+				},
+			},
+			{
+				Replicas: int32(4),
+				PodConfig: pausePodConfig{
+					Name:              fmt.Sprintf("pod2"),
+					Namespace:         ns,
+					Labels:            map[string]string{"name": "pod2"},
+					PriorityClassName: "p2",
+					NodeSelector:      map[string]string{"kubernetes.io/hostname": node.Name},
+					Resources: &v1.ResourceRequirements{
+						Requests: v1.ResourceList{fakecpu: resource.MustParse("50")},
+						Limits:   v1.ResourceList{fakecpu: resource.MustParse("50")},
+					},
+				},
+			},
+			{
+				Replicas: int32(4),
+				PodConfig: pausePodConfig{
+					Name:              fmt.Sprintf("pod3"),
+					Namespace:         ns,
+					Labels:            map[string]string{"name": "pod3"},
+					PriorityClassName: "p3",
+					NodeSelector:      map[string]string{"kubernetes.io/hostname": node.Name},
+					Resources: &v1.ResourceRequirements{
+						Requests: v1.ResourceList{fakecpu: resource.MustParse("95")},
+						Limits:   v1.ResourceList{fakecpu: resource.MustParse("95")},
+					},
+				},
+			},
+			{
+				Replicas: int32(1),
+				PodConfig: pausePodConfig{
+					Name:              fmt.Sprintf("pod4"),
+					Namespace:         ns,
+					Labels:            map[string]string{"name": "pod4"},
+					PriorityClassName: "p4",
+					NodeSelector:      map[string]string{"kubernetes.io/hostname": node.Name},
+					Resources: &v1.ResourceRequirements{
+						Requests: v1.ResourceList{fakecpu: resource.MustParse("400")},
+						Limits:   v1.ResourceList{fakecpu: resource.MustParse("400")},
+					},
+				},
+			},
+		}
+		// create ReplicaSet{1,2,3} so as to occupy 780/800 fake resource
+		rsNum := len(rsConfs)
+		for i := 0; i < rsNum-1; i++ {
+			runPauseRS(f, rsConfs[i])
+		}
+
+		framework.Logf("pods created so far: %v", podNamesSeen)
+		framework.Logf("length of pods created so far: %v", len(podNamesSeen))
+
+		// create ReplicaSet4
+		// if runPauseRS failed, it means ReplicaSet4 cannot be scheduled even after 1 minute
+		// which is unacceptable
+		runPauseRS(f, rsConfs[rsNum-1])
+
+		framework.Logf("pods created so far: %v", podNamesSeen)
+		framework.Logf("length of pods created so far: %v", len(podNamesSeen))
+
+		// count pods number of RepliaSet3, if it's more than orignal replicas (4)
+		// then means its pods has been preempted once or more
+		rs3PodsSeen := 0
+		for podName := range podNamesSeen {
+			if strings.HasPrefix(podName, "rs-pod3") {
+				rs3PodsSeen++
+			}
+		}
+		if rs3PodsSeen != 4 {
+			framework.Failf("some pods of ReplicaSet3 have been preempted: expect 4 pod names, but got %d", rs3PodsSeen)
+		}
+	})
+
+})
+
+type pauseRSConfig struct {
+	Replicas  int32
+	PodConfig pausePodConfig
+}
+
+func initPauseRS(f *framework.Framework, conf pauseRSConfig) *appsv1.ReplicaSet {
+	pausePod := initPausePod(f, conf.PodConfig)
+	pauseRS := &appsv1.ReplicaSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "rs-" + pausePod.Name,
+			Namespace: pausePod.Namespace,
+		},
+		Spec: appsv1.ReplicaSetSpec{
+			Replicas: &conf.Replicas,
+			Selector: &metav1.LabelSelector{
+				MatchLabels: pausePod.Labels,
+			},
+			Template: v1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{Labels: pausePod.ObjectMeta.Labels},
+				Spec:       pausePod.Spec,
+			},
+		},
+	}
+	return pauseRS
+}
+
+func createPauseRS(f *framework.Framework, conf pauseRSConfig) *appsv1.ReplicaSet {
+	namespace := conf.PodConfig.Namespace
+	if len(namespace) == 0 {
+		namespace = f.Namespace.Name
+	}
+	rs, err := f.ClientSet.AppsV1().ReplicaSets(namespace).Create(initPauseRS(f, conf))
+	framework.ExpectNoError(err)
+	return rs
+}
+
+func runPauseRS(f *framework.Framework, conf pauseRSConfig) *appsv1.ReplicaSet {
+	rs := createPauseRS(f, conf)
+	framework.ExpectNoError(framework.WaitForReplicaSetTargetAvailableReplicas(f.ClientSet, rs, conf.Replicas))
+	return rs
+}

From 4ce10eda64e2d0ff1e277bf35f56c9d7f95123d6 Mon Sep 17 00:00:00 2001
From: Wei Huang
Date: Tue, 20 Nov 2018 11:00:26 -0800
Subject: [PATCH 2/3] cleanup PriorityClass after each test

---
 test/e2e/scheduling/preemption.go | 35 ++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/test/e2e/scheduling/preemption.go b/test/e2e/scheduling/preemption.go
index 6de8f8ed92e..05d354a2c28 100644
--- a/test/e2e/scheduling/preemption.go
+++ b/test/e2e/scheduling/preemption.go
@@ -40,6 +40,11 @@ import (
 	_ "github.com/stretchr/testify/assert"
 )
 
+type priorityPair struct {
+	name  string
+	value int32
+}
+
 var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 	var cs clientset.Interface
 	var nodeList *v1.NodeList
@@ -50,24 +55,31 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 	lowPriorityClassName := f.BaseName + "-low-priority"
 	mediumPriorityClassName := f.BaseName + "-medium-priority"
 	highPriorityClassName := f.BaseName + "-high-priority"
+	priorityPairs := []priorityPair{
+		{name: lowPriorityClassName, value: lowPriority},
+		{name: mediumPriorityClassName, value: mediumPriority},
+		{name: highPriorityClassName, value: highPriority},
+	}
+
 	AfterEach(func() {
+		for _, pair := range priorityPairs {
+			cs.SchedulingV1beta1().PriorityClasses().Delete(pair.name, metav1.NewDeleteOptions(0))
+		}
 	})
 
 	BeforeEach(func() {
 		cs = f.ClientSet
 		ns = f.Namespace.Name
 		nodeList = &v1.NodeList{}
 
-		_, err := f.ClientSet.SchedulingV1beta1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority})
-		Expect(err == nil || errors.IsAlreadyExists(err)).To(Equal(true))
-		_, err = f.ClientSet.SchedulingV1beta1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: mediumPriorityClassName}, Value: mediumPriority})
-		Expect(err == nil || errors.IsAlreadyExists(err)).To(Equal(true))
-		_, err = f.ClientSet.SchedulingV1beta1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: lowPriorityClassName}, Value: lowPriority})
-		Expect(err == nil || errors.IsAlreadyExists(err)).To(Equal(true))
+		for _, pair := range priorityPairs {
+			_, err := f.ClientSet.SchedulingV1beta1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: pair.name}, Value: pair.value})
+			Expect(err == nil || errors.IsAlreadyExists(err)).To(Equal(true))
+		}
 
 		framework.WaitForAllNodesHealthy(cs, time.Minute)
 		masterNodes, nodeList = framework.GetMasterAndWorkerNodesOrDie(cs)
 
-		err = framework.CheckTestingNSDeletedExcept(cs, ns)
+		err := framework.CheckTestingNSDeletedExcept(cs, ns)
 		framework.ExpectNoError(err)
 	})
@@ -370,6 +382,8 @@ var _ = SIGDescribe("PreemptionExecutionPath", func() {
 	var ns string
 	f := framework.NewDefaultFramework("sched-preemption-path")
 
+	priorityPairs := make([]priorityPair, 0)
+
 	AfterEach(func() {
 		if node != nil {
 			nodeCopy := node.DeepCopy()
@@ -379,6 +393,9 @@ var _ = SIGDescribe("PreemptionExecutionPath", func() {
 			_, err := cs.CoreV1().Nodes().UpdateStatus(nodeCopy)
 			framework.ExpectNoError(err)
 		}
+		for _, pair := range priorityPairs {
+			cs.SchedulingV1beta1().PriorityClasses().Delete(pair.name, metav1.NewDeleteOptions(0))
+		}
 	})
 
 	BeforeEach(func() {
@@ -408,7 +425,9 @@ var _ = SIGDescribe("PreemptionExecutionPath", func() {
 		// create four PriorityClass: p1, p2, p3, p4
 		for i := 1; i <= 4; i++ {
 			priorityName := fmt.Sprintf("p%d", i)
-			_, err := f.ClientSet.SchedulingV1beta1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: priorityName}, Value: int32(i)})
+			priorityVal := int32(i)
+			priorityPairs = append(priorityPairs, priorityPair{name: priorityName, value: priorityVal})
+			_, err := cs.SchedulingV1beta1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: priorityName}, Value: priorityVal})
 			Expect(err == nil || errors.IsAlreadyExists(err)).To(Equal(true))
 		}
 	})

From f9f588f05227da8fb4a7de6353f34cc490f74d7f Mon Sep 17 00:00:00 2001
From: Wei Huang
Date: Thu, 29 Nov 2018 10:26:24 -0800
Subject: [PATCH 3/3] address comments

---
 test/e2e/scheduling/preemption.go | 33 ++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/test/e2e/scheduling/preemption.go b/test/e2e/scheduling/preemption.go
index 05d354a2c28..c1f7395bdf3 100644
--- a/test/e2e/scheduling/preemption.go
+++ b/test/e2e/scheduling/preemption.go
@@ -436,7 +436,7 @@ var _ = SIGDescribe("PreemptionExecutionPath", func() {
 		podNamesSeen := make(map[string]struct{})
 		stopCh := make(chan struct{})
 
-		// create an pod controller to list/watch pod events from the test framework namespace
+		// create a pod controller to list/watch pod events from the test framework namespace
 		_, podController := cache.NewInformer(
 			&cache.ListWatch{
 				ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
@@ -465,7 +465,7 @@ var _ = SIGDescribe("PreemptionExecutionPath", func() {
 			{
 				Replicas: int32(5),
 				PodConfig: pausePodConfig{
-					Name:              fmt.Sprintf("pod1"),
+					Name:              "pod1",
 					Namespace:         ns,
 					Labels:            map[string]string{"name": "pod1"},
 					PriorityClassName: "p1",
@@ -479,7 +479,7 @@ var _ = SIGDescribe("PreemptionExecutionPath", func() {
 			{
 				Replicas: int32(4),
 				PodConfig: pausePodConfig{
-					Name:              fmt.Sprintf("pod2"),
+					Name:              "pod2",
 					Namespace:         ns,
 					Labels:            map[string]string{"name": "pod2"},
 					PriorityClassName: "p2",
@@ -493,7 +493,7 @@ var _ = SIGDescribe("PreemptionExecutionPath", func() {
 			{
 				Replicas: int32(4),
 				PodConfig: pausePodConfig{
-					Name:              fmt.Sprintf("pod3"),
+					Name:              "pod3",
 					Namespace:         ns,
 					Labels:            map[string]string{"name": "pod3"},
 					PriorityClassName: "p3",
@@ -507,7 +507,7 @@ var _ = SIGDescribe("PreemptionExecutionPath", func() {
 			{
 				Replicas: int32(1),
 				PodConfig: pausePodConfig{
-					Name:              fmt.Sprintf("pod4"),
+					Name:              "pod4",
 					Namespace:         ns,
 					Labels:            map[string]string{"name": "pod4"},
 					PriorityClassName: "p4",
@@ -536,16 +536,25 @@ var _ = SIGDescribe("PreemptionExecutionPath", func() {
 		framework.Logf("pods created so far: %v", podNamesSeen)
 		framework.Logf("length of pods created so far: %v", len(podNamesSeen))
 
-		// count pods number of RepliaSet3, if it's more than orignal replicas (4)
-		// then means its pods has been preempted once or more
-		rs3PodsSeen := 0
+		// count pods number of ReplicaSet{1,2,3}, if it's more than expected replicas
+		// then it denotes its pods have been over-preempted
+		// "*2" means pods of ReplicaSet{1,2} are expected to be only preempted once
+		maxRSPodsSeen := []int{5 * 2, 4 * 2, 4}
+		rsPodsSeen := []int{0, 0, 0}
 		for podName := range podNamesSeen {
-			if strings.HasPrefix(podName, "rs-pod3") {
-				rs3PodsSeen++
+			if strings.HasPrefix(podName, "rs-pod1") {
+				rsPodsSeen[0]++
+			} else if strings.HasPrefix(podName, "rs-pod2") {
+				rsPodsSeen[1]++
+			} else if strings.HasPrefix(podName, "rs-pod3") {
+				rsPodsSeen[2]++
 			}
 		}
-		if rs3PodsSeen != 4 {
-			framework.Failf("some pods of ReplicaSet3 have been preempted: expect 4 pod names, but got %d", rs3PodsSeen)
+		for i, got := range rsPodsSeen {
+			expected := maxRSPodsSeen[i]
+			if got > expected {
+				framework.Failf("pods of ReplicaSet%d have been over-preempted: expect %v pod names, but got %d", i+1, expected, got)
+			}
 		}
 	})
 
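
Note (not part of the patches above): a minimal usage sketch of the pauseRSConfig/runPauseRS helpers introduced in PATCH 1/3, as another test in the scheduling package might reuse them. The replica count, pod name "demo", PriorityClass "p1", and fakecpu amounts are illustrative assumptions; f is the test's framework.Framework and node is a node whose fakecpu capacity has already been patched as in the BeforeEach above.

	// Hypothetical reuse of the helpers added by this series; values are illustrative.
	conf := pauseRSConfig{
		Replicas: int32(2),
		PodConfig: pausePodConfig{
			Name:              "demo",
			Namespace:         f.Namespace.Name,
			Labels:            map[string]string{"name": "demo"},
			PriorityClassName: "p1", // assumes this PriorityClass already exists
			NodeSelector:      map[string]string{"kubernetes.io/hostname": node.Name},
			Resources: &v1.ResourceRequirements{
				Requests: v1.ResourceList{fakecpu: resource.MustParse("40")},
				Limits:   v1.ResourceList{fakecpu: resource.MustParse("40")},
			},
		},
	}
	// runPauseRS creates the ReplicaSet via createPauseRS and then blocks until all
	// replicas are available (framework.WaitForReplicaSetTargetAvailableReplicas).
	rs := runPauseRS(f, conf)
	framework.Logf("ReplicaSet %q has %d available replicas", rs.Name, conf.Replicas)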