From 488621815f1a5c2f980faabd8d631e2082fd0fb6 Mon Sep 17 00:00:00 2001
From: Wei Huang
Date: Wed, 24 Jun 2020 09:37:15 -0700
Subject: [PATCH 1/2] Fix a preemption bug when pods are matched by pdb.Status.DisruptedPods

---
 pkg/scheduler/core/generic_scheduler.go | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/pkg/scheduler/core/generic_scheduler.go b/pkg/scheduler/core/generic_scheduler.go
index 5a5541423ab..9c136dbdba4 100644
--- a/pkg/scheduler/core/generic_scheduler.go
+++ b/pkg/scheduler/core/generic_scheduler.go
@@ -936,15 +936,14 @@ func filterPodsWithPDBViolation(pods []*v1.Pod, pdbs []*policy.PodDisruptionBudg
 			if selector.Empty() || !selector.Matches(labels.Set(pod.Labels)) {
 				continue
 			}
+			// Only decrement the matched pdb when it's not in its <DisruptedPods>;
+			// otherwise we may over-decrement the budget number.
+			if _, exist := pdb.Status.DisruptedPods[pod.Name]; !exist {
+				pdbsAllowed[i]--
+			}
 			// We have found a matching PDB.
-			if pdbsAllowed[i] <= 0 {
+			if pdbsAllowed[i] < 0 {
 				pdbForPodIsViolated = true
-			} else {
-				// Only decrement the matched pdb when it's not in its <DisruptedPods>;
-				// otherwise we may over-decrement the budget number.
-				if _, exist := pdb.Status.DisruptedPods[pod.Name]; !exist {
-					pdbsAllowed[i]--
-				}
 			}
 		}
 	}

From 82ab6db94b9e636ac338fd4add18018af1aad600 Mon Sep 17 00:00:00 2001
From: Wei Huang
Date: Wed, 24 Jun 2020 11:21:28 -0700
Subject: [PATCH 2/2] Pods in pdb.Status.DisruptedPods are treated as 'nonViolating' in any case

---
 pkg/scheduler/core/generic_scheduler.go      | 10 +++++++---
 pkg/scheduler/core/generic_scheduler_test.go | 20 +++++++++++++++++++-
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/pkg/scheduler/core/generic_scheduler.go b/pkg/scheduler/core/generic_scheduler.go
index 9c136dbdba4..cc8ab4aaa27 100644
--- a/pkg/scheduler/core/generic_scheduler.go
+++ b/pkg/scheduler/core/generic_scheduler.go
@@ -936,11 +936,15 @@ func filterPodsWithPDBViolation(pods []*v1.Pod, pdbs []*policy.PodDisruptionBudg
 			if selector.Empty() || !selector.Matches(labels.Set(pod.Labels)) {
 				continue
 			}
+
+			// Existing in DisruptedPods means it has been processed in API server,
+			// we don't treat it as a violating case.
+			if _, exist := pdb.Status.DisruptedPods[pod.Name]; exist {
+				continue
+			}
 			// Only decrement the matched pdb when it's not in its <DisruptedPods>;
 			// otherwise we may over-decrement the budget number.
-			if _, exist := pdb.Status.DisruptedPods[pod.Name]; !exist {
-				pdbsAllowed[i]--
-			}
+			pdbsAllowed[i]--
 			// We have found a matching PDB.
 			if pdbsAllowed[i] < 0 {
 				pdbForPodIsViolated = true
diff --git a/pkg/scheduler/core/generic_scheduler_test.go b/pkg/scheduler/core/generic_scheduler_test.go
index 98922a639e1..7d4f5f6223d 100644
--- a/pkg/scheduler/core/generic_scheduler_test.go
+++ b/pkg/scheduler/core/generic_scheduler_test.go
@@ -1496,10 +1496,28 @@ func TestSelectNodesForPreemption(t *testing.T) {
 				{ObjectMeta: metav1.ObjectMeta{Name: "a", UID: types.UID("a"), Labels: map[string]string{"app": "foo"}}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}},
 				{ObjectMeta: metav1.ObjectMeta{Name: "b", UID: types.UID("b"), Labels: map[string]string{"app": "foo"}}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}}},
 			pdbs: []*policy.PodDisruptionBudget{
-				{Spec: policy.PodDisruptionBudgetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "foo"}}}, Status: policy.PodDisruptionBudgetStatus{DisruptionsAllowed: 1, DisruptedPods: map[string]metav1.Time{"a": {Time: time.Now()}}}}},
+				{Spec: policy.PodDisruptionBudgetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "foo"}}}, Status: policy.PodDisruptionBudgetStatus{DisruptionsAllowed: 1, DisruptedPods: map[string]metav1.Time{"b": {Time: time.Now()}}}}},
 			expected: map[string]victims{"machine1": {pods: sets.NewString("a", "b"), numPDBViolations: 0}},
 			expectedNumFilterCalled: 3,
 		},
+		{
+			name: "preemption with violation of the pdb with pod whose eviction was processed, the victim which belongs to DisruptedPods is treated as 'nonViolating'",
+			registerPlugins: []st.RegisterPluginFunc{
+				st.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
+				st.RegisterPluginAsExtensions(noderesources.FitName, noderesources.NewFit, "Filter", "PreFilter"),
+				st.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New),
+			},
+			nodes: []string{"machine1"},
+			pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
+			pods: []*v1.Pod{
+				{ObjectMeta: metav1.ObjectMeta{Name: "a", UID: types.UID("a"), Labels: map[string]string{"app": "foo"}}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "b", UID: types.UID("b"), Labels: map[string]string{"app": "foo"}}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "c", UID: types.UID("c"), Labels: map[string]string{"app": "foo"}}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}}},
+			pdbs: []*policy.PodDisruptionBudget{
+				{Spec: policy.PodDisruptionBudgetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "foo"}}}, Status: policy.PodDisruptionBudgetStatus{DisruptionsAllowed: 1, DisruptedPods: map[string]metav1.Time{"c": {Time: time.Now()}}}}},
+			expected: map[string]victims{"machine1": {pods: sets.NewString("a", "b", "c"), numPDBViolations: 1}},
+			expectedNumFilterCalled: 4,
+		},
 	}
 	labelKeys := []string{"hostname", "zone", "region"}
 	for _, test := range tests {
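
For reviewers skimming the series, below is a minimal, self-contained sketch (not part of either patch) of the filtering behavior after both commits are applied. It uses simplified stand-in types and a hypothetical matches helper instead of the real k8s.io API types and label-selector machinery, so it only illustrates the control flow: a victim already listed in pdb.Status.DisruptedPods is skipped outright, the budget is decremented only for other matching victims, and a victim counts as violating only when the decrement drives the remaining budget below zero.

package main

import "fmt"

// pod and pdb are simplified stand-ins for v1.Pod and policy.PodDisruptionBudget.
type pod struct {
	name   string
	labels map[string]string
}

type pdb struct {
	selector      map[string]string // matchLabels-style selector
	allowed       int               // Status.DisruptionsAllowed
	disruptedPods map[string]bool   // keys of Status.DisruptedPods
}

// matches is a hypothetical helper standing in for label-selector matching.
func matches(selector, podLabels map[string]string) bool {
	for k, v := range selector {
		if podLabels[k] != v {
			return false
		}
	}
	return true
}

// filterPodsWithPDBViolation mirrors the shape of the scheduler helper of the
// same name: it splits candidate victims into those whose eviction would
// violate a PDB and those whose eviction would not.
func filterPodsWithPDBViolation(pods []pod, pdbs []pdb) (violating, nonViolating []pod) {
	allowed := make([]int, len(pdbs))
	for i, p := range pdbs {
		allowed[i] = p.allowed
	}
	for _, po := range pods {
		violated := false
		for i, p := range pdbs {
			if !matches(p.selector, po.labels) {
				continue
			}
			// Already in DisruptedPods: the eviction has been processed by the
			// API server, so never count it against the budget again.
			if p.disruptedPods[po.name] {
				continue
			}
			allowed[i]--
			if allowed[i] < 0 {
				violated = true
			}
		}
		if violated {
			violating = append(violating, po)
		} else {
			nonViolating = append(nonViolating, po)
		}
	}
	return violating, nonViolating
}

func main() {
	// Same data as the added test case: three matching victims, a budget of
	// one disruption, and "c" already recorded in DisruptedPods.
	pods := []pod{
		{name: "a", labels: map[string]string{"app": "foo"}},
		{name: "b", labels: map[string]string{"app": "foo"}},
		{name: "c", labels: map[string]string{"app": "foo"}},
	}
	pdbs := []pdb{{
		selector:      map[string]string{"app": "foo"},
		allowed:       1,
		disruptedPods: map[string]bool{"c": true},
	}}
	v, nv := filterPodsWithPDBViolation(pods, pdbs)
	fmt.Println("violating:", len(v), "nonViolating:", len(nv)) // violating: 1 nonViolating: 2
}

Run against the data of the new test case, the sketch reports one violating and two non-violating victims, which lines up with the expected numPDBViolations: 1 for victims a, b, and c in the added test.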