From 89f936f6ac3af4ee8a9c00d3c4e1745f448e2c05 Mon Sep 17 00:00:00 2001 From: Abdullah Gharaibeh Date: Wed, 18 Sep 2019 15:48:26 -0400 Subject: [PATCH] Modified the Filter interface to pass in nodeinfo instead of node name. This is necessary to support preemption, which relies on passing modified nodeinfo objects to the filters to simulate evicting lower-priority pods. --- pkg/scheduler/core/generic_scheduler.go | 2 +- pkg/scheduler/core/generic_scheduler_test.go | 4 ++-- pkg/scheduler/framework/plugins/noop/BUILD | 1 + pkg/scheduler/framework/plugins/noop/noop.go | 3 ++- pkg/scheduler/framework/v1alpha1/framework.go | 4 ++-- pkg/scheduler/framework/v1alpha1/interface.go | 23 +++++++++++++++---- .../internal/queue/scheduling_queue_test.go | 2 +- test/integration/scheduler/framework_test.go | 3 ++- 8 files changed, 29 insertions(+), 13 deletions(-) diff --git a/pkg/scheduler/core/generic_scheduler.go b/pkg/scheduler/core/generic_scheduler.go index fb24bf098a2..1e4fb42207a 100644 --- a/pkg/scheduler/core/generic_scheduler.go +++ b/pkg/scheduler/core/generic_scheduler.go @@ -675,7 +675,7 @@ func (g *genericScheduler) podFitsOnNode( } } - status = g.framework.RunFilterPlugins(pluginContext, pod, info.Node().Name) + status = g.framework.RunFilterPlugins(pluginContext, pod, nodeInfoToUse) if !status.IsSuccess() && !status.IsUnschedulable() { return false, failedPredicates, status, status.AsError() } diff --git a/pkg/scheduler/core/generic_scheduler_test.go b/pkg/scheduler/core/generic_scheduler_test.go index e753257b52a..1e5215d5db0 100644 --- a/pkg/scheduler/core/generic_scheduler_test.go +++ b/pkg/scheduler/core/generic_scheduler_test.go @@ -160,10 +160,10 @@ func (fp *FakeFilterPlugin) reset() { // Filter is a test function that returns an error or nil, depending on the // value of "failedNodeReturnCodeMap". -func (fp *FakeFilterPlugin) Filter(pc *framework.PluginContext, pod *v1.Pod, nodeName string) *framework.Status { +func (fp *FakeFilterPlugin) Filter(pc *framework.PluginContext, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *framework.Status { atomic.AddInt32(&fp.numFilterCalled, 1) - if returnCode, ok := fp.failedNodeReturnCodeMap[nodeName]; ok { + if returnCode, ok := fp.failedNodeReturnCodeMap[nodeInfo.Node().Name]; ok { return framework.NewStatus(returnCode, fmt.Sprintf("injecting failure for pod %v", pod.Name)) } diff --git a/pkg/scheduler/framework/plugins/noop/BUILD b/pkg/scheduler/framework/plugins/noop/BUILD index c28a009d26c..4054116da3b 100644 --- a/pkg/scheduler/framework/plugins/noop/BUILD +++ b/pkg/scheduler/framework/plugins/noop/BUILD @@ -7,6 +7,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/scheduler/framework/v1alpha1:go_default_library", + "//pkg/scheduler/nodeinfo:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library", ], diff --git a/pkg/scheduler/framework/plugins/noop/noop.go b/pkg/scheduler/framework/plugins/noop/noop.go index b590d0f6c73..f73568d982f 100644 --- a/pkg/scheduler/framework/plugins/noop/noop.go +++ b/pkg/scheduler/framework/plugins/noop/noop.go @@ -21,6 +21,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" + "k8s.io/kubernetes/pkg/scheduler/nodeinfo" ) // Filter is a plugin that implements the filter plugin and always returns Success. @@ -40,7 +41,7 @@ func (n Filter) Name() string { } // Filter invoked at the filter extension point. -func (n Filter) Filter(pc *framework.PluginContext, pod *v1.Pod, nodeName string) *framework.Status { +func (n Filter) Filter(pc *framework.PluginContext, pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status { return nil } diff --git a/pkg/scheduler/framework/v1alpha1/framework.go b/pkg/scheduler/framework/v1alpha1/framework.go index f29d9a127b2..641f27d46ae 100644 --- a/pkg/scheduler/framework/v1alpha1/framework.go +++ b/pkg/scheduler/framework/v1alpha1/framework.go @@ -311,9 +311,9 @@ func (f *framework) RunPreFilterPlugins( // given node is not suitable for running pod. // Meanwhile, the failure message and status are set for the given node. func (f *framework) RunFilterPlugins(pc *PluginContext, - pod *v1.Pod, nodeName string) *Status { + pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status { for _, pl := range f.filterPlugins { - status := pl.Filter(pc, pod, nodeName) + status := pl.Filter(pc, pod, nodeInfo) if !status.IsSuccess() { if !status.IsUnschedulable() { errMsg := fmt.Sprintf("error while running %q filter plugin for pod %q: %v", diff --git a/pkg/scheduler/framework/v1alpha1/interface.go b/pkg/scheduler/framework/v1alpha1/interface.go index 246bfdb34f4..25490ad32e5 100644 --- a/pkg/scheduler/framework/v1alpha1/interface.go +++ b/pkg/scheduler/framework/v1alpha1/interface.go @@ -188,7 +188,14 @@ type FilterPlugin interface { // the given node fits the pod. If Filter doesn't return "Success", // please refer scheduler/algorithm/predicates/error.go // to set error message. - Filter(pc *PluginContext, pod *v1.Pod, nodeName string) *Status + // For the node being evaluated, Filter plugins should look at the passed + // nodeInfo reference for this particular node's information (e.g., pods + // considered to be running on the node) instead of looking it up in the + // NodeInfoSnapshot because we don't guarantee that they will be the same. + // For example, during preemption, we may pass a copy of the original + // nodeInfo object that has some pods removed from it to evaluate the + // possibility of preempting them to schedule the target pod. + Filter(pc *PluginContext, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status } // PostFilterPlugin is an interface for Post-filter plugin. Post-filter is an @@ -308,10 +315,16 @@ type Framework interface { // cycle is aborted. RunPreFilterPlugins(pc *PluginContext, pod *v1.Pod) *Status - // RunFilterPlugins runs the set of configured filter plugins for pod on the - // given host. If any of these plugins returns any status other than "Success", - // the given node is not suitable for running the pod. - RunFilterPlugins(pc *PluginContext, pod *v1.Pod, nodeName string) *Status + // RunFilterPlugins runs the set of configured filter plugins for pod on + // the given node. It returns directly if any of the filter plugins + // return any status other than "Success". Note that for the node being + // evaluated, the passed nodeInfo reference could be different from the + // one in NodeInfoSnapshot map (e.g., pods considered to be running on + // the node could be different). For example, during preemption, we may + // pass a copy of the original nodeInfo object that has some pods + // removed from it to evaluate the possibility of preempting them to + // schedule the target pod. + RunFilterPlugins(pc *PluginContext, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status // RunPostFilterPlugins runs the set of configured post-filter plugins. If any // of these plugins returns any status other than "Success", the given node is diff --git a/pkg/scheduler/internal/queue/scheduling_queue_test.go b/pkg/scheduler/internal/queue/scheduling_queue_test.go index 779f69d9f13..4365346cb76 100644 --- a/pkg/scheduler/internal/queue/scheduling_queue_test.go +++ b/pkg/scheduler/internal/queue/scheduling_queue_test.go @@ -171,7 +171,7 @@ func (*fakeFramework) RunPreFilterPlugins(pc *framework.PluginContext, pod *v1.P return nil } -func (*fakeFramework) RunFilterPlugins(pc *framework.PluginContext, pod *v1.Pod, nodeName string) *framework.Status { +func (*fakeFramework) RunFilterPlugins(pc *framework.PluginContext, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *framework.Status { return nil } diff --git a/test/integration/scheduler/framework_test.go b/test/integration/scheduler/framework_test.go index 61729fbf580..333058957ef 100644 --- a/test/integration/scheduler/framework_test.go +++ b/test/integration/scheduler/framework_test.go @@ -29,6 +29,7 @@ import ( clientset "k8s.io/client-go/kubernetes" schedulerconfig "k8s.io/kubernetes/pkg/scheduler/apis/config" framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" + schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" ) type PreFilterPlugin struct { @@ -197,7 +198,7 @@ func (fp *FilterPlugin) reset() { // Filter is a test function that returns an error or nil, depending on the // value of "failFilter". -func (fp *FilterPlugin) Filter(pc *framework.PluginContext, pod *v1.Pod, nodeName string) *framework.Status { +func (fp *FilterPlugin) Filter(pc *framework.PluginContext, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *framework.Status { fp.numFilterCalled++ if fp.failFilter {