Merge pull request #2906 from abhgupta/abhgupta-dev

Enhancements to scheduler priority functions
Committed by davidopp on 2015-01-14 21:47:28 -08:00
13 changed files with 919 additions and 49 deletions


@@ -0,0 +1,55 @@
/*
Copyright 2014 Google Inc. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// This algorithm provider has predicates and priorities related to affinity/anti-affinity for the scheduler.
package affinity

import (
	algorithm "github.com/GoogleCloudPlatform/kubernetes/pkg/scheduler"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
	"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/factory"
)

const AffinityProvider string = "AffinityProvider"

func init() {
	factory.RegisterAlgorithmProvider(AffinityProvider, affinityPredicates(), affinityPriorities())
}

func affinityPredicates() util.StringSet {
	return util.NewStringSet(
		"HostName",
		"MatchNodeSelector",
		"PodFitsPorts",
		"PodFitsResources",
		"NoDiskConflict",
		// Ensures that all pods within the same service are hosted on minions within the same region, as defined by the "region" label.
		factory.RegisterFitPredicate("ServiceAffinity", algorithm.NewServiceAffinityPredicate(factory.PodLister, factory.ServiceLister, factory.MinionLister, []string{"region"})),
		// Fit is defined based on the presence of the "region" label on a minion, regardless of value.
		factory.RegisterFitPredicate("NodeLabelPredicate", algorithm.NewNodeLabelPredicate(factory.MinionLister, []string{"region"}, true)),
	)
}

func affinityPriorities() util.StringSet {
	return util.NewStringSet(
		"LeastRequestedPriority",
		"ServiceSpreadingPriority",
		// Spreads pods belonging to the same service across minions in different zones.
		factory.RegisterPriorityFunction("ZoneSpreadingPriority", algorithm.NewServiceAntiAffinityPriority(factory.ServiceLister, "zone"), 2),
		// Prioritize nodes based on the presence of the "zone" label on a minion, regardless of value.
		factory.RegisterPriorityFunction("NodeLabelPriority", algorithm.NewNodeLabelPriority("zone", true), 1),
	)
}
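The interesting addition here is ZoneSpreadingPriority: it applies anti-affinity on the "zone" label, so minions in zones that already host many pods of the same service score lower and new pods drift toward less-used zones. The following is a minimal, self-contained sketch of that scoring idea; the minion type, the per-zone pod counts, and the 0-10 score range are illustrative assumptions, not the scheduler's actual interfaces.

package main

import "fmt"

// minion is a hypothetical, simplified view of a node: its name and the
// value of its "zone" label.
type minion struct {
	name string
	zone string
}

// zoneSpreadScores gives each minion a score from 0 to 10: the more pods of
// the same service already run in a minion's zone, the lower its score, so
// new pods spread across zones (anti-affinity on the "zone" label).
func zoneSpreadScores(minions []minion, podsPerZone map[string]int) map[string]int {
	maxCount := 0
	for _, c := range podsPerZone {
		if c > maxCount {
			maxCount = c
		}
	}
	scores := make(map[string]int, len(minions))
	for _, m := range minions {
		if maxCount == 0 {
			// No pods of this service anywhere yet: every zone is equally good.
			scores[m.name] = 10
			continue
		}
		scores[m.name] = 10 * (maxCount - podsPerZone[m.zone]) / maxCount
	}
	return scores
}

func main() {
	minions := []minion{{"m1", "zone-a"}, {"m2", "zone-b"}, {"m3", "zone-b"}}
	podsPerZone := map[string]int{"zone-a": 3, "zone-b": 1} // same-service pods per zone
	fmt.Println(zoneSpreadScores(minions, podsPerZone))     // e.g. map[m1:0 m2:6 m3:6]
}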


@@ -31,11 +31,11 @@ func defaultPredicates() util.StringSet {
 	return util.NewStringSet(
 		// Fit is defined based on the absence of port conflicts.
 		factory.RegisterFitPredicate("PodFitsPorts", algorithm.PodFitsPorts),
-		// Fit is determined by resource availability
+		// Fit is determined by resource availability.
 		factory.RegisterFitPredicate("PodFitsResources", algorithm.NewResourceFitPredicate(factory.MinionLister)),
-		// Fit is determined by non-conflicting disk volumes
+		// Fit is determined by non-conflicting disk volumes.
 		factory.RegisterFitPredicate("NoDiskConflict", algorithm.NoDiskConflict),
-		// Fit is determined by node selector query
+		// Fit is determined by node selector query.
 		factory.RegisterFitPredicate("MatchNodeSelector", algorithm.NewSelectorMatchPredicate(factory.MinionLister)),
 		// Fit is determined by the presence of the Host parameter and a string match
 		factory.RegisterFitPredicate("HostName", algorithm.PodFitsHost),
@@ -46,8 +46,8 @@ func defaultPriorities() util.StringSet {
 	return util.NewStringSet(
 		// Prioritize nodes by least requested utilization.
 		factory.RegisterPriorityFunction("LeastRequestedPriority", algorithm.LeastRequestedPriority, 1),
-		// spreads pods by minimizing the number of pods on the same minion with the same labels.
-		factory.RegisterPriorityFunction("SpreadingPriority", algorithm.CalculateSpreadPriority, 1),
+		// spreads pods by minimizing the number of pods (belonging to the same service) on the same minion.
+		factory.RegisterPriorityFunction("ServiceSpreadingPriority", algorithm.NewServiceSpreadPriority(factory.ServiceLister), 1),
 		// EqualPriority is a prioritizer function that gives an equal weight of one to all minions
 		factory.RegisterPriorityFunction("EqualPriority", algorithm.EqualPriority, 0),
 	)
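The swap from SpreadingPriority to ServiceSpreadingPriority changes what gets counted: rather than pods that merely share labels, the scheduler counts pods belonging to the same service, which it identifies by matching each service's label selector against the pod being scheduled. Below is a rough, self-contained illustration of that membership test; the pod and service types are simplified stand-ins for the real API objects.

package main

import "fmt"

// Simplified stand-ins for api.Pod and api.Service.
type pod struct {
	name   string
	labels map[string]string
}

type service struct {
	name     string
	selector map[string]string
}

// selectorMatches reports whether every key/value pair in the service's
// selector is present in the pod's labels (the rule plain equality
// selectors follow).
func selectorMatches(selector, labels map[string]string) bool {
	for k, v := range selector {
		if labels[k] != v {
			return false
		}
	}
	return true
}

// servicesForPod returns the services whose selectors match the pod; pods
// matched by the same service are the ones a service-spreading priority
// would count per minion.
func servicesForPod(p pod, services []service) []string {
	var out []string
	for _, s := range services {
		if selectorMatches(s.selector, p.labels) {
			out = append(out, s.name)
		}
	}
	return out
}

func main() {
	p := pod{name: "frontend-1", labels: map[string]string{"app": "frontend", "tier": "web"}}
	svcs := []service{
		{name: "frontend", selector: map[string]string{"app": "frontend"}},
		{name: "backend", selector: map[string]string{"app": "backend"}},
	}
	fmt.Println(servicesForPod(p, svcs)) // [frontend]
}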


@@ -18,5 +18,6 @@ limitations under the License.
 package algorithmprovider
 
 import (
+	_ "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithmprovider/affinity"
 	_ "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithmprovider/defaults"
 )
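The blank import added above exists purely for its side effect: loading the affinity package runs its init function, which registers AffinityProvider with the factory. A toy, single-file version of that register-at-init pattern follows; the registry and names are invented for illustration and are not the factory's real API.

package main

import "fmt"

// providers is a toy stand-in for the factory's provider registry.
var providers = map[string]func() []string{}

// registerProvider mimics the spirit of factory.RegisterAlgorithmProvider:
// packages call it from init, so merely importing them makes a provider available.
func registerProvider(name string, predicates func() []string) {
	providers[name] = predicates
}

func init() {
	// In the real code this init lives in the affinity package and fires
	// when the blank import above pulls that package in.
	registerProvider("AffinityProvider", func() []string {
		return []string{"ServiceAffinity", "NodeLabelPredicate"}
	})
}

func main() {
	fmt.Println(providers["AffinityProvider"]()) // [ServiceAffinity NodeLabelPredicate]
}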


@@ -19,12 +19,14 @@ package algorithmprovider
 import (
 	"testing"
 
+	"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithmprovider/affinity"
 	"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/factory"
 )
 
 var (
 	algorithmProviderNames = []string{
 		factory.DefaultProvider,
+		affinity.AffinityProvider,
 	}
 )


@@ -35,8 +35,9 @@ import (
 )
 
 var (
-	PodLister    = &cache.StoreToPodLister{cache.NewStore()}
-	MinionLister = &cache.StoreToNodeLister{cache.NewStore()}
+	PodLister     = &cache.StoreToPodLister{cache.NewStore()}
+	MinionLister  = &cache.StoreToNodeLister{cache.NewStore()}
+	ServiceLister = &cache.StoreToServiceLister{cache.NewStore()}
 )
 
 // ConfigFactory knows how to fill out a scheduler config with its support functions.
@@ -48,15 +49,18 @@ type ConfigFactory struct {
 	PodLister *cache.StoreToPodLister
 	// a means to list all minions
 	MinionLister *cache.StoreToNodeLister
+	// a means to list all services
+	ServiceLister *cache.StoreToServiceLister
 }
 
 // NewConfigFactory initializes the factory.
 func NewConfigFactory(client *client.Client) *ConfigFactory {
 	return &ConfigFactory{
-		Client:       client,
-		PodQueue:     cache.NewFIFO(),
-		PodLister:    PodLister,
-		MinionLister: MinionLister,
+		Client:        client,
+		PodQueue:      cache.NewFIFO(),
+		PodLister:     PodLister,
+		MinionLister:  MinionLister,
+		ServiceLister: ServiceLister,
 	}
 }
@@ -106,6 +110,11 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys util.StringSe
 		cache.NewPoller(f.pollMinions, 10*time.Second, f.MinionLister.Store).Run()
 	}
 
+	// Watch and cache all service objects. Scheduler needs to find all pods
+	// created by the same service, so that it can spread them correctly.
+	// Cache this locally.
+	cache.NewReflector(f.createServiceLW(), &api.Service{}, f.ServiceLister.Store).Run()
+
 	r := rand.New(rand.NewSource(time.Now().UnixNano()))
 	algo := algorithm.NewGenericScheduler(predicateFuncs, priorityConfigs, f.PodLister, r)
@@ -205,6 +214,15 @@ func (factory *ConfigFactory) pollMinions() (cache.Enumerator, error) {
 	return &nodeEnumerator{nodes}, nil
 }
 
+// createServiceLW returns a cache.ListWatch that gets all changes to services.
+func (factory *ConfigFactory) createServiceLW() *cache.ListWatch {
+	return &cache.ListWatch{
+		Client:        factory.Client,
+		FieldSelector: parseSelectorOrDie(""),
+		Resource:      "services",
+	}
+}
+
 func (factory *ConfigFactory) makeDefaultErrorFunc(backoff *podBackoff, podQueue *cache.FIFO) func(pod *api.Pod, err error) {
 	return func(pod *api.Pod, err error) {
 		glog.Errorf("Error scheduling %v %v: %v; retrying", pod.Namespace, pod.Name, err)
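Taken together, the factory changes wire up a third cache: createServiceLW supplies the list/watch source and cache.NewReflector(...).Run() keeps ServiceLister's store in sync, so the new priority functions can look up service membership from local memory instead of querying the apiserver on every scheduling pass. Below is a minimal sketch of that list-then-watch caching pattern; the event and store types are invented for illustration and are not the real cache package types.

package main

import (
	"fmt"
	"sync"
)

// event is a hypothetical watch notification: something was added,
// modified, or deleted in the watched collection.
type event struct {
	kind string // "ADDED", "MODIFIED", or "DELETED"
	name string
	obj  string
}

// store is a minimal thread-safe cache standing in for the scheduler's
// local service store.
type store struct {
	mu    sync.RWMutex
	items map[string]string
}

func newStore() *store { return &store{items: map[string]string{}} }

// replace seeds the cache with the result of an initial list.
func (s *store) replace(initial map[string]string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.items = initial
}

// apply folds a single watch event into the cache.
func (s *store) apply(e event) {
	s.mu.Lock()
	defer s.mu.Unlock()
	switch e.kind {
	case "ADDED", "MODIFIED":
		s.items[e.name] = e.obj
	case "DELETED":
		delete(s.items, e.name)
	}
}

func (s *store) get(name string) (string, bool) {
	s.mu.RLock()
	defer s.mu.RUnlock()
	v, ok := s.items[name]
	return v, ok
}

// runReflector lists once, then applies watch events until the channel
// closes -- the same list-then-watch shape used to keep the service cache warm.
func runReflector(s *store, list map[string]string, watch <-chan event) {
	s.replace(list)
	for e := range watch {
		s.apply(e)
	}
}

func main() {
	s := newStore()
	watch := make(chan event)
	done := make(chan struct{})

	go func() {
		runReflector(s, map[string]string{"frontend": "app=frontend"}, watch)
		close(done)
	}()

	watch <- event{kind: "ADDED", name: "backend", obj: "app=backend"}
	close(watch)
	<-done

	fmt.Println(s.get("backend")) // app=backend true
}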