ClusterAutoscaler-friendly scheduler priority function that promotes well-used nodes.
This commit is contained in:
parent ec4d645da4
commit de2fea95ca
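In essence, the change scores a node by how much of its capacity is already requested, so that new pods pack onto busy nodes and lightly used ones stay drainable for the ClusterAutoscaler. A minimal standalone restatement of that arithmetic (plain int64 math and our own helper name, not the committed code):

package main

import "fmt"

// usedScore restates the commit's calculateUsedScore arithmetic: the share
// of node capacity already requested, on a truncated 0-10 integer scale.
// (The committed version also logs when requested exceeds capacity.)
func usedScore(requested, capacity int64) int64 {
    if capacity == 0 || requested > capacity {
        return 0
    }
    return (requested * 10) / capacity
}

func main() {
    // 3000 of 4000 millicores requested scores 7, ranking the node high.
    fmt.Println(usedScore(3000, 4000)) // 7
}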
@@ -39,9 +39,10 @@ func getNonZeroRequests(pod *api.Pod) *schedulercache.Resource {
 	return result
 }
 
-// the unused capacity is calculated on a scale of 0-10
-// 0 being the lowest priority and 10 being the highest
-func calculateScore(requested int64, capacity int64, node string) int64 {
+// The unused capacity is calculated on a scale of 0-10
+// 0 being the lowest priority and 10 being the highest.
+// The more unused resources the higher the score is.
+func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
 	if capacity == 0 {
 		return 0
 	}
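Worked through with values from the tests later in this diff: on a 4000-millicore node with 3000m requested, the unused-capacity return value (shown at the top of the next hunk) is ((4000 - 3000) * 10) / 4000 = 10000 / 4000 = 2 in int64 arithmetic. The exact value 2.5 is truncated, which is the rounding behavior the next hunk's comment refers to.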
@@ -53,17 +54,37 @@ func calculateScore(requested int64, capacity int64, node string) int64 {
 	return ((capacity - requested) * 10) / capacity
 }
 
-// Calculate the resource occupancy on a node. 'node' has information about the resources on the node.
+// The used capacity is calculated on a scale of 0-10
+// 0 being the lowest priority and 10 being the highest.
+// The more resources are used the higher the score is. This function
+// is almost a reversed version of calculateUnusedScore (10 - calculateUnusedScore).
+// The main difference is in rounding. It was added to keep the
+// final formula clean and not to modify the widely used (by users
+// in their default scheduling policies) calculateUnusedScore.
+func calculateUsedScore(requested int64, capacity int64, node string) int64 {
+	if capacity == 0 {
+		return 0
+	}
+	if requested > capacity {
+		glog.V(2).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
+			requested, capacity, node)
+		return 0
+	}
+	return (requested * 10) / capacity
+}
+
+// Calculates host priority based on the amount of unused resources.
+// 'node' has information about the resources on the node.
 // 'pods' is a list of pods currently scheduled on the node.
 // TODO: Use Node() from nodeInfo instead of passing it.
-func calculateResourceOccupancy(pod *api.Pod, podRequests *schedulercache.Resource, node *api.Node, nodeInfo *schedulercache.NodeInfo) schedulerapi.HostPriority {
+func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, node *api.Node, nodeInfo *schedulercache.NodeInfo) schedulerapi.HostPriority {
 	allocatableResources := nodeInfo.AllocatableResource()
 	totalResources := *podRequests
 	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
 	totalResources.Memory += nodeInfo.NonZeroRequest().Memory
 
-	cpuScore := calculateScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
-	memoryScore := calculateScore(totalResources.Memory, allocatableResources.Memory, node.Name)
+	cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
+	memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
 	if glog.V(10) {
 		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
 		// not logged. There is visible performance gain from it.
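The rounding remark in the comment above is easy to verify: both formulas truncate toward zero, so calculateUsedScore and 10 - calculateUnusedScore can disagree by a point. A runnable sketch with the same arithmetic (helper names are ours, not the diff's):

package main

import "fmt"

func unused(requested, capacity int64) int64 { return ((capacity - requested) * 10) / capacity }
func used(requested, capacity int64) int64   { return (requested * 10) / capacity }

func main() {
    // 3000m requested of 4000m capacity: exact scores are 7.5 used, 2.5 unused.
    fmt.Println(used(3000, 4000))        // 7: 7.5 truncated down
    fmt.Println(10 - unused(3000, 4000)) // 8: 2.5 truncates to 2 before the subtraction
}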
@@ -82,6 +103,35 @@ func calculateResourceOccupancy(pod *api.Pod, podRequests *schedulercache.Resour
 	}
 }
 
+// Calculate the resource used on a node. 'node' has information about the resources on the node.
+// 'pods' is a list of pods currently scheduled on the node.
+// TODO: Use Node() from nodeInfo instead of passing it.
+func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, node *api.Node, nodeInfo *schedulercache.NodeInfo) schedulerapi.HostPriority {
+	allocatableResources := nodeInfo.AllocatableResource()
+	totalResources := *podRequests
+	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
+	totalResources.Memory += nodeInfo.NonZeroRequest().Memory
+
+	cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
+	memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
+	if glog.V(10) {
+		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
+		// not logged. There is visible performance gain from it.
+		glog.V(10).Infof(
+			"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
+			pod.Name, node.Name,
+			allocatableResources.MilliCPU, allocatableResources.Memory,
+			totalResources.MilliCPU, totalResources.Memory,
+			cpuScore, memoryScore,
+		)
+	}
+
+	return schedulerapi.HostPriority{
+		Host:  node.Name,
+		Score: int((cpuScore + memoryScore) / 2),
+	}
+}
+
 // LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
 // It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
 // based on the minimum of the average of the fraction of requested to capacity.
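Note that calculateUsedPriority, like its unused counterpart, scores the node as it would look with the pod placed: totalResources starts from the pod's own non-zero requests and adds the node's accumulated NonZeroRequest. The host score is then the truncated mean of the two resource scores; for example, cpuScore 7 and memoryScore 5 give int((7 + 5) / 2) = 6, the {"machine1", 6} entry in the tests below.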
@@ -90,7 +140,20 @@ func LeastRequestedPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulerca
 	podResources := getNonZeroRequests(pod)
 	list := make(schedulerapi.HostPriorityList, 0, len(nodes))
 	for _, node := range nodes {
-		list = append(list, calculateResourceOccupancy(pod, podResources, node, nodeNameToInfo[node.Name]))
+		list = append(list, calculateUnusedPriority(pod, podResources, node, nodeNameToInfo[node.Name]))
+	}
+	return list, nil
+}
+
+// MostRequestedPriority is a priority function that favors nodes with most requested resources.
+// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
+// based on the maximum of the average of the fraction of requested to capacity.
+// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
+func MostRequestedPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) {
+	podResources := getNonZeroRequests(pod)
+	list := make(schedulerapi.HostPriorityList, 0, len(nodes))
+	for _, node := range nodes {
+		list = append(list, calculateUsedPriority(pod, podResources, node, nodeNameToInfo[node.Name]))
 	}
 	return list, nil
 }
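Reading the Details formula against the second test case below: machine2 offers 6000 millicores and 10000 memory bytes, and the cpuAndMemory pod requests 3000m and 5000 bytes, so cpu(10 * 3000 / 6000) = 5 and memory(10 * 5000 / 10000) = 5, giving (5 + 5) / 2 = 5, the expected {"machine2", 5}.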
@@ -411,6 +411,161 @@ func TestLeastRequested(t *testing.T) {
 	}
 }
 
+func TestMostRequested(t *testing.T) {
+	labels1 := map[string]string{
+		"foo": "bar",
+		"baz": "blah",
+	}
+	labels2 := map[string]string{
+		"bar": "foo",
+		"baz": "blah",
+	}
+	noResources := api.PodSpec{
+		Containers: []api.Container{},
+	}
+	cpuOnly := api.PodSpec{
+		NodeName: "machine1",
+		Containers: []api.Container{
+			{
+				Resources: api.ResourceRequirements{
+					Requests: api.ResourceList{
+						"cpu":    resource.MustParse("1000m"),
+						"memory": resource.MustParse("0"),
+					},
+				},
+			},
+			{
+				Resources: api.ResourceRequirements{
+					Requests: api.ResourceList{
+						"cpu":    resource.MustParse("2000m"),
+						"memory": resource.MustParse("0"),
+					},
+				},
+			},
+		},
+	}
+	cpuOnly2 := cpuOnly
+	cpuOnly2.NodeName = "machine2"
+	cpuAndMemory := api.PodSpec{
+		NodeName: "machine2",
+		Containers: []api.Container{
+			{
+				Resources: api.ResourceRequirements{
+					Requests: api.ResourceList{
+						"cpu":    resource.MustParse("1000m"),
+						"memory": resource.MustParse("2000"),
+					},
+				},
+			},
+			{
+				Resources: api.ResourceRequirements{
+					Requests: api.ResourceList{
+						"cpu":    resource.MustParse("2000m"),
+						"memory": resource.MustParse("3000"),
+					},
+				},
+			},
+		},
+	}
+	tests := []struct {
+		pod          *api.Pod
+		pods         []*api.Pod
+		nodes        []*api.Node
+		expectedList schedulerapi.HostPriorityList
+		test         string
+	}{
+		{
+			/*
+				Node1 scores (used resources) on 0-10 scale
+				CPU Score: (0 * 10) / 4000 = 0
+				Memory Score: (0 * 10) / 10000 = 0
+				Node1 Score: (0 + 0) / 2 = 0
+
+				Node2 scores (used resources) on 0-10 scale
+				CPU Score: (0 * 10) / 4000 = 0
+				Memory Score: (0 * 10) / 10000 = 0
+				Node2 Score: (0 + 0) / 2 = 0
+			*/
+			pod:          &api.Pod{Spec: noResources},
+			nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
+			expectedList: []schedulerapi.HostPriority{{"machine1", 0}, {"machine2", 0}},
+			test:         "nothing scheduled, nothing requested",
+		},
+		{
+			/*
+				Node1 scores on 0-10 scale
+				CPU Score: (3000 * 10) / 4000 = 7.5
+				Memory Score: (5000 * 10) / 10000 = 5
+				Node1 Score: (7.5 + 5) / 2 = 6
+
+				Node2 scores on 0-10 scale
+				CPU Score: (3000 * 10) / 6000 = 5
+				Memory Score: (5000 * 10) / 10000 = 5
+				Node2 Score: (5 + 5) / 2 = 5
+			*/
+			pod:          &api.Pod{Spec: cpuAndMemory},
+			nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
+			expectedList: []schedulerapi.HostPriority{{"machine1", 6}, {"machine2", 5}},
+			test:         "nothing scheduled, resources requested, differently sized machines",
+		},
+		{
+			/*
+				Node1 scores on 0-10 scale
+				CPU Score: (6000 * 10) / 10000 = 6
+				Memory Score: (0 * 10) / 20000 = 0
+				Node1 Score: (6 + 0) / 2 = 3
+
+				Node2 scores on 0-10 scale
+				CPU Score: (6000 * 10) / 10000 = 6
+				Memory Score: (5000 * 10) / 20000 = 2.5
+				Node2 Score: (6 + 2.5) / 2 = 4
+			*/
+			pod:          &api.Pod{Spec: noResources},
+			nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
+			expectedList: []schedulerapi.HostPriority{{"machine1", 3}, {"machine2", 4}},
+			test:         "no resources requested, pods scheduled with resources",
+			pods: []*api.Pod{
+				{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
+				{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
+				{Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
+				{Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
+			},
+		},
+		{
+			/*
+				Node1 scores on 0-10 scale
+				CPU Score: (6000 * 10) / 10000 = 6
+				Memory Score: (5000 * 10) / 20000 = 2.5
+				Node1 Score: (6 + 2.5) / 2 = 4
+
+				Node2 scores on 0-10 scale
+				CPU Score: (6000 * 10) / 10000 = 6
+				Memory Score: (10000 * 10) / 20000 = 5
+				Node2 Score: (6 + 5) / 2 = 5
+			*/
+			pod:          &api.Pod{Spec: cpuAndMemory},
+			nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
+			expectedList: []schedulerapi.HostPriority{{"machine1", 4}, {"machine2", 5}},
+			test:         "resources requested, pods scheduled with resources",
+			pods: []*api.Pod{
+				{Spec: cpuOnly},
+				{Spec: cpuAndMemory},
+			},
+		},
+	}
+
+	for _, test := range tests {
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
+		list, err := MostRequestedPriority(test.pod, nodeNameToInfo, test.nodes)
+		if err != nil {
+			t.Errorf("unexpected error: %v", err)
+		}
+		if !reflect.DeepEqual(test.expectedList, list) {
+			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
+		}
+	}
+}
+
 func TestNewNodeLabelPriority(t *testing.T) {
 	label1 := map[string]string{"foo": "bar"}
 	label2 := map[string]string{"bar": "foo"}
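Behind the third case's totals: CreateNodeNameToInfoMap groups the pods by NodeName, so machine1 carries cpuOnly twice (2 x 3000m = 6000m, no memory) while machine2 carries cpuOnly2 plus cpuAndMemory (6000m and 5000 bytes). That asymmetry is why two identically sized machines score 3 and 4.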
@@ -93,6 +93,8 @@ func init() {
 	factory.RegisterFitPredicate("HostName", predicates.PodFitsHost)
 	// Fit is determined by node selector query.
 	factory.RegisterFitPredicate("MatchNodeSelector", predicates.PodSelectorMatches)
+	// Optional, cluster-autoscaler friendly priority function - give used nodes higher priority.
+	factory.RegisterPriorityFunction("MostRequestedPriority", priorities.MostRequestedPriority, 1)
 }
 
 func defaultPredicates() sets.String {
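Registering the function here makes MostRequestedPriority selectable by name in a user's scheduler policy while defaultPriorities stays unchanged, so default scheduling behavior is unaffected. When it is selected, the scheduler folds each priority function's 0-10 node score into one ranking by a weight-multiplied sum; a simplified restatement of that combination, under our own types rather than the scheduler's actual internals:

package main

import "fmt"

// result is one priority function's 0-10 score for a node, paired with the
// weight that function was registered or configured with.
type result struct {
    score, weight int
}

// combine sums weight-multiplied scores, in the spirit of how the generic
// scheduler merges multiple priority functions into a final node ranking.
func combine(results []result) int {
    total := 0
    for _, r := range results {
        total += r.score * r.weight
    }
    return total
}

func main() {
    // MostRequestedPriority scoring 6 at weight 1, plus another priority
    // scoring 4 at weight 1, ranks the node at 10 overall.
    fmt.Println(combine([]result{{6, 1}, {4, 1}}))
}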