Implement the resource limits priority function. This function checks whether the input pod's resource limits can be satisfied by the input node's allocatable resources. If they can, the node is assigned a score of 1; otherwise the node's score is left unchanged.

parent 02a7c12cbd
commit b571001999
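The scoring rule described above is simple enough to illustrate in isolation. The following is a minimal, self-contained sketch (the fits helper and the literal values are illustrative only, not part of the scheduler code): a node earns a score of 1 when at least one of the pod's cpu or memory limits is non-zero and fits within the node's advertised allocatable amount, and 0 otherwise.

package main

import "fmt"

// fits returns 1 when a non-zero limit fits within a non-zero allocatable amount.
func fits(limit, allocatable int64) int64 {
	if limit != 0 && allocatable != 0 && limit <= allocatable {
		return 1
	}
	return 0
}

func main() {
	// Pod limits: 1000 millicores of cpu and 2000 bytes of memory.
	podMilliCPU, podMemory := int64(1000), int64(2000)
	// Node allocatable: 4000 millicores and 4000 bytes.
	nodeMilliCPU, nodeMemory := int64(4000), int64(4000)

	score := 0
	if fits(podMilliCPU, nodeMilliCPU) == 1 || fits(podMemory, nodeMemory) == 1 {
		score = 1
	}
	fmt.Println("node score:", score) // node score: 1
}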
@@ -206,6 +206,12 @@ const (
 	// alpha: v1.9
 	// Postpone deletion of a persistent volume claim in case it is used by a pod
 	PVCProtection utilfeature.Feature = "PVCProtection"
+
+	// owner: @aveshagarwal
+	// alpha: v1.9
+	//
+	// Enable resource limits priority function
+	ResourceLimitsPriorityFunction utilfeature.Feature = "ResourceLimitsPriorityFunction"
 )

 func init() {
@@ -244,6 +250,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
 	CustomPodDNS:  {Default: false, PreRelease: utilfeature.Alpha},
 	BlockVolume:   {Default: false, PreRelease: utilfeature.Alpha},
 	PVCProtection: {Default: false, PreRelease: utilfeature.Alpha},
+	ResourceLimitsPriorityFunction: {Default: false, PreRelease: utilfeature.Alpha},

 	// inherited features from generic apiserver, relisted here to get a conflict if it is changed
 	// unintentionally on either side:
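Because ResourceLimitsPriorityFunction is an alpha gate that defaults to false, the new priority function stays dormant unless the gate is explicitly enabled, typically by including ResourceLimitsPriorityFunction=true in the scheduler's feature-gates configuration.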
@@ -19,6 +19,7 @@ go_library(
         "node_label.go",
         "node_prefer_avoid_pods.go",
         "reduce.go",
+        "resource_limits.go",
         "selector_spreading.go",
         "taint_toleration.go",
         "test_util.go",
@@ -54,6 +55,7 @@ go_test(
         "node_affinity_test.go",
         "node_label_test.go",
         "node_prefer_avoid_pods_test.go",
+        "resource_limits_test.go",
         "selector_spreading_test.go",
         "taint_toleration_test.go",
     ],
plugin/pkg/scheduler/algorithm/priorities/resource_limits.go (new file, 128 lines)
@@ -0,0 +1,128 @@

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"fmt"

	"k8s.io/api/core/v1"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

	"github.com/golang/glog"
)

// ResourceLimitsPriorityMap is a priority function that increases the score of the input node by 1 if the node
// satisfies the input pod's resource limits. In detail, this priority function works as follows: if a node does not
// publish its allocatable resources (both cpu and memory), the node score is not affected. If a pod specifies
// neither cpu nor memory limits, the node score is not affected. If one or both of the pod's cpu and memory limits
// are satisfied, the node is assigned a score of 1.
// The rationale for choosing the lowest non-zero score of 1 is that it is mainly used to break ties between nodes
// that have the same scores assigned by one of the least- and most-requested priority functions.
func ResourceLimitsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()

	// compute pod limits
	podLimits := getResourceLimits(pod)

	cpuScore := computeScore(podLimits.MilliCPU, allocatableResources.MilliCPU)
	memScore := computeScore(podLimits.Memory, allocatableResources.Memory)

	score := int(0)
	if cpuScore == 1 || memScore == 1 {
		score = 1
	}

	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.Infof(
			"%v -> %v: Resource Limits Priority, allocatable %d millicores %d memory bytes, pod limits %d millicores %d memory bytes, score %d",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			podLimits.MilliCPU, podLimits.Memory,
			score,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: score,
	}, nil
}

// computeScore returns 1 if the limit value is less than or equal to the allocatable
// value, otherwise it returns 0.
func computeScore(limit, allocatable int64) int64 {
	if limit != 0 && allocatable != 0 && limit <= allocatable {
		return 1
	}
	return 0
}

// getResourceLimits computes resource limits for the input pod.
// The reason to create this new function is to be consistent with other
// priority functions because most or perhaps all priority functions work
// with schedulercache.Resource.
// TODO: cache it as part of metadata passed to priority functions.
func getResourceLimits(pod *v1.Pod) *schedulercache.Resource {
	result := &schedulercache.Resource{}
	for _, container := range pod.Spec.Containers {
		result.Add(container.Resources.Limits)
	}

	// take max_resource(sum_pod, any_init_container)
	for _, container := range pod.Spec.InitContainers {
		for rName, rQuantity := range container.Resources.Limits {
			switch rName {
			case v1.ResourceMemory:
				if mem := rQuantity.Value(); mem > result.Memory {
					result.Memory = mem
				}
			case v1.ResourceCPU:
				if cpu := rQuantity.MilliValue(); cpu > result.MilliCPU {
					result.MilliCPU = cpu
				}
			// Keep these resources even though the score computation here and in other priority
			// functions is based on cpu and memory only.
			case v1.ResourceEphemeralStorage:
				if ephemeralStorage := rQuantity.Value(); ephemeralStorage > result.EphemeralStorage {
					result.EphemeralStorage = ephemeralStorage
				}
			case v1.ResourceNvidiaGPU:
				if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
					result.NvidiaGPU = gpu
				}
			default:
				if v1helper.IsScalarResourceName(rName) {
					value := rQuantity.Value()
					if value > result.ScalarResources[rName] {
						result.SetScalar(rName, value)
					}
				}
			}
		}
	}

	return result
}
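To make the behavior of getResourceLimits and computeScore concrete with the data used in the tests below: the cpuAndMemory pod's container limits sum to 3000 millicores of cpu and 5000 bytes of memory; on a node that advertises 4000 millicores and 4000 bytes as allocatable, the cpu limit fits while the memory limit does not, so cpuScore is 1, memScore is 0, and the node is still scored 1.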
plugin/pkg/scheduler/algorithm/priorities/resource_limits_test.go (new file, 151 lines)
@@ -0,0 +1,151 @@

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"reflect"
	"testing"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestResourceLimitsPriority(t *testing.T) {
	noResources := v1.PodSpec{
		Containers: []v1.Container{},
	}

	cpuOnly := v1.PodSpec{
		NodeName: "machine1",
		Containers: []v1.Container{
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("1000m"),
						v1.ResourceMemory: resource.MustParse("0"),
					},
				},
			},
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("2000m"),
						v1.ResourceMemory: resource.MustParse("0"),
					},
				},
			},
		},
	}

	memOnly := v1.PodSpec{
		NodeName: "machine2",
		Containers: []v1.Container{
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("0"),
						v1.ResourceMemory: resource.MustParse("2000"),
					},
				},
			},
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("0"),
						v1.ResourceMemory: resource.MustParse("3000"),
					},
				},
			},
		},
	}

	cpuAndMemory := v1.PodSpec{
		NodeName: "machine2",
		Containers: []v1.Container{
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("1000m"),
						v1.ResourceMemory: resource.MustParse("2000"),
					},
				},
			},
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("2000m"),
						v1.ResourceMemory: resource.MustParse("3000"),
					},
				},
			},
		},
	}

	tests := []struct {
		// input pod
		pod          *v1.Pod
		nodes        []*v1.Node
		expectedList schedulerapi.HostPriorityList
		test         string
	}{
		{
			pod:          &v1.Pod{Spec: noResources},
			nodes:        []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 0), makeNode("machine3", 0, 10000), makeNode("machine4", 0, 0)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}, {Host: "machine4", Score: 0}},
			test:         "pod does not specify its resource limits",
		},
		{
			pod:          &v1.Pod{Spec: cpuOnly},
			nodes:        []*v1.Node{makeNode("machine1", 3000, 10000), makeNode("machine2", 2000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 0}},
			test:         "pod only specifies cpu limits",
		},
		{
			pod:          &v1.Pod{Spec: memOnly},
			nodes:        []*v1.Node{makeNode("machine1", 4000, 4000), makeNode("machine2", 5000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 1}},
			test:         "pod only specifies mem limits",
		},
		{
			pod:          &v1.Pod{Spec: cpuAndMemory},
			nodes:        []*v1.Node{makeNode("machine1", 4000, 4000), makeNode("machine2", 5000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 1}},
			test:         "pod specifies both cpu and mem limits",
		},
		{
			pod:          &v1.Pod{Spec: cpuAndMemory},
			nodes:        []*v1.Node{makeNode("machine1", 0, 0)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}},
			test:         "node does not advertise its allocatables",
		},
	}

	for _, test := range tests {
		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
		list, err := priorityFunction(ResourceLimitsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		if !reflect.DeepEqual(test.expectedList, list) {
			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
		}
	}
}
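Assuming a checked-out Kubernetes tree with its dependencies in place, this table-driven test can be exercised on its own with the standard Go tooling, for example: go test ./plugin/pkg/scheduler/algorithm/priorities/ -run ResourceLimitsPriority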
@@ -106,6 +106,10 @@ func init() {
 	factory.RegisterPriorityFunction2("ImageLocalityPriority", priorities.ImageLocalityPriorityMap, nil, 1)
 	// Optional, cluster-autoscaler friendly priority function - give used nodes higher priority.
 	factory.RegisterPriorityFunction2("MostRequestedPriority", priorities.MostRequestedPriorityMap, nil, 1)
+	// Prioritizes nodes that satisfy pod's resource limits
+	if utilfeature.DefaultFeatureGate.Enabled(features.ResourceLimitsPriorityFunction) {
+		factory.RegisterPriorityFunction2("ResourceLimitsPriority", priorities.ResourceLimitsPriorityMap, nil, 1)
+	}
 }

 func defaultPredicates() sets.String {
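The map function is registered with a weight of 1 (the last argument to RegisterPriorityFunction2 here appears to be the priority weight, as in the surrounding registrations), so it contributes at most one point per node, consistent with the tie-breaking rationale given in the function's doc comment.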