Implement the resource limits priority function. This function checks whether the input pod's resource limits can be satisfied by the input node's allocatable resources. If they can, the node is assigned a score of 1; otherwise the node's score is left unchanged.

parent 02a7c12cbd
commit b571001999
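The scoring rule described above is simple enough to illustrate in isolation. The following is a minimal, self-contained sketch (the fits helper and the literal values are illustrative only, not part of the scheduler code): a node earns a score of 1 when at least one of the pod's cpu or memory limits is non-zero and fits within the node's advertised allocatable amount, and 0 otherwise.

package main

import "fmt"

// fits returns 1 when a non-zero limit fits within a non-zero allocatable amount.
func fits(limit, allocatable int64) int64 {
	if limit != 0 && allocatable != 0 && limit <= allocatable {
		return 1
	}
	return 0
}

func main() {
	// Pod limits: 1000 millicores of cpu and 2000 bytes of memory.
	podMilliCPU, podMemory := int64(1000), int64(2000)
	// Node allocatable: 4000 millicores and 4000 bytes.
	nodeMilliCPU, nodeMemory := int64(4000), int64(4000)

	score := 0
	if fits(podMilliCPU, nodeMilliCPU) == 1 || fits(podMemory, nodeMemory) == 1 {
		score = 1
	}
	fmt.Println("node score:", score) // node score: 1
}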
@@ -206,6 +206,12 @@ const (
 	// alpha: v1.9
 	// Postpone deletion of a persistent volume claim in case it is used by a pod
 	PVCProtection utilfeature.Feature = "PVCProtection"
+
+	// owner: @aveshagarwal
+	// alpha: v1.9
+	//
+	// Enable resource limits priority function
+	ResourceLimitsPriorityFunction utilfeature.Feature = "ResourceLimitsPriorityFunction"
 )

 func init() {
@@ -244,6 +250,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
 	CustomPodDNS:  {Default: false, PreRelease: utilfeature.Alpha},
 	BlockVolume:   {Default: false, PreRelease: utilfeature.Alpha},
 	PVCProtection: {Default: false, PreRelease: utilfeature.Alpha},
+	ResourceLimitsPriorityFunction: {Default: false, PreRelease: utilfeature.Alpha},

 	// inherited features from generic apiserver, relisted here to get a conflict if it is changed
 	// unintentionally on either side:
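Because ResourceLimitsPriorityFunction is an alpha gate that defaults to false, the new priority function stays dormant unless the gate is explicitly enabled, typically by including ResourceLimitsPriorityFunction=true in the scheduler's feature-gates configuration.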
@@ -19,6 +19,7 @@ go_library(
         "node_label.go",
         "node_prefer_avoid_pods.go",
         "reduce.go",
+        "resource_limits.go",
         "selector_spreading.go",
         "taint_toleration.go",
         "test_util.go",
@@ -54,6 +55,7 @@ go_test(
         "node_affinity_test.go",
         "node_label_test.go",
         "node_prefer_avoid_pods_test.go",
+        "resource_limits_test.go",
         "selector_spreading_test.go",
         "taint_toleration_test.go",
     ],
plugin/pkg/scheduler/algorithm/priorities/resource_limits.go (new file, 128 lines)
@@ -0,0 +1,128 @@

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"fmt"

	"k8s.io/api/core/v1"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

	"github.com/golang/glog"
)

// ResourceLimitsPriorityMap is a priority function that increases the score of the input node by 1 if the node
// satisfies the input pod's resource limits. In detail, this priority function works as follows: if a node does not
// publish its allocatable resources (both cpu and memory), the node score is not affected. If a pod specifies
// neither cpu nor memory limits, the node score is not affected. If one or both of the pod's cpu and memory limits
// are satisfied, the node is assigned a score of 1.
// The rationale for choosing the lowest non-zero score of 1 is that it is mainly used to break ties between nodes
// that have the same scores assigned by one of the least- and most-requested priority functions.
func ResourceLimitsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()

	// compute pod limits
	podLimits := getResourceLimits(pod)

	cpuScore := computeScore(podLimits.MilliCPU, allocatableResources.MilliCPU)
	memScore := computeScore(podLimits.Memory, allocatableResources.Memory)

	score := int(0)
	if cpuScore == 1 || memScore == 1 {
		score = 1
	}

	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.Infof(
			"%v -> %v: Resource Limits Priority, allocatable %d millicores %d memory bytes, pod limits %d millicores %d memory bytes, score %d",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			podLimits.MilliCPU, podLimits.Memory,
			score,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: score,
	}, nil
}

// computeScore returns 1 if the limit value is less than or equal to the allocatable
// value, otherwise it returns 0.
func computeScore(limit, allocatable int64) int64 {
	if limit != 0 && allocatable != 0 && limit <= allocatable {
		return 1
	}
	return 0
}

// getResourceLimits computes resource limits for the input pod.
// The reason to create this new function is to be consistent with other
// priority functions because most or perhaps all priority functions work
// with schedulercache.Resource.
// TODO: cache it as part of metadata passed to priority functions.
func getResourceLimits(pod *v1.Pod) *schedulercache.Resource {
	result := &schedulercache.Resource{}
	for _, container := range pod.Spec.Containers {
		result.Add(container.Resources.Limits)
	}

	// take max_resource(sum_pod, any_init_container)
	for _, container := range pod.Spec.InitContainers {
		for rName, rQuantity := range container.Resources.Limits {
			switch rName {
			case v1.ResourceMemory:
				if mem := rQuantity.Value(); mem > result.Memory {
					result.Memory = mem
				}
			case v1.ResourceCPU:
				if cpu := rQuantity.MilliValue(); cpu > result.MilliCPU {
					result.MilliCPU = cpu
				}
			// Keep these resources even though the score computation here and in other priority
			// functions is based on cpu and memory only.
			case v1.ResourceEphemeralStorage:
				if ephemeralStorage := rQuantity.Value(); ephemeralStorage > result.EphemeralStorage {
					result.EphemeralStorage = ephemeralStorage
				}
			case v1.ResourceNvidiaGPU:
				if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
					result.NvidiaGPU = gpu
				}
			default:
				if v1helper.IsScalarResourceName(rName) {
					value := rQuantity.Value()
					if value > result.ScalarResources[rName] {
						result.SetScalar(rName, value)
					}
				}
			}
		}
	}

	return result
}
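To make the behavior of getResourceLimits and computeScore concrete with the data used in the tests below: the cpuAndMemory pod's container limits sum to 3000 millicores of cpu and 5000 bytes of memory; on a node that advertises 4000 millicores and 4000 bytes as allocatable, the cpu limit fits while the memory limit does not, so cpuScore is 1, memScore is 0, and the node is still scored 1.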
plugin/pkg/scheduler/algorithm/priorities/resource_limits_test.go (new file, 151 lines)
@@ -0,0 +1,151 @@

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"reflect"
	"testing"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestResourceLimitsPriority(t *testing.T) {
	noResources := v1.PodSpec{
		Containers: []v1.Container{},
	}

	cpuOnly := v1.PodSpec{
		NodeName: "machine1",
		Containers: []v1.Container{
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("1000m"),
						v1.ResourceMemory: resource.MustParse("0"),
					},
				},
			},
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("2000m"),
						v1.ResourceMemory: resource.MustParse("0"),
					},
				},
			},
		},
	}

	memOnly := v1.PodSpec{
		NodeName: "machine2",
		Containers: []v1.Container{
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("0"),
						v1.ResourceMemory: resource.MustParse("2000"),
					},
				},
			},
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("0"),
						v1.ResourceMemory: resource.MustParse("3000"),
					},
				},
			},
		},
	}

	cpuAndMemory := v1.PodSpec{
		NodeName: "machine2",
		Containers: []v1.Container{
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("1000m"),
						v1.ResourceMemory: resource.MustParse("2000"),
					},
				},
			},
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("2000m"),
						v1.ResourceMemory: resource.MustParse("3000"),
					},
				},
			},
		},
	}

	tests := []struct {
		// input pod
		pod          *v1.Pod
		nodes        []*v1.Node
		expectedList schedulerapi.HostPriorityList
		test         string
	}{
		{
			pod:          &v1.Pod{Spec: noResources},
			nodes:        []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 0), makeNode("machine3", 0, 10000), makeNode("machine4", 0, 0)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}, {Host: "machine4", Score: 0}},
			test:         "pod does not specify its resource limits",
		},
		{
			pod:          &v1.Pod{Spec: cpuOnly},
			nodes:        []*v1.Node{makeNode("machine1", 3000, 10000), makeNode("machine2", 2000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 0}},
			test:         "pod only specifies cpu limits",
		},
		{
			pod:          &v1.Pod{Spec: memOnly},
			nodes:        []*v1.Node{makeNode("machine1", 4000, 4000), makeNode("machine2", 5000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 1}},
			test:         "pod only specifies mem limits",
		},
		{
			pod:          &v1.Pod{Spec: cpuAndMemory},
			nodes:        []*v1.Node{makeNode("machine1", 4000, 4000), makeNode("machine2", 5000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 1}},
			test:         "pod specifies both cpu and mem limits",
		},
		{
			pod:          &v1.Pod{Spec: cpuAndMemory},
			nodes:        []*v1.Node{makeNode("machine1", 0, 0)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}},
			test:         "node does not advertise its allocatables",
		},
	}

	for _, test := range tests {
		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
		list, err := priorityFunction(ResourceLimitsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		if !reflect.DeepEqual(test.expectedList, list) {
			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
		}
	}
}
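Assuming a checked-out Kubernetes tree with its dependencies in place, this table-driven test can be exercised on its own with the standard Go tooling, for example: go test ./plugin/pkg/scheduler/algorithm/priorities/ -run ResourceLimitsPriority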
@@ -106,6 +106,10 @@ func init() {
 	factory.RegisterPriorityFunction2("ImageLocalityPriority", priorities.ImageLocalityPriorityMap, nil, 1)
 	// Optional, cluster-autoscaler friendly priority function - give used nodes higher priority.
 	factory.RegisterPriorityFunction2("MostRequestedPriority", priorities.MostRequestedPriorityMap, nil, 1)
+	// Prioritizes nodes that satisfy pod's resource limits
+	if utilfeature.DefaultFeatureGate.Enabled(features.ResourceLimitsPriorityFunction) {
+		factory.RegisterPriorityFunction2("ResourceLimitsPriority", priorities.ResourceLimitsPriorityMap, nil, 1)
+	}
 }

 func defaultPredicates() sets.String {
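The map function is registered with a weight of 1 (the last argument to RegisterPriorityFunction2 here appears to be the priority weight, as in the surrounding registrations), so it contributes at most one point per node, consistent with the tie-breaking rationale given in the function's doc comment.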