Merge 3 resource allocation priority functions

Yongkun Anfernee Gui 2017-11-09 15:53:52 -08:00
parent f302487942
commit c65225ee19
6 changed files with 143 additions and 179 deletions

plugin/pkg/scheduler/algorithm/priorities/BUILD

@@ -19,6 +19,7 @@ go_library(
        "node_label.go",
        "node_prefer_avoid_pods.go",
        "reduce.go",
+       "resource_allocation.go",
        "resource_limits.go",
        "selector_spreading.go",
        "taint_toleration.go",

plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation.go

@@ -17,76 +17,40 @@ limitations under the License.
 package priorities
 
 import (
-	"fmt"
 	"math"
 
-	"k8s.io/api/core/v1"
-	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
 	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
-
-	"github.com/golang/glog"
 )
 
-// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
-const (
-	mb         int64 = 1024 * 1024
-	minImgSize int64 = 23 * mb
-	maxImgSize int64 = 1000 * mb
-)
-
-// Also used in most/least_requested and metadata.
-// TODO: despaghettify it
-func getNonZeroRequests(pod *v1.Pod) *schedulercache.Resource {
-	result := &schedulercache.Resource{}
-	for i := range pod.Spec.Containers {
-		container := &pod.Spec.Containers[i]
-		cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
-		result.MilliCPU += cpu
-		result.Memory += memory
-	}
-	return result
-}
-
-func calculateBalancedResourceAllocation(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
-	node := nodeInfo.Node()
-	if node == nil {
-		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
-	}
-
-	allocatableResources := nodeInfo.AllocatableResource()
-	totalResources := *podRequests
-	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
-	totalResources.Memory += nodeInfo.NonZeroRequest().Memory
-
-	cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
-	memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
-	score := int(0)
+var (
+	balanceResourcePriority = &ResourceAllocationPriority{"BalanceResourceAllocation", balancedResourceScorer}
+
+	// BalancedResourceAllocationMap favors nodes with balanced resource usage rate.
+	// BalancedResourceAllocationMap should **NOT** be used alone, and **MUST** be used together
+	// with LeastRequestedPriority. It calculates the difference between the cpu and memory fraction
+	// of capacity, and prioritizes the host based on how close the two metrics are to each other.
+	// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
+	// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced
+	// Resource Utilization"
+	BalancedResourceAllocationMap = balanceResourcePriority.PriorityMap
+)
+
+func balancedResourceScorer(requested, allocable *schedulercache.Resource) int64 {
+	cpuFraction := fractionOfCapacity(requested.MilliCPU, allocable.MilliCPU)
+	memoryFraction := fractionOfCapacity(requested.Memory, allocable.Memory)
+
 	if cpuFraction >= 1 || memoryFraction >= 1 {
 		// if requested >= capacity, the corresponding host should never be preferred.
-		score = 0
-	} else {
-		// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
-		// respectively. Multiplying the absolute value of the difference by 10 scales the value to
-		// 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
-		// 10 leads to the score which also scales from 0 to 10 while 10 representing well balanced.
-		diff := math.Abs(cpuFraction - memoryFraction)
-		score = int((1 - diff) * float64(schedulerapi.MaxPriority))
+		return 0
 	}
 
-	if glog.V(10) {
-		glog.Infof(
-			"%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
-			pod.Name, node.Name,
-			allocatableResources.MilliCPU, allocatableResources.Memory,
-			totalResources.MilliCPU, totalResources.Memory,
-			score,
-		)
-	}
-
-	return schedulerapi.HostPriority{
-		Host:  node.Name,
-		Score: score,
-	}, nil
+	// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
+	// respectively. Multiplying the absolute value of the difference by 10 scales the value to
+	// 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
+	// 10 leads to the score which also scales from 0 to 10 while 10 representing well balanced.
+	diff := math.Abs(cpuFraction - memoryFraction)
+	return int64((1 - diff) * float64(schedulerapi.MaxPriority))
 }
 
 func fractionOfCapacity(requested, capacity int64) float64 {
@@ -95,20 +59,3 @@ func fractionOfCapacity(requested, capacity int64) float64 {
 	}
 	return float64(requested) / float64(capacity)
 }
-
-// BalancedResourceAllocationMap favors nodes with balanced resource usage rate.
-// BalancedResourceAllocationMap should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
-// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how
-// close the two metrics are to each other.
-// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
-// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
-func BalancedResourceAllocationMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
-	var nonZeroRequest *schedulercache.Resource
-	if priorityMeta, ok := meta.(*priorityMetadata); ok {
-		nonZeroRequest = priorityMeta.nonZeroRequest
-	} else {
-		// We couldn't parse metadata - fallback to computing it.
-		nonZeroRequest = getNonZeroRequests(pod)
-	}
-	return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo)
-}

plugin/pkg/scheduler/algorithm/priorities/image_locality.go

@@ -24,6 +24,13 @@ import (
 	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
 )
 
+// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
+const (
+	mb         int64 = 1024 * 1024
+	minImgSize int64 = 23 * mb
+	maxImgSize int64 = 1000 * mb
+)
+
 // ImageLocalityPriorityMap is a priority function that favors nodes that already have requested pod container's images.
 // It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
 // based on the total size of those images.
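Note: the diff moves the image-size constants here but does not show the scoring function itself. As an illustration only, here is one linear size-to-score mapping clamped to [minImgSize, maxImgSize]; scoreFromSize is a hypothetical name and not necessarily what image_locality.go actually computes:

package main

import "fmt"

const (
	mb          int64 = 1024 * 1024
	minImgSize  int64 = 23 * mb
	maxImgSize  int64 = 1000 * mb
	maxPriority int64 = 10
)

// scoreFromSize maps the summed size of a pod's images already present on a
// node to 0..10, clamping to the range the constants define.
func scoreFromSize(sumSize int64) int64 {
	switch {
	case sumSize <= minImgSize:
		return 0
	case sumSize >= maxImgSize:
		return maxPriority
	default:
		return maxPriority * (sumSize - minImgSize) / (maxImgSize - minImgSize)
	}
}

func main() {
	fmt.Println(scoreFromSize(23 * mb))   // 0: at or below the lower bound
	fmt.Println(scoreFromSize(500 * mb))  // 4: midrange image set
	fmt.Println(scoreFromSize(2000 * mb)) // 10: clamped at the upper bound
}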

plugin/pkg/scheduler/algorithm/priorities/least_requested.go

@@ -17,73 +17,37 @@ limitations under the License.
 package priorities
 
 import (
-	"fmt"
-
-	"k8s.io/api/core/v1"
 	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
-
-	"github.com/golang/glog"
 )
 
-// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
-// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
-// based on the minimum of the average of the fraction of requested to capacity.
-// Details: cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity) / 2
-func LeastRequestedPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
-	var nonZeroRequest *schedulercache.Resource
-	if priorityMeta, ok := meta.(*priorityMetadata); ok {
-		nonZeroRequest = priorityMeta.nonZeroRequest
-	} else {
-		// We couldn't parse metadata - fallback to computing it.
-		nonZeroRequest = getNonZeroRequests(pod)
-	}
-	return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
+var (
+	leastResourcePriority = &ResourceAllocationPriority{"LeastResourceAllocation", leastResourceScorer}
+
+	// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
+	// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and
+	// prioritizes based on the minimum of the average of the fraction of requested to capacity.
+	//
+	// Details:
+	// cpu((capacity-sum(requested))*10/capacity) + memory((capacity-sum(requested))*10/capacity)/2
+	LeastRequestedPriorityMap = leastResourcePriority.PriorityMap
+)
+
+func leastResourceScorer(requested, allocable *schedulercache.Resource) int64 {
+	return (leastRequestedScore(requested.MilliCPU, allocable.MilliCPU) +
+		leastRequestedScore(requested.Memory, allocable.Memory)) / 2
 }
 
 // The unused capacity is calculated on a scale of 0-10
 // 0 being the lowest priority and 10 being the highest.
 // The more unused resources the higher the score is.
-func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
+func leastRequestedScore(requested, capacity int64) int64 {
 	if capacity == 0 {
 		return 0
 	}
 	if requested > capacity {
-		glog.V(10).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
-			requested, capacity, node)
 		return 0
 	}
 	return ((capacity - requested) * int64(schedulerapi.MaxPriority)) / capacity
 }
-
-// Calculates host priority based on the amount of unused resources.
-// 'node' has information about the resources on the node.
-// 'pods' is a list of pods currently scheduled on the node.
-func calculateUnusedPriority(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
-	node := nodeInfo.Node()
-	if node == nil {
-		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
-	}
-
-	allocatableResources := nodeInfo.AllocatableResource()
-	totalResources := *podRequests
-	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
-	totalResources.Memory += nodeInfo.NonZeroRequest().Memory
-
-	cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
-	memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
-
-	if glog.V(10) {
-		glog.Infof(
-			"%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
-			pod.Name, node.Name,
-			allocatableResources.MilliCPU, allocatableResources.Memory,
-			totalResources.MilliCPU, totalResources.Memory,
-			cpuScore, memoryScore,
-		)
-	}
-
-	return schedulerapi.HostPriority{
-		Host:  node.Name,
-		Score: int((cpuScore + memoryScore) / 2),
-	}, nil
-}
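Note: a self-contained sketch of how the two leastRequestedScore values average into the final score, with hypothetical node sizes and a local maxPriority standing in for schedulerapi.MaxPriority:

package main

import "fmt"

const maxPriority int64 = 10 // stands in for schedulerapi.MaxPriority

// leastScore mirrors leastRequestedScore: the more unused capacity, the
// higher the score.
func leastScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (capacity - requested) * maxPriority / capacity
}

func main() {
	// Hypothetical 4000-millicore, 8GiB node carrying 1000 millicores (25%)
	// and 6GiB (75%) of non-zero requests.
	cpu := leastScore(1000, 4000)   // (4000-1000)*10/4000 = 7
	mem := leastScore(6<<30, 8<<30) // (8GiB-6GiB)*10/8GiB = 2
	fmt.Println((cpu + mem) / 2)    // leastResourceScorer result: 4
}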

plugin/pkg/scheduler/algorithm/priorities/most_requested.go

@@ -17,28 +17,23 @@ limitations under the License.
 package priorities
 
 import (
-	"fmt"
-
-	"k8s.io/api/core/v1"
 	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
-
-	"github.com/golang/glog"
 )
 
-// MostRequestedPriority is a priority function that favors nodes with most requested resources.
-// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
-// based on the maximum of the average of the fraction of requested to capacity.
-// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
-func MostRequestedPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
-	var nonZeroRequest *schedulercache.Resource
-	if priorityMeta, ok := meta.(*priorityMetadata); ok {
-		nonZeroRequest = priorityMeta.nonZeroRequest
-	} else {
-		// We couldn't parse metadata - fallback to computing it.
-		nonZeroRequest = getNonZeroRequests(pod)
-	}
-	return calculateUsedPriority(pod, nonZeroRequest, nodeInfo)
+var (
+	mostResourcePriority = &ResourceAllocationPriority{"MostResourceAllocation", mostResourceScorer}
+
+	// MostRequestedPriority is a priority function that favors nodes with most requested resources.
+	// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
+	// based on the maximum of the average of the fraction of requested to capacity.
+	// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
+	MostRequestedPriorityMap = mostResourcePriority.PriorityMap
+)
+
+func mostResourceScorer(requested, allocable *schedulercache.Resource) int64 {
+	return (mostRequestedScore(requested.MilliCPU, allocable.MilliCPU) +
+		mostRequestedScore(requested.Memory, allocable.Memory)) / 2
 }
 
 // The used capacity is calculated on a scale of 0-10
@@ -48,45 +43,13 @@ func MostRequestedPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
 // (10 - calculateUnusedScore). The main difference is in rounding. It was added to
 // keep the final formula clean and not to modify the widely used (by users
 // in their default scheduling policies) calculateUnusedScore.
-func calculateUsedScore(requested int64, capacity int64, node string) int64 {
+func mostRequestedScore(requested, capacity int64) int64 {
 	if capacity == 0 {
 		return 0
 	}
 	if requested > capacity {
-		glog.V(10).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
-			requested, capacity, node)
 		return 0
 	}
 	return (requested * schedulerapi.MaxPriority) / capacity
 }
-
-// Calculate the resource used on a node. 'node' has information about the resources on the node.
-// 'pods' is a list of pods currently scheduled on the node.
-func calculateUsedPriority(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
-	node := nodeInfo.Node()
-	if node == nil {
-		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
-	}
-
-	allocatableResources := nodeInfo.AllocatableResource()
-	totalResources := *podRequests
-	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
-	totalResources.Memory += nodeInfo.NonZeroRequest().Memory
-
-	cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
-	memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
-
-	if glog.V(10) {
-		glog.Infof(
-			"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
-			pod.Name, node.Name,
-			allocatableResources.MilliCPU, allocatableResources.Memory,
-			totalResources.MilliCPU, totalResources.Memory,
-			cpuScore, memoryScore,
-		)
-	}
-
-	return schedulerapi.HostPriority{
-		Host:  node.Name,
-		Score: int((cpuScore + memoryScore) / 2),
-	}, nil
-}
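Note: the comment above says mostRequestedScore is not simply 10 - calculateUnusedScore because of rounding. A small standalone sketch makes the difference concrete (local maxPriority stands in for schedulerapi.MaxPriority):

package main

import "fmt"

const maxPriority int64 = 10 // stands in for schedulerapi.MaxPriority

// mostScore and leastScore mirror mostRequestedScore and leastRequestedScore.
func mostScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return requested * maxPriority / capacity
}

func leastScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (capacity - requested) * maxPriority / capacity
}

func main() {
	// 1000 of 4000 millicores requested: both formulas truncate toward zero,
	// so the two scores differ by one point instead of summing to 10.
	fmt.Println(mostScore(1000, 4000))                // 2 (2.5 truncated)
	fmt.Println(maxPriority - leastScore(1000, 4000)) // 3 (leastScore 7.5 truncated to 7)
}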

plugin/pkg/scheduler/algorithm/priorities/resource_allocation.go (new file)

@@ -0,0 +1,82 @@
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package priorities
+
+import (
+	"fmt"
+
+	"github.com/golang/glog"
+
+	"k8s.io/api/core/v1"
+	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
+	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
+	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
+)
+
+// ResourceAllocationPriority pairs a name with a resource scoring function so
+// that the three resource allocation priorities can share one PriorityMap.
+type ResourceAllocationPriority struct {
+	Name   string
+	scorer func(requested, allocable *schedulercache.Resource) int64
+}
+
+// PriorityMap scores a node for the given pod by summing the pod's non-zero
+// requests with the node's existing non-zero requests and delegating to the
+// configured scorer.
+func (r *ResourceAllocationPriority) PriorityMap(
+	pod *v1.Pod,
+	meta interface{},
+	nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
+	node := nodeInfo.Node()
+	if node == nil {
+		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
+	}
+	allocatable := nodeInfo.AllocatableResource()
+
+	var requested schedulercache.Resource
+	if priorityMeta, ok := meta.(*priorityMetadata); ok {
+		requested = *priorityMeta.nonZeroRequest
+	} else {
+		// We couldn't parse metadata - fallback to computing it.
+		requested = *getNonZeroRequests(pod)
+	}
+
+	requested.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
+	requested.Memory += nodeInfo.NonZeroRequest().Memory
+
+	score := r.scorer(&requested, &allocatable)
+
+	if glog.V(10) {
+		glog.Infof(
+			"%v -> %v: %v, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
+			pod.Name, node.Name, r.Name,
+			allocatable.MilliCPU, allocatable.Memory,
+			requested.MilliCPU, requested.Memory,
+			score,
+		)
+	}
+
+	return schedulerapi.HostPriority{
+		Host:  node.Name,
+		Score: int(score),
+	}, nil
+}
+
+// getNonZeroRequests sums the non-zero cpu and memory requests of all
+// containers in the pod, substituting defaults for unset requests.
+func getNonZeroRequests(pod *v1.Pod) *schedulercache.Resource {
+	result := &schedulercache.Resource{}
+	for i := range pod.Spec.Containers {
+		container := &pod.Spec.Containers[i]
+		cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
+		result.MilliCPU += cpu
+		result.Memory += memory
+	}
+	return result
+}
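Note: the pattern this file introduces, one generic PriorityMap with pluggable scorers, can be sketched outside the scheduler. In the sketch below, Resource, allocationPriority, and score are simplified stand-ins for schedulercache.Resource, ResourceAllocationPriority, and the real PriorityMap (which also resolves the node, handles metadata, and logs at V(10)):

package main

import "fmt"

// Resource is a simplified stand-in for schedulercache.Resource.
type Resource struct {
	MilliCPU int64
	Memory   int64
}

// allocationPriority mirrors the shape of ResourceAllocationPriority:
// a name plus a pluggable scoring function.
type allocationPriority struct {
	name   string
	scorer func(requested, allocable *Resource) int64
}

// score delegates to the configured scorer, as PriorityMap does.
func (p *allocationPriority) score(requested, allocable *Resource) int64 {
	return p.scorer(requested, allocable)
}

func leastRequested(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (capacity - requested) * 10 / capacity
}

func main() {
	least := &allocationPriority{
		name: "LeastResourceAllocation",
		scorer: func(req, alloc *Resource) int64 {
			return (leastRequested(req.MilliCPU, alloc.MilliCPU) +
				leastRequested(req.Memory, alloc.Memory)) / 2
		},
	}
	req := &Resource{MilliCPU: 1000, Memory: 2 << 30}
	alloc := &Resource{MilliCPU: 4000, Memory: 8 << 30}
	fmt.Printf("%s: %d\n", least.name, least.score(req, alloc)) // LeastResourceAllocation: 7
}

Swapping in a different scorer yields the other two priorities unchanged, which is the point of the merge: the node lookup, metadata fallback, and logging live in one place instead of three.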