Merge 3 resource allocation priority functions

Yongkun Anfernee Gui 2017-11-09 15:53:52 -08:00
parent f302487942
commit c65225ee19
6 changed files with 143 additions and 179 deletions

plugin/pkg/scheduler/algorithm/priorities/BUILD

@@ -19,6 +19,7 @@ go_library(
        "node_label.go",
        "node_prefer_avoid_pods.go",
        "reduce.go",
+       "resource_allocation.go",
        "resource_limits.go",
        "selector_spreading.go",
        "taint_toleration.go",

plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation.go

@@ -17,76 +17,40 @@ limitations under the License.
 package priorities
 
 import (
-	"fmt"
 	"math"
 
-	"k8s.io/api/core/v1"
-	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
 	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
-
-	"github.com/golang/glog"
 )
 
-// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
-const (
-	mb         int64 = 1024 * 1024
-	minImgSize int64 = 23 * mb
-	maxImgSize int64 = 1000 * mb
-)
-
-// Also used in most/least_requested and metadata.
-// TODO: despaghettify it
-func getNonZeroRequests(pod *v1.Pod) *schedulercache.Resource {
-	result := &schedulercache.Resource{}
-	for i := range pod.Spec.Containers {
-		container := &pod.Spec.Containers[i]
-		cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
-		result.MilliCPU += cpu
-		result.Memory += memory
-	}
-	return result
-}
-
-func calculateBalancedResourceAllocation(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
-	node := nodeInfo.Node()
-	if node == nil {
-		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
-	}
-
-	allocatableResources := nodeInfo.AllocatableResource()
-	totalResources := *podRequests
-	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
-	totalResources.Memory += nodeInfo.NonZeroRequest().Memory
-
-	cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
-	memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
-	score := int(0)
+var (
+	balanceResourcePriority = &ResourceAllocationPriority{"BalanceResourceAllocation", balancedResourceScorer}
+
+	// BalancedResourceAllocationMap favors nodes with balanced resource usage rate.
+	// BalancedResourceAllocationMap should **NOT** be used alone, and **MUST** be used together
+	// with LeastRequestedPriority. It calculates the difference between the cpu and memory fraction
+	// of capacity, and prioritizes the host based on how close the two metrics are to each other.
+	// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
+	// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced
+	// Resource Utilization"
+	BalancedResourceAllocationMap = balanceResourcePriority.PriorityMap
+)
+
+func balancedResourceScorer(requested, allocable *schedulercache.Resource) int64 {
+	cpuFraction := fractionOfCapacity(requested.MilliCPU, allocable.MilliCPU)
+	memoryFraction := fractionOfCapacity(requested.Memory, allocable.Memory)
+
 	if cpuFraction >= 1 || memoryFraction >= 1 {
 		// if requested >= capacity, the corresponding host should never be preferred.
-		score = 0
-	} else {
-		// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
-		// respectively. Multiplying the absolute value of the difference by 10 scales the value to
-		// 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
-		// 10 leads to the score which also scales from 0 to 10 while 10 representing well balanced.
-		diff := math.Abs(cpuFraction - memoryFraction)
-		score = int((1 - diff) * float64(schedulerapi.MaxPriority))
+		return 0
 	}
 
-	if glog.V(10) {
-		glog.Infof(
-			"%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
-			pod.Name, node.Name,
-			allocatableResources.MilliCPU, allocatableResources.Memory,
-			totalResources.MilliCPU, totalResources.Memory,
-			score,
-		)
-	}
-
-	return schedulerapi.HostPriority{
-		Host:  node.Name,
-		Score: score,
-	}, nil
+	// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
+	// respectively. Multiplying the absolute value of the difference by 10 scales the value to
+	// 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
+	// 10 leads to the score which also scales from 0 to 10 while 10 representing well balanced.
+	diff := math.Abs(cpuFraction - memoryFraction)
+	return int64((1 - diff) * float64(schedulerapi.MaxPriority))
 }
 
 func fractionOfCapacity(requested, capacity int64) float64 {
@@ -95,20 +59,3 @@ func fractionOfCapacity(requested, capacity int64) float64 {
 	}
 	return float64(requested) / float64(capacity)
 }
-
-// BalancedResourceAllocationMap favors nodes with balanced resource usage rate.
-// BalancedResourceAllocationMap should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
-// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how
-// close the two metrics are to each other.
-// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
-// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
-func BalancedResourceAllocationMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
-	var nonZeroRequest *schedulercache.Resource
-	if priorityMeta, ok := meta.(*priorityMetadata); ok {
-		nonZeroRequest = priorityMeta.nonZeroRequest
-	} else {
-		// We couldn't parse metadata - fallback to computing it.
-		nonZeroRequest = getNonZeroRequests(pod)
-	}
-	return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo)
-}

plugin/pkg/scheduler/algorithm/priorities/image_locality.go

@@ -24,6 +24,13 @@ import (
 	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
 )
 
+// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
+const (
+	mb         int64 = 1024 * 1024
+	minImgSize int64 = 23 * mb
+	maxImgSize int64 = 1000 * mb
+)
+
 // ImageLocalityPriorityMap is a priority function that favors nodes that already have requested pod container's images.
 // It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
 // based on the total size of those images.
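Note: the diff moves the image-size constants here but does not show the scoring function itself. As an illustration only, here is one linear size-to-score mapping clamped to [minImgSize, maxImgSize]; scoreFromSize is a hypothetical name and not necessarily what image_locality.go actually computes:

package main

import "fmt"

const (
	mb          int64 = 1024 * 1024
	minImgSize  int64 = 23 * mb
	maxImgSize  int64 = 1000 * mb
	maxPriority int64 = 10
)

// scoreFromSize maps the summed size of a pod's images already present on a
// node to 0..10, clamping to the range the constants define.
func scoreFromSize(sumSize int64) int64 {
	switch {
	case sumSize <= minImgSize:
		return 0
	case sumSize >= maxImgSize:
		return maxPriority
	default:
		return maxPriority * (sumSize - minImgSize) / (maxImgSize - minImgSize)
	}
}

func main() {
	fmt.Println(scoreFromSize(23 * mb))   // 0: at or below the lower bound
	fmt.Println(scoreFromSize(500 * mb))  // 4: midrange image set
	fmt.Println(scoreFromSize(2000 * mb)) // 10: clamped at the upper bound
}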

plugin/pkg/scheduler/algorithm/priorities/least_requested.go

@@ -17,73 +17,37 @@ limitations under the License.
 package priorities
 
 import (
-	"fmt"
-
-	"k8s.io/api/core/v1"
 	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
-
-	"github.com/golang/glog"
 )
 
-// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
-// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
-// based on the minimum of the average of the fraction of requested to capacity.
-// Details: cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity) / 2
-func LeastRequestedPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
-	var nonZeroRequest *schedulercache.Resource
-	if priorityMeta, ok := meta.(*priorityMetadata); ok {
-		nonZeroRequest = priorityMeta.nonZeroRequest
-	} else {
-		// We couldn't parse metadata - fallback to computing it.
-		nonZeroRequest = getNonZeroRequests(pod)
-	}
-	return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
+var (
+	leastResourcePriority = &ResourceAllocationPriority{"LeastResourceAllocation", leastResourceScorer}
+
+	// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
+	// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and
+	// prioritizes based on the minimum of the average of the fraction of requested to capacity.
+	//
+	// Details:
+	// cpu((capacity-sum(requested))*10/capacity) + memory((capacity-sum(requested))*10/capacity)/2
+	LeastRequestedPriorityMap = leastResourcePriority.PriorityMap
+)
+
+func leastResourceScorer(requested, allocable *schedulercache.Resource) int64 {
+	return (leastRequestedScore(requested.MilliCPU, allocable.MilliCPU) +
+		leastRequestedScore(requested.Memory, allocable.Memory)) / 2
 }
 
 // The unused capacity is calculated on a scale of 0-10
 // 0 being the lowest priority and 10 being the highest.
 // The more unused resources the higher the score is.
-func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
+func leastRequestedScore(requested, capacity int64) int64 {
 	if capacity == 0 {
 		return 0
 	}
 	if requested > capacity {
-		glog.V(10).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
-			requested, capacity, node)
 		return 0
 	}
 	return ((capacity - requested) * int64(schedulerapi.MaxPriority)) / capacity
 }
-
-// Calculates host priority based on the amount of unused resources.
-// 'node' has information about the resources on the node.
-// 'pods' is a list of pods currently scheduled on the node.
-func calculateUnusedPriority(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
-	node := nodeInfo.Node()
-	if node == nil {
-		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
-	}
-
-	allocatableResources := nodeInfo.AllocatableResource()
-	totalResources := *podRequests
-	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
-	totalResources.Memory += nodeInfo.NonZeroRequest().Memory
-
-	cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
-	memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
-
-	if glog.V(10) {
-		glog.Infof(
-			"%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
-			pod.Name, node.Name,
-			allocatableResources.MilliCPU, allocatableResources.Memory,
-			totalResources.MilliCPU, totalResources.Memory,
-			cpuScore, memoryScore,
-		)
-	}
-
-	return schedulerapi.HostPriority{
-		Host:  node.Name,
-		Score: int((cpuScore + memoryScore) / 2),
-	}, nil
-}
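Note: a self-contained sketch of how the two leastRequestedScore values average into the final score, with hypothetical node sizes and a local maxPriority standing in for schedulerapi.MaxPriority:

package main

import "fmt"

const maxPriority int64 = 10 // stands in for schedulerapi.MaxPriority

// leastScore mirrors leastRequestedScore: the more unused capacity, the
// higher the score.
func leastScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (capacity - requested) * maxPriority / capacity
}

func main() {
	// Hypothetical 4000-millicore, 8GiB node carrying 1000 millicores (25%)
	// and 6GiB (75%) of non-zero requests.
	cpu := leastScore(1000, 4000)   // (4000-1000)*10/4000 = 7
	mem := leastScore(6<<30, 8<<30) // (8GiB-6GiB)*10/8GiB = 2
	fmt.Println((cpu + mem) / 2)    // leastResourceScorer result: 4
}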

plugin/pkg/scheduler/algorithm/priorities/most_requested.go

@@ -17,28 +17,23 @@ limitations under the License.
 package priorities
 
 import (
-	"fmt"
-
-	"k8s.io/api/core/v1"
 	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
-
-	"github.com/golang/glog"
 )
 
-// MostRequestedPriority is a priority function that favors nodes with most requested resources.
-// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
-// based on the maximum of the average of the fraction of requested to capacity.
-// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
-func MostRequestedPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
-	var nonZeroRequest *schedulercache.Resource
-	if priorityMeta, ok := meta.(*priorityMetadata); ok {
-		nonZeroRequest = priorityMeta.nonZeroRequest
-	} else {
-		// We couldn't parse metadata - fallback to computing it.
-		nonZeroRequest = getNonZeroRequests(pod)
-	}
-	return calculateUsedPriority(pod, nonZeroRequest, nodeInfo)
+var (
+	mostResourcePriority = &ResourceAllocationPriority{"MostResourceAllocation", mostResourceScorer}
+
+	// MostRequestedPriority is a priority function that favors nodes with most requested resources.
+	// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
+	// based on the maximum of the average of the fraction of requested to capacity.
+	// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
+	MostRequestedPriorityMap = mostResourcePriority.PriorityMap
+)
+
+func mostResourceScorer(requested, allocable *schedulercache.Resource) int64 {
+	return (mostRequestedScore(requested.MilliCPU, allocable.MilliCPU) +
+		mostRequestedScore(requested.Memory, allocable.Memory)) / 2
 }
 
 // The used capacity is calculated on a scale of 0-10
@@ -48,45 +43,13 @@ func MostRequestedPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
 // (10 - calculateUnusedScore). The main difference is in rounding. It was added to
 // keep the final formula clean and not to modify the widely used (by users
 // in their default scheduling policies) calculateUnusedScore.
-func calculateUsedScore(requested int64, capacity int64, node string) int64 {
+func mostRequestedScore(requested, capacity int64) int64 {
 	if capacity == 0 {
 		return 0
 	}
 	if requested > capacity {
-		glog.V(10).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
-			requested, capacity, node)
 		return 0
 	}
 	return (requested * schedulerapi.MaxPriority) / capacity
 }
-
-// Calculate the resource used on a node. 'node' has information about the resources on the node.
-// 'pods' is a list of pods currently scheduled on the node.
-func calculateUsedPriority(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
-	node := nodeInfo.Node()
-	if node == nil {
-		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
-	}
-
-	allocatableResources := nodeInfo.AllocatableResource()
-	totalResources := *podRequests
-	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
-	totalResources.Memory += nodeInfo.NonZeroRequest().Memory
-
-	cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
-	memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
-
-	if glog.V(10) {
-		glog.Infof(
-			"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
-			pod.Name, node.Name,
-			allocatableResources.MilliCPU, allocatableResources.Memory,
-			totalResources.MilliCPU, totalResources.Memory,
-			cpuScore, memoryScore,
-		)
-	}
-
-	return schedulerapi.HostPriority{
-		Host:  node.Name,
-		Score: int((cpuScore + memoryScore) / 2),
-	}, nil
-}
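Note: the comment above says mostRequestedScore is not simply 10 - calculateUnusedScore because of rounding. A small standalone sketch makes the difference concrete (local maxPriority stands in for schedulerapi.MaxPriority):

package main

import "fmt"

const maxPriority int64 = 10 // stands in for schedulerapi.MaxPriority

// mostScore and leastScore mirror mostRequestedScore and leastRequestedScore.
func mostScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return requested * maxPriority / capacity
}

func leastScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (capacity - requested) * maxPriority / capacity
}

func main() {
	// 1000 of 4000 millicores requested: both formulas truncate toward zero,
	// so the two scores differ by one point instead of summing to 10.
	fmt.Println(mostScore(1000, 4000))                // 2 (2.5 truncated)
	fmt.Println(maxPriority - leastScore(1000, 4000)) // 3 (leastScore 7.5 truncated to 7)
}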

plugin/pkg/scheduler/algorithm/priorities/resource_allocation.go (new file)

@@ -0,0 +1,82 @@
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package priorities
+
+import (
+	"fmt"
+
+	"github.com/golang/glog"
+
+	"k8s.io/api/core/v1"
+	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
+	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
+	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
+)
+
+// ResourceAllocationPriority pairs a name with a resource scoring function so
+// that the three resource allocation priorities can share one PriorityMap.
+type ResourceAllocationPriority struct {
+	Name   string
+	scorer func(requested, allocable *schedulercache.Resource) int64
+}
+
+// PriorityMap scores a node for the given pod by summing the pod's non-zero
+// requests with the node's existing non-zero requests and delegating to the
+// configured scorer.
+func (r *ResourceAllocationPriority) PriorityMap(
+	pod *v1.Pod,
+	meta interface{},
+	nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
+	node := nodeInfo.Node()
+	if node == nil {
+		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
+	}
+	allocatable := nodeInfo.AllocatableResource()
+
+	var requested schedulercache.Resource
+	if priorityMeta, ok := meta.(*priorityMetadata); ok {
+		requested = *priorityMeta.nonZeroRequest
+	} else {
+		// We couldn't parse metadata - fallback to computing it.
+		requested = *getNonZeroRequests(pod)
+	}
+
+	requested.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
+	requested.Memory += nodeInfo.NonZeroRequest().Memory
+
+	score := r.scorer(&requested, &allocatable)
+
+	if glog.V(10) {
+		glog.Infof(
+			"%v -> %v: %v, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
+			pod.Name, node.Name, r.Name,
+			allocatable.MilliCPU, allocatable.Memory,
+			requested.MilliCPU, requested.Memory,
+			score,
+		)
+	}
+
+	return schedulerapi.HostPriority{
+		Host:  node.Name,
+		Score: int(score),
+	}, nil
+}
+
+// getNonZeroRequests sums the non-zero cpu and memory requests of all
+// containers in the pod, substituting defaults for unset requests.
+func getNonZeroRequests(pod *v1.Pod) *schedulercache.Resource {
+	result := &schedulercache.Resource{}
+	for i := range pod.Spec.Containers {
+		container := &pod.Spec.Containers[i]
+		cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
+		result.MilliCPU += cpu
+		result.Memory += memory
+	}
+	return result
+}
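Note: the pattern this file introduces, one generic PriorityMap with pluggable scorers, can be sketched outside the scheduler. In the sketch below, Resource, allocationPriority, and score are simplified stand-ins for schedulercache.Resource, ResourceAllocationPriority, and the real PriorityMap (which also resolves the node, handles metadata, and logs at V(10)):

package main

import "fmt"

// Resource is a simplified stand-in for schedulercache.Resource.
type Resource struct {
	MilliCPU int64
	Memory   int64
}

// allocationPriority mirrors the shape of ResourceAllocationPriority:
// a name plus a pluggable scoring function.
type allocationPriority struct {
	name   string
	scorer func(requested, allocable *Resource) int64
}

// score delegates to the configured scorer, as PriorityMap does.
func (p *allocationPriority) score(requested, allocable *Resource) int64 {
	return p.scorer(requested, allocable)
}

func leastRequested(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (capacity - requested) * 10 / capacity
}

func main() {
	least := &allocationPriority{
		name: "LeastResourceAllocation",
		scorer: func(req, alloc *Resource) int64 {
			return (leastRequested(req.MilliCPU, alloc.MilliCPU) +
				leastRequested(req.Memory, alloc.Memory)) / 2
		},
	}
	req := &Resource{MilliCPU: 1000, Memory: 2 << 30}
	alloc := &Resource{MilliCPU: 4000, Memory: 8 << 30}
	fmt.Printf("%s: %d\n", least.name, least.score(req, alloc)) // LeastResourceAllocation: 7
}

Swapping in a different scorer yields the other two priorities unchanged, which is the point of the merge: the node lookup, metadata fallback, and logging live in one place instead of three.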