mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-30 15:05:27 +00:00
Merge pull request #35612 from gmarek/scheduler2
Automatic merge from submit-queue split scheduler priorities into separate files In the current state it's really hard to find a thing one is looking for, if he doesn't know already know where to look. cc @davidopp
This commit is contained in:
commit
8c90bc35e2
@ -13,16 +13,23 @@ load(
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"balanced_resource_allocation.go",
|
||||
"image_locality.go",
|
||||
"interpod_affinity.go",
|
||||
"least_requested.go",
|
||||
"metadata.go",
|
||||
"most_requested.go",
|
||||
"node_affinity.go",
|
||||
"priorities.go",
|
||||
"node_label.go",
|
||||
"node_prefer_avoid_pods.go",
|
||||
"selector_spreading.go",
|
||||
"taint_toleration.go",
|
||||
"test_util.go",
|
||||
],
|
||||
tags = ["automanaged"],
|
||||
deps = [
|
||||
"//pkg/api:go_default_library",
|
||||
"//pkg/api/resource:go_default_library",
|
||||
"//pkg/api/unversioned:go_default_library",
|
||||
"//pkg/labels:go_default_library",
|
||||
"//pkg/util/node:go_default_library",
|
||||
@ -39,8 +46,14 @@ go_library(
|
||||
go_test(
|
||||
name = "go_default_test",
|
||||
srcs = [
|
||||
"balanced_resource_allocation_test.go",
|
||||
"image_locality_test.go",
|
||||
"interpod_affinity_test.go",
|
||||
"least_requested_test.go",
|
||||
"most_requested_test.go",
|
||||
"node_affinity_test.go",
|
||||
"node_label_test.go",
|
||||
"node_prefer_avoid_pods_test.go",
|
||||
"priorities_test.go",
|
||||
"selector_spreading_test.go",
|
||||
"taint_toleration_test.go",
|
||||
|
@ -0,0 +1,116 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
|
||||
const (
|
||||
mb int64 = 1024 * 1024
|
||||
minImgSize int64 = 23 * mb
|
||||
maxImgSize int64 = 1000 * mb
|
||||
)
|
||||
|
||||
// Also used in most/least_requested nad metadata.
|
||||
// TODO: despaghettify it
|
||||
func getNonZeroRequests(pod *api.Pod) *schedulercache.Resource {
|
||||
result := &schedulercache.Resource{}
|
||||
for i := range pod.Spec.Containers {
|
||||
container := &pod.Spec.Containers[i]
|
||||
cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
|
||||
result.MilliCPU += cpu
|
||||
result.Memory += memory
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func calculateBalancedResourceAllocation(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
allocatableResources := nodeInfo.AllocatableResource()
|
||||
totalResources := *podRequests
|
||||
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
|
||||
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
|
||||
|
||||
cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
|
||||
memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
|
||||
score := int(0)
|
||||
if cpuFraction >= 1 || memoryFraction >= 1 {
|
||||
// if requested >= capacity, the corresponding host should never be preferred.
|
||||
score = 0
|
||||
} else {
|
||||
// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
|
||||
// respectively. Multilying the absolute value of the difference by 10 scales the value to
|
||||
// 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
|
||||
// 10 leads to the score which also scales from 0 to 10 while 10 representing well balanced.
|
||||
diff := math.Abs(cpuFraction - memoryFraction)
|
||||
score = int(10 - diff*10)
|
||||
}
|
||||
if glog.V(10) {
|
||||
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
|
||||
// not logged. There is visible performance gain from it.
|
||||
glog.V(10).Infof(
|
||||
"%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
|
||||
pod.Name, node.Name,
|
||||
allocatableResources.MilliCPU, allocatableResources.Memory,
|
||||
totalResources.MilliCPU, totalResources.Memory,
|
||||
score,
|
||||
)
|
||||
}
|
||||
|
||||
return schedulerapi.HostPriority{
|
||||
Host: node.Name,
|
||||
Score: score,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func fractionOfCapacity(requested, capacity int64) float64 {
|
||||
if capacity == 0 {
|
||||
return 1
|
||||
}
|
||||
return float64(requested) / float64(capacity)
|
||||
}
|
||||
|
||||
// BalancedResourceAllocation favors nodes with balanced resource usage rate.
|
||||
// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
|
||||
// It calculates the difference between the cpu and memory fracion of capacity, and prioritizes the host based on how
|
||||
// close the two metrics are to each other.
|
||||
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
|
||||
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
|
||||
func BalancedResourceAllocationMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
var nonZeroRequest *schedulercache.Resource
|
||||
if priorityMeta, ok := meta.(*priorityMetadata); ok {
|
||||
nonZeroRequest = priorityMeta.nonZeroRequest
|
||||
} else {
|
||||
// We couldn't parse metadatat - fallback to computing it.
|
||||
nonZeroRequest = getNonZeroRequests(pod)
|
||||
}
|
||||
return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo)
|
||||
}
|
@ -0,0 +1,263 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/api/resource"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
)
|
||||
|
||||
func TestBalancedResourceAllocation(t *testing.T) {
|
||||
labels1 := map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "blah",
|
||||
}
|
||||
labels2 := map[string]string{
|
||||
"bar": "foo",
|
||||
"baz": "blah",
|
||||
}
|
||||
machine1Spec := api.PodSpec{
|
||||
NodeName: "machine1",
|
||||
}
|
||||
machine2Spec := api.PodSpec{
|
||||
NodeName: "machine2",
|
||||
}
|
||||
noResources := api.PodSpec{
|
||||
Containers: []api.Container{},
|
||||
}
|
||||
cpuOnly := api.PodSpec{
|
||||
NodeName: "machine1",
|
||||
Containers: []api.Container{
|
||||
{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
"cpu": resource.MustParse("1000m"),
|
||||
"memory": resource.MustParse("0"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
"cpu": resource.MustParse("2000m"),
|
||||
"memory": resource.MustParse("0"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
cpuOnly2 := cpuOnly
|
||||
cpuOnly2.NodeName = "machine2"
|
||||
cpuAndMemory := api.PodSpec{
|
||||
NodeName: "machine2",
|
||||
Containers: []api.Container{
|
||||
{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
"cpu": resource.MustParse("1000m"),
|
||||
"memory": resource.MustParse("2000"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
"cpu": resource.MustParse("2000m"),
|
||||
"memory": resource.MustParse("3000"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
tests := []struct {
|
||||
pod *api.Pod
|
||||
pods []*api.Pod
|
||||
nodes []*api.Node
|
||||
expectedList schedulerapi.HostPriorityList
|
||||
test string
|
||||
}{
|
||||
{
|
||||
/*
|
||||
Node1 scores (remaining resources) on 0-10 scale
|
||||
CPU Fraction: 0 / 4000 = 0%
|
||||
Memory Fraction: 0 / 10000 = 0%
|
||||
Node1 Score: 10 - (0-0)*10 = 10
|
||||
|
||||
Node2 scores (remaining resources) on 0-10 scale
|
||||
CPU Fraction: 0 / 4000 = 0 %
|
||||
Memory Fraction: 0 / 10000 = 0%
|
||||
Node2 Score: 10 - (0-0)*10 = 10
|
||||
*/
|
||||
pod: &api.Pod{Spec: noResources},
|
||||
nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
|
||||
test: "nothing scheduled, nothing requested",
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Fraction: 3000 / 4000= 75%
|
||||
Memory Fraction: 5000 / 10000 = 50%
|
||||
Node1 Score: 10 - (0.75-0.5)*10 = 7
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Fraction: 3000 / 6000= 50%
|
||||
Memory Fraction: 5000/10000 = 50%
|
||||
Node2 Score: 10 - (0.5-0.5)*10 = 10
|
||||
*/
|
||||
pod: &api.Pod{Spec: cpuAndMemory},
|
||||
nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 10}},
|
||||
test: "nothing scheduled, resources requested, differently sized machines",
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Fraction: 0 / 4000= 0%
|
||||
Memory Fraction: 0 / 10000 = 0%
|
||||
Node1 Score: 10 - (0-0)*10 = 10
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Fraction: 0 / 4000= 0%
|
||||
Memory Fraction: 0 / 10000 = 0%
|
||||
Node2 Score: 10 - (0-0)*10 = 10
|
||||
*/
|
||||
pod: &api.Pod{Spec: noResources},
|
||||
nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
|
||||
test: "no resources requested, pods scheduled",
|
||||
pods: []*api.Pod{
|
||||
{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
|
||||
{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
},
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Fraction: 6000 / 10000 = 60%
|
||||
Memory Fraction: 0 / 20000 = 0%
|
||||
Node1 Score: 10 - (0.6-0)*10 = 4
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Fraction: 6000 / 10000 = 60%
|
||||
Memory Fraction: 5000 / 20000 = 25%
|
||||
Node2 Score: 10 - (0.6-0.25)*10 = 6
|
||||
*/
|
||||
pod: &api.Pod{Spec: noResources},
|
||||
nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 6}},
|
||||
test: "no resources requested, pods scheduled with resources",
|
||||
pods: []*api.Pod{
|
||||
{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
|
||||
{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
{Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
{Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
},
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Fraction: 6000 / 10000 = 60%
|
||||
Memory Fraction: 5000 / 20000 = 25%
|
||||
Node1 Score: 10 - (0.6-0.25)*10 = 6
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Fraction: 6000 / 10000 = 60%
|
||||
Memory Fraction: 10000 / 20000 = 50%
|
||||
Node2 Score: 10 - (0.6-0.5)*10 = 9
|
||||
*/
|
||||
pod: &api.Pod{Spec: cpuAndMemory},
|
||||
nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 9}},
|
||||
test: "resources requested, pods scheduled with resources",
|
||||
pods: []*api.Pod{
|
||||
{Spec: cpuOnly},
|
||||
{Spec: cpuAndMemory},
|
||||
},
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Fraction: 6000 / 10000 = 60%
|
||||
Memory Fraction: 5000 / 20000 = 25%
|
||||
Node1 Score: 10 - (0.6-0.25)*10 = 6
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Fraction: 6000 / 10000 = 60%
|
||||
Memory Fraction: 10000 / 50000 = 20%
|
||||
Node2 Score: 10 - (0.6-0.2)*10 = 6
|
||||
*/
|
||||
pod: &api.Pod{Spec: cpuAndMemory},
|
||||
nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 6}},
|
||||
test: "resources requested, pods scheduled with resources, differently sized machines",
|
||||
pods: []*api.Pod{
|
||||
{Spec: cpuOnly},
|
||||
{Spec: cpuAndMemory},
|
||||
},
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
|
||||
Memory Fraction: 0 / 10000 = 0
|
||||
Node1 Score: 0
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
|
||||
Memory Fraction 5000 / 10000 = 50%
|
||||
Node2 Score: 0
|
||||
*/
|
||||
pod: &api.Pod{Spec: cpuOnly},
|
||||
nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
|
||||
test: "requested resources exceed node capacity",
|
||||
pods: []*api.Pod{
|
||||
{Spec: cpuOnly},
|
||||
{Spec: cpuAndMemory},
|
||||
},
|
||||
},
|
||||
{
|
||||
pod: &api.Pod{Spec: noResources},
|
||||
nodes: []*api.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
|
||||
test: "zero node resources, pods scheduled with resources",
|
||||
pods: []*api.Pod{
|
||||
{Spec: cpuOnly},
|
||||
{Spec: cpuAndMemory},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
|
||||
list, err := priorityFunction(BalancedResourceAllocationMap, nil)(test.pod, nodeNameToInfo, test.nodes)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(test.expectedList, list) {
|
||||
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
|
||||
}
|
||||
}
|
||||
}
|
79
plugin/pkg/scheduler/algorithm/priorities/image_locality.go
Normal file
79
plugin/pkg/scheduler/algorithm/priorities/image_locality.go
Normal file
@ -0,0 +1,79 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
)
|
||||
|
||||
// ImageLocalityPriority is a priority function that favors nodes that already have requested pod container's images.
|
||||
// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
|
||||
// based on the total size of those images.
|
||||
// - If none of the images are present, this node will be given the lowest priority.
|
||||
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
|
||||
func ImageLocalityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
var sumSize int64
|
||||
for i := range pod.Spec.Containers {
|
||||
sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i])
|
||||
}
|
||||
return schedulerapi.HostPriority{
|
||||
Host: node.Name,
|
||||
Score: calculateScoreFromSize(sumSize),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node.
|
||||
// 1. Split image size range into 10 buckets.
|
||||
// 2. Decide the priority of a given sumSize based on which bucket it belongs to.
|
||||
func calculateScoreFromSize(sumSize int64) int {
|
||||
var score int
|
||||
switch {
|
||||
case sumSize == 0 || sumSize < minImgSize:
|
||||
// score == 0 means none of the images required by this pod are present on this
|
||||
// node or the total size of the images present is too small to be taken into further consideration.
|
||||
score = 0
|
||||
// If existing images' total size is larger than max, just make it highest priority.
|
||||
case sumSize >= maxImgSize:
|
||||
score = 10
|
||||
default:
|
||||
score = int((10 * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
|
||||
}
|
||||
// Return which bucket the given size belongs to
|
||||
return score
|
||||
}
|
||||
|
||||
// checkContainerImageOnNode checks if a container image is present on a node and returns its size.
|
||||
func checkContainerImageOnNode(node *api.Node, container *api.Container) int64 {
|
||||
for _, image := range node.Status.Images {
|
||||
for _, name := range image.Names {
|
||||
if container.Image == name {
|
||||
// Should return immediately.
|
||||
return image.SizeBytes
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
182
plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go
Normal file
182
plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go
Normal file
@ -0,0 +1,182 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
)
|
||||
|
||||
func TestImageLocalityPriority(t *testing.T) {
|
||||
test_40_250 := api.PodSpec{
|
||||
Containers: []api.Container{
|
||||
{
|
||||
Image: "gcr.io/40",
|
||||
},
|
||||
{
|
||||
Image: "gcr.io/250",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
test_40_140 := api.PodSpec{
|
||||
Containers: []api.Container{
|
||||
{
|
||||
Image: "gcr.io/40",
|
||||
},
|
||||
{
|
||||
Image: "gcr.io/140",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
test_min_max := api.PodSpec{
|
||||
Containers: []api.Container{
|
||||
{
|
||||
Image: "gcr.io/10",
|
||||
},
|
||||
{
|
||||
Image: "gcr.io/2000",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
node_40_140_2000 := api.NodeStatus{
|
||||
Images: []api.ContainerImage{
|
||||
{
|
||||
Names: []string{
|
||||
"gcr.io/40",
|
||||
"gcr.io/40:v1",
|
||||
"gcr.io/40:v1",
|
||||
},
|
||||
SizeBytes: int64(40 * mb),
|
||||
},
|
||||
{
|
||||
Names: []string{
|
||||
"gcr.io/140",
|
||||
"gcr.io/140:v1",
|
||||
},
|
||||
SizeBytes: int64(140 * mb),
|
||||
},
|
||||
{
|
||||
Names: []string{
|
||||
"gcr.io/2000",
|
||||
},
|
||||
SizeBytes: int64(2000 * mb),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
node_250_10 := api.NodeStatus{
|
||||
Images: []api.ContainerImage{
|
||||
{
|
||||
Names: []string{
|
||||
"gcr.io/250",
|
||||
},
|
||||
SizeBytes: int64(250 * mb),
|
||||
},
|
||||
{
|
||||
Names: []string{
|
||||
"gcr.io/10",
|
||||
"gcr.io/10:v1",
|
||||
},
|
||||
SizeBytes: int64(10 * mb),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
pod *api.Pod
|
||||
pods []*api.Pod
|
||||
nodes []*api.Node
|
||||
expectedList schedulerapi.HostPriorityList
|
||||
test string
|
||||
}{
|
||||
{
|
||||
// Pod: gcr.io/40 gcr.io/250
|
||||
|
||||
// Node1
|
||||
// Image: gcr.io/40 40MB
|
||||
// Score: (40M-23M)/97.7M + 1 = 1
|
||||
|
||||
// Node2
|
||||
// Image: gcr.io/250 250MB
|
||||
// Score: (250M-23M)/97.7M + 1 = 3
|
||||
pod: &api.Pod{Spec: test_40_250},
|
||||
nodes: []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}},
|
||||
test: "two images spread on two nodes, prefer the larger image one",
|
||||
},
|
||||
{
|
||||
// Pod: gcr.io/40 gcr.io/140
|
||||
|
||||
// Node1
|
||||
// Image: gcr.io/40 40MB, gcr.io/140 140MB
|
||||
// Score: (40M+140M-23M)/97.7M + 1 = 2
|
||||
|
||||
// Node2
|
||||
// Image: not present
|
||||
// Score: 0
|
||||
pod: &api.Pod{Spec: test_40_140},
|
||||
nodes: []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: 0}},
|
||||
test: "two images on one node, prefer this node",
|
||||
},
|
||||
{
|
||||
// Pod: gcr.io/2000 gcr.io/10
|
||||
|
||||
// Node1
|
||||
// Image: gcr.io/2000 2000MB
|
||||
// Score: 2000 > max score = 10
|
||||
|
||||
// Node2
|
||||
// Image: gcr.io/10 10MB
|
||||
// Score: 10 < min score = 0
|
||||
pod: &api.Pod{Spec: test_min_max},
|
||||
nodes: []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}},
|
||||
test: "if exceed limit, use limit",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
|
||||
list, err := priorityFunction(ImageLocalityPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
sort.Sort(test.expectedList)
|
||||
sort.Sort(list)
|
||||
|
||||
if !reflect.DeepEqual(test.expectedList, list) {
|
||||
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func makeImageNode(node string, status api.NodeStatus) *api.Node {
|
||||
return &api.Node{
|
||||
ObjectMeta: api.ObjectMeta{Name: node},
|
||||
Status: status,
|
||||
}
|
||||
}
|
91
plugin/pkg/scheduler/algorithm/priorities/least_requested.go
Normal file
91
plugin/pkg/scheduler/algorithm/priorities/least_requested.go
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
|
||||
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
|
||||
// based on the minimum of the average of the fraction of requested to capacity.
|
||||
// Details: cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity) / 2
|
||||
func LeastRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
var nonZeroRequest *schedulercache.Resource
|
||||
if priorityMeta, ok := meta.(*priorityMetadata); ok {
|
||||
nonZeroRequest = priorityMeta.nonZeroRequest
|
||||
} else {
|
||||
// We couldn't parse metadata - fallback to computing it.
|
||||
nonZeroRequest = getNonZeroRequests(pod)
|
||||
}
|
||||
return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
|
||||
}
|
||||
|
||||
// The unused capacity is calculated on a scale of 0-10
|
||||
// 0 being the lowest priority and 10 being the highest.
|
||||
// The more unused resources the higher the score is.
|
||||
func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
|
||||
if capacity == 0 {
|
||||
return 0
|
||||
}
|
||||
if requested > capacity {
|
||||
glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
|
||||
requested, capacity, node)
|
||||
return 0
|
||||
}
|
||||
return ((capacity - requested) * 10) / capacity
|
||||
}
|
||||
|
||||
// Calculates host priority based on the amount of unused resources.
|
||||
// 'node' has information about the resources on the node.
|
||||
// 'pods' is a list of pods currently scheduled on the node.
|
||||
func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
allocatableResources := nodeInfo.AllocatableResource()
|
||||
totalResources := *podRequests
|
||||
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
|
||||
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
|
||||
|
||||
cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
|
||||
memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
|
||||
if glog.V(10) {
|
||||
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
|
||||
// not logged. There is visible performance gain from it.
|
||||
glog.V(10).Infof(
|
||||
"%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
|
||||
pod.Name, node.Name,
|
||||
allocatableResources.MilliCPU, allocatableResources.Memory,
|
||||
totalResources.MilliCPU, totalResources.Memory,
|
||||
cpuScore, memoryScore,
|
||||
)
|
||||
}
|
||||
|
||||
return schedulerapi.HostPriority{
|
||||
Host: node.Name,
|
||||
Score: int((cpuScore + memoryScore) / 2),
|
||||
}, nil
|
||||
}
|
@ -0,0 +1,263 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/api/resource"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
)
|
||||
|
||||
func TestLeastRequested(t *testing.T) {
|
||||
labels1 := map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "blah",
|
||||
}
|
||||
labels2 := map[string]string{
|
||||
"bar": "foo",
|
||||
"baz": "blah",
|
||||
}
|
||||
machine1Spec := api.PodSpec{
|
||||
NodeName: "machine1",
|
||||
}
|
||||
machine2Spec := api.PodSpec{
|
||||
NodeName: "machine2",
|
||||
}
|
||||
noResources := api.PodSpec{
|
||||
Containers: []api.Container{},
|
||||
}
|
||||
cpuOnly := api.PodSpec{
|
||||
NodeName: "machine1",
|
||||
Containers: []api.Container{
|
||||
{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
"cpu": resource.MustParse("1000m"),
|
||||
"memory": resource.MustParse("0"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
"cpu": resource.MustParse("2000m"),
|
||||
"memory": resource.MustParse("0"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
cpuOnly2 := cpuOnly
|
||||
cpuOnly2.NodeName = "machine2"
|
||||
cpuAndMemory := api.PodSpec{
|
||||
NodeName: "machine2",
|
||||
Containers: []api.Container{
|
||||
{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
"cpu": resource.MustParse("1000m"),
|
||||
"memory": resource.MustParse("2000"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
"cpu": resource.MustParse("2000m"),
|
||||
"memory": resource.MustParse("3000"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
tests := []struct {
|
||||
pod *api.Pod
|
||||
pods []*api.Pod
|
||||
nodes []*api.Node
|
||||
expectedList schedulerapi.HostPriorityList
|
||||
test string
|
||||
}{
|
||||
{
|
||||
/*
|
||||
Node1 scores (remaining resources) on 0-10 scale
|
||||
CPU Score: ((4000 - 0) *10) / 4000 = 10
|
||||
Memory Score: ((10000 - 0) *10) / 10000 = 10
|
||||
Node1 Score: (10 + 10) / 2 = 10
|
||||
|
||||
Node2 scores (remaining resources) on 0-10 scale
|
||||
CPU Score: ((4000 - 0) *10) / 4000 = 10
|
||||
Memory Score: ((10000 - 0) *10) / 10000 = 10
|
||||
Node2 Score: (10 + 10) / 2 = 10
|
||||
*/
|
||||
pod: &api.Pod{Spec: noResources},
|
||||
nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
|
||||
test: "nothing scheduled, nothing requested",
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Score: ((4000 - 3000) *10) / 4000 = 2.5
|
||||
Memory Score: ((10000 - 5000) *10) / 10000 = 5
|
||||
Node1 Score: (2.5 + 5) / 2 = 3
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Score: ((6000 - 3000) *10) / 6000 = 5
|
||||
Memory Score: ((10000 - 5000) *10) / 10000 = 5
|
||||
Node2 Score: (5 + 5) / 2 = 5
|
||||
*/
|
||||
pod: &api.Pod{Spec: cpuAndMemory},
|
||||
nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 5}},
|
||||
test: "nothing scheduled, resources requested, differently sized machines",
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Score: ((4000 - 0) *10) / 4000 = 10
|
||||
Memory Score: ((10000 - 0) *10) / 10000 = 10
|
||||
Node1 Score: (10 + 10) / 2 = 10
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Score: ((4000 - 0) *10) / 4000 = 10
|
||||
Memory Score: ((10000 - 0) *10) / 10000 = 10
|
||||
Node2 Score: (10 + 10) / 2 = 10
|
||||
*/
|
||||
pod: &api.Pod{Spec: noResources},
|
||||
nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
|
||||
test: "no resources requested, pods scheduled",
|
||||
pods: []*api.Pod{
|
||||
{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
|
||||
{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
},
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Score: ((10000 - 6000) *10) / 10000 = 4
|
||||
Memory Score: ((20000 - 0) *10) / 20000 = 10
|
||||
Node1 Score: (4 + 10) / 2 = 7
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Score: ((10000 - 6000) *10) / 10000 = 4
|
||||
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
|
||||
Node2 Score: (4 + 7.5) / 2 = 5
|
||||
*/
|
||||
pod: &api.Pod{Spec: noResources},
|
||||
nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 5}},
|
||||
test: "no resources requested, pods scheduled with resources",
|
||||
pods: []*api.Pod{
|
||||
{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
|
||||
{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
{Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
{Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
},
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Score: ((10000 - 6000) *10) / 10000 = 4
|
||||
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
|
||||
Node1 Score: (4 + 7.5) / 2 = 5
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Score: ((10000 - 6000) *10) / 10000 = 4
|
||||
Memory Score: ((20000 - 10000) *10) / 20000 = 5
|
||||
Node2 Score: (4 + 5) / 2 = 4
|
||||
*/
|
||||
pod: &api.Pod{Spec: cpuAndMemory},
|
||||
nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 4}},
|
||||
test: "resources requested, pods scheduled with resources",
|
||||
pods: []*api.Pod{
|
||||
{Spec: cpuOnly},
|
||||
{Spec: cpuAndMemory},
|
||||
},
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Score: ((10000 - 6000) *10) / 10000 = 4
|
||||
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
|
||||
Node1 Score: (4 + 7.5) / 2 = 5
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Score: ((10000 - 6000) *10) / 10000 = 4
|
||||
Memory Score: ((50000 - 10000) *10) / 50000 = 8
|
||||
Node2 Score: (4 + 8) / 2 = 6
|
||||
*/
|
||||
pod: &api.Pod{Spec: cpuAndMemory},
|
||||
nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 6}},
|
||||
test: "resources requested, pods scheduled with resources, differently sized machines",
|
||||
pods: []*api.Pod{
|
||||
{Spec: cpuOnly},
|
||||
{Spec: cpuAndMemory},
|
||||
},
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Score: ((4000 - 6000) *10) / 4000 = 0
|
||||
Memory Score: ((10000 - 0) *10) / 10000 = 10
|
||||
Node1 Score: (0 + 10) / 2 = 5
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Score: ((4000 - 6000) *10) / 4000 = 0
|
||||
Memory Score: ((10000 - 5000) *10) / 10000 = 5
|
||||
Node2 Score: (0 + 5) / 2 = 2
|
||||
*/
|
||||
pod: &api.Pod{Spec: cpuOnly},
|
||||
nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 2}},
|
||||
test: "requested resources exceed node capacity",
|
||||
pods: []*api.Pod{
|
||||
{Spec: cpuOnly},
|
||||
{Spec: cpuAndMemory},
|
||||
},
|
||||
},
|
||||
{
|
||||
pod: &api.Pod{Spec: noResources},
|
||||
nodes: []*api.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
|
||||
test: "zero node resources, pods scheduled with resources",
|
||||
pods: []*api.Pod{
|
||||
{Spec: cpuOnly},
|
||||
{Spec: cpuAndMemory},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
|
||||
list, err := priorityFunction(LeastRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(test.expectedList, list) {
|
||||
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
|
||||
}
|
||||
}
|
||||
}
|
94
plugin/pkg/scheduler/algorithm/priorities/most_requested.go
Normal file
94
plugin/pkg/scheduler/algorithm/priorities/most_requested.go
Normal file
@ -0,0 +1,94 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
// MostRequestedPriority is a priority function that favors nodes with most requested resources.
|
||||
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
|
||||
// based on the maximum of the average of the fraction of requested to capacity.
|
||||
// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
|
||||
func MostRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
var nonZeroRequest *schedulercache.Resource
|
||||
if priorityMeta, ok := meta.(*priorityMetadata); ok {
|
||||
nonZeroRequest = priorityMeta.nonZeroRequest
|
||||
} else {
|
||||
// We couldn't parse metadatat - fallback to computing it.
|
||||
nonZeroRequest = getNonZeroRequests(pod)
|
||||
}
|
||||
return calculateUsedPriority(pod, nonZeroRequest, nodeInfo)
|
||||
}
|
||||
|
||||
// The used capacity is calculated on a scale of 0-10
|
||||
// 0 being the lowest priority and 10 being the highest.
|
||||
// The more resources are used the higher the score is. This function
|
||||
// is almost a reversed version of least_requested_priority.calculatUnusedScore
|
||||
// (10 - calculateUnusedScore). The main difference is in rounding. It was added to
|
||||
// keep the final formula clean and not to modify the widely used (by users
|
||||
// in their default scheduling policies) calculateUSedScore.
|
||||
func calculateUsedScore(requested int64, capacity int64, node string) int64 {
|
||||
if capacity == 0 {
|
||||
return 0
|
||||
}
|
||||
if requested > capacity {
|
||||
glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
|
||||
requested, capacity, node)
|
||||
return 0
|
||||
}
|
||||
return (requested * 10) / capacity
|
||||
}
|
||||
|
||||
// Calculate the resource used on a node. 'node' has information about the resources on the node.
|
||||
// 'pods' is a list of pods currently scheduled on the node.
|
||||
func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
allocatableResources := nodeInfo.AllocatableResource()
|
||||
totalResources := *podRequests
|
||||
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
|
||||
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
|
||||
|
||||
cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
|
||||
memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
|
||||
if glog.V(10) {
|
||||
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
|
||||
// not logged. There is visible performance gain from it.
|
||||
glog.V(10).Infof(
|
||||
"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
|
||||
pod.Name, node.Name,
|
||||
allocatableResources.MilliCPU, allocatableResources.Memory,
|
||||
totalResources.MilliCPU, totalResources.Memory,
|
||||
cpuScore, memoryScore,
|
||||
)
|
||||
}
|
||||
|
||||
return schedulerapi.HostPriority{
|
||||
Host: node.Name,
|
||||
Score: int((cpuScore + memoryScore) / 2),
|
||||
}, nil
|
||||
}
|
182
plugin/pkg/scheduler/algorithm/priorities/most_requested_test.go
Normal file
182
plugin/pkg/scheduler/algorithm/priorities/most_requested_test.go
Normal file
@ -0,0 +1,182 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/api/resource"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
)
|
||||
|
||||
func TestMostRequested(t *testing.T) {
|
||||
labels1 := map[string]string{
|
||||
"foo": "bar",
|
||||
"baz": "blah",
|
||||
}
|
||||
labels2 := map[string]string{
|
||||
"bar": "foo",
|
||||
"baz": "blah",
|
||||
}
|
||||
noResources := api.PodSpec{
|
||||
Containers: []api.Container{},
|
||||
}
|
||||
cpuOnly := api.PodSpec{
|
||||
NodeName: "machine1",
|
||||
Containers: []api.Container{
|
||||
{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
"cpu": resource.MustParse("1000m"),
|
||||
"memory": resource.MustParse("0"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
"cpu": resource.MustParse("2000m"),
|
||||
"memory": resource.MustParse("0"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
cpuOnly2 := cpuOnly
|
||||
cpuOnly2.NodeName = "machine2"
|
||||
cpuAndMemory := api.PodSpec{
|
||||
NodeName: "machine2",
|
||||
Containers: []api.Container{
|
||||
{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
"cpu": resource.MustParse("1000m"),
|
||||
"memory": resource.MustParse("2000"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
"cpu": resource.MustParse("2000m"),
|
||||
"memory": resource.MustParse("3000"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
tests := []struct {
|
||||
pod *api.Pod
|
||||
pods []*api.Pod
|
||||
nodes []*api.Node
|
||||
expectedList schedulerapi.HostPriorityList
|
||||
test string
|
||||
}{
|
||||
{
|
||||
/*
|
||||
Node1 scores (used resources) on 0-10 scale
|
||||
CPU Score: (0 * 10 / 4000 = 0
|
||||
Memory Score: (0 * 10) / 10000 = 0
|
||||
Node1 Score: (0 + 0) / 2 = 0
|
||||
|
||||
Node2 scores (used resources) on 0-10 scale
|
||||
CPU Score: (0 * 10 / 4000 = 0
|
||||
Memory Score: (0 * 10 / 10000 = 0
|
||||
Node2 Score: (0 + 0) / 2 = 0
|
||||
*/
|
||||
pod: &api.Pod{Spec: noResources},
|
||||
nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
|
||||
test: "nothing scheduled, nothing requested",
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Score: (3000 * 10 / 4000 = 7.5
|
||||
Memory Score: (5000 * 10) / 10000 = 5
|
||||
Node1 Score: (7.5 + 5) / 2 = 6
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Score: (3000 * 10 / 6000 = 5
|
||||
Memory Score: (5000 * 10 / 10000 = 5
|
||||
Node2 Score: (5 + 5) / 2 = 5
|
||||
*/
|
||||
pod: &api.Pod{Spec: cpuAndMemory},
|
||||
nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 5}},
|
||||
test: "nothing scheduled, resources requested, differently sized machines",
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Score: (6000 * 10) / 10000 = 6
|
||||
Memory Score: (0 * 10) / 20000 = 10
|
||||
Node1 Score: (6 + 0) / 2 = 3
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Score: (6000 * 10) / 10000 = 6
|
||||
Memory Score: (5000 * 10) / 20000 = 2.5
|
||||
Node2 Score: (6 + 2.5) / 2 = 4
|
||||
*/
|
||||
pod: &api.Pod{Spec: noResources},
|
||||
nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 4}},
|
||||
test: "no resources requested, pods scheduled with resources",
|
||||
pods: []*api.Pod{
|
||||
{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
|
||||
{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
{Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
{Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
|
||||
},
|
||||
},
|
||||
{
|
||||
/*
|
||||
Node1 scores on 0-10 scale
|
||||
CPU Score: (6000 * 10) / 10000 = 6
|
||||
Memory Score: (5000 * 10) / 20000 = 2.5
|
||||
Node1 Score: (6 + 2.5) / 2 = 4
|
||||
|
||||
Node2 scores on 0-10 scale
|
||||
CPU Score: (6000 * 10) / 10000 = 6
|
||||
Memory Score: (10000 * 10) / 20000 = 5
|
||||
Node2 Score: (6 + 5) / 2 = 5
|
||||
*/
|
||||
pod: &api.Pod{Spec: cpuAndMemory},
|
||||
nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 5}},
|
||||
test: "resources requested, pods scheduled with resources",
|
||||
pods: []*api.Pod{
|
||||
{Spec: cpuOnly},
|
||||
{Spec: cpuAndMemory},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
|
||||
list, err := priorityFunction(MostRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(test.expectedList, list) {
|
||||
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
|
||||
}
|
||||
}
|
||||
}
|
60
plugin/pkg/scheduler/algorithm/priorities/node_label.go
Normal file
60
plugin/pkg/scheduler/algorithm/priorities/node_label.go
Normal file
@ -0,0 +1,60 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/labels"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
)
|
||||
|
||||
type NodeLabelPrioritizer struct {
|
||||
label string
|
||||
presence bool
|
||||
}
|
||||
|
||||
func NewNodeLabelPriority(label string, presence bool) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
|
||||
labelPrioritizer := &NodeLabelPrioritizer{
|
||||
label: label,
|
||||
presence: presence,
|
||||
}
|
||||
return labelPrioritizer.CalculateNodeLabelPriorityMap, nil
|
||||
}
|
||||
|
||||
// CalculateNodeLabelPriority checks whether a particular label exists on a node or not, regardless of its value.
|
||||
// If presence is true, prioritizes nodes that have the specified label, regardless of value.
|
||||
// If presence is false, prioritizes nodes that do not have the specified label.
|
||||
func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
exists := labels.Set(node.Labels).Has(n.label)
|
||||
score := 0
|
||||
if (exists && n.presence) || (!exists && !n.presence) {
|
||||
score = 10
|
||||
}
|
||||
return schedulerapi.HostPriority{
|
||||
Host: node.Name,
|
||||
Score: score,
|
||||
}, nil
|
||||
}
|
121
plugin/pkg/scheduler/algorithm/priorities/node_label_test.go
Normal file
121
plugin/pkg/scheduler/algorithm/priorities/node_label_test.go
Normal file
@ -0,0 +1,121 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
)
|
||||
|
||||
func TestNewNodeLabelPriority(t *testing.T) {
|
||||
label1 := map[string]string{"foo": "bar"}
|
||||
label2 := map[string]string{"bar": "foo"}
|
||||
label3 := map[string]string{"bar": "baz"}
|
||||
tests := []struct {
|
||||
nodes []*api.Node
|
||||
label string
|
||||
presence bool
|
||||
expectedList schedulerapi.HostPriorityList
|
||||
test string
|
||||
}{
|
||||
{
|
||||
nodes: []*api.Node{
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
|
||||
},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
|
||||
label: "baz",
|
||||
presence: true,
|
||||
test: "no match found, presence true",
|
||||
},
|
||||
{
|
||||
nodes: []*api.Node{
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
|
||||
},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
|
||||
label: "baz",
|
||||
presence: false,
|
||||
test: "no match found, presence false",
|
||||
},
|
||||
{
|
||||
nodes: []*api.Node{
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
|
||||
},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
|
||||
label: "foo",
|
||||
presence: true,
|
||||
test: "one match found, presence true",
|
||||
},
|
||||
{
|
||||
nodes: []*api.Node{
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
|
||||
},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
|
||||
label: "foo",
|
||||
presence: false,
|
||||
test: "one match found, presence false",
|
||||
},
|
||||
{
|
||||
nodes: []*api.Node{
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
|
||||
},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
|
||||
label: "bar",
|
||||
presence: true,
|
||||
test: "two matches found, presence true",
|
||||
},
|
||||
{
|
||||
nodes: []*api.Node{
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
|
||||
{ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
|
||||
},
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
|
||||
label: "bar",
|
||||
presence: false,
|
||||
test: "two matches found, presence false",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
|
||||
list, err := priorityFunction(NewNodeLabelPriority(test.label, test.presence))(nil, nodeNameToInfo, test.nodes)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
// sort the two lists to avoid failures on account of different ordering
|
||||
sort.Sort(test.expectedList)
|
||||
sort.Sort(list)
|
||||
if !reflect.DeepEqual(test.expectedList, list) {
|
||||
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,60 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
)
|
||||
|
||||
func CalculateNodePreferAvoidPodsPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
controllerRef := priorityutil.GetControllerRef(pod)
|
||||
if controllerRef != nil {
|
||||
// Ignore pods that are owned by other controller than ReplicationController
|
||||
// or ReplicaSet.
|
||||
if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
|
||||
controllerRef = nil
|
||||
}
|
||||
}
|
||||
if controllerRef == nil {
|
||||
return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
|
||||
}
|
||||
|
||||
avoids, err := api.GetAvoidPodsFromNodeAnnotations(node.Annotations)
|
||||
if err != nil {
|
||||
// If we cannot get annotation, assume it's schedulable there.
|
||||
return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
|
||||
}
|
||||
for i := range avoids.PreferAvoidPods {
|
||||
avoid := &avoids.PreferAvoidPods[i]
|
||||
if controllerRef != nil {
|
||||
if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
|
||||
return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
|
||||
}
|
@ -0,0 +1,155 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
)
|
||||
|
||||
func TestNodePreferAvoidPriority(t *testing.T) {
|
||||
annotations1 := map[string]string{
|
||||
api.PreferAvoidPodsAnnotationKey: `
|
||||
{
|
||||
"preferAvoidPods": [
|
||||
{
|
||||
"podSignature": {
|
||||
"podController": {
|
||||
"apiVersion": "v1",
|
||||
"kind": "ReplicationController",
|
||||
"name": "foo",
|
||||
"uid": "abcdef123456",
|
||||
"controller": true
|
||||
}
|
||||
},
|
||||
"reason": "some reason",
|
||||
"message": "some message"
|
||||
}
|
||||
]
|
||||
}`,
|
||||
}
|
||||
annotations2 := map[string]string{
|
||||
api.PreferAvoidPodsAnnotationKey: `
|
||||
{
|
||||
"preferAvoidPods": [
|
||||
{
|
||||
"podSignature": {
|
||||
"podController": {
|
||||
"apiVersion": "v1",
|
||||
"kind": "ReplicaSet",
|
||||
"name": "foo",
|
||||
"uid": "qwert12345",
|
||||
"controller": true
|
||||
}
|
||||
},
|
||||
"reason": "some reason",
|
||||
"message": "some message"
|
||||
}
|
||||
]
|
||||
}`,
|
||||
}
|
||||
testNodes := []*api.Node{
|
||||
{
|
||||
ObjectMeta: api.ObjectMeta{Name: "machine1", Annotations: annotations1},
|
||||
},
|
||||
{
|
||||
ObjectMeta: api.ObjectMeta{Name: "machine2", Annotations: annotations2},
|
||||
},
|
||||
{
|
||||
ObjectMeta: api.ObjectMeta{Name: "machine3"},
|
||||
},
|
||||
}
|
||||
trueVar := true
|
||||
tests := []struct {
|
||||
pod *api.Pod
|
||||
nodes []*api.Node
|
||||
expectedList schedulerapi.HostPriorityList
|
||||
test string
|
||||
}{
|
||||
{
|
||||
pod: &api.Pod{
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Namespace: "default",
|
||||
OwnerReferences: []api.OwnerReference{
|
||||
{Kind: "ReplicationController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
|
||||
},
|
||||
},
|
||||
},
|
||||
nodes: testNodes,
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
|
||||
test: "pod managed by ReplicationController should avoid a node, this node get lowest priority score",
|
||||
},
|
||||
{
|
||||
pod: &api.Pod{
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Namespace: "default",
|
||||
OwnerReferences: []api.OwnerReference{
|
||||
{Kind: "RandomController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
|
||||
},
|
||||
},
|
||||
},
|
||||
nodes: testNodes,
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
|
||||
test: "ownership by random controller should be ignored",
|
||||
},
|
||||
{
|
||||
pod: &api.Pod{
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Namespace: "default",
|
||||
OwnerReferences: []api.OwnerReference{
|
||||
{Kind: "ReplicationController", Name: "foo", UID: "abcdef123456"},
|
||||
},
|
||||
},
|
||||
},
|
||||
nodes: testNodes,
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
|
||||
test: "owner without Controller field set should be ignored",
|
||||
},
|
||||
{
|
||||
pod: &api.Pod{
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Namespace: "default",
|
||||
OwnerReferences: []api.OwnerReference{
|
||||
{Kind: "ReplicaSet", Name: "foo", UID: "qwert12345", Controller: &trueVar},
|
||||
},
|
||||
},
|
||||
},
|
||||
nodes: testNodes,
|
||||
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 10}},
|
||||
test: "pod managed by ReplicaSet should avoid a node, this node get lowest priority score",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
|
||||
list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
// sort the two lists to avoid failures on account of different ordering
|
||||
sort.Sort(test.expectedList)
|
||||
sort.Sort(list)
|
||||
if !reflect.DeepEqual(test.expectedList, list) {
|
||||
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
|
||||
}
|
||||
}
|
||||
}
|
@ -1,367 +0,0 @@
|
||||
/*
|
||||
Copyright 2014 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/labels"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
|
||||
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
)
|
||||
|
||||
func getNonZeroRequests(pod *api.Pod) *schedulercache.Resource {
|
||||
result := &schedulercache.Resource{}
|
||||
for i := range pod.Spec.Containers {
|
||||
container := &pod.Spec.Containers[i]
|
||||
cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
|
||||
result.MilliCPU += cpu
|
||||
result.Memory += memory
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// The unused capacity is calculated on a scale of 0-10
|
||||
// 0 being the lowest priority and 10 being the highest.
|
||||
// The more unused resources the higher the score is.
|
||||
func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
|
||||
if capacity == 0 {
|
||||
return 0
|
||||
}
|
||||
if requested > capacity {
|
||||
glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
|
||||
requested, capacity, node)
|
||||
return 0
|
||||
}
|
||||
return ((capacity - requested) * 10) / capacity
|
||||
}
|
||||
|
||||
// The used capacity is calculated on a scale of 0-10
|
||||
// 0 being the lowest priority and 10 being the highest.
|
||||
// The more resources are used the higher the score is. This function
|
||||
// is almost a reversed version of calculatUnusedScore (10 - calculateUnusedScore).
|
||||
// The main difference is in rounding. It was added to keep the
|
||||
// final formula clean and not to modify the widely used (by users
|
||||
// in their default scheduling policies) calculateUSedScore.
|
||||
func calculateUsedScore(requested int64, capacity int64, node string) int64 {
|
||||
if capacity == 0 {
|
||||
return 0
|
||||
}
|
||||
if requested > capacity {
|
||||
glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
|
||||
requested, capacity, node)
|
||||
return 0
|
||||
}
|
||||
return (requested * 10) / capacity
|
||||
}
|
||||
|
||||
// Calculates host priority based on the amount of unused resources.
|
||||
// 'node' has information about the resources on the node.
|
||||
// 'pods' is a list of pods currently scheduled on the node.
|
||||
func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
allocatableResources := nodeInfo.AllocatableResource()
|
||||
totalResources := *podRequests
|
||||
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
|
||||
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
|
||||
|
||||
cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
|
||||
memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
|
||||
if glog.V(10) {
|
||||
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
|
||||
// not logged. There is visible performance gain from it.
|
||||
glog.V(10).Infof(
|
||||
"%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
|
||||
pod.Name, node.Name,
|
||||
allocatableResources.MilliCPU, allocatableResources.Memory,
|
||||
totalResources.MilliCPU, totalResources.Memory,
|
||||
cpuScore, memoryScore,
|
||||
)
|
||||
}
|
||||
|
||||
return schedulerapi.HostPriority{
|
||||
Host: node.Name,
|
||||
Score: int((cpuScore + memoryScore) / 2),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Calculate the resource used on a node. 'node' has information about the resources on the node.
|
||||
// 'pods' is a list of pods currently scheduled on the node.
|
||||
func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
allocatableResources := nodeInfo.AllocatableResource()
|
||||
totalResources := *podRequests
|
||||
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
|
||||
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
|
||||
|
||||
cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
|
||||
memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
|
||||
if glog.V(10) {
|
||||
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
|
||||
// not logged. There is visible performance gain from it.
|
||||
glog.V(10).Infof(
|
||||
"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
|
||||
pod.Name, node.Name,
|
||||
allocatableResources.MilliCPU, allocatableResources.Memory,
|
||||
totalResources.MilliCPU, totalResources.Memory,
|
||||
cpuScore, memoryScore,
|
||||
)
|
||||
}
|
||||
|
||||
return schedulerapi.HostPriority{
|
||||
Host: node.Name,
|
||||
Score: int((cpuScore + memoryScore) / 2),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
|
||||
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
|
||||
// based on the minimum of the average of the fraction of requested to capacity.
|
||||
// Details: cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity) / 2
|
||||
func LeastRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
var nonZeroRequest *schedulercache.Resource
|
||||
if priorityMeta, ok := meta.(*priorityMetadata); ok {
|
||||
nonZeroRequest = priorityMeta.nonZeroRequest
|
||||
} else {
|
||||
// We couldn't parse metadata - fallback to computing it.
|
||||
nonZeroRequest = getNonZeroRequests(pod)
|
||||
}
|
||||
return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
|
||||
}
|
||||
|
||||
// MostRequestedPriority is a priority function that favors nodes with most requested resources.
|
||||
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
|
||||
// based on the maximum of the average of the fraction of requested to capacity.
|
||||
// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
|
||||
func MostRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
var nonZeroRequest *schedulercache.Resource
|
||||
if priorityMeta, ok := meta.(*priorityMetadata); ok {
|
||||
nonZeroRequest = priorityMeta.nonZeroRequest
|
||||
} else {
|
||||
// We couldn't parse metadatat - fallback to computing it.
|
||||
nonZeroRequest = getNonZeroRequests(pod)
|
||||
}
|
||||
return calculateUsedPriority(pod, nonZeroRequest, nodeInfo)
|
||||
}
|
||||
|
||||
type NodeLabelPrioritizer struct {
|
||||
label string
|
||||
presence bool
|
||||
}
|
||||
|
||||
func NewNodeLabelPriority(label string, presence bool) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
|
||||
labelPrioritizer := &NodeLabelPrioritizer{
|
||||
label: label,
|
||||
presence: presence,
|
||||
}
|
||||
return labelPrioritizer.CalculateNodeLabelPriorityMap, nil
|
||||
}
|
||||
|
||||
// CalculateNodeLabelPriority checks whether a particular label exists on a node or not, regardless of its value.
|
||||
// If presence is true, prioritizes nodes that have the specified label, regardless of value.
|
||||
// If presence is false, prioritizes nodes that do not have the specified label.
|
||||
func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
exists := labels.Set(node.Labels).Has(n.label)
|
||||
score := 0
|
||||
if (exists && n.presence) || (!exists && !n.presence) {
|
||||
score = 10
|
||||
}
|
||||
return schedulerapi.HostPriority{
|
||||
Host: node.Name,
|
||||
Score: score,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
|
||||
const (
|
||||
mb int64 = 1024 * 1024
|
||||
minImgSize int64 = 23 * mb
|
||||
maxImgSize int64 = 1000 * mb
|
||||
)
|
||||
|
||||
// ImageLocalityPriority is a priority function that favors nodes that already have requested pod container's images.
|
||||
// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
|
||||
// based on the total size of those images.
|
||||
// - If none of the images are present, this node will be given the lowest priority.
|
||||
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
|
||||
func ImageLocalityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
var sumSize int64
|
||||
for i := range pod.Spec.Containers {
|
||||
sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i])
|
||||
}
|
||||
return schedulerapi.HostPriority{
|
||||
Host: node.Name,
|
||||
Score: calculateScoreFromSize(sumSize),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// checkContainerImageOnNode checks if a container image is present on a node and returns its size.
|
||||
func checkContainerImageOnNode(node *api.Node, container *api.Container) int64 {
|
||||
for _, image := range node.Status.Images {
|
||||
for _, name := range image.Names {
|
||||
if container.Image == name {
|
||||
// Should return immediately.
|
||||
return image.SizeBytes
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node.
|
||||
// 1. Split image size range into 10 buckets.
|
||||
// 2. Decide the priority of a given sumSize based on which bucket it belongs to.
|
||||
func calculateScoreFromSize(sumSize int64) int {
|
||||
var score int
|
||||
switch {
|
||||
case sumSize == 0 || sumSize < minImgSize:
|
||||
// score == 0 means none of the images required by this pod are present on this
|
||||
// node or the total size of the images present is too small to be taken into further consideration.
|
||||
score = 0
|
||||
// If existing images' total size is larger than max, just make it highest priority.
|
||||
case sumSize >= maxImgSize:
|
||||
score = 10
|
||||
default:
|
||||
score = int((10 * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
|
||||
}
|
||||
// Return which bucket the given size belongs to
|
||||
return score
|
||||
}
|
||||
|
||||
// BalancedResourceAllocation favors nodes with balanced resource usage rate.
|
||||
// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
|
||||
// It calculates the difference between the cpu and memory fracion of capacity, and prioritizes the host based on how
|
||||
// close the two metrics are to each other.
|
||||
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
|
||||
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
|
||||
func BalancedResourceAllocationMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
var nonZeroRequest *schedulercache.Resource
|
||||
if priorityMeta, ok := meta.(*priorityMetadata); ok {
|
||||
nonZeroRequest = priorityMeta.nonZeroRequest
|
||||
} else {
|
||||
// We couldn't parse metadatat - fallback to computing it.
|
||||
nonZeroRequest = getNonZeroRequests(pod)
|
||||
}
|
||||
return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo)
|
||||
}
|
||||
|
||||
func calculateBalancedResourceAllocation(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
allocatableResources := nodeInfo.AllocatableResource()
|
||||
totalResources := *podRequests
|
||||
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
|
||||
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
|
||||
|
||||
cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
|
||||
memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
|
||||
score := int(0)
|
||||
if cpuFraction >= 1 || memoryFraction >= 1 {
|
||||
// if requested >= capacity, the corresponding host should never be preferred.
|
||||
score = 0
|
||||
} else {
|
||||
// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
|
||||
// respectively. Multilying the absolute value of the difference by 10 scales the value to
|
||||
// 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
|
||||
// 10 leads to the score which also scales from 0 to 10 while 10 representing well balanced.
|
||||
diff := math.Abs(cpuFraction - memoryFraction)
|
||||
score = int(10 - diff*10)
|
||||
}
|
||||
if glog.V(10) {
|
||||
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
|
||||
// not logged. There is visible performance gain from it.
|
||||
glog.V(10).Infof(
|
||||
"%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
|
||||
pod.Name, node.Name,
|
||||
allocatableResources.MilliCPU, allocatableResources.Memory,
|
||||
totalResources.MilliCPU, totalResources.Memory,
|
||||
score,
|
||||
)
|
||||
}
|
||||
|
||||
return schedulerapi.HostPriority{
|
||||
Host: node.Name,
|
||||
Score: score,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func fractionOfCapacity(requested, capacity int64) float64 {
|
||||
if capacity == 0 {
|
||||
return 1
|
||||
}
|
||||
return float64(requested) / float64(capacity)
|
||||
}
|
||||
|
||||
func CalculateNodePreferAvoidPodsPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
controllerRef := priorityutil.GetControllerRef(pod)
|
||||
if controllerRef != nil {
|
||||
// Ignore pods that are owned by other controller than ReplicationController
|
||||
// or ReplicaSet.
|
||||
if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
|
||||
controllerRef = nil
|
||||
}
|
||||
}
|
||||
if controllerRef == nil {
|
||||
return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
|
||||
}
|
||||
|
||||
avoids, err := api.GetAvoidPodsFromNodeAnnotations(node.Annotations)
|
||||
if err != nil {
|
||||
// If we cannot get annotation, assume it's schedulable there.
|
||||
return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
|
||||
}
|
||||
for i := range avoids.PreferAvoidPods {
|
||||
avoid := &avoids.PreferAvoidPods[i]
|
||||
if controllerRef != nil {
|
||||
if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
|
||||
return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
|
||||
}
|
File diff suppressed because it is too large
Load Diff
60
plugin/pkg/scheduler/algorithm/priorities/test_util.go
Normal file
60
plugin/pkg/scheduler/algorithm/priorities/test_util.go
Normal file
@ -0,0 +1,60 @@
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package priorities
|
||||
|
||||
import (
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/api/resource"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
|
||||
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
)
|
||||
|
||||
func makeNode(node string, milliCPU, memory int64) *api.Node {
|
||||
return &api.Node{
|
||||
ObjectMeta: api.ObjectMeta{Name: node},
|
||||
Status: api.NodeStatus{
|
||||
Capacity: api.ResourceList{
|
||||
"cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
|
||||
"memory": *resource.NewQuantity(memory, resource.BinarySI),
|
||||
},
|
||||
Allocatable: api.ResourceList{
|
||||
"cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
|
||||
"memory": *resource.NewQuantity(memory, resource.BinarySI),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction {
|
||||
return func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) {
|
||||
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
|
||||
for i := range nodes {
|
||||
hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result = append(result, hostResult)
|
||||
}
|
||||
if reduceFn != nil {
|
||||
if err := reduceFn(pod, result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user