Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-31 15:25:57 +00:00)
Merge pull request #35612 from gmarek/scheduler2

Automatic merge from submit-queue

Split scheduler priorities into separate files

In the current state it's really hard to find the thing one is looking for if one doesn't already know where to look. cc @davidopp
Commit 8c90bc35e2
@@ -13,16 +13,23 @@ load(
 go_library(
     name = "go_default_library",
     srcs = [
+        "balanced_resource_allocation.go",
+        "image_locality.go",
         "interpod_affinity.go",
+        "least_requested.go",
         "metadata.go",
+        "most_requested.go",
         "node_affinity.go",
-        "priorities.go",
+        "node_label.go",
+        "node_prefer_avoid_pods.go",
         "selector_spreading.go",
         "taint_toleration.go",
+        "test_util.go",
     ],
     tags = ["automanaged"],
     deps = [
         "//pkg/api:go_default_library",
+        "//pkg/api/resource:go_default_library",
         "//pkg/api/unversioned:go_default_library",
         "//pkg/labels:go_default_library",
         "//pkg/util/node:go_default_library",
@@ -39,8 +46,14 @@ go_library(
 go_test(
     name = "go_default_test",
     srcs = [
+        "balanced_resource_allocation_test.go",
+        "image_locality_test.go",
         "interpod_affinity_test.go",
+        "least_requested_test.go",
+        "most_requested_test.go",
         "node_affinity_test.go",
+        "node_label_test.go",
+        "node_prefer_avoid_pods_test.go",
         "priorities_test.go",
         "selector_spreading_test.go",
         "taint_toleration_test.go",
plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation.go (new file, 116 lines)
@@ -0,0 +1,116 @@

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "fmt"
    "math"

    "k8s.io/kubernetes/pkg/api"
    priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

    "github.com/golang/glog"
)

// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
const (
    mb         int64 = 1024 * 1024
    minImgSize int64 = 23 * mb
    maxImgSize int64 = 1000 * mb
)

// Also used in most/least_requested and metadata.
// TODO: despaghettify it
func getNonZeroRequests(pod *api.Pod) *schedulercache.Resource {
    result := &schedulercache.Resource{}
    for i := range pod.Spec.Containers {
        container := &pod.Spec.Containers[i]
        cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
        result.MilliCPU += cpu
        result.Memory += memory
    }
    return result
}

func calculateBalancedResourceAllocation(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    node := nodeInfo.Node()
    if node == nil {
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }

    allocatableResources := nodeInfo.AllocatableResource()
    totalResources := *podRequests
    totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
    totalResources.Memory += nodeInfo.NonZeroRequest().Memory

    cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
    memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
    score := int(0)
    if cpuFraction >= 1 || memoryFraction >= 1 {
        // if requested >= capacity, the corresponding host should never be preferred.
        score = 0
    } else {
        // Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
        // respectively. Multiplying the absolute value of the difference by 10 scales the value to
        // 0-10, with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
        // 10 leads to the score, which also scales from 0 to 10, with 10 representing well balanced.
        diff := math.Abs(cpuFraction - memoryFraction)
        score = int(10 - diff*10)
    }
    if glog.V(10) {
        // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
        // not logged. There is visible performance gain from it.
        glog.V(10).Infof(
            "%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
            pod.Name, node.Name,
            allocatableResources.MilliCPU, allocatableResources.Memory,
            totalResources.MilliCPU, totalResources.Memory,
            score,
        )
    }

    return schedulerapi.HostPriority{
        Host:  node.Name,
        Score: score,
    }, nil
}

func fractionOfCapacity(requested, capacity int64) float64 {
    if capacity == 0 {
        return 1
    }
    return float64(requested) / float64(capacity)
}

// BalancedResourceAllocation favors nodes with balanced resource usage rate.
// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how
// close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
func BalancedResourceAllocationMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    var nonZeroRequest *schedulercache.Resource
    if priorityMeta, ok := meta.(*priorityMetadata); ok {
        nonZeroRequest = priorityMeta.nonZeroRequest
    } else {
        // We couldn't parse metadata - fallback to computing it.
        nonZeroRequest = getNonZeroRequests(pod)
    }
    return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo)
}
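Read in isolation, the scoring rule above is just score = 10 - |cpuFraction - memoryFraction| * 10, floored at 0 when either resource is over-committed. Below is a minimal standalone sketch of that arithmetic; the capacities and requests are illustrative sample values, and the real function also folds the node's existing non-zero requests into the totals.

package main

import (
    "fmt"
    "math"
)

// fraction mirrors fractionOfCapacity: requested/capacity, treating a
// zero-capacity node as fully used so it is never preferred.
func fraction(requested, capacity int64) float64 {
    if capacity == 0 {
        return 1
    }
    return float64(requested) / float64(capacity)
}

// balancedScore mirrors the core of calculateBalancedResourceAllocation.
func balancedScore(reqCPU, capCPU, reqMem, capMem int64) int {
    cpuFraction := fraction(reqCPU, capCPU)
    memoryFraction := fraction(reqMem, capMem)
    if cpuFraction >= 1 || memoryFraction >= 1 {
        return 0
    }
    diff := math.Abs(cpuFraction - memoryFraction)
    return int(10 - diff*10)
}

func main() {
    // 3000 of 4000 millicores (75%) vs 5000 of 10000 memory bytes (50%):
    // 10 - |0.75-0.50|*10 = 7.5, truncated to 7.
    fmt.Println(balancedScore(3000, 4000, 5000, 10000)) // 7
    // Perfectly balanced usage scores 10; over-committed CPU scores 0.
    fmt.Println(balancedScore(2000, 4000, 5000, 10000)) // 10
    fmt.Println(balancedScore(6000, 4000, 0, 10000))    // 0
}

The same 3000/4000 + 5000/10000 combination appears as a case in the test file that follows, where it also produces a score of 7.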
plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation_test.go (new file, 263 lines)
@@ -0,0 +1,263 @@

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "reflect"
    "testing"

    "k8s.io/kubernetes/pkg/api"
    "k8s.io/kubernetes/pkg/api/resource"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestBalancedResourceAllocation(t *testing.T) {
    labels1 := map[string]string{
        "foo": "bar",
        "baz": "blah",
    }
    labels2 := map[string]string{
        "bar": "foo",
        "baz": "blah",
    }
    machine1Spec := api.PodSpec{
        NodeName: "machine1",
    }
    machine2Spec := api.PodSpec{
        NodeName: "machine2",
    }
    noResources := api.PodSpec{
        Containers: []api.Container{},
    }
    cpuOnly := api.PodSpec{
        NodeName: "machine1",
        Containers: []api.Container{
            {
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{
                        "cpu":    resource.MustParse("1000m"),
                        "memory": resource.MustParse("0"),
                    },
                },
            },
            {
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{
                        "cpu":    resource.MustParse("2000m"),
                        "memory": resource.MustParse("0"),
                    },
                },
            },
        },
    }
    cpuOnly2 := cpuOnly
    cpuOnly2.NodeName = "machine2"
    cpuAndMemory := api.PodSpec{
        NodeName: "machine2",
        Containers: []api.Container{
            {
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{
                        "cpu":    resource.MustParse("1000m"),
                        "memory": resource.MustParse("2000"),
                    },
                },
            },
            {
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{
                        "cpu":    resource.MustParse("2000m"),
                        "memory": resource.MustParse("3000"),
                    },
                },
            },
        },
    }
    tests := []struct {
        pod          *api.Pod
        pods         []*api.Pod
        nodes        []*api.Node
        expectedList schedulerapi.HostPriorityList
        test         string
    }{
        {
            /*
                Node1 scores (remaining resources) on 0-10 scale
                CPU Fraction: 0 / 4000 = 0%
                Memory Fraction: 0 / 10000 = 0%
                Node1 Score: 10 - (0-0)*10 = 10

                Node2 scores (remaining resources) on 0-10 scale
                CPU Fraction: 0 / 4000 = 0%
                Memory Fraction: 0 / 10000 = 0%
                Node2 Score: 10 - (0-0)*10 = 10
            */
            pod:          &api.Pod{Spec: noResources},
            nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
            test:         "nothing scheduled, nothing requested",
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Fraction: 3000 / 4000 = 75%
                Memory Fraction: 5000 / 10000 = 50%
                Node1 Score: 10 - (0.75-0.5)*10 = 7

                Node2 scores on 0-10 scale
                CPU Fraction: 3000 / 6000 = 50%
                Memory Fraction: 5000 / 10000 = 50%
                Node2 Score: 10 - (0.5-0.5)*10 = 10
            */
            pod:          &api.Pod{Spec: cpuAndMemory},
            nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 10}},
            test:         "nothing scheduled, resources requested, differently sized machines",
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Fraction: 0 / 4000 = 0%
                Memory Fraction: 0 / 10000 = 0%
                Node1 Score: 10 - (0-0)*10 = 10

                Node2 scores on 0-10 scale
                CPU Fraction: 0 / 4000 = 0%
                Memory Fraction: 0 / 10000 = 0%
                Node2 Score: 10 - (0-0)*10 = 10
            */
            pod:          &api.Pod{Spec: noResources},
            nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
            test:         "no resources requested, pods scheduled",
            pods: []*api.Pod{
                {Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
                {Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
                {Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
                {Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
            },
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Fraction: 6000 / 10000 = 60%
                Memory Fraction: 0 / 20000 = 0%
                Node1 Score: 10 - (0.6-0)*10 = 4

                Node2 scores on 0-10 scale
                CPU Fraction: 6000 / 10000 = 60%
                Memory Fraction: 5000 / 20000 = 25%
                Node2 Score: 10 - (0.6-0.25)*10 = 6
            */
            pod:          &api.Pod{Spec: noResources},
            nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 6}},
            test:         "no resources requested, pods scheduled with resources",
            pods: []*api.Pod{
                {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
                {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
                {Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
                {Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
            },
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Fraction: 6000 / 10000 = 60%
                Memory Fraction: 5000 / 20000 = 25%
                Node1 Score: 10 - (0.6-0.25)*10 = 6

                Node2 scores on 0-10 scale
                CPU Fraction: 6000 / 10000 = 60%
                Memory Fraction: 10000 / 20000 = 50%
                Node2 Score: 10 - (0.6-0.5)*10 = 9
            */
            pod:          &api.Pod{Spec: cpuAndMemory},
            nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 9}},
            test:         "resources requested, pods scheduled with resources",
            pods: []*api.Pod{
                {Spec: cpuOnly},
                {Spec: cpuAndMemory},
            },
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Fraction: 6000 / 10000 = 60%
                Memory Fraction: 5000 / 20000 = 25%
                Node1 Score: 10 - (0.6-0.25)*10 = 6

                Node2 scores on 0-10 scale
                CPU Fraction: 6000 / 10000 = 60%
                Memory Fraction: 10000 / 50000 = 20%
                Node2 Score: 10 - (0.6-0.2)*10 = 6
            */
            pod:          &api.Pod{Spec: cpuAndMemory},
            nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 6}},
            test:         "resources requested, pods scheduled with resources, differently sized machines",
            pods: []*api.Pod{
                {Spec: cpuOnly},
                {Spec: cpuAndMemory},
            },
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
                Memory Fraction: 0 / 10000 = 0%
                Node1 Score: 0

                Node2 scores on 0-10 scale
                CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
                Memory Fraction: 5000 / 10000 = 50%
                Node2 Score: 0
            */
            pod:          &api.Pod{Spec: cpuOnly},
            nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
            test:         "requested resources exceed node capacity",
            pods: []*api.Pod{
                {Spec: cpuOnly},
                {Spec: cpuAndMemory},
            },
        },
        {
            pod:          &api.Pod{Spec: noResources},
            nodes:        []*api.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
            test:         "zero node resources, pods scheduled with resources",
            pods: []*api.Pod{
                {Spec: cpuOnly},
                {Spec: cpuAndMemory},
            },
        },
    }

    for _, test := range tests {
        nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
        list, err := priorityFunction(BalancedResourceAllocationMap, nil)(test.pod, nodeNameToInfo, test.nodes)
        if err != nil {
            t.Errorf("unexpected error: %v", err)
        }
        if !reflect.DeepEqual(test.expectedList, list) {
            t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
        }
    }
}
plugin/pkg/scheduler/algorithm/priorities/image_locality.go (new file, 79 lines)
@@ -0,0 +1,79 @@

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "fmt"

    "k8s.io/kubernetes/pkg/api"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

// ImageLocalityPriority is a priority function that favors nodes that already have requested pod container's images.
// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
// based on the total size of those images.
// - If none of the images are present, this node will be given the lowest priority.
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
func ImageLocalityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    node := nodeInfo.Node()
    if node == nil {
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }

    var sumSize int64
    for i := range pod.Spec.Containers {
        sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i])
    }
    return schedulerapi.HostPriority{
        Host:  node.Name,
        Score: calculateScoreFromSize(sumSize),
    }, nil
}

// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node.
// 1. Split image size range into 10 buckets.
// 2. Decide the priority of a given sumSize based on which bucket it belongs to.
func calculateScoreFromSize(sumSize int64) int {
    var score int
    switch {
    case sumSize == 0 || sumSize < minImgSize:
        // score == 0 means none of the images required by this pod are present on this
        // node or the total size of the images present is too small to be taken into further consideration.
        score = 0
    // If existing images' total size is larger than max, just make it highest priority.
    case sumSize >= maxImgSize:
        score = 10
    default:
        score = int((10 * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
    }
    // Return which bucket the given size belongs to
    return score
}

// checkContainerImageOnNode checks if a container image is present on a node and returns its size.
func checkContainerImageOnNode(node *api.Node, container *api.Container) int64 {
    for _, image := range node.Status.Images {
        for _, name := range image.Names {
            if container.Image == name {
                // Should return immediately.
                return image.SizeBytes
            }
        }
    }
    return 0
}
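The bucket arithmetic in calculateScoreFromSize can be checked on its own. A minimal standalone sketch follows; the constants repeat the mb/minImgSize/maxImgSize values defined in balanced_resource_allocation.go above, and the sample sizes are the same ones used in the test file that follows.

package main

import "fmt"

const (
    mb         int64 = 1024 * 1024
    minImgSize int64 = 23 * mb
    maxImgSize int64 = 1000 * mb
)

// scoreFromSize mirrors calculateScoreFromSize: sizes below the minimum score 0,
// sizes above the maximum score 10, everything else falls into one of ten buckets
// of roughly 97.7 MB each.
func scoreFromSize(sumSize int64) int {
    switch {
    case sumSize == 0 || sumSize < minImgSize:
        return 0
    case sumSize >= maxImgSize:
        return 10
    default:
        return int((10*(sumSize-minImgSize))/(maxImgSize-minImgSize)) + 1
    }
}

func main() {
    fmt.Println(scoreFromSize(40 * mb))         // 1: one small image already present
    fmt.Println(scoreFromSize((40 + 140) * mb)) // 2: two images, 180 MB total
    fmt.Println(scoreFromSize(2000 * mb))       // 10: capped at the 1000 MB ceiling
}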
plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go (new file, 182 lines)
@@ -0,0 +1,182 @@

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "reflect"
    "sort"
    "testing"

    "k8s.io/kubernetes/pkg/api"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestImageLocalityPriority(t *testing.T) {
    test_40_250 := api.PodSpec{
        Containers: []api.Container{
            {
                Image: "gcr.io/40",
            },
            {
                Image: "gcr.io/250",
            },
        },
    }

    test_40_140 := api.PodSpec{
        Containers: []api.Container{
            {
                Image: "gcr.io/40",
            },
            {
                Image: "gcr.io/140",
            },
        },
    }

    test_min_max := api.PodSpec{
        Containers: []api.Container{
            {
                Image: "gcr.io/10",
            },
            {
                Image: "gcr.io/2000",
            },
        },
    }

    node_40_140_2000 := api.NodeStatus{
        Images: []api.ContainerImage{
            {
                Names: []string{
                    "gcr.io/40",
                    "gcr.io/40:v1",
                    "gcr.io/40:v1",
                },
                SizeBytes: int64(40 * mb),
            },
            {
                Names: []string{
                    "gcr.io/140",
                    "gcr.io/140:v1",
                },
                SizeBytes: int64(140 * mb),
            },
            {
                Names: []string{
                    "gcr.io/2000",
                },
                SizeBytes: int64(2000 * mb),
            },
        },
    }

    node_250_10 := api.NodeStatus{
        Images: []api.ContainerImage{
            {
                Names: []string{
                    "gcr.io/250",
                },
                SizeBytes: int64(250 * mb),
            },
            {
                Names: []string{
                    "gcr.io/10",
                    "gcr.io/10:v1",
                },
                SizeBytes: int64(10 * mb),
            },
        },
    }

    tests := []struct {
        pod          *api.Pod
        pods         []*api.Pod
        nodes        []*api.Node
        expectedList schedulerapi.HostPriorityList
        test         string
    }{
        {
            // Pod: gcr.io/40 gcr.io/250

            // Node1
            // Image: gcr.io/40 40MB
            // Score: (40M-23M)/97.7M + 1 = 1

            // Node2
            // Image: gcr.io/250 250MB
            // Score: (250M-23M)/97.7M + 1 = 3
            pod:          &api.Pod{Spec: test_40_250},
            nodes:        []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}},
            test:         "two images spread on two nodes, prefer the larger image one",
        },
        {
            // Pod: gcr.io/40 gcr.io/140

            // Node1
            // Image: gcr.io/40 40MB, gcr.io/140 140MB
            // Score: (40M+140M-23M)/97.7M + 1 = 2

            // Node2
            // Image: not present
            // Score: 0
            pod:          &api.Pod{Spec: test_40_140},
            nodes:        []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: 0}},
            test:         "two images on one node, prefer this node",
        },
        {
            // Pod: gcr.io/2000 gcr.io/10

            // Node1
            // Image: gcr.io/2000 2000MB
            // Score: 2000 > max score = 10

            // Node2
            // Image: gcr.io/10 10MB
            // Score: 10 < min score = 0
            pod:          &api.Pod{Spec: test_min_max},
            nodes:        []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}},
            test:         "if exceed limit, use limit",
        },
    }

    for _, test := range tests {
        nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
        list, err := priorityFunction(ImageLocalityPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
        if err != nil {
            t.Errorf("unexpected error: %v", err)
        }

        sort.Sort(test.expectedList)
        sort.Sort(list)

        if !reflect.DeepEqual(test.expectedList, list) {
            t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
        }
    }
}

func makeImageNode(node string, status api.NodeStatus) *api.Node {
    return &api.Node{
        ObjectMeta: api.ObjectMeta{Name: node},
        Status:     status,
    }
}
plugin/pkg/scheduler/algorithm/priorities/least_requested.go (new file, 91 lines)
@@ -0,0 +1,91 @@

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "fmt"

    "k8s.io/kubernetes/pkg/api"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

    "github.com/golang/glog"
)

// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the minimum of the average of the fraction of requested to capacity.
// Details: (cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity)) / 2
func LeastRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    var nonZeroRequest *schedulercache.Resource
    if priorityMeta, ok := meta.(*priorityMetadata); ok {
        nonZeroRequest = priorityMeta.nonZeroRequest
    } else {
        // We couldn't parse metadata - fallback to computing it.
        nonZeroRequest = getNonZeroRequests(pod)
    }
    return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
}

// The unused capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more unused resources the higher the score is.
func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
    if capacity == 0 {
        return 0
    }
    if requested > capacity {
        glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
            requested, capacity, node)
        return 0
    }
    return ((capacity - requested) * 10) / capacity
}

// Calculates host priority based on the amount of unused resources.
// 'node' has information about the resources on the node.
// 'pods' is a list of pods currently scheduled on the node.
func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    node := nodeInfo.Node()
    if node == nil {
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }

    allocatableResources := nodeInfo.AllocatableResource()
    totalResources := *podRequests
    totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
    totalResources.Memory += nodeInfo.NonZeroRequest().Memory

    cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
    memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
    if glog.V(10) {
        // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
        // not logged. There is visible performance gain from it.
        glog.V(10).Infof(
            "%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
            pod.Name, node.Name,
            allocatableResources.MilliCPU, allocatableResources.Memory,
            totalResources.MilliCPU, totalResources.Memory,
            cpuScore, memoryScore,
        )
    }

    return schedulerapi.HostPriority{
        Host:  node.Name,
        Score: int((cpuScore + memoryScore) / 2),
    }, nil
}
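Because both per-resource scores use integer division, the truncation is easy to miss when reading the formula. A minimal standalone sketch with illustrative request and capacity values (the real code pulls the totals from the pod plus the node's accumulated non-zero requests):

package main

import "fmt"

// unusedScore mirrors calculateUnusedScore: the fraction of capacity left free,
// scaled to 0-10 with integer arithmetic, and clamped to 0 when the node is
// empty-capacity or over-committed.
func unusedScore(requested, capacity int64) int64 {
    if capacity == 0 || requested > capacity {
        return 0
    }
    return ((capacity - requested) * 10) / capacity
}

func main() {
    // 3000 of 4000 millicores requested and 5000 of 10000 memory bytes requested.
    cpu := unusedScore(3000, 4000)     // (1000*10)/4000 = 2 (2.5 truncated)
    memory := unusedScore(5000, 10000) // (5000*10)/10000 = 5
    fmt.Println((cpu + memory) / 2)    // final node score: 3
}

These are the same numbers as the "differently sized machines" case in the test file that follows, which also expects a score of 3 for machine1.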
plugin/pkg/scheduler/algorithm/priorities/least_requested_test.go (new file, 263 lines)
@@ -0,0 +1,263 @@

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "reflect"
    "testing"

    "k8s.io/kubernetes/pkg/api"
    "k8s.io/kubernetes/pkg/api/resource"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestLeastRequested(t *testing.T) {
    labels1 := map[string]string{
        "foo": "bar",
        "baz": "blah",
    }
    labels2 := map[string]string{
        "bar": "foo",
        "baz": "blah",
    }
    machine1Spec := api.PodSpec{
        NodeName: "machine1",
    }
    machine2Spec := api.PodSpec{
        NodeName: "machine2",
    }
    noResources := api.PodSpec{
        Containers: []api.Container{},
    }
    cpuOnly := api.PodSpec{
        NodeName: "machine1",
        Containers: []api.Container{
            {
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{
                        "cpu":    resource.MustParse("1000m"),
                        "memory": resource.MustParse("0"),
                    },
                },
            },
            {
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{
                        "cpu":    resource.MustParse("2000m"),
                        "memory": resource.MustParse("0"),
                    },
                },
            },
        },
    }
    cpuOnly2 := cpuOnly
    cpuOnly2.NodeName = "machine2"
    cpuAndMemory := api.PodSpec{
        NodeName: "machine2",
        Containers: []api.Container{
            {
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{
                        "cpu":    resource.MustParse("1000m"),
                        "memory": resource.MustParse("2000"),
                    },
                },
            },
            {
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{
                        "cpu":    resource.MustParse("2000m"),
                        "memory": resource.MustParse("3000"),
                    },
                },
            },
        },
    }
    tests := []struct {
        pod          *api.Pod
        pods         []*api.Pod
        nodes        []*api.Node
        expectedList schedulerapi.HostPriorityList
        test         string
    }{
        {
            /*
                Node1 scores (remaining resources) on 0-10 scale
                CPU Score: ((4000 - 0) *10) / 4000 = 10
                Memory Score: ((10000 - 0) *10) / 10000 = 10
                Node1 Score: (10 + 10) / 2 = 10

                Node2 scores (remaining resources) on 0-10 scale
                CPU Score: ((4000 - 0) *10) / 4000 = 10
                Memory Score: ((10000 - 0) *10) / 10000 = 10
                Node2 Score: (10 + 10) / 2 = 10
            */
            pod:          &api.Pod{Spec: noResources},
            nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
            test:         "nothing scheduled, nothing requested",
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Score: ((4000 - 3000) *10) / 4000 = 2.5
                Memory Score: ((10000 - 5000) *10) / 10000 = 5
                Node1 Score: (2.5 + 5) / 2 = 3

                Node2 scores on 0-10 scale
                CPU Score: ((6000 - 3000) *10) / 6000 = 5
                Memory Score: ((10000 - 5000) *10) / 10000 = 5
                Node2 Score: (5 + 5) / 2 = 5
            */
            pod:          &api.Pod{Spec: cpuAndMemory},
            nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 5}},
            test:         "nothing scheduled, resources requested, differently sized machines",
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Score: ((4000 - 0) *10) / 4000 = 10
                Memory Score: ((10000 - 0) *10) / 10000 = 10
                Node1 Score: (10 + 10) / 2 = 10

                Node2 scores on 0-10 scale
                CPU Score: ((4000 - 0) *10) / 4000 = 10
                Memory Score: ((10000 - 0) *10) / 10000 = 10
                Node2 Score: (10 + 10) / 2 = 10
            */
            pod:          &api.Pod{Spec: noResources},
            nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
            test:         "no resources requested, pods scheduled",
            pods: []*api.Pod{
                {Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
                {Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
                {Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
                {Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
            },
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Score: ((10000 - 6000) *10) / 10000 = 4
                Memory Score: ((20000 - 0) *10) / 20000 = 10
                Node1 Score: (4 + 10) / 2 = 7

                Node2 scores on 0-10 scale
                CPU Score: ((10000 - 6000) *10) / 10000 = 4
                Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
                Node2 Score: (4 + 7.5) / 2 = 5
            */
            pod:          &api.Pod{Spec: noResources},
            nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 5}},
            test:         "no resources requested, pods scheduled with resources",
            pods: []*api.Pod{
                {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
                {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
                {Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
                {Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
            },
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Score: ((10000 - 6000) *10) / 10000 = 4
                Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
                Node1 Score: (4 + 7.5) / 2 = 5

                Node2 scores on 0-10 scale
                CPU Score: ((10000 - 6000) *10) / 10000 = 4
                Memory Score: ((20000 - 10000) *10) / 20000 = 5
                Node2 Score: (4 + 5) / 2 = 4
            */
            pod:          &api.Pod{Spec: cpuAndMemory},
            nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 4}},
            test:         "resources requested, pods scheduled with resources",
            pods: []*api.Pod{
                {Spec: cpuOnly},
                {Spec: cpuAndMemory},
            },
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Score: ((10000 - 6000) *10) / 10000 = 4
                Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
                Node1 Score: (4 + 7.5) / 2 = 5

                Node2 scores on 0-10 scale
                CPU Score: ((10000 - 6000) *10) / 10000 = 4
                Memory Score: ((50000 - 10000) *10) / 50000 = 8
                Node2 Score: (4 + 8) / 2 = 6
            */
            pod:          &api.Pod{Spec: cpuAndMemory},
            nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 6}},
            test:         "resources requested, pods scheduled with resources, differently sized machines",
            pods: []*api.Pod{
                {Spec: cpuOnly},
                {Spec: cpuAndMemory},
            },
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Score: ((4000 - 6000) *10) / 4000 = 0
                Memory Score: ((10000 - 0) *10) / 10000 = 10
                Node1 Score: (0 + 10) / 2 = 5

                Node2 scores on 0-10 scale
                CPU Score: ((4000 - 6000) *10) / 4000 = 0
                Memory Score: ((10000 - 5000) *10) / 10000 = 5
                Node2 Score: (0 + 5) / 2 = 2
            */
            pod:          &api.Pod{Spec: cpuOnly},
            nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 2}},
            test:         "requested resources exceed node capacity",
            pods: []*api.Pod{
                {Spec: cpuOnly},
                {Spec: cpuAndMemory},
            },
        },
        {
            pod:          &api.Pod{Spec: noResources},
            nodes:        []*api.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
            test:         "zero node resources, pods scheduled with resources",
            pods: []*api.Pod{
                {Spec: cpuOnly},
                {Spec: cpuAndMemory},
            },
        },
    }

    for _, test := range tests {
        nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
        list, err := priorityFunction(LeastRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
        if err != nil {
            t.Errorf("unexpected error: %v", err)
        }
        if !reflect.DeepEqual(test.expectedList, list) {
            t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
        }
    }
}
plugin/pkg/scheduler/algorithm/priorities/most_requested.go (new file, 94 lines)
@@ -0,0 +1,94 @@

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "fmt"

    "k8s.io/kubernetes/pkg/api"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

    "github.com/golang/glog"
)

// MostRequestedPriority is a priority function that favors nodes with most requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the maximum of the average of the fraction of requested to capacity.
// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
func MostRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    var nonZeroRequest *schedulercache.Resource
    if priorityMeta, ok := meta.(*priorityMetadata); ok {
        nonZeroRequest = priorityMeta.nonZeroRequest
    } else {
        // We couldn't parse metadata - fallback to computing it.
        nonZeroRequest = getNonZeroRequests(pod)
    }
    return calculateUsedPriority(pod, nonZeroRequest, nodeInfo)
}

// The used capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more resources are used the higher the score is. This function
// is almost a reversed version of least_requested_priority.calculateUnusedScore
// (10 - calculateUnusedScore). The main difference is in rounding. It was added to
// keep the final formula clean and not to modify the widely used (by users
// in their default scheduling policies) calculateUnusedScore.
func calculateUsedScore(requested int64, capacity int64, node string) int64 {
    if capacity == 0 {
        return 0
    }
    if requested > capacity {
        glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
            requested, capacity, node)
        return 0
    }
    return (requested * 10) / capacity
}

// Calculate the resource used on a node. 'node' has information about the resources on the node.
// 'pods' is a list of pods currently scheduled on the node.
func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    node := nodeInfo.Node()
    if node == nil {
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }

    allocatableResources := nodeInfo.AllocatableResource()
    totalResources := *podRequests
    totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
    totalResources.Memory += nodeInfo.NonZeroRequest().Memory

    cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
    memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
    if glog.V(10) {
        // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
        // not logged. There is visible performance gain from it.
        glog.V(10).Infof(
            "%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
            pod.Name, node.Name,
            allocatableResources.MilliCPU, allocatableResources.Memory,
            totalResources.MilliCPU, totalResources.Memory,
            cpuScore, memoryScore,
        )
    }

    return schedulerapi.HostPriority{
        Host:  node.Name,
        Score: int((cpuScore + memoryScore) / 2),
    }, nil
}
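The rounding difference mentioned in the comment is easiest to see with concrete numbers. A minimal standalone sketch of the used-capacity formula, using the same illustrative requests as the LeastRequested sketch above (each per-resource score truncates separately, so the two priorities are not exact 10-complements of each other):

package main

import "fmt"

// usedScore mirrors calculateUsedScore: requested/capacity scaled to 0-10 with
// integer arithmetic, clamped to 0 for zero-capacity or over-committed nodes.
func usedScore(requested, capacity int64) int64 {
    if capacity == 0 || requested > capacity {
        return 0
    }
    return (requested * 10) / capacity
}

func main() {
    // 3000 of 4000 millicores requested and 5000 of 10000 memory bytes requested.
    cpu := usedScore(3000, 4000)     // (3000*10)/4000 = 7 (7.5 truncated)
    memory := usedScore(5000, 10000) // 5
    // Final score 6, while the LeastRequested sketch scored the same node 3,
    // not 10-6=4: the truncation happens per resource, not on the final average.
    fmt.Println((cpu + memory) / 2) // 6
}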
plugin/pkg/scheduler/algorithm/priorities/most_requested_test.go (new file, 182 lines)
@@ -0,0 +1,182 @@

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "reflect"
    "testing"

    "k8s.io/kubernetes/pkg/api"
    "k8s.io/kubernetes/pkg/api/resource"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestMostRequested(t *testing.T) {
    labels1 := map[string]string{
        "foo": "bar",
        "baz": "blah",
    }
    labels2 := map[string]string{
        "bar": "foo",
        "baz": "blah",
    }
    noResources := api.PodSpec{
        Containers: []api.Container{},
    }
    cpuOnly := api.PodSpec{
        NodeName: "machine1",
        Containers: []api.Container{
            {
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{
                        "cpu":    resource.MustParse("1000m"),
                        "memory": resource.MustParse("0"),
                    },
                },
            },
            {
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{
                        "cpu":    resource.MustParse("2000m"),
                        "memory": resource.MustParse("0"),
                    },
                },
            },
        },
    }
    cpuOnly2 := cpuOnly
    cpuOnly2.NodeName = "machine2"
    cpuAndMemory := api.PodSpec{
        NodeName: "machine2",
        Containers: []api.Container{
            {
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{
                        "cpu":    resource.MustParse("1000m"),
                        "memory": resource.MustParse("2000"),
                    },
                },
            },
            {
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{
                        "cpu":    resource.MustParse("2000m"),
                        "memory": resource.MustParse("3000"),
                    },
                },
            },
        },
    }
    tests := []struct {
        pod          *api.Pod
        pods         []*api.Pod
        nodes        []*api.Node
        expectedList schedulerapi.HostPriorityList
        test         string
    }{
        {
            /*
                Node1 scores (used resources) on 0-10 scale
                CPU Score: (0 * 10) / 4000 = 0
                Memory Score: (0 * 10) / 10000 = 0
                Node1 Score: (0 + 0) / 2 = 0

                Node2 scores (used resources) on 0-10 scale
                CPU Score: (0 * 10) / 4000 = 0
                Memory Score: (0 * 10) / 10000 = 0
                Node2 Score: (0 + 0) / 2 = 0
            */
            pod:          &api.Pod{Spec: noResources},
            nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
            test:         "nothing scheduled, nothing requested",
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Score: (3000 * 10) / 4000 = 7.5
                Memory Score: (5000 * 10) / 10000 = 5
                Node1 Score: (7.5 + 5) / 2 = 6

                Node2 scores on 0-10 scale
                CPU Score: (3000 * 10) / 6000 = 5
                Memory Score: (5000 * 10) / 10000 = 5
                Node2 Score: (5 + 5) / 2 = 5
            */
            pod:          &api.Pod{Spec: cpuAndMemory},
            nodes:        []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 5}},
            test:         "nothing scheduled, resources requested, differently sized machines",
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Score: (6000 * 10) / 10000 = 6
                Memory Score: (0 * 10) / 20000 = 0
                Node1 Score: (6 + 0) / 2 = 3

                Node2 scores on 0-10 scale
                CPU Score: (6000 * 10) / 10000 = 6
                Memory Score: (5000 * 10) / 20000 = 2.5
                Node2 Score: (6 + 2.5) / 2 = 4
            */
            pod:          &api.Pod{Spec: noResources},
            nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 4}},
            test:         "no resources requested, pods scheduled with resources",
            pods: []*api.Pod{
                {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
                {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
                {Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
                {Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
            },
        },
        {
            /*
                Node1 scores on 0-10 scale
                CPU Score: (6000 * 10) / 10000 = 6
                Memory Score: (5000 * 10) / 20000 = 2.5
                Node1 Score: (6 + 2.5) / 2 = 4

                Node2 scores on 0-10 scale
                CPU Score: (6000 * 10) / 10000 = 6
                Memory Score: (10000 * 10) / 20000 = 5
                Node2 Score: (6 + 5) / 2 = 5
            */
            pod:          &api.Pod{Spec: cpuAndMemory},
            nodes:        []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 5}},
            test:         "resources requested, pods scheduled with resources",
            pods: []*api.Pod{
                {Spec: cpuOnly},
                {Spec: cpuAndMemory},
            },
        },
    }

    for _, test := range tests {
        nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
        list, err := priorityFunction(MostRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
        if err != nil {
            t.Errorf("unexpected error: %v", err)
        }
        if !reflect.DeepEqual(test.expectedList, list) {
            t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
        }
    }
}
60
plugin/pkg/scheduler/algorithm/priorities/node_label.go
Normal file
60
plugin/pkg/scheduler/algorithm/priorities/node_label.go
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "fmt"

    "k8s.io/kubernetes/pkg/api"
    "k8s.io/kubernetes/pkg/labels"
    "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

type NodeLabelPrioritizer struct {
    label    string
    presence bool
}

func NewNodeLabelPriority(label string, presence bool) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
    labelPrioritizer := &NodeLabelPrioritizer{
        label:    label,
        presence: presence,
    }
    return labelPrioritizer.CalculateNodeLabelPriorityMap, nil
}

// CalculateNodeLabelPriorityMap checks whether a particular label exists on a node, regardless of its value.
// If presence is true, it prioritizes nodes that have the specified label.
// If presence is false, it prioritizes nodes that do not have the specified label.
func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    node := nodeInfo.Node()
    if node == nil {
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }

    exists := labels.Set(node.Labels).Has(n.label)
    score := 0
    if (exists && n.presence) || (!exists && !n.presence) {
        score = 10
    }
    return schedulerapi.HostPriority{
        Host:  node.Name,
        Score: score,
    }, nil
}
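A minimal, self-contained sketch of the scoring rule above (the nodeLabelScore helper is hypothetical and not part of the diff): a node scores 10 when the label's presence matches the desired presence flag, and 0 otherwise.

package main

import "fmt"

// nodeLabelScore is a hypothetical stand-in for the core logic of CalculateNodeLabelPriorityMap.
func nodeLabelScore(nodeLabels map[string]string, label string, presence bool) int {
    _, exists := nodeLabels[label]
    if exists == presence {
        return 10
    }
    return 0
}

func main() {
    labels := map[string]string{"zone": "us-east-1a"}
    fmt.Println(nodeLabelScore(labels, "zone", true))  // 10: label present and presence wanted
    fmt.Println(nodeLabelScore(labels, "gpu", true))   // 0: label absent but presence wanted
    fmt.Println(nodeLabelScore(labels, "gpu", false))  // 10: label absent and absence wanted
}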
121  plugin/pkg/scheduler/algorithm/priorities/node_label_test.go  Normal file
@@ -0,0 +1,121 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "reflect"
    "sort"
    "testing"

    "k8s.io/kubernetes/pkg/api"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestNewNodeLabelPriority(t *testing.T) {
    label1 := map[string]string{"foo": "bar"}
    label2 := map[string]string{"bar": "foo"}
    label3 := map[string]string{"bar": "baz"}
    tests := []struct {
        nodes        []*api.Node
        label        string
        presence     bool
        expectedList schedulerapi.HostPriorityList
        test         string
    }{
        {
            nodes: []*api.Node{
                {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
                {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
                {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
            },
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
            label:        "baz",
            presence:     true,
            test:         "no match found, presence true",
        },
        {
            nodes: []*api.Node{
                {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
                {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
                {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
            },
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
            label:        "baz",
            presence:     false,
            test:         "no match found, presence false",
        },
        {
            nodes: []*api.Node{
                {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
                {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
                {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
            },
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
            label:        "foo",
            presence:     true,
            test:         "one match found, presence true",
        },
        {
            nodes: []*api.Node{
                {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
                {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
                {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
            },
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
            label:        "foo",
            presence:     false,
            test:         "one match found, presence false",
        },
        {
            nodes: []*api.Node{
                {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
                {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
                {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
            },
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
            label:        "bar",
            presence:     true,
            test:         "two matches found, presence true",
        },
        {
            nodes: []*api.Node{
                {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}},
                {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}},
                {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}},
            },
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
            label:        "bar",
            presence:     false,
            test:         "two matches found, presence false",
        },
    }

    for _, test := range tests {
        nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
        list, err := priorityFunction(NewNodeLabelPriority(test.label, test.presence))(nil, nodeNameToInfo, test.nodes)
        if err != nil {
            t.Errorf("unexpected error: %v", err)
        }
        // sort the two lists to avoid failures on account of different ordering
        sort.Sort(test.expectedList)
        sort.Sort(list)
        if !reflect.DeepEqual(test.expectedList, list) {
            t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
        }
    }
}
plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods.go
@@ -0,0 +1,60 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "fmt"

    "k8s.io/kubernetes/pkg/api"
    priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func CalculateNodePreferAvoidPodsPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    node := nodeInfo.Node()
    if node == nil {
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }

    controllerRef := priorityutil.GetControllerRef(pod)
    if controllerRef != nil {
        // Ignore pods that are owned by a controller other than ReplicationController
        // or ReplicaSet.
        if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
            controllerRef = nil
        }
    }
    if controllerRef == nil {
        return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
    }

    avoids, err := api.GetAvoidPodsFromNodeAnnotations(node.Annotations)
    if err != nil {
        // If we cannot get annotation, assume it's schedulable there.
        return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
    }
    for i := range avoids.PreferAvoidPods {
        avoid := &avoids.PreferAvoidPods[i]
        if controllerRef != nil {
            if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
                return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
            }
        }
    }
    return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}
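The matching rule above can be summarized in a small self-contained sketch (the avoidEntry type and preferAvoidScore helper are hypothetical, for illustration only): a node is scored 0 only when one of its preferAvoidPods entries names the same controller kind and UID that owns the pod; everything else keeps the top score of 10.

package main

import "fmt"

// avoidEntry stands in for the PodSignature.PodController fields read from the node annotation.
type avoidEntry struct{ kind, uid string }

func preferAvoidScore(ownerKind, ownerUID string, avoids []avoidEntry) int {
    // Only pods controlled by a ReplicationController or ReplicaSet are subject to avoidance.
    if ownerKind != "ReplicationController" && ownerKind != "ReplicaSet" {
        return 10
    }
    for _, a := range avoids {
        if a.kind == ownerKind && a.uid == ownerUID {
            return 0
        }
    }
    return 10
}

func main() {
    avoids := []avoidEntry{{kind: "ReplicationController", uid: "abcdef123456"}}
    fmt.Println(preferAvoidScore("ReplicationController", "abcdef123456", avoids)) // 0: node asked to avoid this controller
    fmt.Println(preferAvoidScore("ReplicaSet", "qwert12345", avoids))              // 10: no matching avoid entry
}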
plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods_test.go
@@ -0,0 +1,155 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "reflect"
    "sort"
    "testing"

    "k8s.io/kubernetes/pkg/api"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestNodePreferAvoidPriority(t *testing.T) {
    annotations1 := map[string]string{
        api.PreferAvoidPodsAnnotationKey: `
            {
                "preferAvoidPods": [
                    {
                        "podSignature": {
                            "podController": {
                                "apiVersion": "v1",
                                "kind": "ReplicationController",
                                "name": "foo",
                                "uid": "abcdef123456",
                                "controller": true
                            }
                        },
                        "reason": "some reason",
                        "message": "some message"
                    }
                ]
            }`,
    }
    annotations2 := map[string]string{
        api.PreferAvoidPodsAnnotationKey: `
            {
                "preferAvoidPods": [
                    {
                        "podSignature": {
                            "podController": {
                                "apiVersion": "v1",
                                "kind": "ReplicaSet",
                                "name": "foo",
                                "uid": "qwert12345",
                                "controller": true
                            }
                        },
                        "reason": "some reason",
                        "message": "some message"
                    }
                ]
            }`,
    }
    testNodes := []*api.Node{
        {
            ObjectMeta: api.ObjectMeta{Name: "machine1", Annotations: annotations1},
        },
        {
            ObjectMeta: api.ObjectMeta{Name: "machine2", Annotations: annotations2},
        },
        {
            ObjectMeta: api.ObjectMeta{Name: "machine3"},
        },
    }
    trueVar := true
    tests := []struct {
        pod          *api.Pod
        nodes        []*api.Node
        expectedList schedulerapi.HostPriorityList
        test         string
    }{
        {
            pod: &api.Pod{
                ObjectMeta: api.ObjectMeta{
                    Namespace: "default",
                    OwnerReferences: []api.OwnerReference{
                        {Kind: "ReplicationController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
                    },
                },
            },
            nodes:        testNodes,
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
            test:         "pod managed by ReplicationController should avoid a node, this node gets the lowest priority score",
        },
        {
            pod: &api.Pod{
                ObjectMeta: api.ObjectMeta{
                    Namespace: "default",
                    OwnerReferences: []api.OwnerReference{
                        {Kind: "RandomController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
                    },
                },
            },
            nodes:        testNodes,
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
            test:         "ownership by random controller should be ignored",
        },
        {
            pod: &api.Pod{
                ObjectMeta: api.ObjectMeta{
                    Namespace: "default",
                    OwnerReferences: []api.OwnerReference{
                        {Kind: "ReplicationController", Name: "foo", UID: "abcdef123456"},
                    },
                },
            },
            nodes:        testNodes,
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
            test:         "owner without Controller field set should be ignored",
        },
        {
            pod: &api.Pod{
                ObjectMeta: api.ObjectMeta{
                    Namespace: "default",
                    OwnerReferences: []api.OwnerReference{
                        {Kind: "ReplicaSet", Name: "foo", UID: "qwert12345", Controller: &trueVar},
                    },
                },
            },
            nodes:        testNodes,
            expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 10}},
            test:         "pod managed by ReplicaSet should avoid a node, this node gets the lowest priority score",
        },
    }

    for _, test := range tests {
        nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
        list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
        if err != nil {
            t.Errorf("unexpected error: %v", err)
        }
        // sort the two lists to avoid failures on account of different ordering
        sort.Sort(test.expectedList)
        sort.Sort(list)
        if !reflect.DeepEqual(test.expectedList, list) {
            t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
        }
    }
}
plugin/pkg/scheduler/algorithm/priorities/priorities.go (deleted)
@@ -1,367 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "fmt"
    "math"

    "github.com/golang/glog"
    "k8s.io/kubernetes/pkg/api"
    "k8s.io/kubernetes/pkg/labels"
    "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
    priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func getNonZeroRequests(pod *api.Pod) *schedulercache.Resource {
    result := &schedulercache.Resource{}
    for i := range pod.Spec.Containers {
        container := &pod.Spec.Containers[i]
        cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
        result.MilliCPU += cpu
        result.Memory += memory
    }
    return result
}

// The unused capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more unused resources the higher the score is.
func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
    if capacity == 0 {
        return 0
    }
    if requested > capacity {
        glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
            requested, capacity, node)
        return 0
    }
    return ((capacity - requested) * 10) / capacity
}

// The used capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more resources are used the higher the score is. This function
// is almost a reversed version of calculateUnusedScore (10 - calculateUnusedScore).
// The main difference is in rounding. It was added to keep the
// final formula clean and not to modify the widely used (by users
// in their default scheduling policies) calculateUnusedScore.
func calculateUsedScore(requested int64, capacity int64, node string) int64 {
    if capacity == 0 {
        return 0
    }
    if requested > capacity {
        glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
            requested, capacity, node)
        return 0
    }
    return (requested * 10) / capacity
}

// Calculates host priority based on the amount of unused resources.
// 'node' has information about the resources on the node.
// 'pods' is a list of pods currently scheduled on the node.
func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    node := nodeInfo.Node()
    if node == nil {
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }

    allocatableResources := nodeInfo.AllocatableResource()
    totalResources := *podRequests
    totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
    totalResources.Memory += nodeInfo.NonZeroRequest().Memory

    cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
    memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
    if glog.V(10) {
        // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
        // not logged. There is visible performance gain from it.
        glog.V(10).Infof(
            "%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
            pod.Name, node.Name,
            allocatableResources.MilliCPU, allocatableResources.Memory,
            totalResources.MilliCPU, totalResources.Memory,
            cpuScore, memoryScore,
        )
    }

    return schedulerapi.HostPriority{
        Host:  node.Name,
        Score: int((cpuScore + memoryScore) / 2),
    }, nil
}

// Calculate the resource used on a node. 'node' has information about the resources on the node.
// 'pods' is a list of pods currently scheduled on the node.
func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    node := nodeInfo.Node()
    if node == nil {
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }

    allocatableResources := nodeInfo.AllocatableResource()
    totalResources := *podRequests
    totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
    totalResources.Memory += nodeInfo.NonZeroRequest().Memory

    cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
    memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
    if glog.V(10) {
        // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
        // not logged. There is visible performance gain from it.
        glog.V(10).Infof(
            "%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
            pod.Name, node.Name,
            allocatableResources.MilliCPU, allocatableResources.Memory,
            totalResources.MilliCPU, totalResources.Memory,
            cpuScore, memoryScore,
        )
    }

    return schedulerapi.HostPriority{
        Host:  node.Name,
        Score: int((cpuScore + memoryScore) / 2),
    }, nil
}

// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the minimum of the average of the fraction of requested to capacity.
// Details: cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity) / 2
func LeastRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    var nonZeroRequest *schedulercache.Resource
    if priorityMeta, ok := meta.(*priorityMetadata); ok {
        nonZeroRequest = priorityMeta.nonZeroRequest
    } else {
        // We couldn't parse metadata - fallback to computing it.
        nonZeroRequest = getNonZeroRequests(pod)
    }
    return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
}

// MostRequestedPriority is a priority function that favors nodes with most requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the maximum of the average of the fraction of requested to capacity.
// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
func MostRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    var nonZeroRequest *schedulercache.Resource
    if priorityMeta, ok := meta.(*priorityMetadata); ok {
        nonZeroRequest = priorityMeta.nonZeroRequest
    } else {
        // We couldn't parse metadata - fallback to computing it.
        nonZeroRequest = getNonZeroRequests(pod)
    }
    return calculateUsedPriority(pod, nonZeroRequest, nodeInfo)
}

type NodeLabelPrioritizer struct {
    label    string
    presence bool
}

func NewNodeLabelPriority(label string, presence bool) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
    labelPrioritizer := &NodeLabelPrioritizer{
        label:    label,
        presence: presence,
    }
    return labelPrioritizer.CalculateNodeLabelPriorityMap, nil
}

// CalculateNodeLabelPriorityMap checks whether a particular label exists on a node, regardless of its value.
// If presence is true, it prioritizes nodes that have the specified label.
// If presence is false, it prioritizes nodes that do not have the specified label.
func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    node := nodeInfo.Node()
    if node == nil {
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }

    exists := labels.Set(node.Labels).Has(n.label)
    score := 0
    if (exists && n.presence) || (!exists && !n.presence) {
        score = 10
    }
    return schedulerapi.HostPriority{
        Host:  node.Name,
        Score: score,
    }, nil
}

// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
const (
    mb         int64 = 1024 * 1024
    minImgSize int64 = 23 * mb
    maxImgSize int64 = 1000 * mb
)

// ImageLocalityPriority is a priority function that favors nodes that already have requested pod container's images.
// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
// based on the total size of those images.
// - If none of the images are present, this node will be given the lowest priority.
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
func ImageLocalityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    node := nodeInfo.Node()
    if node == nil {
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }

    var sumSize int64
    for i := range pod.Spec.Containers {
        sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i])
    }
    return schedulerapi.HostPriority{
        Host:  node.Name,
        Score: calculateScoreFromSize(sumSize),
    }, nil
}

// checkContainerImageOnNode checks if a container image is present on a node and returns its size.
func checkContainerImageOnNode(node *api.Node, container *api.Container) int64 {
    for _, image := range node.Status.Images {
        for _, name := range image.Names {
            if container.Image == name {
                // Should return immediately.
                return image.SizeBytes
            }
        }
    }
    return 0
}

// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node.
// 1. Split image size range into 10 buckets.
// 2. Decide the priority of a given sumSize based on which bucket it belongs to.
func calculateScoreFromSize(sumSize int64) int {
    var score int
    switch {
    case sumSize == 0 || sumSize < minImgSize:
        // score == 0 means none of the images required by this pod are present on this
        // node or the total size of the images present is too small to be taken into further consideration.
        score = 0
    // If existing images' total size is larger than max, just make it highest priority.
    case sumSize >= maxImgSize:
        score = 10
    default:
        score = int((10 * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
    }
    // Return which bucket the given size belongs to
    return score
}

// BalancedResourceAllocation favors nodes with balanced resource usage rate.
// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how
// close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
func BalancedResourceAllocationMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    var nonZeroRequest *schedulercache.Resource
    if priorityMeta, ok := meta.(*priorityMetadata); ok {
        nonZeroRequest = priorityMeta.nonZeroRequest
    } else {
        // We couldn't parse metadata - fallback to computing it.
        nonZeroRequest = getNonZeroRequests(pod)
    }
    return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo)
}

func calculateBalancedResourceAllocation(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    node := nodeInfo.Node()
    if node == nil {
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }

    allocatableResources := nodeInfo.AllocatableResource()
    totalResources := *podRequests
    totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
    totalResources.Memory += nodeInfo.NonZeroRequest().Memory

    cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
    memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
    score := int(0)
    if cpuFraction >= 1 || memoryFraction >= 1 {
        // if requested >= capacity, the corresponding host should never be preferred.
        score = 0
    } else {
        // Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
        // respectively. Multiplying the absolute value of the difference by 10 scales the value to
        // 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
        // 10 leads to the score which also scales from 0 to 10 while 10 representing well balanced.
        diff := math.Abs(cpuFraction - memoryFraction)
        score = int(10 - diff*10)
    }
    if glog.V(10) {
        // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
        // not logged. There is visible performance gain from it.
        glog.V(10).Infof(
            "%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
            pod.Name, node.Name,
            allocatableResources.MilliCPU, allocatableResources.Memory,
            totalResources.MilliCPU, totalResources.Memory,
            score,
        )
    }

    return schedulerapi.HostPriority{
        Host:  node.Name,
        Score: score,
    }, nil
}

func fractionOfCapacity(requested, capacity int64) float64 {
    if capacity == 0 {
        return 1
    }
    return float64(requested) / float64(capacity)
}

func CalculateNodePreferAvoidPodsPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    node := nodeInfo.Node()
    if node == nil {
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }

    controllerRef := priorityutil.GetControllerRef(pod)
    if controllerRef != nil {
        // Ignore pods that are owned by a controller other than ReplicationController
        // or ReplicaSet.
        if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
            controllerRef = nil
        }
    }
    if controllerRef == nil {
        return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
    }

    avoids, err := api.GetAvoidPodsFromNodeAnnotations(node.Annotations)
    if err != nil {
        // If we cannot get annotation, assume it's schedulable there.
        return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
    }
    for i := range avoids.PreferAvoidPods {
        avoid := &avoids.PreferAvoidPods[i]
        if controllerRef != nil {
            if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
                return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
            }
        }
    }
    return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}
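Two of the formulas in the removed file above, the balanced-resource score (10 - abs(cpuFraction - memoryFraction) * 10) and the 10-bucket image-size score, can be sanity-checked with a standalone sketch. The helper names balancedScore and imageScore and the sample numbers are illustrative only and are not taken from the tests in this PR.

package main

import (
    "fmt"
    "math"
)

// balancedScore mirrors score = 10 - abs(cpuFraction - memoryFraction) * 10.
func balancedScore(cpuFraction, memFraction float64) int {
    if cpuFraction >= 1 || memFraction >= 1 {
        // A node whose requests meet or exceed capacity is never preferred.
        return 0
    }
    return int(10 - math.Abs(cpuFraction-memFraction)*10)
}

// imageScore mirrors the bucket split between minImgSize (23 MB) and maxImgSize (1000 MB).
func imageScore(sumSize int64) int {
    const mb int64 = 1024 * 1024
    const minImgSize, maxImgSize = 23 * mb, 1000 * mb
    switch {
    case sumSize < minImgSize:
        return 0
    case sumSize >= maxImgSize:
        return 10
    default:
        return int((10*(sumSize-minImgSize))/(maxImgSize-minImgSize)) + 1
    }
}

func main() {
    fmt.Println(balancedScore(0.6, 0.25))      // 6: CPU and memory fractions differ by 0.35
    fmt.Println(imageScore(500 * 1024 * 1024)) // 5: roughly 500 MB of requested images already on the node
}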
File diff suppressed because it is too large
60  plugin/pkg/scheduler/algorithm/priorities/test_util.go  Normal file
@@ -0,0 +1,60 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
    "k8s.io/kubernetes/pkg/api"
    "k8s.io/kubernetes/pkg/api/resource"
    "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func makeNode(node string, milliCPU, memory int64) *api.Node {
    return &api.Node{
        ObjectMeta: api.ObjectMeta{Name: node},
        Status: api.NodeStatus{
            Capacity: api.ResourceList{
                "cpu":    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
                "memory": *resource.NewQuantity(memory, resource.BinarySI),
            },
            Allocatable: api.ResourceList{
                "cpu":    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
                "memory": *resource.NewQuantity(memory, resource.BinarySI),
            },
        },
    }
}

func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction {
    return func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) {
        result := make(schedulerapi.HostPriorityList, 0, len(nodes))
        for i := range nodes {
            hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name])
            if err != nil {
                return nil, err
            }
            result = append(result, hostResult)
        }
        if reduceFn != nil {
            if err := reduceFn(pod, result); err != nil {
                return nil, err
            }
        }
        return result, nil
    }
}
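As a usage note, a new priority's map function could be exercised with these helpers in the same way the tests above do. The constantScoreMap function and TestConstantScoreExample below are hypothetical names that only sketch the pattern; they are not part of this PR.

package priorities

import (
    "testing"

    "k8s.io/kubernetes/pkg/api"
    schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

// constantScoreMap is a toy priority map function that gives every node the same score.
func constantScoreMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
    return schedulerapi.HostPriority{Host: nodeInfo.Node().Name, Score: 5}, nil
}

func TestConstantScoreExample(t *testing.T) {
    nodes := []*api.Node{makeNode("machine1", 1000, 2000), makeNode("machine2", 1000, 2000)}
    nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, nodes)
    list, err := priorityFunction(constantScoreMap, nil)(nil, nodeNameToInfo, nodes)
    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }
    for _, hp := range list {
        if hp.Score != 5 {
            t.Errorf("expected score 5 for %s, got %d", hp.Host, hp.Score)
        }
    }
}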