From 802ebf01e2ed7bced03d6f7b9d406b5caa4b0ada Mon Sep 17 00:00:00 2001 From: gmarek Date: Wed, 26 Oct 2016 11:51:11 +0200 Subject: [PATCH] split scheduler priorities --- .../pkg/scheduler/algorithm/priorities/BUILD | 15 +- .../balanced_resource_allocation.go | 116 ++ .../balanced_resource_allocation_test.go | 263 +++++ .../algorithm/priorities/image_locality.go | 79 ++ .../priorities/image_locality_test.go | 182 +++ .../algorithm/priorities/least_requested.go | 91 ++ .../priorities/least_requested_test.go | 263 +++++ .../algorithm/priorities/most_requested.go | 94 ++ .../priorities/most_requested_test.go | 182 +++ .../algorithm/priorities/node_label.go | 60 + .../algorithm/priorities/node_label_test.go | 121 ++ .../priorities/node_prefer_avoid_pods.go | 60 + .../priorities/node_prefer_avoid_pods_test.go | 155 +++ .../algorithm/priorities/priorities.go | 367 ------ .../algorithm/priorities/priorities_test.go | 1046 ----------------- .../algorithm/priorities/test_util.go | 60 + 16 files changed, 1740 insertions(+), 1414 deletions(-) create mode 100644 plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation.go create mode 100644 plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation_test.go create mode 100644 plugin/pkg/scheduler/algorithm/priorities/image_locality.go create mode 100644 plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go create mode 100644 plugin/pkg/scheduler/algorithm/priorities/least_requested.go create mode 100644 plugin/pkg/scheduler/algorithm/priorities/least_requested_test.go create mode 100644 plugin/pkg/scheduler/algorithm/priorities/most_requested.go create mode 100644 plugin/pkg/scheduler/algorithm/priorities/most_requested_test.go create mode 100644 plugin/pkg/scheduler/algorithm/priorities/node_label.go create mode 100644 plugin/pkg/scheduler/algorithm/priorities/node_label_test.go create mode 100644 plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods.go create mode 100644 plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods_test.go delete mode 100644 plugin/pkg/scheduler/algorithm/priorities/priorities.go create mode 100644 plugin/pkg/scheduler/algorithm/priorities/test_util.go diff --git a/plugin/pkg/scheduler/algorithm/priorities/BUILD b/plugin/pkg/scheduler/algorithm/priorities/BUILD index 1789c46a0e3..879e1e3f496 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/BUILD +++ b/plugin/pkg/scheduler/algorithm/priorities/BUILD @@ -13,16 +13,23 @@ load( go_library( name = "go_default_library", srcs = [ + "balanced_resource_allocation.go", + "image_locality.go", "interpod_affinity.go", + "least_requested.go", "metadata.go", + "most_requested.go", "node_affinity.go", - "priorities.go", + "node_label.go", + "node_prefer_avoid_pods.go", "selector_spreading.go", "taint_toleration.go", + "test_util.go", ], tags = ["automanaged"], deps = [ "//pkg/api:go_default_library", + "//pkg/api/resource:go_default_library", "//pkg/api/unversioned:go_default_library", "//pkg/labels:go_default_library", "//pkg/util/node:go_default_library", @@ -39,8 +46,14 @@ go_library( go_test( name = "go_default_test", srcs = [ + "balanced_resource_allocation_test.go", + "image_locality_test.go", "interpod_affinity_test.go", + "least_requested_test.go", + "most_requested_test.go", "node_affinity_test.go", + "node_label_test.go", + "node_prefer_avoid_pods_test.go", "priorities_test.go", "selector_spreading_test.go", "taint_toleration_test.go", diff --git 
a/plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation.go b/plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation.go new file mode 100644 index 00000000000..fedc16463c6 --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation.go @@ -0,0 +1,116 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package priorities + +import ( + "fmt" + "math" + + "k8s.io/kubernetes/pkg/api" + priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" + + "github.com/golang/glog" +) + +// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range. +const ( + mb int64 = 1024 * 1024 + minImgSize int64 = 23 * mb + maxImgSize int64 = 1000 * mb +) + +// Also used in most/least_requested and metadata. +// TODO: despaghettify it +func getNonZeroRequests(pod *api.Pod) *schedulercache.Resource { + result := &schedulercache.Resource{} + for i := range pod.Spec.Containers { + container := &pod.Spec.Containers[i] + cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests) + result.MilliCPU += cpu + result.Memory += memory + } + return result +} + +func calculateBalancedResourceAllocation(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { + node := nodeInfo.Node() + if node == nil { + return schedulerapi.HostPriority{}, fmt.Errorf("node not found") + } + + allocatableResources := nodeInfo.AllocatableResource() + totalResources := *podRequests + totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU + totalResources.Memory += nodeInfo.NonZeroRequest().Memory + + cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU) + memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory) + score := int(0) + if cpuFraction >= 1 || memoryFraction >= 1 { + // if requested >= capacity, the corresponding host should never be preferred. + score = 0 + } else { + // Upper and lower boundaries of the difference between cpuFraction and memoryFraction are 1 and -1 + // respectively. Multiplying the absolute value of the difference by 10 scales the value to + // 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from + // 10 leads to the score which also scales from 0 to 10, with 10 representing well balanced. + diff := math.Abs(cpuFraction - memoryFraction) + score = int(10 - diff*10) + } + if glog.V(10) { + // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is + // not logged. There is visible performance gain from it.
+ glog.V(10).Infof( + "%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d", + pod.Name, node.Name, + allocatableResources.MilliCPU, allocatableResources.Memory, + totalResources.MilliCPU, totalResources.Memory, + score, + ) + } + + return schedulerapi.HostPriority{ + Host: node.Name, + Score: score, + }, nil +} + +func fractionOfCapacity(requested, capacity int64) float64 { + if capacity == 0 { + return 1 + } + return float64(requested) / float64(capacity) +} + +// BalancedResourceAllocation favors nodes with balanced resource usage rate. +// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority. +// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how +// close the two metrics are to each other. +// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by: +// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization" +func BalancedResourceAllocationMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { + var nonZeroRequest *schedulercache.Resource + if priorityMeta, ok := meta.(*priorityMetadata); ok { + nonZeroRequest = priorityMeta.nonZeroRequest + } else { + // We couldn't parse metadata - fallback to computing it. + nonZeroRequest = getNonZeroRequests(pod) + } + return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo) +} diff --git a/plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation_test.go b/plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation_test.go new file mode 100644 index 00000000000..4003ded9a8e --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation_test.go @@ -0,0 +1,263 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ + +package priorities + +import ( + "reflect" + "testing" + + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/resource" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" +) + +func TestBalancedResourceAllocation(t *testing.T) { + labels1 := map[string]string{ + "foo": "bar", + "baz": "blah", + } + labels2 := map[string]string{ + "bar": "foo", + "baz": "blah", + } + machine1Spec := api.PodSpec{ + NodeName: "machine1", + } + machine2Spec := api.PodSpec{ + NodeName: "machine2", + } + noResources := api.PodSpec{ + Containers: []api.Container{}, + } + cpuOnly := api.PodSpec{ + NodeName: "machine1", + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse("1000m"), + "memory": resource.MustParse("0"), + }, + }, + }, + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse("2000m"), + "memory": resource.MustParse("0"), + }, + }, + }, + }, + } + cpuOnly2 := cpuOnly + cpuOnly2.NodeName = "machine2" + cpuAndMemory := api.PodSpec{ + NodeName: "machine2", + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse("1000m"), + "memory": resource.MustParse("2000"), + }, + }, + }, + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse("2000m"), + "memory": resource.MustParse("3000"), + }, + }, + }, + }, + } + tests := []struct { + pod *api.Pod + pods []*api.Pod + nodes []*api.Node + expectedList schedulerapi.HostPriorityList + test string + }{ + { + /* + Node1 scores (remaining resources) on 0-10 scale + CPU Fraction: 0 / 4000 = 0% + Memory Fraction: 0 / 10000 = 0% + Node1 Score: 10 - (0-0)*10 = 10 + + Node2 scores (remaining resources) on 0-10 scale + CPU Fraction: 0 / 4000 = 0 % + Memory Fraction: 0 / 10000 = 0% + Node2 Score: 10 - (0-0)*10 = 10 + */ + pod: &api.Pod{Spec: noResources}, + nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}}, + test: "nothing scheduled, nothing requested", + }, + { + /* + Node1 scores on 0-10 scale + CPU Fraction: 3000 / 4000= 75% + Memory Fraction: 5000 / 10000 = 50% + Node1 Score: 10 - (0.75-0.5)*10 = 7 + + Node2 scores on 0-10 scale + CPU Fraction: 3000 / 6000= 50% + Memory Fraction: 5000/10000 = 50% + Node2 Score: 10 - (0.5-0.5)*10 = 10 + */ + pod: &api.Pod{Spec: cpuAndMemory}, + nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 10}}, + test: "nothing scheduled, resources requested, differently sized machines", + }, + { + /* + Node1 scores on 0-10 scale + CPU Fraction: 0 / 4000= 0% + Memory Fraction: 0 / 10000 = 0% + Node1 Score: 10 - (0-0)*10 = 10 + + Node2 scores on 0-10 scale + CPU Fraction: 0 / 4000= 0% + Memory Fraction: 0 / 10000 = 0% + Node2 Score: 10 - (0-0)*10 = 10 + */ + pod: &api.Pod{Spec: noResources}, + nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}}, + test: "no resources requested, pods scheduled", + pods: []*api.Pod{ + {Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}}, + {Spec: machine1Spec, 
ObjectMeta: api.ObjectMeta{Labels: labels1}}, + {Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + {Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + }, + }, + { + /* + Node1 scores on 0-10 scale + CPU Fraction: 6000 / 10000 = 60% + Memory Fraction: 0 / 20000 = 0% + Node1 Score: 10 - (0.6-0)*10 = 4 + + Node2 scores on 0-10 scale + CPU Fraction: 6000 / 10000 = 60% + Memory Fraction: 5000 / 20000 = 25% + Node2 Score: 10 - (0.6-0.25)*10 = 6 + */ + pod: &api.Pod{Spec: noResources}, + nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 6}}, + test: "no resources requested, pods scheduled with resources", + pods: []*api.Pod{ + {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}}, + {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + {Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + {Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + }, + }, + { + /* + Node1 scores on 0-10 scale + CPU Fraction: 6000 / 10000 = 60% + Memory Fraction: 5000 / 20000 = 25% + Node1 Score: 10 - (0.6-0.25)*10 = 6 + + Node2 scores on 0-10 scale + CPU Fraction: 6000 / 10000 = 60% + Memory Fraction: 10000 / 20000 = 50% + Node2 Score: 10 - (0.6-0.5)*10 = 9 + */ + pod: &api.Pod{Spec: cpuAndMemory}, + nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 9}}, + test: "resources requested, pods scheduled with resources", + pods: []*api.Pod{ + {Spec: cpuOnly}, + {Spec: cpuAndMemory}, + }, + }, + { + /* + Node1 scores on 0-10 scale + CPU Fraction: 6000 / 10000 = 60% + Memory Fraction: 5000 / 20000 = 25% + Node1 Score: 10 - (0.6-0.25)*10 = 6 + + Node2 scores on 0-10 scale + CPU Fraction: 6000 / 10000 = 60% + Memory Fraction: 10000 / 50000 = 20% + Node2 Score: 10 - (0.6-0.2)*10 = 6 + */ + pod: &api.Pod{Spec: cpuAndMemory}, + nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 6}}, + test: "resources requested, pods scheduled with resources, differently sized machines", + pods: []*api.Pod{ + {Spec: cpuOnly}, + {Spec: cpuAndMemory}, + }, + }, + { + /* + Node1 scores on 0-10 scale + CPU Fraction: 6000 / 4000 > 100% ==> Score := 0 + Memory Fraction: 0 / 10000 = 0 + Node1 Score: 0 + + Node2 scores on 0-10 scale + CPU Fraction: 6000 / 4000 > 100% ==> Score := 0 + Memory Fraction 5000 / 10000 = 50% + Node2 Score: 0 + */ + pod: &api.Pod{Spec: cpuOnly}, + nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}}, + test: "requested resources exceed node capacity", + pods: []*api.Pod{ + {Spec: cpuOnly}, + {Spec: cpuAndMemory}, + }, + }, + { + pod: &api.Pod{Spec: noResources}, + nodes: []*api.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}}, + test: "zero node resources, pods scheduled with resources", + pods: []*api.Pod{ + {Spec: cpuOnly}, + {Spec: cpuAndMemory}, + }, + }, + } + + for _, test := range tests { + nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) + list, 
err := priorityFunction(BalancedResourceAllocationMap, nil)(test.pod, nodeNameToInfo, test.nodes) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !reflect.DeepEqual(test.expectedList, list) { + t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) + } + } +} diff --git a/plugin/pkg/scheduler/algorithm/priorities/image_locality.go b/plugin/pkg/scheduler/algorithm/priorities/image_locality.go new file mode 100644 index 00000000000..0bdd3ba3980 --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/image_locality.go @@ -0,0 +1,79 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package priorities + +import ( + "fmt" + + "k8s.io/kubernetes/pkg/api" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" +) + +// ImageLocalityPriority is a priority function that favors nodes that already have requested pod container's images. +// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10 +// based on the total size of those images. +// - If none of the images are present, this node will be given the lowest priority. +// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority. +func ImageLocalityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { + node := nodeInfo.Node() + if node == nil { + return schedulerapi.HostPriority{}, fmt.Errorf("node not found") + } + + var sumSize int64 + for i := range pod.Spec.Containers { + sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i]) + } + return schedulerapi.HostPriority{ + Host: node.Name, + Score: calculateScoreFromSize(sumSize), + }, nil +} + +// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node. +// 1. Split image size range into 10 buckets. +// 2. Decide the priority of a given sumSize based on which bucket it belongs to. +func calculateScoreFromSize(sumSize int64) int { + var score int + switch { + case sumSize == 0 || sumSize < minImgSize: + // score == 0 means none of the images required by this pod are present on this + // node or the total size of the images present is too small to be taken into further consideration. + score = 0 + // If existing images' total size is larger than max, just make it highest priority. + case sumSize >= maxImgSize: + score = 10 + default: + score = int((10 * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1) + } + // Return which bucket the given size belongs to + return score +} + +// checkContainerImageOnNode checks if a container image is present on a node and returns its size. +func checkContainerImageOnNode(node *api.Node, container *api.Container) int64 { + for _, image := range node.Status.Images { + for _, name := range image.Names { + if container.Image == name { + // Should return immediately. 
+ return image.SizeBytes + } + } + } + return 0 +} diff --git a/plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go b/plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go new file mode 100644 index 00000000000..0921e70ab92 --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go @@ -0,0 +1,182 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package priorities + +import ( + "reflect" + "sort" + "testing" + + "k8s.io/kubernetes/pkg/api" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" +) + +func TestImageLocalityPriority(t *testing.T) { + test_40_250 := api.PodSpec{ + Containers: []api.Container{ + { + Image: "gcr.io/40", + }, + { + Image: "gcr.io/250", + }, + }, + } + + test_40_140 := api.PodSpec{ + Containers: []api.Container{ + { + Image: "gcr.io/40", + }, + { + Image: "gcr.io/140", + }, + }, + } + + test_min_max := api.PodSpec{ + Containers: []api.Container{ + { + Image: "gcr.io/10", + }, + { + Image: "gcr.io/2000", + }, + }, + } + + node_40_140_2000 := api.NodeStatus{ + Images: []api.ContainerImage{ + { + Names: []string{ + "gcr.io/40", + "gcr.io/40:v1", + "gcr.io/40:v1", + }, + SizeBytes: int64(40 * mb), + }, + { + Names: []string{ + "gcr.io/140", + "gcr.io/140:v1", + }, + SizeBytes: int64(140 * mb), + }, + { + Names: []string{ + "gcr.io/2000", + }, + SizeBytes: int64(2000 * mb), + }, + }, + } + + node_250_10 := api.NodeStatus{ + Images: []api.ContainerImage{ + { + Names: []string{ + "gcr.io/250", + }, + SizeBytes: int64(250 * mb), + }, + { + Names: []string{ + "gcr.io/10", + "gcr.io/10:v1", + }, + SizeBytes: int64(10 * mb), + }, + }, + } + + tests := []struct { + pod *api.Pod + pods []*api.Pod + nodes []*api.Node + expectedList schedulerapi.HostPriorityList + test string + }{ + { + // Pod: gcr.io/40 gcr.io/250 + + // Node1 + // Image: gcr.io/40 40MB + // Score: (40M-23M)/97.7M + 1 = 1 + + // Node2 + // Image: gcr.io/250 250MB + // Score: (250M-23M)/97.7M + 1 = 3 + pod: &api.Pod{Spec: test_40_250}, + nodes: []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}}, + test: "two images spread on two nodes, prefer the larger image one", + }, + { + // Pod: gcr.io/40 gcr.io/140 + + // Node1 + // Image: gcr.io/40 40MB, gcr.io/140 140MB + // Score: (40M+140M-23M)/97.7M + 1 = 2 + + // Node2 + // Image: not present + // Score: 0 + pod: &api.Pod{Spec: test_40_140}, + nodes: []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: 0}}, + test: "two images on one node, prefer this node", + }, + { + // Pod: gcr.io/2000 gcr.io/10 + + // Node1 + // Image: gcr.io/2000 2000MB + // Score: 2000 > max score = 10 + + // Node2 + // Image: gcr.io/10 10MB + // Score: 10 < min 
score = 0 + pod: &api.Pod{Spec: test_min_max}, + nodes: []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}}, + test: "if exceed limit, use limit", + }, + } + + for _, test := range tests { + nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) + list, err := priorityFunction(ImageLocalityPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + + sort.Sort(test.expectedList) + sort.Sort(list) + + if !reflect.DeepEqual(test.expectedList, list) { + t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) + } + } +} + +func makeImageNode(node string, status api.NodeStatus) *api.Node { + return &api.Node{ + ObjectMeta: api.ObjectMeta{Name: node}, + Status: status, + } +} diff --git a/plugin/pkg/scheduler/algorithm/priorities/least_requested.go b/plugin/pkg/scheduler/algorithm/priorities/least_requested.go new file mode 100644 index 00000000000..4e8b4289799 --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/least_requested.go @@ -0,0 +1,91 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package priorities + +import ( + "fmt" + + "k8s.io/kubernetes/pkg/api" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" + + "github.com/golang/glog" +) + +// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources. +// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes +// based on the minimum of the average of the fraction of requested to capacity. +// Details: cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity) / 2 +func LeastRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { + var nonZeroRequest *schedulercache.Resource + if priorityMeta, ok := meta.(*priorityMetadata); ok { + nonZeroRequest = priorityMeta.nonZeroRequest + } else { + // We couldn't parse metadata - fallback to computing it. + nonZeroRequest = getNonZeroRequests(pod) + } + return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo) +} + +// The unused capacity is calculated on a scale of 0-10 +// 0 being the lowest priority and 10 being the highest. +// The more unused resources the higher the score is. +func calculateUnusedScore(requested int64, capacity int64, node string) int64 { + if capacity == 0 { + return 0 + } + if requested > capacity { + glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s", + requested, capacity, node) + return 0 + } + return ((capacity - requested) * 10) / capacity +} + +// Calculates host priority based on the amount of unused resources. 
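+// Worked example (illustrative, mirrors the cases in least_requested_test.go): on a node with 4000 millicores and
+// 10000 memory bytes allocatable, a combined non-zero request of 3000 millicores and 5000 bytes yields
+// cpuScore = ((4000-3000)*10)/4000 = 2 and memoryScore = ((10000-5000)*10)/10000 = 5, for a final score of (2+5)/2 = 3.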
+// 'node' has information about the resources on the node. +// 'pods' is a list of pods currently scheduled on the node. +func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { + node := nodeInfo.Node() + if node == nil { + return schedulerapi.HostPriority{}, fmt.Errorf("node not found") + } + + allocatableResources := nodeInfo.AllocatableResource() + totalResources := *podRequests + totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU + totalResources.Memory += nodeInfo.NonZeroRequest().Memory + + cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name) + memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name) + if glog.V(10) { + // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is + // not logged. There is visible performance gain from it. + glog.V(10).Infof( + "%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory", + pod.Name, node.Name, + allocatableResources.MilliCPU, allocatableResources.Memory, + totalResources.MilliCPU, totalResources.Memory, + cpuScore, memoryScore, + ) + } + + return schedulerapi.HostPriority{ + Host: node.Name, + Score: int((cpuScore + memoryScore) / 2), + }, nil +} diff --git a/plugin/pkg/scheduler/algorithm/priorities/least_requested_test.go b/plugin/pkg/scheduler/algorithm/priorities/least_requested_test.go new file mode 100644 index 00000000000..55ebdb8b94a --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/least_requested_test.go @@ -0,0 +1,263 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package priorities + +import ( + "reflect" + "testing" + + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/resource" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" +) + +func TestLeastRequested(t *testing.T) { + labels1 := map[string]string{ + "foo": "bar", + "baz": "blah", + } + labels2 := map[string]string{ + "bar": "foo", + "baz": "blah", + } + machine1Spec := api.PodSpec{ + NodeName: "machine1", + } + machine2Spec := api.PodSpec{ + NodeName: "machine2", + } + noResources := api.PodSpec{ + Containers: []api.Container{}, + } + cpuOnly := api.PodSpec{ + NodeName: "machine1", + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse("1000m"), + "memory": resource.MustParse("0"), + }, + }, + }, + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse("2000m"), + "memory": resource.MustParse("0"), + }, + }, + }, + }, + } + cpuOnly2 := cpuOnly + cpuOnly2.NodeName = "machine2" + cpuAndMemory := api.PodSpec{ + NodeName: "machine2", + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse("1000m"), + "memory": resource.MustParse("2000"), + }, + }, + }, + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse("2000m"), + "memory": resource.MustParse("3000"), + }, + }, + }, + }, + } + tests := []struct { + pod *api.Pod + pods []*api.Pod + nodes []*api.Node + expectedList schedulerapi.HostPriorityList + test string + }{ + { + /* + Node1 scores (remaining resources) on 0-10 scale + CPU Score: ((4000 - 0) *10) / 4000 = 10 + Memory Score: ((10000 - 0) *10) / 10000 = 10 + Node1 Score: (10 + 10) / 2 = 10 + + Node2 scores (remaining resources) on 0-10 scale + CPU Score: ((4000 - 0) *10) / 4000 = 10 + Memory Score: ((10000 - 0) *10) / 10000 = 10 + Node2 Score: (10 + 10) / 2 = 10 + */ + pod: &api.Pod{Spec: noResources}, + nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}}, + test: "nothing scheduled, nothing requested", + }, + { + /* + Node1 scores on 0-10 scale + CPU Score: ((4000 - 3000) *10) / 4000 = 2.5 + Memory Score: ((10000 - 5000) *10) / 10000 = 5 + Node1 Score: (2.5 + 5) / 2 = 3 + + Node2 scores on 0-10 scale + CPU Score: ((6000 - 3000) *10) / 6000 = 5 + Memory Score: ((10000 - 5000) *10) / 10000 = 5 + Node2 Score: (5 + 5) / 2 = 5 + */ + pod: &api.Pod{Spec: cpuAndMemory}, + nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 5}}, + test: "nothing scheduled, resources requested, differently sized machines", + }, + { + /* + Node1 scores on 0-10 scale + CPU Score: ((4000 - 0) *10) / 4000 = 10 + Memory Score: ((10000 - 0) *10) / 10000 = 10 + Node1 Score: (10 + 10) / 2 = 10 + + Node2 scores on 0-10 scale + CPU Score: ((4000 - 0) *10) / 4000 = 10 + Memory Score: ((10000 - 0) *10) / 10000 = 10 + Node2 Score: (10 + 10) / 2 = 10 + */ + pod: &api.Pod{Spec: noResources}, + nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}}, + test: "no resources requested, pods 
scheduled", + pods: []*api.Pod{ + {Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}}, + {Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + {Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + {Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + }, + }, + { + /* + Node1 scores on 0-10 scale + CPU Score: ((10000 - 6000) *10) / 10000 = 4 + Memory Score: ((20000 - 0) *10) / 20000 = 10 + Node1 Score: (4 + 10) / 2 = 7 + + Node2 scores on 0-10 scale + CPU Score: ((10000 - 6000) *10) / 10000 = 4 + Memory Score: ((20000 - 5000) *10) / 20000 = 7.5 + Node2 Score: (4 + 7.5) / 2 = 5 + */ + pod: &api.Pod{Spec: noResources}, + nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 5}}, + test: "no resources requested, pods scheduled with resources", + pods: []*api.Pod{ + {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}}, + {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + {Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + {Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + }, + }, + { + /* + Node1 scores on 0-10 scale + CPU Score: ((10000 - 6000) *10) / 10000 = 4 + Memory Score: ((20000 - 5000) *10) / 20000 = 7.5 + Node1 Score: (4 + 7.5) / 2 = 5 + + Node2 scores on 0-10 scale + CPU Score: ((10000 - 6000) *10) / 10000 = 4 + Memory Score: ((20000 - 10000) *10) / 20000 = 5 + Node2 Score: (4 + 5) / 2 = 4 + */ + pod: &api.Pod{Spec: cpuAndMemory}, + nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 4}}, + test: "resources requested, pods scheduled with resources", + pods: []*api.Pod{ + {Spec: cpuOnly}, + {Spec: cpuAndMemory}, + }, + }, + { + /* + Node1 scores on 0-10 scale + CPU Score: ((10000 - 6000) *10) / 10000 = 4 + Memory Score: ((20000 - 5000) *10) / 20000 = 7.5 + Node1 Score: (4 + 7.5) / 2 = 5 + + Node2 scores on 0-10 scale + CPU Score: ((10000 - 6000) *10) / 10000 = 4 + Memory Score: ((50000 - 10000) *10) / 50000 = 8 + Node2 Score: (4 + 8) / 2 = 6 + */ + pod: &api.Pod{Spec: cpuAndMemory}, + nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 6}}, + test: "resources requested, pods scheduled with resources, differently sized machines", + pods: []*api.Pod{ + {Spec: cpuOnly}, + {Spec: cpuAndMemory}, + }, + }, + { + /* + Node1 scores on 0-10 scale + CPU Score: ((4000 - 6000) *10) / 4000 = 0 + Memory Score: ((10000 - 0) *10) / 10000 = 10 + Node1 Score: (0 + 10) / 2 = 5 + + Node2 scores on 0-10 scale + CPU Score: ((4000 - 6000) *10) / 4000 = 0 + Memory Score: ((10000 - 5000) *10) / 10000 = 5 + Node2 Score: (0 + 5) / 2 = 2 + */ + pod: &api.Pod{Spec: cpuOnly}, + nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 2}}, + test: "requested resources exceed node capacity", + pods: []*api.Pod{ + {Spec: cpuOnly}, + {Spec: cpuAndMemory}, + }, + }, + { + pod: &api.Pod{Spec: noResources}, + nodes: []*api.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: 
"machine2", Score: 0}}, + test: "zero node resources, pods scheduled with resources", + pods: []*api.Pod{ + {Spec: cpuOnly}, + {Spec: cpuAndMemory}, + }, + }, + } + + for _, test := range tests { + nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) + list, err := priorityFunction(LeastRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !reflect.DeepEqual(test.expectedList, list) { + t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) + } + } +} diff --git a/plugin/pkg/scheduler/algorithm/priorities/most_requested.go b/plugin/pkg/scheduler/algorithm/priorities/most_requested.go new file mode 100644 index 00000000000..426cb6ca449 --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/most_requested.go @@ -0,0 +1,94 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package priorities + +import ( + "fmt" + + "k8s.io/kubernetes/pkg/api" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" + + "github.com/golang/glog" +) + +// MostRequestedPriority is a priority function that favors nodes with most requested resources. +// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes +// based on the maximum of the average of the fraction of requested to capacity. +// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2 +func MostRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { + var nonZeroRequest *schedulercache.Resource + if priorityMeta, ok := meta.(*priorityMetadata); ok { + nonZeroRequest = priorityMeta.nonZeroRequest + } else { + // We couldn't parse metadatat - fallback to computing it. + nonZeroRequest = getNonZeroRequests(pod) + } + return calculateUsedPriority(pod, nonZeroRequest, nodeInfo) +} + +// The used capacity is calculated on a scale of 0-10 +// 0 being the lowest priority and 10 being the highest. +// The more resources are used the higher the score is. This function +// is almost a reversed version of least_requested_priority.calculatUnusedScore +// (10 - calculateUnusedScore). The main difference is in rounding. It was added to +// keep the final formula clean and not to modify the widely used (by users +// in their default scheduling policies) calculateUSedScore. +func calculateUsedScore(requested int64, capacity int64, node string) int64 { + if capacity == 0 { + return 0 + } + if requested > capacity { + glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s", + requested, capacity, node) + return 0 + } + return (requested * 10) / capacity +} + +// Calculate the resource used on a node. 'node' has information about the resources on the node. +// 'pods' is a list of pods currently scheduled on the node. 
+func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { + node := nodeInfo.Node() + if node == nil { + return schedulerapi.HostPriority{}, fmt.Errorf("node not found") + } + + allocatableResources := nodeInfo.AllocatableResource() + totalResources := *podRequests + totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU + totalResources.Memory += nodeInfo.NonZeroRequest().Memory + + cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name) + memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name) + if glog.V(10) { + // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is + // not logged. There is visible performance gain from it. + glog.V(10).Infof( + "%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory", + pod.Name, node.Name, + allocatableResources.MilliCPU, allocatableResources.Memory, + totalResources.MilliCPU, totalResources.Memory, + cpuScore, memoryScore, + ) + } + + return schedulerapi.HostPriority{ + Host: node.Name, + Score: int((cpuScore + memoryScore) / 2), + }, nil +} diff --git a/plugin/pkg/scheduler/algorithm/priorities/most_requested_test.go b/plugin/pkg/scheduler/algorithm/priorities/most_requested_test.go new file mode 100644 index 00000000000..a11aaf5e3c5 --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/most_requested_test.go @@ -0,0 +1,182 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package priorities + +import ( + "reflect" + "testing" + + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/resource" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" +) + +func TestMostRequested(t *testing.T) { + labels1 := map[string]string{ + "foo": "bar", + "baz": "blah", + } + labels2 := map[string]string{ + "bar": "foo", + "baz": "blah", + } + noResources := api.PodSpec{ + Containers: []api.Container{}, + } + cpuOnly := api.PodSpec{ + NodeName: "machine1", + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse("1000m"), + "memory": resource.MustParse("0"), + }, + }, + }, + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse("2000m"), + "memory": resource.MustParse("0"), + }, + }, + }, + }, + } + cpuOnly2 := cpuOnly + cpuOnly2.NodeName = "machine2" + cpuAndMemory := api.PodSpec{ + NodeName: "machine2", + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse("1000m"), + "memory": resource.MustParse("2000"), + }, + }, + }, + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse("2000m"), + "memory": resource.MustParse("3000"), + }, + }, + }, + }, + } + tests := []struct { + pod *api.Pod + pods []*api.Pod + nodes []*api.Node + expectedList schedulerapi.HostPriorityList + test string + }{ + { + /* + Node1 scores (used resources) on 0-10 scale + CPU Score: (0 * 10) / 4000 = 0 + Memory Score: (0 * 10) / 10000 = 0 + Node1 Score: (0 + 0) / 2 = 0 + + Node2 scores (used resources) on 0-10 scale + CPU Score: (0 * 10) / 4000 = 0 + Memory Score: (0 * 10) / 10000 = 0 + Node2 Score: (0 + 0) / 2 = 0 + */ + pod: &api.Pod{Spec: noResources}, + nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}}, + test: "nothing scheduled, nothing requested", + }, + { + /* + Node1 scores on 0-10 scale + CPU Score: (3000 * 10) / 4000 = 7.5 + Memory Score: (5000 * 10) / 10000 = 5 + Node1 Score: (7.5 + 5) / 2 = 6 + + Node2 scores on 0-10 scale + CPU Score: (3000 * 10) / 6000 = 5 + Memory Score: (5000 * 10) / 10000 = 5 + Node2 Score: (5 + 5) / 2 = 5 + */ + pod: &api.Pod{Spec: cpuAndMemory}, + nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 5}}, + test: "nothing scheduled, resources requested, differently sized machines", + }, + { + /* + Node1 scores on 0-10 scale + CPU Score: (6000 * 10) / 10000 = 6 + Memory Score: (0 * 10) / 20000 = 0 + Node1 Score: (6 + 0) / 2 = 3 + + Node2 scores on 0-10 scale + CPU Score: (6000 * 10) / 10000 = 6 + Memory Score: (5000 * 10) / 20000 = 2.5 + Node2 Score: (6 + 2.5) / 2 = 4 + */ + pod: &api.Pod{Spec: noResources}, + nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 4}}, + test: "no resources requested, pods scheduled with resources", + pods: []*api.Pod{ + {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}}, + {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + {Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + {Spec:
cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}}, + }, + }, + { + /* + Node1 scores on 0-10 scale + CPU Score: (6000 * 10) / 10000 = 6 + Memory Score: (5000 * 10) / 20000 = 2.5 + Node1 Score: (6 + 2.5) / 2 = 4 + + Node2 scores on 0-10 scale + CPU Score: (6000 * 10) / 10000 = 6 + Memory Score: (10000 * 10) / 20000 = 5 + Node2 Score: (6 + 5) / 2 = 5 + */ + pod: &api.Pod{Spec: cpuAndMemory}, + nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)}, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 5}}, + test: "resources requested, pods scheduled with resources", + pods: []*api.Pod{ + {Spec: cpuOnly}, + {Spec: cpuAndMemory}, + }, + }, + } + + for _, test := range tests { + nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) + list, err := priorityFunction(MostRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !reflect.DeepEqual(test.expectedList, list) { + t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) + } + } +} diff --git a/plugin/pkg/scheduler/algorithm/priorities/node_label.go b/plugin/pkg/scheduler/algorithm/priorities/node_label.go new file mode 100644 index 00000000000..ed177e02e4b --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/node_label.go @@ -0,0 +1,60 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package priorities + +import ( + "fmt" + + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/labels" + "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" +) + +type NodeLabelPrioritizer struct { + label string + presence bool +} + +func NewNodeLabelPriority(label string, presence bool) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) { + labelPrioritizer := &NodeLabelPrioritizer{ + label: label, + presence: presence, + } + return labelPrioritizer.CalculateNodeLabelPriorityMap, nil +} + +// CalculateNodeLabelPriority checks whether a particular label exists on a node or not, regardless of its value. +// If presence is true, prioritizes nodes that have the specified label, regardless of value. +// If presence is false, prioritizes nodes that do not have the specified label. 
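+// For example (as exercised in node_label_test.go): with label "foo" and presence=true, a node carrying the
+// "foo" label scores 10 and every other node scores 0; with presence=false the scores are reversed.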
+func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { + node := nodeInfo.Node() + if node == nil { + return schedulerapi.HostPriority{}, fmt.Errorf("node not found") + } + + exists := labels.Set(node.Labels).Has(n.label) + score := 0 + if (exists && n.presence) || (!exists && !n.presence) { + score = 10 + } + return schedulerapi.HostPriority{ + Host: node.Name, + Score: score, + }, nil +} diff --git a/plugin/pkg/scheduler/algorithm/priorities/node_label_test.go b/plugin/pkg/scheduler/algorithm/priorities/node_label_test.go new file mode 100644 index 00000000000..826ceb3f818 --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/node_label_test.go @@ -0,0 +1,121 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package priorities + +import ( + "reflect" + "sort" + "testing" + + "k8s.io/kubernetes/pkg/api" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" +) + +func TestNewNodeLabelPriority(t *testing.T) { + label1 := map[string]string{"foo": "bar"} + label2 := map[string]string{"bar": "foo"} + label3 := map[string]string{"bar": "baz"} + tests := []struct { + nodes []*api.Node + label string + presence bool + expectedList schedulerapi.HostPriorityList + test string + }{ + { + nodes: []*api.Node{ + {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}}, + {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}}, + {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}}, + }, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}}, + label: "baz", + presence: true, + test: "no match found, presence true", + }, + { + nodes: []*api.Node{ + {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}}, + {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}}, + {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}}, + }, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}}, + label: "baz", + presence: false, + test: "no match found, presence false", + }, + { + nodes: []*api.Node{ + {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}}, + {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}}, + {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}}, + }, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}}, + label: "foo", + presence: true, + test: "one match found, presence true", + }, + { + nodes: []*api.Node{ + {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}}, + {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}}, + {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}}, + }, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, 
{Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}}, + label: "foo", + presence: false, + test: "one match found, presence false", + }, + { + nodes: []*api.Node{ + {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}}, + {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}}, + {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}}, + }, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}}, + label: "bar", + presence: true, + test: "two matches found, presence true", + }, + { + nodes: []*api.Node{ + {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}}, + {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}}, + {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}}, + }, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}}, + label: "bar", + presence: false, + test: "two matches found, presence false", + }, + } + + for _, test := range tests { + nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes) + list, err := priorityFunction(NewNodeLabelPriority(test.label, test.presence))(nil, nodeNameToInfo, test.nodes) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + // sort the two lists to avoid failures on account of different ordering + sort.Sort(test.expectedList) + sort.Sort(list) + if !reflect.DeepEqual(test.expectedList, list) { + t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) + } + } +} diff --git a/plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods.go b/plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods.go new file mode 100644 index 00000000000..d58890e75dd --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods.go @@ -0,0 +1,60 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package priorities + +import ( + "fmt" + + "k8s.io/kubernetes/pkg/api" + priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" +) + +func CalculateNodePreferAvoidPodsPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { + node := nodeInfo.Node() + if node == nil { + return schedulerapi.HostPriority{}, fmt.Errorf("node not found") + } + + controllerRef := priorityutil.GetControllerRef(pod) + if controllerRef != nil { + // Ignore pods that are owned by other controller than ReplicationController + // or ReplicaSet. 
+ if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" { + controllerRef = nil + } + } + if controllerRef == nil { + return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil + } + + avoids, err := api.GetAvoidPodsFromNodeAnnotations(node.Annotations) + if err != nil { + // If we cannot get the annotation, assume the pod can be scheduled there. + return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil + } + for i := range avoids.PreferAvoidPods { + avoid := &avoids.PreferAvoidPods[i] + if controllerRef != nil { + if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID { + return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil + } + } + } + return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil +}
diff --git a/plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods_test.go b/plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods_test.go new file mode 100644 index 00000000000..66d00249c76 --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods_test.go @@ -0,0 +1,155 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package priorities + +import ( + "reflect" + "sort" + "testing" + + "k8s.io/kubernetes/pkg/api" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" +) + +func TestNodePreferAvoidPriority(t *testing.T) { + annotations1 := map[string]string{ + api.PreferAvoidPodsAnnotationKey: ` + { + "preferAvoidPods": [ + { + "podSignature": { + "podController": { + "apiVersion": "v1", + "kind": "ReplicationController", + "name": "foo", + "uid": "abcdef123456", + "controller": true + } + }, + "reason": "some reason", + "message": "some message" + } + ] + }`, + } + annotations2 := map[string]string{ + api.PreferAvoidPodsAnnotationKey: ` + { + "preferAvoidPods": [ + { + "podSignature": { + "podController": { + "apiVersion": "v1", + "kind": "ReplicaSet", + "name": "foo", + "uid": "qwert12345", + "controller": true + } + }, + "reason": "some reason", + "message": "some message" + } + ] + }`, + } + testNodes := []*api.Node{ + { + ObjectMeta: api.ObjectMeta{Name: "machine1", Annotations: annotations1}, + }, + { + ObjectMeta: api.ObjectMeta{Name: "machine2", Annotations: annotations2}, + }, + { + ObjectMeta: api.ObjectMeta{Name: "machine3"}, + }, + } + trueVar := true + tests := []struct { + pod *api.Pod + nodes []*api.Node + expectedList schedulerapi.HostPriorityList + test string + }{ + { + pod: &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Namespace: "default", + OwnerReferences: []api.OwnerReference{ + {Kind: "ReplicationController", Name: "foo", UID: "abcdef123456", Controller: &trueVar}, + }, + }, + }, + nodes: testNodes, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}}, + test: "pod managed by ReplicationController should avoid a node, this node gets the lowest priority score", + }, + { + pod: &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Namespace: "default", + OwnerReferences: []api.OwnerReference{ + {Kind: "RandomController", Name: "foo", UID: "abcdef123456", Controller: &trueVar}, + }, + }, + }, + nodes: testNodes, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}}, + test: "ownership by random controller should be ignored", + }, + { + pod: &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Namespace: "default", + OwnerReferences: []api.OwnerReference{ + {Kind: "ReplicationController", Name: "foo", UID: "abcdef123456"}, + }, + }, + }, + nodes: testNodes, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}}, + test: "owner without Controller field set should be ignored", + }, + { + pod: &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Namespace: "default", + OwnerReferences: []api.OwnerReference{ + {Kind: "ReplicaSet", Name: "foo", UID: "qwert12345", Controller: &trueVar}, + }, + }, + }, + nodes: testNodes, + expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 10}}, + test: "pod managed by ReplicaSet should avoid a node, this node gets the lowest priority score", + }, + } + + for _, test := range tests { + nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes) + list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + // sort the two lists to avoid failures on account of different ordering + sort.Sort(test.expectedList) + sort.Sort(list) + if !reflect.DeepEqual(test.expectedList, list) { + t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) + } + } +}
diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities.go b/plugin/pkg/scheduler/algorithm/priorities/priorities.go deleted file mode 100644 index 61380687102..00000000000 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities.go +++ /dev/null @@ -1,367 +0,0 @@ -/* -Copyright 2014 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License.
-*/ - -package priorities - -import ( - "fmt" - "math" - - "github.com/golang/glog" - "k8s.io/kubernetes/pkg/api" - "k8s.io/kubernetes/pkg/labels" - "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" - priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util" - schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" - "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" -) - -func getNonZeroRequests(pod *api.Pod) *schedulercache.Resource { - result := &schedulercache.Resource{} - for i := range pod.Spec.Containers { - container := &pod.Spec.Containers[i] - cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests) - result.MilliCPU += cpu - result.Memory += memory - } - return result -} - -// The unused capacity is calculated on a scale of 0-10 -// 0 being the lowest priority and 10 being the highest. -// The more unused resources the higher the score is. -func calculateUnusedScore(requested int64, capacity int64, node string) int64 { - if capacity == 0 { - return 0 - } - if requested > capacity { - glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s", - requested, capacity, node) - return 0 - } - return ((capacity - requested) * 10) / capacity -} - -// The used capacity is calculated on a scale of 0-10 -// 0 being the lowest priority and 10 being the highest. -// The more resources are used the higher the score is. This function -// is almost a reversed version of calculatUnusedScore (10 - calculateUnusedScore). -// The main difference is in rounding. It was added to keep the -// final formula clean and not to modify the widely used (by users -// in their default scheduling policies) calculateUSedScore. -func calculateUsedScore(requested int64, capacity int64, node string) int64 { - if capacity == 0 { - return 0 - } - if requested > capacity { - glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s", - requested, capacity, node) - return 0 - } - return (requested * 10) / capacity -} - -// Calculates host priority based on the amount of unused resources. -// 'node' has information about the resources on the node. -// 'pods' is a list of pods currently scheduled on the node. -func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { - node := nodeInfo.Node() - if node == nil { - return schedulerapi.HostPriority{}, fmt.Errorf("node not found") - } - - allocatableResources := nodeInfo.AllocatableResource() - totalResources := *podRequests - totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU - totalResources.Memory += nodeInfo.NonZeroRequest().Memory - - cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name) - memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name) - if glog.V(10) { - // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is - // not logged. There is visible performance gain from it. 
- glog.V(10).Infof( - "%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory", - pod.Name, node.Name, - allocatableResources.MilliCPU, allocatableResources.Memory, - totalResources.MilliCPU, totalResources.Memory, - cpuScore, memoryScore, - ) - } - - return schedulerapi.HostPriority{ - Host: node.Name, - Score: int((cpuScore + memoryScore) / 2), - }, nil -} - -// Calculate the resource used on a node. 'node' has information about the resources on the node. -// 'pods' is a list of pods currently scheduled on the node. -func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { - node := nodeInfo.Node() - if node == nil { - return schedulerapi.HostPriority{}, fmt.Errorf("node not found") - } - - allocatableResources := nodeInfo.AllocatableResource() - totalResources := *podRequests - totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU - totalResources.Memory += nodeInfo.NonZeroRequest().Memory - - cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name) - memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name) - if glog.V(10) { - // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is - // not logged. There is visible performance gain from it. - glog.V(10).Infof( - "%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory", - pod.Name, node.Name, - allocatableResources.MilliCPU, allocatableResources.Memory, - totalResources.MilliCPU, totalResources.Memory, - cpuScore, memoryScore, - ) - } - - return schedulerapi.HostPriority{ - Host: node.Name, - Score: int((cpuScore + memoryScore) / 2), - }, nil -} - -// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources. -// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes -// based on the minimum of the average of the fraction of requested to capacity. -// Details: cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity) / 2 -func LeastRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { - var nonZeroRequest *schedulercache.Resource - if priorityMeta, ok := meta.(*priorityMetadata); ok { - nonZeroRequest = priorityMeta.nonZeroRequest - } else { - // We couldn't parse metadata - fallback to computing it. - nonZeroRequest = getNonZeroRequests(pod) - } - return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo) -} - -// MostRequestedPriority is a priority function that favors nodes with most requested resources. -// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes -// based on the maximum of the average of the fraction of requested to capacity. -// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2 -func MostRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { - var nonZeroRequest *schedulercache.Resource - if priorityMeta, ok := meta.(*priorityMetadata); ok { - nonZeroRequest = priorityMeta.nonZeroRequest - } else { - // We couldn't parse metadatat - fallback to computing it. 
- nonZeroRequest = getNonZeroRequests(pod) - } - return calculateUsedPriority(pod, nonZeroRequest, nodeInfo) -} - -type NodeLabelPrioritizer struct { - label string - presence bool -} - -func NewNodeLabelPriority(label string, presence bool) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) { - labelPrioritizer := &NodeLabelPrioritizer{ - label: label, - presence: presence, - } - return labelPrioritizer.CalculateNodeLabelPriorityMap, nil -} - -// CalculateNodeLabelPriority checks whether a particular label exists on a node or not, regardless of its value. -// If presence is true, prioritizes nodes that have the specified label, regardless of value. -// If presence is false, prioritizes nodes that do not have the specified label. -func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { - node := nodeInfo.Node() - if node == nil { - return schedulerapi.HostPriority{}, fmt.Errorf("node not found") - } - - exists := labels.Set(node.Labels).Has(n.label) - score := 0 - if (exists && n.presence) || (!exists && !n.presence) { - score = 10 - } - return schedulerapi.HostPriority{ - Host: node.Name, - Score: score, - }, nil -} - -// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range. -const ( - mb int64 = 1024 * 1024 - minImgSize int64 = 23 * mb - maxImgSize int64 = 1000 * mb -) - -// ImageLocalityPriority is a priority function that favors nodes that already have requested pod container's images. -// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10 -// based on the total size of those images. -// - If none of the images are present, this node will be given the lowest priority. -// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority. -func ImageLocalityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { - node := nodeInfo.Node() - if node == nil { - return schedulerapi.HostPriority{}, fmt.Errorf("node not found") - } - - var sumSize int64 - for i := range pod.Spec.Containers { - sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i]) - } - return schedulerapi.HostPriority{ - Host: node.Name, - Score: calculateScoreFromSize(sumSize), - }, nil -} - -// checkContainerImageOnNode checks if a container image is present on a node and returns its size. -func checkContainerImageOnNode(node *api.Node, container *api.Container) int64 { - for _, image := range node.Status.Images { - for _, name := range image.Names { - if container.Image == name { - // Should return immediately. - return image.SizeBytes - } - } - } - return 0 -} - -// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node. -// 1. Split image size range into 10 buckets. -// 2. Decide the priority of a given sumSize based on which bucket it belongs to. -func calculateScoreFromSize(sumSize int64) int { - var score int - switch { - case sumSize == 0 || sumSize < minImgSize: - // score == 0 means none of the images required by this pod are present on this - // node or the total size of the images present is too small to be taken into further consideration. - score = 0 - // If existing images' total size is larger than max, just make it highest priority. 
- case sumSize >= maxImgSize: - score = 10 - default: - score = int((10 * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1) - } - // Return which bucket the given size belongs to - return score -} - -// BalancedResourceAllocation favors nodes with balanced resource usage rate. -// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority. -// It calculates the difference between the cpu and memory fracion of capacity, and prioritizes the host based on how -// close the two metrics are to each other. -// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by: -// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization" -func BalancedResourceAllocationMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { - var nonZeroRequest *schedulercache.Resource - if priorityMeta, ok := meta.(*priorityMetadata); ok { - nonZeroRequest = priorityMeta.nonZeroRequest - } else { - // We couldn't parse metadatat - fallback to computing it. - nonZeroRequest = getNonZeroRequests(pod) - } - return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo) -} - -func calculateBalancedResourceAllocation(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { - node := nodeInfo.Node() - if node == nil { - return schedulerapi.HostPriority{}, fmt.Errorf("node not found") - } - - allocatableResources := nodeInfo.AllocatableResource() - totalResources := *podRequests - totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU - totalResources.Memory += nodeInfo.NonZeroRequest().Memory - - cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU) - memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory) - score := int(0) - if cpuFraction >= 1 || memoryFraction >= 1 { - // if requested >= capacity, the corresponding host should never be preferred. - score = 0 - } else { - // Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1 - // respectively. Multilying the absolute value of the difference by 10 scales the value to - // 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from - // 10 leads to the score which also scales from 0 to 10 while 10 representing well balanced. - diff := math.Abs(cpuFraction - memoryFraction) - score = int(10 - diff*10) - } - if glog.V(10) { - // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is - // not logged. There is visible performance gain from it. 
- glog.V(10).Infof( - "%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d", - pod.Name, node.Name, - allocatableResources.MilliCPU, allocatableResources.Memory, - totalResources.MilliCPU, totalResources.Memory, - score, - ) - } - - return schedulerapi.HostPriority{ - Host: node.Name, - Score: score, - }, nil -} - -func fractionOfCapacity(requested, capacity int64) float64 { - if capacity == 0 { - return 1 - } - return float64(requested) / float64(capacity) -} - -func CalculateNodePreferAvoidPodsPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { - node := nodeInfo.Node() - if node == nil { - return schedulerapi.HostPriority{}, fmt.Errorf("node not found") - } - - controllerRef := priorityutil.GetControllerRef(pod) - if controllerRef != nil { - // Ignore pods that are owned by other controller than ReplicationController - // or ReplicaSet. - if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" { - controllerRef = nil - } - } - if controllerRef == nil { - return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil - } - - avoids, err := api.GetAvoidPodsFromNodeAnnotations(node.Annotations) - if err != nil { - // If we cannot get annotation, assume it's schedulable there. - return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil - } - for i := range avoids.PreferAvoidPods { - avoid := &avoids.PreferAvoidPods[i] - if controllerRef != nil { - if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID { - return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil - } - } - } - return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil -} diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go index 668a044656f..2f0acb7c785 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go @@ -20,931 +20,13 @@ import ( "fmt" "os/exec" "path/filepath" - "reflect" - "sort" "testing" "k8s.io/gengo/parser" "k8s.io/gengo/types" - "k8s.io/kubernetes/pkg/api" - "k8s.io/kubernetes/pkg/api/resource" "k8s.io/kubernetes/pkg/util/codeinspector" - "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" - schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" - "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" ) -func makeNode(node string, milliCPU, memory int64) *api.Node { - return &api.Node{ - ObjectMeta: api.ObjectMeta{Name: node}, - Status: api.NodeStatus{ - Capacity: api.ResourceList{ - "cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI), - "memory": *resource.NewQuantity(memory, resource.BinarySI), - }, - Allocatable: api.ResourceList{ - "cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI), - "memory": *resource.NewQuantity(memory, resource.BinarySI), - }, - }, - } -} - -func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction { - return func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { - result := make(schedulerapi.HostPriorityList, 0, len(nodes)) - for i := range nodes { - hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name]) - if err != nil { - return nil, err - } - result = append(result, 
hostResult) - } - if reduceFn != nil { - if err := reduceFn(pod, result); err != nil { - return nil, err - } - } - return result, nil - } -} - -func TestLeastRequested(t *testing.T) { - labels1 := map[string]string{ - "foo": "bar", - "baz": "blah", - } - labels2 := map[string]string{ - "bar": "foo", - "baz": "blah", - } - machine1Spec := api.PodSpec{ - NodeName: "machine1", - } - machine2Spec := api.PodSpec{ - NodeName: "machine2", - } - noResources := api.PodSpec{ - Containers: []api.Container{}, - } - cpuOnly := api.PodSpec{ - NodeName: "machine1", - Containers: []api.Container{ - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse("1000m"), - "memory": resource.MustParse("0"), - }, - }, - }, - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse("2000m"), - "memory": resource.MustParse("0"), - }, - }, - }, - }, - } - cpuOnly2 := cpuOnly - cpuOnly2.NodeName = "machine2" - cpuAndMemory := api.PodSpec{ - NodeName: "machine2", - Containers: []api.Container{ - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse("1000m"), - "memory": resource.MustParse("2000"), - }, - }, - }, - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse("2000m"), - "memory": resource.MustParse("3000"), - }, - }, - }, - }, - } - tests := []struct { - pod *api.Pod - pods []*api.Pod - nodes []*api.Node - expectedList schedulerapi.HostPriorityList - test string - }{ - { - /* - Node1 scores (remaining resources) on 0-10 scale - CPU Score: ((4000 - 0) *10) / 4000 = 10 - Memory Score: ((10000 - 0) *10) / 10000 = 10 - Node1 Score: (10 + 10) / 2 = 10 - - Node2 scores (remaining resources) on 0-10 scale - CPU Score: ((4000 - 0) *10) / 4000 = 10 - Memory Score: ((10000 - 0) *10) / 10000 = 10 - Node2 Score: (10 + 10) / 2 = 10 - */ - pod: &api.Pod{Spec: noResources}, - nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}}, - test: "nothing scheduled, nothing requested", - }, - { - /* - Node1 scores on 0-10 scale - CPU Score: ((4000 - 3000) *10) / 4000 = 2.5 - Memory Score: ((10000 - 5000) *10) / 10000 = 5 - Node1 Score: (2.5 + 5) / 2 = 3 - - Node2 scores on 0-10 scale - CPU Score: ((6000 - 3000) *10) / 6000 = 5 - Memory Score: ((10000 - 5000) *10) / 10000 = 5 - Node2 Score: (5 + 5) / 2 = 5 - */ - pod: &api.Pod{Spec: cpuAndMemory}, - nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 5}}, - test: "nothing scheduled, resources requested, differently sized machines", - }, - { - /* - Node1 scores on 0-10 scale - CPU Score: ((4000 - 0) *10) / 4000 = 10 - Memory Score: ((10000 - 0) *10) / 10000 = 10 - Node1 Score: (10 + 10) / 2 = 10 - - Node2 scores on 0-10 scale - CPU Score: ((4000 - 0) *10) / 4000 = 10 - Memory Score: ((10000 - 0) *10) / 10000 = 10 - Node2 Score: (10 + 10) / 2 = 10 - */ - pod: &api.Pod{Spec: noResources}, - nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}}, - test: "no resources requested, pods scheduled", - pods: []*api.Pod{ - {Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}}, - {Spec: 
machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - {Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - {Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - }, - }, - { - /* - Node1 scores on 0-10 scale - CPU Score: ((10000 - 6000) *10) / 10000 = 4 - Memory Score: ((20000 - 0) *10) / 20000 = 10 - Node1 Score: (4 + 10) / 2 = 7 - - Node2 scores on 0-10 scale - CPU Score: ((10000 - 6000) *10) / 10000 = 4 - Memory Score: ((20000 - 5000) *10) / 20000 = 7.5 - Node2 Score: (4 + 7.5) / 2 = 5 - */ - pod: &api.Pod{Spec: noResources}, - nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 5}}, - test: "no resources requested, pods scheduled with resources", - pods: []*api.Pod{ - {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}}, - {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - {Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - {Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - }, - }, - { - /* - Node1 scores on 0-10 scale - CPU Score: ((10000 - 6000) *10) / 10000 = 4 - Memory Score: ((20000 - 5000) *10) / 20000 = 7.5 - Node1 Score: (4 + 7.5) / 2 = 5 - - Node2 scores on 0-10 scale - CPU Score: ((10000 - 6000) *10) / 10000 = 4 - Memory Score: ((20000 - 10000) *10) / 20000 = 5 - Node2 Score: (4 + 5) / 2 = 4 - */ - pod: &api.Pod{Spec: cpuAndMemory}, - nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 4}}, - test: "resources requested, pods scheduled with resources", - pods: []*api.Pod{ - {Spec: cpuOnly}, - {Spec: cpuAndMemory}, - }, - }, - { - /* - Node1 scores on 0-10 scale - CPU Score: ((10000 - 6000) *10) / 10000 = 4 - Memory Score: ((20000 - 5000) *10) / 20000 = 7.5 - Node1 Score: (4 + 7.5) / 2 = 5 - - Node2 scores on 0-10 scale - CPU Score: ((10000 - 6000) *10) / 10000 = 4 - Memory Score: ((50000 - 10000) *10) / 50000 = 8 - Node2 Score: (4 + 8) / 2 = 6 - */ - pod: &api.Pod{Spec: cpuAndMemory}, - nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 6}}, - test: "resources requested, pods scheduled with resources, differently sized machines", - pods: []*api.Pod{ - {Spec: cpuOnly}, - {Spec: cpuAndMemory}, - }, - }, - { - /* - Node1 scores on 0-10 scale - CPU Score: ((4000 - 6000) *10) / 4000 = 0 - Memory Score: ((10000 - 0) *10) / 10000 = 10 - Node1 Score: (0 + 10) / 2 = 5 - - Node2 scores on 0-10 scale - CPU Score: ((4000 - 6000) *10) / 4000 = 0 - Memory Score: ((10000 - 5000) *10) / 10000 = 5 - Node2 Score: (0 + 5) / 2 = 2 - */ - pod: &api.Pod{Spec: cpuOnly}, - nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 2}}, - test: "requested resources exceed node capacity", - pods: []*api.Pod{ - {Spec: cpuOnly}, - {Spec: cpuAndMemory}, - }, - }, - { - pod: &api.Pod{Spec: noResources}, - nodes: []*api.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}}, - test: "zero node resources, pods scheduled with resources", - pods: []*api.Pod{ - {Spec: 
cpuOnly}, - {Spec: cpuAndMemory}, - }, - }, - } - - for _, test := range tests { - nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) - list, err := priorityFunction(LeastRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - if !reflect.DeepEqual(test.expectedList, list) { - t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) - } - } -} - -func TestMostRequested(t *testing.T) { - labels1 := map[string]string{ - "foo": "bar", - "baz": "blah", - } - labels2 := map[string]string{ - "bar": "foo", - "baz": "blah", - } - noResources := api.PodSpec{ - Containers: []api.Container{}, - } - cpuOnly := api.PodSpec{ - NodeName: "machine1", - Containers: []api.Container{ - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse("1000m"), - "memory": resource.MustParse("0"), - }, - }, - }, - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse("2000m"), - "memory": resource.MustParse("0"), - }, - }, - }, - }, - } - cpuOnly2 := cpuOnly - cpuOnly2.NodeName = "machine2" - cpuAndMemory := api.PodSpec{ - NodeName: "machine2", - Containers: []api.Container{ - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse("1000m"), - "memory": resource.MustParse("2000"), - }, - }, - }, - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse("2000m"), - "memory": resource.MustParse("3000"), - }, - }, - }, - }, - } - tests := []struct { - pod *api.Pod - pods []*api.Pod - nodes []*api.Node - expectedList schedulerapi.HostPriorityList - test string - }{ - { - /* - Node1 scores (used resources) on 0-10 scale - CPU Score: (0 * 10 / 4000 = 0 - Memory Score: (0 * 10) / 10000 = 0 - Node1 Score: (0 + 0) / 2 = 0 - - Node2 scores (used resources) on 0-10 scale - CPU Score: (0 * 10 / 4000 = 0 - Memory Score: (0 * 10 / 10000 = 0 - Node2 Score: (0 + 0) / 2 = 0 - */ - pod: &api.Pod{Spec: noResources}, - nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}}, - test: "nothing scheduled, nothing requested", - }, - { - /* - Node1 scores on 0-10 scale - CPU Score: (3000 * 10 / 4000 = 7.5 - Memory Score: (5000 * 10) / 10000 = 5 - Node1 Score: (7.5 + 5) / 2 = 6 - - Node2 scores on 0-10 scale - CPU Score: (3000 * 10 / 6000 = 5 - Memory Score: (5000 * 10 / 10000 = 5 - Node2 Score: (5 + 5) / 2 = 5 - */ - pod: &api.Pod{Spec: cpuAndMemory}, - nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 5}}, - test: "nothing scheduled, resources requested, differently sized machines", - }, - { - /* - Node1 scores on 0-10 scale - CPU Score: (6000 * 10) / 10000 = 6 - Memory Score: (0 * 10) / 20000 = 10 - Node1 Score: (6 + 0) / 2 = 3 - - Node2 scores on 0-10 scale - CPU Score: (6000 * 10) / 10000 = 6 - Memory Score: (5000 * 10) / 20000 = 2.5 - Node2 Score: (6 + 2.5) / 2 = 4 - */ - pod: &api.Pod{Spec: noResources}, - nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 4}}, - test: "no resources requested, pods scheduled with resources", - pods: 
[]*api.Pod{ - {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}}, - {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - {Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - {Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - }, - }, - { - /* - Node1 scores on 0-10 scale - CPU Score: (6000 * 10) / 10000 = 6 - Memory Score: (5000 * 10) / 20000 = 2.5 - Node1 Score: (6 + 2.5) / 2 = 4 - - Node2 scores on 0-10 scale - CPU Score: (6000 * 10) / 10000 = 6 - Memory Score: (10000 * 10) / 20000 = 5 - Node2 Score: (6 + 5) / 2 = 5 - */ - pod: &api.Pod{Spec: cpuAndMemory}, - nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 5}}, - test: "resources requested, pods scheduled with resources", - pods: []*api.Pod{ - {Spec: cpuOnly}, - {Spec: cpuAndMemory}, - }, - }, - } - - for _, test := range tests { - nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) - list, err := priorityFunction(MostRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - if !reflect.DeepEqual(test.expectedList, list) { - t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) - } - } -} - -func TestNewNodeLabelPriority(t *testing.T) { - label1 := map[string]string{"foo": "bar"} - label2 := map[string]string{"bar": "foo"} - label3 := map[string]string{"bar": "baz"} - tests := []struct { - nodes []*api.Node - label string - presence bool - expectedList schedulerapi.HostPriorityList - test string - }{ - { - nodes: []*api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}}, - {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}}, - {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}}, - }, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}}, - label: "baz", - presence: true, - test: "no match found, presence true", - }, - { - nodes: []*api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}}, - {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}}, - {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}}, - }, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}}, - label: "baz", - presence: false, - test: "no match found, presence false", - }, - { - nodes: []*api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}}, - {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}}, - {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}}, - }, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}}, - label: "foo", - presence: true, - test: "one match found, presence true", - }, - { - nodes: []*api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}}, - {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}}, - {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}}, - }, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}}, - label: "foo", - presence: false, - test: "one match found, presence false", - }, - { - nodes: []*api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "machine1", 
Labels: label1}}, - {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}}, - {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}}, - }, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}}, - label: "bar", - presence: true, - test: "two matches found, presence true", - }, - { - nodes: []*api.Node{ - {ObjectMeta: api.ObjectMeta{Name: "machine1", Labels: label1}}, - {ObjectMeta: api.ObjectMeta{Name: "machine2", Labels: label2}}, - {ObjectMeta: api.ObjectMeta{Name: "machine3", Labels: label3}}, - }, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}}, - label: "bar", - presence: false, - test: "two matches found, presence false", - }, - } - - for _, test := range tests { - nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes) - list, err := priorityFunction(NewNodeLabelPriority(test.label, test.presence))(nil, nodeNameToInfo, test.nodes) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - // sort the two lists to avoid failures on account of different ordering - sort.Sort(test.expectedList) - sort.Sort(list) - if !reflect.DeepEqual(test.expectedList, list) { - t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) - } - } -} - -func TestBalancedResourceAllocation(t *testing.T) { - labels1 := map[string]string{ - "foo": "bar", - "baz": "blah", - } - labels2 := map[string]string{ - "bar": "foo", - "baz": "blah", - } - machine1Spec := api.PodSpec{ - NodeName: "machine1", - } - machine2Spec := api.PodSpec{ - NodeName: "machine2", - } - noResources := api.PodSpec{ - Containers: []api.Container{}, - } - cpuOnly := api.PodSpec{ - NodeName: "machine1", - Containers: []api.Container{ - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse("1000m"), - "memory": resource.MustParse("0"), - }, - }, - }, - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse("2000m"), - "memory": resource.MustParse("0"), - }, - }, - }, - }, - } - cpuOnly2 := cpuOnly - cpuOnly2.NodeName = "machine2" - cpuAndMemory := api.PodSpec{ - NodeName: "machine2", - Containers: []api.Container{ - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse("1000m"), - "memory": resource.MustParse("2000"), - }, - }, - }, - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse("2000m"), - "memory": resource.MustParse("3000"), - }, - }, - }, - }, - } - tests := []struct { - pod *api.Pod - pods []*api.Pod - nodes []*api.Node - expectedList schedulerapi.HostPriorityList - test string - }{ - { - /* - Node1 scores (remaining resources) on 0-10 scale - CPU Fraction: 0 / 4000 = 0% - Memory Fraction: 0 / 10000 = 0% - Node1 Score: 10 - (0-0)*10 = 10 - - Node2 scores (remaining resources) on 0-10 scale - CPU Fraction: 0 / 4000 = 0 % - Memory Fraction: 0 / 10000 = 0% - Node2 Score: 10 - (0-0)*10 = 10 - */ - pod: &api.Pod{Spec: noResources}, - nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}}, - test: "nothing scheduled, nothing requested", - }, - { - /* - Node1 scores on 0-10 scale - CPU Fraction: 3000 / 4000= 75% - Memory Fraction: 5000 / 10000 = 50% - Node1 Score: 10 - (0.75-0.5)*10 = 7 - - Node2 
scores on 0-10 scale - CPU Fraction: 3000 / 6000= 50% - Memory Fraction: 5000/10000 = 50% - Node2 Score: 10 - (0.5-0.5)*10 = 10 - */ - pod: &api.Pod{Spec: cpuAndMemory}, - nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 10}}, - test: "nothing scheduled, resources requested, differently sized machines", - }, - { - /* - Node1 scores on 0-10 scale - CPU Fraction: 0 / 4000= 0% - Memory Fraction: 0 / 10000 = 0% - Node1 Score: 10 - (0-0)*10 = 10 - - Node2 scores on 0-10 scale - CPU Fraction: 0 / 4000= 0% - Memory Fraction: 0 / 10000 = 0% - Node2 Score: 10 - (0-0)*10 = 10 - */ - pod: &api.Pod{Spec: noResources}, - nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}}, - test: "no resources requested, pods scheduled", - pods: []*api.Pod{ - {Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}}, - {Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - {Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - {Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - }, - }, - { - /* - Node1 scores on 0-10 scale - CPU Fraction: 6000 / 10000 = 60% - Memory Fraction: 0 / 20000 = 0% - Node1 Score: 10 - (0.6-0)*10 = 4 - - Node2 scores on 0-10 scale - CPU Fraction: 6000 / 10000 = 60% - Memory Fraction: 5000 / 20000 = 25% - Node2 Score: 10 - (0.6-0.25)*10 = 6 - */ - pod: &api.Pod{Spec: noResources}, - nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 6}}, - test: "no resources requested, pods scheduled with resources", - pods: []*api.Pod{ - {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}}, - {Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - {Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - {Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}}, - }, - }, - { - /* - Node1 scores on 0-10 scale - CPU Fraction: 6000 / 10000 = 60% - Memory Fraction: 5000 / 20000 = 25% - Node1 Score: 10 - (0.6-0.25)*10 = 6 - - Node2 scores on 0-10 scale - CPU Fraction: 6000 / 10000 = 60% - Memory Fraction: 10000 / 20000 = 50% - Node2 Score: 10 - (0.6-0.5)*10 = 9 - */ - pod: &api.Pod{Spec: cpuAndMemory}, - nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 9}}, - test: "resources requested, pods scheduled with resources", - pods: []*api.Pod{ - {Spec: cpuOnly}, - {Spec: cpuAndMemory}, - }, - }, - { - /* - Node1 scores on 0-10 scale - CPU Fraction: 6000 / 10000 = 60% - Memory Fraction: 5000 / 20000 = 25% - Node1 Score: 10 - (0.6-0.25)*10 = 6 - - Node2 scores on 0-10 scale - CPU Fraction: 6000 / 10000 = 60% - Memory Fraction: 10000 / 50000 = 20% - Node2 Score: 10 - (0.6-0.2)*10 = 6 - */ - pod: &api.Pod{Spec: cpuAndMemory}, - nodes: []*api.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 6}}, - test: "resources requested, pods scheduled with resources, differently sized machines", - pods: []*api.Pod{ - {Spec: cpuOnly}, - {Spec: cpuAndMemory}, - }, - }, - 
{ - /* - Node1 scores on 0-10 scale - CPU Fraction: 6000 / 4000 > 100% ==> Score := 0 - Memory Fraction: 0 / 10000 = 0 - Node1 Score: 0 - - Node2 scores on 0-10 scale - CPU Fraction: 6000 / 4000 > 100% ==> Score := 0 - Memory Fraction 5000 / 10000 = 50% - Node2 Score: 0 - */ - pod: &api.Pod{Spec: cpuOnly}, - nodes: []*api.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}}, - test: "requested resources exceed node capacity", - pods: []*api.Pod{ - {Spec: cpuOnly}, - {Spec: cpuAndMemory}, - }, - }, - { - pod: &api.Pod{Spec: noResources}, - nodes: []*api.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}}, - test: "zero node resources, pods scheduled with resources", - pods: []*api.Pod{ - {Spec: cpuOnly}, - {Spec: cpuAndMemory}, - }, - }, - } - - for _, test := range tests { - nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) - list, err := priorityFunction(BalancedResourceAllocationMap, nil)(test.pod, nodeNameToInfo, test.nodes) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - if !reflect.DeepEqual(test.expectedList, list) { - t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) - } - } -} - -func TestImageLocalityPriority(t *testing.T) { - test_40_250 := api.PodSpec{ - Containers: []api.Container{ - { - Image: "gcr.io/40", - }, - { - Image: "gcr.io/250", - }, - }, - } - - test_40_140 := api.PodSpec{ - Containers: []api.Container{ - { - Image: "gcr.io/40", - }, - { - Image: "gcr.io/140", - }, - }, - } - - test_min_max := api.PodSpec{ - Containers: []api.Container{ - { - Image: "gcr.io/10", - }, - { - Image: "gcr.io/2000", - }, - }, - } - - node_40_140_2000 := api.NodeStatus{ - Images: []api.ContainerImage{ - { - Names: []string{ - "gcr.io/40", - "gcr.io/40:v1", - "gcr.io/40:v1", - }, - SizeBytes: int64(40 * mb), - }, - { - Names: []string{ - "gcr.io/140", - "gcr.io/140:v1", - }, - SizeBytes: int64(140 * mb), - }, - { - Names: []string{ - "gcr.io/2000", - }, - SizeBytes: int64(2000 * mb), - }, - }, - } - - node_250_10 := api.NodeStatus{ - Images: []api.ContainerImage{ - { - Names: []string{ - "gcr.io/250", - }, - SizeBytes: int64(250 * mb), - }, - { - Names: []string{ - "gcr.io/10", - "gcr.io/10:v1", - }, - SizeBytes: int64(10 * mb), - }, - }, - } - - tests := []struct { - pod *api.Pod - pods []*api.Pod - nodes []*api.Node - expectedList schedulerapi.HostPriorityList - test string - }{ - { - // Pod: gcr.io/40 gcr.io/250 - - // Node1 - // Image: gcr.io/40 40MB - // Score: (40M-23M)/97.7M + 1 = 1 - - // Node2 - // Image: gcr.io/250 250MB - // Score: (250M-23M)/97.7M + 1 = 3 - pod: &api.Pod{Spec: test_40_250}, - nodes: []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}}, - test: "two images spread on two nodes, prefer the larger image one", - }, - { - // Pod: gcr.io/40 gcr.io/140 - - // Node1 - // Image: gcr.io/40 40MB, gcr.io/140 140MB - // Score: (40M+140M-23M)/97.7M + 1 = 2 - - // Node2 - // Image: not present - // Score: 0 - pod: &api.Pod{Spec: test_40_140}, - nodes: []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: 
"machine2", Score: 0}}, - test: "two images on one node, prefer this node", - }, - { - // Pod: gcr.io/2000 gcr.io/10 - - // Node1 - // Image: gcr.io/2000 2000MB - // Score: 2000 > max score = 10 - - // Node2 - // Image: gcr.io/10 10MB - // Score: 10 < min score = 0 - pod: &api.Pod{Spec: test_min_max}, - nodes: []*api.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)}, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}}, - test: "if exceed limit, use limit", - }, - } - - for _, test := range tests { - nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) - list, err := priorityFunction(ImageLocalityPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - - sort.Sort(test.expectedList) - sort.Sort(list) - - if !reflect.DeepEqual(test.expectedList, list) { - t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) - } - } -} - -func makeImageNode(node string, status api.NodeStatus) *api.Node { - return &api.Node{ - ObjectMeta: api.ObjectMeta{Name: node}, - Status: status, - } -} - func getPrioritySignatures() ([]*types.Signature, error) { filePath := "./../types.go" pkgName := filepath.Dir(filePath) @@ -1033,131 +115,3 @@ func TestPrioritiesRegistered(t *testing.T) { } } } - -func TestNodePreferAvoidPriority(t *testing.T) { - annotations1 := map[string]string{ - api.PreferAvoidPodsAnnotationKey: ` - { - "preferAvoidPods": [ - { - "podSignature": { - "podController": { - "apiVersion": "v1", - "kind": "ReplicationController", - "name": "foo", - "uid": "abcdef123456", - "controller": true - } - }, - "reason": "some reason", - "message": "some message" - } - ] - }`, - } - annotations2 := map[string]string{ - api.PreferAvoidPodsAnnotationKey: ` - { - "preferAvoidPods": [ - { - "podSignature": { - "podController": { - "apiVersion": "v1", - "kind": "ReplicaSet", - "name": "foo", - "uid": "qwert12345", - "controller": true - } - }, - "reason": "some reason", - "message": "some message" - } - ] - }`, - } - testNodes := []*api.Node{ - { - ObjectMeta: api.ObjectMeta{Name: "machine1", Annotations: annotations1}, - }, - { - ObjectMeta: api.ObjectMeta{Name: "machine2", Annotations: annotations2}, - }, - { - ObjectMeta: api.ObjectMeta{Name: "machine3"}, - }, - } - trueVar := true - tests := []struct { - pod *api.Pod - nodes []*api.Node - expectedList schedulerapi.HostPriorityList - test string - }{ - { - pod: &api.Pod{ - ObjectMeta: api.ObjectMeta{ - Namespace: "default", - OwnerReferences: []api.OwnerReference{ - {Kind: "ReplicationController", Name: "foo", UID: "abcdef123456", Controller: &trueVar}, - }, - }, - }, - nodes: testNodes, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}}, - test: "pod managed by ReplicationController should avoid a node, this node get lowest priority score", - }, - { - pod: &api.Pod{ - ObjectMeta: api.ObjectMeta{ - Namespace: "default", - OwnerReferences: []api.OwnerReference{ - {Kind: "RandomController", Name: "foo", UID: "abcdef123456", Controller: &trueVar}, - }, - }, - }, - nodes: testNodes, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}}, - test: "ownership by random controller should be ignored", - }, - { - pod: &api.Pod{ - ObjectMeta: api.ObjectMeta{ - Namespace: "default", - OwnerReferences: 
[]api.OwnerReference{ - {Kind: "ReplicationController", Name: "foo", UID: "abcdef123456"}, - }, - }, - }, - nodes: testNodes, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}}, - test: "owner without Controller field set should be ignored", - }, - { - pod: &api.Pod{ - ObjectMeta: api.ObjectMeta{ - Namespace: "default", - OwnerReferences: []api.OwnerReference{ - {Kind: "ReplicaSet", Name: "foo", UID: "qwert12345", Controller: &trueVar}, - }, - }, - }, - nodes: testNodes, - expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 10}}, - test: "pod managed by ReplicaSet should avoid a node, this node get lowest priority score", - }, - } - - for _, test := range tests { - nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes) - list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - // sort the two lists to avoid failures on account of different ordering - sort.Sort(test.expectedList) - sort.Sort(list) - if !reflect.DeepEqual(test.expectedList, list) { - t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) - } - } -} diff --git a/plugin/pkg/scheduler/algorithm/priorities/test_util.go b/plugin/pkg/scheduler/algorithm/priorities/test_util.go new file mode 100644 index 00000000000..5ed9f89bbbe --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/priorities/test_util.go @@ -0,0 +1,60 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package priorities + +import ( + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/resource" + "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" + schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" +) + +func makeNode(node string, milliCPU, memory int64) *api.Node { + return &api.Node{ + ObjectMeta: api.ObjectMeta{Name: node}, + Status: api.NodeStatus{ + Capacity: api.ResourceList{ + "cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI), + "memory": *resource.NewQuantity(memory, resource.BinarySI), + }, + Allocatable: api.ResourceList{ + "cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI), + "memory": *resource.NewQuantity(memory, resource.BinarySI), + }, + }, + } +} + +func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction { + return func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { + result := make(schedulerapi.HostPriorityList, 0, len(nodes)) + for i := range nodes { + hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name]) + if err != nil { + return nil, err + } + result = append(result, hostResult) + } + if reduceFn != nil { + if err := reduceFn(pod, result); err != nil { + return nil, err + } + } + return result, nil + } +}
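
Note for reviewers (not part of the patch): the sketch below shows how the shared helpers in test_util.go are meant to be combined with one of the split-out map functions. makeNode, priorityFunction, LeastRequestedPriorityMap and schedulercache.CreateNodeNameToInfoMap come from the files in this change; the test name TestLeastRequestedSketch and the concrete request/capacity numbers are illustrative assumptions. With the pod requesting half of the node's allocatable CPU and memory, the least-requested formula averages two per-resource scores of 5, so the node scores 5.

package priorities

import (
	"reflect"
	"testing"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/resource"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

// Illustrative only: exercises LeastRequestedPriorityMap through the shared
// priorityFunction/makeNode helpers from test_util.go. The pod requests half
// of the node's allocatable CPU and memory, so the expected score is
// ((4000-2000)*10/4000 + (10000-5000)*10/10000) / 2 = 5.
func TestLeastRequestedSketch(t *testing.T) {
	pod := &api.Pod{
		Spec: api.PodSpec{
			Containers: []api.Container{
				{
					Resources: api.ResourceRequirements{
						Requests: api.ResourceList{
							"cpu":    resource.MustParse("2000m"),
							"memory": resource.MustParse("5000"),
						},
					},
				},
			},
		},
	}
	nodes := []*api.Node{makeNode("machine1", 4000, 10000)}
	nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, nodes)

	// priorityFunction wraps a map/reduce pair into the old-style PriorityFunction,
	// which is what the split-out test files call.
	list, err := priorityFunction(LeastRequestedPriorityMap, nil)(pod, nodeNameToInfo, nodes)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	expected := schedulerapi.HostPriorityList{{Host: "machine1", Score: 5}}
	if !reflect.DeepEqual(expected, list) {
		t.Errorf("expected %#v, got %#v", expected, list)
	}
}

Keeping makeNode and priorityFunction in a plain .go file (test_util.go) rather than a _test.go file is what lets each of the new per-priority test files reuse this scaffolding without duplicating it.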