diff --git a/pkg/scheduler/priorities.go b/pkg/scheduler/priorities.go
index aa9c0bdb368..eab28e21cb8 100644
--- a/pkg/scheduler/priorities.go
+++ b/pkg/scheduler/priorities.go
@@ -17,6 +17,8 @@ limitations under the License.
 package scheduler
 
 import (
+	"math"
+
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
 	"github.com/golang/glog"
@@ -132,3 +134,77 @@ func (n *NodeLabelPrioritizer) CalculateNodeLabelPriority(pod api.Pod, podLister
 	}
 	return result, nil
 }
+
+// BalancedResourceAllocation favors nodes with a balanced resource usage rate.
+// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
+// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how
+// close the two metrics are to each other.
+// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
+// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
+func BalancedResourceAllocation(pod api.Pod, podLister PodLister, minionLister MinionLister) (HostPriorityList, error) {
+	nodes, err := minionLister.List()
+	if err != nil {
+		return HostPriorityList{}, err
+	}
+	podsToMachines, err := MapPodsToMachines(podLister)
+
+	list := HostPriorityList{}
+	for _, node := range nodes.Items {
+		list = append(list, calculateBalancedResourceAllocation(pod, node, podsToMachines[node.Name]))
+	}
+	return list, nil
+}
+
+func calculateBalancedResourceAllocation(pod api.Pod, node api.Node, pods []api.Pod) HostPriority {
+	totalMilliCPU := int64(0)
+	totalMemory := int64(0)
+	score := int(0)
+	for _, existingPod := range pods {
+		for _, container := range existingPod.Spec.Containers {
+			totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
+			totalMemory += container.Resources.Limits.Memory().Value()
+		}
+	}
+	// Add the resources requested by the current pod being scheduled.
+	// This also helps differentiate between differently sized, but empty, minions.
+	for _, container := range pod.Spec.Containers {
+		totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
+		totalMemory += container.Resources.Limits.Memory().Value()
+	}
+
+	capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
+	capacityMemory := node.Status.Capacity.Memory().Value()
+
+	cpuFraction := fractionOfCapacity(totalMilliCPU, capacityMilliCPU, node.Name)
+	memoryFraction := fractionOfCapacity(totalMemory, capacityMemory, node.Name)
+	if cpuFraction >= 1 || memoryFraction >= 1 {
+		// If requested >= capacity, the corresponding host should never be preferred.
+		score = 0
+	} else {
+		// The difference between cpuFraction and memoryFraction is bounded by -1 and 1.
+		// Multiplying the absolute value of the difference by 10 scales it to 0-10, with 0
+		// representing a well balanced allocation and 10 a poorly balanced one. Subtracting
+		// it from 10 yields the score, which also ranges from 0 to 10, with 10 representing well balanced.
+		diff := math.Abs(cpuFraction - memoryFraction)
+		score = int(10 - diff*10)
+	}
+	glog.V(4).Infof(
+		"%v -> %v: Balanced Resource Allocation, Requested/Capacity: (%d, %d) / (%d, %d) Score: (%d)",
+		pod.Name, node.Name,
+		totalMilliCPU, totalMemory,
+		capacityMilliCPU, capacityMemory,
+		score,
+	)
+
+	return HostPriority{
+		host:  node.Name,
+		score: score,
+	}
+}
+
+func fractionOfCapacity(requested, capacity int64, node string) float64 {
+	if capacity == 0 {
+		return 1
+	}
+	return float64(requested) / float64(capacity)
+}
diff --git a/pkg/scheduler/priorities_test.go b/pkg/scheduler/priorities_test.go
index 486cb0a7a39..88c5aa31f41 100644
--- a/pkg/scheduler/priorities_test.go
+++ b/pkg/scheduler/priorities_test.go
@@ -368,3 +368,236 @@ func TestNewNodeLabelPriority(t *testing.T) {
 		}
 	}
 }
+
+func TestBalancedResourceAllocation(t *testing.T) {
+	labels1 := map[string]string{
+		"foo": "bar",
+		"baz": "blah",
+	}
+	labels2 := map[string]string{
+		"bar": "foo",
+		"baz": "blah",
+	}
+	machine1Spec := api.PodSpec{
+		Host: "machine1",
+	}
+	machine2Spec := api.PodSpec{
+		Host: "machine2",
+	}
+	noResources := api.PodSpec{
+		Containers: []api.Container{},
+	}
+	cpuOnly := api.PodSpec{
+		Host: "machine1",
+		Containers: []api.Container{
+			{
+				Resources: api.ResourceRequirements{
+					Limits: api.ResourceList{
+						"cpu": resource.MustParse("1000m"),
+					},
+				},
+			},
+			{
+				Resources: api.ResourceRequirements{
+					Limits: api.ResourceList{
+						"cpu": resource.MustParse("2000m"),
+					},
+				},
+			},
+		},
+	}
+	cpuOnly2 := cpuOnly
+	cpuOnly2.Host = "machine2"
+	cpuAndMemory := api.PodSpec{
+		Host: "machine2",
+		Containers: []api.Container{
+			{
+				Resources: api.ResourceRequirements{
+					Limits: api.ResourceList{
+						"cpu":    resource.MustParse("1000m"),
+						"memory": resource.MustParse("2000"),
+					},
+				},
+			},
+			{
+				Resources: api.ResourceRequirements{
+					Limits: api.ResourceList{
+						"cpu":    resource.MustParse("2000m"),
+						"memory": resource.MustParse("3000"),
+					},
+				},
+			},
+		},
+	}
+	tests := []struct {
+		pod          api.Pod
+		pods         []api.Pod
+		nodes        []api.Node
+		expectedList HostPriorityList
+		test         string
+	}{
+		{
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Fraction: 0 / 4000 = 0%
+				Memory Fraction: 0 / 10000 = 0%
+				Minion1 Score: 10 - (0-0)*10 = 10
+
+				Minion2 scores on 0-10 scale
+				CPU Fraction: 0 / 4000 = 0%
+				Memory Fraction: 0 / 10000 = 0%
+				Minion2 Score: 10 - (0-0)*10 = 10
+			*/
+			pod: api.Pod{Spec: noResources},
+			nodes: []api.Node{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
+			expectedList: []HostPriority{{"machine1", 10}, {"machine2", 10}},
+			test: "nothing scheduled, nothing requested",
+		},
+		{
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Fraction: 3000 / 4000 = 75%
+				Memory Fraction: 5000 / 10000 = 50%
+				Minion1 Score: 10 - (0.75-0.5)*10 = 7
+
+				Minion2 scores on 0-10 scale
+				CPU Fraction: 3000 / 6000 = 50%
+				Memory Fraction: 5000 / 10000 = 50%
+				Minion2 Score: 10 - (0.5-0.5)*10 = 10
+			*/
+			pod: api.Pod{Spec: cpuAndMemory},
+			nodes: []api.Node{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 6000, 10000)},
+			expectedList: []HostPriority{{"machine1", 7}, {"machine2", 10}},
+			test: "nothing scheduled, resources requested, differently sized machines",
+		},
+		{
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Fraction: 0 / 4000 = 0%
+				Memory Fraction: 0 / 10000 = 0%
+				Minion1 Score: 10 - (0-0)*10 = 10
+
+				Minion2 scores on 0-10 scale
+				CPU Fraction: 0 / 4000 = 0%
+				Memory Fraction: 0 / 10000 = 0%
+				Minion2 Score: 10 - (0-0)*10 = 10
+			*/
+			pod: api.Pod{Spec: noResources},
+			nodes: []api.Node{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
+			expectedList: []HostPriority{{"machine1", 10}, {"machine2", 10}},
+			test: "no resources requested, pods scheduled",
+			pods: []api.Pod{
+				{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
+				{Spec: machine1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
+				{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
+				{Spec: machine2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
+			},
+		},
+		{
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Fraction: 6000 / 10000 = 60%
+				Memory Fraction: 0 / 20000 = 0%
+				Minion1 Score: 10 - (0.6-0)*10 = 4
+
+				Minion2 scores on 0-10 scale
+				CPU Fraction: 6000 / 10000 = 60%
+				Memory Fraction: 5000 / 20000 = 25%
+				Minion2 Score: 10 - (0.6-0.25)*10 = 6
+			*/
+			pod: api.Pod{Spec: noResources},
+			nodes: []api.Node{makeMinion("machine1", 10000, 20000), makeMinion("machine2", 10000, 20000)},
+			expectedList: []HostPriority{{"machine1", 4}, {"machine2", 6}},
+			test: "no resources requested, pods scheduled with resources",
+			pods: []api.Pod{
+				{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels2}},
+				{Spec: cpuOnly, ObjectMeta: api.ObjectMeta{Labels: labels1}},
+				{Spec: cpuOnly2, ObjectMeta: api.ObjectMeta{Labels: labels1}},
+				{Spec: cpuAndMemory, ObjectMeta: api.ObjectMeta{Labels: labels1}},
+			},
+		},
+		{
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Fraction: 6000 / 10000 = 60%
+				Memory Fraction: 5000 / 20000 = 25%
+				Minion1 Score: 10 - (0.6-0.25)*10 = 6
+
+				Minion2 scores on 0-10 scale
+				CPU Fraction: 6000 / 10000 = 60%
+				Memory Fraction: 10000 / 20000 = 50%
+				Minion2 Score: 10 - (0.6-0.5)*10 = 9
+			*/
+			pod: api.Pod{Spec: cpuAndMemory},
+			nodes: []api.Node{makeMinion("machine1", 10000, 20000), makeMinion("machine2", 10000, 20000)},
+			expectedList: []HostPriority{{"machine1", 6}, {"machine2", 9}},
+			test: "resources requested, pods scheduled with resources",
+			pods: []api.Pod{
+				{Spec: cpuOnly},
+				{Spec: cpuAndMemory},
+			},
+		},
+		{
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Fraction: 6000 / 10000 = 60%
+				Memory Fraction: 5000 / 20000 = 25%
+				Minion1 Score: 10 - (0.6-0.25)*10 = 6
+
+				Minion2 scores on 0-10 scale
+				CPU Fraction: 6000 / 10000 = 60%
+				Memory Fraction: 10000 / 50000 = 20%
+				Minion2 Score: 10 - (0.6-0.2)*10 = 6
+			*/
+			pod: api.Pod{Spec: cpuAndMemory},
+			nodes: []api.Node{makeMinion("machine1", 10000, 20000), makeMinion("machine2", 10000, 50000)},
+			expectedList: []HostPriority{{"machine1", 6}, {"machine2", 6}},
+			test: "resources requested, pods scheduled with resources, differently sized machines",
+			pods: []api.Pod{
+				{Spec: cpuOnly},
+				{Spec: cpuAndMemory},
+			},
+		},
+		{
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
+				Memory Fraction: 0 / 10000 = 0%
+				Minion1 Score: 0
+
+				Minion2 scores on 0-10 scale
+				CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
+				Memory Fraction: 5000 / 10000 = 50%
+				Minion2 Score: 0
+			*/
+			pod: api.Pod{Spec: cpuOnly},
+			nodes: []api.Node{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
+			expectedList: []HostPriority{{"machine1", 0}, {"machine2", 0}},
+			test: "requested resources exceed minion capacity",
+			pods: []api.Pod{
+				{Spec: cpuOnly},
+				{Spec: cpuAndMemory},
+			},
+		},
+		{
+			pod: api.Pod{Spec: noResources},
+			nodes: []api.Node{makeMinion("machine1", 0, 0), makeMinion("machine2", 0, 0)},
+			expectedList: []HostPriority{{"machine1", 0}, {"machine2", 0}},
+			test: "zero minion resources, pods scheduled with resources",
+			pods: []api.Pod{
+				{Spec: cpuOnly},
+				{Spec: cpuAndMemory},
+			},
+		},
+	}
+
+	for _, test := range tests {
+		list, err := BalancedResourceAllocation(test.pod, FakePodLister(test.pods), FakeMinionLister(api.NodeList{Items: test.nodes}))
+		if err != nil {
+			t.Errorf("unexpected error: %v", err)
+		}
+		if !reflect.DeepEqual(test.expectedList, list) {
+			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
+		}
+	}
+}
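For anyone who wants to sanity-check the arithmetic by hand, here is a minimal standalone sketch of the scoring rule described in the priorities.go comment above. It is not part of the patch; the helper names `balancedScore` and `fraction` are illustrative only. It reproduces the "nothing scheduled, resources requested, differently sized machines" expectations from the test table.

```go
package main

import (
	"fmt"
	"math"
)

// balancedScore mirrors the rule above: a node whose CPU and memory
// utilization fractions are close together scores near 10, and any node
// whose requested resources meet or exceed capacity scores 0.
func balancedScore(requestedMilliCPU, capacityMilliCPU, requestedMemory, capacityMemory int64) int {
	cpuFraction := fraction(requestedMilliCPU, capacityMilliCPU)
	memoryFraction := fraction(requestedMemory, capacityMemory)
	if cpuFraction >= 1 || memoryFraction >= 1 {
		return 0
	}
	return int(10 - math.Abs(cpuFraction-memoryFraction)*10)
}

func fraction(requested, capacity int64) float64 {
	if capacity == 0 {
		return 1
	}
	return float64(requested) / float64(capacity)
}

func main() {
	// 3000m CPU and 5000 memory requested on a 4000m/10000 node vs. a 6000m/10000 node.
	fmt.Println(balancedScore(3000, 4000, 5000, 10000)) // 7: 10 - |0.75-0.50|*10 = 7.5, truncated
	fmt.Println(balancedScore(3000, 6000, 5000, 10000)) // 10: both fractions are 0.5
}
```

The int() truncation is why the 7.5 above shows up as 7 both here and in the expected test scores.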