From cb9d515004b5785f23475c2558a912e5712d581e Mon Sep 17 00:00:00 2001 From: David Oppenheimer Date: Wed, 1 Jul 2015 22:01:23 -0700 Subject: [PATCH 1/6] Add DumbSpreadingPriority, which tries to spread pods across nodes. --- .../algorithm/priorities/priorities.go | 41 +++++++++++++++++-- .../{spreading.go => service_spreading.go} | 0 .../algorithmprovider/defaults/defaults.go | 2 + 3 files changed, 40 insertions(+), 3 deletions(-) rename plugin/pkg/scheduler/algorithm/priorities/{spreading.go => service_spreading.go} (100%) diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities.go b/plugin/pkg/scheduler/algorithm/priorities/priorities.go index 44e874a722a..e42ea0fa49f 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities.go @@ -39,9 +39,9 @@ func calculateScore(requested, capacity int64, node string) int { return int(((capacity - requested) * 10) / capacity) } -// Calculate the occupancy on a node. 'node' has information about the resources on the node. +// Calculate the resource occupancy on a node. 'node' has information about the resources on the node. // 'pods' is a list of pods currently scheduled on the node. -func calculateOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) algorithm.HostPriority { +func calculateResourceOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) algorithm.HostPriority { totalMilliCPU := int64(0) totalMemory := int64(0) for _, existingPod := range pods { @@ -89,7 +89,42 @@ func LeastRequestedPriority(pod *api.Pod, podLister algorithm.PodLister, minionL list := algorithm.HostPriorityList{} for _, node := range nodes.Items { - list = append(list, calculateOccupancy(pod, node, podsToMachines[node.Name])) + list = append(list, calculateResourceOccupancy(pod, node, podsToMachines[node.Name])) + } + return list, nil +} + +func min(l, r int64) (m int64) { + m = r + if l < r { + m = l + } + return m +} + +// See comment for DumbSpreadingPriority() +const dumbSpreadingDenominator int64 = 10 + +// DumbSpreadingPriority is a priority function that favors nodes with fewer pods. +// It works like LeastRequestedPeriority but instead of using 10 * percentage of machine free by resource, +// it uses 10 * percentage of machine free by pod, with "percentage of machine free by pod" claculated as +// (dumbSpreadingDenominator - number of pods already on the node + 1) / dumbSpreadingDenominator. +// dumbSpreadingDenominator serves like the machine capacity in LeasRequestedPriority but is chosen +// so that we equate one pod with a reasonable amount of resources when we combine all the scores together. 
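+// For example, a node that already has 3 pods gets calculateScore(min(3+1, 10), 10, node) = ((10-4)*10)/10 = 6,
+// an empty node gets ((10-1)*10)/10 = 9, and a node with 9 or more pods bottoms out at 0.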
+func DumbSpreadingPriority(pod *api.Pod, podLister algorithm.PodLister, minionLister algorithm.MinionLister) (algorithm.HostPriorityList, error) { + nodes, err := minionLister.List() + if err != nil { + return algorithm.HostPriorityList{}, err + } + podsToMachines, err := predicates.MapPodsToMachines(podLister) + + list := algorithm.HostPriorityList{} + for _, node := range nodes.Items { + npods := int64(len(podsToMachines[node.Name])) + list = append(list, algorithm.HostPriority{ + Host: node.Name, + Score: calculateScore(min(npods+1, dumbSpreadingDenominator), dumbSpreadingDenominator, node.Name), + }) } return list, nil } diff --git a/plugin/pkg/scheduler/algorithm/priorities/spreading.go b/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go similarity index 100% rename from plugin/pkg/scheduler/algorithm/priorities/spreading.go rename to plugin/pkg/scheduler/algorithm/priorities/service_spreading.go diff --git a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go index 1546412b78a..5863b62b4c7 100644 --- a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go +++ b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go @@ -65,6 +65,8 @@ func defaultPriorities() util.StringSet { factory.RegisterPriorityFunction("LeastRequestedPriority", priorities.LeastRequestedPriority, 1), // Prioritizes nodes to help achieve balanced resource usage factory.RegisterPriorityFunction("BalancedResourceAllocation", priorities.BalancedResourceAllocation, 1), + // Prioritizes nodes to achieve approximately equal number of pods per node + factory.RegisterPriorityFunction("DumbSpreadingPriority", priorities.DumbSpreadingPriority, 2), // spreads pods by minimizing the number of pods (belonging to the same service) on the same minion. factory.RegisterPriorityConfigFactory( "ServiceSpreadingPriority", From 53518e37a66c92ce940ef300773db6daebe2f6df Mon Sep 17 00:00:00 2001 From: David Oppenheimer Date: Fri, 3 Jul 2015 01:34:07 -0700 Subject: [PATCH 2/6] Add a test for DumbSpreadingPriority. 
--- .../algorithm/priorities/priorities.go | 14 +++- .../algorithm/priorities/priorities_test.go | 83 +++++++++++++++++++ .../algorithm/priorities/service_spreading.go | 5 ++ ...ding_test.go => service_spreading_test.go} | 0 plugin/pkg/scheduler/generic_scheduler.go | 5 +- 5 files changed, 102 insertions(+), 5 deletions(-) rename plugin/pkg/scheduler/algorithm/priorities/{spreading_test.go => service_spreading_test.go} (100%) diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities.go b/plugin/pkg/scheduler/algorithm/priorities/priorities.go index e42ea0fa49f..c1cce819f88 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities.go @@ -62,7 +62,8 @@ func calculateResourceOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) al cpuScore := calculateScore(totalMilliCPU, capacityMilliCPU, node.Name) memoryScore := calculateScore(totalMemory, capacityMemory, node.Name) - glog.V(10).Infof( +// glog.V(10).Infof( + glog.Infof( "%v -> %v: Least Requested Priority, Absolute/Requested: (%d, %d) / (%d, %d) Score: (%d, %d)", pod.Name, node.Name, totalMilliCPU, totalMemory, @@ -121,9 +122,15 @@ func DumbSpreadingPriority(pod *api.Pod, podLister algorithm.PodLister, minionLi list := algorithm.HostPriorityList{} for _, node := range nodes.Items { npods := int64(len(podsToMachines[node.Name])) + score := calculateScore(min(npods+1, dumbSpreadingDenominator), dumbSpreadingDenominator, node.Name) +// glog.V(10).Infof( + glog.Infof( + "%v -> %v: DumbSpreadPriority, Old # pods (%d) Score: (%d)", + pod.Name, node.Name, npods, score, + ) list = append(list, algorithm.HostPriority{ Host: node.Name, - Score: calculateScore(min(npods+1, dumbSpreadingDenominator), dumbSpreadingDenominator, node.Name), + Score: score, }) } return list, nil @@ -225,7 +232,8 @@ func calculateBalancedResourceAllocation(pod *api.Pod, node api.Node, pods []*ap diff := math.Abs(cpuFraction - memoryFraction) score = int(10 - diff*10) } - glog.V(10).Infof( +// glog.V(10).Infof( + glog.Infof( "%v -> %v: Balanced Resource Allocation, Absolute/Requested: (%d, %d) / (%d, %d) Score: (%d)", pod.Name, node.Name, totalMilliCPU, totalMemory, diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go index ab43cecfedb..97c3a942841 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go @@ -23,6 +23,7 @@ import ( "github.com/GoogleCloudPlatform/kubernetes/pkg/api" "github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource" + "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler" "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm" ) @@ -38,6 +39,88 @@ func makeMinion(node string, milliCPU, memory int64) api.Node { } } +func TestDumbSpreading(t *testing.T) { + noResources := api.PodSpec{ + Containers: []api.Container{}, + } + small := api.PodSpec{ + NodeName: "machine1", + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Limits: api.ResourceList{ + "cpu": resource.MustParse("100m"), + "memory": resource.MustParse("1000"), + }, + }, + }, + }, + } + large := api.PodSpec{ + NodeName: "machine2", + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Limits: api.ResourceList{ + "cpu": resource.MustParse("600m"), + "memory": resource.MustParse("6000"), + }, + }, + }, + }, + } + tests := []struct { + pod *api.Pod + pods []*api.Pod + nodes 
[]api.Node + expectedList algorithm.HostPriorityList + test string + }{ + { + /* Minion1 CPU capacity 1000m, free 700m/7000, 3 pods + LeastRequestedPriority score 7 + BalancedResourceAllocation score 10 + ServiceSpreadingPriority score 10 + DumbSpreadingPriority score 6 + Total: 7 + 10 + 10 + 2*6 = 39 + + Minion2 CPU capacity 1000m, free 400m/4000, 1 pod + LeastRequestedPriority score 4 + BalancedResourceAllocation score 10 + ServiceSpreadingPriority score 10 + DumbSpreadingPriority score 8 + Total: 4 + 10 + 10 + 2*8 = 40 + + Moral of the story: We prefer the machine that is more heavily loaded, + because it has fewer pods. + */ + pod: &api.Pod{Spec: noResources}, + nodes: []api.Node{makeMinion("machine1", 1000, 10000), makeMinion("machine2", 1000, 10000)}, + expectedList: []algorithm.HostPriority{{"machine1", 39}, {"machine2", 40}}, + test: "nothing scheduled, nothing requested", + pods: []*api.Pod { + {Spec: small}, {Spec: small}, {Spec: small}, + {Spec: large}, + }, + }, + } + + for _, test := range tests { + list, err := scheduler.PrioritizeNodes( + test.pod, + algorithm.FakePodLister(test.pods), + []algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: DumbSpreadingPriority, Weight: 2}, {Function: NewServiceSpreadPriority(algorithm.FakeServiceLister([]api.Service{})), Weight: 1}}, + algorithm.FakeMinionLister(api.NodeList{Items: test.nodes})) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !reflect.DeepEqual(test.expectedList, list) { + t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) + } + } +} + + func TestLeastRequested(t *testing.T) { labels1 := map[string]string{ "foo": "bar", diff --git a/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go b/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go index eaddad66d0d..ff9216e62d6 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go +++ b/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go @@ -20,6 +20,7 @@ import ( "github.com/GoogleCloudPlatform/kubernetes/pkg/api" "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm" + "github.com/golang/glog" ) type ServiceSpread struct { @@ -82,6 +83,10 @@ func (s *ServiceSpread) CalculateSpreadPriority(pod *api.Pod, podLister algorith fScore = 10 * (float32(maxCount-counts[minion.Name]) / float32(maxCount)) } result = append(result, algorithm.HostPriority{Host: minion.Name, Score: int(fScore)}) + // glog.V(10).Infof( + glog.Infof( + "%v -> %v: ServiceSpreadPriority, Sore: (%d)", pod.Name, minion.Name, int(fScore), + ) } return result, nil } diff --git a/plugin/pkg/scheduler/algorithm/priorities/spreading_test.go b/plugin/pkg/scheduler/algorithm/priorities/service_spreading_test.go similarity index 100% rename from plugin/pkg/scheduler/algorithm/priorities/spreading_test.go rename to plugin/pkg/scheduler/algorithm/priorities/service_spreading_test.go diff --git a/plugin/pkg/scheduler/generic_scheduler.go b/plugin/pkg/scheduler/generic_scheduler.go index 6de52d7c6a6..26a8f8cd2f5 100644 --- a/plugin/pkg/scheduler/generic_scheduler.go +++ b/plugin/pkg/scheduler/generic_scheduler.go @@ -71,7 +71,7 @@ func (g *genericScheduler) Schedule(pod *api.Pod, minionLister algorithm.MinionL return "", err } - priorityList, err := prioritizeNodes(pod, g.pods, g.prioritizers, algorithm.FakeMinionLister(filteredNodes)) + priorityList, err := PrioritizeNodes(pod, 
g.pods, g.prioritizers, algorithm.FakeMinionLister(filteredNodes)) if err != nil { return "", err } @@ -139,7 +139,7 @@ func findNodesThatFit(pod *api.Pod, podLister algorithm.PodLister, predicateFunc // Each priority function can also have its own weight // The minion scores returned by the priority function are multiplied by the weights to get weighted scores // All scores are finally combined (added) to get the total weighted scores of all minions -func prioritizeNodes(pod *api.Pod, podLister algorithm.PodLister, priorityConfigs []algorithm.PriorityConfig, minionLister algorithm.MinionLister) (algorithm.HostPriorityList, error) { +func PrioritizeNodes(pod *api.Pod, podLister algorithm.PodLister, priorityConfigs []algorithm.PriorityConfig, minionLister algorithm.MinionLister) (algorithm.HostPriorityList, error) { result := algorithm.HostPriorityList{} // If no priority configs are provided, then the EqualPriority function is applied @@ -165,6 +165,7 @@ func prioritizeNodes(pod *api.Pod, podLister algorithm.PodLister, priorityConfig } } for host, score := range combinedScores { + glog.V(10).Infof("Host %s Score %d", host, score) result = append(result, algorithm.HostPriority{Host: host, Score: score}) } return result, nil From 44ed22906956aa0c6b0596d865f01d6eaa9d1e28 Mon Sep 17 00:00:00 2001 From: David Oppenheimer Date: Fri, 3 Jul 2015 01:40:00 -0700 Subject: [PATCH 3/6] Foo. --- plugin/pkg/scheduler/algorithm/priorities/priorities_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go index 97c3a942841..e4262dc2e6d 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go @@ -109,6 +109,9 @@ func TestDumbSpreading(t *testing.T) { list, err := scheduler.PrioritizeNodes( test.pod, algorithm.FakePodLister(test.pods), + // This should match the configuration in defaultPriorities() in + // plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want + // to test what's actually in production. []algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: DumbSpreadingPriority, Weight: 2}, {Function: NewServiceSpreadPriority(algorithm.FakeServiceLister([]api.Service{})), Weight: 1}}, algorithm.FakeMinionLister(api.NodeList{Items: test.nodes})) if err != nil { From 4ea8b8a66d879c3b02e26dea6fbb755bdaf04295 Mon Sep 17 00:00:00 2001 From: David Oppenheimer Date: Sun, 5 Jul 2015 11:39:35 -0700 Subject: [PATCH 4/6] Get rid of separate DumbSpreading function and just treat zero-limit pods as having a constant non-zero memory and CPU limit. 
--- .../algorithm/priorities/priorities.go | 107 ++++++++---------- .../algorithm/priorities/priorities_test.go | 86 ++++++++------ .../algorithm/priorities/service_spreading.go | 3 +- .../algorithmprovider/defaults/defaults.go | 2 - 4 files changed, 102 insertions(+), 96 deletions(-) diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities.go b/plugin/pkg/scheduler/algorithm/priorities/priorities.go index c1cce819f88..c6f7bc62ce8 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities.go @@ -21,6 +21,7 @@ import ( "github.com/GoogleCloudPlatform/kubernetes/pkg/api" "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" + "github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource" "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm" "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm/predicates" "github.com/golang/glog" @@ -28,42 +29,72 @@ import ( // the unused capacity is calculated on a scale of 0-10 // 0 being the lowest priority and 10 being the highest -func calculateScore(requested, capacity int64, node string) int { +func calculateScore(requested int64, capacity int64, node string) int { if capacity == 0 { return 0 } if requested > capacity { - glog.Infof("Combined requested resources from existing pods exceeds capacity on minion: %s", node) + glog.Infof("Combined requested resources %d from existing pods exceeds capacity %d on minion: %s", + requested, capacity, node) return 0 } return int(((capacity - requested) * 10) / capacity) } +// For each of these resources, a pod that doesn't request the resource explicitly +// will be treated as having requested the amount indicated below, for the purpose +// of computing priority only. This ensures that when scheduling zero-limit pods, such +// pods will not all be scheduled to the machine with the smallest in-use limit, +// and that when scheduling regular pods, such pods will not see zero-limit pods as +// consuming no resources whatsoever. +const defaultMilliCpuLimit int64 = 100 // 0.1 core +const defaultMemoryLimit int64 = 60 * 1024 * 1024 // 60 MB + +// TODO: Consider setting default as a fixed fraction of machine capacity (take "capacity api.ResourceList" +// as an additional argument here) rather than using constants +func toNonzeroLimits(limits *api.ResourceList) (int64, int64) { + var out_millicpu, out_memory int64 + // Override if un-set, but not if explicitly set to zero + if (*limits.Cpu() == resource.Quantity{}) { + out_millicpu = defaultMilliCpuLimit + } else { + out_millicpu = limits.Cpu().MilliValue() + } + // Override if un-set, but not if explicitly set to zero + if (*limits.Memory() == resource.Quantity{}) { + out_memory = defaultMemoryLimit + } else { + out_memory = limits.Memory().Value() + } + return out_millicpu, out_memory +} + // Calculate the resource occupancy on a node. 'node' has information about the resources on the node. // 'pods' is a list of pods currently scheduled on the node. 
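+// Containers that do not specify a limit are counted using the defaults from toNonzeroLimits above,
+// so zero-limit pods still contribute to the occupancy totals.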
func calculateResourceOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) algorithm.HostPriority { totalMilliCPU := int64(0) totalMemory := int64(0) + capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue() + capacityMemory := node.Status.Capacity.Memory().Value() + for _, existingPod := range pods { for _, container := range existingPod.Spec.Containers { - totalMilliCPU += container.Resources.Limits.Cpu().MilliValue() - totalMemory += container.Resources.Limits.Memory().Value() + cpu, memory := toNonzeroLimits(&container.Resources.Limits) + totalMilliCPU += cpu + totalMemory += memory } } // Add the resources requested by the current pod being scheduled. // This also helps differentiate between differently sized, but empty, minions. for _, container := range pod.Spec.Containers { - totalMilliCPU += container.Resources.Limits.Cpu().MilliValue() - totalMemory += container.Resources.Limits.Memory().Value() + cpu, memory := toNonzeroLimits(&container.Resources.Limits) + totalMilliCPU += cpu + totalMemory += memory } - capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue() - capacityMemory := node.Status.Capacity.Memory().Value() - cpuScore := calculateScore(totalMilliCPU, capacityMilliCPU, node.Name) memoryScore := calculateScore(totalMemory, capacityMemory, node.Name) -// glog.V(10).Infof( - glog.Infof( + glog.V(10).Infof( "%v -> %v: Least Requested Priority, Absolute/Requested: (%d, %d) / (%d, %d) Score: (%d, %d)", pod.Name, node.Name, totalMilliCPU, totalMemory, @@ -95,47 +126,6 @@ func LeastRequestedPriority(pod *api.Pod, podLister algorithm.PodLister, minionL return list, nil } -func min(l, r int64) (m int64) { - m = r - if l < r { - m = l - } - return m -} - -// See comment for DumbSpreadingPriority() -const dumbSpreadingDenominator int64 = 10 - -// DumbSpreadingPriority is a priority function that favors nodes with fewer pods. -// It works like LeastRequestedPeriority but instead of using 10 * percentage of machine free by resource, -// it uses 10 * percentage of machine free by pod, with "percentage of machine free by pod" claculated as -// (dumbSpreadingDenominator - number of pods already on the node + 1) / dumbSpreadingDenominator. -// dumbSpreadingDenominator serves like the machine capacity in LeasRequestedPriority but is chosen -// so that we equate one pod with a reasonable amount of resources when we combine all the scores together. 
-func DumbSpreadingPriority(pod *api.Pod, podLister algorithm.PodLister, minionLister algorithm.MinionLister) (algorithm.HostPriorityList, error) { - nodes, err := minionLister.List() - if err != nil { - return algorithm.HostPriorityList{}, err - } - podsToMachines, err := predicates.MapPodsToMachines(podLister) - - list := algorithm.HostPriorityList{} - for _, node := range nodes.Items { - npods := int64(len(podsToMachines[node.Name])) - score := calculateScore(min(npods+1, dumbSpreadingDenominator), dumbSpreadingDenominator, node.Name) -// glog.V(10).Infof( - glog.Infof( - "%v -> %v: DumbSpreadPriority, Old # pods (%d) Score: (%d)", - pod.Name, node.Name, npods, score, - ) - list = append(list, algorithm.HostPriority{ - Host: node.Name, - Score: score, - }) - } - return list, nil -} - type NodeLabelPrioritizer struct { label string presence bool @@ -205,15 +195,17 @@ func calculateBalancedResourceAllocation(pod *api.Pod, node api.Node, pods []*ap score := int(0) for _, existingPod := range pods { for _, container := range existingPod.Spec.Containers { - totalMilliCPU += container.Resources.Limits.Cpu().MilliValue() - totalMemory += container.Resources.Limits.Memory().Value() + cpu, memory := toNonzeroLimits(&container.Resources.Limits) + totalMilliCPU += cpu + totalMemory += memory } } // Add the resources requested by the current pod being scheduled. // This also helps differentiate between differently sized, but empty, minions. for _, container := range pod.Spec.Containers { - totalMilliCPU += container.Resources.Limits.Cpu().MilliValue() - totalMemory += container.Resources.Limits.Memory().Value() + cpu, memory := toNonzeroLimits(&container.Resources.Limits) + totalMilliCPU += cpu + totalMemory += memory } capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue() @@ -232,8 +224,7 @@ func calculateBalancedResourceAllocation(pod *api.Pod, node api.Node, pods []*ap diff := math.Abs(cpuFraction - memoryFraction) score = int(10 - diff*10) } -// glog.V(10).Infof( - glog.Infof( + glog.V(10).Infof( "%v -> %v: Balanced Resource Allocation, Absolute/Requested: (%d, %d) / (%d, %d) Score: (%d)", pod.Name, node.Name, totalMilliCPU, totalMemory, diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go index e4262dc2e6d..4662786fedb 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go @@ -19,6 +19,7 @@ package priorities import ( "reflect" "sort" + "strconv" "testing" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" @@ -39,72 +40,83 @@ func makeMinion(node string, milliCPU, memory int64) api.Node { } } -func TestDumbSpreading(t *testing.T) { +func TestZeroLimit(t *testing.T) { + // A pod with no resources. We expect spreading to count it as having the default resources. noResources := api.PodSpec{ - Containers: []api.Container{}, + Containers: []api.Container{ + {}, + }, } + noResources1 := noResources + noResources1.NodeName = "machine1" + // A pod with the same resources as a 0-limit pod gets by default as its resources (for spreading). 
small := api.PodSpec{ - NodeName: "machine1", Containers: []api.Container{ { Resources: api.ResourceRequirements{ Limits: api.ResourceList{ - "cpu": resource.MustParse("100m"), - "memory": resource.MustParse("1000"), + "cpu": resource.MustParse( + strconv.FormatInt(defaultMilliCpuLimit, 10) + "m"), + "memory": resource.MustParse( + strconv.FormatInt(defaultMemoryLimit, 10)), }, }, }, }, } + small2 := small + small2.NodeName = "machine2" + // A larger pod. large := api.PodSpec{ - NodeName: "machine2", Containers: []api.Container{ { Resources: api.ResourceRequirements{ Limits: api.ResourceList{ - "cpu": resource.MustParse("600m"), - "memory": resource.MustParse("6000"), + "cpu": resource.MustParse( + strconv.FormatInt(defaultMilliCpuLimit * 3, 10) + "m"), + "memory": resource.MustParse( + strconv.FormatInt(defaultMemoryLimit * 3, 10)), }, }, }, }, } + large1 := large + large1.NodeName = "machine1" + large2 := large + large2.NodeName = "machine2" tests := []struct { pod *api.Pod pods []*api.Pod nodes []api.Node - expectedList algorithm.HostPriorityList test string }{ + // The point of these tests is to show you get the same priority for a zero-limit pod + // as for a pod with the defaults limits, both when the zero-limit pod is already on the machine + // and when the zero-limit pod is the one being scheduled. { - /* Minion1 CPU capacity 1000m, free 700m/7000, 3 pods - LeastRequestedPriority score 7 - BalancedResourceAllocation score 10 - ServiceSpreadingPriority score 10 - DumbSpreadingPriority score 6 - Total: 7 + 10 + 10 + 2*6 = 39 - - Minion2 CPU capacity 1000m, free 400m/4000, 1 pod - LeastRequestedPriority score 4 - BalancedResourceAllocation score 10 - ServiceSpreadingPriority score 10 - DumbSpreadingPriority score 8 - Total: 4 + 10 + 10 + 2*8 = 40 - - Moral of the story: We prefer the machine that is more heavily loaded, - because it has fewer pods. - */ pod: &api.Pod{Spec: noResources}, - nodes: []api.Node{makeMinion("machine1", 1000, 10000), makeMinion("machine2", 1000, 10000)}, - expectedList: []algorithm.HostPriority{{"machine1", 39}, {"machine2", 40}}, - test: "nothing scheduled, nothing requested", + // match current f1-micro on GCE + nodes: []api.Node{makeMinion("machine1", 1000, defaultMemoryLimit * 10), makeMinion("machine2", 1000, defaultMemoryLimit * 10)}, + test: "test priority of zero-limit pod with machine with zero-limit pod", pods: []*api.Pod { - {Spec: small}, {Spec: small}, {Spec: small}, - {Spec: large}, + {Spec: large1}, {Spec: noResources1}, + {Spec: large2}, {Spec: small2}, + }, + }, + { + pod: &api.Pod{Spec: small}, + // match current f1-micro on GCE + nodes: []api.Node{makeMinion("machine1", 1000, defaultMemoryLimit * 10), makeMinion("machine2", 1000, defaultMemoryLimit * 10)}, + test: "test priority of nonzero-limit pod with machine with zero-limit pod", + pods: []*api.Pod { + {Spec: large1}, {Spec: noResources1}, + {Spec: large2}, {Spec: small2}, }, }, } + const expectedPriority int = 25 for _, test := range tests { list, err := scheduler.PrioritizeNodes( test.pod, @@ -112,13 +124,15 @@ func TestDumbSpreading(t *testing.T) { // This should match the configuration in defaultPriorities() in // plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want // to test what's actually in production. 
- []algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: DumbSpreadingPriority, Weight: 2}, {Function: NewServiceSpreadPriority(algorithm.FakeServiceLister([]api.Service{})), Weight: 1}}, + []algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: NewServiceSpreadPriority(algorithm.FakeServiceLister([]api.Service{})), Weight: 1}}, algorithm.FakeMinionLister(api.NodeList{Items: test.nodes})) if err != nil { t.Errorf("unexpected error: %v", err) } - if !reflect.DeepEqual(test.expectedList, list) { - t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) + for _, hp := range list { + if (hp.Score != expectedPriority) { + t.Errorf("%s: expected 25 for all priorities, got list %#v", list) + } } } } @@ -149,6 +163,7 @@ func TestLeastRequested(t *testing.T) { Resources: api.ResourceRequirements{ Limits: api.ResourceList{ "cpu": resource.MustParse("1000m"), + "memory": resource.MustParse("0"), }, }, }, @@ -156,6 +171,7 @@ func TestLeastRequested(t *testing.T) { Resources: api.ResourceRequirements{ Limits: api.ResourceList{ "cpu": resource.MustParse("2000m"), + "memory": resource.MustParse("0"), }, }, }, @@ -479,6 +495,7 @@ func TestBalancedResourceAllocation(t *testing.T) { Resources: api.ResourceRequirements{ Limits: api.ResourceList{ "cpu": resource.MustParse("1000m"), + "memory": resource.MustParse("0"), }, }, }, @@ -486,6 +503,7 @@ func TestBalancedResourceAllocation(t *testing.T) { Resources: api.ResourceRequirements{ Limits: api.ResourceList{ "cpu": resource.MustParse("2000m"), + "memory": resource.MustParse("0"), }, }, }, diff --git a/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go b/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go index ff9216e62d6..3435b4e7cba 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go +++ b/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go @@ -83,8 +83,7 @@ func (s *ServiceSpread) CalculateSpreadPriority(pod *api.Pod, podLister algorith fScore = 10 * (float32(maxCount-counts[minion.Name]) / float32(maxCount)) } result = append(result, algorithm.HostPriority{Host: minion.Name, Score: int(fScore)}) - // glog.V(10).Infof( - glog.Infof( + glog.V(10).Infof( "%v -> %v: ServiceSpreadPriority, Sore: (%d)", pod.Name, minion.Name, int(fScore), ) } diff --git a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go index 5863b62b4c7..1546412b78a 100644 --- a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go +++ b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go @@ -65,8 +65,6 @@ func defaultPriorities() util.StringSet { factory.RegisterPriorityFunction("LeastRequestedPriority", priorities.LeastRequestedPriority, 1), // Prioritizes nodes to help achieve balanced resource usage factory.RegisterPriorityFunction("BalancedResourceAllocation", priorities.BalancedResourceAllocation, 1), - // Prioritizes nodes to achieve approximately equal number of pods per node - factory.RegisterPriorityFunction("DumbSpreadingPriority", priorities.DumbSpreadingPriority, 2), // spreads pods by minimizing the number of pods (belonging to the same service) on the same minion. 
factory.RegisterPriorityConfigFactory( "ServiceSpreadingPriority", From 9fbccb4ff79aa12855dd84299eb2a3916537456b Mon Sep 17 00:00:00 2001 From: David Oppenheimer Date: Sun, 5 Jul 2015 15:41:52 -0700 Subject: [PATCH 5/6] Respond to review comments. --- .../pkg/scheduler/algorithm/priorities/priorities.go | 12 ++++++------ .../algorithm/priorities/service_spreading.go | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities.go b/plugin/pkg/scheduler/algorithm/priorities/priorities.go index c6f7bc62ce8..1dffbbcc8a2 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities.go @@ -34,7 +34,7 @@ func calculateScore(requested int64, capacity int64, node string) int { return 0 } if requested > capacity { - glog.Infof("Combined requested resources %d from existing pods exceeds capacity %d on minion: %s", + glog.Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s", requested, capacity, node) return 0 } @@ -52,7 +52,7 @@ const defaultMemoryLimit int64 = 60 * 1024 * 1024 // 60 MB // TODO: Consider setting default as a fixed fraction of machine capacity (take "capacity api.ResourceList" // as an additional argument here) rather than using constants -func toNonzeroLimits(limits *api.ResourceList) (int64, int64) { +func getNonzeroLimits(limits *api.ResourceList) (int64, int64) { var out_millicpu, out_memory int64 // Override if un-set, but not if explicitly set to zero if (*limits.Cpu() == resource.Quantity{}) { @@ -79,7 +79,7 @@ func calculateResourceOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) al for _, existingPod := range pods { for _, container := range existingPod.Spec.Containers { - cpu, memory := toNonzeroLimits(&container.Resources.Limits) + cpu, memory := getNonzeroLimits(&container.Resources.Limits) totalMilliCPU += cpu totalMemory += memory } @@ -87,7 +87,7 @@ func calculateResourceOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) al // Add the resources requested by the current pod being scheduled. // This also helps differentiate between differently sized, but empty, minions. for _, container := range pod.Spec.Containers { - cpu, memory := toNonzeroLimits(&container.Resources.Limits) + cpu, memory := getNonzeroLimits(&container.Resources.Limits) totalMilliCPU += cpu totalMemory += memory } @@ -195,7 +195,7 @@ func calculateBalancedResourceAllocation(pod *api.Pod, node api.Node, pods []*ap score := int(0) for _, existingPod := range pods { for _, container := range existingPod.Spec.Containers { - cpu, memory := toNonzeroLimits(&container.Resources.Limits) + cpu, memory := getNonzeroLimits(&container.Resources.Limits) totalMilliCPU += cpu totalMemory += memory } @@ -203,7 +203,7 @@ func calculateBalancedResourceAllocation(pod *api.Pod, node api.Node, pods []*ap // Add the resources requested by the current pod being scheduled. // This also helps differentiate between differently sized, but empty, minions. 
for _, container := range pod.Spec.Containers { - cpu, memory := toNonzeroLimits(&container.Resources.Limits) + cpu, memory := getNonzeroLimits(&container.Resources.Limits) totalMilliCPU += cpu totalMemory += memory } diff --git a/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go b/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go index 3435b4e7cba..663f638ea19 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go +++ b/plugin/pkg/scheduler/algorithm/priorities/service_spreading.go @@ -84,7 +84,7 @@ func (s *ServiceSpread) CalculateSpreadPriority(pod *api.Pod, podLister algorith } result = append(result, algorithm.HostPriority{Host: minion.Name, Score: int(fScore)}) glog.V(10).Infof( - "%v -> %v: ServiceSpreadPriority, Sore: (%d)", pod.Name, minion.Name, int(fScore), + "%v -> %v: ServiceSpreadPriority, Score: (%d)", pod.Name, minion.Name, int(fScore), ) } return result, nil From 2e3f2ea20bcca76cb37c95363aa597d6757aa543 Mon Sep 17 00:00:00 2001 From: David Oppenheimer Date: Sun, 5 Jul 2015 21:31:54 -0700 Subject: [PATCH 6/6] gofmt --- .../algorithm/priorities/priorities.go | 6 +-- .../algorithm/priorities/priorities_test.go | 39 +++++++++---------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities.go b/plugin/pkg/scheduler/algorithm/priorities/priorities.go index 1dffbbcc8a2..4a4e333131a 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities.go @@ -20,8 +20,8 @@ import ( "math" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" - "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" "github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource" + "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm" "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm/predicates" "github.com/golang/glog" @@ -47,8 +47,8 @@ func calculateScore(requested int64, capacity int64, node string) int { // pods will not all be scheduled to the machine with the smallest in-use limit, // and that when scheduling regular pods, such pods will not see zero-limit pods as // consuming no resources whatsoever. 
-const defaultMilliCpuLimit int64 = 100 // 0.1 core -const defaultMemoryLimit int64 = 60 * 1024 * 1024 // 60 MB +const defaultMilliCpuLimit int64 = 100 // 0.1 core +const defaultMemoryLimit int64 = 60 * 1024 * 1024 // 60 MB // TODO: Consider setting default as a fixed fraction of machine capacity (take "capacity api.ResourceList" // as an additional argument here) rather than using constants diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go index 4662786fedb..b6af3446f29 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go @@ -73,9 +73,9 @@ func TestZeroLimit(t *testing.T) { Resources: api.ResourceRequirements{ Limits: api.ResourceList{ "cpu": resource.MustParse( - strconv.FormatInt(defaultMilliCpuLimit * 3, 10) + "m"), + strconv.FormatInt(defaultMilliCpuLimit*3, 10) + "m"), "memory": resource.MustParse( - strconv.FormatInt(defaultMemoryLimit * 3, 10)), + strconv.FormatInt(defaultMemoryLimit*3, 10)), }, }, }, @@ -86,30 +86,30 @@ func TestZeroLimit(t *testing.T) { large2 := large large2.NodeName = "machine2" tests := []struct { - pod *api.Pod - pods []*api.Pod - nodes []api.Node - test string + pod *api.Pod + pods []*api.Pod + nodes []api.Node + test string }{ // The point of these tests is to show you get the same priority for a zero-limit pod // as for a pod with the defaults limits, both when the zero-limit pod is already on the machine // and when the zero-limit pod is the one being scheduled. { - pod: &api.Pod{Spec: noResources}, + pod: &api.Pod{Spec: noResources}, // match current f1-micro on GCE - nodes: []api.Node{makeMinion("machine1", 1000, defaultMemoryLimit * 10), makeMinion("machine2", 1000, defaultMemoryLimit * 10)}, - test: "test priority of zero-limit pod with machine with zero-limit pod", - pods: []*api.Pod { + nodes: []api.Node{makeMinion("machine1", 1000, defaultMemoryLimit*10), makeMinion("machine2", 1000, defaultMemoryLimit*10)}, + test: "test priority of zero-limit pod with machine with zero-limit pod", + pods: []*api.Pod{ {Spec: large1}, {Spec: noResources1}, {Spec: large2}, {Spec: small2}, }, }, { - pod: &api.Pod{Spec: small}, + pod: &api.Pod{Spec: small}, // match current f1-micro on GCE - nodes: []api.Node{makeMinion("machine1", 1000, defaultMemoryLimit * 10), makeMinion("machine2", 1000, defaultMemoryLimit * 10)}, - test: "test priority of nonzero-limit pod with machine with zero-limit pod", - pods: []*api.Pod { + nodes: []api.Node{makeMinion("machine1", 1000, defaultMemoryLimit*10), makeMinion("machine2", 1000, defaultMemoryLimit*10)}, + test: "test priority of nonzero-limit pod with machine with zero-limit pod", + pods: []*api.Pod{ {Spec: large1}, {Spec: noResources1}, {Spec: large2}, {Spec: small2}, }, @@ -130,14 +130,13 @@ func TestZeroLimit(t *testing.T) { t.Errorf("unexpected error: %v", err) } for _, hp := range list { - if (hp.Score != expectedPriority) { + if hp.Score != expectedPriority { t.Errorf("%s: expected 25 for all priorities, got list %#v", list) } } } } - func TestLeastRequested(t *testing.T) { labels1 := map[string]string{ "foo": "bar", @@ -162,7 +161,7 @@ func TestLeastRequested(t *testing.T) { { Resources: api.ResourceRequirements{ Limits: api.ResourceList{ - "cpu": resource.MustParse("1000m"), + "cpu": resource.MustParse("1000m"), "memory": resource.MustParse("0"), }, }, @@ -170,7 +169,7 @@ func TestLeastRequested(t *testing.T) { { Resources: api.ResourceRequirements{ 
Limits: api.ResourceList{ - "cpu": resource.MustParse("2000m"), + "cpu": resource.MustParse("2000m"), "memory": resource.MustParse("0"), }, }, @@ -494,7 +493,7 @@ func TestBalancedResourceAllocation(t *testing.T) { { Resources: api.ResourceRequirements{ Limits: api.ResourceList{ - "cpu": resource.MustParse("1000m"), + "cpu": resource.MustParse("1000m"), "memory": resource.MustParse("0"), }, }, @@ -502,7 +501,7 @@ func TestBalancedResourceAllocation(t *testing.T) { { Resources: api.ResourceRequirements{ Limits: api.ResourceList{ - "cpu": resource.MustParse("2000m"), + "cpu": resource.MustParse("2000m"), "memory": resource.MustParse("0"), }, },
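
For reference, below is a minimal standalone sketch of the zero-limit handling that the series converges on in patches 4-6: unset limits are treated as defaultMilliCpuLimit / defaultMemoryLimit before the usual 0-10 least-requested score is computed. The constants, the scoring formula, and the example numbers (1000m CPU nodes, a zero-limit pod next to a pod with 3x the defaults) are taken from the patch and its test; the limits struct and the zero-means-unset convention are simplifications for illustration only, since the real getNonzeroLimits takes an *api.ResourceList and distinguishes an unset resource.Quantity from an explicit zero.

package main

import "fmt"

// Defaults assumed for containers that do not specify a limit
// (mirroring defaultMilliCpuLimit / defaultMemoryLimit in the patch).
const (
	defaultMilliCPULimit int64 = 100              // 0.1 core
	defaultMemoryLimit   int64 = 60 * 1024 * 1024 // 60 MB
)

// limits is a stand-in for api.ResourceList; in this sketch, zero means "not set".
type limits struct {
	milliCPU int64
	memory   int64
}

// nonzeroLimits mirrors getNonzeroLimits: unset limits are replaced by the
// defaults so that zero-limit pods still count as occupying resources.
func nonzeroLimits(l limits) (int64, int64) {
	cpu, mem := l.milliCPU, l.memory
	if cpu == 0 {
		cpu = defaultMilliCPULimit
	}
	if mem == 0 {
		mem = defaultMemoryLimit
	}
	return cpu, mem
}

// calculateScore is the same 0-10 "unused capacity" score used by LeastRequestedPriority.
func calculateScore(requested, capacity int64) int {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return int(((capacity - requested) * 10) / capacity)
}

func main() {
	capacityCPU := int64(1000)             // 1 core, as in makeMinion("machine1", 1000, ...)
	capacityMem := 10 * defaultMemoryLimit // as in the TestZeroLimit nodes

	pods := []limits{
		{}, // zero-limit pod: counted as the defaults
		{milliCPU: 3 * defaultMilliCPULimit, memory: 3 * defaultMemoryLimit}, // the "large" pod from the test
	}

	var usedCPU, usedMem int64
	for _, p := range pods {
		cpu, mem := nonzeroLimits(p)
		usedCPU += cpu
		usedMem += mem
	}
	fmt.Println(calculateScore(usedCPU, capacityCPU)) // (1000-400)*10/1000 = 6
	fmt.Println(calculateScore(usedMem, capacityMem)) // (600MB-240MB)*10/600MB = 6
}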