Get rid of separate DumbSpreading function and just treat zero-limit
pods as having a constant non-zero memory and CPU limit.
David Oppenheimer 2015-07-05 11:39:35 -07:00
parent 44ed229069
commit 4ea8b8a66d
4 changed files with 102 additions and 96 deletions
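
The idea, in rough terms: once a zero-limit pod is counted as a small constant amount of CPU and memory, LeastRequestedPriority alone spreads such pods, so a separate DumbSpreadingPriority is no longer needed. Below is a minimal standalone sketch (not scheduler code) of that effect; it reuses the calculateScore formula and the defaultMilliCpuLimit constant from the diff, while the 1000-millicore node capacity and the pod counts are made up for illustration.

```go
package main

import "fmt"

// Mirrors the constant introduced in this commit: a pod with no CPU limit is
// counted as 0.1 core for priority purposes.
const defaultMilliCpuLimit int64 = 100

// score mirrors calculateScore from the diff: 10 * (free fraction of capacity).
func score(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (capacity - requested) * 10 / capacity
}

func main() {
	capacity := int64(1000) // illustrative: a 1-CPU minion

	// Node A already holds three zero-limit pods, node B holds one.
	// With the new defaulting they count as 300 and 100 millicores.
	nodeA := 3 * defaultMilliCpuLimit
	nodeB := 1 * defaultMilliCpuLimit

	fmt.Println(score(nodeA, capacity), score(nodeB, capacity)) // 7 9: the emptier node wins
}
```

Before this change, both nodes would have scored 10 on CPU (zero-limit pods counted as requesting nothing), and the spreading had to come from the separate DumbSpreadingPriority function removed below.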

View File

@@ -21,6 +21,7 @@ import (
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
 	"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm"
 	"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
 	"github.com/golang/glog"
@@ -28,42 +29,72 @@ import (
 // the unused capacity is calculated on a scale of 0-10
 // 0 being the lowest priority and 10 being the highest
-func calculateScore(requested, capacity int64, node string) int {
+func calculateScore(requested int64, capacity int64, node string) int {
 	if capacity == 0 {
 		return 0
 	}
 	if requested > capacity {
-		glog.Infof("Combined requested resources from existing pods exceeds capacity on minion: %s", node)
+		glog.Infof("Combined requested resources %d from existing pods exceeds capacity %d on minion: %s",
+			requested, capacity, node)
 		return 0
 	}
 	return int(((capacity - requested) * 10) / capacity)
 }
+// For each of these resources, a pod that doesn't request the resource explicitly
+// will be treated as having requested the amount indicated below, for the purpose
+// of computing priority only. This ensures that when scheduling zero-limit pods, such
+// pods will not all be scheduled to the machine with the smallest in-use limit,
+// and that when scheduling regular pods, such pods will not see zero-limit pods as
+// consuming no resources whatsoever.
+const defaultMilliCpuLimit int64 = 100 // 0.1 core
+const defaultMemoryLimit int64 = 60 * 1024 * 1024 // 60 MB
+// TODO: Consider setting default as a fixed fraction of machine capacity (take "capacity api.ResourceList"
+// as an additional argument here) rather than using constants
+func toNonzeroLimits(limits *api.ResourceList) (int64, int64) {
+	var out_millicpu, out_memory int64
+	// Override if un-set, but not if explicitly set to zero
+	if (*limits.Cpu() == resource.Quantity{}) {
+		out_millicpu = defaultMilliCpuLimit
+	} else {
+		out_millicpu = limits.Cpu().MilliValue()
+	}
+	// Override if un-set, but not if explicitly set to zero
+	if (*limits.Memory() == resource.Quantity{}) {
+		out_memory = defaultMemoryLimit
+	} else {
+		out_memory = limits.Memory().Value()
+	}
+	return out_millicpu, out_memory
+}
 // Calculate the resource occupancy on a node. 'node' has information about the resources on the node.
 // 'pods' is a list of pods currently scheduled on the node.
 func calculateResourceOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) algorithm.HostPriority {
 	totalMilliCPU := int64(0)
 	totalMemory := int64(0)
+	capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
+	capacityMemory := node.Status.Capacity.Memory().Value()
 	for _, existingPod := range pods {
 		for _, container := range existingPod.Spec.Containers {
-			totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
-			totalMemory += container.Resources.Limits.Memory().Value()
+			cpu, memory := toNonzeroLimits(&container.Resources.Limits)
+			totalMilliCPU += cpu
+			totalMemory += memory
 		}
 	}
 	// Add the resources requested by the current pod being scheduled.
 	// This also helps differentiate between differently sized, but empty, minions.
 	for _, container := range pod.Spec.Containers {
-		totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
-		totalMemory += container.Resources.Limits.Memory().Value()
+		cpu, memory := toNonzeroLimits(&container.Resources.Limits)
+		totalMilliCPU += cpu
+		totalMemory += memory
 	}
-	capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
-	capacityMemory := node.Status.Capacity.Memory().Value()
 	cpuScore := calculateScore(totalMilliCPU, capacityMilliCPU, node.Name)
 	memoryScore := calculateScore(totalMemory, capacityMemory, node.Name)
-	// glog.V(10).Infof(
-	glog.Infof(
+	glog.V(10).Infof(
 		"%v -> %v: Least Requested Priority, Absolute/Requested: (%d, %d) / (%d, %d) Score: (%d, %d)",
 		pod.Name, node.Name,
 		totalMilliCPU, totalMemory,
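
A quick way to read toNonzeroLimits: a limit that was never specified (the empty resource.Quantity{} case above) falls back to the default, while a limit explicitly set to zero is kept at zero. The sketch below models that rule with a nil pointer standing in for "unset"; it is a simplified illustration of the behavior, not the resource.Quantity API.

```go
package main

import "fmt"

const defaultMilliCpuLimit int64 = 100 // same default as in the diff

// nonzeroOrDefault models the rule in toNonzeroLimits: nil means the limit was
// never set and is overridden; an explicit value, including zero, is kept.
func nonzeroOrDefault(limit *int64, def int64) int64 {
	if limit == nil {
		return def
	}
	return *limit
}

func main() {
	var unset *int64
	zero, set := int64(0), int64(250)

	fmt.Println(nonzeroOrDefault(unset, defaultMilliCpuLimit)) // 100: unset -> default
	fmt.Println(nonzeroOrDefault(&zero, defaultMilliCpuLimit)) // 0: explicit zero kept
	fmt.Println(nonzeroOrDefault(&set, defaultMilliCpuLimit))  // 250: explicit value kept
}
```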
@@ -95,47 +126,6 @@ func LeastRequestedPriority(pod *api.Pod, podLister algorithm.PodLister, minionL
 	return list, nil
 }
-func min(l, r int64) (m int64) {
-	m = r
-	if l < r {
-		m = l
-	}
-	return m
-}
-// See comment for DumbSpreadingPriority()
-const dumbSpreadingDenominator int64 = 10
-// DumbSpreadingPriority is a priority function that favors nodes with fewer pods.
-// It works like LeastRequestedPeriority but instead of using 10 * percentage of machine free by resource,
-// it uses 10 * percentage of machine free by pod, with "percentage of machine free by pod" claculated as
-// (dumbSpreadingDenominator - number of pods already on the node + 1) / dumbSpreadingDenominator.
-// dumbSpreadingDenominator serves like the machine capacity in LeasRequestedPriority but is chosen
-// so that we equate one pod with a reasonable amount of resources when we combine all the scores together.
-func DumbSpreadingPriority(pod *api.Pod, podLister algorithm.PodLister, minionLister algorithm.MinionLister) (algorithm.HostPriorityList, error) {
-	nodes, err := minionLister.List()
-	if err != nil {
-		return algorithm.HostPriorityList{}, err
-	}
-	podsToMachines, err := predicates.MapPodsToMachines(podLister)
-	list := algorithm.HostPriorityList{}
-	for _, node := range nodes.Items {
-		npods := int64(len(podsToMachines[node.Name]))
-		score := calculateScore(min(npods+1, dumbSpreadingDenominator), dumbSpreadingDenominator, node.Name)
-		// glog.V(10).Infof(
-		glog.Infof(
-			"%v -> %v: DumbSpreadPriority, Old # pods (%d) Score: (%d)",
-			pod.Name, node.Name, npods, score,
-		)
-		list = append(list, algorithm.HostPriority{
-			Host: node.Name,
-			Score: score,
-		})
-	}
-	return list, nil
-}
 type NodeLabelPrioritizer struct {
 	label string
 	presence bool
@@ -205,15 +195,17 @@ func calculateBalancedResourceAllocation(pod *api.Pod, node api.Node, pods []*ap
 	score := int(0)
 	for _, existingPod := range pods {
 		for _, container := range existingPod.Spec.Containers {
-			totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
-			totalMemory += container.Resources.Limits.Memory().Value()
+			cpu, memory := toNonzeroLimits(&container.Resources.Limits)
+			totalMilliCPU += cpu
+			totalMemory += memory
 		}
 	}
 	// Add the resources requested by the current pod being scheduled.
 	// This also helps differentiate between differently sized, but empty, minions.
 	for _, container := range pod.Spec.Containers {
-		totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
-		totalMemory += container.Resources.Limits.Memory().Value()
+		cpu, memory := toNonzeroLimits(&container.Resources.Limits)
+		totalMilliCPU += cpu
+		totalMemory += memory
 	}
 	capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
@@ -232,8 +224,7 @@ func calculateBalancedResourceAllocation(pod *api.Pod, node api.Node, pods []*ap
 		diff := math.Abs(cpuFraction - memoryFraction)
 		score = int(10 - diff*10)
 	}
-	// glog.V(10).Infof(
-	glog.Infof(
+	glog.V(10).Infof(
 		"%v -> %v: Balanced Resource Allocation, Absolute/Requested: (%d, %d) / (%d, %d) Score: (%d)",
 		pod.Name, node.Name,
 		totalMilliCPU, totalMemory,

View File

@@ -19,6 +19,7 @@ package priorities
 import (
 	"reflect"
 	"sort"
+	"strconv"
 	"testing"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
@@ -39,72 +40,83 @@ func makeMinion(node string, milliCPU, memory int64) api.Node {
 	}
 }
-func TestDumbSpreading(t *testing.T) {
+func TestZeroLimit(t *testing.T) {
+	// A pod with no resources. We expect spreading to count it as having the default resources.
 	noResources := api.PodSpec{
-		Containers: []api.Container{},
+		Containers: []api.Container{
+			{},
+		},
 	}
+	noResources1 := noResources
+	noResources1.NodeName = "machine1"
+	// A pod with the same resources as a 0-limit pod gets by default as its resources (for spreading).
 	small := api.PodSpec{
-		NodeName: "machine1",
 		Containers: []api.Container{
 			{
 				Resources: api.ResourceRequirements{
 					Limits: api.ResourceList{
-						"cpu": resource.MustParse("100m"),
-						"memory": resource.MustParse("1000"),
+						"cpu": resource.MustParse(
+							strconv.FormatInt(defaultMilliCpuLimit, 10) + "m"),
+						"memory": resource.MustParse(
+							strconv.FormatInt(defaultMemoryLimit, 10)),
 					},
 				},
 			},
 		},
 	}
+	small2 := small
+	small2.NodeName = "machine2"
+	// A larger pod.
 	large := api.PodSpec{
-		NodeName: "machine2",
 		Containers: []api.Container{
 			{
 				Resources: api.ResourceRequirements{
 					Limits: api.ResourceList{
-						"cpu": resource.MustParse("600m"),
-						"memory": resource.MustParse("6000"),
+						"cpu": resource.MustParse(
+							strconv.FormatInt(defaultMilliCpuLimit * 3, 10) + "m"),
+						"memory": resource.MustParse(
+							strconv.FormatInt(defaultMemoryLimit * 3, 10)),
 					},
 				},
 			},
 		},
 	}
+	large1 := large
+	large1.NodeName = "machine1"
+	large2 := large
+	large2.NodeName = "machine2"
 	tests := []struct {
 		pod *api.Pod
 		pods []*api.Pod
 		nodes []api.Node
-		expectedList algorithm.HostPriorityList
 		test string
 	}{
+		// The point of these tests is to show you get the same priority for a zero-limit pod
+		// as for a pod with the defaults limits, both when the zero-limit pod is already on the machine
+		// and when the zero-limit pod is the one being scheduled.
 		{
-			/* Minion1 CPU capacity 1000m, free 700m/7000, 3 pods
-			LeastRequestedPriority score 7
-			BalancedResourceAllocation score 10
-			ServiceSpreadingPriority score 10
-			DumbSpreadingPriority score 6
-			Total: 7 + 10 + 10 + 2*6 = 39
-			Minion2 CPU capacity 1000m, free 400m/4000, 1 pod
-			LeastRequestedPriority score 4
-			BalancedResourceAllocation score 10
-			ServiceSpreadingPriority score 10
-			DumbSpreadingPriority score 8
-			Total: 4 + 10 + 10 + 2*8 = 40
-			Moral of the story: We prefer the machine that is more heavily loaded,
-			because it has fewer pods.
-			*/
 			pod: &api.Pod{Spec: noResources},
-			nodes: []api.Node{makeMinion("machine1", 1000, 10000), makeMinion("machine2", 1000, 10000)},
-			expectedList: []algorithm.HostPriority{{"machine1", 39}, {"machine2", 40}},
-			test: "nothing scheduled, nothing requested",
+			// match current f1-micro on GCE
+			nodes: []api.Node{makeMinion("machine1", 1000, defaultMemoryLimit * 10), makeMinion("machine2", 1000, defaultMemoryLimit * 10)},
+			test: "test priority of zero-limit pod with machine with zero-limit pod",
 			pods: []*api.Pod {
-				{Spec: small}, {Spec: small},
-				{Spec: large},
+				{Spec: large1}, {Spec: noResources1},
+				{Spec: large2}, {Spec: small2},
+			},
+		},
+		{
+			pod: &api.Pod{Spec: small},
+			// match current f1-micro on GCE
+			nodes: []api.Node{makeMinion("machine1", 1000, defaultMemoryLimit * 10), makeMinion("machine2", 1000, defaultMemoryLimit * 10)},
+			test: "test priority of nonzero-limit pod with machine with zero-limit pod",
+			pods: []*api.Pod {
+				{Spec: large1}, {Spec: noResources1},
+				{Spec: large2}, {Spec: small2},
 			},
 		},
 	}
+	const expectedPriority int = 25
 	for _, test := range tests {
 		list, err := scheduler.PrioritizeNodes(
 			test.pod,
@@ -112,13 +124,15 @@ func TestDumbSpreading(t *testing.T) {
 			// This should match the configuration in defaultPriorities() in
 			// plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want
 			// to test what's actually in production.
-			[]algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: DumbSpreadingPriority, Weight: 2}, {Function: NewServiceSpreadPriority(algorithm.FakeServiceLister([]api.Service{})), Weight: 1}},
+			[]algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: NewServiceSpreadPriority(algorithm.FakeServiceLister([]api.Service{})), Weight: 1}},
 			algorithm.FakeMinionLister(api.NodeList{Items: test.nodes}))
 		if err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
-		if !reflect.DeepEqual(test.expectedList, list) {
-			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
+		for _, hp := range list {
+			if hp.Score != expectedPriority {
+				t.Errorf("%s: expected %d for all priorities, got list %#v", test.test, expectedPriority, list)
+			}
 		}
 	}
 }
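
Why 25 in both cases, working from the numbers in the test and assuming LeastRequestedPriority averages its CPU and memory scores as the code of this period does: each machine has 1000 millicores and 10x defaultMemoryLimit of memory, and already carries one "large" pod (3x the defaults) plus one pod counted at exactly the defaults (a zero-limit pod on machine1, an explicitly default-sized pod on machine2). Adding the pod being scheduled (1x the defaults, whether zero-limit or explicitly sized) brings usage to 5x of 10x, i.e. half of capacity on both resources. That yields LeastRequestedPriority (5 + 5) / 2 = 5, BalancedResourceAllocation 10 (the CPU and memory fractions are equal), and ServiceSpreadingPriority 10 (no services defined), for a total of 5 + 10 + 10 = 25 on every node, which is exactly what the loop above asserts.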
@@ -149,6 +163,7 @@ func TestLeastRequested(t *testing.T) {
 				Resources: api.ResourceRequirements{
 					Limits: api.ResourceList{
 						"cpu": resource.MustParse("1000m"),
+						"memory": resource.MustParse("0"),
 					},
 				},
 			},
@@ -156,6 +171,7 @@ func TestLeastRequested(t *testing.T) {
 				Resources: api.ResourceRequirements{
 					Limits: api.ResourceList{
 						"cpu": resource.MustParse("2000m"),
+						"memory": resource.MustParse("0"),
 					},
 				},
 			},
@@ -479,6 +495,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
 				Resources: api.ResourceRequirements{
 					Limits: api.ResourceList{
 						"cpu": resource.MustParse("1000m"),
+						"memory": resource.MustParse("0"),
 					},
 				},
 			},
@@ -486,6 +503,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
 				Resources: api.ResourceRequirements{
 					Limits: api.ResourceList{
 						"cpu": resource.MustParse("2000m"),
+						"memory": resource.MustParse("0"),
 					},
 				},
 			},
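
The four small hunks above add an explicit "memory": resource.MustParse("0") to fixtures in TestLeastRequested and TestBalancedResourceAllocation. This follows from toNonzeroLimits: a memory limit that is simply absent would now be counted as defaultMemoryLimit, whereas an explicit zero is not overridden, presumably so that the existing score expectations in those tests are unaffected by the new defaulting.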

View File

@@ -83,8 +83,7 @@ func (s *ServiceSpread) CalculateSpreadPriority(pod *api.Pod, podLister algorith
 			fScore = 10 * (float32(maxCount-counts[minion.Name]) / float32(maxCount))
 		}
 		result = append(result, algorithm.HostPriority{Host: minion.Name, Score: int(fScore)})
-		// glog.V(10).Infof(
-		glog.Infof(
+		glog.V(10).Infof(
 			"%v -> %v: ServiceSpreadPriority, Sore: (%d)", pod.Name, minion.Name, int(fScore),
 		)
 	}

View File

@@ -65,8 +65,6 @@ func defaultPriorities() util.StringSet {
 		factory.RegisterPriorityFunction("LeastRequestedPriority", priorities.LeastRequestedPriority, 1),
 		// Prioritizes nodes to help achieve balanced resource usage
 		factory.RegisterPriorityFunction("BalancedResourceAllocation", priorities.BalancedResourceAllocation, 1),
-		// Prioritizes nodes to achieve approximately equal number of pods per node
-		factory.RegisterPriorityFunction("DumbSpreadingPriority", priorities.DumbSpreadingPriority, 2),
 		// spreads pods by minimizing the number of pods (belonging to the same service) on the same minion.
 		factory.RegisterPriorityConfigFactory(
 			"ServiceSpreadingPriority",