From d17cebcd2a0bf676fe99dbfbaa1182720cb0adca Mon Sep 17 00:00:00 2001
From: Abhishek Gupta
Date: Mon, 8 Dec 2014 12:51:05 -0800
Subject: [PATCH] Implementing PR feedback and adding test cases

---
 pkg/scheduler/generic_scheduler.go      |  12 +-
 pkg/scheduler/priorities.go             |  24 ++--
 pkg/scheduler/priorities_test.go        | 139 ++++++++++++++++++++++--
 pkg/scheduler/spreading.go              |  16 +--
 plugin/pkg/scheduler/factory/factory.go |   5 +-
 5 files changed, 160 insertions(+), 36 deletions(-)

diff --git a/pkg/scheduler/generic_scheduler.go b/pkg/scheduler/generic_scheduler.go
index 93174d139b9..3345886e4a5 100644
--- a/pkg/scheduler/generic_scheduler.go
+++ b/pkg/scheduler/generic_scheduler.go
@@ -66,7 +66,7 @@ func (g *genericScheduler) selectHost(priorityList HostPriorityList) (string, er
 	}
 	sort.Sort(sort.Reverse(priorityList))
 
-	hosts := getMinHosts(priorityList)
+	hosts := getBestHosts(priorityList)
 	g.randomLock.Lock()
 	defer g.randomLock.Unlock()
 
@@ -107,16 +107,16 @@ func findNodesThatFit(pod api.Pod, podLister PodLister, predicates []FitPredicat
 // Each priority function can also have its own weight
 // The minion scores returned by the priority function are multiplied by the weights to get weighted scores
 // All scores are finally combined (added) to get the total weighted scores of all minions
-func prioritizeNodes(pod api.Pod, podLister PodLister, priorities []PriorityConfig, minionLister MinionLister) (HostPriorityList, error) {
+func prioritizeNodes(pod api.Pod, podLister PodLister, priorityConfigs []PriorityConfig, minionLister MinionLister) (HostPriorityList, error) {
 	result := HostPriorityList{}
 	combinedScores := map[string]int{}
-	for _, priority := range priorities {
-		weight := priority.Weight
+	for _, priorityConfig := range priorityConfigs {
+		weight := priorityConfig.Weight
 		// skip the priority function if the weight is specified as 0
 		if weight == 0 {
 			continue
 		}
-		priorityFunc := priority.Function
+		priorityFunc := priorityConfig.Function
 		prioritizedList, err := priorityFunc(pod, podLister, minionLister)
 		if err != nil {
 			return HostPriorityList{}, err
@@ -131,7 +131,7 @@ func prioritizeNodes(pod api.Pod, podLister PodLister, priorities []PriorityConf
 	return result, nil
 }
 
-func getMinHosts(list HostPriorityList) []string {
+func getBestHosts(list HostPriorityList) []string {
 	result := []string{}
 	for _, hostEntry := range list {
 		if hostEntry.score == list[0].score {
diff --git a/pkg/scheduler/priorities.go b/pkg/scheduler/priorities.go
index 25b41aa6a15..34a5bb651e2 100644
--- a/pkg/scheduler/priorities.go
+++ b/pkg/scheduler/priorities.go
@@ -24,27 +24,37 @@ import (
 
 // the unused capacity is calculated on a scale of 0-10
 // 0 being the lowest priority and 10 being the highest
-func calculateScore(requested, capacity int) int {
+func calculateScore(requested, capacity int, node string) int {
 	if capacity == 0 {
 		return 0
 	}
+	if requested > capacity {
+		glog.Errorf("Combined requested resources from existing pods exceeds capacity on minion: %s", node)
+		return 0
+	}
 	return ((capacity - requested) * 10) / capacity
 }
 
 // Calculate the occupancy on a node. 'node' has information about the resources on the node.
 // 'pods' is a list of pods currently scheduled on the node.
-func calculateOccupancy(node api.Minion, pods []api.Pod) HostPriority {
+func calculateOccupancy(pod api.Pod, node api.Minion, pods []api.Pod) HostPriority {
 	totalCPU := 0
 	totalMemory := 0
-	for _, pod := range pods {
-		for _, container := range pod.Spec.Containers {
+	for _, existingPod := range pods {
+		for _, container := range existingPod.Spec.Containers {
 			totalCPU += container.CPU
 			totalMemory += container.Memory
 		}
 	}
+	// Add the resources requested by the current pod being scheduled.
+	// This also helps differentiate between differently sized, but empty, minions.
+	for _, container := range pod.Spec.Containers {
+		totalCPU += container.CPU
+		totalMemory += container.Memory
+	}
 
-	cpuScore := calculateScore(totalCPU, resources.GetIntegerResource(node.Spec.Capacity, resources.CPU, 0))
-	memoryScore := calculateScore(totalMemory, resources.GetIntegerResource(node.Spec.Capacity, resources.Memory, 0))
+	cpuScore := calculateScore(totalCPU, resources.GetIntegerResource(node.Spec.Capacity, resources.CPU, 0), node.Name)
+	memoryScore := calculateScore(totalMemory, resources.GetIntegerResource(node.Spec.Capacity, resources.Memory, 0), node.Name)
 	glog.V(4).Infof("Least Requested Priority, AbsoluteRequested: (%d, %d) Score:(%d, %d)", totalCPU, totalMemory, cpuScore, memoryScore)
 
 	return HostPriority{
@@ -66,7 +76,7 @@ func LeastRequestedPriority(pod api.Pod, podLister PodLister, minionLister Minio
 
 	list := HostPriorityList{}
 	for _, node := range nodes.Items {
-		list = append(list, calculateOccupancy(node, podsToMachines[node.Name]))
+		list = append(list, calculateOccupancy(pod, node, podsToMachines[node.Name]))
 	}
 	return list, nil
 }
diff --git a/pkg/scheduler/priorities_test.go b/pkg/scheduler/priorities_test.go
index 976da176445..e8ebd7c5e86 100644
--- a/pkg/scheduler/priorities_test.go
+++ b/pkg/scheduler/priorities_test.go
@@ -52,12 +52,14 @@ func TestLeastRequested(t *testing.T) {
 	machine2Status := api.PodStatus{
 		Host: "machine2",
 	}
+	noResources := api.PodSpec{
+		Containers: []api.Container{},
+	}
 	cpuOnly := api.PodSpec{
 		Containers: []api.Container{
 			{CPU: 1000},
 			{CPU: 2000},
 		},
-		// Host: "machine1",
 	}
 	cpuAndMemory := api.PodSpec{
 		Containers: []api.Container{
@@ -73,14 +75,55 @@ func TestLeastRequested(t *testing.T) {
 		test         string
 	}{
 		{
+			/*
+				Minion1 scores (remaining resources) on 0-10 scale
+				CPU Score: (4000 - 0) / 4000 = 10
+				Memory Score: (10000 - 0) / 10000 = 10
+				Minion1 Score: (10 + 10) / 2 = 10
+
+				Minion2 scores (remaining resources) on 0-10 scale
+				CPU Score: (4000 - 0) / 4000 = 10
+				Memory Score: (10000 - 0) / 10000 = 10
+				Minion2 Score: (10 + 10) / 2 = 10
+			*/
+			pod:          api.Pod{Spec: noResources},
 			nodes:        []api.Minion{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
 			expectedList: []HostPriority{{"machine1", 10}, {"machine2", 10}},
-			test:         "nothing scheduled",
+			test:         "nothing scheduled, nothing requested",
 		},
 		{
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Score: (4000 - 3000) / 4000 = 2.5
+				Memory Score: (10000 - 5000) / 10000 = 5
+				Minion1 Score: (2.5 + 5) / 2 = 3
+
+				Minion2 scores on 0-10 scale
+				CPU Score: (6000 - 3000) / 6000 = 5
+				Memory Score: (10000 - 5000) / 10000 = 5
+				Minion2 Score: (5 + 5) / 2 = 5
+			*/
+			pod:          api.Pod{Spec: cpuAndMemory},
+			nodes:        []api.Minion{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 6000, 10000)},
+			expectedList: []HostPriority{{"machine1", 3}, {"machine2", 5}},
+			test:         "nothing scheduled, resources requested, differently sized machines",
+		},
+		{
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Score: (4000 - 0) / 4000 = 10
+				Memory Score: (10000 - 0) / 10000 = 10
+				Minion1 Score: (10 + 10) / 2 = 10
+
+				Minion2 scores on 0-10 scale
+				CPU Score: (4000 - 0) / 4000 = 10
+				Memory Score: (10000 - 0) / 10000 = 10
+				Minion2 Score: (10 + 10) / 2 = 10
+			*/
+			pod:          api.Pod{Spec: noResources},
 			nodes:        []api.Minion{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
 			expectedList: []HostPriority{{"machine1", 10}, {"machine2", 10}},
-			test:         "no resources requested",
+			test:         "no resources requested, pods scheduled",
 			pods: []api.Pod{
 				{Status: machine1Status, ObjectMeta: api.ObjectMeta{Labels: labels2}},
 				{Status: machine1Status, ObjectMeta: api.ObjectMeta{Labels: labels1}},
@@ -89,18 +132,96 @@ func TestLeastRequested(t *testing.T) {
 			},
 		},
 		{
-			nodes:        []api.Minion{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
-			expectedList: []HostPriority{{"machine1", 6 /* int(200%-75% / 2) */}, {"machine2", 3 /* int( 200%-125% / 2) */}},
-			test:         "resources requested",
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Score: (10000 - 6000) / 10000 = 4
+				Memory Score: (20000 - 0) / 20000 = 10
+				Minion1 Score: (4 + 10) / 2 = 7
+
+				Minion2 scores on 0-10 scale
+				CPU Score: (10000 - 6000) / 10000 = 4
+				Memory Score: (20000 - 5000) / 20000 = 7.5
+				Minion2 Score: (4 + 7.5) / 2 = 5
+			*/
+			pod:          api.Pod{Spec: noResources},
+			nodes:        []api.Minion{makeMinion("machine1", 10000, 20000), makeMinion("machine2", 10000, 20000)},
+			expectedList: []HostPriority{{"machine1", 7}, {"machine2", 5}},
+			test:         "no resources requested, pods scheduled with resources",
 			pods: []api.Pod{
-				{Spec: cpuOnly, Status: api.PodStatus{Host: "machine1"}},
-				{Spec: cpuAndMemory, Status: api.PodStatus{Host: "machine2"}},
+				{Spec: cpuOnly, Status: machine1Status, ObjectMeta: api.ObjectMeta{Labels: labels2}},
+				{Spec: cpuOnly, Status: machine1Status, ObjectMeta: api.ObjectMeta{Labels: labels1}},
+				{Spec: cpuOnly, Status: machine2Status, ObjectMeta: api.ObjectMeta{Labels: labels1}},
+				{Spec: cpuAndMemory, Status: machine2Status, ObjectMeta: api.ObjectMeta{Labels: labels1}},
 			},
 		},
 		{
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Score: (10000 - 6000) / 10000 = 4
+				Memory Score: (20000 - 5000) / 20000 = 7.5
+				Minion1 Score: (4 + 7.5) / 2 = 5
+
+				Minion2 scores on 0-10 scale
+				CPU Score: (10000 - 6000) / 10000 = 4
+				Memory Score: (20000 - 10000) / 20000 = 5
+				Minion2 Score: (4 + 5) / 2 = 4
+			*/
+			pod:          api.Pod{Spec: cpuAndMemory},
+			nodes:        []api.Minion{makeMinion("machine1", 10000, 20000), makeMinion("machine2", 10000, 20000)},
+			expectedList: []HostPriority{{"machine1", 5}, {"machine2", 4}},
+			test:         "resources requested, pods scheduled with resources",
+			pods: []api.Pod{
+				{Spec: cpuOnly, Status: machine1Status},
+				{Spec: cpuAndMemory, Status: machine2Status},
+			},
+		},
+		{
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Score: (10000 - 6000) / 10000 = 4
+				Memory Score: (20000 - 5000) / 20000 = 7.5
+				Minion1 Score: (4 + 7.5) / 2 = 5
+
+				Minion2 scores on 0-10 scale
+				CPU Score: (10000 - 6000) / 10000 = 4
+				Memory Score: (50000 - 10000) / 50000 = 8
+				Minion2 Score: (4 + 8) / 2 = 6
+			*/
+			pod:          api.Pod{Spec: cpuAndMemory},
+			nodes:        []api.Minion{makeMinion("machine1", 10000, 20000), makeMinion("machine2", 10000, 50000)},
+			expectedList: []HostPriority{{"machine1", 5}, {"machine2", 6}},
+			test:         "resources requested, pods scheduled with resources, differently sized machines",
+			pods: []api.Pod{
+				{Spec: cpuOnly, Status: machine1Status},
+				{Spec: cpuAndMemory, Status: machine2Status},
+			},
+		},
+		{
+			/*
+				Minion1 scores on 0-10 scale
+				CPU Score: (4000 - 6000) / 4000 = 0
+				Memory Score: (10000 - 0) / 10000 = 10
+				Minion1 Score: (0 + 10) / 2 = 5
+
+				Minion2 scores on 0-10 scale
+				CPU Score: (4000 - 6000) / 4000 = 0
+				Memory Score: (10000 - 5000) / 10000 = 5
+				Minion2 Score: (0 + 5) / 2 = 2
+			*/
+			pod:          api.Pod{Spec: cpuOnly},
+			nodes:        []api.Minion{makeMinion("machine1", 4000, 10000), makeMinion("machine2", 4000, 10000)},
+			expectedList: []HostPriority{{"machine1", 5}, {"machine2", 2}},
+			test:         "requested resources exceed minion capacity",
+			pods: []api.Pod{
+				{Spec: cpuOnly, Status: machine1Status},
+				{Spec: cpuAndMemory, Status: machine2Status},
+			},
+		},
+		{
+			pod:          api.Pod{Spec: noResources},
 			nodes:        []api.Minion{makeMinion("machine1", 0, 0), makeMinion("machine2", 0, 0)},
 			expectedList: []HostPriority{{"machine1", 0}, {"machine2", 0}},
-			test:         "zero minion resources",
+			test:         "zero minion resources, pods scheduled with resources",
 			pods: []api.Pod{
 				{Spec: cpuOnly},
 				{Spec: cpuAndMemory},
diff --git a/pkg/scheduler/spreading.go b/pkg/scheduler/spreading.go
index 314a1d47f13..24e9a8278a1 100644
--- a/pkg/scheduler/spreading.go
+++ b/pkg/scheduler/spreading.go
@@ -18,7 +18,6 @@ package scheduler
 
 import (
 	"math/rand"
-	"sort"
 
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
@@ -44,18 +43,11 @@ func CalculateSpreadPriority(pod api.Pod, podLister PodLister, minionLister Mini
 	if len(pods) > 0 {
 		for _, pod := range pods {
 			counts[pod.Status.Host]++
+			// Compute the maximum number of pods hosted on any minion
+			if counts[pod.Status.Host] > maxCount {
+				maxCount = counts[pod.Status.Host]
+			}
 		}
-
-		// doing this separately since the pod count can be much higher
-		// than the filtered minion count
-		values := make([]int, len(counts))
-		idx := 0
-		for _, count := range counts {
-			values[idx] = count
-			idx++
-		}
-		sort.Sort(sort.IntSlice(values))
-		maxCount = values[len(values)-1]
 	}
 
 	result := []HostPriority{}
diff --git a/plugin/pkg/scheduler/factory/factory.go b/plugin/pkg/scheduler/factory/factory.go
index 47bc148ea05..6fd88fbf2dd 100644
--- a/plugin/pkg/scheduler/factory/factory.go
+++ b/plugin/pkg/scheduler/factory/factory.go
@@ -57,7 +57,8 @@ type ConfigFactory struct {
 // NewConfigFactory initializes the factory.
 func NewConfigFactory(client *client.Client) *ConfigFactory {
 	// initialize the factory struct
-	factory := &ConfigFactory{Client: client,
+	factory := &ConfigFactory{
+		Client:       client,
 		PodQueue:     cache.NewFIFO(),
 		PodLister:    &storeToPodLister{cache.NewStore()},
 		MinionLister: &storeToMinionLister{cache.NewStore()},
@@ -83,7 +84,7 @@ func (factory *ConfigFactory) Create(predicateKeys, priorityKeys []string) (*sch
 	}
 
 	if priorityKeys == nil {
-		glog.V(2).Infof("Custom priority list not provided, using default priorities")
+		glog.V(2).Infof("Custom priority list not provided, using default priority: LeastRequestedPriority")
 		priorityKeys = []string{"LeastRequestedPriority"}
 	}
 	priorityConfigs, err := factory.getPriorityConfigs(priorityKeys)
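
Note on the scoring math (not part of the patch): the expectedList values in priorities_test.go follow from calculateScore, which rates unused capacity on a 0-10 scale and returns 0 when capacity is zero or the requested total exceeds it, combined with averaging the per-minion CPU and memory scores, as the test comments indicate. The sketch below is a minimal, self-contained illustration of that arithmetic; the package, function, and variable names are illustrative only and do not exist in the patched code.

package main

import "fmt"

// score mirrors the patched calculateScore: ((capacity - requested) * 10) / capacity,
// returning 0 when capacity is zero or the requested total exceeds capacity.
func score(requested, capacity int) int {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return ((capacity - requested) * 10) / capacity
}

func main() {
	// "resources requested, pods scheduled with resources" case, machine1:
	// 6000 CPU requested of 10000 capacity, 5000 memory requested of 20000.
	cpuScore := score(6000, 10000)            // 4
	memoryScore := score(5000, 20000)         // 7 (7.5 truncated by integer division)
	fmt.Println((cpuScore + memoryScore) / 2) // 5, matching expectedList for machine1
}

With integer division the intermediate 7.5 becomes 7, which is why the test comment's (4 + 7.5) / 2 still lands on 5.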