Get rid of separate DumbSpreading function and just treat zero-limit
pods as having a constant non-zero memory and CPU limit.
David Oppenheimer 2015-07-05 11:39:35 -07:00
parent 44ed229069
commit 4ea8b8a66d
4 changed files with 102 additions and 96 deletions
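
The idea, in rough terms: once a zero-limit pod is counted as a small constant amount of CPU and memory, LeastRequestedPriority alone spreads such pods, so a separate DumbSpreadingPriority is no longer needed. Below is a minimal standalone sketch (not scheduler code) of that effect; it reuses the calculateScore formula and the defaultMilliCpuLimit constant from the diff, while the 1000-millicore node capacity and the pod counts are made up for illustration.

```go
package main

import "fmt"

// Mirrors the constant introduced in this commit: a pod with no CPU limit is
// counted as 0.1 core for priority purposes.
const defaultMilliCpuLimit int64 = 100

// score mirrors calculateScore from the diff: 10 * (free fraction of capacity).
func score(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (capacity - requested) * 10 / capacity
}

func main() {
	capacity := int64(1000) // illustrative: a 1-CPU minion

	// Node A already holds three zero-limit pods, node B holds one.
	// With the new defaulting they count as 300 and 100 millicores.
	nodeA := 3 * defaultMilliCpuLimit
	nodeB := 1 * defaultMilliCpuLimit

	fmt.Println(score(nodeA, capacity), score(nodeB, capacity)) // 7 9: the emptier node wins
}
```

Before this change, both nodes would have scored 10 on CPU (zero-limit pods counted as requesting nothing), and the spreading had to come from the separate DumbSpreadingPriority function removed below.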

View File

@@ -21,6 +21,7 @@ import (
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
 	"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm"
 	"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
 	"github.com/golang/glog"
@@ -28,42 +29,72 @@ import (
 // the unused capacity is calculated on a scale of 0-10
 // 0 being the lowest priority and 10 being the highest
-func calculateScore(requested, capacity int64, node string) int {
+func calculateScore(requested int64, capacity int64, node string) int {
 	if capacity == 0 {
 		return 0
 	}
 	if requested > capacity {
-		glog.Infof("Combined requested resources from existing pods exceeds capacity on minion: %s", node)
+		glog.Infof("Combined requested resources %d from existing pods exceeds capacity %d on minion: %s",
+			requested, capacity, node)
 		return 0
 	}
 	return int(((capacity - requested) * 10) / capacity)
 }
+// For each of these resources, a pod that doesn't request the resource explicitly
+// will be treated as having requested the amount indicated below, for the purpose
+// of computing priority only. This ensures that when scheduling zero-limit pods, such
+// pods will not all be scheduled to the machine with the smallest in-use limit,
+// and that when scheduling regular pods, such pods will not see zero-limit pods as
+// consuming no resources whatsoever.
+const defaultMilliCpuLimit int64 = 100 // 0.1 core
+const defaultMemoryLimit int64 = 60 * 1024 * 1024 // 60 MB
+// TODO: Consider setting default as a fixed fraction of machine capacity (take "capacity api.ResourceList"
+// as an additional argument here) rather than using constants
+func toNonzeroLimits(limits *api.ResourceList) (int64, int64) {
+	var out_millicpu, out_memory int64
+	// Override if un-set, but not if explicitly set to zero
+	if (*limits.Cpu() == resource.Quantity{}) {
+		out_millicpu = defaultMilliCpuLimit
+	} else {
+		out_millicpu = limits.Cpu().MilliValue()
+	}
+	// Override if un-set, but not if explicitly set to zero
+	if (*limits.Memory() == resource.Quantity{}) {
+		out_memory = defaultMemoryLimit
+	} else {
+		out_memory = limits.Memory().Value()
+	}
+	return out_millicpu, out_memory
+}
 // Calculate the resource occupancy on a node. 'node' has information about the resources on the node.
 // 'pods' is a list of pods currently scheduled on the node.
 func calculateResourceOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) algorithm.HostPriority {
 	totalMilliCPU := int64(0)
 	totalMemory := int64(0)
+	capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
+	capacityMemory := node.Status.Capacity.Memory().Value()
 	for _, existingPod := range pods {
 		for _, container := range existingPod.Spec.Containers {
-			totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
-			totalMemory += container.Resources.Limits.Memory().Value()
+			cpu, memory := toNonzeroLimits(&container.Resources.Limits)
+			totalMilliCPU += cpu
+			totalMemory += memory
 		}
 	}
 	// Add the resources requested by the current pod being scheduled.
 	// This also helps differentiate between differently sized, but empty, minions.
 	for _, container := range pod.Spec.Containers {
-		totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
-		totalMemory += container.Resources.Limits.Memory().Value()
+		cpu, memory := toNonzeroLimits(&container.Resources.Limits)
+		totalMilliCPU += cpu
+		totalMemory += memory
 	}
-	capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
-	capacityMemory := node.Status.Capacity.Memory().Value()
 	cpuScore := calculateScore(totalMilliCPU, capacityMilliCPU, node.Name)
 	memoryScore := calculateScore(totalMemory, capacityMemory, node.Name)
-	// glog.V(10).Infof(
-	glog.Infof(
+	glog.V(10).Infof(
 		"%v -> %v: Least Requested Priority, Absolute/Requested: (%d, %d) / (%d, %d) Score: (%d, %d)",
 		pod.Name, node.Name,
 		totalMilliCPU, totalMemory,
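
A quick way to read toNonzeroLimits: a limit that was never specified (the empty resource.Quantity{} case above) falls back to the default, while a limit explicitly set to zero is kept at zero. The sketch below models that rule with a nil pointer standing in for "unset"; it is a simplified illustration of the behavior, not the resource.Quantity API.

```go
package main

import "fmt"

const defaultMilliCpuLimit int64 = 100 // same default as in the diff

// nonzeroOrDefault models the rule in toNonzeroLimits: nil means the limit was
// never set and is overridden; an explicit value, including zero, is kept.
func nonzeroOrDefault(limit *int64, def int64) int64 {
	if limit == nil {
		return def
	}
	return *limit
}

func main() {
	var unset *int64
	zero, set := int64(0), int64(250)

	fmt.Println(nonzeroOrDefault(unset, defaultMilliCpuLimit)) // 100: unset -> default
	fmt.Println(nonzeroOrDefault(&zero, defaultMilliCpuLimit)) // 0: explicit zero kept
	fmt.Println(nonzeroOrDefault(&set, defaultMilliCpuLimit))  // 250: explicit value kept
}
```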
@@ -95,47 +126,6 @@ func LeastRequestedPriority(pod *api.Pod, podLister algorithm.PodLister, minionL
 	return list, nil
 }
-func min(l, r int64) (m int64) {
-	m = r
-	if l < r {
-		m = l
-	}
-	return m
-}
-// See comment for DumbSpreadingPriority()
-const dumbSpreadingDenominator int64 = 10
-// DumbSpreadingPriority is a priority function that favors nodes with fewer pods.
-// It works like LeastRequestedPeriority but instead of using 10 * percentage of machine free by resource,
-// it uses 10 * percentage of machine free by pod, with "percentage of machine free by pod" claculated as
-// (dumbSpreadingDenominator - number of pods already on the node + 1) / dumbSpreadingDenominator.
-// dumbSpreadingDenominator serves like the machine capacity in LeasRequestedPriority but is chosen
-// so that we equate one pod with a reasonable amount of resources when we combine all the scores together.
-func DumbSpreadingPriority(pod *api.Pod, podLister algorithm.PodLister, minionLister algorithm.MinionLister) (algorithm.HostPriorityList, error) {
-	nodes, err := minionLister.List()
-	if err != nil {
-		return algorithm.HostPriorityList{}, err
-	}
-	podsToMachines, err := predicates.MapPodsToMachines(podLister)
-	list := algorithm.HostPriorityList{}
-	for _, node := range nodes.Items {
-		npods := int64(len(podsToMachines[node.Name]))
-		score := calculateScore(min(npods+1, dumbSpreadingDenominator), dumbSpreadingDenominator, node.Name)
-		// glog.V(10).Infof(
-		glog.Infof(
-			"%v -> %v: DumbSpreadPriority, Old # pods (%d) Score: (%d)",
-			pod.Name, node.Name, npods, score,
-		)
-		list = append(list, algorithm.HostPriority{
-			Host: node.Name,
-			Score: score,
-		})
-	}
-	return list, nil
-}
 type NodeLabelPrioritizer struct {
 	label string
 	presence bool
@@ -205,15 +195,17 @@ func calculateBalancedResourceAllocation(pod *api.Pod, node api.Node, pods []*ap
 	score := int(0)
 	for _, existingPod := range pods {
 		for _, container := range existingPod.Spec.Containers {
-			totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
-			totalMemory += container.Resources.Limits.Memory().Value()
+			cpu, memory := toNonzeroLimits(&container.Resources.Limits)
+			totalMilliCPU += cpu
+			totalMemory += memory
 		}
 	}
 	// Add the resources requested by the current pod being scheduled.
 	// This also helps differentiate between differently sized, but empty, minions.
 	for _, container := range pod.Spec.Containers {
-		totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
-		totalMemory += container.Resources.Limits.Memory().Value()
+		cpu, memory := toNonzeroLimits(&container.Resources.Limits)
+		totalMilliCPU += cpu
+		totalMemory += memory
 	}
 	capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
@@ -232,8 +224,7 @@ func calculateBalancedResourceAllocation(pod *api.Pod, node api.Node, pods []*ap
 		diff := math.Abs(cpuFraction - memoryFraction)
 		score = int(10 - diff*10)
 	}
-	// glog.V(10).Infof(
-	glog.Infof(
+	glog.V(10).Infof(
 		"%v -> %v: Balanced Resource Allocation, Absolute/Requested: (%d, %d) / (%d, %d) Score: (%d)",
 		pod.Name, node.Name,
 		totalMilliCPU, totalMemory,

View File

@@ -19,6 +19,7 @@ package priorities
 import (
 	"reflect"
 	"sort"
+	"strconv"
 	"testing"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
@@ -39,72 +40,83 @@ func makeMinion(node string, milliCPU, memory int64) api.Node {
 	}
 }
-func TestDumbSpreading(t *testing.T) {
+func TestZeroLimit(t *testing.T) {
+	// A pod with no resources. We expect spreading to count it as having the default resources.
 	noResources := api.PodSpec{
-		Containers: []api.Container{},
+		Containers: []api.Container{
+			{},
+		},
 	}
+	noResources1 := noResources
+	noResources1.NodeName = "machine1"
+	// A pod with the same resources as a 0-limit pod gets by default as its resources (for spreading).
 	small := api.PodSpec{
-		NodeName: "machine1",
 		Containers: []api.Container{
 			{
 				Resources: api.ResourceRequirements{
 					Limits: api.ResourceList{
-						"cpu": resource.MustParse("100m"),
-						"memory": resource.MustParse("1000"),
+						"cpu": resource.MustParse(
+							strconv.FormatInt(defaultMilliCpuLimit, 10) + "m"),
+						"memory": resource.MustParse(
+							strconv.FormatInt(defaultMemoryLimit, 10)),
 					},
 				},
 			},
 		},
 	}
+	small2 := small
+	small2.NodeName = "machine2"
+	// A larger pod.
 	large := api.PodSpec{
-		NodeName: "machine2",
 		Containers: []api.Container{
 			{
 				Resources: api.ResourceRequirements{
 					Limits: api.ResourceList{
-						"cpu": resource.MustParse("600m"),
-						"memory": resource.MustParse("6000"),
+						"cpu": resource.MustParse(
+							strconv.FormatInt(defaultMilliCpuLimit * 3, 10) + "m"),
+						"memory": resource.MustParse(
+							strconv.FormatInt(defaultMemoryLimit * 3, 10)),
 					},
 				},
 			},
 		},
 	}
+	large1 := large
+	large1.NodeName = "machine1"
+	large2 := large
+	large2.NodeName = "machine2"
 	tests := []struct {
 		pod *api.Pod
 		pods []*api.Pod
 		nodes []api.Node
-		expectedList algorithm.HostPriorityList
 		test string
 	}{
+		// The point of these tests is to show you get the same priority for a zero-limit pod
+		// as for a pod with the defaults limits, both when the zero-limit pod is already on the machine
+		// and when the zero-limit pod is the one being scheduled.
 		{
-			/* Minion1 CPU capacity 1000m, free 700m/7000, 3 pods
-			LeastRequestedPriority score 7
-			BalancedResourceAllocation score 10
-			ServiceSpreadingPriority score 10
-			DumbSpreadingPriority score 6
-			Total: 7 + 10 + 10 + 2*6 = 39
-			Minion2 CPU capacity 1000m, free 400m/4000, 1 pod
-			LeastRequestedPriority score 4
-			BalancedResourceAllocation score 10
-			ServiceSpreadingPriority score 10
-			DumbSpreadingPriority score 8
-			Total: 4 + 10 + 10 + 2*8 = 40
-			Moral of the story: We prefer the machine that is more heavily loaded,
-			because it has fewer pods.
-			*/
 			pod: &api.Pod{Spec: noResources},
-			nodes: []api.Node{makeMinion("machine1", 1000, 10000), makeMinion("machine2", 1000, 10000)},
-			expectedList: []algorithm.HostPriority{{"machine1", 39}, {"machine2", 40}},
-			test: "nothing scheduled, nothing requested",
+			// match current f1-micro on GCE
+			nodes: []api.Node{makeMinion("machine1", 1000, defaultMemoryLimit * 10), makeMinion("machine2", 1000, defaultMemoryLimit * 10)},
+			test: "test priority of zero-limit pod with machine with zero-limit pod",
 			pods: []*api.Pod {
-				{Spec: small}, {Spec: small},
-				{Spec: large},
+				{Spec: large1}, {Spec: noResources1},
+				{Spec: large2}, {Spec: small2},
+			},
+		},
+		{
+			pod: &api.Pod{Spec: small},
+			// match current f1-micro on GCE
+			nodes: []api.Node{makeMinion("machine1", 1000, defaultMemoryLimit * 10), makeMinion("machine2", 1000, defaultMemoryLimit * 10)},
+			test: "test priority of nonzero-limit pod with machine with zero-limit pod",
+			pods: []*api.Pod {
+				{Spec: large1}, {Spec: noResources1},
+				{Spec: large2}, {Spec: small2},
 			},
 		},
 	}
+	const expectedPriority int = 25
 	for _, test := range tests {
 		list, err := scheduler.PrioritizeNodes(
 			test.pod,
@@ -112,13 +124,15 @@ func TestDumbSpreading(t *testing.T) {
 			// This should match the configuration in defaultPriorities() in
 			// plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want
 			// to test what's actually in production.
-			[]algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: DumbSpreadingPriority, Weight: 2}, {Function: NewServiceSpreadPriority(algorithm.FakeServiceLister([]api.Service{})), Weight: 1}},
+			[]algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: NewServiceSpreadPriority(algorithm.FakeServiceLister([]api.Service{})), Weight: 1}},
 			algorithm.FakeMinionLister(api.NodeList{Items: test.nodes}))
 		if err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
-		if !reflect.DeepEqual(test.expectedList, list) {
-			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
+		for _, hp := range list {
+			if hp.Score != expectedPriority {
+				t.Errorf("%s: expected %d for all priorities, got list %#v", test.test, expectedPriority, list)
+			}
 		}
 	}
 }
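
Why 25 in both cases, working from the numbers in the test and assuming LeastRequestedPriority averages its CPU and memory scores as the code of this period does: each machine has 1000 millicores and 10x defaultMemoryLimit of memory, and already carries one "large" pod (3x the defaults) plus one pod counted at exactly the defaults (a zero-limit pod on machine1, an explicitly default-sized pod on machine2). Adding the pod being scheduled (1x the defaults, whether zero-limit or explicitly sized) brings usage to 5x of 10x, i.e. half of capacity on both resources. That yields LeastRequestedPriority (5 + 5) / 2 = 5, BalancedResourceAllocation 10 (the CPU and memory fractions are equal), and ServiceSpreadingPriority 10 (no services defined), for a total of 5 + 10 + 10 = 25 on every node, which is exactly what the loop above asserts.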
@@ -149,6 +163,7 @@ func TestLeastRequested(t *testing.T) {
 				Resources: api.ResourceRequirements{
 					Limits: api.ResourceList{
 						"cpu": resource.MustParse("1000m"),
+						"memory": resource.MustParse("0"),
 					},
 				},
 			},
@@ -156,6 +171,7 @@ func TestLeastRequested(t *testing.T) {
 				Resources: api.ResourceRequirements{
 					Limits: api.ResourceList{
 						"cpu": resource.MustParse("2000m"),
+						"memory": resource.MustParse("0"),
 					},
 				},
 			},
@@ -479,6 +495,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
 				Resources: api.ResourceRequirements{
 					Limits: api.ResourceList{
 						"cpu": resource.MustParse("1000m"),
+						"memory": resource.MustParse("0"),
 					},
 				},
 			},
@@ -486,6 +503,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
 				Resources: api.ResourceRequirements{
 					Limits: api.ResourceList{
 						"cpu": resource.MustParse("2000m"),
+						"memory": resource.MustParse("0"),
 					},
 				},
 			},
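
The four small hunks above add an explicit "memory": resource.MustParse("0") to fixtures in TestLeastRequested and TestBalancedResourceAllocation. This follows from toNonzeroLimits: a memory limit that is simply absent would now be counted as defaultMemoryLimit, whereas an explicit zero is not overridden, presumably so that the existing score expectations in those tests are unaffected by the new defaulting.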

View File

@@ -83,8 +83,7 @@ func (s *ServiceSpread) CalculateSpreadPriority(pod *api.Pod, podLister algorith
 			fScore = 10 * (float32(maxCount-counts[minion.Name]) / float32(maxCount))
 		}
 		result = append(result, algorithm.HostPriority{Host: minion.Name, Score: int(fScore)})
-		// glog.V(10).Infof(
-		glog.Infof(
+		glog.V(10).Infof(
 			"%v -> %v: ServiceSpreadPriority, Sore: (%d)", pod.Name, minion.Name, int(fScore),
 		)
 	}

View File

@@ -65,8 +65,6 @@ func defaultPriorities() util.StringSet {
 		factory.RegisterPriorityFunction("LeastRequestedPriority", priorities.LeastRequestedPriority, 1),
 		// Prioritizes nodes to help achieve balanced resource usage
 		factory.RegisterPriorityFunction("BalancedResourceAllocation", priorities.BalancedResourceAllocation, 1),
-		// Prioritizes nodes to achieve approximately equal number of pods per node
-		factory.RegisterPriorityFunction("DumbSpreadingPriority", priorities.DumbSpreadingPriority, 2),
 		// spreads pods by minimizing the number of pods (belonging to the same service) on the same minion.
 		factory.RegisterPriorityConfigFactory(
 			"ServiceSpreadingPriority",