mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-04 18:00:08 +00:00
Merge pull request #10661 from davidopp/somebranch
When scheduling, treat zero limit-pods as having nonzero limit when calculating priorities
This commit is contained in:
commit
8278928b66
@ -20,6 +20,7 @@ import (
|
|||||||
"math"
|
"math"
|
||||||
|
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
||||||
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm"
|
"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm"
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
|
"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
|
||||||
@ -28,38 +29,69 @@ import (
|
|||||||
|
|
||||||
// the unused capacity is calculated on a scale of 0-10
|
// the unused capacity is calculated on a scale of 0-10
|
||||||
// 0 being the lowest priority and 10 being the highest
|
// 0 being the lowest priority and 10 being the highest
|
||||||
func calculateScore(requested, capacity int64, node string) int {
|
func calculateScore(requested int64, capacity int64, node string) int {
|
||||||
if capacity == 0 {
|
if capacity == 0 {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
if requested > capacity {
|
if requested > capacity {
|
||||||
glog.Infof("Combined requested resources from existing pods exceeds capacity on minion: %s", node)
|
glog.Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
|
||||||
|
requested, capacity, node)
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
return int(((capacity - requested) * 10) / capacity)
|
return int(((capacity - requested) * 10) / capacity)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate the occupancy on a node. 'node' has information about the resources on the node.
|
// For each of these resources, a pod that doesn't request the resource explicitly
|
||||||
|
// will be treated as having requested the amount indicated below, for the purpose
|
||||||
|
// of computing priority only. This ensures that when scheduling zero-limit pods, such
|
||||||
|
// pods will not all be scheduled to the machine with the smallest in-use limit,
|
||||||
|
// and that when scheduling regular pods, such pods will not see zero-limit pods as
|
||||||
|
// consuming no resources whatsoever.
|
||||||
|
const defaultMilliCpuLimit int64 = 100 // 0.1 core
|
||||||
|
const defaultMemoryLimit int64 = 60 * 1024 * 1024 // 60 MB
|
||||||
|
|
||||||
|
// TODO: Consider setting default as a fixed fraction of machine capacity (take "capacity api.ResourceList"
|
||||||
|
// as an additional argument here) rather than using constants
|
||||||
|
func getNonzeroLimits(limits *api.ResourceList) (int64, int64) {
|
||||||
|
var out_millicpu, out_memory int64
|
||||||
|
// Override if un-set, but not if explicitly set to zero
|
||||||
|
if (*limits.Cpu() == resource.Quantity{}) {
|
||||||
|
out_millicpu = defaultMilliCpuLimit
|
||||||
|
} else {
|
||||||
|
out_millicpu = limits.Cpu().MilliValue()
|
||||||
|
}
|
||||||
|
// Override if un-set, but not if explicitly set to zero
|
||||||
|
if (*limits.Memory() == resource.Quantity{}) {
|
||||||
|
out_memory = defaultMemoryLimit
|
||||||
|
} else {
|
||||||
|
out_memory = limits.Memory().Value()
|
||||||
|
}
|
||||||
|
return out_millicpu, out_memory
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the resource occupancy on a node. 'node' has information about the resources on the node.
|
||||||
// 'pods' is a list of pods currently scheduled on the node.
|
// 'pods' is a list of pods currently scheduled on the node.
|
||||||
func calculateOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) algorithm.HostPriority {
|
func calculateResourceOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) algorithm.HostPriority {
|
||||||
totalMilliCPU := int64(0)
|
totalMilliCPU := int64(0)
|
||||||
totalMemory := int64(0)
|
totalMemory := int64(0)
|
||||||
|
capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
|
||||||
|
capacityMemory := node.Status.Capacity.Memory().Value()
|
||||||
|
|
||||||
for _, existingPod := range pods {
|
for _, existingPod := range pods {
|
||||||
for _, container := range existingPod.Spec.Containers {
|
for _, container := range existingPod.Spec.Containers {
|
||||||
totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
|
cpu, memory := getNonzeroLimits(&container.Resources.Limits)
|
||||||
totalMemory += container.Resources.Limits.Memory().Value()
|
totalMilliCPU += cpu
|
||||||
|
totalMemory += memory
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Add the resources requested by the current pod being scheduled.
|
// Add the resources requested by the current pod being scheduled.
|
||||||
// This also helps differentiate between differently sized, but empty, minions.
|
// This also helps differentiate between differently sized, but empty, minions.
|
||||||
for _, container := range pod.Spec.Containers {
|
for _, container := range pod.Spec.Containers {
|
||||||
totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
|
cpu, memory := getNonzeroLimits(&container.Resources.Limits)
|
||||||
totalMemory += container.Resources.Limits.Memory().Value()
|
totalMilliCPU += cpu
|
||||||
|
totalMemory += memory
|
||||||
}
|
}
|
||||||
|
|
||||||
capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
|
|
||||||
capacityMemory := node.Status.Capacity.Memory().Value()
|
|
||||||
|
|
||||||
cpuScore := calculateScore(totalMilliCPU, capacityMilliCPU, node.Name)
|
cpuScore := calculateScore(totalMilliCPU, capacityMilliCPU, node.Name)
|
||||||
memoryScore := calculateScore(totalMemory, capacityMemory, node.Name)
|
memoryScore := calculateScore(totalMemory, capacityMemory, node.Name)
|
||||||
glog.V(10).Infof(
|
glog.V(10).Infof(
|
||||||
@ -89,7 +121,7 @@ func LeastRequestedPriority(pod *api.Pod, podLister algorithm.PodLister, minionL
|
|||||||
|
|
||||||
list := algorithm.HostPriorityList{}
|
list := algorithm.HostPriorityList{}
|
||||||
for _, node := range nodes.Items {
|
for _, node := range nodes.Items {
|
||||||
list = append(list, calculateOccupancy(pod, node, podsToMachines[node.Name]))
|
list = append(list, calculateResourceOccupancy(pod, node, podsToMachines[node.Name]))
|
||||||
}
|
}
|
||||||
return list, nil
|
return list, nil
|
||||||
}
|
}
|
||||||
@ -163,15 +195,17 @@ func calculateBalancedResourceAllocation(pod *api.Pod, node api.Node, pods []*ap
|
|||||||
score := int(0)
|
score := int(0)
|
||||||
for _, existingPod := range pods {
|
for _, existingPod := range pods {
|
||||||
for _, container := range existingPod.Spec.Containers {
|
for _, container := range existingPod.Spec.Containers {
|
||||||
totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
|
cpu, memory := getNonzeroLimits(&container.Resources.Limits)
|
||||||
totalMemory += container.Resources.Limits.Memory().Value()
|
totalMilliCPU += cpu
|
||||||
|
totalMemory += memory
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Add the resources requested by the current pod being scheduled.
|
// Add the resources requested by the current pod being scheduled.
|
||||||
// This also helps differentiate between differently sized, but empty, minions.
|
// This also helps differentiate between differently sized, but empty, minions.
|
||||||
for _, container := range pod.Spec.Containers {
|
for _, container := range pod.Spec.Containers {
|
||||||
totalMilliCPU += container.Resources.Limits.Cpu().MilliValue()
|
cpu, memory := getNonzeroLimits(&container.Resources.Limits)
|
||||||
totalMemory += container.Resources.Limits.Memory().Value()
|
totalMilliCPU += cpu
|
||||||
|
totalMemory += memory
|
||||||
}
|
}
|
||||||
|
|
||||||
capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
|
capacityMilliCPU := node.Status.Capacity.Cpu().MilliValue()
|
||||||
|
@ -19,10 +19,12 @@ package priorities
|
|||||||
import (
|
import (
|
||||||
"reflect"
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strconv"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
|
||||||
|
"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler"
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm"
|
"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -38,6 +40,103 @@ func makeMinion(node string, milliCPU, memory int64) api.Node {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestZeroLimit(t *testing.T) {
|
||||||
|
// A pod with no resources. We expect spreading to count it as having the default resources.
|
||||||
|
noResources := api.PodSpec{
|
||||||
|
Containers: []api.Container{
|
||||||
|
{},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
noResources1 := noResources
|
||||||
|
noResources1.NodeName = "machine1"
|
||||||
|
// A pod with the same resources as a 0-limit pod gets by default as its resources (for spreading).
|
||||||
|
small := api.PodSpec{
|
||||||
|
Containers: []api.Container{
|
||||||
|
{
|
||||||
|
Resources: api.ResourceRequirements{
|
||||||
|
Limits: api.ResourceList{
|
||||||
|
"cpu": resource.MustParse(
|
||||||
|
strconv.FormatInt(defaultMilliCpuLimit, 10) + "m"),
|
||||||
|
"memory": resource.MustParse(
|
||||||
|
strconv.FormatInt(defaultMemoryLimit, 10)),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
small2 := small
|
||||||
|
small2.NodeName = "machine2"
|
||||||
|
// A larger pod.
|
||||||
|
large := api.PodSpec{
|
||||||
|
Containers: []api.Container{
|
||||||
|
{
|
||||||
|
Resources: api.ResourceRequirements{
|
||||||
|
Limits: api.ResourceList{
|
||||||
|
"cpu": resource.MustParse(
|
||||||
|
strconv.FormatInt(defaultMilliCpuLimit*3, 10) + "m"),
|
||||||
|
"memory": resource.MustParse(
|
||||||
|
strconv.FormatInt(defaultMemoryLimit*3, 10)),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
large1 := large
|
||||||
|
large1.NodeName = "machine1"
|
||||||
|
large2 := large
|
||||||
|
large2.NodeName = "machine2"
|
||||||
|
tests := []struct {
|
||||||
|
pod *api.Pod
|
||||||
|
pods []*api.Pod
|
||||||
|
nodes []api.Node
|
||||||
|
test string
|
||||||
|
}{
|
||||||
|
// The point of these tests is to show you get the same priority for a zero-limit pod
|
||||||
|
// as for a pod with the defaults limits, both when the zero-limit pod is already on the machine
|
||||||
|
// and when the zero-limit pod is the one being scheduled.
|
||||||
|
{
|
||||||
|
pod: &api.Pod{Spec: noResources},
|
||||||
|
// match current f1-micro on GCE
|
||||||
|
nodes: []api.Node{makeMinion("machine1", 1000, defaultMemoryLimit*10), makeMinion("machine2", 1000, defaultMemoryLimit*10)},
|
||||||
|
test: "test priority of zero-limit pod with machine with zero-limit pod",
|
||||||
|
pods: []*api.Pod{
|
||||||
|
{Spec: large1}, {Spec: noResources1},
|
||||||
|
{Spec: large2}, {Spec: small2},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pod: &api.Pod{Spec: small},
|
||||||
|
// match current f1-micro on GCE
|
||||||
|
nodes: []api.Node{makeMinion("machine1", 1000, defaultMemoryLimit*10), makeMinion("machine2", 1000, defaultMemoryLimit*10)},
|
||||||
|
test: "test priority of nonzero-limit pod with machine with zero-limit pod",
|
||||||
|
pods: []*api.Pod{
|
||||||
|
{Spec: large1}, {Spec: noResources1},
|
||||||
|
{Spec: large2}, {Spec: small2},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
const expectedPriority int = 25
|
||||||
|
for _, test := range tests {
|
||||||
|
list, err := scheduler.PrioritizeNodes(
|
||||||
|
test.pod,
|
||||||
|
algorithm.FakePodLister(test.pods),
|
||||||
|
// This should match the configuration in defaultPriorities() in
|
||||||
|
// plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want
|
||||||
|
// to test what's actually in production.
|
||||||
|
[]algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: NewServiceSpreadPriority(algorithm.FakeServiceLister([]api.Service{})), Weight: 1}},
|
||||||
|
algorithm.FakeMinionLister(api.NodeList{Items: test.nodes}))
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
for _, hp := range list {
|
||||||
|
if hp.Score != expectedPriority {
|
||||||
|
t.Errorf("%s: expected 25 for all priorities, got list %#v", list)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestLeastRequested(t *testing.T) {
|
func TestLeastRequested(t *testing.T) {
|
||||||
labels1 := map[string]string{
|
labels1 := map[string]string{
|
||||||
"foo": "bar",
|
"foo": "bar",
|
||||||
@ -63,6 +162,7 @@ func TestLeastRequested(t *testing.T) {
|
|||||||
Resources: api.ResourceRequirements{
|
Resources: api.ResourceRequirements{
|
||||||
Limits: api.ResourceList{
|
Limits: api.ResourceList{
|
||||||
"cpu": resource.MustParse("1000m"),
|
"cpu": resource.MustParse("1000m"),
|
||||||
|
"memory": resource.MustParse("0"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -70,6 +170,7 @@ func TestLeastRequested(t *testing.T) {
|
|||||||
Resources: api.ResourceRequirements{
|
Resources: api.ResourceRequirements{
|
||||||
Limits: api.ResourceList{
|
Limits: api.ResourceList{
|
||||||
"cpu": resource.MustParse("2000m"),
|
"cpu": resource.MustParse("2000m"),
|
||||||
|
"memory": resource.MustParse("0"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -393,6 +494,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
|
|||||||
Resources: api.ResourceRequirements{
|
Resources: api.ResourceRequirements{
|
||||||
Limits: api.ResourceList{
|
Limits: api.ResourceList{
|
||||||
"cpu": resource.MustParse("1000m"),
|
"cpu": resource.MustParse("1000m"),
|
||||||
|
"memory": resource.MustParse("0"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -400,6 +502,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
|
|||||||
Resources: api.ResourceRequirements{
|
Resources: api.ResourceRequirements{
|
||||||
Limits: api.ResourceList{
|
Limits: api.ResourceList{
|
||||||
"cpu": resource.MustParse("2000m"),
|
"cpu": resource.MustParse("2000m"),
|
||||||
|
"memory": resource.MustParse("0"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -20,6 +20,7 @@ import (
|
|||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm"
|
"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm"
|
||||||
|
"github.com/golang/glog"
|
||||||
)
|
)
|
||||||
|
|
||||||
type ServiceSpread struct {
|
type ServiceSpread struct {
|
||||||
@ -82,6 +83,9 @@ func (s *ServiceSpread) CalculateSpreadPriority(pod *api.Pod, podLister algorith
|
|||||||
fScore = 10 * (float32(maxCount-counts[minion.Name]) / float32(maxCount))
|
fScore = 10 * (float32(maxCount-counts[minion.Name]) / float32(maxCount))
|
||||||
}
|
}
|
||||||
result = append(result, algorithm.HostPriority{Host: minion.Name, Score: int(fScore)})
|
result = append(result, algorithm.HostPriority{Host: minion.Name, Score: int(fScore)})
|
||||||
|
glog.V(10).Infof(
|
||||||
|
"%v -> %v: ServiceSpreadPriority, Score: (%d)", pod.Name, minion.Name, int(fScore),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
@ -74,7 +74,7 @@ func (g *genericScheduler) Schedule(pod *api.Pod, minionLister algorithm.MinionL
|
|||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
priorityList, err := prioritizeNodes(pod, g.pods, g.prioritizers, algorithm.FakeMinionLister(filteredNodes))
|
priorityList, err := PrioritizeNodes(pod, g.pods, g.prioritizers, algorithm.FakeMinionLister(filteredNodes))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@ -142,7 +142,7 @@ func findNodesThatFit(pod *api.Pod, podLister algorithm.PodLister, predicateFunc
|
|||||||
// Each priority function can also have its own weight
|
// Each priority function can also have its own weight
|
||||||
// The minion scores returned by the priority function are multiplied by the weights to get weighted scores
|
// The minion scores returned by the priority function are multiplied by the weights to get weighted scores
|
||||||
// All scores are finally combined (added) to get the total weighted scores of all minions
|
// All scores are finally combined (added) to get the total weighted scores of all minions
|
||||||
func prioritizeNodes(pod *api.Pod, podLister algorithm.PodLister, priorityConfigs []algorithm.PriorityConfig, minionLister algorithm.MinionLister) (algorithm.HostPriorityList, error) {
|
func PrioritizeNodes(pod *api.Pod, podLister algorithm.PodLister, priorityConfigs []algorithm.PriorityConfig, minionLister algorithm.MinionLister) (algorithm.HostPriorityList, error) {
|
||||||
result := algorithm.HostPriorityList{}
|
result := algorithm.HostPriorityList{}
|
||||||
|
|
||||||
// If no priority configs are provided, then the EqualPriority function is applied
|
// If no priority configs are provided, then the EqualPriority function is applied
|
||||||
@ -168,6 +168,7 @@ func prioritizeNodes(pod *api.Pod, podLister algorithm.PodLister, priorityConfig
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for host, score := range combinedScores {
|
for host, score := range combinedScores {
|
||||||
|
glog.V(10).Infof("Host %s Score %d", host, score)
|
||||||
result = append(result, algorithm.HostPriority{Host: host, Score: score})
|
result = append(result, algorithm.HostPriority{Host: host, Score: score})
|
||||||
}
|
}
|
||||||
return result, nil
|
return result, nil
|
||||||
|
Loading…
Reference in New Issue
Block a user