From 62e97173867a5a7817c2e2ecea78a0e91a370675 Mon Sep 17 00:00:00 2001
From: Eric Ernst
Date: Thu, 23 May 2019 10:47:17 -0700
Subject: [PATCH 1/2] scheduler: add pod Overhead support for requests

Signed-off-by: Eric Ernst
---
 .../algorithm/predicates/predicates.go      |  8 ++
 .../algorithm/predicates/predicates_test.go | 36 +++++++++
 .../priorities/resource_allocation.go       | 14 ++++
 pkg/scheduler/nodeinfo/node_info.go         | 15 ++++
 pkg/scheduler/nodeinfo/node_info_test.go    | 77 +++++++++++++++----
 5 files changed, 134 insertions(+), 16 deletions(-)

diff --git a/pkg/scheduler/algorithm/predicates/predicates.go b/pkg/scheduler/algorithm/predicates/predicates.go
index 1f16231786d..9a5e43c4734 100644
--- a/pkg/scheduler/algorithm/predicates/predicates.go
+++ b/pkg/scheduler/algorithm/predicates/predicates.go
@@ -726,6 +726,9 @@ func (c *VolumeZoneChecker) predicate(pod *v1.Pod, meta PredicateMetadata, nodeI
 // the max in each dimension iteratively. In contrast, we sum the resource vectors for
 // regular containers since they run simultaneously.
 //
+// If Pod Overhead is specified and the PodOverhead feature gate is enabled, the resources
+// defined for Overhead are added to the calculated Resource request sum.
+//
 // Example:
 //
 // Pod:
@@ -756,6 +759,11 @@ func GetResourceRequest(pod *v1.Pod) *schedulernodeinfo.Resource {
 		result.SetMaxResource(container.Resources.Requests)
 	}
 
+	// If Overhead is being utilized, add to the total requests for the pod
+	if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodOverhead) {
+		result.Add(pod.Spec.Overhead)
+	}
+
 	return result
 }
 
diff --git a/pkg/scheduler/algorithm/predicates/predicates_test.go b/pkg/scheduler/algorithm/predicates/predicates_test.go
index 4e7720413fe..f45f30d5c7f 100644
--- a/pkg/scheduler/algorithm/predicates/predicates_test.go
+++ b/pkg/scheduler/algorithm/predicates/predicates_test.go
@@ -29,7 +29,10 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/sets"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
+	featuregatetesting "k8s.io/component-base/featuregate/testing"
 	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
+	"k8s.io/kubernetes/pkg/features"
 	schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
 	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
 	schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
@@ -86,12 +89,20 @@ func newResourceInitPod(pod *v1.Pod, usage ...schedulernodeinfo.Resource) *v1.Po
 	return pod
 }
 
+func newResourceOverheadPod(pod *v1.Pod, overhead v1.ResourceList) *v1.Pod {
+	pod.Spec.Overhead = overhead
+	return pod
+}
+
 func GetPredicateMetadata(p *v1.Pod, nodeInfo map[string]*schedulernodeinfo.NodeInfo) PredicateMetadata {
 	pm := PredicateMetadataFactory{schedulertesting.FakePodLister{p}}
 	return pm.GetMetadata(p, nodeInfo)
 }
 
 func TestPodFitsResources(t *testing.T) {
+
+	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodOverhead, true)()
+
 	enoughPodsTests := []struct {
 		pod *v1.Pod
 		nodeInfo *schedulernodeinfo.NodeInfo
@@ -351,6 +362,31 @@ func TestPodFitsResources(t *testing.T) {
 			ignoredExtendedResources: sets.NewString(string(extendedResourceB)),
 			name: "skip checking ignored extended resource",
 		},
+		{
+			pod: newResourceOverheadPod(
+				newResourcePod(schedulernodeinfo.Resource{MilliCPU: 1, Memory: 1}),
+				v1.ResourceList{v1.ResourceCPU: resource.MustParse("3m"), v1.ResourceMemory: resource.MustParse("13")},
+			),
+			nodeInfo: schedulernodeinfo.NewNodeInfo(
+				newResourcePod(schedulernodeinfo.Resource{MilliCPU: 5, Memory: 5})),
+			fits: true,
+			ignoredExtendedResources: sets.NewString(string(extendedResourceB)),
+			name: "resources + pod overhead fits",
+		},
+		{
+			pod: newResourceOverheadPod(
+				newResourcePod(schedulernodeinfo.Resource{MilliCPU: 1, Memory: 1}),
+				v1.ResourceList{v1.ResourceCPU: resource.MustParse("1m"), v1.ResourceMemory: resource.MustParse("15")},
+			),
+			nodeInfo: schedulernodeinfo.NewNodeInfo(
+				newResourcePod(schedulernodeinfo.Resource{MilliCPU: 5, Memory: 5})),
+			fits: false,
+			ignoredExtendedResources: sets.NewString(string(extendedResourceB)),
+			name: "requests + overhead does not fit for memory",
+			reasons: []PredicateFailureReason{
+				NewInsufficientResourceError(v1.ResourceMemory, 16, 5, 20),
+			},
+		},
 	}
 
 	for _, test := range enoughPodsTests {
diff --git a/pkg/scheduler/algorithm/priorities/resource_allocation.go b/pkg/scheduler/algorithm/priorities/resource_allocation.go
index fea2e680697..9071e03c12a 100644
--- a/pkg/scheduler/algorithm/priorities/resource_allocation.go
+++ b/pkg/scheduler/algorithm/priorities/resource_allocation.go
@@ -91,6 +91,8 @@ func (r *ResourceAllocationPriority) PriorityMap(
 	}, nil
 }
 
+// getNonZeroRequests returns the total non-zero requests. If Overhead is defined for the pod and the
+// PodOverhead feature is enabled, the Overhead is added to the result.
 func getNonZeroRequests(pod *v1.Pod) *schedulernodeinfo.Resource {
 	result := &schedulernodeinfo.Resource{}
 	for i := range pod.Spec.Containers {
@@ -99,5 +101,17 @@ func getNonZeroRequests(pod *v1.Pod) *schedulernodeinfo.Resource {
 		result.MilliCPU += cpu
 		result.Memory += memory
 	}
+
+	// If Overhead is being utilized, add to the total requests for the pod
+	if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodOverhead) {
+		if _, found := pod.Spec.Overhead[v1.ResourceCPU]; found {
+			result.MilliCPU += pod.Spec.Overhead.Cpu().MilliValue()
+		}
+
+		if _, found := pod.Spec.Overhead[v1.ResourceMemory]; found {
+			result.Memory += pod.Spec.Overhead.Memory().Value()
+		}
+	}
+
 	return result
 }
diff --git a/pkg/scheduler/nodeinfo/node_info.go b/pkg/scheduler/nodeinfo/node_info.go
index 0dfd345c869..d74aab7088f 100644
--- a/pkg/scheduler/nodeinfo/node_info.go
+++ b/pkg/scheduler/nodeinfo/node_info.go
@@ -25,8 +25,10 @@ import (
 	v1 "k8s.io/api/core/v1"
 	storagev1beta1 "k8s.io/api/storage/v1beta1"
 	"k8s.io/apimachinery/pkg/api/resource"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/klog"
 	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
+	"k8s.io/kubernetes/pkg/features"
 	priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
 	volumeutil "k8s.io/kubernetes/pkg/volume/util"
 )
@@ -613,6 +615,19 @@ func calculateResource(pod *v1.Pod) (res Resource, non0CPU int64, non0Mem int64)
 		// No non-zero resources for GPUs or opaque resources.
 	}
 
+	// If Overhead is being utilized, add to the total requests for the pod
+	if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodOverhead) {
+		resPtr.Add(pod.Spec.Overhead)
+
+		if _, found := pod.Spec.Overhead[v1.ResourceCPU]; found {
+			non0CPU += pod.Spec.Overhead.Cpu().MilliValue()
+		}
+
+		if _, found := pod.Spec.Overhead[v1.ResourceMemory]; found {
+			non0Mem += pod.Spec.Overhead.Memory().Value()
+		}
+	}
+
 	return
 }
 
diff --git a/pkg/scheduler/nodeinfo/node_info_test.go b/pkg/scheduler/nodeinfo/node_info_test.go
index 24744e1ac2e..3fcaede440f 100644
--- a/pkg/scheduler/nodeinfo/node_info_test.go
+++ b/pkg/scheduler/nodeinfo/node_info_test.go
@@ -26,6 +26,9 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
+	featuregatetesting "k8s.io/component-base/featuregate/testing"
+	"k8s.io/kubernetes/pkg/features"
 )
 
 func TestNewResource(t *testing.T) {
@@ -540,6 +543,9 @@ func TestNodeInfoClone(t *testing.T) {
 }
 
 func TestNodeInfoAddPod(t *testing.T) {
+
+	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodOverhead, true)()
+
 	nodeName := "test-node"
 	pods := []*v1.Pod{
 		{
@@ -567,6 +573,9 @@ func TestNodeInfoAddPod(t *testing.T) {
 				},
 			},
 			NodeName: nodeName,
+			Overhead: v1.ResourceList{
+				v1.ResourceCPU: resource.MustParse("500m"),
+			},
 		},
 	},
 	{
@@ -580,8 +589,7 @@ func TestNodeInfoAddPod(t *testing.T) {
 				{
 					Resources: v1.ResourceRequirements{
 						Requests: v1.ResourceList{
-							v1.ResourceCPU: resource.MustParse("200m"),
-							v1.ResourceMemory: resource.MustParse("1Ki"),
+							v1.ResourceCPU: resource.MustParse("200m"),
 						},
 					},
 					Ports: []v1.ContainerPort{
@@ -594,6 +602,10 @@ func TestNodeInfoAddPod(t *testing.T) {
 				},
 			},
 			NodeName: nodeName,
+			Overhead: v1.ResourceList{
+				v1.ResourceCPU: resource.MustParse("500m"),
+				v1.ResourceMemory: resource.MustParse("500"),
+			},
 		},
 	},
 }
@@ -604,15 +616,15 @@ func TestNodeInfoAddPod(t *testing.T) {
 			},
 		},
 		requestedResource: &Resource{
-			MilliCPU: 300,
-			Memory: 1524,
+			MilliCPU: 1300,
+			Memory: 1000,
 			EphemeralStorage: 0,
 			AllowedPodNumber: 0,
 			ScalarResources: map[v1.ResourceName]int64(nil),
 		},
 		nonzeroRequest: &Resource{
-			MilliCPU: 300,
-			Memory: 1524,
+			MilliCPU: 1300,
+			Memory: 209716200, // 200MB + 1000 specified in requests/overhead
 			EphemeralStorage: 0,
 			AllowedPodNumber: 0,
 			ScalarResources: map[v1.ResourceName]int64(nil),
@@ -653,6 +665,9 @@ func TestNodeInfoAddPod(t *testing.T) {
 				},
 			},
 			NodeName: nodeName,
+			Overhead: v1.ResourceList{
+				v1.ResourceCPU: resource.MustParse("500m"),
+			},
 		},
 	},
 	{
@@ -666,8 +681,7 @@ func TestNodeInfoAddPod(t *testing.T) {
 				{
 					Resources: v1.ResourceRequirements{
 						Requests: v1.ResourceList{
-							v1.ResourceCPU: resource.MustParse("200m"),
-							v1.ResourceMemory: resource.MustParse("1Ki"),
+							v1.ResourceCPU: resource.MustParse("200m"),
 						},
 					},
 					Ports: []v1.ContainerPort{
@@ -680,6 +694,10 @@ func TestNodeInfoAddPod(t *testing.T) {
 				},
 			},
 			NodeName: nodeName,
+			Overhead: v1.ResourceList{
+				v1.ResourceCPU: resource.MustParse("500m"),
+				v1.ResourceMemory: resource.MustParse("500"),
+			},
 		},
 	},
 },
@@ -702,12 +720,23 @@ func TestNodeInfoAddPod(t *testing.T) {
 }
 
 func TestNodeInfoRemovePod(t *testing.T) {
+
+	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodOverhead, true)()
+
 	nodeName := "test-node"
 	pods := []*v1.Pod{
 		makeBasePod(t, nodeName, "test-1", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
Protocol: "TCP"}}), makeBasePod(t, nodeName, "test-2", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}), } + // add pod Overhead + for _, pod := range pods { + pod.Spec.Overhead = v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("500m"), + v1.ResourceMemory: resource.MustParse("500"), + } + } + tests := []struct { pod *v1.Pod errExpected bool @@ -723,15 +752,15 @@ func TestNodeInfoRemovePod(t *testing.T) { }, }, requestedResource: &Resource{ - MilliCPU: 300, - Memory: 1524, + MilliCPU: 1300, + Memory: 2524, EphemeralStorage: 0, AllowedPodNumber: 0, ScalarResources: map[v1.ResourceName]int64(nil), }, nonzeroRequest: &Resource{ - MilliCPU: 300, - Memory: 1524, + MilliCPU: 1300, + Memory: 2524, EphemeralStorage: 0, AllowedPodNumber: 0, ScalarResources: map[v1.ResourceName]int64(nil), @@ -772,6 +801,10 @@ func TestNodeInfoRemovePod(t *testing.T) { }, }, NodeName: nodeName, + Overhead: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("500m"), + v1.ResourceMemory: resource.MustParse("500"), + }, }, }, { @@ -799,6 +832,10 @@ func TestNodeInfoRemovePod(t *testing.T) { }, }, NodeName: nodeName, + Overhead: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("500m"), + v1.ResourceMemory: resource.MustParse("500"), + }, }, }, }, @@ -830,6 +867,10 @@ func TestNodeInfoRemovePod(t *testing.T) { }, }, NodeName: nodeName, + Overhead: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("500m"), + v1.ResourceMemory: resource.MustParse("500"), + }, }, }, errExpected: false, @@ -840,15 +881,15 @@ func TestNodeInfoRemovePod(t *testing.T) { }, }, requestedResource: &Resource{ - MilliCPU: 200, - Memory: 1024, + MilliCPU: 700, + Memory: 1524, EphemeralStorage: 0, AllowedPodNumber: 0, ScalarResources: map[v1.ResourceName]int64(nil), }, nonzeroRequest: &Resource{ - MilliCPU: 200, - Memory: 1024, + MilliCPU: 700, + Memory: 1524, EphemeralStorage: 0, AllowedPodNumber: 0, ScalarResources: map[v1.ResourceName]int64(nil), @@ -888,6 +929,10 @@ func TestNodeInfoRemovePod(t *testing.T) { }, }, NodeName: nodeName, + Overhead: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("500m"), + v1.ResourceMemory: resource.MustParse("500"), + }, }, }, }, From 9babbf8bd7f009b4b1738efbf524e90b1c340e85 Mon Sep 17 00:00:00 2001 From: Eric Ernst Date: Wed, 5 Jun 2019 09:10:29 -0700 Subject: [PATCH 2/2] pod-overhead: autogenerated code for scheduling changes Signed-off-by: Eric Ernst --- pkg/scheduler/nodeinfo/BUILD | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/scheduler/nodeinfo/BUILD b/pkg/scheduler/nodeinfo/BUILD index 7198d72d71f..03a5094dd07 100644 --- a/pkg/scheduler/nodeinfo/BUILD +++ b/pkg/scheduler/nodeinfo/BUILD @@ -11,12 +11,14 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/apis/core/v1/helper:go_default_library", + "//pkg/features:go_default_library", "//pkg/scheduler/algorithm/priorities/util:go_default_library", "//pkg/volume/util:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/api/storage/v1beta1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", + "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library", "//vendor/k8s.io/klog:go_default_library", ], ) @@ -30,11 +32,14 @@ go_test( ], embed = [":go_default_library"], deps = [ + "//pkg/features:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", 
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", + "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library", + "//staging/src/k8s.io/component-base/featuregate/testing:go_default_library", ], )