From 231849a90853363900391aaa3f406867c8421489 Mon Sep 17 00:00:00 2001
From: vinay kulkarni
Date: Sun, 5 Feb 2023 04:58:42 +0000
Subject: [PATCH] In-place Pod Vertical Scaling - Scheduler changes

---
 .../framework/plugins/feature/feature.go      |   1 +
 .../framework/plugins/noderesources/fit.go    |  27 +++--
 .../plugins/noderesources/fit_test.go         |  36 +++++++
 pkg/scheduler/framework/plugins/registry.go   |   1 +
 pkg/scheduler/framework/types.go              |  21 +++-
 pkg/scheduler/framework/types_test.go         | 101 ++++++++++++++++++
 6 files changed, 174 insertions(+), 13 deletions(-)

diff --git a/pkg/scheduler/framework/plugins/feature/feature.go b/pkg/scheduler/framework/plugins/feature/feature.go
index 04771b0d0f4..7859b01a1db 100644
--- a/pkg/scheduler/framework/plugins/feature/feature.go
+++ b/pkg/scheduler/framework/plugins/feature/feature.go
@@ -28,4 +28,5 @@ type Features struct {
 	EnableMatchLabelKeysInPodTopologySpread bool
 	EnablePodSchedulingReadiness            bool
 	EnablePodDisruptionConditions           bool
+	EnableInPlacePodVerticalScaling         bool
 }

diff --git a/pkg/scheduler/framework/plugins/noderesources/fit.go b/pkg/scheduler/framework/plugins/noderesources/fit.go
index 27cb3233eef..55e7b9c49a3 100644
--- a/pkg/scheduler/framework/plugins/noderesources/fit.go
+++ b/pkg/scheduler/framework/plugins/noderesources/fit.go
@@ -76,9 +76,10 @@ var nodeResourceStrategyTypeMap = map[config.ScoringStrategyType]scorer{

 // Fit is a plugin that checks if a node has sufficient resources.
 type Fit struct {
-	ignoredResources      sets.String
-	ignoredResourceGroups sets.String
-	handle                framework.Handle
+	ignoredResources                sets.String
+	ignoredResourceGroups           sets.String
+	enableInPlacePodVerticalScaling bool
+	handle                          framework.Handle
 	resourceAllocationScorer
 }
@@ -123,10 +124,11 @@ func NewFit(plArgs runtime.Object, h framework.Handle, fts feature.Features) (fr
 	}

 	return &Fit{
-		ignoredResources:         sets.NewString(args.IgnoredResources...),
-		ignoredResourceGroups:    sets.NewString(args.IgnoredResourceGroups...),
-		handle:                   h,
-		resourceAllocationScorer: *scorePlugin(args),
+		ignoredResources:                sets.NewString(args.IgnoredResources...),
+		ignoredResourceGroups:           sets.NewString(args.IgnoredResourceGroups...),
+		enableInPlacePodVerticalScaling: fts.EnableInPlacePodVerticalScaling,
+		handle:                          h,
+		resourceAllocationScorer:        *scorePlugin(args),
 	}, nil
 }
@@ -202,12 +204,15 @@ func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error

 // EventsToRegister returns the possible events that may make a Pod
 // failed by this plugin schedulable.
-// NOTE: if in-place-update (KEP 1287) gets implemented, then PodUpdate event
-// should be registered for this plugin since a Pod update may free up resources
-// that make other Pods schedulable.
 func (f *Fit) EventsToRegister() []framework.ClusterEvent {
+	podActionType := framework.Delete
+	if f.enableInPlacePodVerticalScaling {
+		// If InPlacePodVerticalScaling (KEP 1287) is enabled, then PodUpdate event should be registered
+		// for this plugin since a Pod update may free up resources that make other Pods schedulable.
+		podActionType |= framework.Update
+	}
 	return []framework.ClusterEvent{
-		{Resource: framework.Pod, ActionType: framework.Delete},
+		{Resource: framework.Pod, ActionType: podActionType},
 		{Resource: framework.Node, ActionType: framework.Add | framework.Update},
 	}
 }
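Note: with the gate off, the Fit plugin keeps its old behavior (watching Pod deletes
only); with it on, Pod updates are also watched, since a resize can free capacity.
A minimal standalone sketch of the ActionType bitmask pattern used above (local
stand-ins, not the real framework types):

package main

import "fmt"

type ActionType int64

const (
	Add ActionType = 1 << iota
	Delete
	Update
)

// podEvents mirrors the podActionType logic in EventsToRegister above.
func podEvents(inPlaceScalingEnabled bool) ActionType {
	actions := Delete
	if inPlaceScalingEnabled {
		// A pod update may lower its requests and free node capacity,
		// so updates become relevant scheduling events too.
		actions |= Update
	}
	return actions
}

func main() {
	for _, enabled := range []bool{false, true} {
		a := podEvents(enabled)
		fmt.Printf("gate=%v delete=%v update=%v\n", enabled, a&Delete != 0, a&Update != 0)
	}
}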
diff --git a/pkg/scheduler/framework/plugins/noderesources/fit_test.go b/pkg/scheduler/framework/plugins/noderesources/fit_test.go
index ffe5a26969b..af6e53c5d43 100644
--- a/pkg/scheduler/framework/plugins/noderesources/fit_test.go
+++ b/pkg/scheduler/framework/plugins/noderesources/fit_test.go
@@ -22,6 +22,7 @@ import (
 	"reflect"
 	"testing"

+	"github.com/google/go-cmp/cmp"
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/kubernetes/pkg/scheduler/apis/config"
@@ -893,3 +894,38 @@ func BenchmarkTestFitScore(b *testing.B) {
 		})
 	}
 }
+
+func TestEventsToRegister(t *testing.T) {
+	tests := []struct {
+		name                             string
+		inPlacePodVerticalScalingEnabled bool
+		expectedClusterEvents            []framework.ClusterEvent
+	}{
+		{
+			"Register events with InPlacePodVerticalScaling feature enabled",
+			true,
+			[]framework.ClusterEvent{
+				{Resource: "Pod", ActionType: framework.Update | framework.Delete},
+				{Resource: "Node", ActionType: framework.Add | framework.Update},
+			},
+		},
+		{
+			"Register events with InPlacePodVerticalScaling feature disabled",
+			false,
+			[]framework.ClusterEvent{
+				{Resource: "Pod", ActionType: framework.Delete},
+				{Resource: "Node", ActionType: framework.Add | framework.Update},
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			fp := &Fit{enableInPlacePodVerticalScaling: test.inPlacePodVerticalScalingEnabled}
+			actualClusterEvents := fp.EventsToRegister()
+			if diff := cmp.Diff(test.expectedClusterEvents, actualClusterEvents); diff != "" {
+				t.Error("Cluster events don't match expected events (-expected +actual):\n", diff)
+			}
+		})
+	}
+}

diff --git a/pkg/scheduler/framework/plugins/registry.go b/pkg/scheduler/framework/plugins/registry.go
index a3a9a0f1387..b4c0abe7572 100644
--- a/pkg/scheduler/framework/plugins/registry.go
+++ b/pkg/scheduler/framework/plugins/registry.go
@@ -55,6 +55,7 @@ func NewInTreeRegistry() runtime.Registry {
 		EnableMatchLabelKeysInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.MatchLabelKeysInPodTopologySpread),
 		EnablePodSchedulingReadiness:            feature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness),
 		EnablePodDisruptionConditions:           feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions),
+		EnableInPlacePodVerticalScaling:         feature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling),
 	}

 	registry := runtime.Registry{

diff --git a/pkg/scheduler/framework/types.go b/pkg/scheduler/framework/types.go
index b15a6064f69..3580850c864 100644
--- a/pkg/scheduler/framework/types.go
+++ b/pkg/scheduler/framework/types.go
@@ -29,7 +29,11 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	utilerrors "k8s.io/apimachinery/pkg/util/errors"
 	"k8s.io/apimachinery/pkg/util/sets"
+	quota "k8s.io/apiserver/pkg/quota/v1"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/klog/v2"
+	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
+	"k8s.io/kubernetes/pkg/features"
 	schedutil "k8s.io/kubernetes/pkg/scheduler/util"
 )
@@ -724,15 +728,28 @@ func max(a, b int64) int64 {
 // resourceRequest = max(sum(podSpec.Containers), podSpec.InitContainers) + overHead
 func calculateResource(pod *v1.Pod) (res Resource, non0CPU int64, non0Mem int64) {
+	inPlacePodVerticalScalingEnabled := utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling)
 	resPtr := &res
 	for _, c := range pod.Spec.Containers {
-		resPtr.Add(c.Resources.Requests)
-		non0CPUReq, non0MemReq := schedutil.GetNonzeroRequests(&c.Resources.Requests)
+		req := c.Resources.Requests
+		if inPlacePodVerticalScalingEnabled {
+			cs, found := podutil.GetContainerStatus(pod.Status.ContainerStatuses, c.Name)
+			if found {
+				if pod.Status.Resize == v1.PodResizeStatusInfeasible {
+					req = cs.ResourcesAllocated
+				} else {
+					req = quota.Max(c.Resources.Requests, cs.ResourcesAllocated)
+				}
+			}
+		}
+		resPtr.Add(req)
+		non0CPUReq, non0MemReq := schedutil.GetNonzeroRequests(&req)
 		non0CPU += non0CPUReq
 		non0Mem += non0MemReq
 		// No non-zero resources for GPUs or opaque resources.
 	}

+	// Note: In-place resize is not allowed for InitContainers, so no need to check for ResizeStatus value
 	for _, ic := range pod.Spec.InitContainers {
 		resPtr.SetMaxResource(ic.Resources.Requests)
 		non0CPUReq, non0MemReq := schedutil.GetNonzeroRequests(&ic.Resources.Requests)
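Note: while a resize is pending (deferred or in progress), the scheduler is
conservative and accounts for the larger of the desired and allocated values; only
an infeasible resize falls back to what is actually allocated. A minimal standalone
sketch of that rule, with plain int64 quantities standing in for v1.ResourceList
and quota.Max:

package main

import "fmt"

// effectiveRequest mirrors the branch added to calculateResource above.
func effectiveRequest(requested, allocated int64, resizeInfeasible bool) int64 {
	if resizeInfeasible {
		// The node rejected the resize: schedule against what is actually allocated.
		return allocated
	}
	// Otherwise be conservative: take the larger of desired and allocated.
	if requested > allocated {
		return requested
	}
	return allocated
}

func main() {
	fmt.Println(effectiveRequest(700, 500, false)) // deferred/in-progress: 700
	fmt.Println(effectiveRequest(700, 500, true))  // infeasible: 500
}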
diff --git a/pkg/scheduler/framework/types_test.go b/pkg/scheduler/framework/types_test.go
index de8cffc369f..0c1160285e4 100644
--- a/pkg/scheduler/framework/types_test.go
+++ b/pkg/scheduler/framework/types_test.go
@@ -28,6 +28,9 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/sets"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
+	featuregatetesting "k8s.io/component-base/featuregate/testing"
+	"k8s.io/kubernetes/pkg/features"
 )

 func TestNewResource(t *testing.T) {
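Note: SetFeatureGateDuringTest (imported above) flips a gate for the duration of a
test and restores it on cleanup. A hedged sketch of a hypothetical companion test
(not part of this patch) that exercises calculateResource with the gate both on and
off; with no ContainerStatuses set, both paths should fall back to the spec requests:

func TestCalculateResourceGateToggle(t *testing.T) {
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Name: "c1",
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")},
				},
			}},
		},
	}
	for _, enabled := range []bool{true, false} {
		// Toggle the gate; the cleanup funcs restore the original value at test end.
		defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, enabled)()
		res, _, _ := calculateResource(pod)
		if res.MilliCPU != 100 {
			t.Errorf("gate=%v: expected 100 milliCPU, got %d", enabled, res.MilliCPU)
		}
	}
}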
@@ -1458,3 +1461,101 @@ func TestFitError_Error(t *testing.T) {
 		})
 	}
 }
+
+func TestCalculatePodResourcesWithResize(t *testing.T) {
+	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
+	cpu500m := resource.MustParse("500m")
+	mem500M := resource.MustParse("500Mi")
+	cpu700m := resource.MustParse("700m")
+	mem800M := resource.MustParse("800Mi")
+	testpod := v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: "pod_resize_test",
+			Name:      "testpod",
+			UID:       types.UID("testpod"),
+		},
+		Spec: v1.PodSpec{
+			Containers: []v1.Container{
+				{
+					Name:      "c1",
+					Resources: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}},
+				},
+			},
+		},
+		Status: v1.PodStatus{
+			Phase:  v1.PodRunning,
+			Resize: "",
+			ContainerStatuses: []v1.ContainerStatus{
+				{
+					Name:               "c1",
+					ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
+				},
+			},
+		},
+	}
+
+	tests := []struct {
+		name               string
+		requests           v1.ResourceList
+		resourcesAllocated v1.ResourceList
+		resizeStatus       v1.PodResizeStatus
+		expectedResource   Resource
+		expectedNon0CPU    int64
+		expectedNon0Mem    int64
+	}{
+		{
+			name:               "Pod with no pending resize",
+			requests:           v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
+			resourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
+			resizeStatus:       "",
+			expectedResource:   Resource{MilliCPU: cpu500m.MilliValue(), Memory: mem500M.Value()},
+			expectedNon0CPU:    cpu500m.MilliValue(),
+			expectedNon0Mem:    mem500M.Value(),
+		},
+		{
+			name:               "Pod with resize in progress",
+			requests:           v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
+			resourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
+			resizeStatus:       v1.PodResizeStatusInProgress,
+			expectedResource:   Resource{MilliCPU: cpu500m.MilliValue(), Memory: mem500M.Value()},
+			expectedNon0CPU:    cpu500m.MilliValue(),
+			expectedNon0Mem:    mem500M.Value(),
+		},
+		{
+			name:               "Pod with deferred resize",
+			requests:           v1.ResourceList{v1.ResourceCPU: cpu700m, v1.ResourceMemory: mem800M},
+			resourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
+			resizeStatus:       v1.PodResizeStatusDeferred,
+			expectedResource:   Resource{MilliCPU: cpu700m.MilliValue(), Memory: mem800M.Value()},
+			expectedNon0CPU:    cpu700m.MilliValue(),
+			expectedNon0Mem:    mem800M.Value(),
+		},
+		{
+			name:               "Pod with infeasible resize",
+			requests:           v1.ResourceList{v1.ResourceCPU: cpu700m, v1.ResourceMemory: mem800M},
+			resourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
+			resizeStatus:       v1.PodResizeStatusInfeasible,
+			expectedResource:   Resource{MilliCPU: cpu500m.MilliValue(), Memory: mem500M.Value()},
+			expectedNon0CPU:    cpu500m.MilliValue(),
+			expectedNon0Mem:    mem500M.Value(),
+		},
+	}
+
+	for _, tt := range tests {
+		pod := testpod.DeepCopy()
+		pod.Spec.Containers[0].Resources.Requests = tt.requests
+		pod.Status.ContainerStatuses[0].ResourcesAllocated = tt.resourcesAllocated
+		pod.Status.Resize = tt.resizeStatus
+
+		res, non0CPU, non0Mem := calculateResource(pod)
+		if !reflect.DeepEqual(tt.expectedResource, res) {
+			t.Errorf("Test: %s expected resource: %+v, got: %+v", tt.name, tt.expectedResource, res)
+		}
+		if non0CPU != tt.expectedNon0CPU {
+			t.Errorf("Test: %s expected non0CPU: %d, got: %d", tt.name, tt.expectedNon0CPU, non0CPU)
+		}
+		if non0Mem != tt.expectedNon0Mem {
+			t.Errorf("Test: %s expected non0Mem: %d, got: %d", tt.name, tt.expectedNon0Mem, non0Mem)
+		}
+	}
+}
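Note: quota.Max, used by calculateResource above, takes the element-wise maximum of
two ResourceLists, which is why the deferred case expects 700m/800Mi while the
infeasible case falls back to the allocated 500m/500Mi. A small standalone sketch
(assumes the k8s.io/api, k8s.io/apimachinery, and k8s.io/apiserver modules):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	quota "k8s.io/apiserver/pkg/quota/v1"
)

func main() {
	requested := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("700m"),
		v1.ResourceMemory: resource.MustParse("500Mi"),
	}
	allocated := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("500m"),
		v1.ResourceMemory: resource.MustParse("800Mi"),
	}
	// Element-wise maximum: CPU comes from requested, memory from allocated.
	maxReq := quota.Max(requested, allocated)
	fmt.Printf("cpu=%v memory=%v\n", maxReq.Cpu(), maxReq.Memory())
}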