From 7286d122fb7a06f0509dd0c37a9f1b2bb83c9b14 Mon Sep 17 00:00:00 2001 From: Gunju Kim Date: Tue, 16 May 2023 23:41:53 +0900 Subject: [PATCH] Mark pods with restartable init containers as `UnschedulableAndUnresolvable` This marks the pods with restartable init containers as `UnschedulableAndUnresolvable` if the feature gate is disabled to avoid the inconsistency in resource calculation between the scheduler and the older kubelet. --- .../framework/plugins/feature/feature.go | 1 + .../framework/plugins/noderesources/fit.go | 20 +++++ .../plugins/noderesources/fit_test.go | 79 +++++++++++++++++++ pkg/scheduler/framework/plugins/registry.go | 1 + 4 files changed, 101 insertions(+) diff --git a/pkg/scheduler/framework/plugins/feature/feature.go b/pkg/scheduler/framework/plugins/feature/feature.go index 7859b01a1db..4d1ee444cf7 100644 --- a/pkg/scheduler/framework/plugins/feature/feature.go +++ b/pkg/scheduler/framework/plugins/feature/feature.go @@ -29,4 +29,5 @@ type Features struct { EnablePodSchedulingReadiness bool EnablePodDisruptionConditions bool EnableInPlacePodVerticalScaling bool + EnableSidecarContainers bool } diff --git a/pkg/scheduler/framework/plugins/noderesources/fit.go b/pkg/scheduler/framework/plugins/noderesources/fit.go index dc72bf55f4e..04e9bcbf757 100644 --- a/pkg/scheduler/framework/plugins/noderesources/fit.go +++ b/pkg/scheduler/framework/plugins/noderesources/fit.go @@ -84,6 +84,7 @@ type Fit struct { ignoredResources sets.Set[string] ignoredResourceGroups sets.Set[string] enableInPlacePodVerticalScaling bool + enableSidecarContainers bool handle framework.Handle resourceAllocationScorer } @@ -167,6 +168,7 @@ func NewFit(plArgs runtime.Object, h framework.Handle, fts feature.Features) (fr ignoredResources: sets.New(args.IgnoredResources...), ignoredResourceGroups: sets.New(args.IgnoredResourceGroups...), enableInPlacePodVerticalScaling: fts.EnableInPlacePodVerticalScaling, + enableSidecarContainers: fts.EnableSidecarContainers, handle: h, resourceAllocationScorer: *scorePlugin(args), }, nil @@ -251,6 +253,15 @@ func (f *Fit) EventsToRegister() []framework.ClusterEventWithHint { // Checks if a node has sufficient resources, such as cpu, memory, gpu, opaque int resources etc to run a pod. // It returns a list of insufficient resources, if empty, then the node has all the resources requested by the pod. func (f *Fit) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status { + if !f.enableSidecarContainers && hasRestartableInitContainer(pod) { + // Scheduler will calculate resources usage for a Pod containing + // restartable init containers that will be equal or more than kubelet will + // require to run the Pod. So there will be no overbooking. However, to + // avoid the inconsistency in resource calculation between the scheduler + // and the older (before v1.28) kubelet, make the Pod unschedulable. + return framework.NewStatus(framework.UnschedulableAndUnresolvable, "Pod has a restartable init container and the SidecarContainers feature is disabled") + } + s, err := getPreFilterState(cycleState) if err != nil { return framework.AsStatus(err) @@ -269,6 +280,15 @@ func (f *Fit) Filter(ctx context.Context, cycleState *framework.CycleState, pod return nil } +func hasRestartableInitContainer(pod *v1.Pod) bool { + for _, c := range pod.Spec.InitContainers { + if c.RestartPolicy != nil && *c.RestartPolicy == v1.ContainerRestartPolicyAlways { + return true + } + } + return false +} + // InsufficientResource describes what kind of resource limit is hit and caused the pod to not fit the node. type InsufficientResource struct { ResourceName v1.ResourceName diff --git a/pkg/scheduler/framework/plugins/noderesources/fit_test.go b/pkg/scheduler/framework/plugins/noderesources/fit_test.go index 266e262f958..50b27ceee94 100644 --- a/pkg/scheduler/framework/plugins/noderesources/fit_test.go +++ b/pkg/scheduler/framework/plugins/noderesources/fit_test.go @@ -651,6 +651,85 @@ func TestStorageRequests(t *testing.T) { } +func TestRestartableInitContainers(t *testing.T) { + newPod := func() *v1.Pod { + return &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + {Name: "regular"}, + }, + }, + } + } + newPodWithRestartableInitContainers := func() *v1.Pod { + restartPolicyAlways := v1.ContainerRestartPolicyAlways + return &v1.Pod{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + {Name: "regular"}, + }, + InitContainers: []v1.Container{ + { + Name: "restartable-init", + RestartPolicy: &restartPolicyAlways, + }, + }, + }, + } + } + + testCases := []struct { + name string + pod *v1.Pod + enableSidecarContainers bool + wantStatus *framework.Status + }{ + { + name: "allow pod without restartable init containers if sidecar containers is disabled", + pod: newPod(), + }, + { + name: "not allow pod with restartable init containers if sidecar containers is disabled", + pod: newPodWithRestartableInitContainers(), + wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, "Pod has a restartable init container and the SidecarContainers feature is disabled"), + }, + { + name: "allow pod without restartable init containers if sidecar containers is enabled", + enableSidecarContainers: true, + pod: newPod(), + }, + { + name: "allow pod with restartable init containers if sidecar containers is enabled", + enableSidecarContainers: true, + pod: newPodWithRestartableInitContainers(), + }, + } + + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(0, 0, 1, 0, 0, 0)}} + nodeInfo := framework.NewNodeInfo() + nodeInfo.SetNode(&node) + + p, err := NewFit(&config.NodeResourcesFitArgs{ScoringStrategy: defaultScoringStrategy}, nil, plfeature.Features{EnableSidecarContainers: test.enableSidecarContainers}) + if err != nil { + t.Fatal(err) + } + cycleState := framework.NewCycleState() + _, preFilterStatus := p.(framework.PreFilterPlugin).PreFilter(context.Background(), cycleState, test.pod) + if !preFilterStatus.IsSuccess() { + t.Errorf("prefilter failed with status: %v", preFilterStatus) + } + + gotStatus := p.(framework.FilterPlugin).Filter(context.Background(), cycleState, test.pod, nodeInfo) + if diff := cmp.Diff(gotStatus, test.wantStatus); diff != "" { + t.Errorf("status does not match: %v, want: %v", gotStatus, test.wantStatus) + } + }) + } + +} + func TestFitScore(t *testing.T) { tests := []struct { name string diff --git a/pkg/scheduler/framework/plugins/registry.go b/pkg/scheduler/framework/plugins/registry.go index b4c0abe7572..718e9eb1d8e 100644 --- a/pkg/scheduler/framework/plugins/registry.go +++ b/pkg/scheduler/framework/plugins/registry.go @@ -56,6 +56,7 @@ func NewInTreeRegistry() runtime.Registry { EnablePodSchedulingReadiness: feature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness), EnablePodDisruptionConditions: feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions), EnableInPlacePodVerticalScaling: feature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling), + EnableSidecarContainers: feature.DefaultFeatureGate.Enabled(features.SidecarContainers), } registry := runtime.Registry{