Merge pull request #102915 from wzshiming/feat/graceful-shutdown-based-on-pod-priority

Graceful Node Shutdown Based On Pod Priority

Commit 91b7fb4dc9
@@ -381,6 +381,7 @@ API rule violation: list_type_missing,k8s.io/kubelet/config/v1beta1,KubeletConfi
 API rule violation: list_type_missing,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,EnforceNodeAllocatable
 API rule violation: list_type_missing,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,RegisterWithTaints
 API rule violation: list_type_missing,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,ReservedMemory
+API rule violation: list_type_missing,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,ShutdownGracePeriodByPodPriority
 API rule violation: list_type_missing,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,TLSCipherSuites
 API rule violation: list_type_missing,k8s.io/metrics/pkg/apis/metrics/v1alpha1,PodMetrics,Containers
 API rule violation: list_type_missing,k8s.io/metrics/pkg/apis/metrics/v1beta1,PodMetrics,Containers
@@ -585,6 +585,12 @@ const (
     // Adds support for kubelet to detect node shutdown and gracefully terminate pods prior to the node being shutdown.
     GracefulNodeShutdown featuregate.Feature = "GracefulNodeShutdown"

+    // owner: @wzshiming
+    // alpha: v1.23
+    //
+    // Make the kubelet use shutdown configuration based on pod priority values for graceful shutdown.
+    GracefulNodeShutdownBasedOnPodPriority featuregate.Feature = "GracefulNodeShutdownBasedOnPodPriority"
+
     // owner: @andrewsykim @uablrek
     // kep: http://kep.k8s.io/1864
     // alpha: v1.20

@@ -927,6 +933,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
     ExecProbeTimeout:           {Default: true, PreRelease: featuregate.GA}, // lock to default and remove after v1.22 based on KEP #1972 update
     KubeletCredentialProviders: {Default: false, PreRelease: featuregate.Alpha},
     GracefulNodeShutdown:       {Default: true, PreRelease: featuregate.Beta},
+    GracefulNodeShutdownBasedOnPodPriority: {Default: false, PreRelease: featuregate.Alpha},
     ServiceLBNodePortControl:   {Default: true, PreRelease: featuregate.Beta},
     MixedProtocolLBService:     {Default: false, PreRelease: featuregate.Alpha},
     VolumeCapacityPriority:     {Default: false, PreRelease: featuregate.Alpha},
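For orientation: a gate does nothing until code consults it. A minimal sketch of the check that guards the new behavior (the helper name is hypothetical; the Enabled calls are the same ones NewManager makes later in this PR):

import (
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    "k8s.io/kubernetes/pkg/features"
)

// priorityBasedShutdownEnabled is a hypothetical helper: the priority table
// only takes effect when the base gate is on (so the manager exists at all)
// and the new gate is on (so the per-priority path is selected).
func priorityBasedShutdownEnabled() bool {
    return utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdown) &&
        utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdownBasedOnPodPriority)
}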
@@ -267,6 +267,8 @@ var (
     "SeccompDefault",
     "SerializeImagePulls",
     "ShowHiddenMetricsForVersion",
+    "ShutdownGracePeriodByPodPriority[*].Priority",
+    "ShutdownGracePeriodByPodPriority[*].ShutdownGracePeriodSeconds",
     "StreamingConnectionIdleTimeout.Duration",
     "SyncFrequency.Duration",
     "SystemCgroups",
@@ -397,6 +397,15 @@ type KubeletConfiguration struct {
     // +featureGate=GracefulNodeShutdown
     // +optional
     ShutdownGracePeriodCriticalPods metav1.Duration
+    // ShutdownGracePeriodByPodPriority specifies the shutdown grace period for Pods based
+    // on their associated priority class value.
+    // When a shutdown request is received, the Kubelet will initiate shutdown on all pods
+    // running on the node with a grace period that depends on the priority of the pod,
+    // and then wait for all pods to exit.
+    // Each entry in the array represents the graceful shutdown time a pod with a priority
+    // class value that lies in the range of that value and the next higher entry in the
+    // list when the node is shutting down.
+    ShutdownGracePeriodByPodPriority []ShutdownGracePeriodByPodPriority
     // ReservedMemory specifies a comma-separated list of memory reservations for NUMA nodes.
     // The parameter makes sense only in the context of the memory manager feature. The memory manager will not allocate reserved memory for container workloads.
     // For example, if you have a NUMA0 with 10Gi of memory and the ReservedMemory was specified to reserve 1Gi of memory at NUMA0,
@@ -595,6 +604,14 @@ type MemoryReservation struct {
     Limits v1.ResourceList
 }

+// ShutdownGracePeriodByPodPriority specifies the shutdown grace period for Pods based on their associated priority class value
+type ShutdownGracePeriodByPodPriority struct {
+    // priority is the priority value associated with the shutdown grace period
+    Priority int32
+    // shutdownGracePeriodSeconds is the shutdown grace period in seconds
+    ShutdownGracePeriodSeconds int64
+}
+
 type MemorySwapConfiguration struct {
     // swapBehavior configures swap memory available to container workloads. May be one of
     // "", "LimitedSwap": workload combined memory and swap usage cannot exceed pod memory limit
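Each entry's Priority is the lower bound of a priority range; the entry's grace period applies to pods whose priority falls between it and the next higher entry. A hypothetical table using the internal type just defined (the numeric values follow the v1beta1 doc-comment example later in this PR):

periods := []kubeletconfig.ShutdownGracePeriodByPodPriority{
    {Priority: 0, ShutdownGracePeriodSeconds: 30},          // everything below 10000
    {Priority: 10000, ShutdownGracePeriodSeconds: 20},      // 10000 up to critical
    {Priority: 2000000000, ShutdownGracePeriodSeconds: 10}, // system-critical pods
}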
@@ -140,6 +140,16 @@ func RegisterConversions(s *runtime.Scheme) error {
     }); err != nil {
         return err
     }
+    if err := s.AddGeneratedConversionFunc((*v1beta1.ShutdownGracePeriodByPodPriority)(nil), (*config.ShutdownGracePeriodByPodPriority)(nil), func(a, b interface{}, scope conversion.Scope) error {
+        return Convert_v1beta1_ShutdownGracePeriodByPodPriority_To_config_ShutdownGracePeriodByPodPriority(a.(*v1beta1.ShutdownGracePeriodByPodPriority), b.(*config.ShutdownGracePeriodByPodPriority), scope)
+    }); err != nil {
+        return err
+    }
+    if err := s.AddGeneratedConversionFunc((*config.ShutdownGracePeriodByPodPriority)(nil), (*v1beta1.ShutdownGracePeriodByPodPriority)(nil), func(a, b interface{}, scope conversion.Scope) error {
+        return Convert_config_ShutdownGracePeriodByPodPriority_To_v1beta1_ShutdownGracePeriodByPodPriority(a.(*config.ShutdownGracePeriodByPodPriority), b.(*v1beta1.ShutdownGracePeriodByPodPriority), scope)
+    }); err != nil {
+        return err
+    }
     return nil
 }
@@ -381,6 +391,7 @@ func autoConvert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(in
     }
     out.ShutdownGracePeriod = in.ShutdownGracePeriod
     out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods
+    out.ShutdownGracePeriodByPodPriority = *(*[]config.ShutdownGracePeriodByPodPriority)(unsafe.Pointer(&in.ShutdownGracePeriodByPodPriority))
     out.ReservedMemory = *(*[]config.MemoryReservation)(unsafe.Pointer(&in.ReservedMemory))
     if err := v1.Convert_Pointer_bool_To_bool(&in.EnableProfilingHandler, &out.EnableProfilingHandler, s); err != nil {
         return err

@@ -556,6 +567,7 @@ func autoConvert_config_KubeletConfiguration_To_v1beta1_KubeletConfiguration(in
     }
     out.ShutdownGracePeriod = in.ShutdownGracePeriod
     out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods
+    out.ShutdownGracePeriodByPodPriority = *(*[]v1beta1.ShutdownGracePeriodByPodPriority)(unsafe.Pointer(&in.ShutdownGracePeriodByPodPriority))
     out.ReservedMemory = *(*[]v1beta1.MemoryReservation)(unsafe.Pointer(&in.ReservedMemory))
     if err := v1.Convert_bool_To_Pointer_bool(&in.EnableProfilingHandler, &out.EnableProfilingHandler, s); err != nil {
         return err

@@ -708,3 +720,25 @@ func autoConvert_config_SerializedNodeConfigSource_To_v1beta1_SerializedNodeConf
 func Convert_config_SerializedNodeConfigSource_To_v1beta1_SerializedNodeConfigSource(in *config.SerializedNodeConfigSource, out *v1beta1.SerializedNodeConfigSource, s conversion.Scope) error {
     return autoConvert_config_SerializedNodeConfigSource_To_v1beta1_SerializedNodeConfigSource(in, out, s)
 }
+
+func autoConvert_v1beta1_ShutdownGracePeriodByPodPriority_To_config_ShutdownGracePeriodByPodPriority(in *v1beta1.ShutdownGracePeriodByPodPriority, out *config.ShutdownGracePeriodByPodPriority, s conversion.Scope) error {
+    out.Priority = in.Priority
+    out.ShutdownGracePeriodSeconds = in.ShutdownGracePeriodSeconds
+    return nil
+}
+
+// Convert_v1beta1_ShutdownGracePeriodByPodPriority_To_config_ShutdownGracePeriodByPodPriority is an autogenerated conversion function.
+func Convert_v1beta1_ShutdownGracePeriodByPodPriority_To_config_ShutdownGracePeriodByPodPriority(in *v1beta1.ShutdownGracePeriodByPodPriority, out *config.ShutdownGracePeriodByPodPriority, s conversion.Scope) error {
+    return autoConvert_v1beta1_ShutdownGracePeriodByPodPriority_To_config_ShutdownGracePeriodByPodPriority(in, out, s)
+}
+
+func autoConvert_config_ShutdownGracePeriodByPodPriority_To_v1beta1_ShutdownGracePeriodByPodPriority(in *config.ShutdownGracePeriodByPodPriority, out *v1beta1.ShutdownGracePeriodByPodPriority, s conversion.Scope) error {
+    out.Priority = in.Priority
+    out.ShutdownGracePeriodSeconds = in.ShutdownGracePeriodSeconds
+    return nil
+}
+
+// Convert_config_ShutdownGracePeriodByPodPriority_To_v1beta1_ShutdownGracePeriodByPodPriority is an autogenerated conversion function.
+func Convert_config_ShutdownGracePeriodByPodPriority_To_v1beta1_ShutdownGracePeriodByPodPriority(in *config.ShutdownGracePeriodByPodPriority, out *v1beta1.ShutdownGracePeriodByPodPriority, s conversion.Scope) error {
+    return autoConvert_config_ShutdownGracePeriodByPodPriority_To_v1beta1_ShutdownGracePeriodByPodPriority(in, out, s)
+}
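The generated converters can reinterpret the whole slice in place because the internal and v1beta1 structs have identical field layouts. A self-contained demonstration of that zero-copy cast, with A and B standing in for the real pair:

package main

import (
    "fmt"
    "unsafe"
)

type A struct {
    Priority                   int32
    ShutdownGracePeriodSeconds int64
}

type B struct { // same layout as A, which is what makes the cast below safe
    Priority                   int32
    ShutdownGracePeriodSeconds int64
}

func main() {
    in := []A{{Priority: 10000, ShutdownGracePeriodSeconds: 20}}
    // Reinterpret the slice header: no allocation, no per-element copy.
    out := *(*[]B)(unsafe.Pointer(&in))
    fmt.Println(out[0].Priority, out[0].ShutdownGracePeriodSeconds) // 10000 20
}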
@@ -166,6 +166,16 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration) error
     if (kc.ShutdownGracePeriod.Duration > 0 || kc.ShutdownGracePeriodCriticalPods.Duration > 0) && !localFeatureGate.Enabled(features.GracefulNodeShutdown) {
         allErrors = append(allErrors, fmt.Errorf("invalid configuration: Specifying ShutdownGracePeriod or ShutdownGracePeriodCriticalPods requires feature gate GracefulNodeShutdown"))
     }
+    if localFeatureGate.Enabled(features.GracefulNodeShutdownBasedOnPodPriority) {
+        if len(kc.ShutdownGracePeriodByPodPriority) != 0 && (kc.ShutdownGracePeriod.Duration > 0 || kc.ShutdownGracePeriodCriticalPods.Duration > 0) {
+            allErrors = append(allErrors, fmt.Errorf("invalid configuration: Cannot specify both shutdownGracePeriodByPodPriority and shutdownGracePeriod at the same time"))
+        }
+    }
+    if !localFeatureGate.Enabled(features.GracefulNodeShutdownBasedOnPodPriority) {
+        if len(kc.ShutdownGracePeriodByPodPriority) != 0 {
+            allErrors = append(allErrors, fmt.Errorf("invalid configuration: Specifying shutdownGracePeriodByPodPriority requires feature gate GracefulNodeShutdownBasedOnPodPriority"))
+        }
+    }
     if localFeatureGate.Enabled(features.NodeSwap) {
         if kc.MemorySwap.SwapBehavior != "" && kc.MemorySwap.SwapBehavior != kubetypes.LimitedSwap && kc.MemorySwap.SwapBehavior != kubetypes.UnlimitedSwap {
             allErrors = append(allErrors, fmt.Errorf("invalid configuration: MemorySwap.SwapBehavior %v must be one of: LimitedSwap, UnlimitedSwap", kc.MemorySwap.SwapBehavior))
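The two added checks make the priority table usable only behind its gate and mutually exclusive with the legacy periods. A distilled restatement, sketch only: the real function appends to allErrors rather than returning early, and this parameter shape is invented (fmt and time imports assumed):

func validateShutdownConfig(gateEnabled bool, tableLen int, gracePeriod, criticalPeriod time.Duration) error {
    legacySet := gracePeriod > 0 || criticalPeriod > 0
    if gateEnabled && tableLen != 0 && legacySet {
        return fmt.Errorf("cannot specify both shutdownGracePeriodByPodPriority and shutdownGracePeriod")
    }
    if !gateEnabled && tableLen != 0 {
        return fmt.Errorf("shutdownGracePeriodByPodPriority requires GracefulNodeShutdownBasedOnPodPriority")
    }
    return nil
}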
@@ -108,8 +108,9 @@ func TestValidateKubeletConfiguration(t *testing.T) {
     ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 0},
     MemoryThrottlingFactor:          utilpointer.Float64Ptr(0.9),
     FeatureGates: map[string]bool{
         "CustomCPUCFSQuotaPeriod": true,
         "MemoryQoS":               true,
+        "GracefulNodeShutdownBasedOnPodPriority": true,
     },
     Logging: componentbaseconfig.LoggingConfiguration{
         Format: "text",

@@ -149,15 +150,22 @@ func TestValidateKubeletConfiguration(t *testing.T) {
     ReservedSystemCPUs:    "0-3",
     TopologyManagerScope:  kubeletconfig.ContainerTopologyManagerScope,
     TopologyManagerPolicy: kubeletconfig.NoneTopologyManagerPolicy,
-    ShutdownGracePeriod:             metav1.Duration{Duration: 10 * time.Minute},
+    ShutdownGracePeriod:             metav1.Duration{Duration: 0},
     ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 0},
+    ShutdownGracePeriodByPodPriority: []kubeletconfig.ShutdownGracePeriodByPodPriority{
+        {
+            Priority:                   0,
+            ShutdownGracePeriodSeconds: 10,
+        },
+    },
     MemorySwap:             kubeletconfig.MemorySwapConfiguration{SwapBehavior: kubetypes.UnlimitedSwap},
     MemoryThrottlingFactor: utilpointer.Float64Ptr(0.5),
     FeatureGates: map[string]bool{
         "CustomCPUCFSQuotaPeriod": true,
         "GracefulNodeShutdown":    true,
+        "GracefulNodeShutdownBasedOnPodPriority": true,
         "NodeSwap":  true,
         "MemoryQoS": true,
     },
     Logging: componentbaseconfig.LoggingConfiguration{
         Format: "text",

@@ -194,12 +202,18 @@ func TestValidateKubeletConfiguration(t *testing.T) {
     CPUCFSQuotaPeriod:               metav1.Duration{Duration: 100 * time.Millisecond},
     ShutdownGracePeriod:             metav1.Duration{Duration: 30 * time.Second},
     ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 60 * time.Second},
+    ShutdownGracePeriodByPodPriority: []kubeletconfig.ShutdownGracePeriodByPodPriority{
+        {
+            Priority:                   0,
+            ShutdownGracePeriodSeconds: 10,
+        },
+    },
     Logging: componentbaseconfig.LoggingConfiguration{
         Format: "",
     },
     MemorySwap: kubeletconfig.MemorySwapConfiguration{SwapBehavior: kubetypes.UnlimitedSwap},
 }
-const numErrsErrorCase1 = 30
+const numErrsErrorCase1 = 31
 if allErrors := ValidateKubeletConfiguration(errorCase1); len(allErrors.(utilerrors.Aggregate).Errors()) != numErrsErrorCase1 {
     t.Errorf("expect %d errors, got %v", numErrsErrorCase1, len(allErrors.(utilerrors.Aggregate).Errors()))
 }
pkg/kubelet/apis/config/zz_generated.deepcopy.go (generated, 21 lines changed)
@@ -283,6 +283,11 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
     in.Logging.DeepCopyInto(&out.Logging)
     out.ShutdownGracePeriod = in.ShutdownGracePeriod
     out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods
+    if in.ShutdownGracePeriodByPodPriority != nil {
+        in, out := &in.ShutdownGracePeriodByPodPriority, &out.ShutdownGracePeriodByPodPriority
+        *out = make([]ShutdownGracePeriodByPodPriority, len(*in))
+        copy(*out, *in)
+    }
     if in.ReservedMemory != nil {
         in, out := &in.ReservedMemory, &out.ReservedMemory
         *out = make([]MemoryReservation, len(*in))

@@ -438,3 +443,19 @@ func (in *SerializedNodeConfigSource) DeepCopyObject() runtime.Object {
     }
     return nil
 }
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ShutdownGracePeriodByPodPriority) DeepCopyInto(out *ShutdownGracePeriodByPodPriority) {
+    *out = *in
+    return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ShutdownGracePeriodByPodPriority.
+func (in *ShutdownGracePeriodByPodPriority) DeepCopy() *ShutdownGracePeriodByPodPriority {
+    if in == nil {
+        return nil
+    }
+    out := new(ShutdownGracePeriodByPodPriority)
+    in.DeepCopyInto(out)
+    return out
+}
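copy() is enough in the generated code above because ShutdownGracePeriodByPodPriority holds only value fields (int32, int64), so a shallow element copy is already a deep copy. A quick self-contained check:

package main

import "fmt"

type ShutdownGracePeriodByPodPriority struct {
    Priority                   int32
    ShutdownGracePeriodSeconds int64
}

func main() {
    in := []ShutdownGracePeriodByPodPriority{{Priority: 0, ShutdownGracePeriodSeconds: 30}}
    out := make([]ShutdownGracePeriodByPodPriority, len(in))
    copy(out, in)
    out[0].ShutdownGracePeriodSeconds = 99
    fmt.Println(in[0].ShutdownGracePeriodSeconds) // 30: the copies are independent
}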
@@ -867,14 +867,15 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,

     // setup node shutdown manager
     shutdownManager, shutdownAdmitHandler := nodeshutdown.NewManager(&nodeshutdown.Config{
         ProbeManager:                    klet.probeManager,
         Recorder:                        kubeDeps.Recorder,
         NodeRef:                         nodeRef,
         GetPodsFunc:                     klet.GetActivePods,
         KillPodFunc:                     killPodNow(klet.podWorkers, kubeDeps.Recorder),
         SyncNodeStatusFunc:              klet.syncNodeStatus,
         ShutdownGracePeriodRequested:    kubeCfg.ShutdownGracePeriod.Duration,
         ShutdownGracePeriodCriticalPods: kubeCfg.ShutdownGracePeriodCriticalPods.Duration,
+        ShutdownGracePeriodByPodPriority: kubeCfg.ShutdownGracePeriodByPodPriority,
     })
     klet.shutdownManager = shutdownManager
     klet.admitHandlers.AddPodAdmitHandler(shutdownAdmitHandler)
@@ -21,6 +21,7 @@ import (

     v1 "k8s.io/api/core/v1"
     "k8s.io/client-go/tools/record"
+    kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
     "k8s.io/kubernetes/pkg/kubelet/eviction"
     "k8s.io/kubernetes/pkg/kubelet/lifecycle"
     "k8s.io/kubernetes/pkg/kubelet/prober"

@@ -36,15 +37,16 @@ type Manager interface {

 // Config represents Manager configuration
 type Config struct {
     ProbeManager                    prober.Manager
     Recorder                        record.EventRecorder
     NodeRef                         *v1.ObjectReference
     GetPodsFunc                     eviction.ActivePodsFunc
     KillPodFunc                     eviction.KillPodFunc
     SyncNodeStatusFunc              func()
     ShutdownGracePeriodRequested    time.Duration
     ShutdownGracePeriodCriticalPods time.Duration
+    ShutdownGracePeriodByPodPriority []kubeletconfig.ShutdownGracePeriodByPodPriority
     Clock                           clock.Clock
 }

 // managerStub is a fake node shutdown managerImpl .
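For reference, a hypothetical caller filling the extended Config, mirroring the NewMainKubelet wiring earlier in this PR (getActivePods, killPod, and syncNodeStatus are placeholder values, not real identifiers):

shutdownManager, admitHandler := nodeshutdown.NewManager(&nodeshutdown.Config{
    GetPodsFunc:                      getActivePods,
    KillPodFunc:                      killPod,
    SyncNodeStatusFunc:               syncNodeStatus,
    ShutdownGracePeriodRequested:     cfg.ShutdownGracePeriod.Duration,
    ShutdownGracePeriodCriticalPods:  cfg.ShutdownGracePeriodCriticalPods.Duration,
    ShutdownGracePeriodByPodPriority: cfg.ShutdownGracePeriodByPodPriority, // the new field
})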
@@ -22,6 +22,7 @@ package nodeshutdown

 import (
     "fmt"
+    "sort"
     "sync"
     "time"

@@ -29,13 +30,14 @@ import (
     utilfeature "k8s.io/apiserver/pkg/util/feature"
     "k8s.io/client-go/tools/record"
     "k8s.io/klog/v2"
+    "k8s.io/kubernetes/pkg/apis/scheduling"
     "k8s.io/kubernetes/pkg/features"
+    kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
     kubeletevents "k8s.io/kubernetes/pkg/kubelet/events"
     "k8s.io/kubernetes/pkg/kubelet/eviction"
     "k8s.io/kubernetes/pkg/kubelet/lifecycle"
     "k8s.io/kubernetes/pkg/kubelet/nodeshutdown/systemd"
     "k8s.io/kubernetes/pkg/kubelet/prober"
-    kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
     "k8s.io/utils/clock"
 )

@@ -66,8 +68,7 @@ type managerImpl struct {
     nodeRef      *v1.ObjectReference
     probeManager prober.Manager

-    shutdownGracePeriodRequested    time.Duration
-    shutdownGracePeriodCriticalPods time.Duration
+    shutdownGracePeriodByPodPriority []kubeletconfig.ShutdownGracePeriodByPodPriority

     getPods     eviction.ActivePodsFunc
     killPodFunc eviction.KillPodFunc
@@ -84,28 +85,46 @@ type managerImpl struct {

 // NewManager returns a new node shutdown manager.
 func NewManager(conf *Config) (Manager, lifecycle.PodAdmitHandler) {
-    if !utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdown) ||
-        (conf.ShutdownGracePeriodRequested == 0 && conf.ShutdownGracePeriodCriticalPods == 0) {
+    if !utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdown) {
         m := managerStub{}
         return m, m
     }
+
+    shutdownGracePeriodByPodPriority := conf.ShutdownGracePeriodByPodPriority
+    // Migration from the original configuration
+    if !utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdownBasedOnPodPriority) ||
+        len(shutdownGracePeriodByPodPriority) == 0 {
+        shutdownGracePeriodByPodPriority = migrateConfig(conf.ShutdownGracePeriodRequested, conf.ShutdownGracePeriodCriticalPods)
+    }
+
+    // Disable if the configuration is empty
+    if len(shutdownGracePeriodByPodPriority) == 0 {
+        m := managerStub{}
+        return m, m
+    }
+
+    // Sort by priority from low to high
+    sort.Slice(shutdownGracePeriodByPodPriority, func(i, j int) bool {
+        return shutdownGracePeriodByPodPriority[i].Priority < shutdownGracePeriodByPodPriority[j].Priority
+    })
+
     if conf.Clock == nil {
         conf.Clock = clock.RealClock{}
     }
     manager := &managerImpl{
         probeManager:   conf.ProbeManager,
         recorder:       conf.Recorder,
         nodeRef:        conf.NodeRef,
         getPods:        conf.GetPodsFunc,
         killPodFunc:    conf.KillPodFunc,
         syncNodeStatus: conf.SyncNodeStatusFunc,
-        shutdownGracePeriodRequested:    conf.ShutdownGracePeriodRequested,
-        shutdownGracePeriodCriticalPods: conf.ShutdownGracePeriodCriticalPods,
-        clock: conf.Clock,
+        shutdownGracePeriodByPodPriority: shutdownGracePeriodByPodPriority,
+        clock:                            conf.Clock,
     }
     klog.InfoS("Creating node shutdown manager",
         "shutdownGracePeriodRequested", conf.ShutdownGracePeriodRequested,
         "shutdownGracePeriodCriticalPods", conf.ShutdownGracePeriodCriticalPods,
+        "shutdownGracePeriodByPodPriority", shutdownGracePeriodByPodPriority,
     )
     return manager, manager
 }
@@ -159,9 +178,9 @@ func (m *managerImpl) start() (chan struct{}, error) {
         return nil, err
     }

-    // If the logind's InhibitDelayMaxUSec as configured in (logind.conf) is less than shutdownGracePeriodRequested, attempt to update the value to shutdownGracePeriodRequested.
-    if m.shutdownGracePeriodRequested > currentInhibitDelay {
-        err := m.dbusCon.OverrideInhibitDelay(m.shutdownGracePeriodRequested)
+    // If the logind's InhibitDelayMaxUSec as configured in (logind.conf) is less than periodRequested, attempt to update the value to periodRequested.
+    if periodRequested := m.periodRequested(); periodRequested > currentInhibitDelay {
+        err := m.dbusCon.OverrideInhibitDelay(periodRequested)
         if err != nil {
             return nil, fmt.Errorf("unable to override inhibit delay by shutdown manager: %v", err)
         }

@@ -177,8 +196,8 @@ func (m *managerImpl) start() (chan struct{}, error) {
             return nil, err
         }

-        if m.shutdownGracePeriodRequested > updatedInhibitDelay {
-            return nil, fmt.Errorf("node shutdown manager was unable to update logind InhibitDelayMaxSec to %v (ShutdownGracePeriod), current value of InhibitDelayMaxSec (%v) is less than requested ShutdownGracePeriod", m.shutdownGracePeriodRequested, updatedInhibitDelay)
+        if periodRequested > updatedInhibitDelay {
+            return nil, fmt.Errorf("node shutdown manager was unable to update logind InhibitDelayMaxSec to %v (ShutdownGracePeriod), current value of InhibitDelayMaxSec (%v) is less than requested ShutdownGracePeriod", periodRequested, updatedInhibitDelay)
         }
     }
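The negotiation above, distilled: read logind's current InhibitDelayMaxUSec, override it if the total requested period is larger, then verify the override took. A sketch with the D-Bus calls abstracted behind a callback (the function and parameter names here are invented):

func ensureInhibitDelay(current, requested time.Duration, override func(time.Duration) (time.Duration, error)) error {
    if requested <= current {
        return nil // logind already allows enough delay
    }
    updated, err := override(requested) // write the drop-in config, re-read the value
    if err != nil {
        return err
    }
    if requested > updated {
        return fmt.Errorf("logind InhibitDelayMaxSec %v is still less than requested %v", updated, requested)
    }
    return nil
}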
@@ -270,54 +289,54 @@ func (m *managerImpl) processShutdownEvent() error {
     klog.V(1).InfoS("Shutdown manager processing shutdown event")
     activePods := m.getPods()

-    nonCriticalPodGracePeriod := m.shutdownGracePeriodRequested - m.shutdownGracePeriodCriticalPods
-    var wg sync.WaitGroup
-    wg.Add(len(activePods))
-    for _, pod := range activePods {
-        go func(pod *v1.Pod) {
-            defer wg.Done()
-
-            var gracePeriodOverride int64
-            if kubelettypes.IsCriticalPod(pod) {
-                gracePeriodOverride = int64(m.shutdownGracePeriodCriticalPods.Seconds())
-                m.clock.Sleep(nonCriticalPodGracePeriod)
-            } else {
-                gracePeriodOverride = int64(nonCriticalPodGracePeriod.Seconds())
-            }
-
-            // Stop probes for the pod
-            m.probeManager.RemovePod(pod)
-
-            // If the pod's spec specifies a termination gracePeriod which is less than the gracePeriodOverride calculated, use the pod spec termination gracePeriod.
-            if pod.Spec.TerminationGracePeriodSeconds != nil && *pod.Spec.TerminationGracePeriodSeconds <= gracePeriodOverride {
-                gracePeriodOverride = *pod.Spec.TerminationGracePeriodSeconds
-            }
-
-            klog.V(1).InfoS("Shutdown manager killing pod with gracePeriod", "pod", klog.KObj(pod), "gracePeriod", gracePeriodOverride)
-            if err := m.killPodFunc(pod, false, &gracePeriodOverride, func(status *v1.PodStatus) {
-                status.Message = nodeShutdownMessage
-                status.Reason = nodeShutdownReason
-            }); err != nil {
-                klog.V(1).InfoS("Shutdown manager failed killing pod", "pod", klog.KObj(pod), "err", err)
-            } else {
-                klog.V(1).InfoS("Shutdown manager finished killing pod", "pod", klog.KObj(pod))
-            }
-        }(pod)
-    }
-
-    c := make(chan struct{})
-    go func() {
-        defer close(c)
-        wg.Wait()
-    }()
-
-    // We want to ensure that inhibitLock is released, so only wait up to the shutdownGracePeriodRequested timeout.
-    select {
-    case <-c:
-        break
-    case <-time.After(m.shutdownGracePeriodRequested):
-        klog.V(1).InfoS("Shutdown manager pod killing time out", "gracePeriod", m.shutdownGracePeriodRequested)
-    }
+    groups := groupByPriority(m.shutdownGracePeriodByPodPriority, activePods)
+    for _, group := range groups {
+        // If there are no pods in a particular range,
+        // then do not wait for pods in that priority range.
+        if len(group.Pods) == 0 {
+            continue
+        }
+
+        var wg sync.WaitGroup
+        wg.Add(len(group.Pods))
+        for _, pod := range group.Pods {
+            go func(pod *v1.Pod, group podShutdownGroup) {
+                defer wg.Done()
+
+                gracePeriodOverride := group.ShutdownGracePeriodSeconds
+
+                // Stop probes for the pod
+                m.probeManager.RemovePod(pod)
+
+                // If the pod's spec specifies a termination gracePeriod which is less than the gracePeriodOverride calculated, use the pod spec termination gracePeriod.
+                if pod.Spec.TerminationGracePeriodSeconds != nil && *pod.Spec.TerminationGracePeriodSeconds <= gracePeriodOverride {
+                    gracePeriodOverride = *pod.Spec.TerminationGracePeriodSeconds
+                }
+
+                klog.V(1).InfoS("Shutdown manager killing pod with gracePeriod", "pod", klog.KObj(pod), "gracePeriod", gracePeriodOverride)
+                if err := m.killPodFunc(pod, false, &gracePeriodOverride, func(status *v1.PodStatus) {
+                    status.Message = nodeShutdownMessage
+                    status.Reason = nodeShutdownReason
+                }); err != nil {
+                    klog.V(1).InfoS("Shutdown manager failed killing pod", "pod", klog.KObj(pod), "err", err)
+                } else {
+                    klog.V(1).InfoS("Shutdown manager finished killing pod", "pod", klog.KObj(pod))
+                }
+            }(pod, group)
+        }
+
+        c := make(chan struct{})
+        go func() {
+            defer close(c)
+            wg.Wait()
+        }()
+
+        select {
+        case <-c:
+        case <-time.After(time.Duration(group.ShutdownGracePeriodSeconds) * time.Second):
+            klog.V(1).InfoS("Shutdown manager pod killing time out", "gracePeriod", group.ShutdownGracePeriodSeconds, "priority", group.Priority)
+        }
+    }

     m.dbusCon.ReleaseInhibitLock(m.inhibitLock)
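The per-group bounded wait is the key concurrency pattern here: each kill goroutine signals a WaitGroup, a helper goroutine closes c once all are done, and the select caps the wait at the group's grace period so the inhibit lock is always released. A runnable reduction of just that pattern:

package main

import (
    "fmt"
    "sync"
    "time"
)

func main() {
    var wg sync.WaitGroup
    for i := 0; i < 3; i++ {
        wg.Add(1)
        go func(i int) {
            defer wg.Done()
            time.Sleep(time.Duration(100*i) * time.Millisecond) // stand-in for killPodFunc
        }(i)
    }

    c := make(chan struct{})
    go func() {
        defer close(c)
        wg.Wait()
    }()

    select {
    case <-c:
        fmt.Println("all pods in the group exited")
    case <-time.After(1 * time.Second): // the group's ShutdownGracePeriodSeconds
        fmt.Println("group grace period expired; moving on")
    }
}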
@@ -325,3 +344,78 @@ func (m *managerImpl) processShutdownEvent() error {

     return nil
 }
+
+func (m *managerImpl) periodRequested() time.Duration {
+    var sum int64
+    for _, period := range m.shutdownGracePeriodByPodPriority {
+        sum += period.ShutdownGracePeriodSeconds
+    }
+    return time.Duration(sum) * time.Second
+}
+
+func migrateConfig(shutdownGracePeriodRequested, shutdownGracePeriodCriticalPods time.Duration) []kubeletconfig.ShutdownGracePeriodByPodPriority {
+    if shutdownGracePeriodRequested == 0 {
+        return nil
+    }
+    defaultPriority := shutdownGracePeriodRequested - shutdownGracePeriodCriticalPods
+    if defaultPriority < 0 {
+        return nil
+    }
+    criticalPriority := shutdownGracePeriodRequested - defaultPriority
+    if criticalPriority < 0 {
+        return nil
+    }
+    return []kubeletconfig.ShutdownGracePeriodByPodPriority{
+        {
+            Priority:                   scheduling.DefaultPriorityWhenNoDefaultClassExists,
+            ShutdownGracePeriodSeconds: int64(defaultPriority / time.Second),
+        },
+        {
+            Priority:                   scheduling.SystemCriticalPriority,
+            ShutdownGracePeriodSeconds: int64(criticalPriority / time.Second),
+        },
+    }
+}
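Worked example of the migration arithmetic, matching the first Test_migrateConfig case below: with shutdownGracePeriodRequested = 300s and shutdownGracePeriodCriticalPods = 120s, defaultPriority = 300s - 120s = 180s and criticalPriority = 300s - 180s = 120s (the variables are durations despite their names), so the legacy pair becomes:

[]kubeletconfig.ShutdownGracePeriodByPodPriority{
    {Priority: scheduling.DefaultPriorityWhenNoDefaultClassExists, ShutdownGracePeriodSeconds: 180},
    {Priority: scheduling.SystemCriticalPriority, ShutdownGracePeriodSeconds: 120},
}

The "wrong configuration" guard covers a critical period longer than the total (e.g. 1s requested, 100s critical), which would make defaultPriority negative; migration then returns nil and the manager stays disabled.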
+
+func groupByPriority(shutdownGracePeriodByPodPriority []kubeletconfig.ShutdownGracePeriodByPodPriority, pods []*v1.Pod) []podShutdownGroup {
+    groups := make([]podShutdownGroup, 0, len(shutdownGracePeriodByPodPriority))
+    for _, period := range shutdownGracePeriodByPodPriority {
+        groups = append(groups, podShutdownGroup{
+            ShutdownGracePeriodByPodPriority: period,
+        })
+    }
+
+    for _, pod := range pods {
+        var priority int32
+        if pod.Spec.Priority != nil {
+            priority = *pod.Spec.Priority
+        }
+
+        // Find the group index according to the priority.
+        index := sort.Search(len(groups), func(i int) bool {
+            return groups[i].Priority >= priority
+        })
+
+        // 1. Those higher than the highest priority default to the highest priority
+        // 2. Those lower than the lowest priority default to the lowest priority
+        // 3. Those boundary priority default to the lower priority
+        // if priority of pod is:
+        //   groups[index-1].Priority <= pod priority < groups[index].Priority
+        // in which case we want to pick lower one (i.e. index-1)
+        if index == len(groups) {
+            index = len(groups) - 1
+        } else if index < 0 {
+            index = 0
+        } else if index > 0 && groups[index].Priority > priority {
+            index--
+        }
+
+        groups[index].Pods = append(groups[index].Pods, pod)
+    }
+    return groups
+}
+
+type podShutdownGroup struct {
+    kubeletconfig.ShutdownGracePeriodByPodPriority
+    Pods []*v1.Pod
+}
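sort.Search plus the clamping rules decide which group a pod joins: below the lowest bound clamps up, above the highest clamps down, and a priority strictly between two bounds takes the lower one. A runnable reduction over the bounds {1, 2, 3, 4} used in Test_groupByPriority below:

package main

import (
    "fmt"
    "sort"
)

func main() {
    bounds := []int32{1, 2, 3, 4} // group lower bounds, sorted ascending
    for _, p := range []int32{0, 1, 2, 5} {
        index := sort.Search(len(bounds), func(i int) bool { return bounds[i] >= p })
        if index == len(bounds) {
            index = len(bounds) - 1 // above the highest bound: last group
        } else if index > 0 && bounds[index] > p {
            index-- // strictly between two bounds: take the lower group
        }
        fmt.Printf("pod priority %d -> group with lower bound %d\n", p, bounds[index])
    }
    // Prints 0->1, 1->1, 2->2, 5->4, matching the test expectations.
}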
@@ -35,6 +35,7 @@ import (
     featuregatetesting "k8s.io/component-base/featuregate/testing"
     "k8s.io/kubernetes/pkg/apis/scheduling"
     pkgfeatures "k8s.io/kubernetes/pkg/features"
+    kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
     "k8s.io/kubernetes/pkg/kubelet/nodeshutdown/systemd"
     probetest "k8s.io/kubernetes/pkg/kubelet/prober/testing"
     testingclock "k8s.io/utils/clock/testing"
@@ -81,12 +82,7 @@ func (f *fakeDbus) OverrideInhibitDelay(inhibitDelayMax time.Duration) error {
     return nil
 }

-func makePod(name string, criticalPod bool, terminationGracePeriod *int64) *v1.Pod {
-    var priority int32
-    if criticalPod {
-        priority = scheduling.SystemCriticalPriority
-    }
-
+func makePod(name string, priority int32, terminationGracePeriod *int64) *v1.Pod {
     return &v1.Pod{
         ObjectMeta: metav1.ObjectMeta{
             Name: name,
@@ -104,15 +100,15 @@ func TestManager(t *testing.T) {
     defer func() {
         systemDbus = systemDbusTmp
     }()
-    normalPodNoGracePeriod := makePod("normal-pod-nil-grace-period", false /* criticalPod */, nil /* terminationGracePeriod */)
-    criticalPodNoGracePeriod := makePod("critical-pod-nil-grace-period", true /* criticalPod */, nil /* terminationGracePeriod */)
+    normalPodNoGracePeriod := makePod("normal-pod-nil-grace-period", scheduling.DefaultPriorityWhenNoDefaultClassExists, nil /* terminationGracePeriod */)
+    criticalPodNoGracePeriod := makePod("critical-pod-nil-grace-period", scheduling.SystemCriticalPriority, nil /* terminationGracePeriod */)

     shortGracePeriod := int64(2)
-    normalPodGracePeriod := makePod("normal-pod-grace-period", false /* criticalPod */, &shortGracePeriod /* terminationGracePeriod */)
-    criticalPodGracePeriod := makePod("critical-pod-grace-period", true /* criticalPod */, &shortGracePeriod /* terminationGracePeriod */)
+    normalPodGracePeriod := makePod("normal-pod-grace-period", scheduling.DefaultPriorityWhenNoDefaultClassExists, &shortGracePeriod /* terminationGracePeriod */)
+    criticalPodGracePeriod := makePod("critical-pod-grace-period", scheduling.SystemCriticalPriority, &shortGracePeriod /* terminationGracePeriod */)

     longGracePeriod := int64(1000)
-    normalPodLongGracePeriod := makePod("normal-pod-long-grace-period", false /* criticalPod */, &longGracePeriod /* terminationGracePeriod */)
+    normalPodLongGracePeriod := makePod("normal-pod-long-grace-period", scheduling.DefaultPriorityWhenNoDefaultClassExists, &longGracePeriod /* terminationGracePeriod */)

     var tests = []struct {
         desc string
@@ -256,7 +252,9 @@ func TestManager(t *testing.T) {
             lock.Unlock()

             if tc.expectedError != nil {
-                if !strings.Contains(err.Error(), tc.expectedError.Error()) {
+                if err == nil {
+                    t.Errorf("unexpected error message. Got: <nil> want %s", tc.expectedError.Error())
+                } else if !strings.Contains(err.Error(), tc.expectedError.Error()) {
                     t.Errorf("unexpected error message. Got: %s want %s", err.Error(), tc.expectedError.Error())
                 }
             } else {
@@ -266,7 +264,11 @@ func TestManager(t *testing.T) {
                 assert.Equal(t, manager.Admit(nil).Admit, true)

                 // Send fake shutdown event
-                fakeShutdownChan <- true
+                select {
+                case fakeShutdownChan <- true:
+                case <-time.After(1 * time.Second):
+                    t.Fatal()
+                }

                 // Wait for all the pods to be killed
                 killedPodsToGracePeriods := map[string]int64{}
@@ -413,3 +415,196 @@ func TestRestart(t *testing.T) {
             shutdownChanMut.Unlock()
         }
     }
 }
+
+func Test_migrateConfig(t *testing.T) {
+    type shutdownConfig struct {
+        shutdownGracePeriodRequested    time.Duration
+        shutdownGracePeriodCriticalPods time.Duration
+    }
+    tests := []struct {
+        name string
+        args shutdownConfig
+        want []kubeletconfig.ShutdownGracePeriodByPodPriority
+    }{
+        {
+            name: "both shutdownGracePeriodRequested and shutdownGracePeriodCriticalPods",
+            args: shutdownConfig{
+                shutdownGracePeriodRequested:    300 * time.Second,
+                shutdownGracePeriodCriticalPods: 120 * time.Second,
+            },
+            want: []kubeletconfig.ShutdownGracePeriodByPodPriority{
+                {
+                    Priority:                   scheduling.DefaultPriorityWhenNoDefaultClassExists,
+                    ShutdownGracePeriodSeconds: 180,
+                },
+                {
+                    Priority:                   scheduling.SystemCriticalPriority,
+                    ShutdownGracePeriodSeconds: 120,
+                },
+            },
+        },
+        {
+            name: "only shutdownGracePeriodRequested",
+            args: shutdownConfig{
+                shutdownGracePeriodRequested:    100 * time.Second,
+                shutdownGracePeriodCriticalPods: 0 * time.Second,
+            },
+            want: []kubeletconfig.ShutdownGracePeriodByPodPriority{
+                {
+                    Priority:                   scheduling.DefaultPriorityWhenNoDefaultClassExists,
+                    ShutdownGracePeriodSeconds: 100,
+                },
+                {
+                    Priority:                   scheduling.SystemCriticalPriority,
+                    ShutdownGracePeriodSeconds: 0,
+                },
+            },
+        },
+        {
+            name: "empty configuration",
+            args: shutdownConfig{
+                shutdownGracePeriodRequested:    0 * time.Second,
+                shutdownGracePeriodCriticalPods: 0 * time.Second,
+            },
+            want: nil,
+        },
+        {
+            name: "wrong configuration",
+            args: shutdownConfig{
+                shutdownGracePeriodRequested:    1 * time.Second,
+                shutdownGracePeriodCriticalPods: 100 * time.Second,
+            },
+            want: nil,
+        },
+    }
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            if got := migrateConfig(tt.args.shutdownGracePeriodRequested, tt.args.shutdownGracePeriodCriticalPods); !assert.Equal(t, tt.want, got) {
+                t.Errorf("migrateConfig() = %v, want %v", got, tt.want)
+            }
+        })
+    }
+}
+
+func Test_groupByPriority(t *testing.T) {
+    type args struct {
+        shutdownGracePeriodByPodPriority []kubeletconfig.ShutdownGracePeriodByPodPriority
+        pods                             []*v1.Pod
+    }
+    tests := []struct {
+        name string
+        args args
+        want []podShutdownGroup
+    }{
+        {
+            name: "migrate config",
+            args: args{
+                shutdownGracePeriodByPodPriority: migrateConfig(300*time.Second /* shutdownGracePeriodRequested */, 120*time.Second /* shutdownGracePeriodCriticalPods */),
+                pods: []*v1.Pod{
+                    makePod("normal-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, nil),
+                    makePod("highest-user-definable-pod", scheduling.HighestUserDefinablePriority, nil),
+                    makePod("critical-pod", scheduling.SystemCriticalPriority, nil),
+                },
+            },
+            want: []podShutdownGroup{
+                {
+                    ShutdownGracePeriodByPodPriority: kubeletconfig.ShutdownGracePeriodByPodPriority{
+                        Priority:                   scheduling.DefaultPriorityWhenNoDefaultClassExists,
+                        ShutdownGracePeriodSeconds: 180,
+                    },
+                    Pods: []*v1.Pod{
+                        makePod("normal-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, nil),
+                        makePod("highest-user-definable-pod", scheduling.HighestUserDefinablePriority, nil),
+                    },
+                },
+                {
+                    ShutdownGracePeriodByPodPriority: kubeletconfig.ShutdownGracePeriodByPodPriority{
+                        Priority:                   scheduling.SystemCriticalPriority,
+                        ShutdownGracePeriodSeconds: 120,
+                    },
+                    Pods: []*v1.Pod{
+                        makePod("critical-pod", scheduling.SystemCriticalPriority, nil),
+                    },
+                },
+            },
+        },
+        {
+            name: "pod priority",
+            args: args{
+                shutdownGracePeriodByPodPriority: []kubeletconfig.ShutdownGracePeriodByPodPriority{
+                    {Priority: 1, ShutdownGracePeriodSeconds: 10},
+                    {Priority: 2, ShutdownGracePeriodSeconds: 20},
+                    {Priority: 3, ShutdownGracePeriodSeconds: 30},
+                    {Priority: 4, ShutdownGracePeriodSeconds: 40},
+                },
+                pods: []*v1.Pod{
+                    makePod("pod-0", 0, nil),
+                    makePod("pod-1", 1, nil),
+                    makePod("pod-2", 2, nil),
+                    makePod("pod-3", 3, nil),
+                    makePod("pod-4", 4, nil),
+                    makePod("pod-5", 5, nil),
+                },
+            },
+            want: []podShutdownGroup{
+                {
+                    ShutdownGracePeriodByPodPriority: kubeletconfig.ShutdownGracePeriodByPodPriority{Priority: 1, ShutdownGracePeriodSeconds: 10},
+                    Pods: []*v1.Pod{
+                        makePod("pod-0", 0, nil),
+                        makePod("pod-1", 1, nil),
+                    },
+                },
+                {
+                    ShutdownGracePeriodByPodPriority: kubeletconfig.ShutdownGracePeriodByPodPriority{Priority: 2, ShutdownGracePeriodSeconds: 20},
+                    Pods: []*v1.Pod{
+                        makePod("pod-2", 2, nil),
+                    },
+                },
+                {
+                    ShutdownGracePeriodByPodPriority: kubeletconfig.ShutdownGracePeriodByPodPriority{Priority: 3, ShutdownGracePeriodSeconds: 30},
+                    Pods: []*v1.Pod{
+                        makePod("pod-3", 3, nil),
+                    },
+                },
+                {
+                    ShutdownGracePeriodByPodPriority: kubeletconfig.ShutdownGracePeriodByPodPriority{Priority: 4, ShutdownGracePeriodSeconds: 40},
+                    Pods: []*v1.Pod{
+                        makePod("pod-4", 4, nil),
+                        makePod("pod-5", 5, nil),
+                    },
+                },
+            },
+        },
+    }
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            if got := groupByPriority(tt.args.shutdownGracePeriodByPodPriority, tt.args.pods); !assert.Equal(t, tt.want, got) {
+                t.Errorf("groupByPriority() = %v, want %v", got, tt.want)
+            }
+        })
+    }
+}
@@ -989,6 +989,35 @@ type KubeletConfiguration struct {
     // +featureGate=GracefulNodeShutdown
     // +optional
     ShutdownGracePeriodCriticalPods metav1.Duration `json:"shutdownGracePeriodCriticalPods,omitempty"`
+    // shutdownGracePeriodByPodPriority specifies the shutdown grace period for Pods based
+    // on their associated priority class value.
+    // When a shutdown request is received, the Kubelet will initiate shutdown on all pods
+    // running on the node with a grace period that depends on the priority of the pod,
+    // and then wait for all pods to exit.
+    // Each entry in the array represents the graceful shutdown time a pod with a priority
+    // class value that lies in the range of that value and the next higher entry in the
+    // list when the node is shutting down.
+    // For example, to allow critical pods 10s to shutdown, priority>=10000 pods 20s to
+    // shutdown, and all remaining pods 30s to shutdown.
+    //
+    // shutdownGracePeriodByPodPriority:
+    //   - priority: 2000000000
+    //     shutdownGracePeriodSeconds: 10
+    //   - priority: 10000
+    //     shutdownGracePeriodSeconds: 20
+    //   - priority: 0
+    //     shutdownGracePeriodSeconds: 30
+    //
+    // The time the Kubelet will wait before exiting will at most be the maximum of all
+    // shutdownGracePeriodSeconds for each priority class range represented on the node.
+    // When all pods have exited or reached their grace periods, the Kubelet will release
+    // the shutdown inhibit lock.
+    // Requires the GracefulNodeShutdown feature gate to be enabled.
+    // This configuration must be empty if either ShutdownGracePeriod or ShutdownGracePeriodCriticalPods is set.
+    // Default: nil
+    // +featureGate=GracefulNodeShutdownBasedOnPodPriority
+    // +optional
+    ShutdownGracePeriodByPodPriority []ShutdownGracePeriodByPodPriority `json:"shutdownGracePeriodByPodPriority,omitempty"`
     // reservedMemory specifies a comma-separated list of memory reservations for NUMA nodes.
     // The parameter makes sense only in the context of the memory manager feature.
     // The memory manager will not allocate reserved memory for container workloads.

@@ -1136,6 +1165,14 @@ type MemoryReservation struct {
     Limits v1.ResourceList `json:"limits"`
 }

+// ShutdownGracePeriodByPodPriority specifies the shutdown grace period for Pods based on their associated priority class value
+type ShutdownGracePeriodByPodPriority struct {
+    // priority is the priority value associated with the shutdown grace period
+    Priority int32 `json:"priority"`
+    // shutdownGracePeriodSeconds is the shutdown grace period in seconds
+    ShutdownGracePeriodSeconds int64 `json:"shutdownGracePeriodSeconds"`
+}
+
 type MemorySwapConfiguration struct {
     // swapBehavior configures swap memory available to container workloads. May be one of
     // "", "LimitedSwap": workload combined memory and swap usage cannot exceed pod memory limit
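The lowercase JSON tags are what users actually write in the kubelet config file (YAML follows the same field names). A self-contained stand-in for the v1beta1 struct showing the wire form:

package main

import (
    "encoding/json"
    "fmt"
)

// Stand-in with the same json tags as the v1beta1 type above.
type ShutdownGracePeriodByPodPriority struct {
    Priority                   int32 `json:"priority"`
    ShutdownGracePeriodSeconds int64 `json:"shutdownGracePeriodSeconds"`
}

func main() {
    table := []ShutdownGracePeriodByPodPriority{
        {Priority: 2000000000, ShutdownGracePeriodSeconds: 10},
        {Priority: 10000, ShutdownGracePeriodSeconds: 20},
        {Priority: 0, ShutdownGracePeriodSeconds: 30},
    }
    b, _ := json.MarshalIndent(table, "", "  ")
    fmt.Println(string(b)) // [{"priority":2000000000,"shutdownGracePeriodSeconds":10}, ...]
}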
@@ -318,6 +318,11 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
     }
     out.ShutdownGracePeriod = in.ShutdownGracePeriod
     out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods
+    if in.ShutdownGracePeriodByPodPriority != nil {
+        in, out := &in.ShutdownGracePeriodByPodPriority, &out.ShutdownGracePeriodByPodPriority
+        *out = make([]ShutdownGracePeriodByPodPriority, len(*in))
+        copy(*out, *in)
+    }
     if in.ReservedMemory != nil {
         in, out := &in.ReservedMemory, &out.ReservedMemory
         *out = make([]MemoryReservation, len(*in))

@@ -498,3 +503,19 @@ func (in *SerializedNodeConfigSource) DeepCopyObject() runtime.Object {
     }
     return nil
 }
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ShutdownGracePeriodByPodPriority) DeepCopyInto(out *ShutdownGracePeriodByPodPriority) {
+    *out = *in
+    return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ShutdownGracePeriodByPodPriority.
+func (in *ShutdownGracePeriodByPodPriority) DeepCopy() *ShutdownGracePeriodByPodPriority {
+    if in == nil {
+        return nil
+    }
+    out := new(ShutdownGracePeriodByPodPriority)
+    in.DeepCopyInto(out)
+    return out
+}
@@ -35,13 +35,15 @@ import (
 	"k8s.io/kubernetes/test/e2e/framework"
 
 	v1 "k8s.io/api/core/v1"
+	schedulingv1 "k8s.io/api/scheduling/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/kubernetes/pkg/features"
 	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
 	testutils "k8s.io/kubernetes/test/utils"
 )
 
-var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShutdown]", func() {
+var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShutdown] [NodeFeature:GracefulNodeShutdownBasedOnPodPriority]", func() {
 	f := framework.NewDefaultFramework("graceful-node-shutdown")
 
 	ginkgo.Context("when gracefully shutting down", func() {
@@ -54,6 +56,10 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
 		)
 
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
+			initialConfig.FeatureGates = map[string]bool{
+				string(features.GracefulNodeShutdown):                   true,
+				string(features.GracefulNodeShutdownBasedOnPodPriority): false,
+			}
 			initialConfig.ShutdownGracePeriod = metav1.Duration{Duration: nodeShutdownGracePeriod}
 			initialConfig.ShutdownGracePeriodCriticalPods = metav1.Duration{Duration: nodeShutdownGracePeriodCriticalPods}
 		})
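The legacy test above pins the new gate to false so the old duration-based path stays covered. For comparison, a hedged sketch of how a caller could enable the priority-based path with the same kubeletconfig API this PR adds — the helper name and bucket values are illustrative, not part of the change:

```go
package config

import (
	"k8s.io/kubernetes/pkg/features"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
)

// enablePriorityBasedShutdown is a hypothetical helper: it turns both gates
// on and installs a two-bucket grace-period table (values illustrative).
func enablePriorityBasedShutdown(cfg *kubeletconfig.KubeletConfiguration) {
	cfg.FeatureGates = map[string]bool{
		string(features.GracefulNodeShutdown):                   true,
		string(features.GracefulNodeShutdownBasedOnPodPriority): true,
	}
	cfg.ShutdownGracePeriodByPodPriority = []kubeletconfig.ShutdownGracePeriodByPodPriority{
		{Priority: 0, ShutdownGracePeriodSeconds: 60},
		{Priority: 100000, ShutdownGracePeriodSeconds: 120},
	}
}
```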
@@ -77,10 +83,10 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
 
 			// Define test pods
 			pods := []*v1.Pod{
-				getGracePeriodOverrideTestPod("period-120", nodeName, 120, false),
-				getGracePeriodOverrideTestPod("period-5", nodeName, 5, false),
-				getGracePeriodOverrideTestPod("period-critical-120", nodeName, 120, true),
-				getGracePeriodOverrideTestPod("period-critical-5", nodeName, 5, true),
+				getGracePeriodOverrideTestPod("period-120", nodeName, 120, ""),
+				getGracePeriodOverrideTestPod("period-5", nodeName, 5, ""),
+				getGracePeriodOverrideTestPod("period-critical-120", nodeName, 120, scheduling.SystemNodeCritical),
+				getGracePeriodOverrideTestPod("period-critical-5", nodeName, 5, scheduling.SystemNodeCritical),
 			}
 
 			ginkgo.By("Creating batch pods")
@@ -117,12 +123,12 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
 				for _, pod := range list.Items {
 					if kubelettypes.IsCriticalPod(&pod) {
 						if isPodShutdown(&pod) {
-							framework.Logf("Expecting critcal pod to be running, but it's not currently. Pod: %q, Pod Status %+v", pod.Name, pod.Status)
+							framework.Logf("Expecting critical pod to be running, but it's not currently. Pod: %q, Pod Status %+v", pod.Name, pod.Status)
 							return fmt.Errorf("critical pod should not be shutdown, phase: %s", pod.Status.Phase)
 						}
 					} else {
 						if !isPodShutdown(&pod) {
-							framework.Logf("Expecting non-critcal pod to be shutdown, but it's not currently. Pod: %q, Pod Status %+v", pod.Name, pod.Status)
+							framework.Logf("Expecting non-critical pod to be shutdown, but it's not currently. Pod: %q, Pod Status %+v", pod.Name, pod.Status)
 							return fmt.Errorf("pod should be shutdown, phase: %s", pod.Status.Phase)
 						}
 					}
@@ -207,9 +213,185 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut
 			}, nodeStatusUpdateTimeout, pollInterval).Should(gomega.BeNil())
 		})
 	})
 
+	ginkgo.Context("when gracefully shutting down with Pod priority", func() {
+
+		const (
+			pollInterval           = 1 * time.Second
+			podStatusUpdateTimeout = 10 * time.Second
+		)
+
+		var (
+			customClassA = getPriorityClass("custom-class-a", 100000)
+			customClassB = getPriorityClass("custom-class-b", 10000)
+			customClassC = getPriorityClass("custom-class-c", 1000)
+		)
+
+		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
+			initialConfig.FeatureGates = map[string]bool{
+				string(features.GracefulNodeShutdown):                   true,
+				string(features.GracefulNodeShutdownBasedOnPodPriority): true,
+			}
+			initialConfig.ShutdownGracePeriodByPodPriority = []kubeletconfig.ShutdownGracePeriodByPodPriority{
+				{
+					Priority:                   scheduling.SystemCriticalPriority,
+					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
+				},
+				{
+					Priority:                   customClassA.Value,
+					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
+				},
+				{
+					Priority:                   customClassB.Value,
+					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
+				},
+				{
+					Priority:                   customClassC.Value,
+					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
+				},
+				{
+					Priority:                   scheduling.DefaultPriorityWhenNoDefaultClassExists,
+					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
+				},
+			}
+
+		})
+
+		ginkgo.BeforeEach(func() {
+			ginkgo.By("Wait for the node to be ready")
+			waitForNodeReady()
+
+			for _, customClass := range []*schedulingv1.PriorityClass{customClassA, customClassB, customClassC} {
+				_, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(context.Background(), customClass, metav1.CreateOptions{})
+				framework.ExpectNoError(err)
+			}
+		})
+
+		ginkgo.AfterEach(func() {
+			ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown")
+			err := emitSignalPrepareForShutdown(false)
+			framework.ExpectNoError(err)
+		})
+
+		ginkgo.It("should be able to gracefully shutdown pods with various grace periods", func() {
+			nodeName := getNodeName(f)
+			nodeSelector := fields.Set{
+				"spec.nodeName": nodeName,
+			}.AsSelector().String()
+
+			// Define test pods
+			pods := []*v1.Pod{
+				getGracePeriodOverrideTestPod("period-5", nodeName, 5, ""),
+				getGracePeriodOverrideTestPod("period-c-5", nodeName, 5, customClassC.Name),
+				getGracePeriodOverrideTestPod("period-b-5", nodeName, 5, customClassB.Name),
+				getGracePeriodOverrideTestPod("period-a-5", nodeName, 5, customClassA.Name),
+				getGracePeriodOverrideTestPod("period-critical-5", nodeName, 5, scheduling.SystemNodeCritical),
+			}
+
+			// Expected down steps
+			downSteps := [][]string{
+				{
+					"period-5",
+				},
+				{
+					"period-5",
+					"period-c-5",
+				},
+				{
+					"period-5",
+					"period-c-5",
+					"period-b-5",
+				},
+				{
+					"period-5",
+					"period-c-5",
+					"period-b-5",
+					"period-a-5",
+				},
+				{
+					"period-5",
+					"period-c-5",
+					"period-b-5",
+					"period-a-5",
+					"period-critical-5",
+				},
+			}
+
+			ginkgo.By("Creating batch pods")
+			f.PodClient().CreateBatch(pods)
+
+			list, err := f.PodClient().List(context.TODO(), metav1.ListOptions{
+				FieldSelector: nodeSelector,
+			})
+			framework.ExpectNoError(err)
+			framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
+
+			ginkgo.By("Verifying batch pods are running")
+			for _, pod := range list.Items {
+				if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady {
+					framework.Failf("Failed to start batch pod: %v", pod.Name)
+				}
+			}
+
+			ginkgo.By("Emitting shutdown signal")
+			err = emitSignalPrepareForShutdown(true)
+			framework.ExpectNoError(err)
+
+			ginkgo.By("Verifying that pods are shutdown")
+
+			for _, step := range downSteps {
+				gomega.Eventually(func() error {
+					list, err = f.PodClient().List(context.TODO(), metav1.ListOptions{
+						FieldSelector: nodeSelector,
+					})
+					if err != nil {
+						return err
+					}
+					framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected")
+					for _, pod := range list.Items {
+						shouldShutdown := false
+						for _, podName := range step {
+							if podName == pod.Name {
+								shouldShutdown = true
+								break
+							}
+						}
+						if !shouldShutdown {
+							if pod.Status.Phase != v1.PodRunning {
+								framework.Logf("Expecting pod to be running, but it's not currently. Pod: %q, Pod Status Phase: %q, Pod Status Reason: %q", pod.Name, pod.Status.Phase, pod.Status.Reason)
+								return fmt.Errorf("pod should not be shutdown, phase: %s, reason: %s", pod.Status.Phase, pod.Status.Reason)
+							}
+						} else {
+							if pod.Status.Reason != podShutdownReason {
+								framework.Logf("Expecting pod to be shutdown, but it's not currently. Pod: %q, Pod Status Phase: %q, Pod Status Reason: %q", pod.Name, pod.Status.Phase, pod.Status.Reason)
+								for _, item := range list.Items {
+									framework.Logf("DEBUG %s, %s, %s", item.Name, item.Status.Phase, pod.Status.Reason)
+								}
+								return fmt.Errorf("pod should be shutdown, reason: %s", pod.Status.Reason)
+							}
+						}
+					}
+					return nil
+				}, podStatusUpdateTimeout, pollInterval).Should(gomega.BeNil())
+			}
+		})
+	})
 })
 
-func getGracePeriodOverrideTestPod(name string, node string, gracePeriod int64, critical bool) *v1.Pod {
+func getPriorityClass(name string, value int32) *schedulingv1.PriorityClass {
+	priority := &schedulingv1.PriorityClass{
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "PriorityClass",
+			APIVersion: "scheduling.k8s.io/v1",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name: name,
+		},
+		Value: value,
+	}
+	return priority
+}
+
+func getGracePeriodOverrideTestPod(name string, node string, gracePeriod int64, priorityClassName string) *v1.Pod {
 	pod := &v1.Pod{
 		TypeMeta: metav1.TypeMeta{
 			Kind:       "Pod",
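The downSteps table in the hunk above encodes a cumulative ordering: pods go down from lowest to highest priority, and each step equals the previous step plus the next pod in that order. A minimal sketch deriving the same table from the priority ordering (names mirror the test; the derivation itself is illustrative):

```go
package main

import "fmt"

func main() {
	// Pods from lowest to highest priority, matching the test's expectation.
	order := []string{"period-5", "period-c-5", "period-b-5", "period-a-5", "period-critical-5"}

	// Each step is the previous step plus the next pod to go down.
	var downSteps [][]string
	for i := range order {
		downSteps = append(downSteps, append([]string(nil), order[:i+1]...))
	}

	for _, step := range downSteps {
		fmt.Println(step)
	}
}
```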
@@ -238,14 +420,14 @@ while true; do sleep 5; done
 			NodeName: node,
 		},
 	}
-	if critical {
+	if priorityClassName == scheduling.SystemNodeCritical {
 		pod.ObjectMeta.Annotations = map[string]string{
 			kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource,
 		}
-		pod.Spec.PriorityClassName = scheduling.SystemNodeCritical
+		pod.Spec.PriorityClassName = priorityClassName
 
 		framework.ExpectEqual(kubelettypes.IsCriticalPod(pod), true, "pod should be a critical pod")
 	} else {
+		pod.Spec.PriorityClassName = priorityClassName
 		framework.ExpectEqual(kubelettypes.IsCriticalPod(pod), false, "pod should not be a critical pod")
 	}
 	return pod