Update Node Monitor Grace Period default duration to 50s

Update description Improve flag comment Update Test case value to be 50s by default Update Description Run make update Minor description fix
2025-07-28 05:57:25 +00:00 · 2024-07-23 15:56:30 +05:30 · 2024-07-23 15:56:30 +05:30 · f8bf6b97b8
commit f8bf6b97b8
parent 5420b2fe9a
7 changed files with 19 additions and 7 deletions
--- a/cmd/kube-controller-manager/app/options/nodelifecyclecontroller.go
+++ b/cmd/kube-controller-manager/app/options/nodelifecyclecontroller.go
@ -41,7 +41,8 @@ func (o *NodeLifecycleControllerOptions) AddFlags(fs *pflag.FlagSet) {
 	fs.DurationVar(&o.NodeMonitorGracePeriod.Duration, "node-monitor-grace-period", o.NodeMonitorGracePeriod.Duration,
 		"Amount of time which we allow running Node to be unresponsive before marking it unhealthy. "+
 			"Must be N times more than kubelet's nodeStatusUpdateFrequency, "+
-			"where N means number of retries allowed for kubelet to post node status.")
+			"where N means number of retries allowed for kubelet to post node status. "+
+			"This value should also be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS")
 	fs.Float32Var(&o.NodeEvictionRate, "node-eviction-rate", 0.1, "Number of nodes per second on which pods are deleted in case of node failure when a zone is healthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters.")
 	fs.Float32Var(&o.SecondaryNodeEvictionRate, "secondary-node-eviction-rate", 0.01, "Number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters. This value is implicitly overridden to 0 if the cluster size is smaller than --large-cluster-size-threshold.")
 	fs.Int32Var(&o.LargeClusterSizeThreshold, "large-cluster-size-threshold", 50, fmt.Sprintf("Number of nodes from which %s treats the cluster as large for the eviction logic purposes. --secondary-node-eviction-rate is implicitly overridden to 0 for clusters this size or smaller. Notice: If nodes reside in multiple zones, this threshold will be considered as zone node size threshold for each zone to determine node eviction rate independently.", names.NodeLifecycleController))
--- a/pkg/controller/nodelifecycle/config/types.go
+++ b/pkg/controller/nodelifecycle/config/types.go
@ -32,7 +32,8 @@ type NodeLifecycleControllerConfiguration struct {
 	// NodeMonitorGracePeriod is the amount of time which we allow a running node to be
 	// unresponsive before marking it unhealthy. Must be N times more than kubelet's
 	// nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet
-	// to post node status.
+	// to post node status. This value should also be greater than the sum of
+	// HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS.
 	NodeMonitorGracePeriod metav1.Duration
 	// secondaryNodeEvictionRate is implicitly overridden to 0 for clusters smaller than or equal to largeClusterSizeThreshold
 	LargeClusterSizeThreshold int32
--- a/pkg/controller/nodelifecycle/config/v1alpha1/defaults.go
+++ b/pkg/controller/nodelifecycle/config/v1alpha1/defaults.go
@ -37,8 +37,13 @@ func RecommendedDefaultNodeLifecycleControllerConfiguration(obj *kubectrlmgrconf
 	if obj.PodEvictionTimeout == zero {
 		obj.PodEvictionTimeout = metav1.Duration{Duration: 5 * time.Minute}
 	}
+	// NodeMonitorGracePeriod is set to a default value of 50 seconds.
+	// This value should be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS (30s)
+	// and HTTP2_READ_IDLE_TIMEOUT_SECONDS (15s) from the http2 health check
+	// to ensure that the server has adequate time to handle slow or idle connections
+	// properly before marking a node as unhealthy.
 	if obj.NodeMonitorGracePeriod == zero {
-		obj.NodeMonitorGracePeriod = metav1.Duration{Duration: 40 * time.Second}
+		obj.NodeMonitorGracePeriod = metav1.Duration{Duration: 50 * time.Second}
 	}
 	if obj.NodeStartupGracePeriod == zero {
 		obj.NodeStartupGracePeriod = metav1.Duration{Duration: 60 * time.Second}
--- a/pkg/controller/nodelifecycle/node_lifecycle_controller.go
+++ b/pkg/controller/nodelifecycle/node_lifecycle_controller.go
@ -284,7 +284,11 @@ type Controller struct {
 	//    be less than the node health signal update frequency, since there will
 	//    only be fresh values from Kubelet at an interval of node health signal
 	//    update frequency.
-	// 2. nodeMonitorGracePeriod can't be too large for user experience - larger
+	// 2. nodeMonitorGracePeriod should be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS (30s)
+	// 	  and HTTP2_READ_IDLE_TIMEOUT_SECONDS (15s) from the http2 health check
+	// 	  to ensure that the server has adequate time to handle slow or idle connections
+	//    properly before marking a node as unhealthy.
+	// 3. nodeMonitorGracePeriod can't be too large for user experience - larger
 	//    value takes longer for user to see up-to-date node health.
 	nodeMonitorGracePeriod time.Duration

--- a/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go
+++ b/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go
@ -52,7 +52,7 @@ import (
 )

 const (
-	testNodeMonitorGracePeriod = 40 * time.Second
+	testNodeMonitorGracePeriod = 50 * time.Second
 	testNodeStartupGracePeriod = 60 * time.Second
 	testNodeMonitorPeriod      = 5 * time.Second
 	testRateLimiterQPS         = float32(100000)
--- a/pkg/generated/openapi/zz_generated.openapi.go
+++ b/pkg/generated/openapi/zz_generated.openapi.go
@ -58828,7 +58828,7 @@ func schema_k8sio_kube_controller_manager_config_v1alpha1_NodeLifecycleControlle
 					},
 					"NodeMonitorGracePeriod": {
 						SchemaProps: spec.SchemaProps{
-							Description: "nodeMontiorGracePeriod is the amount of time which we allow a running node to be unresponsive before marking it unhealthy. Must be N times more than kubelet's nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet to post node status.",
+							Description: "nodeMontiorGracePeriod is the amount of time which we allow a running node to be unresponsive before marking it unhealthy. Must be N times more than kubelet's nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet to post node status. This value should also be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS.",
 							Ref:         ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"),
 						},
 					},
--- a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go
+++ b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go
@ -403,7 +403,8 @@ type NodeLifecycleControllerConfiguration struct {
 	// nodeMontiorGracePeriod is the amount of time which we allow a running node to be
 	// unresponsive before marking it unhealthy. Must be N times more than kubelet's
 	// nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet
-	// to post node status.
+	// to post node status. This value should also be greater than the sum of
+	// HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS.
 	NodeMonitorGracePeriod metav1.Duration
 	// podEvictionTimeout is the grace period for deleting pods on failed nodes.
 	PodEvictionTimeout metav1.Duration