From f8bf6b97b8f9842c6b8964bae2d47fbf31d3e08c Mon Sep 17 00:00:00 2001 From: devppratik Date: Tue, 23 Jul 2024 15:56:30 +0530 Subject: [PATCH] Update Node Monitor Grace Period default duration to 50s Update description Improve flag comment Update Test case value to be 50s by default Update Description Run make update Minor description fix --- .../app/options/nodelifecyclecontroller.go | 3 ++- pkg/controller/nodelifecycle/config/types.go | 3 ++- pkg/controller/nodelifecycle/config/v1alpha1/defaults.go | 7 ++++++- pkg/controller/nodelifecycle/node_lifecycle_controller.go | 6 +++++- .../nodelifecycle/node_lifecycle_controller_test.go | 2 +- pkg/generated/openapi/zz_generated.openapi.go | 2 +- .../kube-controller-manager/config/v1alpha1/types.go | 3 ++- 7 files changed, 19 insertions(+), 7 deletions(-) diff --git a/cmd/kube-controller-manager/app/options/nodelifecyclecontroller.go b/cmd/kube-controller-manager/app/options/nodelifecyclecontroller.go index fe5181f31b9..32499423e99 100644 --- a/cmd/kube-controller-manager/app/options/nodelifecyclecontroller.go +++ b/cmd/kube-controller-manager/app/options/nodelifecyclecontroller.go @@ -41,7 +41,8 @@ func (o *NodeLifecycleControllerOptions) AddFlags(fs *pflag.FlagSet) { fs.DurationVar(&o.NodeMonitorGracePeriod.Duration, "node-monitor-grace-period", o.NodeMonitorGracePeriod.Duration, "Amount of time which we allow running Node to be unresponsive before marking it unhealthy. "+ "Must be N times more than kubelet's nodeStatusUpdateFrequency, "+ - "where N means number of retries allowed for kubelet to post node status.") + "where N means number of retries allowed for kubelet to post node status. "+ + "This value should also be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS") fs.Float32Var(&o.NodeEvictionRate, "node-eviction-rate", 0.1, "Number of nodes per second on which pods are deleted in case of node failure when a zone is healthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters.") fs.Float32Var(&o.SecondaryNodeEvictionRate, "secondary-node-eviction-rate", 0.01, "Number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters. This value is implicitly overridden to 0 if the cluster size is smaller than --large-cluster-size-threshold.") fs.Int32Var(&o.LargeClusterSizeThreshold, "large-cluster-size-threshold", 50, fmt.Sprintf("Number of nodes from which %s treats the cluster as large for the eviction logic purposes. --secondary-node-eviction-rate is implicitly overridden to 0 for clusters this size or smaller. Notice: If nodes reside in multiple zones, this threshold will be considered as zone node size threshold for each zone to determine node eviction rate independently.", names.NodeLifecycleController)) diff --git a/pkg/controller/nodelifecycle/config/types.go b/pkg/controller/nodelifecycle/config/types.go index 176b0b9b637..6daa47179cb 100644 --- a/pkg/controller/nodelifecycle/config/types.go +++ b/pkg/controller/nodelifecycle/config/types.go @@ -32,7 +32,8 @@ type NodeLifecycleControllerConfiguration struct { // NodeMonitorGracePeriod is the amount of time which we allow a running node to be // unresponsive before marking it unhealthy. Must be N times more than kubelet's // nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet - // to post node status. + // to post node status. This value should also be greater than the sum of + // HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS. NodeMonitorGracePeriod metav1.Duration // secondaryNodeEvictionRate is implicitly overridden to 0 for clusters smaller than or equal to largeClusterSizeThreshold LargeClusterSizeThreshold int32 diff --git a/pkg/controller/nodelifecycle/config/v1alpha1/defaults.go b/pkg/controller/nodelifecycle/config/v1alpha1/defaults.go index bf80ce31e07..7652f9f13d4 100644 --- a/pkg/controller/nodelifecycle/config/v1alpha1/defaults.go +++ b/pkg/controller/nodelifecycle/config/v1alpha1/defaults.go @@ -37,8 +37,13 @@ func RecommendedDefaultNodeLifecycleControllerConfiguration(obj *kubectrlmgrconf if obj.PodEvictionTimeout == zero { obj.PodEvictionTimeout = metav1.Duration{Duration: 5 * time.Minute} } + // NodeMonitorGracePeriod is set to a default value of 50 seconds. + // This value should be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS (30s) + // and HTTP2_READ_IDLE_TIMEOUT_SECONDS (15s) from the http2 health check + // to ensure that the server has adequate time to handle slow or idle connections + // properly before marking a node as unhealthy. if obj.NodeMonitorGracePeriod == zero { - obj.NodeMonitorGracePeriod = metav1.Duration{Duration: 40 * time.Second} + obj.NodeMonitorGracePeriod = metav1.Duration{Duration: 50 * time.Second} } if obj.NodeStartupGracePeriod == zero { obj.NodeStartupGracePeriod = metav1.Duration{Duration: 60 * time.Second} diff --git a/pkg/controller/nodelifecycle/node_lifecycle_controller.go b/pkg/controller/nodelifecycle/node_lifecycle_controller.go index b642e1bd421..5153aad1a68 100644 --- a/pkg/controller/nodelifecycle/node_lifecycle_controller.go +++ b/pkg/controller/nodelifecycle/node_lifecycle_controller.go @@ -284,7 +284,11 @@ type Controller struct { // be less than the node health signal update frequency, since there will // only be fresh values from Kubelet at an interval of node health signal // update frequency. - // 2. nodeMonitorGracePeriod can't be too large for user experience - larger + // 2. nodeMonitorGracePeriod should be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS (30s) + // and HTTP2_READ_IDLE_TIMEOUT_SECONDS (15s) from the http2 health check + // to ensure that the server has adequate time to handle slow or idle connections + // properly before marking a node as unhealthy. + // 3. nodeMonitorGracePeriod can't be too large for user experience - larger // value takes longer for user to see up-to-date node health. nodeMonitorGracePeriod time.Duration diff --git a/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go b/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go index 2d348d29dc2..c2416136a4b 100644 --- a/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go +++ b/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go @@ -52,7 +52,7 @@ import ( ) const ( - testNodeMonitorGracePeriod = 40 * time.Second + testNodeMonitorGracePeriod = 50 * time.Second testNodeStartupGracePeriod = 60 * time.Second testNodeMonitorPeriod = 5 * time.Second testRateLimiterQPS = float32(100000) diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index ddd59ddb8a3..eaddee1a5a9 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -58828,7 +58828,7 @@ func schema_k8sio_kube_controller_manager_config_v1alpha1_NodeLifecycleControlle }, "NodeMonitorGracePeriod": { SchemaProps: spec.SchemaProps{ - Description: "nodeMontiorGracePeriod is the amount of time which we allow a running node to be unresponsive before marking it unhealthy. Must be N times more than kubelet's nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet to post node status.", + Description: "nodeMontiorGracePeriod is the amount of time which we allow a running node to be unresponsive before marking it unhealthy. Must be N times more than kubelet's nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet to post node status. This value should also be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS.", Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), }, }, diff --git a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go index fe2e46b45ae..bb2ad0ecd48 100644 --- a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go +++ b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go @@ -403,7 +403,8 @@ type NodeLifecycleControllerConfiguration struct { // nodeMontiorGracePeriod is the amount of time which we allow a running node to be // unresponsive before marking it unhealthy. Must be N times more than kubelet's // nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet - // to post node status. + // to post node status. This value should also be greater than the sum of + // HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS. NodeMonitorGracePeriod metav1.Duration // podEvictionTimeout is the grace period for deleting pods on failed nodes. PodEvictionTimeout metav1.Duration