From 88e7e186f05eaa3ec7ee1f3ac7702d7e20619a86 Mon Sep 17 00:00:00 2001
From: Zhen Wang
Date: Mon, 1 Oct 2018 11:32:56 -0700
Subject: [PATCH] Rename node status to node health in NodeLifecycleController

Since we are going to treat both node status and node lease as node
heartbeat/health signals, this PR makes the rename changes, so that the
follow-up PRs are easier to review.
---
 .../node_lifecycle_controller.go      | 131 +++++++++---------
 .../node_lifecycle_controller_test.go |  40 +++---
 .../scheduler/rate_limited_queue.go   |   6 +-
 3 files changed, 90 insertions(+), 87 deletions(-)

diff --git a/pkg/controller/nodelifecycle/node_lifecycle_controller.go b/pkg/controller/nodelifecycle/node_lifecycle_controller.go
index 3ad98404cda..8656fd6b1ee 100644
--- a/pkg/controller/nodelifecycle/node_lifecycle_controller.go
+++ b/pkg/controller/nodelifecycle/node_lifecycle_controller.go
@@ -128,14 +128,14 @@ const (
 )

 const (
- // The amount of time the nodecontroller should sleep between retrying NodeStatus updates
+ // The amount of time the nodecontroller should sleep between retrying node health updates
  retrySleepTime = 20 * time.Millisecond
 )

-type nodeStatusData struct {
+type nodeHealthData struct {
  probeTimestamp metav1.Time
  readyTransitionTimestamp metav1.Time
- status v1.NodeStatus
+ status *v1.NodeStatus
 }

 // Controller is the controller that manages node's life cycle.
@@ -155,8 +155,8 @@ type Controller struct {
  computeZoneStateFunc func(nodeConditions []*v1.NodeCondition) (int, ZoneState)

  knownNodeSet map[string]*v1.Node
- // per Node map storing last observed Status together with a local time when it was observed.
- nodeStatusMap map[string]nodeStatusData
+ // per Node map storing last observed health together with a local time when it was observed.
+ nodeHealthMap map[string]*nodeHealthData

  // Lock to access evictor workers
  evictorLock sync.Mutex
@@ -180,28 +180,31 @@ type Controller struct {
  recorder record.EventRecorder

  // Value controlling Controller monitoring period, i.e. how often does Controller
- // check node status posted from kubelet. This value should be lower than nodeMonitorGracePeriod.
- // TODO: Change node status monitor to watch based.
+ // check node health signal posted from kubelet. This value should be lower than
+ // nodeMonitorGracePeriod.
+ // TODO: Change node health monitor to watch based.
  nodeMonitorPeriod time.Duration

- // Value used if sync_nodes_status=False, only for node startup. When node
- // is just created, e.g. cluster bootstrap or node creation, we give a longer grace period.
+ // When node is just created, e.g. cluster bootstrap or node creation, we give
+ // a longer grace period.
  nodeStartupGracePeriod time.Duration

- // Value used if sync_nodes_status=False. Controller will not proactively
- // sync node status in this case, but will monitor node status updated from kubelet. If
- // it doesn't receive update for this amount of time, it will start posting "NodeReady==
- // ConditionUnknown". The amount of time before which Controller start evicting pods
- // is controlled via flag 'pod-eviction-timeout'.
- // Note: be cautious when changing the constant, it must work with nodeStatusUpdateFrequency
- // in kubelet. There are several constraints:
- // 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency, where
- //    N means number of retries allowed for kubelet to post node status.
It is pointless - // to make nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since there - // will only be fresh values from Kubelet at an interval of nodeStatusUpdateFrequency. - // The constant must be less than podEvictionTimeout. - // 2. nodeMonitorGracePeriod can't be too large for user experience - larger value takes - // longer for user to see up-to-date node status. + // Controller will not proactively sync node health, but will monitor node + // health signal updated from kubelet. If it doesn't receive update for this + // amount of time, it will start posting "NodeReady==ConditionUnknown". The + // amount of time before which Controller start evicting pods is controlled + // via flag 'pod-eviction-timeout'. + // Note: be cautious when changing the constant, it must work with + // nodeStatusUpdateFrequency in kubelet. There are several constraints: + // 1. nodeMonitorGracePeriod must be N times more than + // nodeStatusUpdateFrequency, where N means number of retries allowed for + // kubelet to post node health signal. It is pointless to make + // nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since + // there will only be fresh values from Kubelet at an interval of + // nodeStatusUpdateFrequency. The constant must be less than + // podEvictionTimeout. + // 2. nodeMonitorGracePeriod can't be too large for user experience - larger + // value takes longer for user to see up-to-date node health. nodeMonitorGracePeriod time.Duration podEvictionTimeout time.Duration @@ -259,7 +262,7 @@ func NewNodeLifecycleController(podInformer coreinformers.PodInformer, kubeClient: kubeClient, now: metav1.Now, knownNodeSet: make(map[string]*v1.Node), - nodeStatusMap: make(map[string]nodeStatusData), + nodeHealthMap: make(map[string]*nodeHealthData), nodeExistsInCloudProvider: func(nodeName types.NodeName) (bool, error) { return nodeutil.ExistsInCloudProvider(cloud, nodeName) }, @@ -419,10 +422,10 @@ func (nc *Controller) Run(stopCh <-chan struct{}) { go wait.Until(nc.doEvictionPass, scheduler.NodeEvictionPeriod, stopCh) } - // Incorporate the results of node status pushed from kubelet to master. + // Incorporate the results of node health signal pushed from kubelet to master. go wait.Until(func() { - if err := nc.monitorNodeStatus(); err != nil { - glog.Errorf("Error monitoring node status: %v", err) + if err := nc.monitorNodeHealth(); err != nil { + glog.Errorf("Error monitoring node health: %v", err) } }, nc.nodeMonitorPeriod, stopCh) @@ -608,10 +611,10 @@ func (nc *Controller) doEvictionPass() { } } -// monitorNodeStatus verifies node status are constantly updated by kubelet, and if not, -// post "NodeReady==ConditionUnknown". It also evicts all pods if node is not ready or -// not reachable for a long period of time. -func (nc *Controller) monitorNodeStatus() error { +// monitorNodeHealth verifies node health are constantly updated by kubelet, and +// if not, post "NodeReady==ConditionUnknown". It also evicts all pods if node +// is not ready or not reachable for a long period of time. +func (nc *Controller) monitorNodeHealth() error { // We are listing nodes from local cache as we can tolerate some small delays // comparing to state from etcd and there is eventual consistency anyway. 
nodes, err := nc.nodeLister.List(labels.Everything()) @@ -648,20 +651,20 @@ func (nc *Controller) monitorNodeStatus() error { var observedReadyCondition v1.NodeCondition var currentReadyCondition *v1.NodeCondition node := nodes[i].DeepCopy() - if err := wait.PollImmediate(retrySleepTime, retrySleepTime*scheduler.NodeStatusUpdateRetry, func() (bool, error) { - gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node) + if err := wait.PollImmediate(retrySleepTime, retrySleepTime*scheduler.NodeHealthUpdateRetry, func() (bool, error) { + gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeHealth(node) if err == nil { return true, nil } name := node.Name node, err = nc.kubeClient.CoreV1().Nodes().Get(name, metav1.GetOptions{}) if err != nil { - glog.Errorf("Failed while getting a Node to retry updating NodeStatus. Probably Node %s was deleted.", name) + glog.Errorf("Failed while getting a Node to retry updating node health. Probably Node %s was deleted.", name) return false, err } return false, nil }); err != nil { - glog.Errorf("Update status of Node '%v' from Controller error: %v. "+ + glog.Errorf("Update health of Node '%v' from Controller error: %v. "+ "Skipping - no pods will be evicted.", node.Name, err) continue } @@ -689,12 +692,12 @@ func (nc *Controller) monitorNodeStatus() error { ) } } else { - if decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) { + if decisionTimestamp.After(nc.nodeHealthMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) { if nc.evictPods(node) { glog.V(2).Infof("Node is NotReady. Adding Pods on Node %s to eviction queue: %v is later than %v + %v", node.Name, decisionTimestamp, - nc.nodeStatusMap[node.Name].readyTransitionTimestamp, + nc.nodeHealthMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout, ) } @@ -716,12 +719,12 @@ func (nc *Controller) monitorNodeStatus() error { ) } } else { - if decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) { + if decisionTimestamp.After(nc.nodeHealthMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) { if nc.evictPods(node) { glog.V(2).Infof("Node is unresponsive. Adding Pods on Node %s to eviction queues: %v is later than %v + %v", node.Name, decisionTimestamp, - nc.nodeStatusMap[node.Name].readyTransitionTimestamp, + nc.nodeHealthMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout-gracePeriod, ) } @@ -799,9 +802,9 @@ func (nc *Controller) monitorNodeStatus() error { return nil } -// tryUpdateNodeStatus checks a given node's conditions and tries to update it. Returns grace period to +// tryUpdateNodeHealth checks a given node's conditions and tries to update it. Returns grace period to // which given node is entitled, state of current and last observed Ready Condition, and an error if it occurred. 
-func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) { +func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) { var err error var gracePeriod time.Duration var observedReadyCondition v1.NodeCondition @@ -817,8 +820,8 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node LastTransitionTime: node.CreationTimestamp, } gracePeriod = nc.nodeStartupGracePeriod - nc.nodeStatusMap[node.Name] = nodeStatusData{ - status: node.Status, + nc.nodeHealthMap[node.Name] = &nodeHealthData{ + status: &node.Status, probeTimestamp: node.CreationTimestamp, readyTransitionTimestamp: node.CreationTimestamp, } @@ -828,7 +831,7 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node gracePeriod = nc.nodeMonitorGracePeriod } - savedNodeStatus, found := nc.nodeStatusMap[node.Name] + savedNodeHealth, found := nc.nodeHealthMap[node.Name] // There are following cases to check: // - both saved and new status have no Ready Condition set - we leave everything as it is, // - saved status have no Ready Condition, but current one does - Controller was restarted with Node data already present in etcd, @@ -845,28 +848,28 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node // if that's the case, but it does not seem necessary. var savedCondition *v1.NodeCondition if found { - _, savedCondition = v1node.GetNodeCondition(&savedNodeStatus.status, v1.NodeReady) + _, savedCondition = v1node.GetNodeCondition(savedNodeHealth.status, v1.NodeReady) } _, observedCondition := v1node.GetNodeCondition(&node.Status, v1.NodeReady) if !found { glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name) - savedNodeStatus = nodeStatusData{ - status: node.Status, + savedNodeHealth = &nodeHealthData{ + status: &node.Status, probeTimestamp: nc.now(), readyTransitionTimestamp: nc.now(), } } else if savedCondition == nil && observedCondition != nil { glog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name) - savedNodeStatus = nodeStatusData{ - status: node.Status, + savedNodeHealth = &nodeHealthData{ + status: &node.Status, probeTimestamp: nc.now(), readyTransitionTimestamp: nc.now(), } } else if savedCondition != nil && observedCondition == nil { glog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name) // TODO: figure out what to do in this case. For now we do the same thing as above. - savedNodeStatus = nodeStatusData{ - status: node.Status, + savedNodeHealth = &nodeHealthData{ + status: &node.Status, probeTimestamp: nc.now(), readyTransitionTimestamp: nc.now(), } @@ -878,22 +881,22 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node glog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition, observedCondition) transitionTime = nc.now() } else { - transitionTime = savedNodeStatus.readyTransitionTimestamp + transitionTime = savedNodeHealth.readyTransitionTimestamp } if glog.V(5) { - glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeStatus.status, node.Status) + glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeHealth.status, node.Status) } else { glog.V(3).Infof("Node %s ReadyCondition updated. 
Updating timestamp.", node.Name) } - savedNodeStatus = nodeStatusData{ - status: node.Status, + savedNodeHealth = &nodeHealthData{ + status: &node.Status, probeTimestamp: nc.now(), readyTransitionTimestamp: transitionTime, } } - nc.nodeStatusMap[node.Name] = savedNodeStatus + nc.nodeHealthMap[node.Name] = savedNodeHealth - if nc.now().After(savedNodeStatus.probeTimestamp.Add(gracePeriod)) { + if nc.now().After(savedNodeHealth.probeTimestamp.Add(gracePeriod)) { // NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown // (regardless of its current value) in the master. if currentReadyCondition == nil { @@ -908,7 +911,7 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node }) } else { glog.V(4).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v", - node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), observedReadyCondition) + node.Name, nc.now().Time.Sub(savedNodeHealth.probeTimestamp.Time), observedReadyCondition) if observedReadyCondition.Status != v1.ConditionUnknown { currentReadyCondition.Status = v1.ConditionUnknown currentReadyCondition.Reason = "NodeStatusUnknown" @@ -944,7 +947,7 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node }) } else { glog.V(4).Infof("node %v hasn't been updated for %+v. Last %v is: %+v", - node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), nodeConditionType, currentCondition) + node.Name, nc.now().Time.Sub(savedNodeHealth.probeTimestamp.Time), nodeConditionType, currentCondition) if currentCondition.Status != v1.ConditionUnknown { currentCondition.Status = v1.ConditionUnknown currentCondition.Reason = "NodeStatusUnknown" @@ -960,9 +963,9 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node glog.Errorf("Error updating node %s: %v", node.Name, err) return gracePeriod, observedReadyCondition, currentReadyCondition, err } - nc.nodeStatusMap[node.Name] = nodeStatusData{ - status: node.Status, - probeTimestamp: nc.nodeStatusMap[node.Name].probeTimestamp, + nc.nodeHealthMap[node.Name] = &nodeHealthData{ + status: &node.Status, + probeTimestamp: nc.nodeHealthMap[node.Name].probeTimestamp, readyTransitionTimestamp: nc.now(), } return gracePeriod, observedReadyCondition, currentReadyCondition, nil @@ -1044,10 +1047,10 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod // When exiting disruption mode update probe timestamps on all Nodes. now := nc.now() for i := range nodes { - v := nc.nodeStatusMap[nodes[i].Name] + v := nc.nodeHealthMap[nodes[i].Name] v.probeTimestamp = now v.readyTransitionTimestamp = now - nc.nodeStatusMap[nodes[i].Name] = v + nc.nodeHealthMap[nodes[i].Name] = v } // We reset all rate limiters to settings appropriate for the given state. 
for k := range nc.zoneStates { diff --git a/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go b/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go index 23db5623a02..89870111448 100644 --- a/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go +++ b/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go @@ -146,7 +146,7 @@ func newNodeLifecycleControllerFromClient( return &nodeLifecycleController{nc, nodeInformer, daemonSetInformer}, nil } -func TestMonitorNodeStatusEvictPods(t *testing.T) { +func TestMonitorNodeHealthEvictPods(t *testing.T) { fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC) evictionTimeout := 10 * time.Minute labels := map[string]string{ @@ -628,7 +628,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("unexpected error: %v", err) } if item.timeToPass > 0 { @@ -643,7 +643,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("unexpected error: %v", err) } zones := testutil.GetZones(item.fakeNodeHandler) @@ -787,7 +787,7 @@ func TestPodStatusChange(t *testing.T) { if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("unexpected error: %v", err) } if item.timeToPass > 0 { @@ -798,7 +798,7 @@ func TestPodStatusChange(t *testing.T) { if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("unexpected error: %v", err) } zones := testutil.GetZones(item.fakeNodeHandler) @@ -827,7 +827,7 @@ func TestPodStatusChange(t *testing.T) { } } -func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) { +func TestMonitorNodeHealthEvictPodsWithDisruption(t *testing.T) { fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC) evictionTimeout := 10 * time.Minute timeToPass := 60 * time.Minute @@ -1318,7 +1318,7 @@ func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) { if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("%v: unexpected error: %v", item.description, err) } @@ -1336,7 +1336,7 @@ func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) { if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("%v: unexpected error: %v", item.description, err) } for zone, state := range item.expectedFollowingStates { @@ -1449,7 +1449,7 @@ func TestCloudProviderNodeShutdown(t *testing.T) { if err := nodeController.syncNodeStore(fnh); err != nil { t.Errorf("unexpected error: %v", err) } - 
if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("unexpected error: %v", err) } @@ -1520,11 +1520,11 @@ func TestCloudProviderNoRateLimit(t *testing.T) { nodeController.nodeShutdownInCloudProvider = func(ctx context.Context, node *v1.Node) (bool, error) { return false, nil } - // monitorNodeStatus should allow this node to be immediately deleted + // monitorNodeHealth should allow this node to be immediately deleted if err := nodeController.syncNodeStore(fnh); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("unexpected error: %v", err) } select { @@ -1540,7 +1540,7 @@ func TestCloudProviderNoRateLimit(t *testing.T) { } } -func TestMonitorNodeStatusUpdateStatus(t *testing.T) { +func TestMonitorNodeHealthUpdateStatus(t *testing.T) { fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC) table := []struct { fakeNodeHandler *testutil.FakeNodeHandler @@ -1804,7 +1804,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) { if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("unexpected error: %v", err) } if item.timeToPass > 0 { @@ -1813,7 +1813,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) { if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("unexpected error: %v", err) } } @@ -1829,7 +1829,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) { } } -func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) { +func TestMonitorNodeHealthMarkPodsNotReady(t *testing.T) { fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC) table := []struct { fakeNodeHandler *testutil.FakeNodeHandler @@ -1951,7 +1951,7 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) { if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("Case[%d] unexpected error: %v", i, err) } if item.timeToPass > 0 { @@ -1960,7 +1960,7 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) { if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("Case[%d] unexpected error: %v", i, err) } } @@ -2071,7 +2071,7 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) { if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("unexpected error: %v", err) } nodeController.doNoExecuteTaintingPass() @@ -2109,7 +2109,7 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) { if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := 
nodeController.monitorNodeHealth(); err != nil { t.Errorf("unexpected error: %v", err) } nodeController.doNoExecuteTaintingPass() @@ -2421,7 +2421,7 @@ func TestNodeEventGeneration(t *testing.T) { if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil { t.Errorf("unexpected error: %v", err) } - if err := nodeController.monitorNodeStatus(); err != nil { + if err := nodeController.monitorNodeHealth(); err != nil { t.Errorf("unexpected error: %v", err) } if len(fakeRecorder.Events) != 2 { diff --git a/pkg/controller/nodelifecycle/scheduler/rate_limited_queue.go b/pkg/controller/nodelifecycle/scheduler/rate_limited_queue.go index 2a0a92f6010..5d562fb71fc 100644 --- a/pkg/controller/nodelifecycle/scheduler/rate_limited_queue.go +++ b/pkg/controller/nodelifecycle/scheduler/rate_limited_queue.go @@ -28,9 +28,9 @@ import ( ) const ( - // NodeStatusUpdateRetry controls the number of retries of writing - // NodeStatus update. - NodeStatusUpdateRetry = 5 + // NodeHealthUpdateRetry controls the number of retries of writing + // node health update. + NodeHealthUpdateRetry = 5 // NodeEvictionPeriod controls how often NodeController will try to // evict Pods from non-responsive Nodes. NodeEvictionPeriod = 100 * time.Millisecond
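
Reviewer note (not part of the patch): the renamed bookkeeping is easier to see in isolation. The sketch below is a self-contained, simplified illustration of the pattern that tryUpdateNodeHealth applies to nodeHealthMap: the probe timestamp advances on every observed heartbeat, while the ready-transition timestamp advances only when the Ready condition actually changes. It uses plain time.Time and a bool in place of metav1.Time and *v1.NodeStatus, and observeHeartbeat is a hypothetical helper rather than a function from this patch.

package main

import (
	"fmt"
	"time"
)

// nodeHealthData mirrors the struct introduced by this patch, but with
// simplified field types so the example runs without Kubernetes imports.
type nodeHealthData struct {
	probeTimestamp           time.Time
	readyTransitionTimestamp time.Time
	ready                    bool // stand-in for the NodeReady condition status
}

// observeHeartbeat (hypothetical helper) mimics the map-update pattern in
// tryUpdateNodeHealth: refresh probeTimestamp on every heartbeat, and move
// readyTransitionTimestamp only when readiness changes.
func observeHeartbeat(health map[string]*nodeHealthData, node string, ready bool, now time.Time) {
	saved, found := health[node]
	if !found {
		health[node] = &nodeHealthData{probeTimestamp: now, readyTransitionTimestamp: now, ready: ready}
		return
	}
	transition := saved.readyTransitionTimestamp
	if saved.ready != ready {
		transition = now
	}
	health[node] = &nodeHealthData{probeTimestamp: now, readyTransitionTimestamp: transition, ready: ready}
}

func main() {
	health := map[string]*nodeHealthData{}
	start := time.Now()

	observeHeartbeat(health, "node-1", true, start)
	observeHeartbeat(health, "node-1", true, start.Add(10*time.Second))  // probe moves, transition does not
	observeHeartbeat(health, "node-1", false, start.Add(20*time.Second)) // both move: readiness changed

	h := health["node-1"]
	fmt.Printf("probe=%v transition=%v ready=%v\n",
		h.probeTimestamp.Sub(start), h.readyTransitionTimestamp.Sub(start), h.ready)

	// A monitoring loop such as monitorNodeHealth would compare the current
	// time against probeTimestamp plus the grace period to decide when to mark
	// the node Unknown, and against readyTransitionTimestamp plus
	// podEvictionTimeout when deciding whether to evict pods.
}

The switch to storing pointers in the map (*nodeHealthData, with a *v1.NodeStatus inside it) presumably also makes it easier for follow-up changes, such as the node-lease signal mentioned in the commit message, to refresh an existing entry in place rather than copying the whole NodeStatus.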