Rename node status to node health in NodeLifecycleController

Since we are going to treat both node status and node lease as node
heartbeat/health signals, this PR makes the rename so that the
follow-up PRs are easier to review.
Zhen Wang 2018-10-01 11:32:56 -07:00
parent e567c791aa
commit 88e7e186f0
3 changed files with 90 additions and 87 deletions
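
For orientation before the diff: the heart of the rename is the per-node bookkeeping record and the map that holds it. Below is a minimal, self-contained sketch of the renamed declarations; the type and field names are taken from the diff, while the package clause and import aliases are conventional and are assumed here rather than shown in this commit.

package nodelifecycle

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// nodeHealthData replaces nodeStatusData: the last observed NodeStatus is now
// kept by pointer, alongside local timestamps of when it was observed.
type nodeHealthData struct {
	probeTimestamp           metav1.Time
	readyTransitionTimestamp metav1.Time
	status                   *v1.NodeStatus
}

// On the Controller, the per-node field correspondingly becomes:
//
//	nodeHealthMap map[string]*nodeHealthData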

File 1 of 3: the NodeLifecycleController implementation

@@ -128,14 +128,14 @@ const (
 )

 const (
-	// The amount of time the nodecontroller should sleep between retrying NodeStatus updates
+	// The amount of time the nodecontroller should sleep between retrying node health updates
 	retrySleepTime = 20 * time.Millisecond
 )

-type nodeStatusData struct {
+type nodeHealthData struct {
 	probeTimestamp metav1.Time
 	readyTransitionTimestamp metav1.Time
-	status v1.NodeStatus
+	status *v1.NodeStatus
 }

 // Controller is the controller that manages node's life cycle.
@@ -155,8 +155,8 @@ type Controller struct {
 	computeZoneStateFunc func(nodeConditions []*v1.NodeCondition) (int, ZoneState)

 	knownNodeSet map[string]*v1.Node
-	// per Node map storing last observed Status together with a local time when it was observed.
-	nodeStatusMap map[string]nodeStatusData
+	// per Node map storing last observed health together with a local time when it was observed.
+	nodeHealthMap map[string]*nodeHealthData

 	// Lock to access evictor workers
 	evictorLock sync.Mutex
@@ -180,28 +180,31 @@ type Controller struct {
 	recorder record.EventRecorder

 	// Value controlling Controller monitoring period, i.e. how often does Controller
-	// check node status posted from kubelet. This value should be lower than nodeMonitorGracePeriod.
-	// TODO: Change node status monitor to watch based.
+	// check node health signal posted from kubelet. This value should be lower than
+	// nodeMonitorGracePeriod.
+	// TODO: Change node health monitor to watch based.
 	nodeMonitorPeriod time.Duration

-	// Value used if sync_nodes_status=False, only for node startup. When node
-	// is just created, e.g. cluster bootstrap or node creation, we give a longer grace period.
+	// When node is just created, e.g. cluster bootstrap or node creation, we give
+	// a longer grace period.
 	nodeStartupGracePeriod time.Duration

-	// Value used if sync_nodes_status=False. Controller will not proactively
-	// sync node status in this case, but will monitor node status updated from kubelet. If
-	// it doesn't receive update for this amount of time, it will start posting "NodeReady==
-	// ConditionUnknown". The amount of time before which Controller start evicting pods
-	// is controlled via flag 'pod-eviction-timeout'.
-	// Note: be cautious when changing the constant, it must work with nodeStatusUpdateFrequency
-	// in kubelet. There are several constraints:
-	// 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency, where
-	// N means number of retries allowed for kubelet to post node status. It is pointless
-	// to make nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since there
-	// will only be fresh values from Kubelet at an interval of nodeStatusUpdateFrequency.
-	// The constant must be less than podEvictionTimeout.
-	// 2. nodeMonitorGracePeriod can't be too large for user experience - larger value takes
-	// longer for user to see up-to-date node status.
+	// Controller will not proactively sync node health, but will monitor node
+	// health signal updated from kubelet. If it doesn't receive update for this
+	// amount of time, it will start posting "NodeReady==ConditionUnknown". The
+	// amount of time before which Controller start evicting pods is controlled
+	// via flag 'pod-eviction-timeout'.
+	// Note: be cautious when changing the constant, it must work with
+	// nodeStatusUpdateFrequency in kubelet. There are several constraints:
+	// 1. nodeMonitorGracePeriod must be N times more than
+	// nodeStatusUpdateFrequency, where N means number of retries allowed for
+	// kubelet to post node health signal. It is pointless to make
+	// nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since
+	// there will only be fresh values from Kubelet at an interval of
+	// nodeStatusUpdateFrequency. The constant must be less than
+	// podEvictionTimeout.
+	// 2. nodeMonitorGracePeriod can't be too large for user experience - larger
+	// value takes longer for user to see up-to-date node health.
 	nodeMonitorGracePeriod time.Duration

 	podEvictionTimeout time.Duration
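
As an aside, the constraints described in the comment above are easy to sanity-check numerically. The sketch below uses commonly cited defaults (kubelet posting roughly every 10s, a 40s grace period, a 5m pod eviction timeout); these values are assumptions for illustration and are not taken from this commit.

package main

import (
	"fmt"
	"time"
)

func main() {
	// Assumed defaults, for illustration only.
	nodeStatusUpdateFrequency := 10 * time.Second // how often kubelet posts its health signal
	nodeMonitorGracePeriod := 40 * time.Second    // should be N * nodeStatusUpdateFrequency
	podEvictionTimeout := 5 * time.Minute         // the grace period must stay below this

	n := int(nodeMonitorGracePeriod / nodeStatusUpdateFrequency)
	fmt.Printf("grace period covers %d health-update intervals\n", n) // 4
	fmt.Println("grace period < pod eviction timeout:", nodeMonitorGracePeriod < podEvictionTimeout) // true
}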
@@ -259,7 +262,7 @@ func NewNodeLifecycleController(podInformer coreinformers.PodInformer,
 		kubeClient: kubeClient,
 		now: metav1.Now,
 		knownNodeSet: make(map[string]*v1.Node),
-		nodeStatusMap: make(map[string]nodeStatusData),
+		nodeHealthMap: make(map[string]*nodeHealthData),
 		nodeExistsInCloudProvider: func(nodeName types.NodeName) (bool, error) {
 			return nodeutil.ExistsInCloudProvider(cloud, nodeName)
 		},
@@ -419,10 +422,10 @@ func (nc *Controller) Run(stopCh <-chan struct{}) {
 		go wait.Until(nc.doEvictionPass, scheduler.NodeEvictionPeriod, stopCh)
 	}

-	// Incorporate the results of node status pushed from kubelet to master.
+	// Incorporate the results of node health signal pushed from kubelet to master.
 	go wait.Until(func() {
-		if err := nc.monitorNodeStatus(); err != nil {
-			glog.Errorf("Error monitoring node status: %v", err)
+		if err := nc.monitorNodeHealth(); err != nil {
+			glog.Errorf("Error monitoring node health: %v", err)
 		}
 	}, nc.nodeMonitorPeriod, stopCh)

@@ -608,10 +611,10 @@ func (nc *Controller) doEvictionPass() {
 	}
 }

-// monitorNodeStatus verifies node status are constantly updated by kubelet, and if not,
-// post "NodeReady==ConditionUnknown". It also evicts all pods if node is not ready or
-// not reachable for a long period of time.
-func (nc *Controller) monitorNodeStatus() error {
+// monitorNodeHealth verifies node health are constantly updated by kubelet, and
+// if not, post "NodeReady==ConditionUnknown". It also evicts all pods if node
+// is not ready or not reachable for a long period of time.
+func (nc *Controller) monitorNodeHealth() error {
 	// We are listing nodes from local cache as we can tolerate some small delays
 	// comparing to state from etcd and there is eventual consistency anyway.
 	nodes, err := nc.nodeLister.List(labels.Everything())
@@ -648,20 +651,20 @@ func (nc *Controller) monitorNodeStatus() error {
 		var observedReadyCondition v1.NodeCondition
 		var currentReadyCondition *v1.NodeCondition
 		node := nodes[i].DeepCopy()
-		if err := wait.PollImmediate(retrySleepTime, retrySleepTime*scheduler.NodeStatusUpdateRetry, func() (bool, error) {
-			gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node)
+		if err := wait.PollImmediate(retrySleepTime, retrySleepTime*scheduler.NodeHealthUpdateRetry, func() (bool, error) {
+			gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeHealth(node)
 			if err == nil {
 				return true, nil
 			}
 			name := node.Name
 			node, err = nc.kubeClient.CoreV1().Nodes().Get(name, metav1.GetOptions{})
 			if err != nil {
-				glog.Errorf("Failed while getting a Node to retry updating NodeStatus. Probably Node %s was deleted.", name)
+				glog.Errorf("Failed while getting a Node to retry updating node health. Probably Node %s was deleted.", name)
 				return false, err
 			}
 			return false, nil
 		}); err != nil {
-			glog.Errorf("Update status of Node '%v' from Controller error: %v. "+
+			glog.Errorf("Update health of Node '%v' from Controller error: %v. "+
 				"Skipping - no pods will be evicted.", node.Name, err)
 			continue
 		}
@@ -689,12 +692,12 @@ func (nc *Controller) monitorNodeStatus() error {
 						)
 					}
 				} else {
-					if decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
+					if decisionTimestamp.After(nc.nodeHealthMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
 						if nc.evictPods(node) {
 							glog.V(2).Infof("Node is NotReady. Adding Pods on Node %s to eviction queue: %v is later than %v + %v",
 								node.Name,
 								decisionTimestamp,
-								nc.nodeStatusMap[node.Name].readyTransitionTimestamp,
+								nc.nodeHealthMap[node.Name].readyTransitionTimestamp,
 								nc.podEvictionTimeout,
 							)
 						}
@@ -716,12 +719,12 @@ func (nc *Controller) monitorNodeStatus() error {
 						)
 					}
 				} else {
-					if decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) {
+					if decisionTimestamp.After(nc.nodeHealthMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) {
 						if nc.evictPods(node) {
 							glog.V(2).Infof("Node is unresponsive. Adding Pods on Node %s to eviction queues: %v is later than %v + %v",
 								node.Name,
 								decisionTimestamp,
-								nc.nodeStatusMap[node.Name].readyTransitionTimestamp,
+								nc.nodeHealthMap[node.Name].readyTransitionTimestamp,
 								nc.podEvictionTimeout-gracePeriod,
 							)
 						}
@@ -799,9 +802,9 @@ func (nc *Controller) monitorNodeStatus() error {
 	return nil
 }

-// tryUpdateNodeStatus checks a given node's conditions and tries to update it. Returns grace period to
+// tryUpdateNodeHealth checks a given node's conditions and tries to update it. Returns grace period to
 // which given node is entitled, state of current and last observed Ready Condition, and an error if it occurred.
-func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
+func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
 	var err error
 	var gracePeriod time.Duration
 	var observedReadyCondition v1.NodeCondition
@@ -817,8 +820,8 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 			LastTransitionTime: node.CreationTimestamp,
 		}
 		gracePeriod = nc.nodeStartupGracePeriod
-		nc.nodeStatusMap[node.Name] = nodeStatusData{
-			status: node.Status,
+		nc.nodeHealthMap[node.Name] = &nodeHealthData{
+			status: &node.Status,
 			probeTimestamp: node.CreationTimestamp,
 			readyTransitionTimestamp: node.CreationTimestamp,
 		}
@@ -828,7 +831,7 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 		gracePeriod = nc.nodeMonitorGracePeriod
 	}

-	savedNodeStatus, found := nc.nodeStatusMap[node.Name]
+	savedNodeHealth, found := nc.nodeHealthMap[node.Name]
 	// There are following cases to check:
 	// - both saved and new status have no Ready Condition set - we leave everything as it is,
 	// - saved status have no Ready Condition, but current one does - Controller was restarted with Node data already present in etcd,
@@ -845,28 +848,28 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 	// if that's the case, but it does not seem necessary.
 	var savedCondition *v1.NodeCondition
 	if found {
-		_, savedCondition = v1node.GetNodeCondition(&savedNodeStatus.status, v1.NodeReady)
+		_, savedCondition = v1node.GetNodeCondition(savedNodeHealth.status, v1.NodeReady)
 	}
 	_, observedCondition := v1node.GetNodeCondition(&node.Status, v1.NodeReady)
 	if !found {
 		glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
-		savedNodeStatus = nodeStatusData{
-			status: node.Status,
+		savedNodeHealth = &nodeHealthData{
+			status: &node.Status,
 			probeTimestamp: nc.now(),
 			readyTransitionTimestamp: nc.now(),
 		}
 	} else if savedCondition == nil && observedCondition != nil {
 		glog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
-		savedNodeStatus = nodeStatusData{
-			status: node.Status,
+		savedNodeHealth = &nodeHealthData{
+			status: &node.Status,
 			probeTimestamp: nc.now(),
 			readyTransitionTimestamp: nc.now(),
 		}
 	} else if savedCondition != nil && observedCondition == nil {
 		glog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
 		// TODO: figure out what to do in this case. For now we do the same thing as above.
-		savedNodeStatus = nodeStatusData{
-			status: node.Status,
+		savedNodeHealth = &nodeHealthData{
+			status: &node.Status,
 			probeTimestamp: nc.now(),
 			readyTransitionTimestamp: nc.now(),
 		}
@@ -878,22 +881,22 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 			glog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition, observedCondition)
 			transitionTime = nc.now()
 		} else {
-			transitionTime = savedNodeStatus.readyTransitionTimestamp
+			transitionTime = savedNodeHealth.readyTransitionTimestamp
 		}
 		if glog.V(5) {
-			glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeStatus.status, node.Status)
+			glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeHealth.status, node.Status)
 		} else {
 			glog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name)
 		}
-		savedNodeStatus = nodeStatusData{
-			status: node.Status,
+		savedNodeHealth = &nodeHealthData{
+			status: &node.Status,
 			probeTimestamp: nc.now(),
 			readyTransitionTimestamp: transitionTime,
 		}
 	}
-	nc.nodeStatusMap[node.Name] = savedNodeStatus
+	nc.nodeHealthMap[node.Name] = savedNodeHealth

-	if nc.now().After(savedNodeStatus.probeTimestamp.Add(gracePeriod)) {
+	if nc.now().After(savedNodeHealth.probeTimestamp.Add(gracePeriod)) {
 		// NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown
 		// (regardless of its current value) in the master.
 		if currentReadyCondition == nil {
@@ -908,7 +911,7 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 			})
 		} else {
 			glog.V(4).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v",
-				node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), observedReadyCondition)
+				node.Name, nc.now().Time.Sub(savedNodeHealth.probeTimestamp.Time), observedReadyCondition)
 			if observedReadyCondition.Status != v1.ConditionUnknown {
 				currentReadyCondition.Status = v1.ConditionUnknown
 				currentReadyCondition.Reason = "NodeStatusUnknown"
@@ -944,7 +947,7 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 			})
 		} else {
 			glog.V(4).Infof("node %v hasn't been updated for %+v. Last %v is: %+v",
-				node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), nodeConditionType, currentCondition)
+				node.Name, nc.now().Time.Sub(savedNodeHealth.probeTimestamp.Time), nodeConditionType, currentCondition)
 			if currentCondition.Status != v1.ConditionUnknown {
 				currentCondition.Status = v1.ConditionUnknown
 				currentCondition.Reason = "NodeStatusUnknown"
@@ -960,9 +963,9 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 			glog.Errorf("Error updating node %s: %v", node.Name, err)
 			return gracePeriod, observedReadyCondition, currentReadyCondition, err
 		}
-		nc.nodeStatusMap[node.Name] = nodeStatusData{
-			status: node.Status,
-			probeTimestamp: nc.nodeStatusMap[node.Name].probeTimestamp,
+		nc.nodeHealthMap[node.Name] = &nodeHealthData{
+			status: &node.Status,
+			probeTimestamp: nc.nodeHealthMap[node.Name].probeTimestamp,
 			readyTransitionTimestamp: nc.now(),
 		}
 		return gracePeriod, observedReadyCondition, currentReadyCondition, nil
@@ -1044,10 +1047,10 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
 		// When exiting disruption mode update probe timestamps on all Nodes.
 		now := nc.now()
 		for i := range nodes {
-			v := nc.nodeStatusMap[nodes[i].Name]
+			v := nc.nodeHealthMap[nodes[i].Name]
 			v.probeTimestamp = now
 			v.readyTransitionTimestamp = now
-			nc.nodeStatusMap[nodes[i].Name] = v
+			nc.nodeHealthMap[nodes[i].Name] = v
 		}
 		// We reset all rate limiters to settings appropriate for the given state.
 		for k := range nc.zoneStates {
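
A note on the pointer change visible throughout this file: because nodeHealthMap now stores *nodeHealthData, reading an entry and mutating its fields updates the stored record directly, and writing the same pointer back (as the hunk above still does) is harmless. A self-contained sketch of that difference, using a hypothetical record type rather than the controller code:

package main

import (
	"fmt"
	"time"
)

// record stands in for nodeHealthData in this illustration.
type record struct {
	probeTimestamp time.Time
}

func main() {
	now := time.Now()

	// Value map (the old nodeStatusMap shape): the read returns a copy,
	// so the updated copy must be written back.
	byValue := map[string]record{"node-a": {}}
	v := byValue["node-a"]
	v.probeTimestamp = now
	byValue["node-a"] = v

	// Pointer map (the new nodeHealthMap shape): mutating through the
	// pointer already updates the stored entry; the write-back is a no-op.
	byPointer := map[string]*record{"node-a": {}}
	p := byPointer["node-a"]
	p.probeTimestamp = now
	byPointer["node-a"] = p

	fmt.Println(byValue["node-a"].probeTimestamp.Equal(byPointer["node-a"].probeTimestamp)) // true
}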

File 2 of 3: the NodeLifecycleController tests

@@ -146,7 +146,7 @@ func newNodeLifecycleControllerFromClient(
 	return &nodeLifecycleController{nc, nodeInformer, daemonSetInformer}, nil
 }

-func TestMonitorNodeStatusEvictPods(t *testing.T) {
+func TestMonitorNodeHealthEvictPods(t *testing.T) {
 	fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
 	evictionTimeout := 10 * time.Minute
 	labels := map[string]string{
@@ -628,7 +628,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 		if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
-		if err := nodeController.monitorNodeStatus(); err != nil {
+		if err := nodeController.monitorNodeHealth(); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
 		if item.timeToPass > 0 {
@@ -643,7 +643,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 		if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
-		if err := nodeController.monitorNodeStatus(); err != nil {
+		if err := nodeController.monitorNodeHealth(); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
 		zones := testutil.GetZones(item.fakeNodeHandler)
@@ -787,7 +787,7 @@ func TestPodStatusChange(t *testing.T) {
 		if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
-		if err := nodeController.monitorNodeStatus(); err != nil {
+		if err := nodeController.monitorNodeHealth(); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
 		if item.timeToPass > 0 {
@@ -798,7 +798,7 @@ func TestPodStatusChange(t *testing.T) {
 		if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
-		if err := nodeController.monitorNodeStatus(); err != nil {
+		if err := nodeController.monitorNodeHealth(); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
 		zones := testutil.GetZones(item.fakeNodeHandler)
@@ -827,7 +827,7 @@ func TestPodStatusChange(t *testing.T) {
 	}
 }

-func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
+func TestMonitorNodeHealthEvictPodsWithDisruption(t *testing.T) {
 	fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
 	evictionTimeout := 10 * time.Minute
 	timeToPass := 60 * time.Minute
@@ -1318,7 +1318,7 @@ func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
 		if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
-		if err := nodeController.monitorNodeStatus(); err != nil {
+		if err := nodeController.monitorNodeHealth(); err != nil {
 			t.Errorf("%v: unexpected error: %v", item.description, err)
 		}

@@ -1336,7 +1336,7 @@ func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
 		if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
-		if err := nodeController.monitorNodeStatus(); err != nil {
+		if err := nodeController.monitorNodeHealth(); err != nil {
 			t.Errorf("%v: unexpected error: %v", item.description, err)
 		}
 		for zone, state := range item.expectedFollowingStates {
@@ -1449,7 +1449,7 @@ func TestCloudProviderNodeShutdown(t *testing.T) {
 	if err := nodeController.syncNodeStore(fnh); err != nil {
 		t.Errorf("unexpected error: %v", err)
 	}
-	if err := nodeController.monitorNodeStatus(); err != nil {
+	if err := nodeController.monitorNodeHealth(); err != nil {
 		t.Errorf("unexpected error: %v", err)
 	}

@@ -1520,11 +1520,11 @@ func TestCloudProviderNoRateLimit(t *testing.T) {
 	nodeController.nodeShutdownInCloudProvider = func(ctx context.Context, node *v1.Node) (bool, error) {
 		return false, nil
 	}
-	// monitorNodeStatus should allow this node to be immediately deleted
+	// monitorNodeHealth should allow this node to be immediately deleted
 	if err := nodeController.syncNodeStore(fnh); err != nil {
 		t.Errorf("unexpected error: %v", err)
 	}
-	if err := nodeController.monitorNodeStatus(); err != nil {
+	if err := nodeController.monitorNodeHealth(); err != nil {
 		t.Errorf("unexpected error: %v", err)
 	}
 	select {
@@ -1540,7 +1540,7 @@ func TestCloudProviderNoRateLimit(t *testing.T) {
 	}
 }

-func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
+func TestMonitorNodeHealthUpdateStatus(t *testing.T) {
 	fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
 	table := []struct {
 		fakeNodeHandler *testutil.FakeNodeHandler
@@ -1804,7 +1804,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 		if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
-		if err := nodeController.monitorNodeStatus(); err != nil {
+		if err := nodeController.monitorNodeHealth(); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
 		if item.timeToPass > 0 {
@@ -1813,7 +1813,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 			if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 				t.Errorf("unexpected error: %v", err)
 			}
-			if err := nodeController.monitorNodeStatus(); err != nil {
+			if err := nodeController.monitorNodeHealth(); err != nil {
 				t.Errorf("unexpected error: %v", err)
 			}
 		}
@@ -1829,7 +1829,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 	}
 }

-func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
+func TestMonitorNodeHealthMarkPodsNotReady(t *testing.T) {
 	fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
 	table := []struct {
 		fakeNodeHandler *testutil.FakeNodeHandler
@@ -1951,7 +1951,7 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
 		if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
-		if err := nodeController.monitorNodeStatus(); err != nil {
+		if err := nodeController.monitorNodeHealth(); err != nil {
 			t.Errorf("Case[%d] unexpected error: %v", i, err)
 		}
 		if item.timeToPass > 0 {
@@ -1960,7 +1960,7 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
 			if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
 				t.Errorf("unexpected error: %v", err)
 			}
-			if err := nodeController.monitorNodeStatus(); err != nil {
+			if err := nodeController.monitorNodeHealth(); err != nil {
 				t.Errorf("Case[%d] unexpected error: %v", i, err)
 			}
 		}
@@ -2071,7 +2071,7 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) {
 	if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
 		t.Errorf("unexpected error: %v", err)
 	}
-	if err := nodeController.monitorNodeStatus(); err != nil {
+	if err := nodeController.monitorNodeHealth(); err != nil {
 		t.Errorf("unexpected error: %v", err)
 	}
 	nodeController.doNoExecuteTaintingPass()
@@ -2109,7 +2109,7 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) {
 	if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
 		t.Errorf("unexpected error: %v", err)
 	}
-	if err := nodeController.monitorNodeStatus(); err != nil {
+	if err := nodeController.monitorNodeHealth(); err != nil {
 		t.Errorf("unexpected error: %v", err)
 	}
 	nodeController.doNoExecuteTaintingPass()
@@ -2421,7 +2421,7 @@ func TestNodeEventGeneration(t *testing.T) {
 	if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
 		t.Errorf("unexpected error: %v", err)
 	}
-	if err := nodeController.monitorNodeStatus(); err != nil {
+	if err := nodeController.monitorNodeHealth(); err != nil {
 		t.Errorf("unexpected error: %v", err)
 	}
 	if len(fakeRecorder.Events) != 2 {

File 3 of 3: the scheduler constants

@@ -28,9 +28,9 @@ import (
 )

 const (
-	// NodeStatusUpdateRetry controls the number of retries of writing
-	// NodeStatus update.
-	NodeStatusUpdateRetry = 5
+	// NodeHealthUpdateRetry controls the number of retries of writing
+	// node health update.
+	NodeHealthUpdateRetry = 5
 	// NodeEvictionPeriod controls how often NodeController will try to
 	// evict Pods from non-responsive Nodes.
 	NodeEvictionPeriod = 100 * time.Millisecond
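
The renamed NodeHealthUpdateRetry constant bounds the retry loop shown earlier: monitorNodeHealth polls tryUpdateNodeHealth with wait.PollImmediate(retrySleepTime, retrySleepTime*scheduler.NodeHealthUpdateRetry, ...), i.e. roughly NodeHealthUpdateRetry attempts spaced retrySleepTime apart. Below is a standalone sketch of that retry pattern, with a stubbed-out update function standing in for the controller's real one:

package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

const (
	retrySleepTime        = 20 * time.Millisecond
	nodeHealthUpdateRetry = 5 // mirrors scheduler.NodeHealthUpdateRetry
)

func main() {
	attempts := 0
	// Poll immediately, then every retrySleepTime, for at most
	// retrySleepTime*nodeHealthUpdateRetry, the same bound monitorNodeHealth
	// places on tryUpdateNodeHealth retries.
	err := wait.PollImmediate(retrySleepTime, retrySleepTime*nodeHealthUpdateRetry, func() (bool, error) {
		attempts++
		if attempts < 3 {
			return false, nil // pretend the update failed and ask for a retry
		}
		return true, nil // update succeeded
	})
	if err != nil {
		fmt.Println("gave up:", err)
		return
	}
	fmt.Printf("health update succeeded after %d attempts\n", attempts)
}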