From 0b94834b17cf8347ce65951a6c3f9ec8890074d5 Mon Sep 17 00:00:00 2001 From: NickrenREN Date: Thu, 5 Jan 2017 20:22:35 +0800 Subject: [PATCH] fix nodeStatusUpdateRetry count exceeding condition judgement When tryUpdateNodeStatus() returns an error (err != nil) but the following nc.kubeClient.Core().Nodes().Get() call succeeds, err is overwritten with nil. If this happens on every one of the nodeStatusUpdateRetry iterations, err == nil when the for loop ends, so the error is never logged and `continue` is never executed. The retry-count-exceeded condition check is therefore wrong. --- pkg/controller/node/nodecontroller.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg/controller/node/nodecontroller.go b/pkg/controller/node/nodecontroller.go index c30035575dd..2a2454f728a 100644 --- a/pkg/controller/node/nodecontroller.go +++ b/pkg/controller/node/nodecontroller.go @@ -449,24 +449,24 @@ func (nc *NodeController) monitorNodeStatus() error { continue } node := nodeCopy.(*v1.Node) - for rep := 0; rep < nodeStatusUpdateRetry; rep++ { + if err := wait.PollImmediate(retrySleepTime, retrySleepTime*nodeStatusUpdateRetry, func() (bool, error) { gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node) if err == nil { - break + return true, nil } name := node.Name node, err = nc.kubeClient.Core().Nodes().Get(name, metav1.GetOptions{}) if err != nil { glog.Errorf("Failed while getting a Node to retry updating NodeStatus. Probably Node %s was deleted.", name) - break + return false, err } - time.Sleep(retrySleepTime) - } - if err != nil { - glog.Errorf("Update status of Node %v from NodeController exceeds retry count."+ - "Skipping - no pods will be evicted.", node.Name) + return false, nil + }); err != nil { + glog.Errorf("Update status of Node %v from NodeController error : %v. "+ + "Skipping - no pods will be evicted.", node.Name, err) continue } + // We do not treat a master node as a part of the cluster for network disruption checking. 
if !system.IsMasterNode(node.Name) { zoneToNodeConditions[utilnode.GetZoneKey(node)] = append(zoneToNodeConditions[utilnode.GetZoneKey(node)], currentReadyCondition)