From 081ec69386257e557c0fd3ab0ea0cad3a0f9253d Mon Sep 17 00:00:00 2001 From: Thomas Hartland Date: Wed, 8 May 2019 10:54:33 +0200 Subject: [PATCH] Abort node initialization if cloud taint was already removed If node events are received at a faster rate than they can be processed, then initialization for some nodes will be delayed. Once they are eventually processed their cloud taint is removed, but several update events for those nodes, still carrying the cloud taint, may already be waiting in the event queue. To avoid re-initializing those nodes, the cloud taint is checked again after requesting the current state of the node. If the cloud taint is no longer on the node then nil is returned from the RetryOnConflict, as an error does not need to be logged. The logging for a successful initialization is also moved inside the RetryOnConflict so that the early nil return does not cause the aborted initialization to be logged as a success. --- pkg/controller/cloud/node_controller.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pkg/controller/cloud/node_controller.go b/pkg/controller/cloud/node_controller.go index b592ef74666..182e9bcd05d 100644 --- a/pkg/controller/cloud/node_controller.go +++ b/pkg/controller/cloud/node_controller.go @@ -256,6 +256,13 @@ func (cnc *CloudNodeController) initializeNode(node *v1.Node) { return err } + cloudTaint := getCloudTaint(curNode.Spec.Taints) + if cloudTaint == nil { + // Node object received from event had the cloud taint but was outdated, + // the node has actually already been initialized. 
+ return nil + } + if curNode.Spec.ProviderID == "" { providerID, err := cloudprovider.GetInstanceProviderID(context.TODO(), cnc.cloud, types.NodeName(curNode.Name)) if err == nil { @@ -312,14 +319,14 @@ func (cnc *CloudNodeController) initializeNode(node *v1.Node) { // After adding, call UpdateNodeAddress to set the CloudProvider provided IPAddresses // So that users do not see any significant delay in IP addresses being filled into the node cnc.updateNodeAddress(curNode, instances) + + klog.Infof("Successfully initialized node %s with cloud provider", node.Name) return nil }) if err != nil { utilruntime.HandleError(err) return } - - klog.Infof("Successfully initialized node %s with cloud provider", node.Name) } func getCloudTaint(taints []v1.Taint) *v1.Taint {