Merge pull request #68131 from wojtek-t/fix_ipam_retrying

Automatic merge from submit-queue (batch tested with PRs 67756, 64149, 68076, 68131, 68120). If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md. Fix ipam controller to not drop nodes forever in case of failures. https://github.com/kubernetes/kubernetes/pull/68084 introduced a problem: when 10 attempts to assign a CIDR fail, we stop retrying, but the node is still marked as "in-processing", so it is never reconciled again. This PR fixes that problem.
2025-09-06 11:42:14 +00:00 · 2018-08-31 09:06:39 -07:00
parent 36187c6a2e fcd2882722
commit 240a086cec
1 changed files with 1 additions and 1 deletions
--- a/pkg/controller/nodeipam/ipam/cloud_cidr_allocator.go
+++ b/pkg/controller/nodeipam/ipam/cloud_cidr_allocator.go
@@ -157,7 +157,6 @@ func (ca *cloudCIDRAllocator) worker(stopChan <-chan struct{}) {
 			}
 			if err := ca.updateCIDRAllocation(workItem); err == nil {
 				glog.V(3).Infof("Updated CIDR for %q", workItem)
-				ca.removeNodeFromProcessing(workItem)
 			} else {
 				glog.Errorf("Error updating CIDR for %q: %v", workItem, err)
 				if canRetry, timeout := ca.retryParams(workItem); canRetry {
@@ -170,6 +169,7 @@ func (ca *cloudCIDRAllocator) worker(stopChan <-chan struct{}) {
 				}
 				glog.Errorf("Exceeded retry count for %q, dropping from queue", workItem)
 			}
+			ca.removeNodeFromProcessing(workItem)
 		case <-stopChan:
 			return
 		}