mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-27 13:37:30 +00:00
Merge pull request #56352 from shyamjvs/rate-limited-queue-in-cidr-allocator
Automatic merge from submit-queue (batch tested with PRs 56759, 57851, 56352). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Refactor retry logic away from updateCIDRAllocation() Fixes https://github.com/kubernetes/kubernetes/issues/52292 (this is the last improvement left under it) /cc @wojtek-t ```release-note NONE ``` cc @kubernetes/sig-network-misc
This commit is contained in:
commit
29aff5bf47
@ -68,7 +68,7 @@ const (
|
|||||||
cidrUpdateQueueSize = 5000
|
cidrUpdateQueueSize = 5000
|
||||||
|
|
||||||
// cidrUpdateRetries is the no. of times a NodeSpec update will be retried before dropping it.
|
// cidrUpdateRetries is the no. of times a NodeSpec update will be retried before dropping it.
|
||||||
cidrUpdateRetries = 10
|
cidrUpdateRetries = 3
|
||||||
)
|
)
|
||||||
|
|
||||||
// CIDRAllocator is an interface implemented by things that know how
|
// CIDRAllocator is an interface implemented by things that know how
|
||||||
|
@ -210,35 +210,34 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
|
|||||||
}
|
}
|
||||||
podCIDR := cidr.String()
|
podCIDR := cidr.String()
|
||||||
|
|
||||||
for rep := 0; rep < cidrUpdateRetries; rep++ {
|
|
||||||
node, err = ca.nodeLister.Get(nodeName)
|
node, err = ca.nodeLister.Get(nodeName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", nodeName, err)
|
glog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDR: %v", nodeName, err)
|
||||||
continue
|
return err
|
||||||
}
|
}
|
||||||
if node.Spec.PodCIDR != "" {
|
|
||||||
if node.Spec.PodCIDR == podCIDR {
|
if node.Spec.PodCIDR == podCIDR {
|
||||||
glog.V(4).Infof("Node %v already has allocated CIDR %v. It matches the proposed one.", node.Name, podCIDR)
|
glog.V(4).Infof("Node %v already has allocated CIDR %v. It matches the proposed one.", node.Name, podCIDR)
|
||||||
// We don't return to set the NetworkUnavailable condition if needed.
|
// We don't return here, in order to set the NetworkUnavailable condition later below.
|
||||||
break
|
} else {
|
||||||
}
|
if node.Spec.PodCIDR != "" {
|
||||||
glog.Errorf("PodCIDR being reassigned! Node %v spec has %v, but cloud provider has assigned %v",
|
glog.Errorf("PodCIDR being reassigned! Node %v spec has %v, but cloud provider has assigned %v", node.Name, node.Spec.PodCIDR, podCIDR)
|
||||||
node.Name, node.Spec.PodCIDR, podCIDR)
|
|
||||||
// We fall through and set the CIDR despite this error. This
|
// We fall through and set the CIDR despite this error. This
|
||||||
// implements the same logic as implemented in the
|
// implements the same logic as implemented in the
|
||||||
// rangeAllocator.
|
// rangeAllocator.
|
||||||
//
|
//
|
||||||
// See https://github.com/kubernetes/kubernetes/pull/42147#discussion_r103357248
|
// See https://github.com/kubernetes/kubernetes/pull/42147#discussion_r103357248
|
||||||
}
|
}
|
||||||
|
for i := 0; i < cidrUpdateRetries; i++ {
|
||||||
if err = utilnode.PatchNodeCIDR(ca.client, types.NodeName(node.Name), podCIDR); err == nil {
|
if err = utilnode.PatchNodeCIDR(ca.client, types.NodeName(node.Name), podCIDR); err == nil {
|
||||||
glog.Infof("Set node %v PodCIDR to %v", node.Name, podCIDR)
|
glog.Infof("Set node %v PodCIDR to %v", node.Name, podCIDR)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
glog.Errorf("Failed to update node %v PodCIDR to %v (%d retries left): %v", node.Name, podCIDR, cidrUpdateRetries-rep-1, err)
|
}
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
nodeutil.RecordNodeStatusChange(ca.recorder, node, "CIDRAssignmentFailed")
|
nodeutil.RecordNodeStatusChange(ca.recorder, node, "CIDRAssignmentFailed")
|
||||||
glog.Errorf("CIDR assignment for node %v failed: %v.", nodeName, err)
|
glog.Errorf("Failed to update node %v PodCIDR to %v after multiple attempts: %v", node.Name, podCIDR, err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -286,39 +286,40 @@ func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error {
|
|||||||
defer r.removeNodeFromProcessing(data.nodeName)
|
defer r.removeNodeFromProcessing(data.nodeName)
|
||||||
|
|
||||||
podCIDR := data.cidr.String()
|
podCIDR := data.cidr.String()
|
||||||
for rep := 0; rep < cidrUpdateRetries; rep++ {
|
|
||||||
node, err = r.nodeLister.Get(data.nodeName)
|
node, err = r.nodeLister.Get(data.nodeName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err)
|
glog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDR: %v", data.nodeName, err)
|
||||||
continue
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if node.Spec.PodCIDR == podCIDR {
|
||||||
|
glog.V(4).Infof("Node %v already has allocated CIDR %v. It matches the proposed one.", node.Name, podCIDR)
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
if node.Spec.PodCIDR != "" {
|
if node.Spec.PodCIDR != "" {
|
||||||
glog.V(4).Infof("Node %v already has allocated CIDR %v. Releasing assigned one if different.", node.Name, node.Spec.PodCIDR)
|
glog.Errorf("Node %v already has a CIDR allocated %v. Releasing the new one %v.", node.Name, node.Spec.PodCIDR, podCIDR)
|
||||||
if node.Spec.PodCIDR != podCIDR {
|
|
||||||
glog.Errorf("Node %q PodCIDR seems to have changed (original=%v, current=%v), releasing original and occupying new CIDR",
|
|
||||||
node.Name, node.Spec.PodCIDR, podCIDR)
|
|
||||||
if err := r.cidrs.Release(data.cidr); err != nil {
|
if err := r.cidrs.Release(data.cidr); err != nil {
|
||||||
glog.Errorf("Error when releasing CIDR %v", podCIDR)
|
glog.Errorf("Error when releasing CIDR %v", podCIDR)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
// If we reached here, it means that the node has no CIDR currently assigned. So we set it.
|
||||||
|
for i := 0; i < cidrUpdateRetries; i++ {
|
||||||
if err = utilnode.PatchNodeCIDR(r.client, types.NodeName(node.Name), podCIDR); err == nil {
|
if err = utilnode.PatchNodeCIDR(r.client, types.NodeName(node.Name), podCIDR); err == nil {
|
||||||
glog.Infof("Set node %v PodCIDR to %v", node.Name, podCIDR)
|
glog.Infof("Set node %v PodCIDR to %v", node.Name, podCIDR)
|
||||||
break
|
return nil
|
||||||
}
|
}
|
||||||
glog.Errorf("Failed to update node %v PodCIDR to %v (%d retries left): %v", node.Name, podCIDR, cidrUpdateRetries-rep-1, err)
|
|
||||||
}
|
}
|
||||||
if err != nil {
|
glog.Errorf("Failed to update node %v PodCIDR to %v after multiple attempts: %v", node.Name, podCIDR, err)
|
||||||
nodeutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
|
nodeutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
|
||||||
// We accept the fact that we may leek CIDRs here. This is safer than releasing
|
// We accept the fact that we may leak CIDRs here. This is safer than releasing
|
||||||
// them in case when we don't know if request went through.
|
// them in case when we don't know if request went through.
|
||||||
// NodeController restart will return all falsely allocated CIDRs to the pool.
|
// NodeController restart will return all falsely allocated CIDRs to the pool.
|
||||||
if !apierrors.IsServerTimeout(err) {
|
if !apierrors.IsServerTimeout(err) {
|
||||||
glog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", data.nodeName, err)
|
glog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", node.Name, err)
|
||||||
if releaseErr := r.cidrs.Release(data.cidr); releaseErr != nil {
|
if releaseErr := r.cidrs.Release(data.cidr); releaseErr != nil {
|
||||||
glog.Errorf("Error releasing allocated CIDR for node %v: %v", data.nodeName, releaseErr)
|
glog.Errorf("Error releasing allocated CIDR for node %v: %v", node.Name, releaseErr)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
|
Loading…
Reference in New Issue
Block a user