Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #48967 from jackfrancis/azure-lb-backoff
Automatic merge from submit-queue (batch tested with PRs 49218, 48253, 48967, 48460, 49230)

additional backoff in azure cloudprovider

Fixes #48971

**What this PR does / why we need it**: We want to be able to opt in to backoff retry logic for kubelet-originating request behavior: node IP address resolution and node load balancer pool membership enforcement.

**Special notes for your reviewer**: The use case for this is Azure cloudprovider clusters with large node counts, especially during cluster installation, or other scenarios where many nodes come online at once and attempt to register all their resources with the backend API. To give clusters at scale more control over the in-cluster API request rate, the backoff config can meaningfully slow that rate down when appropriate.

**Release note**:

```
additional backoff in azure cloudprovider
```
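All of the retry wrappers in this change follow the same pattern: wrap the flaky call in a `wait.ExponentialBackoff` condition function that returns `(false, nil)` on a transient failure (sleep, then retry) and `(true, nil)` on success. A minimal standalone sketch of that pattern (the `fetchNodeIP` helper and the backoff values are hypothetical, for illustration only):

```go
package main

import (
	"errors"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	// Example values only: start at 1s, double each step, give up after 5 attempts.
	backoff := wait.Backoff{Duration: 1 * time.Second, Factor: 2.0, Steps: 5}

	// fetchNodeIP stands in for a flaky cloud API call, analogous to
	// az.getIPForMachine in this PR; here it succeeds on the third try.
	attempt := 0
	fetchNodeIP := func() (string, error) {
		attempt++
		if attempt < 3 {
			return "", errors.New("transient API error")
		}
		return "10.240.0.4", nil
	}

	var ip string
	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
		var retryErr error
		ip, retryErr = fetchNodeIP()
		if retryErr != nil {
			return false, nil // transient failure: sleep, then retry
		}
		return true, nil // success: stop retrying
	})
	fmt.Println(ip, err)
}
```

Returning a non-nil error from the condition function aborts the loop immediately; returning `(false, nil)` until the steps are exhausted yields `wait.ErrWaitTimeout`.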
Commit: ecadada7ef
```diff
@@ -43,6 +43,38 @@ func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.Virtua
 	return machine, exists, err
 }
 
+// VirtualMachineClientGetWithRetry invokes az.VirtualMachinesClient.Get with exponential backoff retry
+func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string, types compute.InstanceViewTypes) (compute.VirtualMachine, error) {
+	var machine compute.VirtualMachine
+	err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
+		var retryErr error
+		machine, retryErr = az.VirtualMachinesClient.Get(resourceGroup, vmName, types)
+		if retryErr != nil {
+			glog.Errorf("backoff: failure, will retry,err=%v", retryErr)
+			return false, nil
+		}
+		glog.V(2).Infof("backoff: success")
+		return true, nil
+	})
+	return machine, err
+}
+
+// GetIPForMachineWithRetry invokes az.getIPForMachine with exponential backoff retry
+func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, error) {
+	var ip string
+	err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
+		var retryErr error
+		ip, retryErr = az.getIPForMachine(name)
+		if retryErr != nil {
+			glog.Errorf("backoff: failure, will retry,err=%v", retryErr)
+			return false, nil
+		}
+		glog.V(2).Infof("backoff: success")
+		return true, nil
+	})
+	return ip, err
+}
+
 // CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry
 func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error {
 	return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
```
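The `az.resourceRequestBackoff` these wrappers share is not defined in this diff; presumably it is built from the provider's backoff configuration at startup. A sketch of that wiring, assuming the `cloudProviderBackoff*` config fields from the provider's earlier backoff support (the names and values here are assumptions, not part of this change):

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// resourceRequestBackoff derives a wait.Backoff from cloud-config values.
// Parameter names mirror the assumed cloudProviderBackoff* settings.
func resourceRequestBackoff(retries int, exponent float64, durationSec int, jitter float64) wait.Backoff {
	return wait.Backoff{
		Steps:    retries,                                  // max attempts
		Factor:   exponent,                                 // per-step delay multiplier
		Duration: time.Duration(durationSec) * time.Second, // initial delay
		Jitter:   jitter,                                   // randomization to spread retries
	}
}

func main() {
	fmt.Printf("%+v\n", resourceRequestBackoff(6, 1.5, 5, 1.0))
}
```

Jitter matters for the large-cluster install scenario described above: without it, many nodes that failed together would all retry together.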
```diff
@@ -41,9 +41,18 @@ func (az *Cloud) NodeAddresses(name types.NodeName) ([]v1.NodeAddress, error) {
 	}
 	ip, err := az.getIPForMachine(name)
 	if err != nil {
-		glog.Errorf("error: az.NodeAddresses, az.getIPForMachine(%s), err=%v", name, err)
-		return nil, err
+		if az.CloudProviderBackoff {
+			glog.V(2).Infof("NodeAddresses(%s) backing off", name)
+			ip, err = az.GetIPForMachineWithRetry(name)
+			if err != nil {
+				glog.V(2).Infof("NodeAddresses(%s) abort backoff", name)
+				return nil, err
+			}
+		} else {
+			glog.Errorf("error: az.NodeAddresses, az.getIPForMachine(%s), err=%v", name, err)
+			return nil, err
+		}
 	}
 
 	return []v1.NodeAddress{
 		{Type: v1.NodeInternalIP, Address: ip},
```
```diff
@@ -25,6 +25,7 @@ import (
 	utilerrors "k8s.io/apimachinery/pkg/util/errors"
 	serviceapi "k8s.io/kubernetes/pkg/api/v1/service"
 
+	"github.com/Azure/azure-sdk-for-go/arm/compute"
 	"github.com/Azure/azure-sdk-for-go/arm/network"
 	"github.com/Azure/go-autorest/autorest/to"
 	"github.com/golang/glog"
```
```diff
@@ -871,12 +872,22 @@ func findSecurityRule(rules []network.SecurityRule, rule network.SecurityRule) b
 // This ensures the given VM's Primary NIC's Primary IP Configuration is
 // participating in the specified LoadBalancer Backend Pool.
 func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, backendPoolID string) error {
+	var machine compute.VirtualMachine
 	vmName := mapNodeNameToVMName(nodeName)
 	az.operationPollRateLimiter.Accept()
 	machine, err := az.VirtualMachinesClient.Get(az.ResourceGroup, vmName, "")
 	if err != nil {
-		return err
+		if az.CloudProviderBackoff {
+			glog.V(2).Infof("ensureHostInPool(%s, %s, %s) backing off", serviceName, nodeName, backendPoolID)
+			machine, err = az.VirtualMachineClientGetWithRetry(az.ResourceGroup, vmName, "")
+			if err != nil {
+				glog.V(2).Infof("ensureHostInPool(%s, %s, %s) abort backoff", serviceName, nodeName, backendPoolID)
+				return err
+			}
+		} else {
+			return err
+		}
 	}
 
 	primaryNicID, err := getPrimaryInterfaceID(machine)
 	if err != nil {
```
```diff
@@ -249,6 +249,7 @@ outer:
 }
 
 func (az *Cloud) getIPForMachine(nodeName types.NodeName) (string, error) {
+	az.operationPollRateLimiter.Accept()
 	machine, exists, err := az.getVirtualMachine(nodeName)
 	if !exists {
 		return "", cloudprovider.InstanceNotFound
```
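Note the `az.operationPollRateLimiter.Accept()` call added above: it blocks until the provider's token-bucket rate limiter permits another outbound API call, complementing the backoff retries. A minimal sketch of that blocking pattern using client-go's flowcontrol package (the QPS and burst values are made up for illustration):

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/flowcontrol"
)

func main() {
	// Token bucket: roughly 2 calls per second, with bursts of up to 5.
	limiter := flowcontrol.NewTokenBucketRateLimiter(2.0, 5)

	start := time.Now()
	for i := 0; i < 8; i++ {
		limiter.Accept() // blocks until a token is available
		fmt.Printf("call %d at %v\n", i, time.Since(start).Round(10*time.Millisecond))
	}
}
```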