Merge pull request #48967 from jackfrancis/azure-lb-backoff

Automatic merge from submit-queue (batch tested with PRs 49218, 48253, 48967, 48460, 49230)

additional backoff in azure cloudprovider

Fixes #48971

**What this PR does / why we need it**:

We want to be able to opt in to backoff retry logic for kubelet-originating request behavior: node IP address resolution and node load balancer pool membership enforcement.

**Special notes for your reviewer**:

The use-case for this is azure cloudprovider clusters with large node counts, especially during cluster installation, or other scenarios when lots of nodes come online at once and attempt to register all resources with the backend API. To allow clusters at scale more control over the API request rate in-cluster, backoff config has the ability to meaningful slow down this rate, when appropriate.

**Release note**:

```additional backoff in azure cloudprovider
```
This commit is contained in:
Kubernetes Submit Queue 2017-07-19 20:05:34 -07:00 committed by GitHub
commit ecadada7ef
4 changed files with 56 additions and 3 deletions

View File

@ -43,6 +43,38 @@ func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.Virtua
return machine, exists, err
}
// VirtualMachineClientGetWithRetry invokes az.VirtualMachinesClient.Get with exponential backoff retry
func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string, types compute.InstanceViewTypes) (compute.VirtualMachine, error) {
var machine compute.VirtualMachine
err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
var retryErr error
machine, retryErr = az.VirtualMachinesClient.Get(resourceGroup, vmName, types)
if retryErr != nil {
glog.Errorf("backoff: failure, will retry,err=%v", retryErr)
return false, nil
}
glog.V(2).Infof("backoff: success")
return true, nil
})
return machine, err
}
// GetIPForMachineWithRetry invokes az.getIPForMachine with exponential backoff retry
func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, error) {
var ip string
err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
var retryErr error
ip, retryErr = az.getIPForMachine(name)
if retryErr != nil {
glog.Errorf("backoff: failure, will retry,err=%v", retryErr)
return false, nil
}
glog.V(2).Infof("backoff: success")
return true, nil
})
return ip, err
}
// CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {

View File

@ -41,9 +41,18 @@ func (az *Cloud) NodeAddresses(name types.NodeName) ([]v1.NodeAddress, error) {
}
ip, err := az.getIPForMachine(name)
if err != nil {
if az.CloudProviderBackoff {
glog.V(2).Infof("NodeAddresses(%s) backing off", name)
ip, err = az.GetIPForMachineWithRetry(name)
if err != nil {
glog.V(2).Infof("NodeAddresses(%s) abort backoff", name)
return nil, err
}
} else {
glog.Errorf("error: az.NodeAddresses, az.getIPForMachine(%s), err=%v", name, err)
return nil, err
}
}
return []v1.NodeAddress{
{Type: v1.NodeInternalIP, Address: ip},

View File

@ -25,6 +25,7 @@ import (
utilerrors "k8s.io/apimachinery/pkg/util/errors"
serviceapi "k8s.io/kubernetes/pkg/api/v1/service"
"github.com/Azure/azure-sdk-for-go/arm/compute"
"github.com/Azure/azure-sdk-for-go/arm/network"
"github.com/Azure/go-autorest/autorest/to"
"github.com/golang/glog"
@ -871,12 +872,22 @@ func findSecurityRule(rules []network.SecurityRule, rule network.SecurityRule) b
// This ensures the given VM's Primary NIC's Primary IP Configuration is
// participating in the specified LoadBalancer Backend Pool.
func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, backendPoolID string) error {
var machine compute.VirtualMachine
vmName := mapNodeNameToVMName(nodeName)
az.operationPollRateLimiter.Accept()
machine, err := az.VirtualMachinesClient.Get(az.ResourceGroup, vmName, "")
if err != nil {
if az.CloudProviderBackoff {
glog.V(2).Infof("ensureHostInPool(%s, %s, %s) backing off", serviceName, nodeName, backendPoolID)
machine, err = az.VirtualMachineClientGetWithRetry(az.ResourceGroup, vmName, "")
if err != nil {
glog.V(2).Infof("ensureHostInPool(%s, %s, %s) abort backoff", serviceName, nodeName, backendPoolID)
return err
}
} else {
return err
}
}
primaryNicID, err := getPrimaryInterfaceID(machine)
if err != nil {

View File

@ -249,6 +249,7 @@ outer:
}
func (az *Cloud) getIPForMachine(nodeName types.NodeName) (string, error) {
az.operationPollRateLimiter.Accept()
machine, exists, err := az.getVirtualMachine(nodeName)
if !exists {
return "", cloudprovider.InstanceNotFound