From 2525ef9983dfc3fb1da5d64b3836ac15d98cdeaa Mon Sep 17 00:00:00 2001 From: Jack Francis Date: Fri, 14 Jul 2017 15:16:47 -0700 Subject: [PATCH 1/2] VirtualMachinesClient.Get backoff in lb pool logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EnsureHostInPool() submits a GET to azure API for VM info. We’re seeing this on agent node kubelets and would like to enable configurable backoff engagement for 4xx responses to be able to slow down the rate of reconciliation, when appropriate. --- .../providers/azure/azure_backoff.go | 16 ++++++++++++++++ .../providers/azure/azure_loadbalancer.go | 13 ++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/pkg/cloudprovider/providers/azure/azure_backoff.go b/pkg/cloudprovider/providers/azure/azure_backoff.go index 839592f3035..8d092983c13 100644 --- a/pkg/cloudprovider/providers/azure/azure_backoff.go +++ b/pkg/cloudprovider/providers/azure/azure_backoff.go @@ -43,6 +43,22 @@ func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.Virtua return machine, exists, err } +// VirtualMachineClientGetWithRetry invokes az.VirtualMachinesClient.Get with exponential backoff retry +func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string, types compute.InstanceViewTypes) (compute.VirtualMachine, error) { + var machine compute.VirtualMachine + err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + var retryErr error + machine, retryErr = az.VirtualMachinesClient.Get(resourceGroup, vmName, types) + if retryErr != nil { + glog.Errorf("backoff: failure, will retry,err=%v", retryErr) + return false, nil + } + glog.V(2).Infof("backoff: success") + return true, nil + }) + return machine, err +} + // CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error { return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go index 9b959b4e4cb..868cb611a73 100644 --- a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go +++ b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go @@ -25,6 +25,7 @@ import ( utilerrors "k8s.io/apimachinery/pkg/util/errors" serviceapi "k8s.io/kubernetes/pkg/api/v1/service" + "github.com/Azure/azure-sdk-for-go/arm/compute" "github.com/Azure/azure-sdk-for-go/arm/network" "github.com/Azure/go-autorest/autorest/to" "github.com/golang/glog" @@ -871,11 +872,21 @@ func findSecurityRule(rules []network.SecurityRule, rule network.SecurityRule) b // This ensures the given VM's Primary NIC's Primary IP Configuration is // participating in the specified LoadBalancer Backend Pool. func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, backendPoolID string) error { + var machine compute.VirtualMachine vmName := mapNodeNameToVMName(nodeName) az.operationPollRateLimiter.Accept() machine, err := az.VirtualMachinesClient.Get(az.ResourceGroup, vmName, "") if err != nil { - return err + if az.CloudProviderBackoff { + glog.V(2).Infof("ensureHostInPool(%s, %s, %s) backing off", serviceName, nodeName, backendPoolID) + machine, err = az.VirtualMachineClientGetWithRetry(az.ResourceGroup, vmName, "") + if err != nil { + glog.V(2).Infof("ensureHostInPool(%s, %s, %s) abort backoff", serviceName, nodeName, backendPoolID) + return err + } + } else { + return err + } } primaryNicID, err := getPrimaryInterfaceID(machine) From f76ef29512146b60a5afb6b69f2321c328432c17 Mon Sep 17 00:00:00 2001 From: Jack Francis Date: Fri, 14 Jul 2017 17:13:40 -0700 Subject: [PATCH 2/2] backing off az.getIPForMachine in az.NodeAddresses also rate limiting the call to az.getVirtualMachine inside az.getIPForMachine --- .../providers/azure/azure_backoff.go | 16 ++++++++++++++++ .../providers/azure/azure_instances.go | 13 +++++++++++-- pkg/cloudprovider/providers/azure/azure_util.go | 1 + 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_backoff.go b/pkg/cloudprovider/providers/azure/azure_backoff.go index 8d092983c13..00544b1b988 100644 --- a/pkg/cloudprovider/providers/azure/azure_backoff.go +++ b/pkg/cloudprovider/providers/azure/azure_backoff.go @@ -59,6 +59,22 @@ func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string, return machine, err } +// GetIPForMachineWithRetry invokes az.getIPForMachine with exponential backoff retry +func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, error) { + var ip string + err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { + var retryErr error + ip, retryErr = az.getIPForMachine(name) + if retryErr != nil { + glog.Errorf("backoff: failure, will retry,err=%v", retryErr) + return false, nil + } + glog.V(2).Infof("backoff: success") + return true, nil + }) + return ip, err +} + // CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error { return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) { diff --git a/pkg/cloudprovider/providers/azure/azure_instances.go b/pkg/cloudprovider/providers/azure/azure_instances.go index 256428d8a39..f683093ac3a 100644 --- a/pkg/cloudprovider/providers/azure/azure_instances.go +++ b/pkg/cloudprovider/providers/azure/azure_instances.go @@ -41,8 +41,17 @@ func (az *Cloud) NodeAddresses(name types.NodeName) ([]v1.NodeAddress, error) { } ip, err := az.getIPForMachine(name) if err != nil { - glog.Errorf("error: az.NodeAddresses, az.getIPForMachine(%s), err=%v", name, err) - return nil, err + if az.CloudProviderBackoff { + glog.V(2).Infof("NodeAddresses(%s) backing off", name) + ip, err = az.GetIPForMachineWithRetry(name) + if err != nil { + glog.V(2).Infof("NodeAddresses(%s) abort backoff", name) + return nil, err + } + } else { + glog.Errorf("error: az.NodeAddresses, az.getIPForMachine(%s), err=%v", name, err) + return nil, err + } } return []v1.NodeAddress{ diff --git a/pkg/cloudprovider/providers/azure/azure_util.go b/pkg/cloudprovider/providers/azure/azure_util.go index 5f59da85918..8bbbb1104fc 100644 --- a/pkg/cloudprovider/providers/azure/azure_util.go +++ b/pkg/cloudprovider/providers/azure/azure_util.go @@ -249,6 +249,7 @@ outer: } func (az *Cloud) getIPForMachine(nodeName types.NodeName) (string, error) { + az.operationPollRateLimiter.Accept() machine, exists, err := az.getVirtualMachine(nodeName) if !exists { return "", cloudprovider.InstanceNotFound