diff --git a/pkg/cloudprovider/providers/azure/azure_backoff_test.go b/pkg/cloudprovider/providers/azure/azure_backoff_test.go index 767c7cc30e3..d0a1399e27c 100644 --- a/pkg/cloudprovider/providers/azure/azure_backoff_test.go +++ b/pkg/cloudprovider/providers/azure/azure_backoff_test.go @@ -50,7 +50,6 @@ func TestShouldRetryHTTPRequest(t *testing.T) { expected: false, }, } - for _, test := range tests { resp := &http.Response{ StatusCode: test.code, @@ -85,7 +84,7 @@ func TestIsSuccessResponse(t *testing.T) { resp := http.Response{ StatusCode: test.code, } - res := isSuccessHTTPResponse(resp) + res := isSuccessHTTPResponse(&resp) if res != test.expected { t.Errorf("expected: %v, saw: %v", test.expected, res) } diff --git a/pkg/cloudprovider/providers/azure/azure_client.go b/pkg/cloudprovider/providers/azure/azure_client.go index 94a97034275..e7d2a9d6cec 100644 --- a/pkg/cloudprovider/providers/azure/azure_client.go +++ b/pkg/cloudprovider/providers/azure/azure_client.go @@ -145,6 +145,10 @@ type azClientConfig struct { //Details: https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits rateLimiterReader flowcontrol.RateLimiter rateLimiterWriter flowcontrol.RateLimiter + + CloudProviderBackoffRetries int + CloudProviderBackoffDuration int + ShouldOmitCloudProviderBackoff bool } // azVirtualMachinesClient implements VirtualMachinesClient. @@ -163,6 +167,10 @@ func newAzVirtualMachinesClient(config *azClientConfig) *azVirtualMachinesClient virtualMachinesClient.BaseURI = config.resourceManagerEndpoint virtualMachinesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) virtualMachinesClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + virtualMachinesClient.RetryAttempts = config.CloudProviderBackoffRetries + virtualMachinesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&virtualMachinesClient.Client) return &azVirtualMachinesClient{ @@ -254,6 +262,10 @@ func newAzInterfacesClient(config *azClientConfig) *azInterfacesClient { interfacesClient.BaseURI = config.resourceManagerEndpoint interfacesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) interfacesClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + interfacesClient.RetryAttempts = config.CloudProviderBackoffRetries + interfacesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&interfacesClient.Client) return &azInterfacesClient{ @@ -333,6 +345,10 @@ func newAzLoadBalancersClient(config *azClientConfig) *azLoadBalancersClient { loadBalancerClient.BaseURI = config.resourceManagerEndpoint loadBalancerClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) loadBalancerClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + loadBalancerClient.RetryAttempts = config.CloudProviderBackoffRetries + loadBalancerClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&loadBalancerClient.Client) return &azLoadBalancersClient{ @@ -449,6 +465,10 @@ func newAzPublicIPAddressesClient(config *azClientConfig) *azPublicIPAddressesCl publicIPAddressClient.BaseURI = config.resourceManagerEndpoint publicIPAddressClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) publicIPAddressClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + publicIPAddressClient.RetryAttempts = config.CloudProviderBackoffRetries + publicIPAddressClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&publicIPAddressClient.Client) return &azPublicIPAddressesClient{ @@ -564,6 +584,10 @@ func newAzSubnetsClient(config *azClientConfig) *azSubnetsClient { subnetsClient.BaseURI = config.resourceManagerEndpoint subnetsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) subnetsClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + subnetsClient.RetryAttempts = config.CloudProviderBackoffRetries + subnetsClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&subnetsClient.Client) return &azSubnetsClient{ @@ -679,6 +703,10 @@ func newAzSecurityGroupsClient(config *azClientConfig) *azSecurityGroupsClient { securityGroupsClient.BaseURI = config.resourceManagerEndpoint securityGroupsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) securityGroupsClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + securityGroupsClient.RetryAttempts = config.CloudProviderBackoffRetries + securityGroupsClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&securityGroupsClient.Client) return &azSecurityGroupsClient{ @@ -794,6 +822,10 @@ func newAzVirtualMachineScaleSetsClient(config *azClientConfig) *azVirtualMachin virtualMachineScaleSetsClient.BaseURI = config.resourceManagerEndpoint virtualMachineScaleSetsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) virtualMachineScaleSetsClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + virtualMachineScaleSetsClient.RetryAttempts = config.CloudProviderBackoffRetries + virtualMachineScaleSetsClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&virtualMachineScaleSetsClient.Client) return &azVirtualMachineScaleSetsClient{ @@ -910,6 +942,10 @@ func newAzVirtualMachineScaleSetVMsClient(config *azClientConfig) *azVirtualMach virtualMachineScaleSetVMsClient.BaseURI = config.resourceManagerEndpoint virtualMachineScaleSetVMsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) virtualMachineScaleSetVMsClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + virtualMachineScaleSetVMsClient.RetryAttempts = config.CloudProviderBackoffRetries + virtualMachineScaleSetVMsClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&virtualMachineScaleSetVMsClient.Client) return &azVirtualMachineScaleSetVMsClient{ @@ -1018,6 +1054,10 @@ func newAzRoutesClient(config *azClientConfig) *azRoutesClient { routesClient.BaseURI = config.resourceManagerEndpoint routesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) routesClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + routesClient.RetryAttempts = config.CloudProviderBackoffRetries + routesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&routesClient.Client) return &azRoutesClient{ @@ -1087,6 +1127,10 @@ func newAzRouteTablesClient(config *azClientConfig) *azRouteTablesClient { routeTablesClient.BaseURI = config.resourceManagerEndpoint routeTablesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) routeTablesClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + routeTablesClient.RetryAttempts = config.CloudProviderBackoffRetries + routeTablesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&routeTablesClient.Client) return &azRouteTablesClient{ @@ -1148,6 +1192,10 @@ func newAzStorageAccountClient(config *azClientConfig) *azStorageAccountClient { storageAccountClient := storage.NewAccountsClientWithBaseURI(config.resourceManagerEndpoint, config.subscriptionID) storageAccountClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) storageAccountClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + storageAccountClient.RetryAttempts = config.CloudProviderBackoffRetries + storageAccountClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&storageAccountClient.Client) return &azStorageAccountClient{ @@ -1259,6 +1307,10 @@ func newAzDisksClient(config *azClientConfig) *azDisksClient { disksClient := compute.NewDisksClientWithBaseURI(config.resourceManagerEndpoint, config.subscriptionID) disksClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) disksClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + disksClient.RetryAttempts = config.CloudProviderBackoffRetries + disksClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&disksClient.Client) return &azDisksClient{ @@ -1345,6 +1397,10 @@ func newAzVirtualMachineSizesClient(config *azClientConfig) *azVirtualMachineSiz VirtualMachineSizesClient.BaseURI = config.resourceManagerEndpoint VirtualMachineSizesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) VirtualMachineSizesClient.PollingDelay = 5 * time.Second + if config.ShouldOmitCloudProviderBackoff { + VirtualMachineSizesClient.RetryAttempts = config.CloudProviderBackoffRetries + VirtualMachineSizesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second + } configureUserAgent(&VirtualMachineSizesClient.Client) return &azVirtualMachineSizesClient{ diff --git a/pkg/cloudprovider/providers/azure/azure_instances.go b/pkg/cloudprovider/providers/azure/azure_instances.go index 158ffb976ca..69d90aec39c 100644 --- a/pkg/cloudprovider/providers/azure/azure_instances.go +++ b/pkg/cloudprovider/providers/azure/azure_instances.go @@ -48,7 +48,7 @@ func (az *Cloud) NodeAddresses(ctx context.Context, name types.NodeName) ([]v1.N } addressGetter := func(nodeName types.NodeName) ([]v1.NodeAddress, error) { - ip, publicIP, err := az.GetIPForMachineWithRetry(nodeName) + ip, publicIP, err := az.getIPForMachine(nodeName) if err != nil { klog.V(2).Infof("NodeAddresses(%s) abort backoff: %v", nodeName, err) return nil, err diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go index 77bfdd20de2..bab5f1daaff 100644 --- a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go +++ b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go @@ -27,11 +27,11 @@ import ( "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" cloudprovider "k8s.io/cloud-provider" + "k8s.io/klog" serviceapi "k8s.io/kubernetes/pkg/api/v1/service" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network" "github.com/Azure/go-autorest/autorest/to" - "k8s.io/klog" ) const ( @@ -210,7 +210,7 @@ func (az *Cloud) getServiceLoadBalancer(service *v1.Service, clusterName string, primaryVMSetName := az.vmSet.GetPrimaryVMSetName() defaultLBName := az.getAzureLoadBalancerName(clusterName, primaryVMSetName, isInternal) - existingLBs, err := az.ListLBWithRetry(service) + existingLBs, err := az.ListLB(service) if err != nil { return nil, nil, false, err } @@ -387,7 +387,7 @@ func (az *Cloud) determinePublicIPName(clusterName string, service *v1.Service) pipResourceGroup := az.getPublicIPAddressResourceGroup(service) - pips, err := az.ListPIPWithRetry(service, pipResourceGroup) + pips, err := az.ListPIP(service, pipResourceGroup) if err != nil { return "", err } @@ -474,13 +474,13 @@ func (az *Cloud) ensurePublicIPExists(service *v1.Service, pipName string, domai } klog.V(2).Infof("ensurePublicIPExists for service(%s): pip(%s) - creating", serviceName, *pip.Name) - klog.V(10).Infof("CreateOrUpdatePIPWithRetry(%s, %q): start", pipResourceGroup, *pip.Name) - err = az.CreateOrUpdatePIPWithRetry(service, pipResourceGroup, pip) + klog.V(10).Infof("CreateOrUpdatePIP(%s, %q): start", pipResourceGroup, *pip.Name) + err = az.CreateOrUpdatePIP(service, pipResourceGroup, pip) if err != nil { klog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - creating", serviceName, *pip.Name) return nil, err } - klog.V(10).Infof("CreateOrUpdatePIPWithRetry(%s, %q): end", pipResourceGroup, *pip.Name) + klog.V(10).Infof("CreateOrUpdatePIP(%s, %q): end", pipResourceGroup, *pip.Name) ctx, cancel := getContextWithCancel() defer cancel() @@ -818,16 +818,16 @@ func (az *Cloud) reconcileLoadBalancer(clusterName string, service *v1.Service, klog.V(10).Infof("EnsureBackendPoolDeleted(%s, %s): end", lbBackendPoolID, vmSetName) // Remove the LB. - klog.V(10).Infof("reconcileLoadBalancer: az.DeleteLBWithRetry(%q): start", lbName) - err = az.DeleteLBWithRetry(service, lbName) + klog.V(10).Infof("reconcileLoadBalancer: az.DeleteLB(%q): start", lbName) + err = az.DeleteLB(service, lbName) if err != nil { klog.V(2).Infof("reconcileLoadBalancer for service(%s) abort backoff: lb(%s) - deleting; no remaining frontendIPConfigurations", serviceName, lbName) return nil, err } - klog.V(10).Infof("az.DeleteLBWithRetry(%q): end", lbName) + klog.V(10).Infof("az.DeleteLB(%q): end", lbName) } else { klog.V(2).Infof("reconcileLoadBalancer: reconcileLoadBalancer for service(%s): lb(%s) - updating", serviceName, lbName) - err := az.CreateOrUpdateLBWithRetry(service, *lb) + err := az.CreateOrUpdateLB(service, *lb) if err != nil { klog.V(2).Infof("reconcileLoadBalancer for service(%s) abort backoff: lb(%s) - updating", serviceName, lbName) return nil, err @@ -1143,8 +1143,8 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, if dirtySg { sg.SecurityRules = &updatedRules klog.V(2).Infof("reconcileSecurityGroup for service(%s): sg(%s) - updating", serviceName, *sg.Name) - klog.V(10).Infof("CreateOrUpdateSGWithRetry(%q): start", *sg.Name) - err := az.CreateOrUpdateSGWithRetry(service, sg) + klog.V(10).Infof("CreateOrUpdateSecurityGroup(%q): start", *sg.Name) + err := az.CreateOrUpdateSecurityGroup(service, sg) if err != nil { klog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name) // TODO (Nov 2017): remove when augmented security rules are out of preview @@ -1157,7 +1157,7 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service, // END TODO return nil, err } - klog.V(10).Infof("CreateOrUpdateSGWithRetry(%q): end", *sg.Name) + klog.V(10).Infof("CreateOrUpdateSecurityGroup(%q): end", *sg.Name) } return &sg, nil } @@ -1315,7 +1315,7 @@ func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, lb * pipResourceGroup := az.getPublicIPAddressResourceGroup(service) - pips, err := az.ListPIPWithRetry(service, pipResourceGroup) + pips, err := az.ListPIP(service, pipResourceGroup) if err != nil { return nil, err } @@ -1414,7 +1414,7 @@ func (az *Cloud) safeDeletePublicIP(service *v1.Service, pipResourceGroup string // Update load balancer when frontendIPConfigUpdated or loadBalancerRuleUpdated. if frontendIPConfigUpdated || loadBalancerRuleUpdated { - err := az.CreateOrUpdateLBWithRetry(service, *lb) + err := az.CreateOrUpdateLB(service, *lb) if err != nil { klog.Errorf("safeDeletePublicIP for service(%s) failed with error: %v", getServiceName(service), err) return err @@ -1423,14 +1423,14 @@ func (az *Cloud) safeDeletePublicIP(service *v1.Service, pipResourceGroup string } pipName := to.String(pip.Name) - klog.V(10).Infof("DeletePublicIPWithRetry(%s, %q): start", pipResourceGroup, pipName) - err := az.DeletePublicIPWithRetry(service, pipResourceGroup, pipName) + klog.V(10).Infof("DeletePublicIP(%s, %q): start", pipResourceGroup, pipName) + err := az.DeletePublicIP(service, pipResourceGroup, pipName) if err != nil { if err = ignoreStatusNotFoundFromError(err); err != nil { return err } } - klog.V(10).Infof("DeletePublicIPWithRetry(%s, %q): end", pipResourceGroup, pipName) + klog.V(10).Infof("DeletePublicIP(%s, %q): end", pipResourceGroup, pipName) return nil } diff --git a/pkg/cloudprovider/providers/azure/azure_routes.go b/pkg/cloudprovider/providers/azure/azure_routes.go index 36219b2f917..3a586263148 100644 --- a/pkg/cloudprovider/providers/azure/azure_routes.go +++ b/pkg/cloudprovider/providers/azure/azure_routes.go @@ -20,11 +20,11 @@ import ( "context" "fmt" - cloudprovider "k8s.io/cloud-provider" - "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network" "github.com/Azure/go-autorest/autorest/to" + "k8s.io/apimachinery/pkg/types" + cloudprovider "k8s.io/cloud-provider" "k8s.io/klog" ) @@ -104,18 +104,7 @@ func (az *Cloud) createRouteTable() error { } klog.V(3).Infof("createRouteTableIfNotExists: creating routetable. routeTableName=%q", az.RouteTableName) - ctx, cancel := getContextWithCancel() - defer cancel() - resp, err := az.RouteTablesClient.CreateOrUpdate(ctx, az.ResourceGroup, az.RouteTableName, routeTable) - klog.V(10).Infof("RouteTablesClient.CreateOrUpdate(%q): end", az.RouteTableName) - if az.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) { - klog.V(2).Infof("createRouteTableIfNotExists backing off: creating routetable. routeTableName=%q", az.RouteTableName) - retryErr := az.CreateOrUpdateRouteTableWithRetry(routeTable) - if retryErr != nil { - err = retryErr - klog.V(2).Infof("createRouteTableIfNotExists abort backoff: creating routetable. routeTableName=%q", az.RouteTableName) - } - } + err := az.CreateOrUpdateRouteTable(routeTable) if err != nil { return err } @@ -163,18 +152,7 @@ func (az *Cloud) CreateRoute(ctx context.Context, clusterName string, nameHint s } klog.V(3).Infof("CreateRoute: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR) - ctx, cancel := getContextWithCancel() - defer cancel() - resp, err := az.RoutesClient.CreateOrUpdate(ctx, az.ResourceGroup, az.RouteTableName, *route.Name, route) - klog.V(10).Infof("RoutesClient.CreateOrUpdate(%q): end", az.RouteTableName) - if az.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) { - klog.V(2).Infof("CreateRoute backing off: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR) - retryErr := az.CreateOrUpdateRouteWithRetry(route) - if retryErr != nil { - err = retryErr - klog.V(2).Infof("CreateRoute abort backoff: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR) - } - } + err = az.CreateOrUpdateRoute(route) if err != nil { return err } @@ -202,20 +180,8 @@ func (az *Cloud) DeleteRoute(ctx context.Context, clusterName string, kubeRoute klog.V(2).Infof("DeleteRoute: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR) - ctx, cancel := getContextWithCancel() - defer cancel() routeName := mapNodeNameToRouteName(kubeRoute.TargetNode) - resp, err := az.RoutesClient.Delete(ctx, az.ResourceGroup, az.RouteTableName, routeName) - klog.V(10).Infof("RoutesClient.Delete(%q): end", az.RouteTableName) - - if az.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) { - klog.V(2).Infof("DeleteRoute backing off: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR) - retryErr := az.DeleteRouteWithRetry(routeName) - if retryErr != nil { - err = retryErr - klog.V(2).Infof("DeleteRoute abort backoff: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR) - } - } + err = az.DeleteRouteWithName(routeName) if err != nil { return err } diff --git a/pkg/cloudprovider/providers/azure/azure_standard.go b/pkg/cloudprovider/providers/azure/azure_standard.go index 95126f58b0f..9bd90b5f937 100644 --- a/pkg/cloudprovider/providers/azure/azure_standard.go +++ b/pkg/cloudprovider/providers/azure/azure_standard.go @@ -26,16 +26,16 @@ import ( "strconv" "strings" - "k8s.io/api/core/v1" - cloudprovider "k8s.io/cloud-provider" - "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network" "github.com/Azure/go-autorest/autorest/to" + + "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" utilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/uuid" + cloudprovider "k8s.io/cloud-provider" "k8s.io/klog" ) @@ -294,10 +294,6 @@ outer: return -1, fmt.Errorf("securityGroup priorities are exhausted") } -func (az *Cloud) getIPForMachine(nodeName types.NodeName) (string, string, error) { - return az.vmSet.GetIPByNodeName(string(nodeName)) -} - var polyTable = crc32.MakeTable(crc32.Koopman) //MakeCRC32 : convert string to CRC32 format @@ -460,9 +456,9 @@ func (as *availabilitySet) GetIPByNodeName(name string) (string, string, error) // getAgentPoolAvailabiliySets lists the virtual machines for the resource group and then builds // a list of availability sets that match the nodes available to k8s. func (as *availabilitySet) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAvailabilitySets *[]string, err error) { - vms, err := as.VirtualMachineClientListWithRetry(as.ResourceGroup) + vms, err := as.ListVirtualMachines(as.ResourceGroup) if err != nil { - klog.Errorf("as.getNodeAvailabilitySet - VirtualMachineClientListWithRetry failed, err=%v", err) + klog.Errorf("as.getNodeAvailabilitySet - ListVirtualMachines failed, err=%v", err) return nil, err } vmNameToAvailabilitySetID := make(map[string]string, len(vms)) @@ -695,18 +691,7 @@ func (as *availabilitySet) ensureHostInPool(service *v1.Service, nodeName types. nicName := *nic.Name klog.V(3).Infof("nicupdate(%s): nic(%s) - updating", serviceName, nicName) - ctx, cancel := getContextWithCancel() - defer cancel() - resp, err := as.InterfacesClient.CreateOrUpdate(ctx, as.ResourceGroup, *nic.Name, nic) - klog.V(10).Infof("InterfacesClient.CreateOrUpdate(%q): end", *nic.Name) - if as.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) { - klog.V(2).Infof("nicupdate(%s) backing off: nic(%s) - updating, err=%v", serviceName, nicName, err) - retryErr := as.CreateOrUpdateInterfaceWithRetry(service, nic) - if retryErr != nil { - err = retryErr - klog.V(2).Infof("nicupdate(%s) abort backoff: nic(%s) - updating", serviceName, nicName) - } - } + err := as.CreateOrUpdateInterface(service, nic) if err != nil { return err } diff --git a/pkg/cloudprovider/providers/azure/azure_vmss.go b/pkg/cloudprovider/providers/azure/azure_vmss.go index 4d5df404628..26d1bd9d1af 100644 --- a/pkg/cloudprovider/providers/azure/azure_vmss.go +++ b/pkg/cloudprovider/providers/azure/azure_vmss.go @@ -27,13 +27,13 @@ import ( "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network" "github.com/Azure/go-autorest/autorest/to" - "k8s.io/klog" "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" cloudprovider "k8s.io/cloud-provider" + "k8s.io/klog" ) var ( @@ -563,6 +563,30 @@ func (ss *scaleSet) GetPrimaryInterface(nodeName string) (network.Interface, err return nic, nil } +// getScaleSet gets scale set with exponential backoff retry +func (ss *scaleSet) getScaleSet(service *v1.Service, name string) (compute.VirtualMachineScaleSet, bool, error) { + if ss.Config.shouldOmitCloudProviderBackoff() { + var result compute.VirtualMachineScaleSet + var exists bool + + cached, err := ss.vmssCache.Get(name) + if err != nil { + ss.Event(service, v1.EventTypeWarning, "GetVirtualMachineScaleSet", err.Error()) + klog.Errorf("backoff: failure for scale set %q, will retry,err=%v", name, err) + return result, false, nil + } + + if cached != nil { + exists = true + result = *(cached.(*compute.VirtualMachineScaleSet)) + } + + return result, exists, err + } + + return ss.getScaleSetWithRetry(service, name) +} + // getScaleSetWithRetry gets scale set with exponential backoff retry func (ss *scaleSet) getScaleSetWithRetry(service *v1.Service, name string) (compute.VirtualMachineScaleSet, bool, error) { var result compute.VirtualMachineScaleSet @@ -621,6 +645,19 @@ func (ss *scaleSet) getPrimaryIPConfigForScaleSet(config *compute.VirtualMachine return nil, fmt.Errorf("failed to find a primary IP configuration for the scale set %q", scaleSetName) } +// createOrUpdateVMSS invokes ss.VirtualMachineScaleSetsClient.CreateOrUpdate with exponential backoff retry. +func (ss *scaleSet) createOrUpdateVMSS(service *v1.Service, virtualMachineScaleSet compute.VirtualMachineScaleSet) error { + if ss.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + resp, err := ss.VirtualMachineScaleSetsClient.CreateOrUpdate(ctx, ss.ResourceGroup, *virtualMachineScaleSet.Name, virtualMachineScaleSet) + klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%s): end", *virtualMachineScaleSet.Name) + return ss.processHTTPResponse(service, "CreateOrUpdateVMSS", resp, err) + } + + return ss.createOrUpdateVMSSWithRetry(service, virtualMachineScaleSet) +} + // createOrUpdateVMSSWithRetry invokes ss.VirtualMachineScaleSetsClient.CreateOrUpdate with exponential backoff retry. func (ss *scaleSet) createOrUpdateVMSSWithRetry(service *v1.Service, virtualMachineScaleSet compute.VirtualMachineScaleSet) error { return wait.ExponentialBackoff(ss.requestBackoff(), func() (bool, error) { @@ -632,6 +669,19 @@ func (ss *scaleSet) createOrUpdateVMSSWithRetry(service *v1.Service, virtualMach }) } +// updateVMSSInstances invokes ss.VirtualMachineScaleSetsClient.UpdateInstances with exponential backoff retry. +func (ss *scaleSet) updateVMSSInstances(service *v1.Service, scaleSetName string, vmInstanceIDs compute.VirtualMachineScaleSetVMInstanceRequiredIDs) error { + if ss.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + resp, err := ss.VirtualMachineScaleSetsClient.UpdateInstances(ctx, ss.ResourceGroup, scaleSetName, vmInstanceIDs) + klog.V(10).Infof("VirtualMachineScaleSetsClient.UpdateInstances(%s): end", scaleSetName) + return ss.processHTTPResponse(service, "CreateOrUpdateVMSSInstance", resp, err) + } + + return ss.updateVMSSInstancesWithRetry(service, scaleSetName, vmInstanceIDs) +} + // updateVMSSInstancesWithRetry invokes ss.VirtualMachineScaleSetsClient.UpdateInstances with exponential backoff retry. func (ss *scaleSet) updateVMSSInstancesWithRetry(service *v1.Service, scaleSetName string, vmInstanceIDs compute.VirtualMachineScaleSetVMInstanceRequiredIDs) error { return wait.ExponentialBackoff(ss.requestBackoff(), func() (bool, error) { @@ -687,9 +737,9 @@ func (ss *scaleSet) getNodesScaleSets(nodes []*v1.Node) (map[string]sets.String, func (ss *scaleSet) ensureHostsInVMSetPool(service *v1.Service, backendPoolID string, vmSetName string, instanceIDs []string, isInternal bool) error { klog.V(3).Infof("ensuring hosts %q of scaleset %q in LB backendpool %q", instanceIDs, vmSetName, backendPoolID) serviceName := getServiceName(service) - virtualMachineScaleSet, exists, err := ss.getScaleSetWithRetry(service, vmSetName) + virtualMachineScaleSet, exists, err := ss.getScaleSet(service, vmSetName) if err != nil { - klog.Errorf("ss.getScaleSetWithRetry(%s) for service %q failed: %v", vmSetName, serviceName, err) + klog.Errorf("ss.getScaleSet(%s) for service %q failed: %v", vmSetName, serviceName, err) return err } if !exists { @@ -748,19 +798,7 @@ func (ss *scaleSet) ensureHostsInVMSetPool(service *v1.Service, backendPoolID st }) primaryIPConfiguration.LoadBalancerBackendAddressPools = &newBackendPools - ctx, cancel := getContextWithCancel() - defer cancel() - klog.V(3).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate for service (%s): scale set (%s) - updating", serviceName, vmSetName) - resp, err := ss.VirtualMachineScaleSetsClient.CreateOrUpdate(ctx, ss.ResourceGroup, vmSetName, virtualMachineScaleSet) - klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%q): end", vmSetName) - if ss.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) { - klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate for service (%s): scale set (%s) - updating, err=%v", serviceName, vmSetName, err) - retryErr := ss.createOrUpdateVMSSWithRetry(service, virtualMachineScaleSet) - if retryErr != nil { - err = retryErr - klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate for service (%s) abort backoff: scale set (%s) - updating", serviceName, vmSetName) - } - } + err := ss.createOrUpdateVMSS(service, virtualMachineScaleSet) if err != nil { return err } @@ -770,18 +808,7 @@ func (ss *scaleSet) ensureHostsInVMSetPool(service *v1.Service, backendPoolID st vmInstanceIDs := compute.VirtualMachineScaleSetVMInstanceRequiredIDs{ InstanceIds: &instanceIDs, } - ctx, cancel := getContextWithCancel() - defer cancel() - instanceResp, err := ss.VirtualMachineScaleSetsClient.UpdateInstances(ctx, ss.ResourceGroup, vmSetName, vmInstanceIDs) - klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%q): end", vmSetName) - if ss.CloudProviderBackoff && shouldRetryHTTPRequest(instanceResp, err) { - klog.V(2).Infof("VirtualMachineScaleSetsClient.UpdateInstances for service (%s): scale set (%s) - updating, err=%v", serviceName, vmSetName, err) - retryErr := ss.updateVMSSInstancesWithRetry(service, vmSetName, vmInstanceIDs) - if retryErr != nil { - err = retryErr - klog.V(2).Infof("VirtualMachineScaleSetsClient.UpdateInstances for service (%s) abort backoff: scale set (%s) - updating", serviceName, vmSetName) - } - } + err = ss.updateVMSSInstances(service, vmSetName, vmInstanceIDs) if err != nil { return err } @@ -833,9 +860,9 @@ func (ss *scaleSet) EnsureHostsInPool(service *v1.Service, nodes []*v1.Node, bac // ensureScaleSetBackendPoolDeleted ensures the loadBalancer backendAddressPools deleted from the specified scaleset. func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID, ssName string) error { klog.V(3).Infof("ensuring backend pool %q deleted from scaleset %q", poolID, ssName) - virtualMachineScaleSet, exists, err := ss.getScaleSetWithRetry(service, ssName) + virtualMachineScaleSet, exists, err := ss.getScaleSet(service, ssName) if err != nil { - klog.Errorf("ss.ensureScaleSetBackendPoolDeleted(%s, %s) getScaleSetWithRetry(%s) failed: %v", poolID, ssName, ssName, err) + klog.Errorf("ss.ensureScaleSetBackendPoolDeleted(%s, %s) getScaleSet(%s) failed: %v", poolID, ssName, ssName, err) return err } if !exists { @@ -879,18 +906,7 @@ func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID // Update scale set with backoff. primaryIPConfiguration.LoadBalancerBackendAddressPools = &newBackendPools klog.V(3).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating", ssName) - ctx, cancel := getContextWithCancel() - defer cancel() - resp, err := ss.VirtualMachineScaleSetsClient.CreateOrUpdate(ctx, ss.ResourceGroup, ssName, virtualMachineScaleSet) - klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%q): end", ssName) - if ss.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) { - klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating, err=%v", ssName, err) - retryErr := ss.createOrUpdateVMSSWithRetry(service, virtualMachineScaleSet) - if retryErr != nil { - err = retryErr - klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate abort backoff: scale set (%s) - updating", ssName) - } - } + err = ss.createOrUpdateVMSS(service, virtualMachineScaleSet) if err != nil { return err } @@ -900,18 +916,7 @@ func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID vmInstanceIDs := compute.VirtualMachineScaleSetVMInstanceRequiredIDs{ InstanceIds: &instanceIDs, } - instanceCtx, instanceCancel := getContextWithCancel() - defer instanceCancel() - instanceResp, err := ss.VirtualMachineScaleSetsClient.UpdateInstances(instanceCtx, ss.ResourceGroup, ssName, vmInstanceIDs) - klog.V(10).Infof("VirtualMachineScaleSetsClient.UpdateInstances(%q): end", ssName) - if ss.CloudProviderBackoff && shouldRetryHTTPRequest(instanceResp, err) { - klog.V(2).Infof("VirtualMachineScaleSetsClient.UpdateInstances scale set (%s) - updating, err=%v", ssName, err) - retryErr := ss.updateVMSSInstancesWithRetry(service, ssName, vmInstanceIDs) - if retryErr != nil { - err = retryErr - klog.V(2).Infof("VirtualMachineScaleSetsClient.UpdateInstances abort backoff: scale set (%s) - updating", ssName) - } - } + err = ss.updateVMSSInstances(service, ssName, vmInstanceIDs) if err != nil { return err } @@ -919,17 +924,9 @@ func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID // Update virtualMachineScaleSet again. This is a workaround for removing VMSS reference from LB. // TODO: remove this workaround when figuring out the root cause. if len(newBackendPools) == 0 { - updateCtx, updateCancel := getContextWithCancel() - defer updateCancel() - klog.V(3).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating second time", ssName) - resp, err = ss.VirtualMachineScaleSetsClient.CreateOrUpdate(updateCtx, ss.ResourceGroup, ssName, virtualMachineScaleSet) - klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%q): end", ssName) - if ss.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) { - klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating, err=%v", ssName, err) - retryErr := ss.createOrUpdateVMSSWithRetry(service, virtualMachineScaleSet) - if retryErr != nil { - klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate abort backoff: scale set (%s) - updating", ssName) - } + err = ss.createOrUpdateVMSS(service, virtualMachineScaleSet) + if err != nil { + klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate abort backoff: scale set (%s) - updating", ssName) } } diff --git a/pkg/cloudprovider/providers/azure/azure_vmss_cache.go b/pkg/cloudprovider/providers/azure/azure_vmss_cache.go index a9a46ba703b..534ba9d445b 100644 --- a/pkg/cloudprovider/providers/azure/azure_vmss_cache.go +++ b/pkg/cloudprovider/providers/azure/azure_vmss_cache.go @@ -132,7 +132,7 @@ func (ss *scaleSet) newAvailabilitySetNodesCache() (*timedCache, error) { } for _, resourceGroup := range resourceGroups.List() { - vmList, err := ss.Cloud.VirtualMachineClientListWithRetry(resourceGroup) + vmList, err := ss.Cloud.ListVirtualMachines(resourceGroup) if err != nil { return nil, err }