Use refactored backoffs in other places

This commit is contained in:
Pengfei Ni 2018-11-19 16:17:13 +08:00
parent 994a2cc4cd
commit 3ef7ef8c6e
8 changed files with 150 additions and 147 deletions

View File

@ -50,7 +50,6 @@ func TestShouldRetryHTTPRequest(t *testing.T) {
expected: false,
},
}
for _, test := range tests {
resp := &http.Response{
StatusCode: test.code,
@ -85,7 +84,7 @@ func TestIsSuccessResponse(t *testing.T) {
resp := http.Response{
StatusCode: test.code,
}
res := isSuccessHTTPResponse(resp)
res := isSuccessHTTPResponse(&resp)
if res != test.expected {
t.Errorf("expected: %v, saw: %v", test.expected, res)
}

View File

@ -145,6 +145,10 @@ type azClientConfig struct {
//Details: https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits
rateLimiterReader flowcontrol.RateLimiter
rateLimiterWriter flowcontrol.RateLimiter
// CloudProviderBackoffRetries is copied into each SDK client's RetryAttempts
// by the client constructors, but only when ShouldOmitCloudProviderBackoff is true.
CloudProviderBackoffRetries int
// CloudProviderBackoffDuration is a number of seconds; the client constructors
// convert it with time.Duration(...) * time.Second and store it as the SDK
// client's RetryDuration, again only when ShouldOmitCloudProviderBackoff is true.
CloudProviderBackoffDuration int
// ShouldOmitCloudProviderBackoff gates whether the two values above are applied
// to the SDK clients; when false the clients' retry settings are left untouched
// by the constructors.
ShouldOmitCloudProviderBackoff bool
}
// azVirtualMachinesClient implements VirtualMachinesClient.
@ -163,6 +167,10 @@ func newAzVirtualMachinesClient(config *azClientConfig) *azVirtualMachinesClient
virtualMachinesClient.BaseURI = config.resourceManagerEndpoint
virtualMachinesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
virtualMachinesClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
virtualMachinesClient.RetryAttempts = config.CloudProviderBackoffRetries
virtualMachinesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&virtualMachinesClient.Client)
return &azVirtualMachinesClient{
@ -254,6 +262,10 @@ func newAzInterfacesClient(config *azClientConfig) *azInterfacesClient {
interfacesClient.BaseURI = config.resourceManagerEndpoint
interfacesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
interfacesClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
interfacesClient.RetryAttempts = config.CloudProviderBackoffRetries
interfacesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&interfacesClient.Client)
return &azInterfacesClient{
@ -333,6 +345,10 @@ func newAzLoadBalancersClient(config *azClientConfig) *azLoadBalancersClient {
loadBalancerClient.BaseURI = config.resourceManagerEndpoint
loadBalancerClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
loadBalancerClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
loadBalancerClient.RetryAttempts = config.CloudProviderBackoffRetries
loadBalancerClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&loadBalancerClient.Client)
return &azLoadBalancersClient{
@ -449,6 +465,10 @@ func newAzPublicIPAddressesClient(config *azClientConfig) *azPublicIPAddressesCl
publicIPAddressClient.BaseURI = config.resourceManagerEndpoint
publicIPAddressClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
publicIPAddressClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
publicIPAddressClient.RetryAttempts = config.CloudProviderBackoffRetries
publicIPAddressClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&publicIPAddressClient.Client)
return &azPublicIPAddressesClient{
@ -564,6 +584,10 @@ func newAzSubnetsClient(config *azClientConfig) *azSubnetsClient {
subnetsClient.BaseURI = config.resourceManagerEndpoint
subnetsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
subnetsClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
subnetsClient.RetryAttempts = config.CloudProviderBackoffRetries
subnetsClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&subnetsClient.Client)
return &azSubnetsClient{
@ -679,6 +703,10 @@ func newAzSecurityGroupsClient(config *azClientConfig) *azSecurityGroupsClient {
securityGroupsClient.BaseURI = config.resourceManagerEndpoint
securityGroupsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
securityGroupsClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
securityGroupsClient.RetryAttempts = config.CloudProviderBackoffRetries
securityGroupsClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&securityGroupsClient.Client)
return &azSecurityGroupsClient{
@ -794,6 +822,10 @@ func newAzVirtualMachineScaleSetsClient(config *azClientConfig) *azVirtualMachin
virtualMachineScaleSetsClient.BaseURI = config.resourceManagerEndpoint
virtualMachineScaleSetsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
virtualMachineScaleSetsClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
virtualMachineScaleSetsClient.RetryAttempts = config.CloudProviderBackoffRetries
virtualMachineScaleSetsClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&virtualMachineScaleSetsClient.Client)
return &azVirtualMachineScaleSetsClient{
@ -910,6 +942,10 @@ func newAzVirtualMachineScaleSetVMsClient(config *azClientConfig) *azVirtualMach
virtualMachineScaleSetVMsClient.BaseURI = config.resourceManagerEndpoint
virtualMachineScaleSetVMsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
virtualMachineScaleSetVMsClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
virtualMachineScaleSetVMsClient.RetryAttempts = config.CloudProviderBackoffRetries
virtualMachineScaleSetVMsClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&virtualMachineScaleSetVMsClient.Client)
return &azVirtualMachineScaleSetVMsClient{
@ -1018,6 +1054,10 @@ func newAzRoutesClient(config *azClientConfig) *azRoutesClient {
routesClient.BaseURI = config.resourceManagerEndpoint
routesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
routesClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
routesClient.RetryAttempts = config.CloudProviderBackoffRetries
routesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&routesClient.Client)
return &azRoutesClient{
@ -1087,6 +1127,10 @@ func newAzRouteTablesClient(config *azClientConfig) *azRouteTablesClient {
routeTablesClient.BaseURI = config.resourceManagerEndpoint
routeTablesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
routeTablesClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
routeTablesClient.RetryAttempts = config.CloudProviderBackoffRetries
routeTablesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&routeTablesClient.Client)
return &azRouteTablesClient{
@ -1148,6 +1192,10 @@ func newAzStorageAccountClient(config *azClientConfig) *azStorageAccountClient {
storageAccountClient := storage.NewAccountsClientWithBaseURI(config.resourceManagerEndpoint, config.subscriptionID)
storageAccountClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
storageAccountClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
storageAccountClient.RetryAttempts = config.CloudProviderBackoffRetries
storageAccountClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&storageAccountClient.Client)
return &azStorageAccountClient{
@ -1259,6 +1307,10 @@ func newAzDisksClient(config *azClientConfig) *azDisksClient {
disksClient := compute.NewDisksClientWithBaseURI(config.resourceManagerEndpoint, config.subscriptionID)
disksClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
disksClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
disksClient.RetryAttempts = config.CloudProviderBackoffRetries
disksClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&disksClient.Client)
return &azDisksClient{
@ -1345,6 +1397,10 @@ func newAzVirtualMachineSizesClient(config *azClientConfig) *azVirtualMachineSiz
VirtualMachineSizesClient.BaseURI = config.resourceManagerEndpoint
VirtualMachineSizesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
VirtualMachineSizesClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
VirtualMachineSizesClient.RetryAttempts = config.CloudProviderBackoffRetries
VirtualMachineSizesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&VirtualMachineSizesClient.Client)
return &azVirtualMachineSizesClient{

View File

@ -48,7 +48,7 @@ func (az *Cloud) NodeAddresses(ctx context.Context, name types.NodeName) ([]v1.N
}
addressGetter := func(nodeName types.NodeName) ([]v1.NodeAddress, error) {
ip, publicIP, err := az.GetIPForMachineWithRetry(nodeName)
ip, publicIP, err := az.getIPForMachine(nodeName)
if err != nil {
klog.V(2).Infof("NodeAddresses(%s) abort backoff: %v", nodeName, err)
return nil, err

View File

@ -27,11 +27,11 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog"
serviceapi "k8s.io/kubernetes/pkg/api/v1/service"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network"
"github.com/Azure/go-autorest/autorest/to"
"k8s.io/klog"
)
const (
@ -210,7 +210,7 @@ func (az *Cloud) getServiceLoadBalancer(service *v1.Service, clusterName string,
primaryVMSetName := az.vmSet.GetPrimaryVMSetName()
defaultLBName := az.getAzureLoadBalancerName(clusterName, primaryVMSetName, isInternal)
existingLBs, err := az.ListLBWithRetry(service)
existingLBs, err := az.ListLB(service)
if err != nil {
return nil, nil, false, err
}
@ -387,7 +387,7 @@ func (az *Cloud) determinePublicIPName(clusterName string, service *v1.Service)
pipResourceGroup := az.getPublicIPAddressResourceGroup(service)
pips, err := az.ListPIPWithRetry(service, pipResourceGroup)
pips, err := az.ListPIP(service, pipResourceGroup)
if err != nil {
return "", err
}
@ -474,13 +474,13 @@ func (az *Cloud) ensurePublicIPExists(service *v1.Service, pipName string, domai
}
klog.V(2).Infof("ensurePublicIPExists for service(%s): pip(%s) - creating", serviceName, *pip.Name)
klog.V(10).Infof("CreateOrUpdatePIPWithRetry(%s, %q): start", pipResourceGroup, *pip.Name)
err = az.CreateOrUpdatePIPWithRetry(service, pipResourceGroup, pip)
klog.V(10).Infof("CreateOrUpdatePIP(%s, %q): start", pipResourceGroup, *pip.Name)
err = az.CreateOrUpdatePIP(service, pipResourceGroup, pip)
if err != nil {
klog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - creating", serviceName, *pip.Name)
return nil, err
}
klog.V(10).Infof("CreateOrUpdatePIPWithRetry(%s, %q): end", pipResourceGroup, *pip.Name)
klog.V(10).Infof("CreateOrUpdatePIP(%s, %q): end", pipResourceGroup, *pip.Name)
ctx, cancel := getContextWithCancel()
defer cancel()
@ -818,16 +818,16 @@ func (az *Cloud) reconcileLoadBalancer(clusterName string, service *v1.Service,
klog.V(10).Infof("EnsureBackendPoolDeleted(%s, %s): end", lbBackendPoolID, vmSetName)
// Remove the LB.
klog.V(10).Infof("reconcileLoadBalancer: az.DeleteLBWithRetry(%q): start", lbName)
err = az.DeleteLBWithRetry(service, lbName)
klog.V(10).Infof("reconcileLoadBalancer: az.DeleteLB(%q): start", lbName)
err = az.DeleteLB(service, lbName)
if err != nil {
klog.V(2).Infof("reconcileLoadBalancer for service(%s) abort backoff: lb(%s) - deleting; no remaining frontendIPConfigurations", serviceName, lbName)
return nil, err
}
klog.V(10).Infof("az.DeleteLBWithRetry(%q): end", lbName)
klog.V(10).Infof("az.DeleteLB(%q): end", lbName)
} else {
klog.V(2).Infof("reconcileLoadBalancer: reconcileLoadBalancer for service(%s): lb(%s) - updating", serviceName, lbName)
err := az.CreateOrUpdateLBWithRetry(service, *lb)
err := az.CreateOrUpdateLB(service, *lb)
if err != nil {
klog.V(2).Infof("reconcileLoadBalancer for service(%s) abort backoff: lb(%s) - updating", serviceName, lbName)
return nil, err
@ -1143,8 +1143,8 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service,
if dirtySg {
sg.SecurityRules = &updatedRules
klog.V(2).Infof("reconcileSecurityGroup for service(%s): sg(%s) - updating", serviceName, *sg.Name)
klog.V(10).Infof("CreateOrUpdateSGWithRetry(%q): start", *sg.Name)
err := az.CreateOrUpdateSGWithRetry(service, sg)
klog.V(10).Infof("CreateOrUpdateSecurityGroup(%q): start", *sg.Name)
err := az.CreateOrUpdateSecurityGroup(service, sg)
if err != nil {
klog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name)
// TODO (Nov 2017): remove when augmented security rules are out of preview
@ -1157,7 +1157,7 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service,
// END TODO
return nil, err
}
klog.V(10).Infof("CreateOrUpdateSGWithRetry(%q): end", *sg.Name)
klog.V(10).Infof("CreateOrUpdateSecurityGroup(%q): end", *sg.Name)
}
return &sg, nil
}
@ -1315,7 +1315,7 @@ func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, lb *
pipResourceGroup := az.getPublicIPAddressResourceGroup(service)
pips, err := az.ListPIPWithRetry(service, pipResourceGroup)
pips, err := az.ListPIP(service, pipResourceGroup)
if err != nil {
return nil, err
}
@ -1414,7 +1414,7 @@ func (az *Cloud) safeDeletePublicIP(service *v1.Service, pipResourceGroup string
// Update load balancer when frontendIPConfigUpdated or loadBalancerRuleUpdated.
if frontendIPConfigUpdated || loadBalancerRuleUpdated {
err := az.CreateOrUpdateLBWithRetry(service, *lb)
err := az.CreateOrUpdateLB(service, *lb)
if err != nil {
klog.Errorf("safeDeletePublicIP for service(%s) failed with error: %v", getServiceName(service), err)
return err
@ -1423,14 +1423,14 @@ func (az *Cloud) safeDeletePublicIP(service *v1.Service, pipResourceGroup string
}
pipName := to.String(pip.Name)
klog.V(10).Infof("DeletePublicIPWithRetry(%s, %q): start", pipResourceGroup, pipName)
err := az.DeletePublicIPWithRetry(service, pipResourceGroup, pipName)
klog.V(10).Infof("DeletePublicIP(%s, %q): start", pipResourceGroup, pipName)
err := az.DeletePublicIP(service, pipResourceGroup, pipName)
if err != nil {
if err = ignoreStatusNotFoundFromError(err); err != nil {
return err
}
}
klog.V(10).Infof("DeletePublicIPWithRetry(%s, %q): end", pipResourceGroup, pipName)
klog.V(10).Infof("DeletePublicIP(%s, %q): end", pipResourceGroup, pipName)
return nil
}

View File

@ -20,11 +20,11 @@ import (
"context"
"fmt"
cloudprovider "k8s.io/cloud-provider"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network"
"github.com/Azure/go-autorest/autorest/to"
"k8s.io/apimachinery/pkg/types"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog"
)
@ -104,18 +104,7 @@ func (az *Cloud) createRouteTable() error {
}
klog.V(3).Infof("createRouteTableIfNotExists: creating routetable. routeTableName=%q", az.RouteTableName)
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.RouteTablesClient.CreateOrUpdate(ctx, az.ResourceGroup, az.RouteTableName, routeTable)
klog.V(10).Infof("RouteTablesClient.CreateOrUpdate(%q): end", az.RouteTableName)
if az.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("createRouteTableIfNotExists backing off: creating routetable. routeTableName=%q", az.RouteTableName)
retryErr := az.CreateOrUpdateRouteTableWithRetry(routeTable)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("createRouteTableIfNotExists abort backoff: creating routetable. routeTableName=%q", az.RouteTableName)
}
}
err := az.CreateOrUpdateRouteTable(routeTable)
if err != nil {
return err
}
@ -163,18 +152,7 @@ func (az *Cloud) CreateRoute(ctx context.Context, clusterName string, nameHint s
}
klog.V(3).Infof("CreateRoute: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.RoutesClient.CreateOrUpdate(ctx, az.ResourceGroup, az.RouteTableName, *route.Name, route)
klog.V(10).Infof("RoutesClient.CreateOrUpdate(%q): end", az.RouteTableName)
if az.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("CreateRoute backing off: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
retryErr := az.CreateOrUpdateRouteWithRetry(route)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("CreateRoute abort backoff: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
}
}
err = az.CreateOrUpdateRoute(route)
if err != nil {
return err
}
@ -202,20 +180,8 @@ func (az *Cloud) DeleteRoute(ctx context.Context, clusterName string, kubeRoute
klog.V(2).Infof("DeleteRoute: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
ctx, cancel := getContextWithCancel()
defer cancel()
routeName := mapNodeNameToRouteName(kubeRoute.TargetNode)
resp, err := az.RoutesClient.Delete(ctx, az.ResourceGroup, az.RouteTableName, routeName)
klog.V(10).Infof("RoutesClient.Delete(%q): end", az.RouteTableName)
if az.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("DeleteRoute backing off: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
retryErr := az.DeleteRouteWithRetry(routeName)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("DeleteRoute abort backoff: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
}
}
err = az.DeleteRouteWithName(routeName)
if err != nil {
return err
}

View File

@ -26,16 +26,16 @@ import (
"strconv"
"strings"
"k8s.io/api/core/v1"
cloudprovider "k8s.io/cloud-provider"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network"
"github.com/Azure/go-autorest/autorest/to"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/uuid"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog"
)
@ -294,10 +294,6 @@ outer:
return -1, fmt.Errorf("securityGroup priorities are exhausted")
}
// getIPForMachine converts nodeName to a plain string and forwards the lookup
// to az.vmSet.GetIPByNodeName, returning its three results unchanged.
func (az *Cloud) getIPForMachine(nodeName types.NodeName) (string, string, error) {
	machineName := string(nodeName)
	return az.vmSet.GetIPByNodeName(machineName)
}
var polyTable = crc32.MakeTable(crc32.Koopman)
//MakeCRC32 : convert string to CRC32 format
@ -460,9 +456,9 @@ func (as *availabilitySet) GetIPByNodeName(name string) (string, string, error)
// getAgentPoolAvailabiliySets lists the virtual machines for the resource group and then builds
// a list of availability sets that match the nodes available to k8s.
func (as *availabilitySet) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAvailabilitySets *[]string, err error) {
vms, err := as.VirtualMachineClientListWithRetry(as.ResourceGroup)
vms, err := as.ListVirtualMachines(as.ResourceGroup)
if err != nil {
klog.Errorf("as.getNodeAvailabilitySet - VirtualMachineClientListWithRetry failed, err=%v", err)
klog.Errorf("as.getNodeAvailabilitySet - ListVirtualMachines failed, err=%v", err)
return nil, err
}
vmNameToAvailabilitySetID := make(map[string]string, len(vms))
@ -695,18 +691,7 @@ func (as *availabilitySet) ensureHostInPool(service *v1.Service, nodeName types.
nicName := *nic.Name
klog.V(3).Infof("nicupdate(%s): nic(%s) - updating", serviceName, nicName)
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := as.InterfacesClient.CreateOrUpdate(ctx, as.ResourceGroup, *nic.Name, nic)
klog.V(10).Infof("InterfacesClient.CreateOrUpdate(%q): end", *nic.Name)
if as.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("nicupdate(%s) backing off: nic(%s) - updating, err=%v", serviceName, nicName, err)
retryErr := as.CreateOrUpdateInterfaceWithRetry(service, nic)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("nicupdate(%s) abort backoff: nic(%s) - updating", serviceName, nicName)
}
}
err := as.CreateOrUpdateInterface(service, nic)
if err != nil {
return err
}

View File

@ -27,13 +27,13 @@ import (
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network"
"github.com/Azure/go-autorest/autorest/to"
"k8s.io/klog"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog"
)
var (
@ -563,6 +563,30 @@ func (ss *scaleSet) GetPrimaryInterface(nodeName string) (network.Interface, err
return nic, nil
}
// getScaleSet looks up the scale set called name.
//
// When ss.Config.shouldOmitCloudProviderBackoff() reports true, ss.vmssCache
// is queried exactly once and a lookup failure is returned to the caller.
// Otherwise the call is delegated to getScaleSetWithRetry.
//
// It returns the scale set, whether it was found, and an error. The scale set
// is the zero value whenever it was not found or the error is non-nil.
func (ss *scaleSet) getScaleSet(service *v1.Service, name string) (compute.VirtualMachineScaleSet, bool, error) {
	if !ss.Config.shouldOmitCloudProviderBackoff() {
		return ss.getScaleSetWithRetry(service, name)
	}

	var result compute.VirtualMachineScaleSet
	cached, err := ss.vmssCache.Get(name)
	if err != nil {
		ss.Event(service, v1.EventTypeWarning, "GetVirtualMachineScaleSet", err.Error())
		// Nothing retries on this path, so the failure has to be surfaced;
		// returning a nil error here would make a cache failure look like
		// "scale set does not exist" to the caller.
		klog.Errorf("failure for scale set %q, err=%v", name, err)
		return result, false, err
	}
	if cached == nil {
		return result, false, nil
	}

	return *(cached.(*compute.VirtualMachineScaleSet)), true, nil
}
// getScaleSetWithRetry gets scale set with exponential backoff retry
func (ss *scaleSet) getScaleSetWithRetry(service *v1.Service, name string) (compute.VirtualMachineScaleSet, bool, error) {
var result compute.VirtualMachineScaleSet
@ -621,6 +645,19 @@ func (ss *scaleSet) getPrimaryIPConfigForScaleSet(config *compute.VirtualMachine
return nil, fmt.Errorf("failed to find a primary IP configuration for the scale set %q", scaleSetName)
}
// createOrUpdateVMSS creates or updates the given scale set.
//
// If ss.Config.shouldOmitCloudProviderBackoff() reports true, a single
// VirtualMachineScaleSetsClient.CreateOrUpdate call is issued and its outcome
// is passed through ss.processHTTPResponse. Otherwise the work is handed to
// createOrUpdateVMSSWithRetry.
func (ss *scaleSet) createOrUpdateVMSS(service *v1.Service, virtualMachineScaleSet compute.VirtualMachineScaleSet) error {
	if !ss.Config.shouldOmitCloudProviderBackoff() {
		return ss.createOrUpdateVMSSWithRetry(service, virtualMachineScaleSet)
	}

	ctx, cancel := getContextWithCancel()
	defer cancel()

	vmssName := *virtualMachineScaleSet.Name
	resp, err := ss.VirtualMachineScaleSetsClient.CreateOrUpdate(ctx, ss.ResourceGroup, vmssName, virtualMachineScaleSet)
	klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%s): end", vmssName)
	return ss.processHTTPResponse(service, "CreateOrUpdateVMSS", resp, err)
}
// createOrUpdateVMSSWithRetry invokes ss.VirtualMachineScaleSetsClient.CreateOrUpdate with exponential backoff retry.
func (ss *scaleSet) createOrUpdateVMSSWithRetry(service *v1.Service, virtualMachineScaleSet compute.VirtualMachineScaleSet) error {
return wait.ExponentialBackoff(ss.requestBackoff(), func() (bool, error) {
@ -632,6 +669,19 @@ func (ss *scaleSet) createOrUpdateVMSSWithRetry(service *v1.Service, virtualMach
})
}
// updateVMSSInstances applies an instance update to the named scale set.
//
// If ss.Config.shouldOmitCloudProviderBackoff() reports true, a single
// VirtualMachineScaleSetsClient.UpdateInstances call is issued and its outcome
// is passed through ss.processHTTPResponse. Otherwise the work is handed to
// updateVMSSInstancesWithRetry.
func (ss *scaleSet) updateVMSSInstances(service *v1.Service, scaleSetName string, vmInstanceIDs compute.VirtualMachineScaleSetVMInstanceRequiredIDs) error {
	if !ss.Config.shouldOmitCloudProviderBackoff() {
		return ss.updateVMSSInstancesWithRetry(service, scaleSetName, vmInstanceIDs)
	}

	ctx, cancel := getContextWithCancel()
	defer cancel()

	resp, err := ss.VirtualMachineScaleSetsClient.UpdateInstances(ctx, ss.ResourceGroup, scaleSetName, vmInstanceIDs)
	klog.V(10).Infof("VirtualMachineScaleSetsClient.UpdateInstances(%s): end", scaleSetName)
	return ss.processHTTPResponse(service, "CreateOrUpdateVMSSInstance", resp, err)
}
// updateVMSSInstancesWithRetry invokes ss.VirtualMachineScaleSetsClient.UpdateInstances with exponential backoff retry.
func (ss *scaleSet) updateVMSSInstancesWithRetry(service *v1.Service, scaleSetName string, vmInstanceIDs compute.VirtualMachineScaleSetVMInstanceRequiredIDs) error {
return wait.ExponentialBackoff(ss.requestBackoff(), func() (bool, error) {
@ -687,9 +737,9 @@ func (ss *scaleSet) getNodesScaleSets(nodes []*v1.Node) (map[string]sets.String,
func (ss *scaleSet) ensureHostsInVMSetPool(service *v1.Service, backendPoolID string, vmSetName string, instanceIDs []string, isInternal bool) error {
klog.V(3).Infof("ensuring hosts %q of scaleset %q in LB backendpool %q", instanceIDs, vmSetName, backendPoolID)
serviceName := getServiceName(service)
virtualMachineScaleSet, exists, err := ss.getScaleSetWithRetry(service, vmSetName)
virtualMachineScaleSet, exists, err := ss.getScaleSet(service, vmSetName)
if err != nil {
klog.Errorf("ss.getScaleSetWithRetry(%s) for service %q failed: %v", vmSetName, serviceName, err)
klog.Errorf("ss.getScaleSet(%s) for service %q failed: %v", vmSetName, serviceName, err)
return err
}
if !exists {
@ -748,19 +798,7 @@ func (ss *scaleSet) ensureHostsInVMSetPool(service *v1.Service, backendPoolID st
})
primaryIPConfiguration.LoadBalancerBackendAddressPools = &newBackendPools
ctx, cancel := getContextWithCancel()
defer cancel()
klog.V(3).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate for service (%s): scale set (%s) - updating", serviceName, vmSetName)
resp, err := ss.VirtualMachineScaleSetsClient.CreateOrUpdate(ctx, ss.ResourceGroup, vmSetName, virtualMachineScaleSet)
klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%q): end", vmSetName)
if ss.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate for service (%s): scale set (%s) - updating, err=%v", serviceName, vmSetName, err)
retryErr := ss.createOrUpdateVMSSWithRetry(service, virtualMachineScaleSet)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate for service (%s) abort backoff: scale set (%s) - updating", serviceName, vmSetName)
}
}
err := ss.createOrUpdateVMSS(service, virtualMachineScaleSet)
if err != nil {
return err
}
@ -770,18 +808,7 @@ func (ss *scaleSet) ensureHostsInVMSetPool(service *v1.Service, backendPoolID st
vmInstanceIDs := compute.VirtualMachineScaleSetVMInstanceRequiredIDs{
InstanceIds: &instanceIDs,
}
ctx, cancel := getContextWithCancel()
defer cancel()
instanceResp, err := ss.VirtualMachineScaleSetsClient.UpdateInstances(ctx, ss.ResourceGroup, vmSetName, vmInstanceIDs)
klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%q): end", vmSetName)
if ss.CloudProviderBackoff && shouldRetryHTTPRequest(instanceResp, err) {
klog.V(2).Infof("VirtualMachineScaleSetsClient.UpdateInstances for service (%s): scale set (%s) - updating, err=%v", serviceName, vmSetName, err)
retryErr := ss.updateVMSSInstancesWithRetry(service, vmSetName, vmInstanceIDs)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("VirtualMachineScaleSetsClient.UpdateInstances for service (%s) abort backoff: scale set (%s) - updating", serviceName, vmSetName)
}
}
err = ss.updateVMSSInstances(service, vmSetName, vmInstanceIDs)
if err != nil {
return err
}
@ -833,9 +860,9 @@ func (ss *scaleSet) EnsureHostsInPool(service *v1.Service, nodes []*v1.Node, bac
// ensureScaleSetBackendPoolDeleted ensures the loadBalancer backendAddressPools deleted from the specified scaleset.
func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID, ssName string) error {
klog.V(3).Infof("ensuring backend pool %q deleted from scaleset %q", poolID, ssName)
virtualMachineScaleSet, exists, err := ss.getScaleSetWithRetry(service, ssName)
virtualMachineScaleSet, exists, err := ss.getScaleSet(service, ssName)
if err != nil {
klog.Errorf("ss.ensureScaleSetBackendPoolDeleted(%s, %s) getScaleSetWithRetry(%s) failed: %v", poolID, ssName, ssName, err)
klog.Errorf("ss.ensureScaleSetBackendPoolDeleted(%s, %s) getScaleSet(%s) failed: %v", poolID, ssName, ssName, err)
return err
}
if !exists {
@ -879,18 +906,7 @@ func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID
// Update scale set with backoff.
primaryIPConfiguration.LoadBalancerBackendAddressPools = &newBackendPools
klog.V(3).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating", ssName)
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := ss.VirtualMachineScaleSetsClient.CreateOrUpdate(ctx, ss.ResourceGroup, ssName, virtualMachineScaleSet)
klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%q): end", ssName)
if ss.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating, err=%v", ssName, err)
retryErr := ss.createOrUpdateVMSSWithRetry(service, virtualMachineScaleSet)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate abort backoff: scale set (%s) - updating", ssName)
}
}
err = ss.createOrUpdateVMSS(service, virtualMachineScaleSet)
if err != nil {
return err
}
@ -900,18 +916,7 @@ func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID
vmInstanceIDs := compute.VirtualMachineScaleSetVMInstanceRequiredIDs{
InstanceIds: &instanceIDs,
}
instanceCtx, instanceCancel := getContextWithCancel()
defer instanceCancel()
instanceResp, err := ss.VirtualMachineScaleSetsClient.UpdateInstances(instanceCtx, ss.ResourceGroup, ssName, vmInstanceIDs)
klog.V(10).Infof("VirtualMachineScaleSetsClient.UpdateInstances(%q): end", ssName)
if ss.CloudProviderBackoff && shouldRetryHTTPRequest(instanceResp, err) {
klog.V(2).Infof("VirtualMachineScaleSetsClient.UpdateInstances scale set (%s) - updating, err=%v", ssName, err)
retryErr := ss.updateVMSSInstancesWithRetry(service, ssName, vmInstanceIDs)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("VirtualMachineScaleSetsClient.UpdateInstances abort backoff: scale set (%s) - updating", ssName)
}
}
err = ss.updateVMSSInstances(service, ssName, vmInstanceIDs)
if err != nil {
return err
}
@ -919,17 +924,9 @@ func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID
// Update virtualMachineScaleSet again. This is a workaround for removing VMSS reference from LB.
// TODO: remove this workaround when figuring out the root cause.
if len(newBackendPools) == 0 {
updateCtx, updateCancel := getContextWithCancel()
defer updateCancel()
klog.V(3).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating second time", ssName)
resp, err = ss.VirtualMachineScaleSetsClient.CreateOrUpdate(updateCtx, ss.ResourceGroup, ssName, virtualMachineScaleSet)
klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%q): end", ssName)
if ss.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating, err=%v", ssName, err)
retryErr := ss.createOrUpdateVMSSWithRetry(service, virtualMachineScaleSet)
if retryErr != nil {
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate abort backoff: scale set (%s) - updating", ssName)
}
err = ss.createOrUpdateVMSS(service, virtualMachineScaleSet)
if err != nil {
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate abort backoff: scale set (%s) - updating", ssName)
}
}

View File

@ -132,7 +132,7 @@ func (ss *scaleSet) newAvailabilitySetNodesCache() (*timedCache, error) {
}
for _, resourceGroup := range resourceGroups.List() {
vmList, err := ss.Cloud.VirtualMachineClientListWithRetry(resourceGroup)
vmList, err := ss.Cloud.ListVirtualMachines(resourceGroup)
if err != nil {
return nil, err
}