From 994a2cc4cd612c89e7dc3bef55cf9199b57712de Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Mon, 19 Nov 2018 16:16:47 +0800 Subject: [PATCH] Refactor Azure backoff to support backoff modes --- .../providers/azure/azure_backoff.go | 303 +++++++++++++++--- 1 file changed, 264 insertions(+), 39 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_backoff.go b/pkg/cloudprovider/providers/azure/azure_backoff.go index c0a6f4a34ee..2771bef26b1 100644 --- a/pkg/cloudprovider/providers/azure/azure_backoff.go +++ b/pkg/cloudprovider/providers/azure/azure_backoff.go @@ -17,19 +17,18 @@ limitations under the License. package azure import ( - "context" "fmt" "net/http" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network" - "k8s.io/klog" "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" cloudprovider "k8s.io/cloud-provider" + "k8s.io/klog" ) // requestBackoff if backoff is disabled in cloud provider it @@ -43,7 +42,6 @@ func (az *Cloud) requestBackoff() (resourceRequestBackoff wait.Backoff) { resourceRequestBackoff = wait.Backoff{ Steps: 1, } - return resourceRequestBackoff } @@ -73,12 +71,11 @@ func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.Virtua if err == wait.ErrWaitTimeout { err = retryErr } - return machine, err } -// VirtualMachineClientListWithRetry invokes az.VirtualMachinesClient.List with exponential backoff retry -func (az *Cloud) VirtualMachineClientListWithRetry(resourceGroup string) ([]compute.VirtualMachine, error) { +// ListVirtualMachinesWithRetry invokes az.VirtualMachinesClient.List with exponential backoff retry +func (az *Cloud) ListVirtualMachinesWithRetry(resourceGroup string) ([]compute.VirtualMachine, error) { allNodes := []compute.VirtualMachine{} err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { var retryErr error @@ -101,12 +98,38 @@ func (az *Cloud) VirtualMachineClientListWithRetry(resourceGroup string) ([]comp return allNodes, err } +// ListVirtualMachines invokes az.VirtualMachinesClient.List with exponential backoff retry +func (az *Cloud) ListVirtualMachines(resourceGroup string) ([]compute.VirtualMachine, error) { + if az.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + + allNodes, err := az.VirtualMachinesClient.List(ctx, resourceGroup) + if err != nil { + klog.Errorf("VirtualMachinesClient.List(%v) failure with err=%v", resourceGroup, err) + return nil, err + } + klog.V(2).Infof("VirtualMachinesClient.List(%v) success", resourceGroup) + return allNodes, nil + } + + return az.ListVirtualMachinesWithRetry(resourceGroup) +} + +func (az *Cloud) getIPForMachine(nodeName types.NodeName) (string, string, error) { + if az.Config.shouldOmitCloudProviderBackoff() { + return az.vmSet.GetIPByNodeName(string(nodeName)) + } + + return az.GetIPForMachineWithRetry(nodeName) +} + // GetIPForMachineWithRetry invokes az.getIPForMachine with exponential backoff retry func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, string, error) { var ip, publicIP string err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { var retryErr error - ip, publicIP, retryErr = az.getIPForMachine(name) + ip, publicIP, retryErr = az.vmSet.GetIPByNodeName(string(name)) if retryErr != nil { klog.Errorf("GetIPForMachineWithRetry(%s): backoff failure, will retry,err=%v", name, retryErr) return false, nil @@ -117,6 +140,28 @@ func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, string, return ip, publicIP, err } +// CreateOrUpdateSecurityGroup invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry +func (az *Cloud) CreateOrUpdateSecurityGroup(service *v1.Service, sg network.SecurityGroup) error { + if az.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + + resp, err := az.SecurityGroupsClient.CreateOrUpdate(ctx, az.ResourceGroup, *sg.Name, sg) + klog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%s): end", *sg.Name) + if err == nil { + if isSuccessHTTPResponse(resp) { + // Invalidate the cache right after updating + az.nsgCache.Delete(*sg.Name) + } else if resp != nil { + return fmt.Errorf("HTTP response %q", resp.Status) + } + } + return err + } + + return az.CreateOrUpdateSGWithRetry(service, sg) +} + // CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry func (az *Cloud) CreateOrUpdateSGWithRetry(service *v1.Service, sg network.SecurityGroup) error { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { @@ -134,8 +179,30 @@ func (az *Cloud) CreateOrUpdateSGWithRetry(service *v1.Service, sg network.Secur }) } -// CreateOrUpdateLBWithRetry invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry -func (az *Cloud) CreateOrUpdateLBWithRetry(service *v1.Service, lb network.LoadBalancer) error { +// CreateOrUpdateLB invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry +func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer) error { + if az.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + + resp, err := az.LoadBalancerClient.CreateOrUpdate(ctx, az.ResourceGroup, *lb.Name, lb) + klog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): end", *lb.Name) + if err == nil { + if isSuccessHTTPResponse(resp) { + // Invalidate the cache right after updating + az.lbCache.Delete(*lb.Name) + } else if resp != nil { + return fmt.Errorf("HTTP response %q", resp.Status) + } + } + return err + } + + return az.createOrUpdateLBWithRetry(service, lb) +} + +// createOrUpdateLBWithRetry invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry +func (az *Cloud) createOrUpdateLBWithRetry(service *v1.Service, lb network.LoadBalancer) error { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { ctx, cancel := getContextWithCancel() defer cancel() @@ -151,8 +218,27 @@ func (az *Cloud) CreateOrUpdateLBWithRetry(service *v1.Service, lb network.LoadB }) } -// ListLBWithRetry invokes az.LoadBalancerClient.List with exponential backoff retry -func (az *Cloud) ListLBWithRetry(service *v1.Service) ([]network.LoadBalancer, error) { +// ListLB invokes az.LoadBalancerClient.List with exponential backoff retry +func (az *Cloud) ListLB(service *v1.Service) ([]network.LoadBalancer, error) { + if az.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + + allLBs, err := az.LoadBalancerClient.List(ctx, az.ResourceGroup) + if err != nil { + az.Event(service, v1.EventTypeWarning, "ListLoadBalancers", err.Error()) + klog.Errorf("LoadBalancerClient.List(%v) failure with err=%v", az.ResourceGroup, err) + return nil, err + } + klog.V(2).Infof("LoadBalancerClient.List(%v) success", az.ResourceGroup) + return allLBs, nil + } + + return az.listLBWithRetry(service) +} + +// listLBWithRetry invokes az.LoadBalancerClient.List with exponential backoff retry +func (az *Cloud) listLBWithRetry(service *v1.Service) ([]network.LoadBalancer, error) { var allLBs []network.LoadBalancer err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { @@ -178,8 +264,27 @@ func (az *Cloud) ListLBWithRetry(service *v1.Service) ([]network.LoadBalancer, e return allLBs, nil } -// ListPIPWithRetry list the PIP resources in the given resource group -func (az *Cloud) ListPIPWithRetry(service *v1.Service, pipResourceGroup string) ([]network.PublicIPAddress, error) { +// ListPIP list the PIP resources in the given resource group +func (az *Cloud) ListPIP(service *v1.Service, pipResourceGroup string) ([]network.PublicIPAddress, error) { + if az.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + + allPIPs, err := az.PublicIPAddressesClient.List(ctx, pipResourceGroup) + if err != nil { + az.Event(service, v1.EventTypeWarning, "ListPublicIPs", err.Error()) + klog.Errorf("PublicIPAddressesClient.List(%v) failure with err=%v", pipResourceGroup, err) + return nil, err + } + klog.V(2).Infof("PublicIPAddressesClient.List(%v) success", pipResourceGroup) + return allPIPs, nil + } + + return az.listPIPWithRetry(service, pipResourceGroup) +} + +// listPIPWithRetry list the PIP resources in the given resource group +func (az *Cloud) listPIPWithRetry(service *v1.Service, pipResourceGroup string) ([]network.PublicIPAddress, error) { var allPIPs []network.PublicIPAddress err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { @@ -205,8 +310,22 @@ func (az *Cloud) ListPIPWithRetry(service *v1.Service, pipResourceGroup string) return allPIPs, nil } -// CreateOrUpdatePIPWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry -func (az *Cloud) CreateOrUpdatePIPWithRetry(service *v1.Service, pipResourceGroup string, pip network.PublicIPAddress) error { +// CreateOrUpdatePIP invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry +func (az *Cloud) CreateOrUpdatePIP(service *v1.Service, pipResourceGroup string, pip network.PublicIPAddress) error { + if az.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + + resp, err := az.PublicIPAddressesClient.CreateOrUpdate(ctx, pipResourceGroup, *pip.Name, pip) + klog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%s, %s): end", pipResourceGroup, *pip.Name) + return az.processHTTPResponse(service, "CreateOrUpdatePublicIPAddress", resp, err) + } + + return az.createOrUpdatePIPWithRetry(service, pipResourceGroup, pip) +} + +// createOrUpdatePIPWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry +func (az *Cloud) createOrUpdatePIPWithRetry(service *v1.Service, pipResourceGroup string, pip network.PublicIPAddress) error { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { ctx, cancel := getContextWithCancel() defer cancel() @@ -217,8 +336,22 @@ func (az *Cloud) CreateOrUpdatePIPWithRetry(service *v1.Service, pipResourceGrou }) } -// CreateOrUpdateInterfaceWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry -func (az *Cloud) CreateOrUpdateInterfaceWithRetry(service *v1.Service, nic network.Interface) error { +// CreateOrUpdateInterface invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry +func (az *Cloud) CreateOrUpdateInterface(service *v1.Service, nic network.Interface) error { + if az.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + + resp, err := az.InterfacesClient.CreateOrUpdate(ctx, az.ResourceGroup, *nic.Name, nic) + klog.V(10).Infof("InterfacesClient.CreateOrUpdate(%s): end", *nic.Name) + return az.processHTTPResponse(service, "CreateOrUpdateInterface", resp, err) + } + + return az.createOrUpdateInterfaceWithRetry(service, nic) +} + +// createOrUpdateInterfaceWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry +func (az *Cloud) createOrUpdateInterfaceWithRetry(service *v1.Service, nic network.Interface) error { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { ctx, cancel := getContextWithCancel() defer cancel() @@ -229,8 +362,21 @@ func (az *Cloud) CreateOrUpdateInterfaceWithRetry(service *v1.Service, nic netwo }) } -// DeletePublicIPWithRetry invokes az.PublicIPAddressesClient.Delete with exponential backoff retry -func (az *Cloud) DeletePublicIPWithRetry(service *v1.Service, pipResourceGroup string, pipName string) error { +// DeletePublicIP invokes az.PublicIPAddressesClient.Delete with exponential backoff retry +func (az *Cloud) DeletePublicIP(service *v1.Service, pipResourceGroup string, pipName string) error { + if az.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + + resp, err := az.PublicIPAddressesClient.Delete(ctx, pipResourceGroup, pipName) + return az.processHTTPResponse(service, "DeletePublicIPAddress", resp, err) + } + + return az.deletePublicIPWithRetry(service, pipResourceGroup, pipName) +} + +// deletePublicIPWithRetry invokes az.PublicIPAddressesClient.Delete with exponential backoff retry +func (az *Cloud) deletePublicIPWithRetry(service *v1.Service, pipResourceGroup string, pipName string) error { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { ctx, cancel := getContextWithCancel() defer cancel() @@ -240,8 +386,29 @@ func (az *Cloud) DeletePublicIPWithRetry(service *v1.Service, pipResourceGroup s }) } -// DeleteLBWithRetry invokes az.LoadBalancerClient.Delete with exponential backoff retry -func (az *Cloud) DeleteLBWithRetry(service *v1.Service, lbName string) error { +// DeleteLB invokes az.LoadBalancerClient.Delete with exponential backoff retry +func (az *Cloud) DeleteLB(service *v1.Service, lbName string) error { + if az.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + + resp, err := az.LoadBalancerClient.Delete(ctx, az.ResourceGroup, lbName) + if err == nil { + if isSuccessHTTPResponse(resp) { + // Invalidate the cache right after updating + az.lbCache.Delete(lbName) + } else if resp != nil { + return fmt.Errorf("HTTP response %q", resp.Status) + } + } + return err + } + + return az.deleteLBWithRetry(service, lbName) +} + +// deleteLBWithRetry invokes az.LoadBalancerClient.Delete with exponential backoff retry +func (az *Cloud) deleteLBWithRetry(service *v1.Service, lbName string) error { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { ctx, cancel := getContextWithCancel() defer cancel() @@ -256,8 +423,21 @@ func (az *Cloud) DeleteLBWithRetry(service *v1.Service, lbName string) error { }) } -// CreateOrUpdateRouteTableWithRetry invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry -func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable) error { +// CreateOrUpdateRouteTable invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry +func (az *Cloud) CreateOrUpdateRouteTable(routeTable network.RouteTable) error { + if az.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + + resp, err := az.RouteTablesClient.CreateOrUpdate(ctx, az.ResourceGroup, az.RouteTableName, routeTable) + return az.processHTTPResponse(nil, "", resp, err) + } + + return az.createOrUpdateRouteTableWithRetry(routeTable) +} + +// createOrUpdateRouteTableWithRetry invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry +func (az *Cloud) createOrUpdateRouteTableWithRetry(routeTable network.RouteTable) error { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { ctx, cancel := getContextWithCancel() defer cancel() @@ -267,8 +447,22 @@ func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable }) } -// CreateOrUpdateRouteWithRetry invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry -func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error { +// CreateOrUpdateRoute invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry +func (az *Cloud) CreateOrUpdateRoute(route network.Route) error { + if az.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + + resp, err := az.RoutesClient.CreateOrUpdate(ctx, az.ResourceGroup, az.RouteTableName, *route.Name, route) + klog.V(10).Infof("RoutesClient.CreateOrUpdate(%s): end", *route.Name) + return az.processHTTPResponse(nil, "", resp, err) + } + + return az.createOrUpdateRouteWithRetry(route) +} + +// createOrUpdateRouteWithRetry invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry +func (az *Cloud) createOrUpdateRouteWithRetry(route network.Route) error { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { ctx, cancel := getContextWithCancel() defer cancel() @@ -279,14 +473,28 @@ func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error { }) } -// DeleteRouteWithRetry invokes az.RoutesClient.Delete with exponential backoff retry -func (az *Cloud) DeleteRouteWithRetry(routeName string) error { +// DeleteRouteWithName invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry +func (az *Cloud) DeleteRouteWithName(routeName string) error { + if az.Config.shouldOmitCloudProviderBackoff() { + ctx, cancel := getContextWithCancel() + defer cancel() + + resp, err := az.RoutesClient.Delete(ctx, az.ResourceGroup, az.RouteTableName, routeName) + klog.V(10).Infof("RoutesClient.Delete(%s,%s): end", az.RouteTableName, routeName) + return az.processHTTPResponse(nil, "", resp, err) + } + + return az.deleteRouteWithRetry(routeName) +} + +// deleteRouteWithRetry invokes az.RoutesClient.Delete with exponential backoff retry +func (az *Cloud) deleteRouteWithRetry(routeName string) error { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { ctx, cancel := getContextWithCancel() defer cancel() resp, err := az.RoutesClient.Delete(ctx, az.ResourceGroup, az.RouteTableName, routeName) - klog.V(10).Infof("RoutesClient.Delete(%s): end", az.RouteTableName) + klog.V(10).Infof("RoutesClient.Delete(%s,%s): end", az.RouteTableName, routeName) return az.processHTTPRetryResponse(nil, "", resp, err) }) } @@ -303,21 +511,17 @@ func (az *Cloud) CreateOrUpdateVMWithRetry(resourceGroup, vmName string, newVM c }) } -// UpdateVmssVMWithRetry invokes az.VirtualMachineScaleSetVMsClient.Update with exponential backoff retry -func (az *Cloud) UpdateVmssVMWithRetry(ctx context.Context, resourceGroupName string, VMScaleSetName string, instanceID string, parameters compute.VirtualMachineScaleSetVM) error { - return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { - resp, err := az.VirtualMachineScaleSetVMsClient.Update(ctx, resourceGroupName, VMScaleSetName, instanceID, parameters) - klog.V(10).Infof("VirtualMachinesClient.CreateOrUpdate(%s,%s): end", VMScaleSetName, instanceID) - return az.processHTTPRetryResponse(nil, "", resp, err) - }) -} - // isSuccessHTTPResponse determines if the response from an HTTP request suggests success -func isSuccessHTTPResponse(resp http.Response) bool { +func isSuccessHTTPResponse(resp *http.Response) bool { + if resp == nil { + return false + } + // HTTP 2xx suggests a successful response if 199 < resp.StatusCode && resp.StatusCode < 300 { return true } + return false } @@ -337,7 +541,7 @@ func shouldRetryHTTPRequest(resp *http.Response, err error) bool { } func (az *Cloud) processHTTPRetryResponse(service *v1.Service, reason string, resp *http.Response, err error) (bool, error) { - if resp != nil && isSuccessHTTPResponse(*resp) { + if resp != nil && isSuccessHTTPResponse(resp) { // HTTP 2xx suggests a successful response return true, nil } @@ -358,3 +562,24 @@ func (az *Cloud) processHTTPRetryResponse(service *v1.Service, reason string, re // Fall-through: stop periodic backoff return true, nil } + +func (az *Cloud) processHTTPResponse(service *v1.Service, reason string, resp *http.Response, err error) error { + if isSuccessHTTPResponse(resp) { + // HTTP 2xx suggests a successful response + return nil + } + + if err != nil { + az.Event(service, v1.EventTypeWarning, reason, err.Error()) + klog.Errorf("processHTTPRetryResponse failure with err: %v", err) + } else if resp != nil { + az.Event(service, v1.EventTypeWarning, reason, fmt.Sprintf("Azure HTTP response %d", resp.StatusCode)) + klog.Errorf("processHTTPRetryResponse failure with HTTP response %q", resp.Status) + } + + return err +} + +func (cfg *Config) shouldOmitCloudProviderBackoff() bool { + return cfg.CloudProviderBackoffMode == backoffModeV2 +}