Refactor Azure backoff to support backoff modes

This commit is contained in:
Pengfei Ni 2018-11-19 16:16:47 +08:00
parent d5c3a959f4
commit 994a2cc4cd

View File

@ -17,19 +17,18 @@ limitations under the License.
package azure
import (
"context"
"fmt"
"net/http"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network"
"k8s.io/klog"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog"
)
// requestBackoff if backoff is disabled in cloud provider it
@ -43,7 +42,6 @@ func (az *Cloud) requestBackoff() (resourceRequestBackoff wait.Backoff) {
resourceRequestBackoff = wait.Backoff{
Steps: 1,
}
return resourceRequestBackoff
}
@ -73,12 +71,11 @@ func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.Virtua
if err == wait.ErrWaitTimeout {
err = retryErr
}
return machine, err
}
// VirtualMachineClientListWithRetry invokes az.VirtualMachinesClient.List with exponential backoff retry
func (az *Cloud) VirtualMachineClientListWithRetry(resourceGroup string) ([]compute.VirtualMachine, error) {
// ListVirtualMachinesWithRetry invokes az.VirtualMachinesClient.List with exponential backoff retry
func (az *Cloud) ListVirtualMachinesWithRetry(resourceGroup string) ([]compute.VirtualMachine, error) {
allNodes := []compute.VirtualMachine{}
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error
@ -101,12 +98,38 @@ func (az *Cloud) VirtualMachineClientListWithRetry(resourceGroup string) ([]comp
return allNodes, err
}
// ListVirtualMachines invokes az.VirtualMachinesClient.List with exponential backoff retry
func (az *Cloud) ListVirtualMachines(resourceGroup string) ([]compute.VirtualMachine, error) {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
allNodes, err := az.VirtualMachinesClient.List(ctx, resourceGroup)
if err != nil {
klog.Errorf("VirtualMachinesClient.List(%v) failure with err=%v", resourceGroup, err)
return nil, err
}
klog.V(2).Infof("VirtualMachinesClient.List(%v) success", resourceGroup)
return allNodes, nil
}
return az.ListVirtualMachinesWithRetry(resourceGroup)
}
func (az *Cloud) getIPForMachine(nodeName types.NodeName) (string, string, error) {
if az.Config.shouldOmitCloudProviderBackoff() {
return az.vmSet.GetIPByNodeName(string(nodeName))
}
return az.GetIPForMachineWithRetry(nodeName)
}
// GetIPForMachineWithRetry invokes az.getIPForMachine with exponential backoff retry
func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, string, error) {
var ip, publicIP string
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error
ip, publicIP, retryErr = az.getIPForMachine(name)
ip, publicIP, retryErr = az.vmSet.GetIPByNodeName(string(name))
if retryErr != nil {
klog.Errorf("GetIPForMachineWithRetry(%s): backoff failure, will retry,err=%v", name, retryErr)
return false, nil
@ -117,6 +140,28 @@ func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, string,
return ip, publicIP, err
}
// CreateOrUpdateSecurityGroup invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateSecurityGroup(service *v1.Service, sg network.SecurityGroup) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.SecurityGroupsClient.CreateOrUpdate(ctx, az.ResourceGroup, *sg.Name, sg)
klog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%s): end", *sg.Name)
if err == nil {
if isSuccessHTTPResponse(resp) {
// Invalidate the cache right after updating
az.nsgCache.Delete(*sg.Name)
} else if resp != nil {
return fmt.Errorf("HTTP response %q", resp.Status)
}
}
return err
}
return az.CreateOrUpdateSGWithRetry(service, sg)
}
// CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateSGWithRetry(service *v1.Service, sg network.SecurityGroup) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
@ -134,8 +179,30 @@ func (az *Cloud) CreateOrUpdateSGWithRetry(service *v1.Service, sg network.Secur
})
}
// CreateOrUpdateLBWithRetry invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateLBWithRetry(service *v1.Service, lb network.LoadBalancer) error {
// CreateOrUpdateLB invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.LoadBalancerClient.CreateOrUpdate(ctx, az.ResourceGroup, *lb.Name, lb)
klog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): end", *lb.Name)
if err == nil {
if isSuccessHTTPResponse(resp) {
// Invalidate the cache right after updating
az.lbCache.Delete(*lb.Name)
} else if resp != nil {
return fmt.Errorf("HTTP response %q", resp.Status)
}
}
return err
}
return az.createOrUpdateLBWithRetry(service, lb)
}
// createOrUpdateLBWithRetry invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) createOrUpdateLBWithRetry(service *v1.Service, lb network.LoadBalancer) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel()
defer cancel()
@ -151,8 +218,27 @@ func (az *Cloud) CreateOrUpdateLBWithRetry(service *v1.Service, lb network.LoadB
})
}
// ListLBWithRetry invokes az.LoadBalancerClient.List with exponential backoff retry
func (az *Cloud) ListLBWithRetry(service *v1.Service) ([]network.LoadBalancer, error) {
// ListLB invokes az.LoadBalancerClient.List with exponential backoff retry
func (az *Cloud) ListLB(service *v1.Service) ([]network.LoadBalancer, error) {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
allLBs, err := az.LoadBalancerClient.List(ctx, az.ResourceGroup)
if err != nil {
az.Event(service, v1.EventTypeWarning, "ListLoadBalancers", err.Error())
klog.Errorf("LoadBalancerClient.List(%v) failure with err=%v", az.ResourceGroup, err)
return nil, err
}
klog.V(2).Infof("LoadBalancerClient.List(%v) success", az.ResourceGroup)
return allLBs, nil
}
return az.listLBWithRetry(service)
}
// listLBWithRetry invokes az.LoadBalancerClient.List with exponential backoff retry
func (az *Cloud) listLBWithRetry(service *v1.Service) ([]network.LoadBalancer, error) {
var allLBs []network.LoadBalancer
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
@ -178,8 +264,27 @@ func (az *Cloud) ListLBWithRetry(service *v1.Service) ([]network.LoadBalancer, e
return allLBs, nil
}
// ListPIPWithRetry list the PIP resources in the given resource group
func (az *Cloud) ListPIPWithRetry(service *v1.Service, pipResourceGroup string) ([]network.PublicIPAddress, error) {
// ListPIP list the PIP resources in the given resource group
func (az *Cloud) ListPIP(service *v1.Service, pipResourceGroup string) ([]network.PublicIPAddress, error) {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
allPIPs, err := az.PublicIPAddressesClient.List(ctx, pipResourceGroup)
if err != nil {
az.Event(service, v1.EventTypeWarning, "ListPublicIPs", err.Error())
klog.Errorf("PublicIPAddressesClient.List(%v) failure with err=%v", pipResourceGroup, err)
return nil, err
}
klog.V(2).Infof("PublicIPAddressesClient.List(%v) success", pipResourceGroup)
return allPIPs, nil
}
return az.listPIPWithRetry(service, pipResourceGroup)
}
// listPIPWithRetry list the PIP resources in the given resource group
func (az *Cloud) listPIPWithRetry(service *v1.Service, pipResourceGroup string) ([]network.PublicIPAddress, error) {
var allPIPs []network.PublicIPAddress
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
@ -205,8 +310,22 @@ func (az *Cloud) ListPIPWithRetry(service *v1.Service, pipResourceGroup string)
return allPIPs, nil
}
// CreateOrUpdatePIPWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdatePIPWithRetry(service *v1.Service, pipResourceGroup string, pip network.PublicIPAddress) error {
// CreateOrUpdatePIP invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdatePIP(service *v1.Service, pipResourceGroup string, pip network.PublicIPAddress) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.PublicIPAddressesClient.CreateOrUpdate(ctx, pipResourceGroup, *pip.Name, pip)
klog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%s, %s): end", pipResourceGroup, *pip.Name)
return az.processHTTPResponse(service, "CreateOrUpdatePublicIPAddress", resp, err)
}
return az.createOrUpdatePIPWithRetry(service, pipResourceGroup, pip)
}
// createOrUpdatePIPWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) createOrUpdatePIPWithRetry(service *v1.Service, pipResourceGroup string, pip network.PublicIPAddress) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel()
defer cancel()
@ -217,8 +336,22 @@ func (az *Cloud) CreateOrUpdatePIPWithRetry(service *v1.Service, pipResourceGrou
})
}
// CreateOrUpdateInterfaceWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateInterfaceWithRetry(service *v1.Service, nic network.Interface) error {
// CreateOrUpdateInterface invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateInterface(service *v1.Service, nic network.Interface) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.InterfacesClient.CreateOrUpdate(ctx, az.ResourceGroup, *nic.Name, nic)
klog.V(10).Infof("InterfacesClient.CreateOrUpdate(%s): end", *nic.Name)
return az.processHTTPResponse(service, "CreateOrUpdateInterface", resp, err)
}
return az.createOrUpdateInterfaceWithRetry(service, nic)
}
// createOrUpdateInterfaceWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) createOrUpdateInterfaceWithRetry(service *v1.Service, nic network.Interface) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel()
defer cancel()
@ -229,8 +362,21 @@ func (az *Cloud) CreateOrUpdateInterfaceWithRetry(service *v1.Service, nic netwo
})
}
// DeletePublicIPWithRetry invokes az.PublicIPAddressesClient.Delete with exponential backoff retry
func (az *Cloud) DeletePublicIPWithRetry(service *v1.Service, pipResourceGroup string, pipName string) error {
// DeletePublicIP invokes az.PublicIPAddressesClient.Delete with exponential backoff retry
func (az *Cloud) DeletePublicIP(service *v1.Service, pipResourceGroup string, pipName string) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.PublicIPAddressesClient.Delete(ctx, pipResourceGroup, pipName)
return az.processHTTPResponse(service, "DeletePublicIPAddress", resp, err)
}
return az.deletePublicIPWithRetry(service, pipResourceGroup, pipName)
}
// deletePublicIPWithRetry invokes az.PublicIPAddressesClient.Delete with exponential backoff retry
func (az *Cloud) deletePublicIPWithRetry(service *v1.Service, pipResourceGroup string, pipName string) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel()
defer cancel()
@ -240,8 +386,29 @@ func (az *Cloud) DeletePublicIPWithRetry(service *v1.Service, pipResourceGroup s
})
}
// DeleteLBWithRetry invokes az.LoadBalancerClient.Delete with exponential backoff retry
func (az *Cloud) DeleteLBWithRetry(service *v1.Service, lbName string) error {
// DeleteLB invokes az.LoadBalancerClient.Delete with exponential backoff retry
func (az *Cloud) DeleteLB(service *v1.Service, lbName string) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.LoadBalancerClient.Delete(ctx, az.ResourceGroup, lbName)
if err == nil {
if isSuccessHTTPResponse(resp) {
// Invalidate the cache right after updating
az.lbCache.Delete(lbName)
} else if resp != nil {
return fmt.Errorf("HTTP response %q", resp.Status)
}
}
return err
}
return az.deleteLBWithRetry(service, lbName)
}
// deleteLBWithRetry invokes az.LoadBalancerClient.Delete with exponential backoff retry
func (az *Cloud) deleteLBWithRetry(service *v1.Service, lbName string) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel()
defer cancel()
@ -256,8 +423,21 @@ func (az *Cloud) DeleteLBWithRetry(service *v1.Service, lbName string) error {
})
}
// CreateOrUpdateRouteTableWithRetry invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable) error {
// CreateOrUpdateRouteTable invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateRouteTable(routeTable network.RouteTable) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.RouteTablesClient.CreateOrUpdate(ctx, az.ResourceGroup, az.RouteTableName, routeTable)
return az.processHTTPResponse(nil, "", resp, err)
}
return az.createOrUpdateRouteTableWithRetry(routeTable)
}
// createOrUpdateRouteTableWithRetry invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) createOrUpdateRouteTableWithRetry(routeTable network.RouteTable) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel()
defer cancel()
@ -267,8 +447,22 @@ func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable
})
}
// CreateOrUpdateRouteWithRetry invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error {
// CreateOrUpdateRoute invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateRoute(route network.Route) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.RoutesClient.CreateOrUpdate(ctx, az.ResourceGroup, az.RouteTableName, *route.Name, route)
klog.V(10).Infof("RoutesClient.CreateOrUpdate(%s): end", *route.Name)
return az.processHTTPResponse(nil, "", resp, err)
}
return az.createOrUpdateRouteWithRetry(route)
}
// createOrUpdateRouteWithRetry invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) createOrUpdateRouteWithRetry(route network.Route) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel()
defer cancel()
@ -279,14 +473,28 @@ func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error {
})
}
// DeleteRouteWithRetry invokes az.RoutesClient.Delete with exponential backoff retry
func (az *Cloud) DeleteRouteWithRetry(routeName string) error {
// DeleteRouteWithName invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) DeleteRouteWithName(routeName string) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.RoutesClient.Delete(ctx, az.ResourceGroup, az.RouteTableName, routeName)
klog.V(10).Infof("RoutesClient.Delete(%s,%s): end", az.RouteTableName, routeName)
return az.processHTTPResponse(nil, "", resp, err)
}
return az.deleteRouteWithRetry(routeName)
}
// deleteRouteWithRetry invokes az.RoutesClient.Delete with exponential backoff retry
func (az *Cloud) deleteRouteWithRetry(routeName string) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.RoutesClient.Delete(ctx, az.ResourceGroup, az.RouteTableName, routeName)
klog.V(10).Infof("RoutesClient.Delete(%s): end", az.RouteTableName)
klog.V(10).Infof("RoutesClient.Delete(%s,%s): end", az.RouteTableName, routeName)
return az.processHTTPRetryResponse(nil, "", resp, err)
})
}
@ -303,21 +511,17 @@ func (az *Cloud) CreateOrUpdateVMWithRetry(resourceGroup, vmName string, newVM c
})
}
// UpdateVmssVMWithRetry invokes az.VirtualMachineScaleSetVMsClient.Update with exponential backoff retry
func (az *Cloud) UpdateVmssVMWithRetry(ctx context.Context, resourceGroupName string, VMScaleSetName string, instanceID string, parameters compute.VirtualMachineScaleSetVM) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
resp, err := az.VirtualMachineScaleSetVMsClient.Update(ctx, resourceGroupName, VMScaleSetName, instanceID, parameters)
klog.V(10).Infof("VirtualMachinesClient.CreateOrUpdate(%s,%s): end", VMScaleSetName, instanceID)
return az.processHTTPRetryResponse(nil, "", resp, err)
})
}
// isSuccessHTTPResponse determines if the response from an HTTP request suggests success
func isSuccessHTTPResponse(resp http.Response) bool {
func isSuccessHTTPResponse(resp *http.Response) bool {
if resp == nil {
return false
}
// HTTP 2xx suggests a successful response
if 199 < resp.StatusCode && resp.StatusCode < 300 {
return true
}
return false
}
@ -337,7 +541,7 @@ func shouldRetryHTTPRequest(resp *http.Response, err error) bool {
}
func (az *Cloud) processHTTPRetryResponse(service *v1.Service, reason string, resp *http.Response, err error) (bool, error) {
if resp != nil && isSuccessHTTPResponse(*resp) {
if resp != nil && isSuccessHTTPResponse(resp) {
// HTTP 2xx suggests a successful response
return true, nil
}
@ -358,3 +562,24 @@ func (az *Cloud) processHTTPRetryResponse(service *v1.Service, reason string, re
// Fall-through: stop periodic backoff
return true, nil
}
func (az *Cloud) processHTTPResponse(service *v1.Service, reason string, resp *http.Response, err error) error {
if isSuccessHTTPResponse(resp) {
// HTTP 2xx suggests a successful response
return nil
}
if err != nil {
az.Event(service, v1.EventTypeWarning, reason, err.Error())
klog.Errorf("processHTTPRetryResponse failure with err: %v", err)
} else if resp != nil {
az.Event(service, v1.EventTypeWarning, reason, fmt.Sprintf("Azure HTTP response %d", resp.StatusCode))
klog.Errorf("processHTTPRetryResponse failure with HTTP response %q", resp.Status)
}
return err
}
func (cfg *Config) shouldOmitCloudProviderBackoff() bool {
return cfg.CloudProviderBackoffMode == backoffModeV2
}