configurable backoff

- leveraging Config struct (—cloud-config) to store backoff and rate limit on/off and performance configuration
- added add’l error logging
- enabled backoff for vm GET requests
This commit is contained in:
Jack Francis 2017-06-05 16:06:50 -07:00
parent 7e6c689e58
commit 3f3aa279b9
9 changed files with 186 additions and 47 deletions

View File

@ -33,10 +33,20 @@ import (
"github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/azure"
"github.com/ghodss/yaml"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/util/wait"
)
// CloudProviderName is the value used for the --cloud-provider flag
const CloudProviderName = "azure"
const (
// CloudProviderName is the value used for the --cloud-provider flag
CloudProviderName = "azure"
rateLimitQPSDefault = 1
rateLimitBucketDefault = 5
backoffRetriesDefault = 6
backoffExponentDefault = 1.5
backoffDurationDefault = 5 // in seconds
backoffJitterDefault = 1.0
)
// Config holds the configuration parsed from the --cloud-config flag
// All fields are required unless otherwise specified
@ -70,6 +80,22 @@ type Config struct {
AADClientID string `json:"aadClientId" yaml:"aadClientId"`
// The ClientSecret for an AAD application with RBAC access to talk to Azure RM APIs
AADClientSecret string `json:"aadClientSecret" yaml:"aadClientSecret"`
// Enable exponential backoff to manage resource request retries
CloudProviderBackoff bool `json:"cloudProviderBackoff" yaml:"cloudProviderBackoff"`
// Backoff retry limit
CloudProviderBackoffRetries int `json:"cloudProviderBackoffRetries" yaml:"cloudProviderBackoffRetries"`
// Backoff exponent
CloudProviderBackoffExponent float64 `json:"cloudProviderBackoffExponent" yaml:"cloudProviderBackoffExponent"`
// Backoff duration
CloudProviderBackoffDuration int `json:"cloudProviderBackoffDuration" yaml:"cloudProviderBackoffDuration"`
// Backoff jitter
CloudProviderBackoffJitter float64 `json:"cloudProviderBackoffJitter" yaml:"cloudProviderBackoffJitter"`
// Enable rate limiting
CloudProviderRateLimit bool `json:"cloudProviderRateLimit" yaml:"cloudProviderRateLimit"`
// Rate limit QPS
CloudProviderRateLimitQPS int `json:"cloudProviderRateLimitQPS" yaml:"cloudProviderRateLimitQPS"`
// Rate limit Bucket Size
CloudProviderRateLimitBucket int `json:"cloudProviderRateLimitBucket" yaml:"cloudProviderRateLimitBucket"`
}
// Cloud holds the config and clients
@ -86,6 +112,7 @@ type Cloud struct {
VirtualMachinesClient compute.VirtualMachinesClient
StorageAccountClient storage.AccountsClient
operationPollRateLimiter flowcontrol.RateLimiter
resourceRequestBackoff wait.Backoff
}
func init() {
@ -179,8 +206,53 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) {
az.StorageAccountClient = storage.NewAccountsClientWithBaseURI(az.Environment.ResourceManagerEndpoint, az.SubscriptionID)
az.StorageAccountClient.Authorizer = servicePrincipalToken
// 1 qps, up to 5 burst when in flowcontrol; i.e., aggressive backoff enforcement
az.operationPollRateLimiter = flowcontrol.NewTokenBucketRateLimiter(1, 5)
// Conditionally configure rate limits
if az.CloudProviderRateLimit {
// Assign rate limit defaults if no configuration was passed in
if az.CloudProviderRateLimitQPS == 0 {
az.CloudProviderRateLimitQPS = rateLimitQPSDefault
}
if az.CloudProviderRateLimitBucket == 0 {
az.CloudProviderRateLimitBucket = rateLimitBucketDefault
}
az.operationPollRateLimiter = flowcontrol.NewTokenBucketRateLimiter(
float32(az.CloudProviderRateLimitQPS),
az.CloudProviderRateLimitBucket)
glog.V(2).Infof("Azure cloudprovider using rate limits: QPS=%d, bucket=%d",
az.CloudProviderRateLimitQPS,
az.CloudProviderRateLimitBucket)
} else {
// if rate limits are configured off, az.operationPollRateLimiter.Accept() is a no-op
az.operationPollRateLimiter = flowcontrol.NewFakeAlwaysRateLimiter()
}
// Conditionally configure resource request backoff
if az.CloudProviderBackoff {
// Assign backoff defaults if no configuration was passed in
if az.CloudProviderBackoffRetries == 0 {
az.CloudProviderBackoffRetries = backoffRetriesDefault
}
if az.CloudProviderBackoffExponent == 0 {
az.CloudProviderBackoffExponent = backoffExponentDefault
}
if az.CloudProviderBackoffDuration == 0 {
az.CloudProviderBackoffDuration = backoffDurationDefault
}
if az.CloudProviderBackoffJitter == 0 {
az.CloudProviderBackoffJitter = backoffJitterDefault
}
az.resourceRequestBackoff = wait.Backoff{
Steps: az.CloudProviderBackoffRetries,
Factor: az.CloudProviderBackoffExponent,
Duration: time.Duration(az.CloudProviderBackoffDuration) * time.Second,
Jitter: az.CloudProviderBackoffJitter,
}
glog.V(2).Infof("Azure cloudprovider using retry backoff: retries=%d, exponent=%f, duration=%d, jitter=%f",
az.CloudProviderBackoffRetries,
az.CloudProviderBackoffExponent,
az.CloudProviderBackoffDuration,
az.CloudProviderBackoffJitter)
}
return &az, nil
}

View File

@ -17,35 +17,36 @@ limitations under the License.
package azure
import (
"time"
"k8s.io/apimachinery/pkg/util/wait"
"github.com/Azure/azure-sdk-for-go/arm/compute"
"github.com/Azure/azure-sdk-for-go/arm/network"
"github.com/Azure/go-autorest/autorest"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/types"
)
const (
operationPollInterval = 3 * time.Second
operationPollTimeoutDuration = time.Hour
backoffRetries = 12
backoffExponent = 2
backoffDuration = 1 * time.Second
backoffJitter = 1.0
)
var azAPIBackoff = wait.Backoff{
Steps: backoffRetries,
Factor: backoffExponent,
Duration: backoffDuration,
Jitter: backoffJitter,
// GetVirtualMachineWithRetry invokes az.getVirtualMachine with exponential backoff retry
func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.VirtualMachine, bool, error) {
var machine compute.VirtualMachine
var exists bool
err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
az.operationPollRateLimiter.Accept()
var retryErr error
machine, exists, retryErr = az.getVirtualMachine(name)
if retryErr != nil {
glog.Errorf("backoff: failure, will retry,err=%v", retryErr)
return false, nil
}
glog.V(2).Infof("backoff: success")
return true, nil
})
return machine, exists, err
}
// CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error {
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
az.operationPollRateLimiter.Accept()
resp, err := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil)
return processRetryResponse(resp, err)
@ -54,7 +55,7 @@ func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error {
// CreateOrUpdateLBWithRetry invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateLBWithRetry(lb network.LoadBalancer) error {
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
az.operationPollRateLimiter.Accept()
resp, err := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, *lb.Name, lb, nil)
return processRetryResponse(resp, err)
@ -63,7 +64,7 @@ func (az *Cloud) CreateOrUpdateLBWithRetry(lb network.LoadBalancer) error {
// CreateOrUpdatePIPWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdatePIPWithRetry(pip network.PublicIPAddress) error {
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
az.operationPollRateLimiter.Accept()
resp, err := az.PublicIPAddressesClient.CreateOrUpdate(az.ResourceGroup, *pip.Name, pip, nil)
return processRetryResponse(resp, err)
@ -72,7 +73,7 @@ func (az *Cloud) CreateOrUpdatePIPWithRetry(pip network.PublicIPAddress) error {
// CreateOrUpdateInterfaceWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateInterfaceWithRetry(nic network.Interface) error {
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
az.operationPollRateLimiter.Accept()
resp, err := az.InterfacesClient.CreateOrUpdate(az.ResourceGroup, *nic.Name, nic, nil)
return processRetryResponse(resp, err)
@ -81,7 +82,7 @@ func (az *Cloud) CreateOrUpdateInterfaceWithRetry(nic network.Interface) error {
// DeletePublicIPWithRetry invokes az.PublicIPAddressesClient.Delete with exponential backoff retry
func (az *Cloud) DeletePublicIPWithRetry(pipName string) error {
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
az.operationPollRateLimiter.Accept()
resp, err := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil)
return processRetryResponse(resp, err)
@ -90,7 +91,7 @@ func (az *Cloud) DeletePublicIPWithRetry(pipName string) error {
// DeleteLBWithRetry invokes az.LoadBalancerClient.Delete with exponential backoff retry
func (az *Cloud) DeleteLBWithRetry(lbName string) error {
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
az.operationPollRateLimiter.Accept()
resp, err := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil)
return processRetryResponse(resp, err)
@ -99,7 +100,7 @@ func (az *Cloud) DeleteLBWithRetry(lbName string) error {
// CreateOrUpdateRouteTableWithRetry invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable) error {
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
az.operationPollRateLimiter.Accept()
resp, err := az.RouteTablesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, routeTable, nil)
return processRetryResponse(resp, err)
@ -108,7 +109,7 @@ func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable
// CreateOrUpdateRouteWithRetry invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error {
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
az.operationPollRateLimiter.Accept()
resp, err := az.RoutesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, *route.Name, route, nil)
return processRetryResponse(resp, err)
@ -117,7 +118,7 @@ func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error {
// DeleteRouteWithRetry invokes az.RoutesClient.Delete with exponential backoff retry
func (az *Cloud) DeleteRouteWithRetry(routeName string) error {
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
az.operationPollRateLimiter.Accept()
resp, err := az.RoutesClient.Delete(az.ResourceGroup, az.RouteTableName, routeName, nil)
return processRetryResponse(resp, err)
@ -126,7 +127,7 @@ func (az *Cloud) DeleteRouteWithRetry(routeName string) error {
// CreateOrUpdateVMWithRetry invokes az.VirtualMachinesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateVMWithRetry(vmName string, newVM compute.VirtualMachine) error {
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
az.operationPollRateLimiter.Accept()
resp, err := az.VirtualMachinesClient.CreateOrUpdate(az.ResourceGroup, vmName, newVM, nil)
return processRetryResponse(resp, err)

View File

@ -24,6 +24,7 @@ import (
"k8s.io/kubernetes/pkg/cloudprovider"
"github.com/Azure/azure-sdk-for-go/arm/compute"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/types"
)
@ -31,6 +32,7 @@ import (
func (az *Cloud) NodeAddresses(name types.NodeName) ([]v1.NodeAddress, error) {
ip, err := az.getIPForMachine(name)
if err != nil {
glog.Errorf("error: az.NodeAddresses, az.getIPForMachine(%s), err=%v", name, err)
return nil, err
}
@ -55,9 +57,22 @@ func (az *Cloud) ExternalID(name types.NodeName) (string, error) {
// InstanceID returns the cloud provider ID of the specified instance.
// Note that if the instance does not exist or is no longer running, we must return ("", cloudprovider.InstanceNotFound)
func (az *Cloud) InstanceID(name types.NodeName) (string, error) {
machine, exists, err := az.getVirtualMachine(name)
var machine compute.VirtualMachine
var exists bool
var err error
az.operationPollRateLimiter.Accept()
machine, exists, err = az.getVirtualMachine(name)
if err != nil {
return "", err
if az.CloudProviderBackoff {
glog.V(2).Infof("InstanceID(%s) backing off", name)
machine, exists, err = az.GetVirtualMachineWithRetry(name)
if err != nil {
glog.V(2).Infof("InstanceID(%s) abort backoff", name)
return "", err
}
} else {
return "", err
}
} else if !exists {
return "", cloudprovider.InstanceNotFound
}
@ -78,6 +93,7 @@ func (az *Cloud) InstanceTypeByProviderID(providerID string) (string, error) {
func (az *Cloud) InstanceType(name types.NodeName) (string, error) {
machine, exists, err := az.getVirtualMachine(name)
if err != nil {
glog.Errorf("error: az.InstanceType(%s), az.getVirtualMachine(%s) err=%v", name, name, err)
return "", err
} else if !exists {
return "", cloudprovider.InstanceNotFound
@ -102,6 +118,7 @@ func (az *Cloud) listAllNodesInResourceGroup() ([]compute.VirtualMachine, error)
result, err := az.VirtualMachinesClient.List(az.ResourceGroup)
if err != nil {
glog.Errorf("error: az.listAllNodesInResourceGroup(), az.VirtualMachinesClient.List(%s), err=%v", az.ResourceGroup, err)
return nil, err
}
@ -112,6 +129,7 @@ func (az *Cloud) listAllNodesInResourceGroup() ([]compute.VirtualMachine, error)
result, err = az.VirtualMachinesClient.ListAllNextResults(result)
if err != nil {
glog.Errorf("error: az.listAllNodesInResourceGroup(), az.VirtualMachinesClient.ListAllNextResults(%s), err=%v", result, err)
return nil, err
}

View File

@ -150,7 +150,7 @@ func (az *Cloud) EnsureLoadBalancer(clusterName string, service *v1.Service, nod
sg.SecurityGroupPropertiesFormat.NetworkInterfaces = nil
sg.SecurityGroupPropertiesFormat.Subnets = nil
resp, err := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil)
if shouldRetryAPIRequest(resp, err) {
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("ensure(%s) backing off: sg(%s) - updating", serviceName, *sg.Name)
retryErr := az.CreateOrUpdateSGWithRetry(sg)
if retryErr != nil {
@ -228,7 +228,7 @@ func (az *Cloud) EnsureLoadBalancer(clusterName string, service *v1.Service, nod
if !existsLb || lbNeedsUpdate {
glog.V(3).Infof("ensure(%s): lb(%s) - updating", serviceName, lbName)
resp, err := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, *lb.Name, lb, nil)
if shouldRetryAPIRequest(resp, err) {
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("ensure(%s) backing off: lb(%s) - updating", serviceName, lbName)
retryErr := az.CreateOrUpdateLBWithRetry(lb)
if retryErr != nil {
@ -327,7 +327,7 @@ func (az *Cloud) EnsureLoadBalancerDeleted(clusterName string, service *v1.Servi
sg.SecurityGroupPropertiesFormat.NetworkInterfaces = nil
sg.SecurityGroupPropertiesFormat.Subnets = nil
resp, err := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *reconciledSg.Name, reconciledSg, nil)
if shouldRetryAPIRequest(resp, err) {
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("delete(%s) backing off: sg(%s) - updating", serviceName, az.SecurityGroupName)
retryErr := az.CreateOrUpdateSGWithRetry(reconciledSg)
if retryErr != nil {
@ -364,7 +364,7 @@ func (az *Cloud) cleanupLoadBalancer(clusterName string, service *v1.Service, is
if len(*lb.FrontendIPConfigurations) > 0 {
glog.V(3).Infof("delete(%s): lb(%s) - updating", serviceName, lbName)
resp, err := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, *lb.Name, lb, nil)
if shouldRetryAPIRequest(resp, err) {
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("delete(%s) backing off: sg(%s) - updating", serviceName, az.SecurityGroupName)
retryErr := az.CreateOrUpdateLBWithRetry(lb)
if retryErr != nil {
@ -379,7 +379,7 @@ func (az *Cloud) cleanupLoadBalancer(clusterName string, service *v1.Service, is
glog.V(3).Infof("delete(%s): lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName)
resp, err := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil)
if shouldRetryAPIRequest(resp, err) {
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("delete(%s) backing off: lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName)
retryErr := az.DeleteLBWithRetry(lbName)
if retryErr != nil {
@ -433,7 +433,7 @@ func (az *Cloud) ensurePublicIPExists(serviceName, pipName string) (*network.Pub
glog.V(3).Infof("ensure(%s): pip(%s) - creating", serviceName, *pip.Name)
resp, err := az.PublicIPAddressesClient.CreateOrUpdate(az.ResourceGroup, *pip.Name, pip, nil)
if shouldRetryAPIRequest(resp, err) {
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("ensure(%s) backing off: pip(%s) - creating", serviceName, *pip.Name)
retryErr := az.CreateOrUpdatePIPWithRetry(pip)
if retryErr != nil {
@ -457,7 +457,7 @@ func (az *Cloud) ensurePublicIPExists(serviceName, pipName string) (*network.Pub
func (az *Cloud) ensurePublicIPDeleted(serviceName, pipName string) error {
glog.V(2).Infof("ensure(%s): pip(%s) - deleting", serviceName, pipName)
resp, deleteErr := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil)
if shouldRetryAPIRequest(resp, deleteErr) {
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, deleteErr) {
glog.V(2).Infof("ensure(%s) backing off: pip(%s) - deleting", serviceName, pipName)
retryErr := az.DeletePublicIPWithRetry(pipName)
if retryErr != nil {
@ -849,6 +849,7 @@ func findSecurityRule(rules []network.SecurityRule, rule network.SecurityRule) b
// participating in the specified LoadBalancer Backend Pool.
func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, backendPoolID string) error {
vmName := mapNodeNameToVMName(nodeName)
az.operationPollRateLimiter.Accept()
machine, err := az.VirtualMachinesClient.Get(az.ResourceGroup, vmName, "")
if err != nil {
return err
@ -906,8 +907,8 @@ func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, b
glog.V(3).Infof("nicupdate(%s): nic(%s) - updating", serviceName, nicName)
resp, err := az.InterfacesClient.CreateOrUpdate(az.ResourceGroup, *nic.Name, nic, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("nicupdate(%s) backing off: nic(%s) - updating", serviceName, nicName)
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("nicupdate(%s) backing off: nic(%s) - updating, err=%v", serviceName, nicName, err)
retryErr := az.CreateOrUpdateInterfaceWithRetry(nic)
if retryErr != nil {
err = retryErr

View File

@ -78,7 +78,7 @@ func (az *Cloud) CreateRoute(clusterName string, nameHint string, kubeRoute *clo
glog.V(3).Infof("create: creating routetable. routeTableName=%q", az.RouteTableName)
resp, err := az.RouteTablesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, routeTable, nil)
if shouldRetryAPIRequest(resp, err) {
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("create backing off: creating routetable. routeTableName=%q", az.RouteTableName)
retryErr := az.CreateOrUpdateRouteTableWithRetry(routeTable)
if retryErr != nil {
@ -113,7 +113,7 @@ func (az *Cloud) CreateRoute(clusterName string, nameHint string, kubeRoute *clo
glog.V(3).Infof("create: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
resp, err := az.RoutesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, *route.Name, route, nil)
if shouldRetryAPIRequest(resp, err) {
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("create backing off: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
retryErr := az.CreateOrUpdateRouteWithRetry(route)
if retryErr != nil {
@ -136,7 +136,7 @@ func (az *Cloud) DeleteRoute(clusterName string, kubeRoute *cloudprovider.Route)
routeName := mapNodeNameToRouteName(kubeRoute.TargetNode)
resp, err := az.RoutesClient.Delete(az.ResourceGroup, az.RouteTableName, routeName, nil)
if shouldRetryAPIRequest(resp, err) {
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("delete backing off: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
retryErr := az.DeleteRouteWithRetry(routeName)
if retryErr != nil {

View File

@ -66,7 +66,7 @@ func (az *Cloud) AttachDisk(diskName, diskURI string, nodeName types.NodeName, l
vmName := mapNodeNameToVMName(nodeName)
glog.V(2).Infof("create(%s): vm(%s)", az.ResourceGroup, vmName)
resp, err := az.VirtualMachinesClient.CreateOrUpdate(az.ResourceGroup, vmName, newVM, nil)
if shouldRetryAPIRequest(resp, err) {
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("create(%s) backing off: vm(%s)", az.ResourceGroup, vmName)
retryErr := az.CreateOrUpdateVMWithRetry(vmName, newVM)
if retryErr != nil {
@ -146,7 +146,7 @@ func (az *Cloud) DetachDiskByName(diskName, diskURI string, nodeName types.NodeN
vmName := mapNodeNameToVMName(nodeName)
glog.V(2).Infof("create(%s): vm(%s)", az.ResourceGroup, vmName)
resp, err := az.VirtualMachinesClient.CreateOrUpdate(az.ResourceGroup, vmName, newVM, nil)
if shouldRetryAPIRequest(resp, err) {
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("create(%s) backing off: vm(%s)", az.ResourceGroup, vmName)
retryErr := az.CreateOrUpdateVMWithRetry(vmName, newVM)
if retryErr != nil {

View File

@ -591,7 +591,15 @@ func TestNewCloudFromJSON(t *testing.T) {
"securityGroupName": "--security-group-name--",
"vnetName": "--vnet-name--",
"routeTableName": "--route-table-name--",
"primaryAvailabilitySetName": "--primary-availability-set-name--"
"primaryAvailabilitySetName": "--primary-availability-set-name--",
"cloudProviderBackoff": true,
"cloudProviderBackoffRetries": 6,
"cloudProviderBackoffExponent": 1.5,
"cloudProviderBackoffDuration": 5,
"cloudProviderBackoffJitter": 1.0,
"cloudProviderRatelimit": true,
"cloudProviderRateLimitQPS": 1,
"cloudProviderRateLimitBucket": 5
}`
validateConfig(t, config)
}
@ -610,6 +618,14 @@ securityGroupName: --security-group-name--
vnetName: --vnet-name--
routeTableName: --route-table-name--
primaryAvailabilitySetName: --primary-availability-set-name--
cloudProviderBackoff: true
cloudProviderBackoffRetries: 6
cloudProviderBackoffExponent: 1.5
cloudProviderBackoffDuration: 5
cloudProviderBackoffJitter: 1.0
cloudProviderRatelimit: true
cloudProviderRateLimitQPS: 1
cloudProviderRateLimitBucket: 5
`
validateConfig(t, config)
}
@ -659,6 +675,30 @@ func validateConfig(t *testing.T, config string) {
if azureCloud.PrimaryAvailabilitySetName != "--primary-availability-set-name--" {
t.Errorf("got incorrect value for PrimaryAvailabilitySetName")
}
if azureCloud.CloudProviderBackoff != true {
t.Errorf("got incorrect value for CloudProviderBackoff")
}
if azureCloud.CloudProviderBackoffRetries != 6 {
t.Errorf("got incorrect value for CloudProviderBackoffRetries")
}
if azureCloud.CloudProviderBackoffExponent != 1.5 {
t.Errorf("got incorrect value for CloudProviderBackoffExponent")
}
if azureCloud.CloudProviderBackoffDuration != 5 {
t.Errorf("got incorrect value for CloudProviderBackoffDuration")
}
if azureCloud.CloudProviderBackoffJitter != 1.0 {
t.Errorf("got incorrect value for CloudProviderBackoffJitter")
}
if azureCloud.CloudProviderRateLimit != true {
t.Errorf("got incorrect value for CloudProviderRateLimit")
}
if azureCloud.CloudProviderRateLimitQPS != 1 {
t.Errorf("got incorrect value for CloudProviderRateLimitQPS")
}
if azureCloud.CloudProviderRateLimitBucket != 5 {
t.Errorf("got incorrect value for CloudProviderRateLimitBucket")
}
}
func TestDecodeInstanceInfo(t *testing.T) {

View File

@ -25,6 +25,7 @@ import (
"github.com/Azure/azure-sdk-for-go/arm/compute"
"github.com/Azure/azure-sdk-for-go/arm/network"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/types"
)
@ -242,26 +243,31 @@ func (az *Cloud) getIPForMachine(nodeName types.NodeName) (string, error) {
return "", cloudprovider.InstanceNotFound
}
if err != nil {
glog.Errorf("error: az.getIPForMachine(%s), az.getVirtualMachine(%s), err=%v", nodeName, nodeName, err)
return "", err
}
nicID, err := getPrimaryInterfaceID(machine)
if err != nil {
glog.Errorf("error: az.getIPForMachine(%s), getPrimaryInterfaceID(%s), err=%v", nodeName, machine, err)
return "", err
}
nicName, err := getLastSegment(nicID)
if err != nil {
glog.Errorf("error: az.getIPForMachine(%s), getLastSegment(%s), err=%v", nodeName, nicID, err)
return "", err
}
nic, err := az.InterfacesClient.Get(az.ResourceGroup, nicName, "")
if err != nil {
glog.Errorf("error: az.getIPForMachine(%s), az.InterfacesClient.Get(%s, %s, %s), err=%v", nodeName, az.ResourceGroup, nicName, "", err)
return "", err
}
ipConfig, err := getPrimaryIPConfig(nic)
if err != nil {
glog.Errorf("error: az.getIPForMachine(%s), getPrimaryIPConfig(%s), err=%v", nodeName, nic, err)
return "", err
}

View File

@ -43,6 +43,7 @@ func (az *Cloud) getVirtualMachine(nodeName types.NodeName) (vm compute.VirtualM
var realErr error
vmName := string(nodeName)
az.operationPollRateLimiter.Accept()
vm, err = az.VirtualMachinesClient.Get(az.ResourceGroup, vmName, "")
exists, realErr = checkResourceExistsFromError(err)