mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-29 14:37:00 +00:00
configurable backoff
- leveraging Config struct (—cloud-config) to store backoff and rate limit on/off and performance configuration - added add’l error logging - enabled backoff for vm GET requests
This commit is contained in:
parent
7e6c689e58
commit
3f3aa279b9
@ -33,10 +33,20 @@ import (
|
||||
"github.com/Azure/go-autorest/autorest"
|
||||
"github.com/Azure/go-autorest/autorest/azure"
|
||||
"github.com/ghodss/yaml"
|
||||
"github.com/golang/glog"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
)
|
||||
|
||||
// CloudProviderName is the value used for the --cloud-provider flag
|
||||
const CloudProviderName = "azure"
|
||||
const (
|
||||
// CloudProviderName is the value used for the --cloud-provider flag
|
||||
CloudProviderName = "azure"
|
||||
rateLimitQPSDefault = 1
|
||||
rateLimitBucketDefault = 5
|
||||
backoffRetriesDefault = 6
|
||||
backoffExponentDefault = 1.5
|
||||
backoffDurationDefault = 5 // in seconds
|
||||
backoffJitterDefault = 1.0
|
||||
)
|
||||
|
||||
// Config holds the configuration parsed from the --cloud-config flag
|
||||
// All fields are required unless otherwise specified
|
||||
@ -70,6 +80,22 @@ type Config struct {
|
||||
AADClientID string `json:"aadClientId" yaml:"aadClientId"`
|
||||
// The ClientSecret for an AAD application with RBAC access to talk to Azure RM APIs
|
||||
AADClientSecret string `json:"aadClientSecret" yaml:"aadClientSecret"`
|
||||
// Enable exponential backoff to manage resource request retries
|
||||
CloudProviderBackoff bool `json:"cloudProviderBackoff" yaml:"cloudProviderBackoff"`
|
||||
// Backoff retry limit
|
||||
CloudProviderBackoffRetries int `json:"cloudProviderBackoffRetries" yaml:"cloudProviderBackoffRetries"`
|
||||
// Backoff exponent
|
||||
CloudProviderBackoffExponent float64 `json:"cloudProviderBackoffExponent" yaml:"cloudProviderBackoffExponent"`
|
||||
// Backoff duration
|
||||
CloudProviderBackoffDuration int `json:"cloudProviderBackoffDuration" yaml:"cloudProviderBackoffDuration"`
|
||||
// Backoff jitter
|
||||
CloudProviderBackoffJitter float64 `json:"cloudProviderBackoffJitter" yaml:"cloudProviderBackoffJitter"`
|
||||
// Enable rate limiting
|
||||
CloudProviderRateLimit bool `json:"cloudProviderRateLimit" yaml:"cloudProviderRateLimit"`
|
||||
// Rate limit QPS
|
||||
CloudProviderRateLimitQPS int `json:"cloudProviderRateLimitQPS" yaml:"cloudProviderRateLimitQPS"`
|
||||
// Rate limit Bucket Size
|
||||
CloudProviderRateLimitBucket int `json:"cloudProviderRateLimitBucket" yaml:"cloudProviderRateLimitBucket"`
|
||||
}
|
||||
|
||||
// Cloud holds the config and clients
|
||||
@ -86,6 +112,7 @@ type Cloud struct {
|
||||
VirtualMachinesClient compute.VirtualMachinesClient
|
||||
StorageAccountClient storage.AccountsClient
|
||||
operationPollRateLimiter flowcontrol.RateLimiter
|
||||
resourceRequestBackoff wait.Backoff
|
||||
}
|
||||
|
||||
func init() {
|
||||
@ -179,8 +206,53 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) {
|
||||
az.StorageAccountClient = storage.NewAccountsClientWithBaseURI(az.Environment.ResourceManagerEndpoint, az.SubscriptionID)
|
||||
az.StorageAccountClient.Authorizer = servicePrincipalToken
|
||||
|
||||
// 1 qps, up to 5 burst when in flowcontrol; i.e., aggressive backoff enforcement
|
||||
az.operationPollRateLimiter = flowcontrol.NewTokenBucketRateLimiter(1, 5)
|
||||
// Conditionally configure rate limits
|
||||
if az.CloudProviderRateLimit {
|
||||
// Assign rate limit defaults if no configuration was passed in
|
||||
if az.CloudProviderRateLimitQPS == 0 {
|
||||
az.CloudProviderRateLimitQPS = rateLimitQPSDefault
|
||||
}
|
||||
if az.CloudProviderRateLimitBucket == 0 {
|
||||
az.CloudProviderRateLimitBucket = rateLimitBucketDefault
|
||||
}
|
||||
az.operationPollRateLimiter = flowcontrol.NewTokenBucketRateLimiter(
|
||||
float32(az.CloudProviderRateLimitQPS),
|
||||
az.CloudProviderRateLimitBucket)
|
||||
glog.V(2).Infof("Azure cloudprovider using rate limits: QPS=%d, bucket=%d",
|
||||
az.CloudProviderRateLimitQPS,
|
||||
az.CloudProviderRateLimitBucket)
|
||||
} else {
|
||||
// if rate limits are configured off, az.operationPollRateLimiter.Accept() is a no-op
|
||||
az.operationPollRateLimiter = flowcontrol.NewFakeAlwaysRateLimiter()
|
||||
}
|
||||
|
||||
// Conditionally configure resource request backoff
|
||||
if az.CloudProviderBackoff {
|
||||
// Assign backoff defaults if no configuration was passed in
|
||||
if az.CloudProviderBackoffRetries == 0 {
|
||||
az.CloudProviderBackoffRetries = backoffRetriesDefault
|
||||
}
|
||||
if az.CloudProviderBackoffExponent == 0 {
|
||||
az.CloudProviderBackoffExponent = backoffExponentDefault
|
||||
}
|
||||
if az.CloudProviderBackoffDuration == 0 {
|
||||
az.CloudProviderBackoffDuration = backoffDurationDefault
|
||||
}
|
||||
if az.CloudProviderBackoffJitter == 0 {
|
||||
az.CloudProviderBackoffJitter = backoffJitterDefault
|
||||
}
|
||||
az.resourceRequestBackoff = wait.Backoff{
|
||||
Steps: az.CloudProviderBackoffRetries,
|
||||
Factor: az.CloudProviderBackoffExponent,
|
||||
Duration: time.Duration(az.CloudProviderBackoffDuration) * time.Second,
|
||||
Jitter: az.CloudProviderBackoffJitter,
|
||||
}
|
||||
glog.V(2).Infof("Azure cloudprovider using retry backoff: retries=%d, exponent=%f, duration=%d, jitter=%f",
|
||||
az.CloudProviderBackoffRetries,
|
||||
az.CloudProviderBackoffExponent,
|
||||
az.CloudProviderBackoffDuration,
|
||||
az.CloudProviderBackoffJitter)
|
||||
}
|
||||
|
||||
return &az, nil
|
||||
}
|
||||
|
@ -17,35 +17,36 @@ limitations under the License.
|
||||
package azure
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
|
||||
"github.com/Azure/azure-sdk-for-go/arm/compute"
|
||||
"github.com/Azure/azure-sdk-for-go/arm/network"
|
||||
"github.com/Azure/go-autorest/autorest"
|
||||
"github.com/golang/glog"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
)
|
||||
|
||||
const (
|
||||
operationPollInterval = 3 * time.Second
|
||||
operationPollTimeoutDuration = time.Hour
|
||||
backoffRetries = 12
|
||||
backoffExponent = 2
|
||||
backoffDuration = 1 * time.Second
|
||||
backoffJitter = 1.0
|
||||
)
|
||||
|
||||
var azAPIBackoff = wait.Backoff{
|
||||
Steps: backoffRetries,
|
||||
Factor: backoffExponent,
|
||||
Duration: backoffDuration,
|
||||
Jitter: backoffJitter,
|
||||
// GetVirtualMachineWithRetry invokes az.getVirtualMachine with exponential backoff retry
|
||||
func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.VirtualMachine, bool, error) {
|
||||
var machine compute.VirtualMachine
|
||||
var exists bool
|
||||
err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
|
||||
az.operationPollRateLimiter.Accept()
|
||||
var retryErr error
|
||||
machine, exists, retryErr = az.getVirtualMachine(name)
|
||||
if retryErr != nil {
|
||||
glog.Errorf("backoff: failure, will retry,err=%v", retryErr)
|
||||
return false, nil
|
||||
}
|
||||
glog.V(2).Infof("backoff: success")
|
||||
return true, nil
|
||||
})
|
||||
return machine, exists, err
|
||||
}
|
||||
|
||||
// CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry
|
||||
func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error {
|
||||
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
|
||||
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
|
||||
az.operationPollRateLimiter.Accept()
|
||||
resp, err := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil)
|
||||
return processRetryResponse(resp, err)
|
||||
@ -54,7 +55,7 @@ func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error {
|
||||
|
||||
// CreateOrUpdateLBWithRetry invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry
|
||||
func (az *Cloud) CreateOrUpdateLBWithRetry(lb network.LoadBalancer) error {
|
||||
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
|
||||
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
|
||||
az.operationPollRateLimiter.Accept()
|
||||
resp, err := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, *lb.Name, lb, nil)
|
||||
return processRetryResponse(resp, err)
|
||||
@ -63,7 +64,7 @@ func (az *Cloud) CreateOrUpdateLBWithRetry(lb network.LoadBalancer) error {
|
||||
|
||||
// CreateOrUpdatePIPWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
|
||||
func (az *Cloud) CreateOrUpdatePIPWithRetry(pip network.PublicIPAddress) error {
|
||||
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
|
||||
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
|
||||
az.operationPollRateLimiter.Accept()
|
||||
resp, err := az.PublicIPAddressesClient.CreateOrUpdate(az.ResourceGroup, *pip.Name, pip, nil)
|
||||
return processRetryResponse(resp, err)
|
||||
@ -72,7 +73,7 @@ func (az *Cloud) CreateOrUpdatePIPWithRetry(pip network.PublicIPAddress) error {
|
||||
|
||||
// CreateOrUpdateInterfaceWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
|
||||
func (az *Cloud) CreateOrUpdateInterfaceWithRetry(nic network.Interface) error {
|
||||
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
|
||||
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
|
||||
az.operationPollRateLimiter.Accept()
|
||||
resp, err := az.InterfacesClient.CreateOrUpdate(az.ResourceGroup, *nic.Name, nic, nil)
|
||||
return processRetryResponse(resp, err)
|
||||
@ -81,7 +82,7 @@ func (az *Cloud) CreateOrUpdateInterfaceWithRetry(nic network.Interface) error {
|
||||
|
||||
// DeletePublicIPWithRetry invokes az.PublicIPAddressesClient.Delete with exponential backoff retry
|
||||
func (az *Cloud) DeletePublicIPWithRetry(pipName string) error {
|
||||
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
|
||||
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
|
||||
az.operationPollRateLimiter.Accept()
|
||||
resp, err := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil)
|
||||
return processRetryResponse(resp, err)
|
||||
@ -90,7 +91,7 @@ func (az *Cloud) DeletePublicIPWithRetry(pipName string) error {
|
||||
|
||||
// DeleteLBWithRetry invokes az.LoadBalancerClient.Delete with exponential backoff retry
|
||||
func (az *Cloud) DeleteLBWithRetry(lbName string) error {
|
||||
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
|
||||
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
|
||||
az.operationPollRateLimiter.Accept()
|
||||
resp, err := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil)
|
||||
return processRetryResponse(resp, err)
|
||||
@ -99,7 +100,7 @@ func (az *Cloud) DeleteLBWithRetry(lbName string) error {
|
||||
|
||||
// CreateOrUpdateRouteTableWithRetry invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry
|
||||
func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable) error {
|
||||
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
|
||||
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
|
||||
az.operationPollRateLimiter.Accept()
|
||||
resp, err := az.RouteTablesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, routeTable, nil)
|
||||
return processRetryResponse(resp, err)
|
||||
@ -108,7 +109,7 @@ func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable
|
||||
|
||||
// CreateOrUpdateRouteWithRetry invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry
|
||||
func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error {
|
||||
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
|
||||
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
|
||||
az.operationPollRateLimiter.Accept()
|
||||
resp, err := az.RoutesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, *route.Name, route, nil)
|
||||
return processRetryResponse(resp, err)
|
||||
@ -117,7 +118,7 @@ func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error {
|
||||
|
||||
// DeleteRouteWithRetry invokes az.RoutesClient.Delete with exponential backoff retry
|
||||
func (az *Cloud) DeleteRouteWithRetry(routeName string) error {
|
||||
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
|
||||
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
|
||||
az.operationPollRateLimiter.Accept()
|
||||
resp, err := az.RoutesClient.Delete(az.ResourceGroup, az.RouteTableName, routeName, nil)
|
||||
return processRetryResponse(resp, err)
|
||||
@ -126,7 +127,7 @@ func (az *Cloud) DeleteRouteWithRetry(routeName string) error {
|
||||
|
||||
// CreateOrUpdateVMWithRetry invokes az.VirtualMachinesClient.CreateOrUpdate with exponential backoff retry
|
||||
func (az *Cloud) CreateOrUpdateVMWithRetry(vmName string, newVM compute.VirtualMachine) error {
|
||||
return wait.ExponentialBackoff(azAPIBackoff, func() (bool, error) {
|
||||
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
|
||||
az.operationPollRateLimiter.Accept()
|
||||
resp, err := az.VirtualMachinesClient.CreateOrUpdate(az.ResourceGroup, vmName, newVM, nil)
|
||||
return processRetryResponse(resp, err)
|
||||
|
@ -24,6 +24,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/cloudprovider"
|
||||
|
||||
"github.com/Azure/azure-sdk-for-go/arm/compute"
|
||||
"github.com/golang/glog"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
)
|
||||
|
||||
@ -31,6 +32,7 @@ import (
|
||||
func (az *Cloud) NodeAddresses(name types.NodeName) ([]v1.NodeAddress, error) {
|
||||
ip, err := az.getIPForMachine(name)
|
||||
if err != nil {
|
||||
glog.Errorf("error: az.NodeAddresses, az.getIPForMachine(%s), err=%v", name, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -55,9 +57,22 @@ func (az *Cloud) ExternalID(name types.NodeName) (string, error) {
|
||||
// InstanceID returns the cloud provider ID of the specified instance.
|
||||
// Note that if the instance does not exist or is no longer running, we must return ("", cloudprovider.InstanceNotFound)
|
||||
func (az *Cloud) InstanceID(name types.NodeName) (string, error) {
|
||||
machine, exists, err := az.getVirtualMachine(name)
|
||||
var machine compute.VirtualMachine
|
||||
var exists bool
|
||||
var err error
|
||||
az.operationPollRateLimiter.Accept()
|
||||
machine, exists, err = az.getVirtualMachine(name)
|
||||
if err != nil {
|
||||
return "", err
|
||||
if az.CloudProviderBackoff {
|
||||
glog.V(2).Infof("InstanceID(%s) backing off", name)
|
||||
machine, exists, err = az.GetVirtualMachineWithRetry(name)
|
||||
if err != nil {
|
||||
glog.V(2).Infof("InstanceID(%s) abort backoff", name)
|
||||
return "", err
|
||||
}
|
||||
} else {
|
||||
return "", err
|
||||
}
|
||||
} else if !exists {
|
||||
return "", cloudprovider.InstanceNotFound
|
||||
}
|
||||
@ -78,6 +93,7 @@ func (az *Cloud) InstanceTypeByProviderID(providerID string) (string, error) {
|
||||
func (az *Cloud) InstanceType(name types.NodeName) (string, error) {
|
||||
machine, exists, err := az.getVirtualMachine(name)
|
||||
if err != nil {
|
||||
glog.Errorf("error: az.InstanceType(%s), az.getVirtualMachine(%s) err=%v", name, name, err)
|
||||
return "", err
|
||||
} else if !exists {
|
||||
return "", cloudprovider.InstanceNotFound
|
||||
@ -102,6 +118,7 @@ func (az *Cloud) listAllNodesInResourceGroup() ([]compute.VirtualMachine, error)
|
||||
|
||||
result, err := az.VirtualMachinesClient.List(az.ResourceGroup)
|
||||
if err != nil {
|
||||
glog.Errorf("error: az.listAllNodesInResourceGroup(), az.VirtualMachinesClient.List(%s), err=%v", az.ResourceGroup, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -112,6 +129,7 @@ func (az *Cloud) listAllNodesInResourceGroup() ([]compute.VirtualMachine, error)
|
||||
|
||||
result, err = az.VirtualMachinesClient.ListAllNextResults(result)
|
||||
if err != nil {
|
||||
glog.Errorf("error: az.listAllNodesInResourceGroup(), az.VirtualMachinesClient.ListAllNextResults(%s), err=%v", result, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
@ -150,7 +150,7 @@ func (az *Cloud) EnsureLoadBalancer(clusterName string, service *v1.Service, nod
|
||||
sg.SecurityGroupPropertiesFormat.NetworkInterfaces = nil
|
||||
sg.SecurityGroupPropertiesFormat.Subnets = nil
|
||||
resp, err := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil)
|
||||
if shouldRetryAPIRequest(resp, err) {
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("ensure(%s) backing off: sg(%s) - updating", serviceName, *sg.Name)
|
||||
retryErr := az.CreateOrUpdateSGWithRetry(sg)
|
||||
if retryErr != nil {
|
||||
@ -228,7 +228,7 @@ func (az *Cloud) EnsureLoadBalancer(clusterName string, service *v1.Service, nod
|
||||
if !existsLb || lbNeedsUpdate {
|
||||
glog.V(3).Infof("ensure(%s): lb(%s) - updating", serviceName, lbName)
|
||||
resp, err := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, *lb.Name, lb, nil)
|
||||
if shouldRetryAPIRequest(resp, err) {
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("ensure(%s) backing off: lb(%s) - updating", serviceName, lbName)
|
||||
retryErr := az.CreateOrUpdateLBWithRetry(lb)
|
||||
if retryErr != nil {
|
||||
@ -327,7 +327,7 @@ func (az *Cloud) EnsureLoadBalancerDeleted(clusterName string, service *v1.Servi
|
||||
sg.SecurityGroupPropertiesFormat.NetworkInterfaces = nil
|
||||
sg.SecurityGroupPropertiesFormat.Subnets = nil
|
||||
resp, err := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *reconciledSg.Name, reconciledSg, nil)
|
||||
if shouldRetryAPIRequest(resp, err) {
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("delete(%s) backing off: sg(%s) - updating", serviceName, az.SecurityGroupName)
|
||||
retryErr := az.CreateOrUpdateSGWithRetry(reconciledSg)
|
||||
if retryErr != nil {
|
||||
@ -364,7 +364,7 @@ func (az *Cloud) cleanupLoadBalancer(clusterName string, service *v1.Service, is
|
||||
if len(*lb.FrontendIPConfigurations) > 0 {
|
||||
glog.V(3).Infof("delete(%s): lb(%s) - updating", serviceName, lbName)
|
||||
resp, err := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, *lb.Name, lb, nil)
|
||||
if shouldRetryAPIRequest(resp, err) {
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("delete(%s) backing off: sg(%s) - updating", serviceName, az.SecurityGroupName)
|
||||
retryErr := az.CreateOrUpdateLBWithRetry(lb)
|
||||
if retryErr != nil {
|
||||
@ -379,7 +379,7 @@ func (az *Cloud) cleanupLoadBalancer(clusterName string, service *v1.Service, is
|
||||
glog.V(3).Infof("delete(%s): lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName)
|
||||
|
||||
resp, err := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil)
|
||||
if shouldRetryAPIRequest(resp, err) {
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("delete(%s) backing off: lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName)
|
||||
retryErr := az.DeleteLBWithRetry(lbName)
|
||||
if retryErr != nil {
|
||||
@ -433,7 +433,7 @@ func (az *Cloud) ensurePublicIPExists(serviceName, pipName string) (*network.Pub
|
||||
|
||||
glog.V(3).Infof("ensure(%s): pip(%s) - creating", serviceName, *pip.Name)
|
||||
resp, err := az.PublicIPAddressesClient.CreateOrUpdate(az.ResourceGroup, *pip.Name, pip, nil)
|
||||
if shouldRetryAPIRequest(resp, err) {
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("ensure(%s) backing off: pip(%s) - creating", serviceName, *pip.Name)
|
||||
retryErr := az.CreateOrUpdatePIPWithRetry(pip)
|
||||
if retryErr != nil {
|
||||
@ -457,7 +457,7 @@ func (az *Cloud) ensurePublicIPExists(serviceName, pipName string) (*network.Pub
|
||||
func (az *Cloud) ensurePublicIPDeleted(serviceName, pipName string) error {
|
||||
glog.V(2).Infof("ensure(%s): pip(%s) - deleting", serviceName, pipName)
|
||||
resp, deleteErr := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil)
|
||||
if shouldRetryAPIRequest(resp, deleteErr) {
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, deleteErr) {
|
||||
glog.V(2).Infof("ensure(%s) backing off: pip(%s) - deleting", serviceName, pipName)
|
||||
retryErr := az.DeletePublicIPWithRetry(pipName)
|
||||
if retryErr != nil {
|
||||
@ -849,6 +849,7 @@ func findSecurityRule(rules []network.SecurityRule, rule network.SecurityRule) b
|
||||
// participating in the specified LoadBalancer Backend Pool.
|
||||
func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, backendPoolID string) error {
|
||||
vmName := mapNodeNameToVMName(nodeName)
|
||||
az.operationPollRateLimiter.Accept()
|
||||
machine, err := az.VirtualMachinesClient.Get(az.ResourceGroup, vmName, "")
|
||||
if err != nil {
|
||||
return err
|
||||
@ -906,8 +907,8 @@ func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, b
|
||||
|
||||
glog.V(3).Infof("nicupdate(%s): nic(%s) - updating", serviceName, nicName)
|
||||
resp, err := az.InterfacesClient.CreateOrUpdate(az.ResourceGroup, *nic.Name, nic, nil)
|
||||
if shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("nicupdate(%s) backing off: nic(%s) - updating", serviceName, nicName)
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("nicupdate(%s) backing off: nic(%s) - updating, err=%v", serviceName, nicName, err)
|
||||
retryErr := az.CreateOrUpdateInterfaceWithRetry(nic)
|
||||
if retryErr != nil {
|
||||
err = retryErr
|
||||
|
@ -78,7 +78,7 @@ func (az *Cloud) CreateRoute(clusterName string, nameHint string, kubeRoute *clo
|
||||
|
||||
glog.V(3).Infof("create: creating routetable. routeTableName=%q", az.RouteTableName)
|
||||
resp, err := az.RouteTablesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, routeTable, nil)
|
||||
if shouldRetryAPIRequest(resp, err) {
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("create backing off: creating routetable. routeTableName=%q", az.RouteTableName)
|
||||
retryErr := az.CreateOrUpdateRouteTableWithRetry(routeTable)
|
||||
if retryErr != nil {
|
||||
@ -113,7 +113,7 @@ func (az *Cloud) CreateRoute(clusterName string, nameHint string, kubeRoute *clo
|
||||
|
||||
glog.V(3).Infof("create: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
|
||||
resp, err := az.RoutesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, *route.Name, route, nil)
|
||||
if shouldRetryAPIRequest(resp, err) {
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("create backing off: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
|
||||
retryErr := az.CreateOrUpdateRouteWithRetry(route)
|
||||
if retryErr != nil {
|
||||
@ -136,7 +136,7 @@ func (az *Cloud) DeleteRoute(clusterName string, kubeRoute *cloudprovider.Route)
|
||||
|
||||
routeName := mapNodeNameToRouteName(kubeRoute.TargetNode)
|
||||
resp, err := az.RoutesClient.Delete(az.ResourceGroup, az.RouteTableName, routeName, nil)
|
||||
if shouldRetryAPIRequest(resp, err) {
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("delete backing off: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
|
||||
retryErr := az.DeleteRouteWithRetry(routeName)
|
||||
if retryErr != nil {
|
||||
|
@ -66,7 +66,7 @@ func (az *Cloud) AttachDisk(diskName, diskURI string, nodeName types.NodeName, l
|
||||
vmName := mapNodeNameToVMName(nodeName)
|
||||
glog.V(2).Infof("create(%s): vm(%s)", az.ResourceGroup, vmName)
|
||||
resp, err := az.VirtualMachinesClient.CreateOrUpdate(az.ResourceGroup, vmName, newVM, nil)
|
||||
if shouldRetryAPIRequest(resp, err) {
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("create(%s) backing off: vm(%s)", az.ResourceGroup, vmName)
|
||||
retryErr := az.CreateOrUpdateVMWithRetry(vmName, newVM)
|
||||
if retryErr != nil {
|
||||
@ -146,7 +146,7 @@ func (az *Cloud) DetachDiskByName(diskName, diskURI string, nodeName types.NodeN
|
||||
vmName := mapNodeNameToVMName(nodeName)
|
||||
glog.V(2).Infof("create(%s): vm(%s)", az.ResourceGroup, vmName)
|
||||
resp, err := az.VirtualMachinesClient.CreateOrUpdate(az.ResourceGroup, vmName, newVM, nil)
|
||||
if shouldRetryAPIRequest(resp, err) {
|
||||
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
|
||||
glog.V(2).Infof("create(%s) backing off: vm(%s)", az.ResourceGroup, vmName)
|
||||
retryErr := az.CreateOrUpdateVMWithRetry(vmName, newVM)
|
||||
if retryErr != nil {
|
||||
|
@ -591,7 +591,15 @@ func TestNewCloudFromJSON(t *testing.T) {
|
||||
"securityGroupName": "--security-group-name--",
|
||||
"vnetName": "--vnet-name--",
|
||||
"routeTableName": "--route-table-name--",
|
||||
"primaryAvailabilitySetName": "--primary-availability-set-name--"
|
||||
"primaryAvailabilitySetName": "--primary-availability-set-name--",
|
||||
"cloudProviderBackoff": true,
|
||||
"cloudProviderBackoffRetries": 6,
|
||||
"cloudProviderBackoffExponent": 1.5,
|
||||
"cloudProviderBackoffDuration": 5,
|
||||
"cloudProviderBackoffJitter": 1.0,
|
||||
"cloudProviderRatelimit": true,
|
||||
"cloudProviderRateLimitQPS": 1,
|
||||
"cloudProviderRateLimitBucket": 5
|
||||
}`
|
||||
validateConfig(t, config)
|
||||
}
|
||||
@ -610,6 +618,14 @@ securityGroupName: --security-group-name--
|
||||
vnetName: --vnet-name--
|
||||
routeTableName: --route-table-name--
|
||||
primaryAvailabilitySetName: --primary-availability-set-name--
|
||||
cloudProviderBackoff: true
|
||||
cloudProviderBackoffRetries: 6
|
||||
cloudProviderBackoffExponent: 1.5
|
||||
cloudProviderBackoffDuration: 5
|
||||
cloudProviderBackoffJitter: 1.0
|
||||
cloudProviderRatelimit: true
|
||||
cloudProviderRateLimitQPS: 1
|
||||
cloudProviderRateLimitBucket: 5
|
||||
`
|
||||
validateConfig(t, config)
|
||||
}
|
||||
@ -659,6 +675,30 @@ func validateConfig(t *testing.T, config string) {
|
||||
if azureCloud.PrimaryAvailabilitySetName != "--primary-availability-set-name--" {
|
||||
t.Errorf("got incorrect value for PrimaryAvailabilitySetName")
|
||||
}
|
||||
if azureCloud.CloudProviderBackoff != true {
|
||||
t.Errorf("got incorrect value for CloudProviderBackoff")
|
||||
}
|
||||
if azureCloud.CloudProviderBackoffRetries != 6 {
|
||||
t.Errorf("got incorrect value for CloudProviderBackoffRetries")
|
||||
}
|
||||
if azureCloud.CloudProviderBackoffExponent != 1.5 {
|
||||
t.Errorf("got incorrect value for CloudProviderBackoffExponent")
|
||||
}
|
||||
if azureCloud.CloudProviderBackoffDuration != 5 {
|
||||
t.Errorf("got incorrect value for CloudProviderBackoffDuration")
|
||||
}
|
||||
if azureCloud.CloudProviderBackoffJitter != 1.0 {
|
||||
t.Errorf("got incorrect value for CloudProviderBackoffJitter")
|
||||
}
|
||||
if azureCloud.CloudProviderRateLimit != true {
|
||||
t.Errorf("got incorrect value for CloudProviderRateLimit")
|
||||
}
|
||||
if azureCloud.CloudProviderRateLimitQPS != 1 {
|
||||
t.Errorf("got incorrect value for CloudProviderRateLimitQPS")
|
||||
}
|
||||
if azureCloud.CloudProviderRateLimitBucket != 5 {
|
||||
t.Errorf("got incorrect value for CloudProviderRateLimitBucket")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeInstanceInfo(t *testing.T) {
|
||||
|
@ -25,6 +25,7 @@ import (
|
||||
|
||||
"github.com/Azure/azure-sdk-for-go/arm/compute"
|
||||
"github.com/Azure/azure-sdk-for-go/arm/network"
|
||||
"github.com/golang/glog"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
)
|
||||
|
||||
@ -242,26 +243,31 @@ func (az *Cloud) getIPForMachine(nodeName types.NodeName) (string, error) {
|
||||
return "", cloudprovider.InstanceNotFound
|
||||
}
|
||||
if err != nil {
|
||||
glog.Errorf("error: az.getIPForMachine(%s), az.getVirtualMachine(%s), err=%v", nodeName, nodeName, err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
nicID, err := getPrimaryInterfaceID(machine)
|
||||
if err != nil {
|
||||
glog.Errorf("error: az.getIPForMachine(%s), getPrimaryInterfaceID(%s), err=%v", nodeName, machine, err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
nicName, err := getLastSegment(nicID)
|
||||
if err != nil {
|
||||
glog.Errorf("error: az.getIPForMachine(%s), getLastSegment(%s), err=%v", nodeName, nicID, err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
nic, err := az.InterfacesClient.Get(az.ResourceGroup, nicName, "")
|
||||
if err != nil {
|
||||
glog.Errorf("error: az.getIPForMachine(%s), az.InterfacesClient.Get(%s, %s, %s), err=%v", nodeName, az.ResourceGroup, nicName, "", err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
ipConfig, err := getPrimaryIPConfig(nic)
|
||||
if err != nil {
|
||||
glog.Errorf("error: az.getIPForMachine(%s), getPrimaryIPConfig(%s), err=%v", nodeName, nic, err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
|
@ -43,6 +43,7 @@ func (az *Cloud) getVirtualMachine(nodeName types.NodeName) (vm compute.VirtualM
|
||||
var realErr error
|
||||
|
||||
vmName := string(nodeName)
|
||||
az.operationPollRateLimiter.Accept()
|
||||
vm, err = az.VirtualMachinesClient.Get(az.ResourceGroup, vmName, "")
|
||||
|
||||
exists, realErr = checkResourceExistsFromError(err)
|
||||
|
Loading…
Reference in New Issue
Block a user