Clean up retry logic: call the *WithRetry helpers directly, since they always try the request at least once

This commit is contained in:
Jingtao Ren 2017-11-16 15:04:08 -08:00
parent c3050e3ab4
commit ff961163aa
3 changed files with 48 additions and 91 deletions

View File

@ -70,6 +70,9 @@ func (fLBC fakeAzureLBClient) CreateOrUpdate(resourceGroupName string, loadBalan
} }
fLBC.FakeStore[resourceGroupName][loadBalancerName] = parameters fLBC.FakeStore[resourceGroupName][loadBalancerName] = parameters
result = fLBC.FakeStore[resourceGroupName][loadBalancerName] result = fLBC.FakeStore[resourceGroupName][loadBalancerName]
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
}
err = nil err = nil
return resultChan, errChan return resultChan, errChan
} }
@ -206,6 +209,9 @@ func (fAPC fakeAzurePIPClient) CreateOrUpdate(resourceGroupName string, publicIP
fAPC.FakeStore[resourceGroupName][publicIPAddressName] = parameters fAPC.FakeStore[resourceGroupName][publicIPAddressName] = parameters
result = fAPC.FakeStore[resourceGroupName][publicIPAddressName] result = fAPC.FakeStore[resourceGroupName][publicIPAddressName]
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
}
err = nil err = nil
return resultChan, errChan return resultChan, errChan
} }
@ -311,6 +317,9 @@ func (fIC fakeAzureInterfacesClient) CreateOrUpdate(resourceGroupName string, ne
} }
fIC.FakeStore[resourceGroupName][networkInterfaceName] = parameters fIC.FakeStore[resourceGroupName][networkInterfaceName] = parameters
result = fIC.FakeStore[resourceGroupName][networkInterfaceName] result = fIC.FakeStore[resourceGroupName][networkInterfaceName]
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
}
err = nil err = nil
return resultChan, errChan return resultChan, errChan
@ -360,6 +369,9 @@ func (fVMC fakeAzureVirtualMachinesClient) CreateOrUpdate(resourceGroupName stri
} }
fVMC.FakeStore[resourceGroupName][VMName] = parameters fVMC.FakeStore[resourceGroupName][VMName] = parameters
result = fVMC.FakeStore[resourceGroupName][VMName] result = fVMC.FakeStore[resourceGroupName][VMName]
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
}
err = nil err = nil
return resultChan, errChan return resultChan, errChan
} }
@ -431,6 +443,9 @@ func (fASC fakeAzureSubnetsClient) CreateOrUpdate(resourceGroupName string, virt
} }
fASC.FakeStore[rgVnet][subnetName] = subnetParameters fASC.FakeStore[rgVnet][subnetName] = subnetParameters
result = fASC.FakeStore[rgVnet][subnetName] result = fASC.FakeStore[rgVnet][subnetName]
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
}
err = nil err = nil
return resultChan, errChan return resultChan, errChan
} }
@ -531,6 +546,9 @@ func (fNSG fakeAzureNSGClient) CreateOrUpdate(resourceGroupName string, networkS
} }
fNSG.FakeStore[resourceGroupName][networkSecurityGroupName] = parameters fNSG.FakeStore[resourceGroupName][networkSecurityGroupName] = parameters
result = fNSG.FakeStore[resourceGroupName][networkSecurityGroupName] result = fNSG.FakeStore[resourceGroupName][networkSecurityGroupName]
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
}
err = nil err = nil
return resultChan, errChan return resultChan, errChan
} }

View File

@ -48,19 +48,10 @@ func (az *Cloud) NodeAddresses(name types.NodeName) ([]v1.NodeAddress, error) {
} }
return addresses, nil return addresses, nil
} }
ip, err := az.getIPForMachine(name) ip, err := az.GetIPForMachineWithRetry(name)
if err != nil { if err != nil {
if az.CloudProviderBackoff { glog.V(2).Infof("NodeAddresses(%s) abort backoff", name)
glog.V(2).Infof("NodeAddresses(%s) backing off", name) return nil, err
ip, err = az.GetIPForMachineWithRetry(name)
if err != nil {
glog.V(2).Infof("NodeAddresses(%s) abort backoff", name)
return nil, err
}
} else {
glog.Errorf("error: az.NodeAddresses, az.getIPForMachine(%s), err=%v", name, err)
return nil, err
}
} }
return []v1.NodeAddress{ return []v1.NodeAddress{

View File

@ -363,22 +363,13 @@ func (az *Cloud) ensurePublicIPExists(serviceName, pipName, domainNameLabel stri
pip.Tags = &map[string]*string{"service": &serviceName} pip.Tags = &map[string]*string{"service": &serviceName}
glog.V(3).Infof("ensure(%s): pip(%s) - creating", serviceName, *pip.Name) glog.V(3).Infof("ensure(%s): pip(%s) - creating", serviceName, *pip.Name)
az.operationPollRateLimiter.Accept() az.operationPollRateLimiter.Accept()
glog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%q): start", *pip.Name) glog.V(10).Infof("CreateOrUpdatePIPWithRetry(%q): start", *pip.Name)
respChan, errChan := az.PublicIPAddressesClient.CreateOrUpdate(az.ResourceGroup, *pip.Name, pip, nil) err = az.CreateOrUpdatePIPWithRetry(pip)
resp := <-respChan
err = <-errChan
glog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%q): end", *pip.Name)
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp.Response, err) {
glog.V(2).Infof("ensure(%s) backing off: pip(%s) - creating", serviceName, *pip.Name)
retryErr := az.CreateOrUpdatePIPWithRetry(pip)
if retryErr != nil {
glog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - creating", serviceName, *pip.Name)
err = retryErr
}
}
if err != nil { if err != nil {
glog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - creating", serviceName, *pip.Name)
return nil, err return nil, err
} }
glog.V(10).Infof("CreateOrUpdatePIPWithRetry(%q): end", *pip.Name)
az.operationPollRateLimiter.Accept() az.operationPollRateLimiter.Accept()
glog.V(10).Infof("PublicIPAddressesClient.Get(%q): start", *pip.Name) glog.V(10).Infof("PublicIPAddressesClient.Get(%q): start", *pip.Name)
@ -709,39 +700,17 @@ func (az *Cloud) reconcileLoadBalancer(clusterName string, service *v1.Service,
az.operationPollRateLimiter.Accept() az.operationPollRateLimiter.Accept()
glog.V(10).Infof("LoadBalancerClient.Delete(%q): start", lbName) glog.V(10).Infof("LoadBalancerClient.Delete(%q): start", lbName)
respChan, errChan := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil) err := az.DeleteLBWithRetry(lbName)
resp := <-respChan
err := <-errChan
glog.V(10).Infof("LoadBalancerClient.Delete(%q): end", lbName)
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("delete(%s) backing off: lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName)
retryErr := az.DeleteLBWithRetry(lbName)
if retryErr != nil {
err = retryErr
glog.V(2).Infof("delete(%s) abort backoff: lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName)
}
}
if err != nil { if err != nil {
glog.V(2).Infof("delete(%s) abort backoff: lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName)
return nil, err return nil, err
} }
glog.V(10).Infof("LoadBalancerClient.Delete(%q): end", lbName)
} else { } else {
glog.V(3).Infof("ensure(%s): lb(%s) - updating", serviceName, lbName) glog.V(3).Infof("ensure(%s): lb(%s) - updating", serviceName, lbName)
az.operationPollRateLimiter.Accept() err := az.CreateOrUpdateLBWithRetry(*lb)
glog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%q): start", lbName)
respChan, errChan := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, lbName, *lb, nil)
resp := <-respChan
err := <-errChan
glog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%q): end", lbName)
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp.Response, err) {
glog.V(2).Infof("ensure(%s) backing off: lb(%s) - updating", serviceName, lbName)
retryErr := az.CreateOrUpdateLBWithRetry(*lb)
if retryErr != nil {
glog.V(2).Infof("ensure(%s) abort backoff: lb(%s) - updating", serviceName, lbName)
return nil, retryErr
}
}
if err != nil { if err != nil {
glog.V(2).Infof("ensure(%s) abort backoff: lb(%s) - updating", serviceName, lbName)
return nil, err return nil, err
} }
} }
@ -892,22 +861,13 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service,
sg.SecurityRules = &updatedRules sg.SecurityRules = &updatedRules
glog.V(3).Infof("ensure(%s): sg(%s) - updating", serviceName, *sg.Name) glog.V(3).Infof("ensure(%s): sg(%s) - updating", serviceName, *sg.Name)
az.operationPollRateLimiter.Accept() az.operationPollRateLimiter.Accept()
glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%q): start", *sg.Name) glog.V(10).Infof("CreateOrUpdateSGWithRetry(%q): start", *sg.Name)
respChan, errChan := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil) err := az.CreateOrUpdateSGWithRetry(sg)
resp := <-respChan
err := <-errChan
glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%q): end", *sg.Name)
if az.CloudProviderBackoff && shouldRetryAPIRequest(resp.Response, err) {
glog.V(2).Infof("ensure(%s) backing off: sg(%s) - updating", serviceName, *sg.Name)
retryErr := az.CreateOrUpdateSGWithRetry(sg)
if retryErr != nil {
glog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name)
return nil, retryErr
}
}
if err != nil { if err != nil {
glog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name)
return nil, err return nil, err
} }
glog.V(10).Infof("CreateOrUpdateSGWithRetry(%q): end", *sg.Name)
} }
return &sg, nil return &sg, nil
} }
@ -938,22 +898,18 @@ func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, want
} else { } else {
glog.V(2).Infof("ensure(%s): pip(%s) - deleting", serviceName, pipName) glog.V(2).Infof("ensure(%s): pip(%s) - deleting", serviceName, pipName)
az.operationPollRateLimiter.Accept() az.operationPollRateLimiter.Accept()
glog.V(10).Infof("PublicIPAddressesClient.Delete(%q): start", pipName) glog.V(10).Infof("DeletePublicIPWithRetry(%q): start", pipName)
resp, deleteErrChan := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil) err = az.DeletePublicIPWithRetry(pipName)
deleteErr := <-deleteErrChan if err != nil {
glog.V(10).Infof("PublicIPAddressesClient.Delete(%q): end", pipName) // response not read yet... glog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - deleting", serviceName, pipName)
if az.CloudProviderBackoff && shouldRetryAPIRequest(<-resp, deleteErr) { // We let err to pass through
glog.V(2).Infof("ensure(%s) backing off: pip(%s) - deleting", serviceName, pipName) // It may be ignorable
retryErr := az.DeletePublicIPWithRetry(pipName)
if retryErr != nil {
glog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - deleting", serviceName, pipName)
return nil, retryErr
}
} }
glog.V(10).Infof("DeletePublicIPWithRetry(%q): end", pipName) // response not read yet...
deleteErr = ignoreStatusNotFoundFromError(deleteErr) err = ignoreStatusNotFoundFromError(err)
if deleteErr != nil { if err != nil {
return nil, deleteErr return nil, err
} }
glog.V(2).Infof("ensure(%s): pip(%s) - finished", serviceName, pipName) glog.V(2).Infof("ensure(%s): pip(%s) - finished", serviceName, pipName)
} }
@ -1007,20 +963,12 @@ func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, b
vmName := mapNodeNameToVMName(nodeName) vmName := mapNodeNameToVMName(nodeName)
az.operationPollRateLimiter.Accept() az.operationPollRateLimiter.Accept()
glog.V(10).Infof("VirtualMachinesClient.Get(%q): start", vmName) glog.V(10).Infof("VirtualMachinesClient.Get(%q): start", vmName)
machine, err := az.VirtualMachinesClient.Get(az.ResourceGroup, vmName, "") machine, err := az.VirtualMachineClientGetWithRetry(az.ResourceGroup, vmName, "")
glog.V(10).Infof("VirtualMachinesClient.Get(%q): end", vmName)
if err != nil { if err != nil {
if az.CloudProviderBackoff { glog.V(2).Infof("ensureHostInPool(%s, %s, %s) abort backoff", serviceName, nodeName, backendPoolID)
glog.V(2).Infof("ensureHostInPool(%s, %s, %s) backing off", serviceName, nodeName, backendPoolID) return err
machine, err = az.VirtualMachineClientGetWithRetry(az.ResourceGroup, vmName, "")
if err != nil {
glog.V(2).Infof("ensureHostInPool(%s, %s, %s) abort backoff", serviceName, nodeName, backendPoolID)
return err
}
} else {
return err
}
} }
glog.V(10).Infof("VirtualMachinesClient.Get(%q): end", vmName)
primaryNicID, err := getPrimaryInterfaceID(machine) primaryNicID, err := getPrimaryInterfaceID(machine)
if err != nil { if err != nil {