backoff logging, error handling, wait.ConditionFunc

- added info and error logs for appropriate backoff conditions/states
- rationalized log idioms across all resource requests that are backoff-enabled
- processRetryResponse as a wait.ConditionFunc needs to suppress errors if it wants the caller to continue backing off
This commit is contained in:
Jack Francis 2017-06-02 15:35:20 -07:00
parent c5dd95fc22
commit 7e6c689e58
4 changed files with 41 additions and 9 deletions

View File

@ -24,6 +24,7 @@ import (
"github.com/Azure/azure-sdk-for-go/arm/compute"
"github.com/Azure/azure-sdk-for-go/arm/network"
"github.com/Azure/go-autorest/autorest"
"github.com/golang/glog"
)
const (
@ -132,17 +133,16 @@ func (az *Cloud) CreateOrUpdateVMWithRetry(vmName string, newVM compute.VirtualM
})
}
// An in-progress convenience function to deal with common HTTP backoff response conditions
// A wait.ConditionFunc function to deal with common HTTP backoff response conditions
func processRetryResponse(resp autorest.Response, err error) (bool, error) {
if isSuccessHTTPResponse(resp) {
glog.V(2).Infof("backoff: success, HTTP response=%d", resp.StatusCode)
return true, nil
}
if shouldRetryAPIRequest(resp, err) {
return false, err
}
// TODO determine the complete set of short-circuit conditions
if err != nil {
return false, err
glog.Errorf("backoff: failure, will retry, HTTP response=%d, err=%v", resp.StatusCode, err)
// suppress the error object so that backoff process continues
return false, nil
}
// Fall-through: stop periodic backoff, return error object from most recent request
return true, err
@ -150,7 +150,6 @@ func processRetryResponse(resp autorest.Response, err error) (bool, error) {
// shouldRetryAPIRequest determines if the response from an HTTP request suggests periodic retry behavior
func shouldRetryAPIRequest(resp autorest.Response, err error) bool {
// non-nil error from HTTP request suggests we should retry
if err != nil {
return true
}

View File

@ -151,8 +151,10 @@ func (az *Cloud) EnsureLoadBalancer(clusterName string, service *v1.Service, nod
sg.SecurityGroupPropertiesFormat.Subnets = nil
resp, err := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("ensure(%s) backing off: sg(%s) - updating", serviceName, *sg.Name)
retryErr := az.CreateOrUpdateSGWithRetry(sg)
if retryErr != nil {
glog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name)
return nil, retryErr
}
}
@ -227,8 +229,10 @@ func (az *Cloud) EnsureLoadBalancer(clusterName string, service *v1.Service, nod
glog.V(3).Infof("ensure(%s): lb(%s) - updating", serviceName, lbName)
resp, err := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, *lb.Name, lb, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("ensure(%s) backing off: lb(%s) - updating", serviceName, lbName)
retryErr := az.CreateOrUpdateLBWithRetry(lb)
if retryErr != nil {
glog.V(2).Infof("ensure(%s) abort backoff: lb(%s) - updating", serviceName, lbName)
return nil, retryErr
}
}
@ -324,9 +328,11 @@ func (az *Cloud) EnsureLoadBalancerDeleted(clusterName string, service *v1.Servi
sg.SecurityGroupPropertiesFormat.Subnets = nil
resp, err := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *reconciledSg.Name, reconciledSg, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("delete(%s) backing off: sg(%s) - updating", serviceName, az.SecurityGroupName)
retryErr := az.CreateOrUpdateSGWithRetry(reconciledSg)
if retryErr != nil {
err = retryErr
glog.V(2).Infof("delete(%s) abort backoff: sg(%s) - updating", serviceName, az.SecurityGroupName)
}
}
if err != nil {
@ -359,9 +365,11 @@ func (az *Cloud) cleanupLoadBalancer(clusterName string, service *v1.Service, is
glog.V(3).Infof("delete(%s): lb(%s) - updating", serviceName, lbName)
resp, err := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, *lb.Name, lb, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("delete(%s) backing off: sg(%s) - updating", serviceName, az.SecurityGroupName)
retryErr := az.CreateOrUpdateLBWithRetry(lb)
if retryErr != nil {
err = retryErr
glog.V(2).Infof("delete(%s) abort backoff: sg(%s) - updating", serviceName, az.SecurityGroupName)
}
}
if err != nil {
@ -372,9 +380,11 @@ func (az *Cloud) cleanupLoadBalancer(clusterName string, service *v1.Service, is
resp, err := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("delete(%s) backing off: lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName)
retryErr := az.DeleteLBWithRetry(lbName)
if retryErr != nil {
err = retryErr
glog.V(2).Infof("delete(%s) abort backoff: lb(%s) - deleting; no remaining frontendipconfigs", serviceName, lbName)
}
}
if err != nil {
@ -424,9 +434,11 @@ func (az *Cloud) ensurePublicIPExists(serviceName, pipName string) (*network.Pub
glog.V(3).Infof("ensure(%s): pip(%s) - creating", serviceName, *pip.Name)
resp, err := az.PublicIPAddressesClient.CreateOrUpdate(az.ResourceGroup, *pip.Name, pip, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("ensure(%s) backing off: pip(%s) - creating", serviceName, *pip.Name)
retryErr := az.CreateOrUpdatePIPWithRetry(pip)
if retryErr != nil {
return nil, retryErr
glog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - creating", serviceName, *pip.Name)
err = retryErr
}
}
if err != nil {
@ -443,9 +455,15 @@ func (az *Cloud) ensurePublicIPExists(serviceName, pipName string) (*network.Pub
}
func (az *Cloud) ensurePublicIPDeleted(serviceName, pipName string) error {
glog.V(2).Infof("ensure(%s): pip(%s) - deleting", serviceName, pipName)
resp, deleteErr := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil)
if shouldRetryAPIRequest(resp, deleteErr) {
deleteErr = az.DeletePublicIPWithRetry(pipName)
glog.V(2).Infof("ensure(%s) backing off: pip(%s) - deleting", serviceName, pipName)
retryErr := az.DeletePublicIPWithRetry(pipName)
if retryErr != nil {
glog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - deleting", serviceName, pipName)
return retryErr
}
}
_, realErr := checkResourceExistsFromError(deleteErr)
if realErr != nil {
@ -889,9 +907,11 @@ func (az *Cloud) ensureHostInPool(serviceName string, nodeName types.NodeName, b
glog.V(3).Infof("nicupdate(%s): nic(%s) - updating", serviceName, nicName)
resp, err := az.InterfacesClient.CreateOrUpdate(az.ResourceGroup, *nic.Name, nic, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("nicupdate(%s) backing off: nic(%s) - updating", serviceName, nicName)
retryErr := az.CreateOrUpdateInterfaceWithRetry(nic)
if retryErr != nil {
err = retryErr
glog.V(2).Infof("nicupdate(%s) abort backoff: nic(%s) - updating", serviceName, nicName)
}
}
if err != nil {

View File

@ -66,6 +66,7 @@ func (az *Cloud) CreateRoute(clusterName string, nameHint string, kubeRoute *clo
routeTable, existsRouteTable, err := az.getRouteTable()
if err != nil {
glog.V(2).Infof("create error: couldn't get routetable. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
return err
}
if !existsRouteTable {
@ -78,9 +79,11 @@ func (az *Cloud) CreateRoute(clusterName string, nameHint string, kubeRoute *clo
glog.V(3).Infof("create: creating routetable. routeTableName=%q", az.RouteTableName)
resp, err := az.RouteTablesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, routeTable, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("create backing off: creating routetable. routeTableName=%q", az.RouteTableName)
retryErr := az.CreateOrUpdateRouteTableWithRetry(routeTable)
if retryErr != nil {
err = retryErr
glog.V(2).Infof("create abort backoff: creating routetable. routeTableName=%q", az.RouteTableName)
}
}
if err != nil {
@ -111,9 +114,11 @@ func (az *Cloud) CreateRoute(clusterName string, nameHint string, kubeRoute *clo
glog.V(3).Infof("create: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
resp, err := az.RoutesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, *route.Name, route, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("create backing off: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
retryErr := az.CreateOrUpdateRouteWithRetry(route)
if retryErr != nil {
err = retryErr
glog.V(2).Infof("create abort backoff: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
}
}
if err != nil {
@ -132,9 +137,11 @@ func (az *Cloud) DeleteRoute(clusterName string, kubeRoute *cloudprovider.Route)
routeName := mapNodeNameToRouteName(kubeRoute.TargetNode)
resp, err := az.RoutesClient.Delete(az.ResourceGroup, az.RouteTableName, routeName, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("delete backing off: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
retryErr := az.DeleteRouteWithRetry(routeName)
if retryErr != nil {
err = retryErr
glog.V(2).Infof("delete abort backoff: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
}
}
if err != nil {

View File

@ -64,11 +64,14 @@ func (az *Cloud) AttachDisk(diskName, diskURI string, nodeName types.NodeName, l
},
}
vmName := mapNodeNameToVMName(nodeName)
glog.V(2).Infof("create(%s): vm(%s)", az.ResourceGroup, vmName)
resp, err := az.VirtualMachinesClient.CreateOrUpdate(az.ResourceGroup, vmName, newVM, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("create(%s) backing off: vm(%s)", az.ResourceGroup, vmName)
retryErr := az.CreateOrUpdateVMWithRetry(vmName, newVM)
if retryErr != nil {
err = retryErr
glog.V(2).Infof("create(%s) abort backoff: vm(%s)", az.ResourceGroup, vmName)
}
}
if err != nil {
@ -141,11 +144,14 @@ func (az *Cloud) DetachDiskByName(diskName, diskURI string, nodeName types.NodeN
},
}
vmName := mapNodeNameToVMName(nodeName)
glog.V(2).Infof("create(%s): vm(%s)", az.ResourceGroup, vmName)
resp, err := az.VirtualMachinesClient.CreateOrUpdate(az.ResourceGroup, vmName, newVM, nil)
if shouldRetryAPIRequest(resp, err) {
glog.V(2).Infof("create(%s) backing off: vm(%s)", az.ResourceGroup, vmName)
retryErr := az.CreateOrUpdateVMWithRetry(vmName, newVM)
if retryErr != nil {
err = retryErr
glog.V(2).Infof("create(%s) abort backoff: vm(%s)", az.ResourceGroup, vmName)
}
}
if err != nil {