From 969a55865793d37cfa60d20c334ce95f2dcf8f4a Mon Sep 17 00:00:00 2001 From: hzxuzhonghu Date: Sat, 17 Mar 2018 16:02:36 +0800 Subject: [PATCH] use common clientretry.RetryOnConflict --- pkg/controller/route/BUILD | 2 +- pkg/controller/route/route_controller.go | 50 +++++++++++++----------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/pkg/controller/route/BUILD b/pkg/controller/route/BUILD index fe0e240fe3a..3bb653bdbd0 100644 --- a/pkg/controller/route/BUILD +++ b/pkg/controller/route/BUILD @@ -21,7 +21,6 @@ go_library( "//pkg/util/node:go_default_library", "//vendor/github.com/golang/glog:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/labels:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", @@ -34,6 +33,7 @@ go_library( "//vendor/k8s.io/client-go/listers/core/v1:go_default_library", "//vendor/k8s.io/client-go/tools/cache:go_default_library", "//vendor/k8s.io/client-go/tools/record:go_default_library", + "//vendor/k8s.io/client-go/util/retry:go_default_library", ], ) diff --git a/pkg/controller/route/route_controller.go b/pkg/controller/route/route_controller.go index dd51fb06d08..25dd418be08 100644 --- a/pkg/controller/route/route_controller.go +++ b/pkg/controller/route/route_controller.go @@ -24,8 +24,8 @@ import ( "time" "github.com/golang/glog" + "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" @@ -38,6 +38,7 @@ import ( corelisters "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" + clientretry "k8s.io/client-go/util/retry" v1node "k8s.io/kubernetes/pkg/api/v1/node" "k8s.io/kubernetes/pkg/cloudprovider" "k8s.io/kubernetes/pkg/controller" @@ -49,12 +50,14 @@ const ( // Maximal number of concurrent CreateRoute API calls. // TODO: This should be per-provider. maxConcurrentRouteCreations int = 200 - // Maximum number of retries of route creations. - maxRetries int = 5 - // Maximum number of retries of node status update. - updateNodeStatusMaxRetries int = 3 ) +var updateNetworkConditionBackoff = wait.Backoff{ + Steps: 5, // Maximum number of retries. + Duration: 100 * time.Millisecond, + Jitter: 1.0, +} + type RouteController struct { routes cloudprovider.Routes kubeClient clientset.Interface @@ -165,7 +168,7 @@ func (rc *RouteController) reconcile(nodes []*v1.Node, routes []*cloudprovider.R wg.Add(1) go func(nodeName types.NodeName, nameHint string, route *cloudprovider.Route) { defer wg.Done() - for i := 0; i < maxRetries; i++ { + err := clientretry.RetryOnConflict(updateNetworkConditionBackoff, func() error { startTime := time.Now() // Ensure that we don't have more than maxConcurrentRouteCreations // CreateRoute calls in flight. @@ -186,12 +189,14 @@ func (rc *RouteController) reconcile(nodes []*v1.Node, routes []*cloudprovider.R Namespace: "", }, v1.EventTypeWarning, "FailedToCreateRoute", msg) } - glog.Error(msg) - - } else { - glog.Infof("Created route for node %s %s with hint %s after %v", nodeName, route.DestinationCIDR, nameHint, time.Now().Sub(startTime)) - return + glog.V(4).Infof(msg) + return err } + glog.Infof("Created route for node %s %s with hint %s after %v", nodeName, route.DestinationCIDR, nameHint, time.Now().Sub(startTime)) + return nil + }) + if err != nil { + glog.Errorf("Could not create route %s %s for node %s: %v", nameHint, route.DestinationCIDR, nodeName, err) } }(nodeName, nameHint, route) } else { @@ -210,14 +215,13 @@ func (rc *RouteController) reconcile(nodes []*v1.Node, routes []*cloudprovider.R wg.Add(1) // Delete the route. go func(route *cloudprovider.Route, startTime time.Time) { + defer wg.Done() glog.Infof("Deleting route %s %s", route.Name, route.DestinationCIDR) if err := rc.routes.DeleteRoute(context.TODO(), rc.clusterName, route); err != nil { glog.Errorf("Could not delete route %s %s after %v: %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime), err) } else { glog.Infof("Deleted route %s %s after %v", route.Name, route.DestinationCIDR, time.Now().Sub(startTime)) } - wg.Done() - }(route, time.Now()) } } @@ -227,8 +231,8 @@ func (rc *RouteController) reconcile(nodes []*v1.Node, routes []*cloudprovider.R } func (rc *RouteController) updateNetworkingCondition(nodeName types.NodeName, routeCreated bool) error { - var err error - for i := 0; i < updateNodeStatusMaxRetries; i++ { + err := clientretry.RetryOnConflict(updateNetworkConditionBackoff, func() error { + var err error // Patch could also fail, even though the chance is very slim. So we still do // patch in the retry loop. currentTime := metav1.Now() @@ -249,16 +253,16 @@ func (rc *RouteController) updateNetworkingCondition(nodeName types.NodeName, ro LastTransitionTime: currentTime, }) } - if err == nil { - return nil + if err != nil { + glog.V(4).Infof("Error updating node %s, retrying: %v", nodeName, err) } - if !errors.IsConflict(err) { - glog.Errorf("Error updating node %s: %v", nodeName, err) - return err - } - glog.V(4).Infof("Error updating node %s, retrying: %v", nodeName, err) + return err + }) + + if err != nil { + glog.Errorf("Error updating node %s: %v", nodeName, err) } - glog.Errorf("Error updating node %s: %v", nodeName, err) + return err }