From 22452917c20f6018a19e1e427c420388c8072a5d Mon Sep 17 00:00:00 2001 From: Chao Xu Date: Mon, 17 Feb 2020 14:19:32 -0800 Subject: [PATCH] make some rbac and scheduling post start hooks tolerate the apiserver bootstrap delay caused by installing storage versions. --- pkg/registry/rbac/rest/storage_rbac.go | 21 +++++++++++++++---- .../scheduling/rest/storage_scheduling.go | 11 +++++++--- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/pkg/registry/rbac/rest/storage_rbac.go b/pkg/registry/rbac/rest/storage_rbac.go index 8776ddbfe96..6b853d5174b 100644 --- a/pkg/registry/rbac/rest/storage_rbac.go +++ b/pkg/registry/rbac/rest/storage_rbac.go @@ -26,6 +26,7 @@ import ( rbacapiv1 "k8s.io/api/rbac/v1" rbacapiv1alpha1 "k8s.io/api/rbac/v1alpha1" rbacapiv1beta1 "k8s.io/api/rbac/v1beta1" + "k8s.io/apimachinery/pkg/api/errors" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" @@ -160,6 +161,14 @@ type PolicyData struct { ClusterRoleBindingsToSplit map[string]rbacapiv1.ClusterRoleBinding } +func isConflictOrServiceUnavailable(err error) bool { + return errors.IsConflict(err) || errors.IsServiceUnavailable(err) +} + +func retryOnConflictOrServiceUnavailable(backoff wait.Backoff, fn func() error) error { + return retry.OnError(backoff, isConflictOrServiceUnavailable, fn) +} + func (p *PolicyData) EnsureRBACPolicy() genericapiserver.PostStartHookFunc { return func(hookContext genericapiserver.PostStartHookContext) error { // initializing roles is really important. 
On some e2e runs, we've seen cases where etcd is down when the server @@ -206,7 +215,8 @@ func (p *PolicyData) EnsureRBACPolicy() genericapiserver.PostStartHookFunc { Client: reconciliation.ClusterRoleModifier{Client: clientset.ClusterRoles()}, Confirm: true, } - err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + // ServiceUnavailable error is returned when the API server is blocked by storage version updates + err := retryOnConflictOrServiceUnavailable(retry.DefaultBackoff, func() error { result, err := opts.Run() if err != nil { return err @@ -234,7 +244,8 @@ func (p *PolicyData) EnsureRBACPolicy() genericapiserver.PostStartHookFunc { Client: reconciliation.ClusterRoleBindingClientAdapter{Client: clientset.ClusterRoleBindings()}, Confirm: true, } - err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + // ServiceUnavailable error is returned when the API server is blocked by storage version updates + err := retryOnConflictOrServiceUnavailable(retry.DefaultBackoff, func() error { result, err := opts.Run() if err != nil { return err @@ -265,7 +276,8 @@ func (p *PolicyData) EnsureRBACPolicy() genericapiserver.PostStartHookFunc { Client: reconciliation.RoleModifier{Client: clientset, NamespaceClient: coreclientset.Namespaces()}, Confirm: true, } - err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + // ServiceUnavailable error is returned when the API server is blocked by storage version updates + err := retryOnConflictOrServiceUnavailable(retry.DefaultBackoff, func() error { result, err := opts.Run() if err != nil { return err @@ -295,7 +307,8 @@ func (p *PolicyData) EnsureRBACPolicy() genericapiserver.PostStartHookFunc { Client: reconciliation.RoleBindingClientAdapter{Client: clientset, NamespaceClient: coreclientset.Namespaces()}, Confirm: true, } - err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + // ServiceUnavailable error is returned when the API server is blocked by storage version updates + err := 
retryOnConflictOrServiceUnavailable(retry.DefaultBackoff, func() error { result, err := opts.Run() if err != nil { return err diff --git a/pkg/registry/scheduling/rest/storage_scheduling.go b/pkg/registry/scheduling/rest/storage_scheduling.go index a20bca52748..6c1a4a58a01 100644 --- a/pkg/registry/scheduling/rest/storage_scheduling.go +++ b/pkg/registry/scheduling/rest/storage_scheduling.go @@ -128,11 +128,16 @@ func AddSystemPriorityClasses() genericapiserver.PostStartHookFunc { if err != nil { if apierrors.IsNotFound(err) { _, err := schedClientSet.PriorityClasses().Create(context.TODO(), pc, metav1.CreateOptions{}) - if err != nil && !apierrors.IsAlreadyExists(err) { - return false, err - } else { + if err == nil || apierrors.IsAlreadyExists(err) { klog.Infof("created PriorityClass %s with value %v", pc.Name, pc.Value) + continue } + // ServiceUnavailable error is returned when the API server is blocked by storage version updates + if apierrors.IsServiceUnavailable(err) { + klog.Infof("going to retry, unable to create PriorityClass %s: %v", pc.Name, err) + return false, nil + } + return false, err } else { // Unable to get the priority class for reasons other than "not found". klog.Warningf("unable to get PriorityClass %v: %v. Retrying...", pc.Name, err)