Merge pull request #39149 from deads2k/rbac-33-poststarthook

Automatic merge from submit-queue

retry RBAC initialization for up to 30 seconds, kill server on failure

RBAC initialization needs to complete in order to bootstrap a cluster.  When the bootstrapping fails (etcd not ready has happened in e2e runs), things fail badly and we don't even kill the API server to force it to retry.  This retries for up to 30 seconds and kills the server if it never succeeds.

Fixes https://github.com/kubernetes/kubernetes/issues/39108
This commit is contained in:
Kubernetes Submit Queue 2017-01-03 08:21:54 -08:00 committed by GitHub
commit f0f871f637
2 changed files with 45 additions and 38 deletions

View File

@ -33,6 +33,7 @@ go_library(
"//pkg/registry/rbac/rolebinding/etcd:go_default_library",
"//pkg/registry/rbac/rolebinding/policybased:go_default_library",
"//pkg/util/runtime:go_default_library",
"//pkg/util/wait:go_default_library",
"//plugin/pkg/auth/authorizer/rbac/bootstrappolicy:go_default_library",
"//vendor:github.com/golang/glog",
],

View File

@ -19,6 +19,7 @@ package rest
import (
"fmt"
"sync"
"time"
"github.com/golang/glog"
@ -43,6 +44,7 @@ import (
rolebindingetcd "k8s.io/kubernetes/pkg/registry/rbac/rolebinding/etcd"
rolebindingpolicybased "k8s.io/kubernetes/pkg/registry/rbac/rolebinding/policybased"
utilruntime "k8s.io/kubernetes/pkg/util/runtime"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/plugin/pkg/auth/authorizer/rbac/bootstrappolicy"
)
@ -114,23 +116,22 @@ func (p RESTStorageProvider) PostStartHook() (string, genericapiserver.PostStart
}
func PostStartHook(hookContext genericapiserver.PostStartHookContext) error {
// intializing roles is really important. On some e2e runs, we've seen cases where etcd is down when the server
// starts, the roles don't initialize, and nothing works.
err := wait.Poll(1*time.Second, 30*time.Second, func() (done bool, err error) {
clientset, err := rbacclient.NewForConfig(hookContext.LoopbackClientConfig)
if err != nil {
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err))
return nil
return false, nil
}
existingClusterRoles, err := clientset.ClusterRoles().List(api.ListOptions{})
if err != nil {
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err))
return nil
return false, nil
}
// if clusterroles already exist, then assume we don't have work to do because we've already
// initialized or another API server has started this task
if len(existingClusterRoles.Items) > 0 {
return nil
}
// only initialized on empty etcd
if len(existingClusterRoles.Items) == 0 {
for _, clusterRole := range append(bootstrappolicy.ClusterRoles(), bootstrappolicy.ControllerRoles()...) {
if _, err := clientset.ClusterRoles().Create(&clusterRole); err != nil {
// don't fail on failures, try to create as many as you can
@ -139,18 +140,15 @@ func PostStartHook(hookContext genericapiserver.PostStartHookContext) error {
}
glog.Infof("Created clusterrole.%s/%s", rbac.GroupName, clusterRole.Name)
}
}
existingClusterRoleBindings, err := clientset.ClusterRoleBindings().List(api.ListOptions{})
if err != nil {
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterrolebindings: %v", err))
return nil
return false, nil
}
// if clusterrolebindings already exist, then assume we don't have work to do because we've already
// initialized or another API server has started this task
if len(existingClusterRoleBindings.Items) > 0 {
return nil
}
// only initialized on empty etcd
if len(existingClusterRoleBindings.Items) == 0 {
for _, clusterRoleBinding := range append(bootstrappolicy.ClusterRoleBindings(), bootstrappolicy.ControllerRoleBindings()...) {
if _, err := clientset.ClusterRoleBindings().Create(&clusterRoleBinding); err != nil {
// don't fail on failures, try to create as many as you can
@ -159,6 +157,14 @@ func PostStartHook(hookContext genericapiserver.PostStartHookContext) error {
}
glog.Infof("Created clusterrolebinding.%s/%s", rbac.GroupName, clusterRoleBinding.Name)
}
}
return true, nil
})
// if we're never able to make it through intialization, kill the API server
if err != nil {
return fmt.Errorf("unable to initialize roles: %v", err)
}
return nil
}