Merge pull request #39149 from deads2k/rbac-33-poststarthook

Automatic merge from submit-queue

retry RBAC initialization for up to 30 seconds, kill server on failure

RBAC initialization needs to complete in order to bootstrap a cluster.  When the bootstrapping fails (etcd not ready has happened in e2e runs), things fail badly and we don't even kill the API server to force it to retry.  This retries for up to 30 seconds and kills the server if it never succeeds.

Fixes https://github.com/kubernetes/kubernetes/issues/39108
This commit is contained in:
Kubernetes Submit Queue 2017-01-03 08:21:54 -08:00 committed by GitHub
commit f0f871f637
2 changed files with 45 additions and 38 deletions

View File

@ -33,6 +33,7 @@ go_library(
"//pkg/registry/rbac/rolebinding/etcd:go_default_library", "//pkg/registry/rbac/rolebinding/etcd:go_default_library",
"//pkg/registry/rbac/rolebinding/policybased:go_default_library", "//pkg/registry/rbac/rolebinding/policybased:go_default_library",
"//pkg/util/runtime:go_default_library", "//pkg/util/runtime:go_default_library",
"//pkg/util/wait:go_default_library",
"//plugin/pkg/auth/authorizer/rbac/bootstrappolicy:go_default_library", "//plugin/pkg/auth/authorizer/rbac/bootstrappolicy:go_default_library",
"//vendor:github.com/golang/glog", "//vendor:github.com/golang/glog",
], ],

View File

@ -19,6 +19,7 @@ package rest
import ( import (
"fmt" "fmt"
"sync" "sync"
"time"
"github.com/golang/glog" "github.com/golang/glog"
@ -43,6 +44,7 @@ import (
rolebindingetcd "k8s.io/kubernetes/pkg/registry/rbac/rolebinding/etcd" rolebindingetcd "k8s.io/kubernetes/pkg/registry/rbac/rolebinding/etcd"
rolebindingpolicybased "k8s.io/kubernetes/pkg/registry/rbac/rolebinding/policybased" rolebindingpolicybased "k8s.io/kubernetes/pkg/registry/rbac/rolebinding/policybased"
utilruntime "k8s.io/kubernetes/pkg/util/runtime" utilruntime "k8s.io/kubernetes/pkg/util/runtime"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/plugin/pkg/auth/authorizer/rbac/bootstrappolicy" "k8s.io/kubernetes/plugin/pkg/auth/authorizer/rbac/bootstrappolicy"
) )
@ -114,23 +116,22 @@ func (p RESTStorageProvider) PostStartHook() (string, genericapiserver.PostStart
} }
func PostStartHook(hookContext genericapiserver.PostStartHookContext) error { func PostStartHook(hookContext genericapiserver.PostStartHookContext) error {
// initializing roles is really important. On some e2e runs, we've seen cases where etcd is down when the server
// starts, the roles don't initialize, and nothing works.
err := wait.Poll(1*time.Second, 30*time.Second, func() (done bool, err error) {
clientset, err := rbacclient.NewForConfig(hookContext.LoopbackClientConfig) clientset, err := rbacclient.NewForConfig(hookContext.LoopbackClientConfig)
if err != nil { if err != nil {
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err)) utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err))
return nil return false, nil
} }
existingClusterRoles, err := clientset.ClusterRoles().List(api.ListOptions{}) existingClusterRoles, err := clientset.ClusterRoles().List(api.ListOptions{})
if err != nil { if err != nil {
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err)) utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err))
return nil return false, nil
} }
// if clusterroles already exist, then assume we don't have work to do because we've already // only initialize on empty etcd
// initialized or another API server has started this task if len(existingClusterRoles.Items) == 0 {
if len(existingClusterRoles.Items) > 0 {
return nil
}
for _, clusterRole := range append(bootstrappolicy.ClusterRoles(), bootstrappolicy.ControllerRoles()...) { for _, clusterRole := range append(bootstrappolicy.ClusterRoles(), bootstrappolicy.ControllerRoles()...) {
if _, err := clientset.ClusterRoles().Create(&clusterRole); err != nil { if _, err := clientset.ClusterRoles().Create(&clusterRole); err != nil {
// don't fail on failures, try to create as many as you can // don't fail on failures, try to create as many as you can
@ -139,18 +140,15 @@ func PostStartHook(hookContext genericapiserver.PostStartHookContext) error {
} }
glog.Infof("Created clusterrole.%s/%s", rbac.GroupName, clusterRole.Name) glog.Infof("Created clusterrole.%s/%s", rbac.GroupName, clusterRole.Name)
} }
}
existingClusterRoleBindings, err := clientset.ClusterRoleBindings().List(api.ListOptions{}) existingClusterRoleBindings, err := clientset.ClusterRoleBindings().List(api.ListOptions{})
if err != nil { if err != nil {
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterrolebindings: %v", err)) utilruntime.HandleError(fmt.Errorf("unable to initialize clusterrolebindings: %v", err))
return nil return false, nil
} }
// if clusterrolebindings already exist, then assume we don't have work to do because we've already // only initialize on empty etcd
// initialized or another API server has started this task if len(existingClusterRoleBindings.Items) == 0 {
if len(existingClusterRoleBindings.Items) > 0 {
return nil
}
for _, clusterRoleBinding := range append(bootstrappolicy.ClusterRoleBindings(), bootstrappolicy.ControllerRoleBindings()...) { for _, clusterRoleBinding := range append(bootstrappolicy.ClusterRoleBindings(), bootstrappolicy.ControllerRoleBindings()...) {
if _, err := clientset.ClusterRoleBindings().Create(&clusterRoleBinding); err != nil { if _, err := clientset.ClusterRoleBindings().Create(&clusterRoleBinding); err != nil {
// don't fail on failures, try to create as many as you can // don't fail on failures, try to create as many as you can
@ -159,6 +157,14 @@ func PostStartHook(hookContext genericapiserver.PostStartHookContext) error {
} }
glog.Infof("Created clusterrolebinding.%s/%s", rbac.GroupName, clusterRoleBinding.Name) glog.Infof("Created clusterrolebinding.%s/%s", rbac.GroupName, clusterRoleBinding.Name)
} }
}
return true, nil
})
// if we're never able to make it through initialization, kill the API server
if err != nil {
return fmt.Errorf("unable to initialize roles: %v", err)
}
return nil return nil
} }