Merge pull request #39149 from deads2k/rbac-33-poststarthook

Automatic merge from submit-queue

retry RBAC initialization for up to 30 seconds, kill server on failure

RBAC initialization needs to complete in order to bootstrap a cluster.  When bootstrapping fails (for example because etcd is not ready, which has happened in e2e runs), things fail badly and we don't even kill the API server to force it to retry.  This change retries initialization for up to 30 seconds and kills the server if it never succeeds.

Fixes https://github.com/kubernetes/kubernetes/issues/39108
This commit is contained in:
Kubernetes Submit Queue 2017-01-03 08:21:54 -08:00 committed by GitHub
commit f0f871f637
2 changed files with 45 additions and 38 deletions

View File

@ -33,6 +33,7 @@ go_library(
"//pkg/registry/rbac/rolebinding/etcd:go_default_library", "//pkg/registry/rbac/rolebinding/etcd:go_default_library",
"//pkg/registry/rbac/rolebinding/policybased:go_default_library", "//pkg/registry/rbac/rolebinding/policybased:go_default_library",
"//pkg/util/runtime:go_default_library", "//pkg/util/runtime:go_default_library",
"//pkg/util/wait:go_default_library",
"//plugin/pkg/auth/authorizer/rbac/bootstrappolicy:go_default_library", "//plugin/pkg/auth/authorizer/rbac/bootstrappolicy:go_default_library",
"//vendor:github.com/golang/glog", "//vendor:github.com/golang/glog",
], ],

View File

@ -19,6 +19,7 @@ package rest
import ( import (
"fmt" "fmt"
"sync" "sync"
"time"
"github.com/golang/glog" "github.com/golang/glog"
@ -43,6 +44,7 @@ import (
rolebindingetcd "k8s.io/kubernetes/pkg/registry/rbac/rolebinding/etcd" rolebindingetcd "k8s.io/kubernetes/pkg/registry/rbac/rolebinding/etcd"
rolebindingpolicybased "k8s.io/kubernetes/pkg/registry/rbac/rolebinding/policybased" rolebindingpolicybased "k8s.io/kubernetes/pkg/registry/rbac/rolebinding/policybased"
utilruntime "k8s.io/kubernetes/pkg/util/runtime" utilruntime "k8s.io/kubernetes/pkg/util/runtime"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/plugin/pkg/auth/authorizer/rbac/bootstrappolicy" "k8s.io/kubernetes/plugin/pkg/auth/authorizer/rbac/bootstrappolicy"
) )
@ -114,50 +116,54 @@ func (p RESTStorageProvider) PostStartHook() (string, genericapiserver.PostStart
} }
func PostStartHook(hookContext genericapiserver.PostStartHookContext) error { func PostStartHook(hookContext genericapiserver.PostStartHookContext) error {
clientset, err := rbacclient.NewForConfig(hookContext.LoopbackClientConfig) // initializing roles is really important. On some e2e runs, we've seen cases where etcd is down when the server
if err != nil { // starts, the roles don't initialize, and nothing works.
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err)) err := wait.Poll(1*time.Second, 30*time.Second, func() (done bool, err error) {
return nil clientset, err := rbacclient.NewForConfig(hookContext.LoopbackClientConfig)
} if err != nil {
existingClusterRoles, err := clientset.ClusterRoles().List(api.ListOptions{})
if err != nil {
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err))
return nil
}
// if clusterroles already exist, then assume we don't have work to do because we've already
// initialized or another API server has started this task
if len(existingClusterRoles.Items) > 0 {
return nil
}
for _, clusterRole := range append(bootstrappolicy.ClusterRoles(), bootstrappolicy.ControllerRoles()...) {
if _, err := clientset.ClusterRoles().Create(&clusterRole); err != nil {
// don't fail on failures, try to create as many as you can
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err)) utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err))
continue return false, nil
} }
glog.Infof("Created clusterrole.%s/%s", rbac.GroupName, clusterRole.Name)
}
existingClusterRoleBindings, err := clientset.ClusterRoleBindings().List(api.ListOptions{}) existingClusterRoles, err := clientset.ClusterRoles().List(api.ListOptions{})
if err != nil { if err != nil {
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterrolebindings: %v", err)) utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err))
return nil return false, nil
} }
// if clusterrolebindings already exist, then assume we don't have work to do because we've already // only initialized on empty etcd
// initialized or another API server has started this task if len(existingClusterRoles.Items) == 0 {
if len(existingClusterRoleBindings.Items) > 0 { for _, clusterRole := range append(bootstrappolicy.ClusterRoles(), bootstrappolicy.ControllerRoles()...) {
return nil if _, err := clientset.ClusterRoles().Create(&clusterRole); err != nil {
} // don't fail on failures, try to create as many as you can
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err))
continue
}
glog.Infof("Created clusterrole.%s/%s", rbac.GroupName, clusterRole.Name)
}
}
for _, clusterRoleBinding := range append(bootstrappolicy.ClusterRoleBindings(), bootstrappolicy.ControllerRoleBindings()...) { existingClusterRoleBindings, err := clientset.ClusterRoleBindings().List(api.ListOptions{})
if _, err := clientset.ClusterRoleBindings().Create(&clusterRoleBinding); err != nil { if err != nil {
// don't fail on failures, try to create as many as you can
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterrolebindings: %v", err)) utilruntime.HandleError(fmt.Errorf("unable to initialize clusterrolebindings: %v", err))
continue return false, nil
} }
glog.Infof("Created clusterrolebinding.%s/%s", rbac.GroupName, clusterRoleBinding.Name) // only initialized on empty etcd
if len(existingClusterRoleBindings.Items) == 0 {
for _, clusterRoleBinding := range append(bootstrappolicy.ClusterRoleBindings(), bootstrappolicy.ControllerRoleBindings()...) {
if _, err := clientset.ClusterRoleBindings().Create(&clusterRoleBinding); err != nil {
// don't fail on failures, try to create as many as you can
utilruntime.HandleError(fmt.Errorf("unable to initialize clusterrolebindings: %v", err))
continue
}
glog.Infof("Created clusterrolebinding.%s/%s", rbac.GroupName, clusterRoleBinding.Name)
}
}
return true, nil
})
// if we're never able to make it through initialization, kill the API server
if err != nil {
return fmt.Errorf("unable to initialize roles: %v", err)
} }
return nil return nil