Merge pull request #104667 from jiahuif/feature/controller-manager/healthz

Health checks for controller managers.
This commit is contained in:
Kubernetes Prow Robot 2021-09-03 11:10:53 -07:00 committed by GitHub
commit 30f3511104
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 384 additions and 12 deletions

View File

@ -61,6 +61,7 @@ import (
genericcontrollermanager "k8s.io/controller-manager/app"
"k8s.io/controller-manager/controller"
"k8s.io/controller-manager/pkg/clientbuilder"
controllerhealthz "k8s.io/controller-manager/pkg/healthz"
"k8s.io/controller-manager/pkg/informerfactory"
"k8s.io/controller-manager/pkg/leadermigration"
"k8s.io/klog/v2"
@ -199,12 +200,13 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error {
electionChecker = leaderelection.NewLeaderHealthzAdaptor(time.Second * 20)
checks = append(checks, electionChecker)
}
healthzHandler := controllerhealthz.NewMutableHealthzHandler(checks...)
// Start the controller manager HTTP server
// unsecuredMux is the handler for these controller *after* authn/authz filters have been applied
var unsecuredMux *mux.PathRecorderMux
if c.SecureServing != nil {
unsecuredMux = genericcontrollermanager.NewBaseHandler(&c.ComponentConfig.Generic.Debugging, checks...)
unsecuredMux = genericcontrollermanager.NewBaseHandler(&c.ComponentConfig.Generic.Debugging, healthzHandler)
handler := genericcontrollermanager.BuildHandlerChain(unsecuredMux, &c.Authorization, &c.Authentication)
// TODO: handle stoppedCh returned by c.SecureServing.Serve
if _, err := c.SecureServing.Serve(handler, 0, stopCh); err != nil {
@ -223,7 +225,7 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error {
klog.Fatalf("error building controller context: %v", err)
}
controllerInitializers := initializersFunc(controllerContext.LoopMode)
if err := StartControllers(controllerContext, startSATokenController, controllerInitializers, unsecuredMux); err != nil {
if err := StartControllers(controllerContext, startSATokenController, controllerInitializers, unsecuredMux, healthzHandler); err != nil {
klog.Fatalf("error starting controllers: %v", err)
}
@ -541,7 +543,8 @@ func CreateControllerContext(s *config.CompletedConfig, rootClientBuilder, clien
}
// StartControllers starts a set of controllers with a specified ControllerContext
func StartControllers(ctx ControllerContext, startSATokenController InitFunc, controllers map[string]InitFunc, unsecuredMux *mux.PathRecorderMux) error {
func StartControllers(ctx ControllerContext, startSATokenController InitFunc, controllers map[string]InitFunc,
unsecuredMux *mux.PathRecorderMux, healthzHandler *controllerhealthz.MutableHealthzHandler) error {
// Always start the SA token controller first using a full-power client, since it needs to mint tokens for the rest
// If this fails, just return here and fail since other controllers won't be able to get credentials.
if startSATokenController != nil {
@ -556,6 +559,8 @@ func StartControllers(ctx ControllerContext, startSATokenController InitFunc, co
ctx.Cloud.Initialize(ctx.ClientBuilder, ctx.Stop)
}
var controllerChecks []healthz.HealthChecker
for controllerName, initFn := range controllers {
if !ctx.IsControllerEnabled(controllerName) {
klog.Warningf("%q is disabled", controllerName)
@ -574,6 +579,7 @@ func StartControllers(ctx ControllerContext, startSATokenController InitFunc, co
klog.Warningf("Skipping %q", controllerName)
continue
}
check := controllerhealthz.NamedPingChecker(controllerName)
if ctrl != nil {
// check if the controller supports and requests a debugHandler
// and it needs the unsecuredMux to mount the handler onto.
@ -584,10 +590,19 @@ func StartControllers(ctx ControllerContext, startSATokenController InitFunc, co
unsecuredMux.UnlistedHandlePrefix(basePath+"/", http.StripPrefix(basePath, debugHandler))
}
}
if healthCheckable, ok := ctrl.(controller.HealthCheckable); ok {
if realCheck := healthCheckable.HealthChecker(); realCheck != nil {
check = controllerhealthz.NamedHealthChecker(controllerName, realCheck)
}
}
}
controllerChecks = append(controllerChecks, check)
klog.Infof("Started %q", controllerName)
}
healthzHandler.AddHealthChecker(controllerChecks...)
return nil
}

View File

@ -51,6 +51,7 @@ import (
genericcontrollermanager "k8s.io/controller-manager/app"
"k8s.io/controller-manager/controller"
"k8s.io/controller-manager/pkg/clientbuilder"
controllerhealthz "k8s.io/controller-manager/pkg/healthz"
"k8s.io/controller-manager/pkg/informerfactory"
"k8s.io/controller-manager/pkg/leadermigration"
"k8s.io/klog/v2"
@ -156,9 +157,10 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface
checks = append(checks, electionChecker)
}
healthzHandler := controllerhealthz.NewMutableHealthzHandler(checks...)
// Start the controller manager HTTP server
if c.SecureServing != nil {
unsecuredMux := genericcontrollermanager.NewBaseHandler(&c.ComponentConfig.Generic.Debugging, checks...)
unsecuredMux := genericcontrollermanager.NewBaseHandler(&c.ComponentConfig.Generic.Debugging, healthzHandler)
handler := genericcontrollermanager.BuildHandlerChain(unsecuredMux, &c.Authorization, &c.Authentication)
// TODO: handle stoppedCh returned by c.SecureServing.Serve
if _, err := c.SecureServing.Serve(handler, 0, stopCh); err != nil {
@ -166,7 +168,7 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface
}
}
if c.InsecureServing != nil {
unsecuredMux := genericcontrollermanager.NewBaseHandler(&c.ComponentConfig.Generic.Debugging, checks...)
unsecuredMux := genericcontrollermanager.NewBaseHandler(&c.ComponentConfig.Generic.Debugging, healthzHandler)
insecureSuperuserAuthn := server.AuthenticationInfo{Authenticator: &server.InsecureSuperuser{}}
handler := genericcontrollermanager.BuildHandlerChain(unsecuredMux, nil, &insecureSuperuserAuthn)
if err := c.InsecureServing.Serve(handler, 0, stopCh); err != nil {
@ -182,7 +184,7 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface
if err != nil {
klog.Fatalf("error building controller context: %v", err)
}
if err := startControllers(cloud, controllerContext, c, ctx.Done(), controllerInitializers); err != nil {
if err := startControllers(cloud, controllerContext, c, ctx.Done(), controllerInitializers, healthzHandler); err != nil {
klog.Fatalf("error running controllers: %v", err)
}
}
@ -259,13 +261,14 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface
}
// startControllers starts the cloud specific controller loops.
func startControllers(cloud cloudprovider.Interface, ctx genericcontrollermanager.ControllerContext, c *cloudcontrollerconfig.CompletedConfig, stopCh <-chan struct{}, controllers map[string]InitFunc) error {
func startControllers(cloud cloudprovider.Interface, ctx genericcontrollermanager.ControllerContext, c *cloudcontrollerconfig.CompletedConfig, stopCh <-chan struct{}, controllers map[string]InitFunc, healthzHandler *controllerhealthz.MutableHealthzHandler) error {
// Initialize the cloud provider with a reference to the clientBuilder
cloud.Initialize(c.ClientBuilder, stopCh)
// Set the informer on the user cloud object
if informerUserCloud, ok := cloud.(cloudprovider.InformerUser); ok {
informerUserCloud.SetInformers(c.SharedInformers)
}
var controllerChecks []healthz.HealthChecker
for controllerName, initFn := range controllers {
if !genericcontrollermanager.IsControllerEnabled(controllerName, ControllersDisabledByDefault, c.ComponentConfig.Generic.Controllers) {
klog.Warningf("%q is disabled", controllerName)
@ -273,7 +276,7 @@ func startControllers(cloud cloudprovider.Interface, ctx genericcontrollermanage
}
klog.V(1).Infof("Starting %q", controllerName)
_, started, err := initFn(ctx)
ctrl, started, err := initFn(ctx)
if err != nil {
klog.Errorf("Error starting %q", controllerName)
return err
@ -282,11 +285,22 @@ func startControllers(cloud cloudprovider.Interface, ctx genericcontrollermanage
klog.Warningf("Skipping %q", controllerName)
continue
}
check := controllerhealthz.NamedPingChecker(controllerName)
if ctrl != nil {
if healthCheckable, ok := ctrl.(controller.HealthCheckable); ok {
if realCheck := healthCheckable.HealthChecker(); realCheck != nil {
check = controllerhealthz.NamedHealthChecker(controllerName, realCheck)
}
}
}
controllerChecks = append(controllerChecks, check)
klog.Infof("Started %q", controllerName)
time.Sleep(wait.Jitter(c.ComponentConfig.Generic.ControllerStartInterval.Duration, ControllerStartJitter))
}
healthzHandler.AddHealthChecker(controllerChecks...)
// If apiserver is not running we should wait for some time and fail only then. This is particularly
// important when we start apiserver and controller manager at the same time.
if err := genericcontrollermanager.WaitForAPIServer(c.VersionedClient, 10*time.Second); err != nil {

View File

@ -24,7 +24,6 @@ import (
apirequest "k8s.io/apiserver/pkg/endpoints/request"
apiserver "k8s.io/apiserver/pkg/server"
genericfilters "k8s.io/apiserver/pkg/server/filters"
"k8s.io/apiserver/pkg/server/healthz"
"k8s.io/apiserver/pkg/server/mux"
"k8s.io/apiserver/pkg/server/routes"
"k8s.io/client-go/kubernetes/scheme"
@ -56,9 +55,9 @@ func BuildHandlerChain(apiHandler http.Handler, authorizationInfo *apiserver.Aut
}
// NewBaseHandler takes in CompletedConfig and returns a handler.
func NewBaseHandler(c *componentbaseconfig.DebuggingConfiguration, checks ...healthz.HealthChecker) *mux.PathRecorderMux {
func NewBaseHandler(c *componentbaseconfig.DebuggingConfiguration, healthzHandler http.Handler) *mux.PathRecorderMux {
mux := mux.NewPathRecorderMux("controller-manager")
healthz.InstallHandler(mux, checks...)
mux.Handle("/healthz", healthzHandler)
if c.EnableProfiling {
routes.Profiling{}.Install(mux)
if c.EnableContentionProfiling {

View File

@ -16,7 +16,11 @@ limitations under the License.
package controller
import "net/http"
import (
"net/http"
"k8s.io/controller-manager/pkg/healthz"
)
// Interface defines the base of a controller managed by a controller manager
type Interface interface {
@ -36,3 +40,16 @@ type Debuggable interface {
// The handler will be accessible at "/debug/controllers/{controllerName}/".
DebuggingHandler() http.Handler
}
// HealthCheckable defines a controller that allows the controller manager
// to include it in the health checks.
//
// If a controller implements HealthCheckable, and the returned check
// is not nil, the controller manager can expose the check to the
// /healthz endpoint.
type HealthCheckable interface {
// HealthChecker returns a UnnamedHealthChecker that the controller manager
// can choose to mount on the /healthz endpoint, or nil if no custom
// health check is desired.
HealthChecker() healthz.UnnamedHealthChecker
}

View File

@ -0,0 +1,68 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package healthz
import (
"net/http"
"sync"
"k8s.io/apiserver/pkg/server/healthz"
"k8s.io/apiserver/pkg/server/mux"
)
// MutableHealthzHandler returns a http.Handler that handles "/healthz"
// following the standard healthz mechanism.
//
// This handler can register health checks after its creation, which
// is originally not allowed with standard healthz handler.
type MutableHealthzHandler struct {
// handler is the underlying handler that will be replaced every time
// new checks are added.
handler http.Handler
// mutex is a RWMutex that allows concurrent health checks (read)
// but disallow replacing the handler at the same time (write).
mutex sync.RWMutex
checks []healthz.HealthChecker
}
func (h *MutableHealthzHandler) ServeHTTP(writer http.ResponseWriter, request *http.Request) {
h.mutex.RLock()
defer h.mutex.RUnlock()
h.handler.ServeHTTP(writer, request)
}
// AddHealthChecker adds health check(s) to the handler.
//
// Every time this function is called, the handler have to be re-initiated.
// It is advised to add as many checks at once as possible.
func (h *MutableHealthzHandler) AddHealthChecker(checks ...healthz.HealthChecker) {
h.mutex.Lock()
defer h.mutex.Unlock()
h.checks = append(h.checks, checks...)
newMux := mux.NewPathRecorderMux("healthz")
healthz.InstallHandler(newMux, h.checks...)
h.handler = newMux
}
func NewMutableHealthzHandler(checks ...healthz.HealthChecker) *MutableHealthzHandler {
h := &MutableHealthzHandler{}
h.AddHealthChecker(checks...)
return h
}

View File

@ -0,0 +1,174 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package healthz
import (
"fmt"
"net/http"
"net/http/httptest"
"sync/atomic"
"testing"
"time"
"k8s.io/apiserver/pkg/server/healthz"
)
func TestMutableHealthzHandler(t *testing.T) {
badChecker := healthz.NamedCheck("bad", func(r *http.Request) error {
return fmt.Errorf("bad")
})
for _, tc := range []struct {
name string
checkBatches [][]healthz.HealthChecker
appendBad bool // appends bad check after batches above, and see if it fails afterwards
path string
expectedBody string
expectedStatus int
}{
{
name: "empty",
checkBatches: [][]healthz.HealthChecker{},
path: "/healthz",
expectedBody: "ok",
expectedStatus: http.StatusOK,
},
{
name: "good",
checkBatches: [][]healthz.HealthChecker{
{NamedPingChecker("good")},
},
path: "/healthz",
expectedBody: "ok",
expectedStatus: http.StatusOK,
},
{
name: "good verbose", // verbose only applies for successful checks
checkBatches: [][]healthz.HealthChecker{
{NamedPingChecker("good")}, // batch 1: good
},
path: "/healthz?verbose=true",
expectedBody: "[+]good ok\nhealthz check passed\n",
expectedStatus: http.StatusOK,
},
{
name: "good and bad, same batch",
checkBatches: [][]healthz.HealthChecker{
{NamedPingChecker("good"), badChecker}, // batch 1: good, bad
},
path: "/healthz",
expectedBody: "[+]good ok\n[-]bad failed: reason withheld\nhealthz check failed\n",
expectedStatus: http.StatusInternalServerError,
},
{
name: "good and bad, two batches",
checkBatches: [][]healthz.HealthChecker{
{NamedPingChecker("good")}, // batch 1: good
{badChecker}, // batch 2: bad
},
path: "/healthz",
expectedBody: "[+]good ok\n[-]bad failed: reason withheld\nhealthz check failed\n",
expectedStatus: http.StatusInternalServerError,
},
{
name: "two checks and append bad",
checkBatches: [][]healthz.HealthChecker{
{NamedPingChecker("foo"), NamedPingChecker("bar")},
},
path: "/healthz",
expectedBody: "ok",
expectedStatus: http.StatusOK,
appendBad: true,
},
{
name: "subcheck",
checkBatches: [][]healthz.HealthChecker{
{NamedPingChecker("good")}, // batch 1: good
{badChecker}, // batch 2: bad
},
path: "/healthz/good",
expectedBody: "ok",
expectedStatus: http.StatusOK,
},
} {
t.Run(tc.name, func(t *testing.T) {
h := NewMutableHealthzHandler()
for _, batch := range tc.checkBatches {
h.AddHealthChecker(batch...)
}
req, err := http.NewRequest("GET", fmt.Sprintf("https://example.com%v", tc.path), nil)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
w := httptest.NewRecorder()
h.ServeHTTP(w, req)
if w.Code != tc.expectedStatus {
t.Errorf("unexpected status: expected %v, got %v", tc.expectedStatus, w.Result().StatusCode)
}
if w.Body.String() != tc.expectedBody {
t.Errorf("unexpected body: expected %v, got %v", tc.expectedBody, w.Body.String())
}
if tc.appendBad {
h.AddHealthChecker(badChecker)
w := httptest.NewRecorder()
h.ServeHTTP(w, req)
// should fail
if w.Code != http.StatusInternalServerError {
t.Errorf("did not fail after adding bad checker")
}
}
})
}
}
// TestConcurrentChecks tests that the handler would not block on concurrent healthz requests.
func TestConcurrentChecks(t *testing.T) {
const N = 5
stopChan := make(chan interface{})
defer close(stopChan) // always close no matter passing or not
concurrentChan := make(chan interface{}, N)
var concurrentCount int32
pausingCheck := healthz.NamedCheck("pausing", func(r *http.Request) error {
atomic.AddInt32(&concurrentCount, 1)
concurrentChan <- nil
<-stopChan
return nil
})
h := NewMutableHealthzHandler(pausingCheck)
for i := 0; i < N; i++ {
go func() {
req, _ := http.NewRequest(http.MethodGet, "https://example.com/healthz", nil)
w := httptest.NewRecorder()
h.ServeHTTP(w, req)
}()
}
giveUp := time.After(1 * time.Second) // should take <1ms if passing
for i := 0; i < N; i++ {
select {
case <-giveUp:
t.Errorf("given up waiting for concurrent checks to start.")
return
case <-concurrentChan:
continue
}
}
if concurrentCount != N {
t.Errorf("expected %v concurrency, got %v", N, concurrentCount)
}
}

View File

@ -0,0 +1,43 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package healthz
import (
"net/http"
"k8s.io/apiserver/pkg/server/healthz"
)
// NamedPingChecker returns a health check with given name
// that returns no error when checked.
func NamedPingChecker(name string) healthz.HealthChecker {
return NamedHealthChecker(name, healthz.PingHealthz)
}
// NamedHealthChecker creates a named health check from
// an unnamed one.
func NamedHealthChecker(name string, check UnnamedHealthChecker) healthz.HealthChecker {
return healthz.NamedCheck(name, check.Check)
}
// UnnamedHealthChecker is an unnamed healthz checker.
// The name of the check can be set by the controller manager.
type UnnamedHealthChecker interface {
Check(req *http.Request) error
}
var _ UnnamedHealthChecker = (healthz.HealthChecker)(nil)

View File

@ -0,0 +1,41 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package healthz
import (
"fmt"
"net/http"
"testing"
)
type checkWithMessage struct {
message string
}
func (c *checkWithMessage) Check(_ *http.Request) error {
return fmt.Errorf("%s", c.message)
}
func TestNamedHealthChecker(t *testing.T) {
named := NamedHealthChecker("foo", &checkWithMessage{message: "hello"})
if named.Name() != "foo" {
t.Errorf("expected: %v, got: %v", "foo", named.Name())
}
if err := named.Check(nil); err.Error() != "hello" {
t.Errorf("expected: %v, got: %v", "hello", err.Error())
}
}

1
vendor/modules.txt vendored
View File

@ -1974,6 +1974,7 @@ k8s.io/controller-manager/options
k8s.io/controller-manager/pkg/clientbuilder
k8s.io/controller-manager/pkg/features
k8s.io/controller-manager/pkg/features/register
k8s.io/controller-manager/pkg/healthz
k8s.io/controller-manager/pkg/informerfactory
k8s.io/controller-manager/pkg/leadermigration
k8s.io/controller-manager/pkg/leadermigration/config