From aa1b2d6d35c92a31be17357fc66cfc1eca8a67e0 Mon Sep 17 00:00:00 2001 From: Han Kang Date: Mon, 26 Aug 2019 16:57:25 -0700 Subject: [PATCH] add /livez as a liveness endpoint for kube-apiserver go fmt make func private refactor config_test Two primary refactorings: 1. config test checkPath method is now each a distinct test run (which makes it easier to see what is actually failing) 2. TestNewWithDelegate's root path check now parses the json output and does a comparison against a list of expected paths (no more whitespace and ordering issues when updating this test, yay). go fmt modify and simplify existing integration test for readyz/livez simplify integration test set default rbac policy rules for livez rename a few functions and the entrypoint command line argument (and etcetera) simplify interface for installing readyz and livez and make auto-register completion a bootstrapped check untangle some of the nested functions, restructure the code --- cmd/kube-apiserver/app/aggregator.go | 2 +- cmd/kube-apiserver/app/testing/BUILD | 1 - cmd/kube-apiserver/app/testing/testserver.go | 10 -- .../authorizer/rbac/bootstrappolicy/policy.go | 5 +- .../testdata/cluster-roles.yaml | 2 + staging/src/k8s.io/apiserver/pkg/server/BUILD | 1 + .../src/k8s.io/apiserver/pkg/server/config.go | 24 ++-- .../apiserver/pkg/server/config_test.go | 123 ++++++++++++------ .../apiserver/pkg/server/genericapiserver.go | 28 ++-- .../k8s.io/apiserver/pkg/server/healthz.go | 109 ++++++++++------ .../apiserver/pkg/server/healthz/healthz.go | 8 ++ .../src/k8s.io/apiserver/pkg/server/hooks.go | 5 +- .../pkg/server/options/server_run_options.go | 14 +- .../server/options/server_run_options_test.go | 6 +- .../integration/master/kube_apiserver_test.go | 52 +------- 15 files changed, 211 insertions(+), 179 deletions(-) diff --git a/cmd/kube-apiserver/app/aggregator.go b/cmd/kube-apiserver/app/aggregator.go index 54d939677b4..4bf4a0edcb8 100644 --- a/cmd/kube-apiserver/app/aggregator.go +++ b/cmd/kube-apiserver/app/aggregator.go @@ -153,7 +153,7 @@ func createAggregatorServer(aggregatorConfig *aggregatorapiserver.Config, delega return nil, err } - err = aggregatorServer.GenericAPIServer.AddHealthChecks( + err = aggregatorServer.GenericAPIServer.AddBootSequenceHealthChecks( makeAPIServiceAvailableHealthCheck( "autoregister-completion", apiServices, diff --git a/cmd/kube-apiserver/app/testing/BUILD b/cmd/kube-apiserver/app/testing/BUILD index c19fbf699de..758f8fcb0f8 100644 --- a/cmd/kube-apiserver/app/testing/BUILD +++ b/cmd/kube-apiserver/app/testing/BUILD @@ -17,7 +17,6 @@ go_library( "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library", "//staging/src/k8s.io/apiserver/pkg/registry/generic/registry:go_default_library", - "//staging/src/k8s.io/apiserver/pkg/server/healthz:go_default_library", "//staging/src/k8s.io/apiserver/pkg/storage/storagebackend:go_default_library", "//staging/src/k8s.io/client-go/kubernetes:go_default_library", "//staging/src/k8s.io/client-go/rest:go_default_library", diff --git a/cmd/kube-apiserver/app/testing/testserver.go b/cmd/kube-apiserver/app/testing/testserver.go index 10b12c1b867..4a5ac474b7c 100644 --- a/cmd/kube-apiserver/app/testing/testserver.go +++ b/cmd/kube-apiserver/app/testing/testserver.go @@ -31,7 +31,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apiserver/pkg/registry/generic/registry" - "k8s.io/apiserver/pkg/server/healthz" "k8s.io/apiserver/pkg/storage/storagebackend" "k8s.io/client-go/kubernetes" restclient "k8s.io/client-go/rest" @@ -46,8 +45,6 @@ type TearDownFunc func() type TestServerInstanceOptions struct { // DisableStorageCleanup Disable the automatic storage cleanup DisableStorageCleanup bool - // Injected health - InjectedHealthChecker healthz.HealthChecker } // TestServer return values supplied by kube-test-ApiServer @@ -151,13 +148,6 @@ func StartTestServer(t Logger, instanceOptions *TestServerInstanceOptions, custo return result, fmt.Errorf("failed to create server chain: %v", err) } - if instanceOptions.InjectedHealthChecker != nil { - t.Logf("Adding health check with delay %v %v", s.GenericServerRunOptions.MaxStartupSequenceDuration, instanceOptions.InjectedHealthChecker.Name()) - if err := server.GenericAPIServer.AddDelayedHealthzChecks(s.GenericServerRunOptions.MaxStartupSequenceDuration, instanceOptions.InjectedHealthChecker); err != nil { - return result, err - } - } - errCh := make(chan error) go func(stopCh <-chan struct{}) { prepared, err := server.PrepareRun() diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go index 2f9bcb8ad75..d9f6950afde 100644 --- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go +++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go @@ -197,7 +197,8 @@ func ClusterRoles() []rbacv1.ClusterRole { ObjectMeta: metav1.ObjectMeta{Name: "system:discovery"}, Rules: []rbacv1.PolicyRule{ rbacv1helpers.NewRule("get").URLs( - "/readyz", "/healthz", "/version", "/version/", + "/livez", "/readyz", "/healthz", + "/version", "/version/", "/openapi", "/openapi/*", "/api", "/api/*", "/apis", "/apis/*", @@ -217,7 +218,7 @@ func ClusterRoles() []rbacv1.ClusterRole { ObjectMeta: metav1.ObjectMeta{Name: "system:public-info-viewer"}, Rules: []rbacv1.PolicyRule{ rbacv1helpers.NewRule("get").URLs( - "/readyz", "/healthz", "/version", "/version/", + "/livez", "/readyz", "/healthz", "/version", "/version/", ).RuleOrDie(), }, }, diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml index 0ec6f82df57..a9a6ccf9e12 100644 --- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml +++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml @@ -548,6 +548,7 @@ items: - /apis - /apis/* - /healthz + - /livez - /openapi - /openapi/* - /readyz @@ -1185,6 +1186,7 @@ items: rules: - nonResourceURLs: - /healthz + - /livez - /readyz - /version - /version/ diff --git a/staging/src/k8s.io/apiserver/pkg/server/BUILD b/staging/src/k8s.io/apiserver/pkg/server/BUILD index de85f2dcc42..89b071086c9 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/BUILD +++ b/staging/src/k8s.io/apiserver/pkg/server/BUILD @@ -22,6 +22,7 @@ go_test( "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/runtime/serializer:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/json:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/net:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", diff --git a/staging/src/k8s.io/apiserver/pkg/server/config.go b/staging/src/k8s.io/apiserver/pkg/server/config.go index 37bd8ba4254..1e553b232b2 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/config.go +++ b/staging/src/k8s.io/apiserver/pkg/server/config.go @@ -139,6 +139,8 @@ type Config struct { DiscoveryAddresses discovery.Addresses // The default set of healthz checks. There might be more added via AddHealthChecks dynamically. HealthzChecks []healthz.HealthChecker + // The default set of livez checks. There might be more added via AddHealthChecks dynamically. + LivezChecks []healthz.HealthChecker // The default set of readyz-only checks. There might be more added via AddReadyzChecks dynamically. ReadyzChecks []healthz.HealthChecker // LegacyAPIGroupPrefixes is used to set up URL parsing for authorization and for validating requests @@ -165,9 +167,9 @@ type Config struct { // This represents the maximum amount of time it should take for apiserver to complete its startup // sequence and become healthy. From apiserver's start time to when this amount of time has - // elapsed, /healthz will assume that unfinished post-start hooks will complete successfully and + // elapsed, /livez will assume that unfinished post-start hooks will complete successfully and // therefore return true. - MaxStartupSequenceDuration time.Duration + LivezGracePeriod time.Duration // ShutdownDelayDuration allows to block shutdown for some time, e.g. until endpoints pointing to this API server // have converged on all node. During this time, the API server keeps serving, /healthz will return 200, // but /readyz will return failure. @@ -278,6 +280,7 @@ func NewConfig(codecs serializer.CodecFactory) *Config { DisabledPostStartHooks: sets.NewString(), HealthzChecks: append([]healthz.HealthChecker{}, defaultHealthChecks...), ReadyzChecks: append([]healthz.HealthChecker{}, defaultHealthChecks...), + LivezChecks: append([]healthz.HealthChecker{}, defaultHealthChecks...), EnableIndex: true, EnableDiscovery: true, EnableProfiling: true, @@ -286,7 +289,7 @@ func NewConfig(codecs serializer.CodecFactory) *Config { MaxMutatingRequestsInFlight: 200, RequestTimeout: time.Duration(60) * time.Second, MinRequestTimeout: 1800, - MaxStartupSequenceDuration: time.Duration(0), + LivezGracePeriod: time.Duration(0), ShutdownDelayDuration: time.Duration(0), // 10MB is the recommended maximum client request size in bytes // the etcd server should accept. See @@ -509,15 +512,16 @@ func (c completedConfig) New(name string, delegationTarget DelegationTarget) (*G preShutdownHooks: map[string]preShutdownHookEntry{}, disabledPostStartHooks: c.DisabledPostStartHooks, - healthzChecks: c.HealthzChecks, - readyzChecks: c.ReadyzChecks, - readinessStopCh: make(chan struct{}), - maxStartupSequenceDuration: c.MaxStartupSequenceDuration, + healthzChecks: c.HealthzChecks, + livezChecks: c.LivezChecks, + readyzChecks: c.ReadyzChecks, + readinessStopCh: make(chan struct{}), + livezGracePeriod: c.LivezGracePeriod, DiscoveryGroupManager: discovery.NewRootAPIsHandler(c.DiscoveryAddresses, c.Serializer), maxRequestBodyBytes: c.MaxRequestBodyBytes, - healthzClock: clock.RealClock{}, + livezClock: clock.RealClock{}, } for { @@ -563,9 +567,7 @@ func (c completedConfig) New(name string, delegationTarget DelegationTarget) (*G if skip { continue } - - s.healthzChecks = append(s.healthzChecks, delegateCheck) - s.readyzChecks = append(s.readyzChecks, delegateCheck) + s.AddHealthChecks(delegateCheck) } s.listedPathProvider = routes.ListedPathProviders{s.listedPathProvider, delegationTarget} diff --git a/staging/src/k8s.io/apiserver/pkg/server/config_test.go b/staging/src/k8s.io/apiserver/pkg/server/config_test.go index f1ceb742c98..bf9daf844b6 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/config_test.go +++ b/staging/src/k8s.io/apiserver/pkg/server/config_test.go @@ -26,6 +26,7 @@ import ( "reflect" "testing" + "k8s.io/apimachinery/pkg/util/json" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apiserver/pkg/server/healthz" "k8s.io/client-go/informers" @@ -135,31 +136,36 @@ func TestNewWithDelegate(t *testing.T) { // Wait for the hooks to finish before checking the response <-delegatePostStartHookChan <-wrappingPostStartHookChan - - checkPath(server.URL, http.StatusOK, `{ - "paths": [ - "/apis", - "/bar", - "/foo", - "/healthz", - "/healthz/delegate-health", - "/healthz/log", - "/healthz/ping", - "/healthz/poststarthook/delegate-post-start-hook", - "/healthz/poststarthook/generic-apiserver-start-informers", - "/healthz/poststarthook/wrapping-post-start-hook", - "/healthz/wrapping-health", - "/metrics", - "/readyz", - "/readyz/delegate-health", - "/readyz/log", - "/readyz/ping", - "/readyz/poststarthook/delegate-post-start-hook", - "/readyz/poststarthook/generic-apiserver-start-informers", - "/readyz/poststarthook/wrapping-post-start-hook", - "/readyz/shutdown" - ] -}`, t) + expectedPaths := []string{ + "/apis", + "/bar", + "/foo", + "/healthz", + "/healthz/delegate-health", + "/healthz/log", + "/healthz/ping", + "/healthz/poststarthook/delegate-post-start-hook", + "/healthz/poststarthook/generic-apiserver-start-informers", + "/healthz/poststarthook/wrapping-post-start-hook", + "/healthz/wrapping-health", + "/livez", + "/livez/delegate-health", + "/livez/log", + "/livez/ping", + "/livez/poststarthook/delegate-post-start-hook", + "/livez/poststarthook/generic-apiserver-start-informers", + "/livez/poststarthook/wrapping-post-start-hook", + "/metrics", + "/readyz", + "/readyz/delegate-health", + "/readyz/log", + "/readyz/ping", + "/readyz/poststarthook/delegate-post-start-hook", + "/readyz/poststarthook/generic-apiserver-start-informers", + "/readyz/poststarthook/wrapping-post-start-hook", + "/readyz/shutdown", + } + checkExpectedPathsAtRoot(server.URL, expectedPaths, t) checkPath(server.URL+"/healthz", http.StatusInternalServerError, `[+]ping ok [+]log ok [-]wrapping-health failed: reason withheld @@ -181,22 +187,57 @@ healthz check failed } func checkPath(url string, expectedStatusCode int, expectedBody string, t *testing.T) { - resp, err := http.Get(url) - if err != nil { - t.Fatal(err) - } - dump, _ := httputil.DumpResponse(resp, true) - t.Log(string(dump)) + t.Run(url, func(t *testing.T) { + resp, err := http.Get(url) + if err != nil { + t.Fatal(err) + } + dump, _ := httputil.DumpResponse(resp, true) + t.Log(string(dump)) - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - t.Fatal(err) - } + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + t.Fatal(err) + } - if e, a := expectedBody, string(body); e != a { - t.Errorf("%q expected %v, got %v", url, e, a) - } - if e, a := expectedStatusCode, resp.StatusCode; e != a { - t.Errorf("%q expected %v, got %v", url, e, a) - } + if e, a := expectedBody, string(body); e != a { + t.Errorf("%q expected %v, got %v", url, e, a) + } + if e, a := expectedStatusCode, resp.StatusCode; e != a { + t.Errorf("%q expected %v, got %v", url, e, a) + } + }) +} + +func checkExpectedPathsAtRoot(url string, expectedPaths []string, t *testing.T) { + t.Run(url, func(t *testing.T) { + resp, err := http.Get(url) + if err != nil { + t.Fatal(err) + } + dump, _ := httputil.DumpResponse(resp, true) + t.Log(string(dump)) + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + t.Fatal(err) + } + var result map[string]interface{} + json.Unmarshal(body, &result) + paths, ok := result["paths"].([]interface{}) + if !ok { + t.Errorf("paths not found") + } + pathset := sets.NewString() + for _, p := range paths { + pathset.Insert(p.(string)) + } + expectedset := sets.NewString(expectedPaths...) + for _, p := range pathset.Difference(expectedset) { + t.Errorf("Got %v path, which we did not expect", p) + } + for _, p := range expectedset.Difference(pathset) { + t.Errorf(" Expected %v path which we did not get", p) + } + }) } diff --git a/staging/src/k8s.io/apiserver/pkg/server/genericapiserver.go b/staging/src/k8s.io/apiserver/pkg/server/genericapiserver.go index 53b770c93d6..8262f2ea2fd 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/genericapiserver.go +++ b/staging/src/k8s.io/apiserver/pkg/server/genericapiserver.go @@ -146,14 +146,19 @@ type GenericAPIServer struct { preShutdownHooksCalled bool // healthz checks - healthzLock sync.Mutex - healthzChecks []healthz.HealthChecker - healthzChecksInstalled bool - readyzLock sync.Mutex - readyzChecks []healthz.HealthChecker - readyzChecksInstalled bool - maxStartupSequenceDuration time.Duration - healthzClock clock.Clock + healthzLock sync.Mutex + healthzChecks []healthz.HealthChecker + healthzChecksInstalled bool + // livez checks + livezLock sync.Mutex + livezChecks []healthz.HealthChecker + livezChecksInstalled bool + // readyz checks + readyzLock sync.Mutex + readyzChecks []healthz.HealthChecker + readyzChecksInstalled bool + livezGracePeriod time.Duration + livezClock clock.Clock // the readiness stop channel is used to signal that the apiserver has initiated a shutdown sequence, this // will cause readyz to return unhealthy. readinessStopCh chan struct{} @@ -281,7 +286,12 @@ func (s *GenericAPIServer) PrepareRun() preparedGenericAPIServer { } s.installHealthz() - s.installReadyz(s.readinessStopCh) + s.installLivez() + err := s.addReadyzShutdownCheck(s.readinessStopCh) + if err != nil { + klog.Errorf("Failed to install readyz shutdown check %s", err) + } + s.installReadyz() // Register audit backend preShutdownHook. if s.AuditBackend != nil { diff --git a/staging/src/k8s.io/apiserver/pkg/server/healthz.go b/staging/src/k8s.io/apiserver/pkg/server/healthz.go index afd4188c578..645886949bb 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/healthz.go +++ b/staging/src/k8s.io/apiserver/pkg/server/healthz.go @@ -25,51 +25,93 @@ import ( "k8s.io/apiserver/pkg/server/healthz" ) -// AddHealthChecks adds HealthzCheck(s) to both healthz and readyz. All healthz checks -// are automatically added to readyz, since we want to avoid the situation where the -// apiserver is ready but not live. +// AddHealthChecks adds HealthCheck(s) to health endpoints (healthz, livez, readyz) but +// configures the liveness grace period to be zero, which means we expect this health check +// to immediately indicate that the apiserver is unhealthy. func (s *GenericAPIServer) AddHealthChecks(checks ...healthz.HealthChecker) error { - return s.AddDelayedHealthzChecks(0, checks...) + // we opt for a delay of zero here, because this entrypoint adds generic health checks + // and not health checks which are specifically related to kube-apiserver boot-sequences. + return s.addHealthChecks(0, checks...) } -// AddReadyzChecks allows you to add a HealthzCheck to readyz. -func (s *GenericAPIServer) AddReadyzChecks(checks ...healthz.HealthChecker) error { +// AddBootSequenceHealthChecks adds health checks to the old healthz endpoint (for backwards compatibility reasons) +// as well as livez and readyz. The livez grace period is defined by the value of the +// command-line flag --livez-grace-period; before the grace period elapses, the livez health checks +// will default to healthy. One may want to set a grace period in order to prevent the kubelet from restarting +// the kube-apiserver due to long-ish boot sequences. Readyz health checks, on the other hand, have no grace period, +// since readyz should fail until boot fully completes. +func (s *GenericAPIServer) AddBootSequenceHealthChecks(checks ...healthz.HealthChecker) error { + return s.addHealthChecks(s.livezGracePeriod, checks...) +} + +// addHealthChecks adds health checks to healthz, livez, and readyz. The delay passed in will set +// a corresponding grace period on livez. +func (s *GenericAPIServer) addHealthChecks(livezGracePeriod time.Duration, checks ...healthz.HealthChecker) error { + s.healthzLock.Lock() + defer s.healthzLock.Unlock() + if s.healthzChecksInstalled { + return fmt.Errorf("unable to add because the healthz endpoint has already been created") + } + s.healthzChecks = append(s.healthzChecks, checks...) + return s.addLivezChecks(livezGracePeriod, checks...) +} + +// addReadyzChecks allows you to add a HealthCheck to readyz. +func (s *GenericAPIServer) addReadyzChecks(checks ...healthz.HealthChecker) error { s.readyzLock.Lock() defer s.readyzLock.Unlock() - return s.addReadyzChecks(checks...) -} - -// addReadyzChecks allows you to add a HealthzCheck to readyz. -// premise: readyzLock has been obtained -func (s *GenericAPIServer) addReadyzChecks(checks ...healthz.HealthChecker) error { if s.readyzChecksInstalled { return fmt.Errorf("unable to add because the readyz endpoint has already been created") } - s.readyzChecks = append(s.readyzChecks, checks...) return nil } +// addLivezChecks allows you to add a HealthCheck to livez. It will also automatically add a check to readyz, +// since we want to avoid being ready when we are not live. +func (s *GenericAPIServer) addLivezChecks(delay time.Duration, checks ...healthz.HealthChecker) error { + s.livezLock.Lock() + defer s.livezLock.Unlock() + if s.livezChecksInstalled { + return fmt.Errorf("unable to add because the livez endpoint has already been created") + } + for _, check := range checks { + s.livezChecks = append(s.livezChecks, delayedHealthCheck(check, s.livezClock, delay)) + } + return s.addReadyzChecks(checks...) +} + +// addReadyzShutdownCheck is a convenience function for adding a readyz shutdown check, so +// that we can register that the api-server is no longer ready while we attempt to gracefully +// shutdown. +func (s *GenericAPIServer) addReadyzShutdownCheck(stopCh <-chan struct{}) error { + return s.addReadyzChecks(shutdownCheck{stopCh}) +} + // installHealthz creates the healthz endpoint for this server func (s *GenericAPIServer) installHealthz() { s.healthzLock.Lock() defer s.healthzLock.Unlock() s.healthzChecksInstalled = true - healthz.InstallHandler(s.Handler.NonGoRestfulMux, s.healthzChecks...) } // installReadyz creates the readyz endpoint for this server. -func (s *GenericAPIServer) installReadyz(stopCh <-chan struct{}) { +func (s *GenericAPIServer) installReadyz() { s.readyzLock.Lock() defer s.readyzLock.Unlock() - s.addReadyzChecks(shutdownCheck{stopCh}) - s.readyzChecksInstalled = true - healthz.InstallReadyzHandler(s.Handler.NonGoRestfulMux, s.readyzChecks...) } +// installLivez creates the livez endpoint for this server. +func (s *GenericAPIServer) installLivez() { + s.livezLock.Lock() + defer s.livezLock.Unlock() + s.livezChecksInstalled = true + healthz.InstallLivezHandler(s.Handler.NonGoRestfulMux, s.livezChecks...) +} + // shutdownCheck fails if the embedded channel is closed. This is intended to allow for graceful shutdown sequences // for the apiserver. type shutdownCheck struct { @@ -89,46 +131,27 @@ func (c shutdownCheck) Check(req *http.Request) error { return nil } -// AddDelayedHealthzChecks adds a health check to both healthz and readyz. The delay parameter -// allows you to set the grace period for healthz checks, which will return healthy while -// grace period has not yet elapsed. One may want to set a grace period in order to prevent -// the kubelet from restarting the kube-apiserver due to long-ish boot sequences. Readyz health -// checks have no grace period, since we want readyz to fail while boot has not completed. -func (s *GenericAPIServer) AddDelayedHealthzChecks(delay time.Duration, checks ...healthz.HealthChecker) error { - s.healthzLock.Lock() - defer s.healthzLock.Unlock() - if s.healthzChecksInstalled { - return fmt.Errorf("unable to add because the healthz endpoint has already been created") - } - for _, check := range checks { - s.healthzChecks = append(s.healthzChecks, delayedHealthCheck(check, s.healthzClock, s.maxStartupSequenceDuration)) - } - - s.readyzLock.Lock() - defer s.readyzLock.Unlock() - return s.addReadyzChecks(checks...) -} - -// delayedHealthCheck wraps a health check which will not fail until the explicitly defined delay has elapsed. +// delayedHealthCheck wraps a health check which will not fail until the explicitly defined delay has elapsed. This +// is intended for use primarily for livez health checks. func delayedHealthCheck(check healthz.HealthChecker, clock clock.Clock, delay time.Duration) healthz.HealthChecker { - return delayedHealthzCheck{ + return delayedLivezCheck{ check, clock.Now().Add(delay), clock, } } -type delayedHealthzCheck struct { +type delayedLivezCheck struct { check healthz.HealthChecker startCheck time.Time clock clock.Clock } -func (c delayedHealthzCheck) Name() string { +func (c delayedLivezCheck) Name() string { return c.check.Name() } -func (c delayedHealthzCheck) Check(req *http.Request) error { +func (c delayedLivezCheck) Check(req *http.Request) error { if c.clock.Now().After(c.startCheck) { return c.check.Check(req) } diff --git a/staging/src/k8s.io/apiserver/pkg/server/healthz/healthz.go b/staging/src/k8s.io/apiserver/pkg/server/healthz/healthz.go index a8d0cefa1b9..a55e201e704 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/healthz/healthz.go +++ b/staging/src/k8s.io/apiserver/pkg/server/healthz/healthz.go @@ -101,6 +101,14 @@ func InstallReadyzHandler(mux mux, checks ...HealthChecker) { InstallPathHandler(mux, "/readyz", checks...) } +// InstallLivezHandler registers handlers for liveness checking on the path +// "/livez" to mux. *All handlers* for mux must be specified in +// exactly one call to InstallHandler. Calling InstallHandler more +// than once for the same mux will result in a panic. +func InstallLivezHandler(mux mux, checks ...HealthChecker) { + InstallPathHandler(mux, "/livez", checks...) +} + // InstallPathHandler registers handlers for health checking on // a specific path to mux. *All handlers* for the path must be // specified in exactly one call to InstallPathHandler. Calling diff --git a/staging/src/k8s.io/apiserver/pkg/server/hooks.go b/staging/src/k8s.io/apiserver/pkg/server/hooks.go index 46585efdfc0..fba876d2573 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/hooks.go +++ b/staging/src/k8s.io/apiserver/pkg/server/hooks.go @@ -22,12 +22,11 @@ import ( "net/http" "runtime/debug" - "k8s.io/klog" - utilerrors "k8s.io/apimachinery/pkg/util/errors" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apiserver/pkg/server/healthz" restclient "k8s.io/client-go/rest" + "k8s.io/klog" ) // PostStartHookFunc is a function that is called after the server has started. @@ -97,7 +96,7 @@ func (s *GenericAPIServer) AddPostStartHook(name string, hook PostStartHookFunc) // done is closed when the poststarthook is finished. This is used by the health check to be able to indicate // that the poststarthook is finished done := make(chan struct{}) - if err := s.AddDelayedHealthzChecks(s.maxStartupSequenceDuration, postStartHookHealthz{name: "poststarthook/" + name, done: done}); err != nil { + if err := s.AddBootSequenceHealthChecks(postStartHookHealthz{name: "poststarthook/" + name, done: done}); err != nil { return err } s.postStartHooks[name] = postStartHookEntry{hook: hook, originatingStack: string(debug.Stack()), done: done} diff --git a/staging/src/k8s.io/apiserver/pkg/server/options/server_run_options.go b/staging/src/k8s.io/apiserver/pkg/server/options/server_run_options.go index d6d9909da9b..f68c11dbf22 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/options/server_run_options.go +++ b/staging/src/k8s.io/apiserver/pkg/server/options/server_run_options.go @@ -41,7 +41,7 @@ type ServerRunOptions struct { MaxRequestsInFlight int MaxMutatingRequestsInFlight int RequestTimeout time.Duration - MaxStartupSequenceDuration time.Duration + LivezGracePeriod time.Duration MinRequestTimeout int ShutdownDelayDuration time.Duration // We intentionally did not add a flag for this option. Users of the @@ -62,7 +62,7 @@ func NewServerRunOptions() *ServerRunOptions { MaxRequestsInFlight: defaults.MaxRequestsInFlight, MaxMutatingRequestsInFlight: defaults.MaxMutatingRequestsInFlight, RequestTimeout: defaults.RequestTimeout, - MaxStartupSequenceDuration: defaults.MaxStartupSequenceDuration, + LivezGracePeriod: defaults.LivezGracePeriod, MinRequestTimeout: defaults.MinRequestTimeout, ShutdownDelayDuration: defaults.ShutdownDelayDuration, JSONPatchMaxCopyBytes: defaults.JSONPatchMaxCopyBytes, @@ -76,7 +76,7 @@ func (s *ServerRunOptions) ApplyTo(c *server.Config) error { c.ExternalAddress = s.ExternalHost c.MaxRequestsInFlight = s.MaxRequestsInFlight c.MaxMutatingRequestsInFlight = s.MaxMutatingRequestsInFlight - c.MaxStartupSequenceDuration = s.MaxStartupSequenceDuration + c.LivezGracePeriod = s.LivezGracePeriod c.RequestTimeout = s.RequestTimeout c.MinRequestTimeout = s.MinRequestTimeout c.ShutdownDelayDuration = s.ShutdownDelayDuration @@ -112,8 +112,8 @@ func (s *ServerRunOptions) Validate() []error { errors = append(errors, fmt.Errorf("--target-ram-mb can not be negative value")) } - if s.MaxStartupSequenceDuration < 0 { - errors = append(errors, fmt.Errorf("--maximum-startup-sequence-duration can not be a negative value")) + if s.LivezGracePeriod < 0 { + errors = append(errors, fmt.Errorf("--livez-grace-period can not be a negative value")) } if s.EnableInfightQuotaHandler { @@ -199,9 +199,9 @@ func (s *ServerRunOptions) AddUniversalFlags(fs *pflag.FlagSet) { "it out. This is the default request timeout for requests but may be overridden by flags such as "+ "--min-request-timeout for specific types of requests.") - fs.DurationVar(&s.MaxStartupSequenceDuration, "maximum-startup-sequence-duration", s.MaxStartupSequenceDuration, ""+ + fs.DurationVar(&s.LivezGracePeriod, "livez-grace-period", s.LivezGracePeriod, ""+ "This option represents the maximum amount of time it should take for apiserver to complete its startup sequence "+ - "and become healthy. From apiserver's start time to when this amount of time has elapsed, /healthz will assume "+ + "and become live. From apiserver's start time to when this amount of time has elapsed, /livez will assume "+ "that unfinished post-start hooks will complete successfully and therefore return true.") fs.IntVar(&s.MinRequestTimeout, "min-request-timeout", s.MinRequestTimeout, ""+ diff --git a/staging/src/k8s.io/apiserver/pkg/server/options/server_run_options_test.go b/staging/src/k8s.io/apiserver/pkg/server/options/server_run_options_test.go index 9fe66f18cce..2661128b7bc 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/options/server_run_options_test.go +++ b/staging/src/k8s.io/apiserver/pkg/server/options/server_run_options_test.go @@ -137,7 +137,7 @@ func TestServerRunOptionsValidate(t *testing.T) { expectErr: "--max-resource-write-bytes can not be negative value", }, { - name: "Test when MaxStartupSequenceDuration is negative value", + name: "Test when LivezGracePeriod is negative value", testOptions: &ServerRunOptions{ AdvertiseAddress: net.ParseIP("192.168.10.10"), CorsAllowedOriginList: []string{"10.10.10.100", "10.10.10.200"}, @@ -148,9 +148,9 @@ func TestServerRunOptionsValidate(t *testing.T) { JSONPatchMaxCopyBytes: 10 * 1024 * 1024, MaxRequestBodyBytes: 10 * 1024 * 1024, TargetRAMMB: 65536, - MaxStartupSequenceDuration: -time.Second, + LivezGracePeriod: -time.Second, }, - expectErr: "--maximum-startup-sequence-duration can not be a negative value", + expectErr: "--livez-grace-period can not be a negative value", }, { name: "Test when MinimalShutdownDuration is negative value", diff --git a/test/integration/master/kube_apiserver_test.go b/test/integration/master/kube_apiserver_test.go index 29b84537dc3..b1228e8eb10 100644 --- a/test/integration/master/kube_apiserver_test.go +++ b/test/integration/master/kube_apiserver_test.go @@ -22,7 +22,6 @@ import ( "net/http" "reflect" "strings" - "sync" "testing" "time" @@ -93,35 +92,16 @@ func endpointReturnsStatusOK(client *kubernetes.Clientset, path string) bool { return status == http.StatusOK } -func TestStartupSequenceHealthzAndReadyz(t *testing.T) { - hc := &delayedCheck{} - instanceOptions := &kubeapiservertesting.TestServerInstanceOptions{ - InjectedHealthChecker: hc, - } - server := kubeapiservertesting.StartTestServerOrDie(t, instanceOptions, []string{"--maximum-startup-sequence-duration", "15s"}, framework.SharedEtcd()) +func TestLivezAndReadyz(t *testing.T) { + server := kubeapiservertesting.StartTestServerOrDie(t, nil, []string{"--livez-grace-period", "0s"}, framework.SharedEtcd()) defer server.TearDownFn() client, err := kubernetes.NewForConfig(server.ClientConfig) if err != nil { t.Fatalf("unexpected error: %v", err) } - - if endpointReturnsStatusOK(client, "/readyz") { - t.Fatalf("readyz should start unready") - } - // we need to wait longer than our grace period - err = wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout, func() (bool, error) { - return !endpointReturnsStatusOK(client, "/healthz"), nil - }) - if err != nil { - t.Fatalf("healthz should have become unhealthy: %v", err) - } - hc.makeHealthy() - err = wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout, func() (bool, error) { - return endpointReturnsStatusOK(client, "/healthz"), nil - }) - if err != nil { - t.Fatalf("healthz should have become healthy again: %v", err) + if !endpointReturnsStatusOK(client, "/livez") { + t.Fatalf("livez should be healthy") } if !endpointReturnsStatusOK(client, "/readyz") { t.Fatalf("readyz should be healthy") @@ -296,27 +276,3 @@ func TestReconcilerMasterLeaseMultiMoreMasters(t *testing.T) { func TestReconcilerMasterLeaseMultiCombined(t *testing.T) { testReconcilersMasterLease(t, 3, 3) } - -type delayedCheck struct { - healthLock sync.Mutex - isHealthy bool -} - -func (h *delayedCheck) Name() string { - return "delayed-check" -} - -func (h *delayedCheck) Check(req *http.Request) error { - h.healthLock.Lock() - defer h.healthLock.Unlock() - if h.isHealthy { - return nil - } - return fmt.Errorf("isn't healthy") -} - -func (h *delayedCheck) makeHealthy() { - h.healthLock.Lock() - defer h.healthLock.Unlock() - h.isHealthy = true -}