From 7710128636a16c73045291d4729675339a7d57f7 Mon Sep 17 00:00:00 2001 From: Rita Zhang Date: Mon, 11 Sep 2023 16:47:29 -0700 Subject: [PATCH] kms: remove livez check Signed-off-by: Rita Zhang --- .../apiserver/pkg/server/options/etcd.go | 9 ++- .../apiserver/pkg/server/options/etcd_test.go | 80 +++++++++++++------ .../transformation/kms_transformation_test.go | 5 ++ .../transformation/transformation_test.go | 40 +++++++++- 4 files changed, 104 insertions(+), 30 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/server/options/etcd.go b/staging/src/k8s.io/apiserver/pkg/server/options/etcd.go index 2d8741ebd1f..d4f75ddd721 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/options/etcd.go +++ b/staging/src/k8s.io/apiserver/pkg/server/options/etcd.go @@ -332,18 +332,23 @@ func (s *EtcdOptions) maybeApplyResourceTransformers(c *server.Config) (err erro c.ResourceTransformers = dynamicTransformers if !s.SkipHealthEndpoints { - c.AddHealthChecks(dynamicTransformers) + addHealthChecksWithoutLivez(c, dynamicTransformers) } } else { c.ResourceTransformers = encryptionconfig.StaticTransformers(encryptionConfiguration.Transformers) if !s.SkipHealthEndpoints { - c.AddHealthChecks(encryptionConfiguration.HealthChecks...) + addHealthChecksWithoutLivez(c, encryptionConfiguration.HealthChecks...) } } return nil } +func addHealthChecksWithoutLivez(c *server.Config, healthChecks ...healthz.HealthChecker) { + c.HealthzChecks = append(c.HealthzChecks, healthChecks...) + c.ReadyzChecks = append(c.ReadyzChecks, healthChecks...) +} + func (s *EtcdOptions) addEtcdHealthEndpoint(c *server.Config) error { healthCheck, err := storagefactory.CreateHealthCheck(s.StorageConfig, c.DrainedNotify()) if err != nil { diff --git a/staging/src/k8s.io/apiserver/pkg/server/options/etcd_test.go b/staging/src/k8s.io/apiserver/pkg/server/options/etcd_test.go index d10474248b4..89a1251bc1e 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/options/etcd_test.go +++ b/staging/src/k8s.io/apiserver/pkg/server/options/etcd_test.go @@ -234,63 +234,85 @@ func TestKMSHealthzEndpoint(t *testing.T) { testCases := []struct { name string encryptionConfigPath string - wantChecks []string + wantHealthzChecks []string + wantReadyzChecks []string + wantLivezChecks []string skipHealth bool reload bool }{ { - name: "no kms-provider, expect no kms healthz check", + name: "no kms-provider, expect no kms healthz check, no kms livez check", encryptionConfigPath: "testdata/encryption-configs/no-kms-provider.yaml", - wantChecks: []string{"etcd"}, + wantHealthzChecks: []string{"etcd"}, + wantReadyzChecks: []string{"etcd", "etcd-readiness"}, + wantLivezChecks: []string{"etcd"}, }, { - name: "no kms-provider+reload, expect single kms healthz check", + name: "no kms-provider+reload, expect single kms healthz check, no kms livez check", encryptionConfigPath: "testdata/encryption-configs/no-kms-provider.yaml", reload: true, - wantChecks: []string{"etcd", "kms-providers"}, + wantHealthzChecks: []string{"etcd", "kms-providers"}, + wantReadyzChecks: []string{"etcd", "kms-providers", "etcd-readiness"}, + wantLivezChecks: []string{"etcd"}, }, { - name: "single kms-provider, expect single kms healthz check", + name: "single kms-provider, expect single kms healthz check, no kms livez check", encryptionConfigPath: "testdata/encryption-configs/single-kms-provider.yaml", - wantChecks: []string{"etcd", "kms-provider-0"}, + wantHealthzChecks: []string{"etcd", "kms-provider-0"}, + wantReadyzChecks: []string{"etcd", "kms-provider-0", "etcd-readiness"}, + wantLivezChecks: []string{"etcd"}, }, { - name: "two kms-providers, expect two kms healthz checks", + name: "two kms-providers, expect two kms healthz checks, no kms livez check", encryptionConfigPath: "testdata/encryption-configs/multiple-kms-providers.yaml", - wantChecks: []string{"etcd", "kms-provider-0", "kms-provider-1"}, + wantHealthzChecks: []string{"etcd", "kms-provider-0", "kms-provider-1"}, + wantReadyzChecks: []string{"etcd", "kms-provider-0", "kms-provider-1", "etcd-readiness"}, + wantLivezChecks: []string{"etcd"}, }, { - name: "two kms-providers+reload, expect single kms healthz check", + name: "two kms-providers+reload, expect single kms healthz check, no kms livez check", encryptionConfigPath: "testdata/encryption-configs/multiple-kms-providers.yaml", reload: true, - wantChecks: []string{"etcd", "kms-providers"}, + wantHealthzChecks: []string{"etcd", "kms-providers"}, + wantReadyzChecks: []string{"etcd", "kms-providers", "etcd-readiness"}, + wantLivezChecks: []string{"etcd"}, }, { - name: "kms v1+v2, expect three kms healthz checks", + name: "kms v1+v2, expect three kms healthz checks, no kms livez check", encryptionConfigPath: "testdata/encryption-configs/multiple-kms-providers-with-v2.yaml", - wantChecks: []string{"etcd", "kms-provider-0", "kms-provider-1", "kms-provider-2"}, + wantHealthzChecks: []string{"etcd", "kms-provider-0", "kms-provider-1", "kms-provider-2"}, + wantReadyzChecks: []string{"etcd", "kms-provider-0", "kms-provider-1", "kms-provider-2", "etcd-readiness"}, + wantLivezChecks: []string{"etcd"}, }, { - name: "kms v1+v2+reload, expect single kms healthz check", + name: "kms v1+v2+reload, expect single kms healthz check, no kms livez check", encryptionConfigPath: "testdata/encryption-configs/multiple-kms-providers-with-v2.yaml", reload: true, - wantChecks: []string{"etcd", "kms-providers"}, + wantHealthzChecks: []string{"etcd", "kms-providers"}, + wantReadyzChecks: []string{"etcd", "kms-providers", "etcd-readiness"}, + wantLivezChecks: []string{"etcd"}, }, { - name: "multiple kms v2, expect single kms healthz check", + name: "multiple kms v2, expect single kms healthz check, no kms livez check", encryptionConfigPath: "testdata/encryption-configs/multiple-kms-v2-providers.yaml", - wantChecks: []string{"etcd", "kms-providers"}, + wantHealthzChecks: []string{"etcd", "kms-providers"}, + wantReadyzChecks: []string{"etcd", "kms-providers", "etcd-readiness"}, + wantLivezChecks: []string{"etcd"}, }, { - name: "multiple kms v2+reload, expect single kms healthz check", + name: "multiple kms v2+reload, expect single kms healthz check, no kms livez check", encryptionConfigPath: "testdata/encryption-configs/multiple-kms-v2-providers.yaml", reload: true, - wantChecks: []string{"etcd", "kms-providers"}, + wantHealthzChecks: []string{"etcd", "kms-providers"}, + wantReadyzChecks: []string{"etcd", "kms-providers", "etcd-readiness"}, + wantLivezChecks: []string{"etcd"}, }, { - name: "two kms-providers with skip, expect zero kms healthz checks", + name: "two kms-providers with skip, expect zero kms healthz checks, no kms livez check", encryptionConfigPath: "testdata/encryption-configs/multiple-kms-providers.yaml", - wantChecks: nil, + wantHealthzChecks: nil, + wantReadyzChecks: nil, + wantLivezChecks: nil, skipHealth: true, }, } @@ -310,7 +332,9 @@ func TestKMSHealthzEndpoint(t *testing.T) { t.Fatalf("Failed to add healthz error: %v", err) } - healthChecksAreEqual(t, tc.wantChecks, serverConfig.HealthzChecks) + healthChecksAreEqual(t, tc.wantHealthzChecks, serverConfig.HealthzChecks, "healthz") + healthChecksAreEqual(t, tc.wantReadyzChecks, serverConfig.ReadyzChecks, "readyz") + healthChecksAreEqual(t, tc.wantLivezChecks, serverConfig.LivezChecks, "livez") }) } } @@ -320,17 +344,20 @@ func TestReadinessCheck(t *testing.T) { name string wantReadyzChecks []string wantHealthzChecks []string + wantLivezChecks []string skipHealth bool }{ { name: "Readyz should have etcd-readiness check", wantReadyzChecks: []string{"etcd", "etcd-readiness"}, wantHealthzChecks: []string{"etcd"}, + wantLivezChecks: []string{"etcd"}, }, { name: "skip health, Readyz should not have etcd-readiness check", wantReadyzChecks: nil, wantHealthzChecks: nil, + wantLivezChecks: nil, skipHealth: true, }, } @@ -346,13 +373,14 @@ func TestReadinessCheck(t *testing.T) { t.Fatalf("Failed to add healthz error: %v", err) } - healthChecksAreEqual(t, tc.wantReadyzChecks, serverConfig.ReadyzChecks) - healthChecksAreEqual(t, tc.wantHealthzChecks, serverConfig.HealthzChecks) + healthChecksAreEqual(t, tc.wantReadyzChecks, serverConfig.ReadyzChecks, "readyz") + healthChecksAreEqual(t, tc.wantHealthzChecks, serverConfig.HealthzChecks, "healthz") + healthChecksAreEqual(t, tc.wantLivezChecks, serverConfig.LivezChecks, "livez") }) } } -func healthChecksAreEqual(t *testing.T, want []string, healthChecks []healthz.HealthChecker) { +func healthChecksAreEqual(t *testing.T, want []string, healthChecks []healthz.HealthChecker, checkerType string) { t.Helper() wantSet := sets.NewString(want...) @@ -365,6 +393,6 @@ func healthChecksAreEqual(t *testing.T, want []string, healthChecks []healthz.He gotSet.Delete("log", "ping") // not relevant for our tests if !wantSet.Equal(gotSet) { - t.Errorf("healthz checks are not equal, missing=%q, extra=%q", wantSet.Difference(gotSet).List(), gotSet.Difference(wantSet).List()) + t.Errorf("%s checks are not equal, missing=%q, extra=%q", checkerType, wantSet.Difference(gotSet).List(), gotSet.Difference(wantSet).List()) } } diff --git a/test/integration/controlplane/transformation/kms_transformation_test.go b/test/integration/controlplane/transformation/kms_transformation_test.go index 6985f7f563e..941234f9b46 100644 --- a/test/integration/controlplane/transformation/kms_transformation_test.go +++ b/test/integration/controlplane/transformation/kms_transformation_test.go @@ -1028,6 +1028,8 @@ resources: // healthz/kms-provider-0 and /healthz/kms-provider-1 should be OK. mustBeHealthy(t, "/kms-provider-0", "ok", test.kubeAPIServer.ClientConfig) mustBeHealthy(t, "/kms-provider-1", "ok", test.kubeAPIServer.ClientConfig) + mustNotHaveLivez(t, "/kms-provider-0", "404 page not found", test.kubeAPIServer.ClientConfig) + mustNotHaveLivez(t, "/kms-provider-1", "404 page not found", test.kubeAPIServer.ClientConfig) // Stage 2 - kms-plugin for provider-1 is down. Therefore, expect the healthz check // to fail and report that provider-1 is down @@ -1035,7 +1037,10 @@ resources: mustBeUnHealthy(t, "/kms-provider-0", "internal server error: rpc error: code = FailedPrecondition desc = failed precondition - key disabled", test.kubeAPIServer.ClientConfig) + + mustNotHaveLivez(t, "/kms-provider-0", "404 page not found", test.kubeAPIServer.ClientConfig) mustBeHealthy(t, "/kms-provider-1", "ok", test.kubeAPIServer.ClientConfig) + mustNotHaveLivez(t, "/kms-provider-1", "404 page not found", test.kubeAPIServer.ClientConfig) pluginMock1.ExitFailedState() // Stage 3 - kms-plugin for provider-1 is now up. Therefore, expect the health check for provider-1 diff --git a/test/integration/controlplane/transformation/transformation_test.go b/test/integration/controlplane/transformation/transformation_test.go index a5fe45af6a5..45d7f0ec7b2 100644 --- a/test/integration/controlplane/transformation/transformation_test.go +++ b/test/integration/controlplane/transformation/transformation_test.go @@ -132,6 +132,7 @@ func newTransformTest(l kubeapiservertesting.Logger, transformerConfigYAML strin if transformerConfigYAML != "" && reload { // when reloading is enabled, this healthz endpoint is always present mustBeHealthy(l, "/kms-providers", "ok", e.kubeAPIServer.ClientConfig) + mustNotHaveLivez(l, "/kms-providers", "404 page not found", e.kubeAPIServer.ClientConfig) // excluding healthz endpoints even if they do not exist should work mustBeHealthy(l, "", `warn: some health checks cannot be excluded: no matches for "kms-provider-0","kms-provider-1","kms-provider-2","kms-provider-3"`, @@ -550,7 +551,7 @@ func (e *transformTest) printMetrics() error { func mustBeHealthy(t kubeapiservertesting.Logger, checkName, wantBodyContains string, clientConfig *rest.Config, excludes ...string) { t.Helper() var restErr error - pollErr := wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { + pollErr := wait.PollUntilContextTimeout(context.TODO(), 1*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { body, ok, err := getHealthz(checkName, clientConfig, excludes...) restErr = err if err != nil { @@ -571,7 +572,7 @@ func mustBeHealthy(t kubeapiservertesting.Logger, checkName, wantBodyContains st func mustBeUnHealthy(t kubeapiservertesting.Logger, checkName, wantBodyContains string, clientConfig *rest.Config, excludes ...string) { t.Helper() var restErr error - pollErr := wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { + pollErr := wait.PollUntilContextTimeout(context.TODO(), 1*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { body, ok, err := getHealthz(checkName, clientConfig, excludes...) restErr = err if err != nil { @@ -589,6 +590,27 @@ func mustBeUnHealthy(t kubeapiservertesting.Logger, checkName, wantBodyContains } } +func mustNotHaveLivez(t kubeapiservertesting.Logger, checkName, wantBodyContains string, clientConfig *rest.Config, excludes ...string) { + t.Helper() + var restErr error + pollErr := wait.PollUntilContextTimeout(context.TODO(), 1*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + body, ok, err := getLivez(checkName, clientConfig, excludes...) + restErr = err + if err != nil { + return false, err + } + done := !ok && strings.Contains(body, wantBodyContains) + if !done { + t.Logf("expected server check %q with message %q but it is not: %s", checkName, wantBodyContains, body) + } + return done, nil + }) + + if pollErr != nil { + t.Fatalf("failed to get the expected livez status of !OK for check: %s, error: %v, debug inner error: %v", checkName, pollErr, restErr) + } +} + func getHealthz(checkName string, clientConfig *rest.Config, excludes ...string) (string, bool, error) { client, err := kubernetes.NewForConfig(clientConfig) if err != nil { @@ -602,3 +624,17 @@ func getHealthz(checkName string, clientConfig *rest.Config, excludes ...string) body, err := req.DoRaw(context.TODO()) // we can still have a response body during an error case return string(body), err == nil, nil } + +func getLivez(checkName string, clientConfig *rest.Config, excludes ...string) (string, bool, error) { + client, err := kubernetes.NewForConfig(clientConfig) + if err != nil { + return "", false, fmt.Errorf("failed to create a client: %v", err) + } + + req := client.CoreV1().RESTClient().Get().AbsPath(fmt.Sprintf("/livez%v", checkName)).Param("verbose", "true") + for _, exclude := range excludes { + req.Param("exclude", exclude) + } + body, err := req.DoRaw(context.TODO()) // we can still have a response body during an error case + return string(body), err == nil, nil +}