From 895c80be2a9e61b4ee9905229364914e30759b92 Mon Sep 17 00:00:00 2001
From: Han Kang
Date: Tue, 9 Aug 2022 14:23:42 -0700
Subject: [PATCH] add metrics for health checks (for later use in apiserver)

Change-Id: I0dff11cc298c4960ae6620004a071ef6a62ddc9a
---
 .../metrics/prometheus/health/metrics.go      | 104 ++++++++++++++++++
 .../metrics/prometheus/health/metrics_test.go | 100 +++++++++++++++++
 2 files changed, 204 insertions(+)
 create mode 100644 staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go
 create mode 100644 staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go

diff --git a/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go
new file mode 100644
index 00000000000..1c635dc9f98
--- /dev/null
+++ b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go
@@ -0,0 +1,104 @@
+/*
+Copyright 2022 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package health
+
+import (
+	"context"
+	"errors"
+
+	k8smetrics "k8s.io/component-base/metrics"
+	"k8s.io/component-base/metrics/legacyregistry"
+)
+
+// HealthcheckStatus is the result of a single health check. It is used as the
+// value of the "status" label on the k8s_healthcheck metric.
+type HealthcheckStatus string
+
+// Statuses accepted by ObserveHealthcheck.
+const (
+	SUCCESS HealthcheckStatus = "success"
+	ERROR   HealthcheckStatus = "error"
+	PENDING HealthcheckStatus = "pending"
+)
+
+// HealthcheckType names the kind of health check being recorded. It is used as
+// the value of the "type" label on the k8s_healthcheck metric.
+type HealthcheckType string
+
+// Health check types accepted by ObserveHealthcheck.
+const (
+	LIVEZ   HealthcheckType = "livez"
+	READYZ  HealthcheckType = "readyz"
+	HEALTHZ HealthcheckType = "healthz"
+)
+
+var (
+	// healthcheck is a Prometheus gauge metric used for recording the results of a k8s healthcheck.
+	healthcheck = k8smetrics.NewGaugeVec(
+		&k8smetrics.GaugeOpts{
+			Name:           "k8s_healthcheck",
+			Help:           "This metric records the result of a single health check.",
+			StabilityLevel: k8smetrics.ALPHA,
+		},
+		[]string{"name", "type", "status"},
+	)
+	// statuses drives the loop that zeroes the non-observed statuses, while
+	// statusSet and checkSet are lookup tables used to validate input.
+	statuses  = []HealthcheckStatus{SUCCESS, ERROR, PENDING}
+	statusSet = map[HealthcheckStatus]struct{}{SUCCESS: {}, ERROR: {}, PENDING: {}}
+	checkSet  = map[HealthcheckType]struct{}{LIVEZ: {}, READYZ: {}, HEALTHZ: {}}
+
+	// Sentinel errors returned by ObserveHealthcheck for invalid input; they are
+	// created once instead of on every rejected call.
+	errInvalidStatus = errors.New("not a valid healthcheck status")
+	errInvalidType   = errors.New("not a valid healthcheck type")
+)
+
+// init registers the healthcheck metric with the legacy registry.
+func init() {
+	legacyregistry.MustRegister(healthcheck)
+}
+
+// ResetHealthMetrics resets the k8s_healthcheck metric, discarding all values
+// recorded so far.
+func ResetHealthMetrics() {
+	healthcheck.Reset()
+}
+
+// ObserveHealthcheck records status as the current result of the health check
+// identified by name and healthcheckType: the gauge for status is set to 1 and
+// the gauges for all other statuses of that check are set to 0.
+//
+// It returns an error, and records nothing, if status or healthcheckType is not
+// one of the constants declared in this package.
+func ObserveHealthcheck(ctx context.Context, name string, healthcheckType HealthcheckType, status HealthcheckStatus) error {
+	if _, ok := statusSet[status]; !ok {
+		return errInvalidStatus
+	}
+	if _, ok := checkSet[healthcheckType]; !ok {
+		return errInvalidType
+	}
+	gauge := healthcheck.WithContext(ctx)
+	// Clear the other statuses first, then raise the observed one.
+	for _, s := range statuses {
+		if s != status {
+			gauge.WithLabelValues(name, string(healthcheckType), string(s)).Set(0)
+		}
+	}
+	gauge.WithLabelValues(name, string(healthcheckType), string(status)).Set(1)
+	return nil
+}
diff --git a/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go
new file mode 100644
index 00000000000..11fa87e061a
--- /dev/null
+++ b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go
@@ -0,0 +1,100 @@
+/*
+Copyright 2022 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package health
+
+import (
+	"context"
+	"strings"
+	"testing"
+
+	"k8s.io/component-base/metrics/legacyregistry"
+	"k8s.io/component-base/metrics/testutil"
+)
+
+// testedMetrics limits the gathered output that is compared to the healthcheck metric.
+var (
+	testedMetrics = []string{"k8s_healthcheck"}
+)
+
+// TestObserveHealthcheck verifies that observing a new status for a health
+// check sets that status to 1 and resets the previously recorded status to 0.
+func TestObserveHealthcheck(t *testing.T) {
+	defer legacyregistry.Reset()
+	defer ResetHealthMetrics()
+	initialState := ERROR
+	healthcheckName := "healthcheck-a"
+	initialOutput := `
+	# HELP k8s_healthcheck [ALPHA] This metric records the result of a single health check.
+	# TYPE k8s_healthcheck gauge
+	k8s_healthcheck{name="healthcheck-a",status="error",type="healthz"} 1
+	k8s_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 0
+	k8s_healthcheck{name="healthcheck-a",status="success",type="healthz"} 0
+`
+	testCases := []struct {
+		desc     string
+		name     string
+		hcType   HealthcheckType
+		hcStatus HealthcheckStatus
+		want     string
+	}{
+		{
+			desc:     "test pending",
+			name:     healthcheckName,
+			hcType:   HEALTHZ,
+			hcStatus: PENDING,
+			want: `
+	# HELP k8s_healthcheck [ALPHA] This metric records the result of a single health check.
+	# TYPE k8s_healthcheck gauge
+	k8s_healthcheck{name="healthcheck-a",status="error",type="healthz"} 0
+	k8s_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 1
+	k8s_healthcheck{name="healthcheck-a",status="success",type="healthz"} 0
+`,
+		},
+		{
+			desc:     "test success",
+			name:     healthcheckName,
+			hcType:   HEALTHZ,
+			hcStatus: SUCCESS,
+			want: `
+	# HELP k8s_healthcheck [ALPHA] This metric records the result of a single health check.
+	# TYPE k8s_healthcheck gauge
+	k8s_healthcheck{name="healthcheck-a",status="error",type="healthz"} 0
+	k8s_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 0
+	k8s_healthcheck{name="healthcheck-a",status="success",type="healthz"} 1
+`,
+		},
+	}
+
+	for _, test := range testCases {
+		t.Run(test.desc, func(t *testing.T) {
+			// first record an error as the initial state
+			if err := ObserveHealthcheck(context.Background(), test.name, test.hcType, initialState); err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+			if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(initialOutput), testedMetrics...); err != nil {
+				t.Fatal(err)
+			}
+			// now record the status under test and check that the initial state was purged
+			if err := ObserveHealthcheck(context.Background(), test.name, test.hcType, test.hcStatus); err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+			if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(test.want), testedMetrics...); err != nil {
+				t.Fatal(err)
+			}
+		})
+	}
+}