From 895c80be2a9e61b4ee9905229364914e30759b92 Mon Sep 17 00:00:00 2001 From: Han Kang Date: Tue, 9 Aug 2022 14:23:42 -0700 Subject: [PATCH 1/3] add metrics for health checks (for later use in apiserver) Change-Id: I0dff11cc298c4960ae6620004a071ef6a62ddc9a --- .../metrics/prometheus/health/metrics.go | 80 +++++++++++++++ .../metrics/prometheus/health/metrics_test.go | 99 +++++++++++++++++++ 2 files changed, 179 insertions(+) create mode 100644 staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go create mode 100644 staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go diff --git a/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go new file mode 100644 index 00000000000..1c635dc9f98 --- /dev/null +++ b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go @@ -0,0 +1,80 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package health + +import ( + "context" + "errors" + + k8smetrics "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" +) + +type HealthcheckStatus string + +const ( + SUCCESS HealthcheckStatus = "success" + ERROR HealthcheckStatus = "error" + PENDING HealthcheckStatus = "pending" +) + +type HealthcheckType string + +const ( + LIVEZ HealthcheckType = "livez" + READYZ HealthcheckType = "readyz" + HEALTHZ HealthcheckType = "healthz" +) + +var ( + // healthcheck is a Prometheus Gauge metrics used for recording the results of a k8s healthcheck. + healthcheck = k8smetrics.NewGaugeVec( + &k8smetrics.GaugeOpts{ + Name: "k8s_healthcheck", + Help: "This metric records the result of a single health check.", + StabilityLevel: k8smetrics.ALPHA, + }, + []string{"name", "type", "status"}, + ) + statuses = []HealthcheckStatus{SUCCESS, ERROR, PENDING} + statusSet = map[HealthcheckStatus]struct{}{SUCCESS: {}, ERROR: {}, PENDING: {}} + checkSet = map[HealthcheckType]struct{}{LIVEZ: {}, READYZ: {}, HEALTHZ: {}} +) + +func init() { + legacyregistry.MustRegister(healthcheck) +} + +func ResetHealthMetrics() { + healthcheck.Reset() +} + +func ObserveHealthcheck(ctx context.Context, name string, healthcheckType HealthcheckType, status HealthcheckStatus) error { + if _, ok := statusSet[status]; !ok { + return errors.New("not a valid healthcheck status") + } + if _, ok := checkSet[healthcheckType]; !ok { + return errors.New("not a valid healthcheck type") + } + for _, s := range statuses { + if status != s { + healthcheck.WithContext(ctx).WithLabelValues(name, string(healthcheckType), string(s)).Set(0) + } + } + healthcheck.WithContext(ctx).WithLabelValues(name, string(healthcheckType), string(status)).Set(1) + return nil +} diff --git a/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go new file mode 100644 index 00000000000..11fa87e061a --- /dev/null +++ 
b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go @@ -0,0 +1,99 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package health + +import ( + "context" + "strings" + "testing" + + "k8s.io/component-base/metrics/legacyregistry" + "k8s.io/component-base/metrics/testutil" +) + +var ( + testedMetrics = []string{"k8s_healthcheck"} +) + +func TestObserveHealthcheck(t *testing.T) { + defer legacyregistry.Reset() + defer ResetHealthMetrics() + initialState := ERROR + healthcheckName := "healthcheck-a" + initialOutput := ` + # HELP k8s_healthcheck [ALPHA] This metric records the result of a single health check. + # TYPE k8s_healthcheck gauge + k8s_healthcheck{name="healthcheck-a",status="error",type="healthz"} 1 + k8s_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 0 + k8s_healthcheck{name="healthcheck-a",status="success",type="healthz"} 0 +` + testCases := []struct { + desc string + name string + hcType HealthcheckType + hcStatus HealthcheckStatus + want string + }{ + { + desc: "test pending", + name: healthcheckName, + hcType: HEALTHZ, + hcStatus: PENDING, + want: ` + # HELP k8s_healthcheck [ALPHA] This metric records the result of a single health check. 
+ # TYPE k8s_healthcheck gauge + k8s_healthcheck{name="healthcheck-a",status="error",type="healthz"} 0 + k8s_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 1 + k8s_healthcheck{name="healthcheck-a",status="success",type="healthz"} 0 +`, + }, + { + desc: "test success", + name: healthcheckName, + hcType: HEALTHZ, + hcStatus: SUCCESS, + want: ` + # HELP k8s_healthcheck [ALPHA] This metric records the result of a single health check. + # TYPE k8s_healthcheck gauge + k8s_healthcheck{name="healthcheck-a",status="error",type="healthz"} 0 + k8s_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 0 + k8s_healthcheck{name="healthcheck-a",status="success",type="healthz"} 1 +`, + }, + } + + for _, test := range testCases { + t.Run(test.desc, func(t *testing.T) { + // let's first record an error as initial state + err := ObserveHealthcheck(context.Background(), test.name, test.hcType, initialState) + if err != nil { + t.Errorf("unexpected err: %v", err) + } + if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(initialOutput), testedMetrics...); err != nil { + t.Fatal(err) + } + // now record the status under test, which should clear the initial error state + err = ObserveHealthcheck(context.Background(), test.name, test.hcType, test.hcStatus) + if err != nil { + t.Errorf("unexpected err: %v", err) + } + if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(test.want), testedMetrics...); err != nil { + t.Fatal(err) + } + }) + } +} From 822c52c220e057d48a20e6c34bba5781e957bd7a Mon Sep 17 00:00:00 2001 From: Han Kang Date: Fri, 12 Aug 2022 17:34:22 -0700 Subject: [PATCH 2/3] address comments Change-Id: I9c9854b8bb3221e4791c70f566361bd0421061c1 --- .../metrics/prometheus/health/metrics.go | 23 ++++++++++--------- .../metrics/prometheus/health/metrics_test.go | 16 ++++++------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go
b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go index 1c635dc9f98..e27166fb191 100644 --- a/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go +++ b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go @@ -27,32 +27,33 @@ import ( type HealthcheckStatus string const ( - SUCCESS HealthcheckStatus = "success" - ERROR HealthcheckStatus = "error" - PENDING HealthcheckStatus = "pending" + Success HealthcheckStatus = "success" + Error HealthcheckStatus = "error" + Pending HealthcheckStatus = "pending" ) type HealthcheckType string const ( - LIVEZ HealthcheckType = "livez" - READYZ HealthcheckType = "readyz" - HEALTHZ HealthcheckType = "healthz" + Livez HealthcheckType = "livez" + Readyz HealthcheckType = "readyz" + Healthz HealthcheckType = "healthz" ) var ( // healthcheck is a Prometheus Gauge metrics used for recording the results of a k8s healthcheck. healthcheck = k8smetrics.NewGaugeVec( &k8smetrics.GaugeOpts{ - Name: "k8s_healthcheck", - Help: "This metric records the result of a single health check.", + Namespace: "k8s", + Name: "healthcheck", + Help: "This metric records the result of a single healthcheck.", StabilityLevel: k8smetrics.ALPHA, }, []string{"name", "type", "status"}, ) - statuses = []HealthcheckStatus{SUCCESS, ERROR, PENDING} - statusSet = map[HealthcheckStatus]struct{}{SUCCESS: {}, ERROR: {}, PENDING: {}} - checkSet = map[HealthcheckType]struct{}{LIVEZ: {}, READYZ: {}, HEALTHZ: {}} + statuses = []HealthcheckStatus{Success, Error, Pending} + statusSet = map[HealthcheckStatus]struct{}{Success: {}, Error: {}, Pending: {}} + checkSet = map[HealthcheckType]struct{}{Livez: {}, Readyz: {}, Healthz: {}} ) func init() { diff --git a/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go index 11fa87e061a..84715b28496 100644 --- 
a/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go +++ b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go @@ -32,10 +32,10 @@ var ( func TestObserveHealthcheck(t *testing.T) { defer legacyregistry.Reset() defer ResetHealthMetrics() - initialState := ERROR + initialState := Error healthcheckName := "healthcheck-a" initialOutput := ` - # HELP k8s_healthcheck [ALPHA] This metric records the result of a single health check. + # HELP k8s_healthcheck [ALPHA] This metric records the result of a single healthcheck. # TYPE k8s_healthcheck gauge k8s_healthcheck{name="healthcheck-a",status="error",type="healthz"} 1 k8s_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 0 @@ -51,10 +51,10 @@ func TestObserveHealthcheck(t *testing.T) { { desc: "test pending", name: healthcheckName, - hcType: HEALTHZ, - hcStatus: PENDING, + hcType: Healthz, + hcStatus: Pending, want: ` - # HELP k8s_healthcheck [ALPHA] This metric records the result of a single health check. + # HELP k8s_healthcheck [ALPHA] This metric records the result of a single healthcheck. # TYPE k8s_healthcheck gauge k8s_healthcheck{name="healthcheck-a",status="error",type="healthz"} 0 k8s_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 1 @@ -64,10 +64,10 @@ func TestObserveHealthcheck(t *testing.T) { { desc: "test success", name: healthcheckName, - hcType: HEALTHZ, - hcStatus: SUCCESS, + hcType: Healthz, + hcStatus: Success, want: ` - # HELP k8s_healthcheck [ALPHA] This metric records the result of a single health check. + # HELP k8s_healthcheck [ALPHA] This metric records the result of a single healthcheck. 
# TYPE k8s_healthcheck gauge k8s_healthcheck{name="healthcheck-a",status="error",type="healthz"} 0 k8s_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 0 From 6c451da1273689dd348ddd8c4b4a491944c31c67 Mon Sep 17 00:00:00 2001 From: Han Kang Date: Tue, 16 Aug 2022 09:19:26 -0700 Subject: [PATCH 3/3] add counter metric for dashpole Change-Id: I4a235c0d8b936da960cf5ce25cdd992e94130391 --- .../metrics/prometheus/health/metrics.go | 14 ++++++++++++++ .../metrics/prometheus/health/metrics_test.go | 14 +++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go index e27166fb191..4be16d7cb0b 100644 --- a/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go +++ b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics.go @@ -51,6 +51,17 @@ var ( }, []string{"name", "type", "status"}, ) + + // healthchecksTotal is a Prometheus Counter metric used for counting the results of a k8s healthcheck.
+ healthchecksTotal = k8smetrics.NewCounterVec( + &k8smetrics.CounterOpts{ + Namespace: "k8s", + Name: "healthchecks_total", + Help: "This metric records the results of all healthcheck.", + StabilityLevel: k8smetrics.ALPHA, + }, + []string{"name", "type", "status"}, + ) statuses = []HealthcheckStatus{Success, Error, Pending} statusSet = map[HealthcheckStatus]struct{}{Success: {}, Error: {}, Pending: {}} checkSet = map[HealthcheckType]struct{}{Livez: {}, Readyz: {}, Healthz: {}} @@ -58,10 +69,12 @@ var ( func init() { legacyregistry.MustRegister(healthcheck) + legacyregistry.MustRegister(healthchecksTotal) } func ResetHealthMetrics() { healthcheck.Reset() + healthchecksTotal.Reset() } func ObserveHealthcheck(ctx context.Context, name string, healthcheckType HealthcheckType, status HealthcheckStatus) error { @@ -76,6 +89,7 @@ func ObserveHealthcheck(ctx context.Context, name string, healthcheckType Health healthcheck.WithContext(ctx).WithLabelValues(name, string(healthcheckType), string(s)).Set(0) } } + healthchecksTotal.WithContext(ctx).WithLabelValues(name, string(healthcheckType), string(status)).Inc() healthcheck.WithContext(ctx).WithLabelValues(name, string(healthcheckType), string(status)).Set(1) return nil } diff --git a/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go index 84715b28496..9ba52a3099a 100644 --- a/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go +++ b/staging/src/k8s.io/component-base/metrics/prometheus/health/metrics_test.go @@ -26,7 +26,7 @@ import ( ) var ( - testedMetrics = []string{"k8s_healthcheck"} + testedMetrics = []string{"k8s_healthcheck", "k8s_healthchecks_total"} ) func TestObserveHealthcheck(t *testing.T) { @@ -40,6 +40,9 @@ func TestObserveHealthcheck(t *testing.T) { k8s_healthcheck{name="healthcheck-a",status="error",type="healthz"} 1 
k8s_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 0 k8s_healthcheck{name="healthcheck-a",status="success",type="healthz"} 0 + # HELP k8s_healthchecks_total [ALPHA] This metric records the results of all healthcheck. + # TYPE k8s_healthchecks_total counter + k8s_healthchecks_total{name="healthcheck-a",status="error",type="healthz"} 1 ` testCases := []struct { desc string @@ -59,6 +62,10 @@ func TestObserveHealthcheck(t *testing.T) { k8s_healthcheck{name="healthcheck-a",status="error",type="healthz"} 0 k8s_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 1 k8s_healthcheck{name="healthcheck-a",status="success",type="healthz"} 0 + # HELP k8s_healthchecks_total [ALPHA] This metric records the results of all healthcheck. + # TYPE k8s_healthchecks_total counter + k8s_healthchecks_total{name="healthcheck-a",status="error",type="healthz"} 1 + k8s_healthchecks_total{name="healthcheck-a",status="pending",type="healthz"} 1 `, }, { @@ -72,12 +79,17 @@ func TestObserveHealthcheck(t *testing.T) { k8s_healthcheck{name="healthcheck-a",status="error",type="healthz"} 0 k8s_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 0 k8s_healthcheck{name="healthcheck-a",status="success",type="healthz"} 1 + # HELP k8s_healthchecks_total [ALPHA] This metric records the results of all healthcheck. + # TYPE k8s_healthchecks_total counter + k8s_healthchecks_total{name="healthcheck-a",status="error",type="healthz"} 1 + k8s_healthchecks_total{name="healthcheck-a",status="success",type="healthz"} 1 `, }, } for _, test := range testCases { t.Run(test.desc, func(t *testing.T) { + defer ResetHealthMetrics() // let's first record an error as initial state err := ObserveHealthcheck(context.Background(), test.name, test.hcType, initialState) if err != nil {