diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go index 15d48846746..85ef7b414f0 100644 --- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go +++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go @@ -210,7 +210,7 @@ func ClusterRoles() []rbacv1.ClusterRole { ObjectMeta: metav1.ObjectMeta{Name: "system:monitoring"}, Rules: []rbacv1.PolicyRule{ rbacv1helpers.NewRule("get").URLs( - "/metrics", + "/metrics", "/metrics/slis", "/livez", "/readyz", "/healthz", "/livez/*", "/readyz/*", "/healthz/*", ).RuleOrDie(), diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml index c10b1f26f36..e8d002b873b 100644 --- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml +++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml @@ -934,6 +934,7 @@ items: - /livez - /livez/* - /metrics + - /metrics/slis - /readyz - /readyz/* verbs: diff --git a/staging/src/k8s.io/apiserver/pkg/server/config.go b/staging/src/k8s.io/apiserver/pkg/server/config.go index d21ea2ef000..0b716d4e337 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/config.go +++ b/staging/src/k8s.io/apiserver/pkg/server/config.go @@ -67,6 +67,7 @@ import ( "k8s.io/client-go/informers" restclient "k8s.io/client-go/rest" "k8s.io/component-base/logs" + "k8s.io/component-base/metrics/prometheus/slis" "k8s.io/klog/v2" openapicommon "k8s.io/kube-openapi/pkg/common" "k8s.io/kube-openapi/pkg/validation/spec" @@ -884,8 +885,10 @@ func installAPI(s *GenericAPIServer, c *Config) { if c.EnableMetrics { if c.EnableProfiling { routes.MetricsWithReset{}.Install(s.Handler.NonGoRestfulMux) + slis.SLIMetricsWithReset{}.Install(s.Handler.NonGoRestfulMux) } else { routes.DefaultMetrics{}.Install(s.Handler.NonGoRestfulMux) + slis.SLIMetrics{}.Install(s.Handler.NonGoRestfulMux) } } diff --git a/staging/src/k8s.io/apiserver/pkg/server/config_test.go b/staging/src/k8s.io/apiserver/pkg/server/config_test.go index 4f01f74284a..6556117c008 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/config_test.go +++ b/staging/src/k8s.io/apiserver/pkg/server/config_test.go @@ -169,6 +169,7 @@ func TestNewWithDelegate(t *testing.T) { "/livez/poststarthook/storage-object-count-tracker-hook", "/livez/poststarthook/wrapping-post-start-hook", "/metrics", + "/metrics/slis", "/readyz", "/readyz/delegate-health", "/readyz/informer-sync", diff --git a/staging/src/k8s.io/apiserver/pkg/server/healthz/healthz.go b/staging/src/k8s.io/apiserver/pkg/server/healthz/healthz.go index 3a7280f6798..425b7a742e7 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/healthz/healthz.go +++ b/staging/src/k8s.io/apiserver/pkg/server/healthz/healthz.go @@ -18,6 +18,7 @@ package healthz import ( "bytes" + "context" "fmt" "net/http" "reflect" @@ -30,6 +31,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apiserver/pkg/endpoints/metrics" "k8s.io/apiserver/pkg/server/httplog" + "k8s.io/component-base/metrics/prometheus/slis" "k8s.io/klog/v2" ) @@ -237,6 +239,7 @@ func handleRootHealth(name string, firstTimeHealthy func(), checks ...HealthChec continue } if err := check.Check(r); err != nil { + slis.ObserveHealthcheck(context.Background(), check.Name(), name, slis.Error) // don't include the error since this endpoint is public. If someone wants more detail // they should have explicit permission to the detailed checks. fmt.Fprintf(&individualCheckOutput, "[-]%s failed: reason withheld\n", check.Name()) @@ -244,6 +247,7 @@ func handleRootHealth(name string, firstTimeHealthy func(), checks ...HealthChec fmt.Fprintf(&failedVerboseLogOutput, "[-]%s failed: %v\n", check.Name(), err) failedChecks = append(failedChecks, check.Name()) } else { + slis.ObserveHealthcheck(context.Background(), check.Name(), name, slis.Success) fmt.Fprintf(&individualCheckOutput, "[+]%s ok\n", check.Name()) } } diff --git a/staging/src/k8s.io/component-base/metrics/prometheus/slis/metrics.go b/staging/src/k8s.io/component-base/metrics/prometheus/slis/metrics.go index f7357aa955e..7fb4a8e064e 100644 --- a/staging/src/k8s.io/component-base/metrics/prometheus/slis/metrics.go +++ b/staging/src/k8s.io/component-base/metrics/prometheus/slis/metrics.go @@ -18,8 +18,6 @@ package slis import ( "context" - "errors" - k8smetrics "k8s.io/component-base/metrics" ) @@ -28,17 +26,10 @@ type HealthcheckStatus string const ( Success HealthcheckStatus = "success" Error HealthcheckStatus = "error" - Pending HealthcheckStatus = "pending" ) type HealthcheckType string -const ( - Livez HealthcheckType = "livez" - Readyz HealthcheckType = "readyz" - Healthz HealthcheckType = "healthz" -) - var ( // healthcheck is a Prometheus Gauge metrics used for recording the results of a k8s healthcheck. healthcheck = k8smetrics.NewGaugeVec( @@ -48,7 +39,7 @@ var ( Help: "This metric records the result of a single healthcheck.", StabilityLevel: k8smetrics.ALPHA, }, - []string{"name", "type", "status"}, + []string{"name", "type"}, ) // healthchecksTotal is a Prometheus Counter metrics used for counting the results of a k8s healthcheck. @@ -61,8 +52,6 @@ var ( }, []string{"name", "type", "status"}, ) - statuses = []HealthcheckStatus{Success, Error, Pending} - statusSet = map[HealthcheckStatus]struct{}{Success: {}, Error: {}, Pending: {}} ) func Register(registry k8smetrics.KubeRegistry) { @@ -76,15 +65,12 @@ func ResetHealthMetrics() { } func ObserveHealthcheck(ctx context.Context, name string, healthcheckType string, status HealthcheckStatus) error { - if _, ok := statusSet[status]; !ok { - return errors.New("not a valid healthcheck status") - } - for _, s := range statuses { - if status != s { - healthcheck.WithContext(ctx).WithLabelValues(name, healthcheckType, string(s)).Set(0) - } + if status == Success { + healthcheck.WithContext(ctx).WithLabelValues(name, healthcheckType).Set(1) + } else { + healthcheck.WithContext(ctx).WithLabelValues(name, healthcheckType).Set(0) } + healthchecksTotal.WithContext(ctx).WithLabelValues(name, healthcheckType, string(status)).Inc() - healthcheck.WithContext(ctx).WithLabelValues(name, healthcheckType, string(status)).Set(1) return nil } diff --git a/staging/src/k8s.io/component-base/metrics/prometheus/slis/metrics_test.go b/staging/src/k8s.io/component-base/metrics/prometheus/slis/metrics_test.go index 40d5122e04b..b4f8df34046 100644 --- a/staging/src/k8s.io/component-base/metrics/prometheus/slis/metrics_test.go +++ b/staging/src/k8s.io/component-base/metrics/prometheus/slis/metrics_test.go @@ -39,9 +39,7 @@ func TestObserveHealthcheck(t *testing.T) { initialOutput := ` # HELP kubernetes_healthcheck [ALPHA] This metric records the result of a single healthcheck. # TYPE kubernetes_healthcheck gauge - kubernetes_healthcheck{name="healthcheck-a",status="error",type="healthz"} 1 - kubernetes_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 0 - kubernetes_healthcheck{name="healthcheck-a",status="success",type="healthz"} 0 + kubernetes_healthcheck{name="healthcheck-a",type="healthz"} 0 # HELP kubernetes_healthchecks_total [ALPHA] This metric records the results of all healthcheck. # TYPE kubernetes_healthchecks_total counter kubernetes_healthchecks_total{name="healthcheck-a",status="error",type="healthz"} 1 @@ -53,23 +51,6 @@ func TestObserveHealthcheck(t *testing.T) { hcStatus HealthcheckStatus want string }{ - { - desc: "test pending", - name: healthcheckName, - hcType: "healthz", - hcStatus: Pending, - want: ` - # HELP kubernetes_healthcheck [ALPHA] This metric records the result of a single healthcheck. - # TYPE kubernetes_healthcheck gauge - kubernetes_healthcheck{name="healthcheck-a",status="error",type="healthz"} 0 - kubernetes_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 1 - kubernetes_healthcheck{name="healthcheck-a",status="success",type="healthz"} 0 - # HELP kubernetes_healthchecks_total [ALPHA] This metric records the results of all healthcheck. - # TYPE kubernetes_healthchecks_total counter - kubernetes_healthchecks_total{name="healthcheck-a",status="error",type="healthz"} 1 - kubernetes_healthchecks_total{name="healthcheck-a",status="pending",type="healthz"} 1 -`, - }, { desc: "test success", name: healthcheckName, @@ -78,9 +59,7 @@ func TestObserveHealthcheck(t *testing.T) { want: ` # HELP kubernetes_healthcheck [ALPHA] This metric records the result of a single healthcheck. # TYPE kubernetes_healthcheck gauge - kubernetes_healthcheck{name="healthcheck-a",status="error",type="healthz"} 0 - kubernetes_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 0 - kubernetes_healthcheck{name="healthcheck-a",status="success",type="healthz"} 1 + kubernetes_healthcheck{name="healthcheck-a",type="healthz"} 1 # HELP kubernetes_healthchecks_total [ALPHA] This metric records the results of all healthcheck. # TYPE kubernetes_healthchecks_total counter kubernetes_healthchecks_total{name="healthcheck-a",status="error",type="healthz"} 1 diff --git a/vendor/modules.txt b/vendor/modules.txt index c15360e4542..a0503028017 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1978,6 +1978,7 @@ k8s.io/component-base/metrics/prometheus/controllers k8s.io/component-base/metrics/prometheus/feature k8s.io/component-base/metrics/prometheus/ratelimiter k8s.io/component-base/metrics/prometheus/restclient +k8s.io/component-base/metrics/prometheus/slis k8s.io/component-base/metrics/prometheus/version k8s.io/component-base/metrics/prometheus/workqueue k8s.io/component-base/metrics/prometheusextension