Enable health check SLI metrics for the apiserver

Change-Id: I1b43e6dfea35b8c3bfdf5daaa8b42adff2fbc786
This commit is contained in:
Han Kang 2022-09-26 16:10:58 -07:00
parent d39c9aeff0
commit db13f51db9
6 changed files with 11 additions and 1 deletions

View File

@ -210,7 +210,7 @@ func ClusterRoles() []rbacv1.ClusterRole {
ObjectMeta: metav1.ObjectMeta{Name: "system:monitoring"},
Rules: []rbacv1.PolicyRule{
rbacv1helpers.NewRule("get").URLs(
"/metrics",
"/metrics", "/metrics/slis",
"/livez", "/readyz", "/healthz",
"/livez/*", "/readyz/*", "/healthz/*",
).RuleOrDie(),

View File

@ -934,6 +934,7 @@ items:
- /livez
- /livez/*
- /metrics
- /metrics/slis
- /readyz
- /readyz/*
verbs:

View File

@ -67,6 +67,7 @@ import (
"k8s.io/client-go/informers"
restclient "k8s.io/client-go/rest"
"k8s.io/component-base/logs"
"k8s.io/component-base/metrics/prometheus/slis"
"k8s.io/klog/v2"
openapicommon "k8s.io/kube-openapi/pkg/common"
"k8s.io/kube-openapi/pkg/validation/spec"
@ -884,8 +885,10 @@ func installAPI(s *GenericAPIServer, c *Config) {
if c.EnableMetrics {
if c.EnableProfiling {
routes.MetricsWithReset{}.Install(s.Handler.NonGoRestfulMux)
slis.SLIMetricsWithReset{}.Install(s.Handler.NonGoRestfulMux)
} else {
routes.DefaultMetrics{}.Install(s.Handler.NonGoRestfulMux)
slis.SLIMetrics{}.Install(s.Handler.NonGoRestfulMux)
}
}

View File

@ -169,6 +169,7 @@ func TestNewWithDelegate(t *testing.T) {
"/livez/poststarthook/storage-object-count-tracker-hook",
"/livez/poststarthook/wrapping-post-start-hook",
"/metrics",
"/metrics/slis",
"/readyz",
"/readyz/delegate-health",
"/readyz/informer-sync",

View File

@ -18,6 +18,7 @@ package healthz
import (
"bytes"
"context"
"fmt"
"net/http"
"reflect"
@ -30,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/endpoints/metrics"
"k8s.io/apiserver/pkg/server/httplog"
"k8s.io/component-base/metrics/prometheus/slis"
"k8s.io/klog/v2"
)
@ -237,6 +239,7 @@ func handleRootHealth(name string, firstTimeHealthy func(), checks ...HealthChec
continue
}
if err := check.Check(r); err != nil {
slis.ObserveHealthcheck(context.Background(), check.Name(), name, slis.Error)
// don't include the error since this endpoint is public. If someone wants more detail
// they should have explicit permission to the detailed checks.
fmt.Fprintf(&individualCheckOutput, "[-]%s failed: reason withheld\n", check.Name())
@ -244,6 +247,7 @@ func handleRootHealth(name string, firstTimeHealthy func(), checks ...HealthChec
fmt.Fprintf(&failedVerboseLogOutput, "[-]%s failed: %v\n", check.Name(), err)
failedChecks = append(failedChecks, check.Name())
} else {
slis.ObserveHealthcheck(context.Background(), check.Name(), name, slis.Success)
fmt.Fprintf(&individualCheckOutput, "[+]%s ok\n", check.Name())
}
}

1
vendor/modules.txt vendored
View File

@ -1978,6 +1978,7 @@ k8s.io/component-base/metrics/prometheus/controllers
k8s.io/component-base/metrics/prometheus/feature
k8s.io/component-base/metrics/prometheus/ratelimiter
k8s.io/component-base/metrics/prometheus/restclient
k8s.io/component-base/metrics/prometheus/slis
k8s.io/component-base/metrics/prometheus/version
k8s.io/component-base/metrics/prometheus/workqueue
k8s.io/component-base/metrics/prometheusextension