mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-21 10:51:29 +00:00
Merge pull request #112741 from logicalhan/health-check-metrics
enable health check SLI metrics for apiserver
This commit is contained in:
commit
e11f23eb97
@ -210,7 +210,7 @@ func ClusterRoles() []rbacv1.ClusterRole {
|
|||||||
ObjectMeta: metav1.ObjectMeta{Name: "system:monitoring"},
|
ObjectMeta: metav1.ObjectMeta{Name: "system:monitoring"},
|
||||||
Rules: []rbacv1.PolicyRule{
|
Rules: []rbacv1.PolicyRule{
|
||||||
rbacv1helpers.NewRule("get").URLs(
|
rbacv1helpers.NewRule("get").URLs(
|
||||||
"/metrics",
|
"/metrics", "/metrics/slis",
|
||||||
"/livez", "/readyz", "/healthz",
|
"/livez", "/readyz", "/healthz",
|
||||||
"/livez/*", "/readyz/*", "/healthz/*",
|
"/livez/*", "/readyz/*", "/healthz/*",
|
||||||
).RuleOrDie(),
|
).RuleOrDie(),
|
||||||
|
@ -934,6 +934,7 @@ items:
|
|||||||
- /livez
|
- /livez
|
||||||
- /livez/*
|
- /livez/*
|
||||||
- /metrics
|
- /metrics
|
||||||
|
- /metrics/slis
|
||||||
- /readyz
|
- /readyz
|
||||||
- /readyz/*
|
- /readyz/*
|
||||||
verbs:
|
verbs:
|
||||||
|
@ -67,6 +67,7 @@ import (
|
|||||||
"k8s.io/client-go/informers"
|
"k8s.io/client-go/informers"
|
||||||
restclient "k8s.io/client-go/rest"
|
restclient "k8s.io/client-go/rest"
|
||||||
"k8s.io/component-base/logs"
|
"k8s.io/component-base/logs"
|
||||||
|
"k8s.io/component-base/metrics/prometheus/slis"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
openapicommon "k8s.io/kube-openapi/pkg/common"
|
openapicommon "k8s.io/kube-openapi/pkg/common"
|
||||||
"k8s.io/kube-openapi/pkg/validation/spec"
|
"k8s.io/kube-openapi/pkg/validation/spec"
|
||||||
@ -884,8 +885,10 @@ func installAPI(s *GenericAPIServer, c *Config) {
|
|||||||
if c.EnableMetrics {
|
if c.EnableMetrics {
|
||||||
if c.EnableProfiling {
|
if c.EnableProfiling {
|
||||||
routes.MetricsWithReset{}.Install(s.Handler.NonGoRestfulMux)
|
routes.MetricsWithReset{}.Install(s.Handler.NonGoRestfulMux)
|
||||||
|
slis.SLIMetricsWithReset{}.Install(s.Handler.NonGoRestfulMux)
|
||||||
} else {
|
} else {
|
||||||
routes.DefaultMetrics{}.Install(s.Handler.NonGoRestfulMux)
|
routes.DefaultMetrics{}.Install(s.Handler.NonGoRestfulMux)
|
||||||
|
slis.SLIMetrics{}.Install(s.Handler.NonGoRestfulMux)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -169,6 +169,7 @@ func TestNewWithDelegate(t *testing.T) {
|
|||||||
"/livez/poststarthook/storage-object-count-tracker-hook",
|
"/livez/poststarthook/storage-object-count-tracker-hook",
|
||||||
"/livez/poststarthook/wrapping-post-start-hook",
|
"/livez/poststarthook/wrapping-post-start-hook",
|
||||||
"/metrics",
|
"/metrics",
|
||||||
|
"/metrics/slis",
|
||||||
"/readyz",
|
"/readyz",
|
||||||
"/readyz/delegate-health",
|
"/readyz/delegate-health",
|
||||||
"/readyz/informer-sync",
|
"/readyz/informer-sync",
|
||||||
|
@ -18,6 +18,7 @@ package healthz
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"reflect"
|
"reflect"
|
||||||
@ -30,6 +31,7 @@ import (
|
|||||||
"k8s.io/apimachinery/pkg/util/wait"
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
"k8s.io/apiserver/pkg/endpoints/metrics"
|
"k8s.io/apiserver/pkg/endpoints/metrics"
|
||||||
"k8s.io/apiserver/pkg/server/httplog"
|
"k8s.io/apiserver/pkg/server/httplog"
|
||||||
|
"k8s.io/component-base/metrics/prometheus/slis"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -237,6 +239,7 @@ func handleRootHealth(name string, firstTimeHealthy func(), checks ...HealthChec
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if err := check.Check(r); err != nil {
|
if err := check.Check(r); err != nil {
|
||||||
|
slis.ObserveHealthcheck(context.Background(), check.Name(), name, slis.Error)
|
||||||
// don't include the error since this endpoint is public. If someone wants more detail
|
// don't include the error since this endpoint is public. If someone wants more detail
|
||||||
// they should have explicit permission to the detailed checks.
|
// they should have explicit permission to the detailed checks.
|
||||||
fmt.Fprintf(&individualCheckOutput, "[-]%s failed: reason withheld\n", check.Name())
|
fmt.Fprintf(&individualCheckOutput, "[-]%s failed: reason withheld\n", check.Name())
|
||||||
@ -244,6 +247,7 @@ func handleRootHealth(name string, firstTimeHealthy func(), checks ...HealthChec
|
|||||||
fmt.Fprintf(&failedVerboseLogOutput, "[-]%s failed: %v\n", check.Name(), err)
|
fmt.Fprintf(&failedVerboseLogOutput, "[-]%s failed: %v\n", check.Name(), err)
|
||||||
failedChecks = append(failedChecks, check.Name())
|
failedChecks = append(failedChecks, check.Name())
|
||||||
} else {
|
} else {
|
||||||
|
slis.ObserveHealthcheck(context.Background(), check.Name(), name, slis.Success)
|
||||||
fmt.Fprintf(&individualCheckOutput, "[+]%s ok\n", check.Name())
|
fmt.Fprintf(&individualCheckOutput, "[+]%s ok\n", check.Name())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,8 +18,6 @@ package slis
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
|
|
||||||
k8smetrics "k8s.io/component-base/metrics"
|
k8smetrics "k8s.io/component-base/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -28,17 +26,10 @@ type HealthcheckStatus string
|
|||||||
const (
|
const (
|
||||||
Success HealthcheckStatus = "success"
|
Success HealthcheckStatus = "success"
|
||||||
Error HealthcheckStatus = "error"
|
Error HealthcheckStatus = "error"
|
||||||
Pending HealthcheckStatus = "pending"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type HealthcheckType string
|
type HealthcheckType string
|
||||||
|
|
||||||
const (
|
|
||||||
Livez HealthcheckType = "livez"
|
|
||||||
Readyz HealthcheckType = "readyz"
|
|
||||||
Healthz HealthcheckType = "healthz"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// healthcheck is a Prometheus Gauge metrics used for recording the results of a k8s healthcheck.
|
// healthcheck is a Prometheus Gauge metrics used for recording the results of a k8s healthcheck.
|
||||||
healthcheck = k8smetrics.NewGaugeVec(
|
healthcheck = k8smetrics.NewGaugeVec(
|
||||||
@ -48,7 +39,7 @@ var (
|
|||||||
Help: "This metric records the result of a single healthcheck.",
|
Help: "This metric records the result of a single healthcheck.",
|
||||||
StabilityLevel: k8smetrics.ALPHA,
|
StabilityLevel: k8smetrics.ALPHA,
|
||||||
},
|
},
|
||||||
[]string{"name", "type", "status"},
|
[]string{"name", "type"},
|
||||||
)
|
)
|
||||||
|
|
||||||
// healthchecksTotal is a Prometheus Counter metrics used for counting the results of a k8s healthcheck.
|
// healthchecksTotal is a Prometheus Counter metrics used for counting the results of a k8s healthcheck.
|
||||||
@ -61,8 +52,6 @@ var (
|
|||||||
},
|
},
|
||||||
[]string{"name", "type", "status"},
|
[]string{"name", "type", "status"},
|
||||||
)
|
)
|
||||||
statuses = []HealthcheckStatus{Success, Error, Pending}
|
|
||||||
statusSet = map[HealthcheckStatus]struct{}{Success: {}, Error: {}, Pending: {}}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func Register(registry k8smetrics.KubeRegistry) {
|
func Register(registry k8smetrics.KubeRegistry) {
|
||||||
@ -76,15 +65,12 @@ func ResetHealthMetrics() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func ObserveHealthcheck(ctx context.Context, name string, healthcheckType string, status HealthcheckStatus) error {
|
func ObserveHealthcheck(ctx context.Context, name string, healthcheckType string, status HealthcheckStatus) error {
|
||||||
if _, ok := statusSet[status]; !ok {
|
if status == Success {
|
||||||
return errors.New("not a valid healthcheck status")
|
healthcheck.WithContext(ctx).WithLabelValues(name, healthcheckType).Set(1)
|
||||||
}
|
} else {
|
||||||
for _, s := range statuses {
|
healthcheck.WithContext(ctx).WithLabelValues(name, healthcheckType).Set(0)
|
||||||
if status != s {
|
|
||||||
healthcheck.WithContext(ctx).WithLabelValues(name, healthcheckType, string(s)).Set(0)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
healthchecksTotal.WithContext(ctx).WithLabelValues(name, healthcheckType, string(status)).Inc()
|
healthchecksTotal.WithContext(ctx).WithLabelValues(name, healthcheckType, string(status)).Inc()
|
||||||
healthcheck.WithContext(ctx).WithLabelValues(name, healthcheckType, string(status)).Set(1)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -39,9 +39,7 @@ func TestObserveHealthcheck(t *testing.T) {
|
|||||||
initialOutput := `
|
initialOutput := `
|
||||||
# HELP kubernetes_healthcheck [ALPHA] This metric records the result of a single healthcheck.
|
# HELP kubernetes_healthcheck [ALPHA] This metric records the result of a single healthcheck.
|
||||||
# TYPE kubernetes_healthcheck gauge
|
# TYPE kubernetes_healthcheck gauge
|
||||||
kubernetes_healthcheck{name="healthcheck-a",status="error",type="healthz"} 1
|
kubernetes_healthcheck{name="healthcheck-a",type="healthz"} 0
|
||||||
kubernetes_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 0
|
|
||||||
kubernetes_healthcheck{name="healthcheck-a",status="success",type="healthz"} 0
|
|
||||||
# HELP kubernetes_healthchecks_total [ALPHA] This metric records the results of all healthcheck.
|
# HELP kubernetes_healthchecks_total [ALPHA] This metric records the results of all healthcheck.
|
||||||
# TYPE kubernetes_healthchecks_total counter
|
# TYPE kubernetes_healthchecks_total counter
|
||||||
kubernetes_healthchecks_total{name="healthcheck-a",status="error",type="healthz"} 1
|
kubernetes_healthchecks_total{name="healthcheck-a",status="error",type="healthz"} 1
|
||||||
@ -53,23 +51,6 @@ func TestObserveHealthcheck(t *testing.T) {
|
|||||||
hcStatus HealthcheckStatus
|
hcStatus HealthcheckStatus
|
||||||
want string
|
want string
|
||||||
}{
|
}{
|
||||||
{
|
|
||||||
desc: "test pending",
|
|
||||||
name: healthcheckName,
|
|
||||||
hcType: "healthz",
|
|
||||||
hcStatus: Pending,
|
|
||||||
want: `
|
|
||||||
# HELP kubernetes_healthcheck [ALPHA] This metric records the result of a single healthcheck.
|
|
||||||
# TYPE kubernetes_healthcheck gauge
|
|
||||||
kubernetes_healthcheck{name="healthcheck-a",status="error",type="healthz"} 0
|
|
||||||
kubernetes_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 1
|
|
||||||
kubernetes_healthcheck{name="healthcheck-a",status="success",type="healthz"} 0
|
|
||||||
# HELP kubernetes_healthchecks_total [ALPHA] This metric records the results of all healthcheck.
|
|
||||||
# TYPE kubernetes_healthchecks_total counter
|
|
||||||
kubernetes_healthchecks_total{name="healthcheck-a",status="error",type="healthz"} 1
|
|
||||||
kubernetes_healthchecks_total{name="healthcheck-a",status="pending",type="healthz"} 1
|
|
||||||
`,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
desc: "test success",
|
desc: "test success",
|
||||||
name: healthcheckName,
|
name: healthcheckName,
|
||||||
@ -78,9 +59,7 @@ func TestObserveHealthcheck(t *testing.T) {
|
|||||||
want: `
|
want: `
|
||||||
# HELP kubernetes_healthcheck [ALPHA] This metric records the result of a single healthcheck.
|
# HELP kubernetes_healthcheck [ALPHA] This metric records the result of a single healthcheck.
|
||||||
# TYPE kubernetes_healthcheck gauge
|
# TYPE kubernetes_healthcheck gauge
|
||||||
kubernetes_healthcheck{name="healthcheck-a",status="error",type="healthz"} 0
|
kubernetes_healthcheck{name="healthcheck-a",type="healthz"} 1
|
||||||
kubernetes_healthcheck{name="healthcheck-a",status="pending",type="healthz"} 0
|
|
||||||
kubernetes_healthcheck{name="healthcheck-a",status="success",type="healthz"} 1
|
|
||||||
# HELP kubernetes_healthchecks_total [ALPHA] This metric records the results of all healthcheck.
|
# HELP kubernetes_healthchecks_total [ALPHA] This metric records the results of all healthcheck.
|
||||||
# TYPE kubernetes_healthchecks_total counter
|
# TYPE kubernetes_healthchecks_total counter
|
||||||
kubernetes_healthchecks_total{name="healthcheck-a",status="error",type="healthz"} 1
|
kubernetes_healthchecks_total{name="healthcheck-a",status="error",type="healthz"} 1
|
||||||
|
1
vendor/modules.txt
vendored
1
vendor/modules.txt
vendored
@ -1978,6 +1978,7 @@ k8s.io/component-base/metrics/prometheus/controllers
|
|||||||
k8s.io/component-base/metrics/prometheus/feature
|
k8s.io/component-base/metrics/prometheus/feature
|
||||||
k8s.io/component-base/metrics/prometheus/ratelimiter
|
k8s.io/component-base/metrics/prometheus/ratelimiter
|
||||||
k8s.io/component-base/metrics/prometheus/restclient
|
k8s.io/component-base/metrics/prometheus/restclient
|
||||||
|
k8s.io/component-base/metrics/prometheus/slis
|
||||||
k8s.io/component-base/metrics/prometheus/version
|
k8s.io/component-base/metrics/prometheus/version
|
||||||
k8s.io/component-base/metrics/prometheus/workqueue
|
k8s.io/component-base/metrics/prometheus/workqueue
|
||||||
k8s.io/component-base/metrics/prometheusextension
|
k8s.io/component-base/metrics/prometheusextension
|
||||||
|
Loading…
Reference in New Issue
Block a user