aggregate kube-proxy metrics

Instead of using two metrics, use just one metric with multiple labels.
Since the label can only take two values, 200 or 503, there is no risk of
cardinality explosion, and the result is simple to represent in graphs.

Change-Id: I0e9cbd6ec2051de44d277d673dc20f02b96aa4d1
This commit is contained in:
Antonio Ojea 2023-07-16 11:47:19 +00:00
parent 900237fada
commit 19f61caabe
3 changed files with 25 additions and 46 deletions

View File

@ -26,7 +26,7 @@ import (
"time" "time"
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/component-base/metrics/testutil" "k8s.io/component-base/metrics/testutil"
@ -604,22 +604,23 @@ func testHTTPHandler(hsTest *serverTest, status int, t *testing.T) {
hsTest.tracking503++ hsTest.tracking503++
} }
if hsTest.url == healthzURL { if hsTest.url == healthzURL {
testMetricEquals(metrics.ProxyHealthz200Total, float64(hsTest.tracking200), t) testMetricEquals(metrics.ProxyHealthzTotal.WithLabelValues("200"), float64(hsTest.tracking200), t)
testMetricEquals(metrics.ProxyHealthz503Total, float64(hsTest.tracking503), t) testMetricEquals(metrics.ProxyHealthzTotal.WithLabelValues("503"), float64(hsTest.tracking503), t)
} }
if hsTest.url == livezURL { if hsTest.url == livezURL {
testMetricEquals(metrics.ProxyLivez200Total, float64(hsTest.tracking200), t) testMetricEquals(metrics.ProxyLivezTotal.WithLabelValues("200"), float64(hsTest.tracking200), t)
testMetricEquals(metrics.ProxyLivez503Total, float64(hsTest.tracking503), t) testMetricEquals(metrics.ProxyLivezTotal.WithLabelValues("503"), float64(hsTest.tracking503), t)
} }
} }
func testMetricEquals(metric *basemetrics.Counter, expected float64, t *testing.T) { func testMetricEquals(metric basemetrics.CounterMetric, expected float64, t *testing.T) {
t.Helper()
val, err := testutil.GetCounterMetricValue(metric) val, err := testutil.GetCounterMetricValue(metric)
if err != nil { if err != nil {
t.Errorf("unable to retrieve value for metric: %s, err: %v", metric.Name, err) t.Errorf("unable to retrieve value for metric, err: %v", err)
} }
if val != expected { if val != expected {
t.Errorf("unexpected metric: %s, expected: %v, found: %v", metric.Name, expected, val) t.Errorf("expected: %v, found: %v", expected, val)
} }
} }

View File

@ -193,10 +193,10 @@ func (h healthzHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
resp.Header().Set("Content-Type", "application/json") resp.Header().Set("Content-Type", "application/json")
resp.Header().Set("X-Content-Type-Options", "nosniff") resp.Header().Set("X-Content-Type-Options", "nosniff")
if !healthy { if !healthy {
metrics.ProxyHealthz503Total.Inc() metrics.ProxyHealthzTotal.WithLabelValues("503").Inc()
resp.WriteHeader(http.StatusServiceUnavailable) resp.WriteHeader(http.StatusServiceUnavailable)
} else { } else {
metrics.ProxyHealthz200Total.Inc() metrics.ProxyHealthzTotal.WithLabelValues("200").Inc()
resp.WriteHeader(http.StatusOK) resp.WriteHeader(http.StatusOK)
// In older releases, the returned "lastUpdated" time indicated the last // In older releases, the returned "lastUpdated" time indicated the last
// time the proxier sync loop ran, even if nothing had changed. To // time the proxier sync loop ran, even if nothing had changed. To
@ -217,10 +217,10 @@ func (h livezHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
resp.Header().Set("Content-Type", "application/json") resp.Header().Set("Content-Type", "application/json")
resp.Header().Set("X-Content-Type-Options", "nosniff") resp.Header().Set("X-Content-Type-Options", "nosniff")
if !healthy { if !healthy {
metrics.ProxyLivez503Total.Inc() metrics.ProxyLivezTotal.WithLabelValues("503").Inc()
resp.WriteHeader(http.StatusServiceUnavailable) resp.WriteHeader(http.StatusServiceUnavailable)
} else { } else {
metrics.ProxyLivez200Total.Inc() metrics.ProxyLivezTotal.WithLabelValues("200").Inc()
resp.WriteHeader(http.StatusOK) resp.WriteHeader(http.StatusOK)
// In older releases, the returned "lastUpdated" time indicated the last // In older releases, the returned "lastUpdated" time indicated the last
// time the proxier sync loop ran, even if nothing had changed. To // time the proxier sync loop ran, even if nothing had changed. To

View File

@ -184,48 +184,28 @@ var (
[]string{"table"}, []string{"table"},
) )
// ProxyHealthz200Total is the number of returned HTTP Status 200 for each // ProxyHealthzTotal is the number of returned HTTP Status for each
// healthz probe. // healthz probe.
ProxyHealthz200Total = metrics.NewCounter( ProxyHealthzTotal = metrics.NewCounterVec(
&metrics.CounterOpts{ &metrics.CounterOpts{
Subsystem: kubeProxySubsystem, Subsystem: kubeProxySubsystem,
Name: "proxy_healthz_200_total", Name: "proxy_healthz_total",
Help: "Cumulative proxy healthz HTTP status 200", Help: "Cumulative proxy healthz HTTP status",
StabilityLevel: metrics.ALPHA, StabilityLevel: metrics.ALPHA,
}, },
[]string{"code"},
) )
// ProxyHealthz503Total is the number of returned HTTP Status 503 for each // ProxyLivezTotal is the number of returned HTTP Status for each
// healthz probe.
ProxyHealthz503Total = metrics.NewCounter(
&metrics.CounterOpts{
Subsystem: kubeProxySubsystem,
Name: "proxy_healthz_503_total",
Help: "Cumulative proxy healthz HTTP status 503",
StabilityLevel: metrics.ALPHA,
},
)
// ProxyLivez200Total is the number of returned HTTP Status 200 for each
// livez probe. // livez probe.
ProxyLivez200Total = metrics.NewCounter( ProxyLivezTotal = metrics.NewCounterVec(
&metrics.CounterOpts{ &metrics.CounterOpts{
Subsystem: kubeProxySubsystem, Subsystem: kubeProxySubsystem,
Name: "proxy_livez_200_total", Name: "proxy_livez_total",
Help: "Cumulative proxy livez HTTP status 200", Help: "Cumulative proxy livez HTTP status",
StabilityLevel: metrics.ALPHA,
},
)
// ProxyLivez503Total is the number of returned HTTP Status 503 for each
// livez probe.
ProxyLivez503Total = metrics.NewCounter(
&metrics.CounterOpts{
Subsystem: kubeProxySubsystem,
Name: "proxy_livez_503_total",
Help: "Cumulative proxy livez HTTP status 503",
StabilityLevel: metrics.ALPHA, StabilityLevel: metrics.ALPHA,
}, },
[]string{"code"},
) )
// SyncProxyRulesLastQueuedTimestamp is the last time a proxy sync was // SyncProxyRulesLastQueuedTimestamp is the last time a proxy sync was
@ -274,10 +254,8 @@ func RegisterMetrics() {
legacyregistry.MustRegister(IptablesPartialRestoreFailuresTotal) legacyregistry.MustRegister(IptablesPartialRestoreFailuresTotal)
legacyregistry.MustRegister(SyncProxyRulesLastQueuedTimestamp) legacyregistry.MustRegister(SyncProxyRulesLastQueuedTimestamp)
legacyregistry.MustRegister(SyncProxyRulesNoLocalEndpointsTotal) legacyregistry.MustRegister(SyncProxyRulesNoLocalEndpointsTotal)
legacyregistry.MustRegister(ProxyHealthz200Total) legacyregistry.MustRegister(ProxyHealthzTotal)
legacyregistry.MustRegister(ProxyHealthz503Total) legacyregistry.MustRegister(ProxyLivezTotal)
legacyregistry.MustRegister(ProxyLivez200Total)
legacyregistry.MustRegister(ProxyLivez503Total)
}) })
} }