From fce6eb09033967cd8024168f3140ec6b9c6b10b7 Mon Sep 17 00:00:00 2001 From: "Dr. Stefan Schimanski" Date: Thu, 22 Nov 2018 15:16:44 +0100 Subject: [PATCH] aggregator: add metrics for request errors --- .../kube-aggregator/pkg/apiserver/BUILD | 1 + .../pkg/apiserver/handler_proxy.go | 31 ++++++++++--- .../pkg/controllers/status/BUILD | 7 ++- .../status/available_controller.go | 31 ++++++++++--- .../pkg/controllers/status/metrics.go | 43 +++++++++++++++++++ 5 files changed, 99 insertions(+), 14 deletions(-) create mode 100644 staging/src/k8s.io/kube-aggregator/pkg/controllers/status/metrics.go diff --git a/staging/src/k8s.io/kube-aggregator/pkg/apiserver/BUILD b/staging/src/k8s.io/kube-aggregator/pkg/apiserver/BUILD index 69af8089830..aa5763bd528 100644 --- a/staging/src/k8s.io/kube-aggregator/pkg/apiserver/BUILD +++ b/staging/src/k8s.io/kube-aggregator/pkg/apiserver/BUILD @@ -55,6 +55,7 @@ go_library( "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library", "//staging/src/k8s.io/apiserver/pkg/endpoints/handlers/responsewriters:go_default_library", + "//staging/src/k8s.io/apiserver/pkg/endpoints/metrics:go_default_library", "//staging/src/k8s.io/apiserver/pkg/endpoints/request:go_default_library", "//staging/src/k8s.io/apiserver/pkg/features:go_default_library", "//staging/src/k8s.io/apiserver/pkg/server:go_default_library", diff --git a/staging/src/k8s.io/kube-aggregator/pkg/apiserver/handler_proxy.go b/staging/src/k8s.io/kube-aggregator/pkg/apiserver/handler_proxy.go index 3a272e2c503..e6e30bbd44b 100644 --- a/staging/src/k8s.io/kube-aggregator/pkg/apiserver/handler_proxy.go +++ b/staging/src/k8s.io/kube-aggregator/pkg/apiserver/handler_proxy.go @@ -30,6 +30,7 @@ import ( utilnet "k8s.io/apimachinery/pkg/util/net" "k8s.io/apimachinery/pkg/util/proxy" "k8s.io/apiserver/pkg/endpoints/handlers/responsewriters" + endpointmetrics "k8s.io/apiserver/pkg/endpoints/metrics" genericapirequest "k8s.io/apiserver/pkg/endpoints/request" genericfeatures "k8s.io/apiserver/pkg/features" utilfeature "k8s.io/apiserver/pkg/util/feature" @@ -38,7 +39,7 @@ import ( apiregistrationapi "k8s.io/kube-aggregator/pkg/apis/apiregistration" ) -const AggregatorComponent metrics.Component = "aggregator" +const aggregatorComponent string = "aggregator" // proxyHandler provides a http.Handler which will proxy traffic to locations // specified by items implementing Redirector. @@ -62,6 +63,8 @@ type proxyHandlingInfo struct { // local indicates that this APIService is locally satisfied local bool + // name is the name of the APIService + name string // restConfig holds the information for building a roundtripper restConfig *restclient.Config // transportBuildingError is an error produced while building the transport. If this @@ -77,6 +80,19 @@ type proxyHandlingInfo struct { serviceAvailable bool } +func proxyError(w http.ResponseWriter, req *http.Request, error string, code int) { + http.Error(w, error, code) + + ctx := req.Context() + info, ok := genericapirequest.RequestInfoFrom(ctx) + if !ok { + klog.Warning("no RequestInfo found in the context") + return + } + // TODO: record long-running request differently? The long-running check func does not necessarily match the one of the aggregated apiserver + endpointmetrics.Record(req, info, aggregatorComponent, "", code, 0, 0) +} + func (r *proxyHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { value := r.handlingInfo.Load() if value == nil { @@ -94,18 +110,18 @@ func (r *proxyHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { } if !handlingInfo.serviceAvailable { - http.Error(w, "service unavailable", http.StatusServiceUnavailable) + proxyError(w, req, "service unavailable", http.StatusServiceUnavailable) return } if handlingInfo.transportBuildingError != nil { - http.Error(w, handlingInfo.transportBuildingError.Error(), http.StatusInternalServerError) + proxyError(w, req, handlingInfo.transportBuildingError.Error(), http.StatusInternalServerError) return } user, ok := genericapirequest.UserFrom(req.Context()) if !ok { - http.Error(w, "missing user", http.StatusInternalServerError) + proxyError(w, req, "missing user", http.StatusInternalServerError) return } @@ -115,7 +131,7 @@ func (r *proxyHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { rloc, err := r.serviceResolver.ResolveEndpoint(handlingInfo.serviceNamespace, handlingInfo.serviceName) if err != nil { klog.Errorf("error resolving %s/%s: %v", handlingInfo.serviceNamespace, handlingInfo.serviceName, err) - http.Error(w, "service unavailable", http.StatusServiceUnavailable) + proxyError(w, req, "service unavailable", http.StatusServiceUnavailable) return } location.Host = rloc.Host @@ -128,14 +144,14 @@ func (r *proxyHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { newReq.URL = location if handlingInfo.proxyRoundTripper == nil { - http.Error(w, "", http.StatusNotFound) + proxyError(w, req, "", http.StatusNotFound) return } // we need to wrap the roundtripper in another roundtripper which will apply the front proxy headers proxyRoundTripper, upgrade, err := maybeWrapForConnectionUpgrades(handlingInfo.restConfig, handlingInfo.proxyRoundTripper, req) if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) + proxyError(w, req, err.Error(), http.StatusInternalServerError) return } proxyRoundTripper = transport.NewAuthProxyRoundTripper(user.GetName(), user.GetGroups(), user.GetExtra(), proxyRoundTripper) @@ -197,6 +213,7 @@ func (r *proxyHandler) updateAPIService(apiService *apiregistrationapi.APIServic } newInfo := proxyHandlingInfo{ + name: apiService.Name, restConfig: &restclient.Config{ TLSClientConfig: restclient.TLSClientConfig{ Insecure: apiService.Spec.InsecureSkipTLSVerify, diff --git a/staging/src/k8s.io/kube-aggregator/pkg/controllers/status/BUILD b/staging/src/k8s.io/kube-aggregator/pkg/controllers/status/BUILD index f644a132653..5ec5d75b3b9 100644 --- a/staging/src/k8s.io/kube-aggregator/pkg/controllers/status/BUILD +++ b/staging/src/k8s.io/kube-aggregator/pkg/controllers/status/BUILD @@ -8,7 +8,10 @@ load( go_library( name = "go_default_library", - srcs = ["available_controller.go"], + srcs = [ + "available_controller.go", + "metrics.go", + ], importmap = "k8s.io/kubernetes/vendor/k8s.io/kube-aggregator/pkg/controllers/status", importpath = "k8s.io/kube-aggregator/pkg/controllers/status", deps = [ @@ -16,7 +19,6 @@ go_library( "//staging/src/k8s.io/apimachinery/pkg/api/equality:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/meta:go_default_library", - "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library", @@ -30,6 +32,7 @@ go_library( "//staging/src/k8s.io/kube-aggregator/pkg/client/informers/internalversion/apiregistration/internalversion:go_default_library", "//staging/src/k8s.io/kube-aggregator/pkg/client/listers/apiregistration/internalversion:go_default_library", "//staging/src/k8s.io/kube-aggregator/pkg/controllers:go_default_library", + "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library", "//vendor/k8s.io/klog:go_default_library", ], ) diff --git a/staging/src/k8s.io/kube-aggregator/pkg/controllers/status/available_controller.go b/staging/src/k8s.io/kube-aggregator/pkg/controllers/status/available_controller.go index 0018492c796..6bcad90bd39 100644 --- a/staging/src/k8s.io/kube-aggregator/pkg/controllers/status/available_controller.go +++ b/staging/src/k8s.io/kube-aggregator/pkg/controllers/status/available_controller.go @@ -29,7 +29,6 @@ import ( "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -152,9 +151,8 @@ func (c *AvailableConditionController) sync(key string) error { apiService := originalAPIService.DeepCopy() availableCondition := apiregistration.APIServiceCondition{ - Type: apiregistration.Available, - Status: apiregistration.ConditionTrue, - LastTransitionTime: metav1.Now(), + Type: apiregistration.Available, + Status: apiregistration.ConditionTrue, } // local API services are always considered available @@ -283,7 +281,30 @@ func updateAPIServiceStatus(client apiregistrationclient.APIServicesGetter, orig if equality.Semantic.DeepEqual(originalAPIService.Status, newAPIService.Status) { return newAPIService, nil } - return client.APIServices().UpdateStatus(newAPIService) + + newAPIService, err := client.APIServices().UpdateStatus(newAPIService) + if err != nil { + return nil, err + } + + // update metrics + wasAvailable := apiregistration.IsAPIServiceConditionTrue(originalAPIService, apiregistration.Available) + isAvailable := apiregistration.IsAPIServiceConditionTrue(newAPIService, apiregistration.Available) + if isAvailable != wasAvailable { + if isAvailable { + unavailableGauge.WithLabelValues(newAPIService.Name).Set(0.0) + } else { + unavailableGauge.WithLabelValues(newAPIService.Name).Set(1.0) + + reason := "UnknownReason" + if newCondition := apiregistration.GetAPIServiceConditionByType(newAPIService, apiregistration.Available); newCondition != nil { + reason = newCondition.Reason + } + unavailableCounter.WithLabelValues(newAPIService.Name, reason).Inc() + } + } + + return newAPIService, nil } func (c *AvailableConditionController) Run(threadiness int, stopCh <-chan struct{}) { diff --git a/staging/src/k8s.io/kube-aggregator/pkg/controllers/status/metrics.go b/staging/src/k8s.io/kube-aggregator/pkg/controllers/status/metrics.go new file mode 100644 index 00000000000..c36527b1076 --- /dev/null +++ b/staging/src/k8s.io/kube-aggregator/pkg/controllers/status/metrics.go @@ -0,0 +1,43 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apiserver + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +var ( + unavailableCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "aggregator_unavailable_apiservice_count", + Help: "Counter of APIServices which are marked as unavailable broken down by APIService name and reason.", + }, + []string{"name", "reason"}, + ) + unavailableGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "aggregator_unavailable_apiserver_gauge", + Help: "Gauge of APIServices which are marked as unavailable broken down by APIService name.", + }, + []string{"name"}, + ) +) + +func init() { + prometheus.MustRegister(unavailableCounter) + prometheus.MustRegister(unavailableGauge) +}