add metrics to nodeSyncLoop in service controller

This commit is contained in:
Minhan Xia 2021-02-04 15:54:10 -08:00
parent 64baf0f73f
commit 7250083a1e
3 changed files with 76 additions and 1 deletions

View File

@ -5,6 +5,7 @@ go_library(
srcs = [
"controller.go",
"doc.go",
"metrics.go",
],
importmap = "k8s.io/kubernetes/vendor/k8s.io/cloud-provider/controllers/service",
importpath = "k8s.io/cloud-provider/controllers/service",
@ -27,6 +28,8 @@ go_library(
"//staging/src/k8s.io/cloud-provider:go_default_library",
"//staging/src/k8s.io/cloud-provider/service/helpers:go_default_library",
"//staging/src/k8s.io/component-base/featuregate:go_default_library",
"//staging/src/k8s.io/component-base/metrics:go_default_library",
"//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
"//staging/src/k8s.io/component-base/metrics/prometheus/ratelimiter:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],

View File

@ -112,11 +112,12 @@ func New(
recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "service-controller"})
if kubeClient != nil && kubeClient.CoreV1().RESTClient().GetRateLimiter() != nil {
if err := ratelimiter.RegisterMetricAndTrackRateLimiterUsage("service_controller", kubeClient.CoreV1().RESTClient().GetRateLimiter()); err != nil {
if err := ratelimiter.RegisterMetricAndTrackRateLimiterUsage(subSystemName, kubeClient.CoreV1().RESTClient().GetRateLimiter()); err != nil {
return nil, err
}
}
registerMetrics()
s := &Controller{
cloud: cloud,
knownHosts: []*v1.Node{},
@ -663,6 +664,13 @@ func nodeReadyConditionStatus(node *v1.Node) v1.ConditionStatus {
func (s *Controller) nodeSyncLoop() {
s.knownHostsLock.Lock()
defer s.knownHostsLock.Unlock()
startTime := time.Now()
defer func() {
latency := time.Now().Sub(startTime).Seconds()
klog.V(4).Infof("It took %v seconds to finish nodeSyncLoop", latency)
nodeSyncLatency.Observe(latency)
}()
newHosts, err := listWithPredicate(s.nodeLister, s.getNodeConditionPredicate())
if err != nil {
runtime.HandleError(fmt.Errorf("Failed to retrieve current set of nodes from node lister: %v", err))
@ -713,6 +721,12 @@ func (s *Controller) lockedUpdateLoadBalancerHosts(service *v1.Service, hosts []
if !wantsLoadBalancer(service) {
return nil
}
startTime := time.Now()
defer func() {
latency := time.Now().Sub(startTime).Seconds()
klog.V(4).Infof("It took %v seconds to update load balancer hosts for service %s/%s", latency, service.Namespace, service.Name)
updateLoadBalancerHostLatency.Observe(latency)
}()
// This operation doesn't normally take very long (and happens pretty often), so we only record the final event
err := s.balancer.UpdateLoadBalancer(context.TODO(), s.clusterName, service, hosts)

View File

@ -0,0 +1,58 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"sync"
)
// subSystemName is the Prometheus subsystem name attached to the
// service-controller metrics declared in this file.
const subSystemName = "service_controller"
// register guards metric registration so that it runs at most once,
// regardless of how many times registerMetrics is invoked.
var register sync.Once

// registerMetrics adds the service-controller histograms to the legacy
// metrics registry. Only the first call performs the registration; later
// calls return without doing anything.
func registerMetrics() {
	registerAll := func() {
		legacyregistry.MustRegister(nodeSyncLatency)
		legacyregistry.MustRegister(updateLoadBalancerHostLatency)
	}
	register.Do(registerAll)
}
// nodeSyncLatency is a histogram of how long, in seconds, a single
// nodeSyncLoop pass takes from start to finish.
var nodeSyncLatency = metrics.NewHistogram(&metrics.HistogramOpts{
	Subsystem:      subSystemName,
	Name:           "nodesync_latency_seconds",
	Help:           "A metric measuring the latency for nodesync which updates loadbalancer hosts on cluster node updates.",
	StabilityLevel: metrics.ALPHA,
	// 15 exponential buckets, doubling from 1s up to 16384s.
	Buckets: metrics.ExponentialBuckets(1, 2, 15),
})

// updateLoadBalancerHostLatency is a histogram of how long, in seconds, a
// host update takes for one service's load balancer.
var updateLoadBalancerHostLatency = metrics.NewHistogram(&metrics.HistogramOpts{
	Subsystem:      subSystemName,
	Name:           "update_loadbalancer_host_latency_seconds",
	Help:           "A metric measuring the latency for updating each load balancer hosts.",
	StabilityLevel: metrics.ALPHA,
	// 15 exponential buckets, doubling from 1s up to 16384s.
	Buckets: metrics.ExponentialBuckets(1, 2, 15),
})