Merge pull request #98779 from freehan/nodesync-metrics

add metrics to nodeSyncLoop in service controller
This commit is contained in:
Kubernetes Prow Robot 2021-02-09 17:43:20 -08:00 committed by GitHub
commit 2289b02ea3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 76 additions and 1 deletions

View File

@ -5,6 +5,7 @@ go_library(
srcs = [
"controller.go",
"doc.go",
"metrics.go",
],
importmap = "k8s.io/kubernetes/vendor/k8s.io/cloud-provider/controllers/service",
importpath = "k8s.io/cloud-provider/controllers/service",
@ -27,6 +28,8 @@ go_library(
"//staging/src/k8s.io/cloud-provider:go_default_library",
"//staging/src/k8s.io/cloud-provider/service/helpers:go_default_library",
"//staging/src/k8s.io/component-base/featuregate:go_default_library",
"//staging/src/k8s.io/component-base/metrics:go_default_library",
"//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
"//staging/src/k8s.io/component-base/metrics/prometheus/ratelimiter:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],

View File

@ -112,11 +112,12 @@ func New(
recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "service-controller"})
if kubeClient != nil && kubeClient.CoreV1().RESTClient().GetRateLimiter() != nil {
if err := ratelimiter.RegisterMetricAndTrackRateLimiterUsage("service_controller", kubeClient.CoreV1().RESTClient().GetRateLimiter()); err != nil {
if err := ratelimiter.RegisterMetricAndTrackRateLimiterUsage(subSystemName, kubeClient.CoreV1().RESTClient().GetRateLimiter()); err != nil {
return nil, err
}
}
registerMetrics()
s := &Controller{
cloud: cloud,
knownHosts: []*v1.Node{},
@ -663,6 +664,13 @@ func nodeReadyConditionStatus(node *v1.Node) v1.ConditionStatus {
func (s *Controller) nodeSyncLoop() {
s.knownHostsLock.Lock()
defer s.knownHostsLock.Unlock()
startTime := time.Now()
defer func() {
latency := time.Now().Sub(startTime).Seconds()
klog.V(4).Infof("It took %v seconds to finish nodeSyncLoop", latency)
nodeSyncLatency.Observe(latency)
}()
newHosts, err := listWithPredicate(s.nodeLister, s.getNodeConditionPredicate())
if err != nil {
runtime.HandleError(fmt.Errorf("Failed to retrieve current set of nodes from node lister: %v", err))
@ -713,6 +721,12 @@ func (s *Controller) lockedUpdateLoadBalancerHosts(service *v1.Service, hosts []
if !wantsLoadBalancer(service) {
return nil
}
startTime := time.Now()
defer func() {
latency := time.Now().Sub(startTime).Seconds()
klog.V(4).Infof("It took %v seconds to update load balancer hosts for service %s/%s", latency, service.Namespace, service.Name)
updateLoadBalancerHostLatency.Observe(latency)
}()
// This operation doesn't normally take very long (and happens pretty often), so we only record the final event
err := s.balancer.UpdateLoadBalancer(context.TODO(), s.clusterName, service, hosts)

View File

@ -0,0 +1,58 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"sync"
)
// subSystemName is the Prometheus subsystem name shared by the metrics that
// the service controller exposes.
const subSystemName = "service_controller"
// register guards metric registration so that it runs at most once, no matter
// how many times registerMetrics is called.
var register sync.Once

// registerMetrics registers the service-controller histograms with the legacy
// metrics registry. Only the first call performs the registration; subsequent
// calls are no-ops.
func registerMetrics() {
	register.Do(func() {
		// MustRegister is variadic, so both histograms are handed over in a
		// single call, in the same order as before.
		legacyregistry.MustRegister(
			nodeSyncLatency,
			updateLoadBalancerHostLatency,
		)
	})
}
// nodeSyncLatency is the histogram that nodeSyncLoop feeds with the duration,
// in seconds, of each of its runs.
var nodeSyncLatency = metrics.NewHistogram(&metrics.HistogramOpts{
	Subsystem: subSystemName,
	Name:      "nodesync_latency_seconds",
	Help:      "A metric measuring the latency for nodesync which updates loadbalancer hosts on cluster node updates.",
	// 15 exponential buckets doubling from 1s: 1s, 2s, 4s, ... 16384s.
	Buckets:        metrics.ExponentialBuckets(1, 2, 15),
	StabilityLevel: metrics.ALPHA,
})

// updateLoadBalancerHostLatency is the histogram that
// lockedUpdateLoadBalancerHosts feeds with the duration, in seconds, of each
// host update it performs for a service.
var updateLoadBalancerHostLatency = metrics.NewHistogram(&metrics.HistogramOpts{
	Subsystem: subSystemName,
	Name:      "update_loadbalancer_host_latency_seconds",
	Help:      "A metric measuring the latency for updating each load balancer hosts.",
	// 15 exponential buckets doubling from 1s: 1s, 2s, 4s, ... 16384s.
	Buckets:        metrics.ExponentialBuckets(1, 2, 15),
	StabilityLevel: metrics.ALPHA,
})