diff --git a/staging/src/k8s.io/cloud-provider/controllers/service/controller.go b/staging/src/k8s.io/cloud-provider/controllers/service/controller.go index 1a2d0aea6d4..d470fef7de8 100644 --- a/staging/src/k8s.io/cloud-provider/controllers/service/controller.go +++ b/staging/src/k8s.io/cloud-provider/controllers/service/controller.go @@ -720,6 +720,7 @@ func (c *Controller) nodeSyncService(svc *v1.Service, oldNodes, newNodes []*v1.N klog.V(4).Infof("nodeSyncService started for service %s/%s", svc.Namespace, svc.Name) if err := c.lockedUpdateLoadBalancerHosts(svc, newNodes); err != nil { runtime.HandleError(fmt.Errorf("failed to update load balancer hosts for service %s/%s: %v", svc.Namespace, svc.Name, err)) + nodeSyncErrorCount.Inc() return retNeedRetry } klog.V(4).Infof("nodeSyncService finished successfully for service %s/%s", svc.Namespace, svc.Name) @@ -763,6 +764,7 @@ func (c *Controller) updateLoadBalancerHosts(ctx context.Context, services []*v1 // associated with the service. func (c *Controller) lockedUpdateLoadBalancerHosts(service *v1.Service, hosts []*v1.Node) error { startTime := time.Now() + loadBalancerSyncCount.Inc() defer func() { latency := time.Since(startTime).Seconds() klog.V(4).Infof("It took %v seconds to update load balancer hosts for service %s/%s", latency, service.Namespace, service.Name) diff --git a/staging/src/k8s.io/cloud-provider/controllers/service/metrics.go b/staging/src/k8s.io/cloud-provider/controllers/service/metrics.go index e8410e3eb81..677318e43a2 100644 --- a/staging/src/k8s.io/cloud-provider/controllers/service/metrics.go +++ b/staging/src/k8s.io/cloud-provider/controllers/service/metrics.go @@ -17,9 +17,10 @@ limitations under the License. package service import ( + "sync" + "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" - "sync" ) const ( @@ -32,12 +33,26 @@ var register sync.Once // registerMetrics registers service-controller metrics. func registerMetrics() { register.Do(func() { + legacyregistry.MustRegister(loadBalancerSyncCount) legacyregistry.MustRegister(nodeSyncLatency) + legacyregistry.MustRegister(nodeSyncErrorCount) legacyregistry.MustRegister(updateLoadBalancerHostLatency) }) } var ( + loadBalancerSyncCount = metrics.NewCounter(&metrics.CounterOpts{ + Name: "loadbalancer_sync_total", + Subsystem: subSystemName, + Help: "A metric counting the amount of times any load balancer has been configured, as an effect of service/node changes on the cluster", + StabilityLevel: metrics.ALPHA, + }) + nodeSyncErrorCount = metrics.NewCounter(&metrics.CounterOpts{ + Name: "nodesync_error_total", + Subsystem: subSystemName, + Help: "A metric counting the amount of times any load balancer has been configured and errored, as an effect of node changes on the cluster", + StabilityLevel: metrics.ALPHA, + }) nodeSyncLatency = metrics.NewHistogram(&metrics.HistogramOpts{ Name: "nodesync_latency_seconds", Subsystem: subSystemName, @@ -46,7 +61,6 @@ var ( Buckets: metrics.ExponentialBuckets(1, 2, 15), StabilityLevel: metrics.ALPHA, }) - updateLoadBalancerHostLatency = metrics.NewHistogram(&metrics.HistogramOpts{ Name: "update_loadbalancer_host_latency_seconds", Subsystem: subSystemName,