From 2fc401d0a295dd30ec73e99dbcdbba6a08268d6e Mon Sep 17 00:00:00 2001 From: atiratree Date: Mon, 6 Dec 2021 23:21:13 +0100 Subject: [PATCH] add gc metrics and collect sync errors --- .../garbagecollector/garbagecollector.go | 7 +++ .../garbagecollector/metrics/metrics.go | 45 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 pkg/controller/garbagecollector/metrics/metrics.go diff --git a/pkg/controller/garbagecollector/garbagecollector.go b/pkg/controller/garbagecollector/garbagecollector.go index 4eb39e703c3..3fc0fda7ae2 100644 --- a/pkg/controller/garbagecollector/garbagecollector.go +++ b/pkg/controller/garbagecollector/garbagecollector.go @@ -46,6 +46,7 @@ import ( "k8s.io/klog/v2" c "k8s.io/kubernetes/pkg/controller" "k8s.io/kubernetes/pkg/controller/apis/config/scheme" + "k8s.io/kubernetes/pkg/controller/garbagecollector/metrics" // import known versions _ "k8s.io/client-go/kubernetes" @@ -123,6 +124,8 @@ func NewGarbageCollector( ignoredResources: ignoredResources, } + metrics.Register() + return gc, nil } @@ -179,6 +182,7 @@ func (gc *GarbageCollector) Sync(discoveryClient discovery.ServerResourcesInterf // This can occur if there is an internal error in GetDeletableResources. if len(newResources) == 0 { klog.V(2).Infof("no resources reported by discovery, skipping garbage collector sync") + metrics.GarbageCollectorResourcesSyncError.Inc() return } @@ -203,6 +207,7 @@ func (gc *GarbageCollector) Sync(discoveryClient discovery.ServerResourcesInterf newResources = GetDeletableResources(discoveryClient) if len(newResources) == 0 { klog.V(2).Infof("no resources reported by discovery (attempt %d)", attempt) + metrics.GarbageCollectorResourcesSyncError.Inc() return false, nil } } @@ -226,6 +231,7 @@ func (gc *GarbageCollector) Sync(discoveryClient discovery.ServerResourcesInterf // attempt. 
if err := gc.resyncMonitors(newResources); err != nil { utilruntime.HandleError(fmt.Errorf("failed to sync resource monitors (attempt %d): %v", attempt, err)) + metrics.GarbageCollectorResourcesSyncError.Inc() return false, nil } klog.V(4).Infof("resynced monitors") @@ -237,6 +243,7 @@ func (gc *GarbageCollector) Sync(discoveryClient discovery.ServerResourcesInterf // note that workers stay paused until we successfully resync. if !cache.WaitForNamedCacheSync("garbage collector", waitForStopOrTimeout(stopCh, period), gc.dependencyGraphBuilder.IsSynced) { utilruntime.HandleError(fmt.Errorf("timed out waiting for dependency graph builder sync during GC sync (attempt %d)", attempt)) + metrics.GarbageCollectorResourcesSyncError.Inc() return false, nil } diff --git a/pkg/controller/garbagecollector/metrics/metrics.go b/pkg/controller/garbagecollector/metrics/metrics.go new file mode 100644 index 00000000000..d19e63bf714 --- /dev/null +++ b/pkg/controller/garbagecollector/metrics/metrics.go @@ -0,0 +1,45 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package metrics
+
+import (
+	"sync"
+
+	"k8s.io/component-base/metrics"
+	"k8s.io/component-base/metrics/legacyregistry"
+)
+
+const GarbageCollectorControllerSubsystem = "garbagecollector_controller" // Subsystem set in the CounterOpts of the metric declared below.
+
+var (
+	GarbageCollectorResourcesSyncError = metrics.NewCounter( // Incremented by GarbageCollector.Sync each time a resource sync attempt fails.
+		&metrics.CounterOpts{
+			Subsystem:      GarbageCollectorControllerSubsystem,
+			Name:           "resources_sync_error_total",
+			Help:           "Number of garbage collector resources sync errors",
+			StabilityLevel: metrics.ALPHA,
+		})
+)
+
+var registerMetrics sync.Once // Ensures the registration in Register runs at most once.
+
+// Register registers the garbage collector controller metrics via legacyregistry; only the first call has any effect.
+func Register() {
+	registerMetrics.Do(func() {
+		legacyregistry.MustRegister(GarbageCollectorResourcesSyncError)
+	})
+}