From 8ce37fde9569064b074f4282d76dcc8c2b21577f Mon Sep 17 00:00:00 2001 From: xing-yang Date: Tue, 1 Nov 2022 22:46:50 +0000 Subject: [PATCH 1/3] Add metric deleting_pods_total --- pkg/controller/podgc/gc_controller.go | 5 ++ pkg/controller/podgc/gc_controller_test.go | 23 ++++++++ pkg/controller/podgc/metrics.go | 61 ++++++++++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 pkg/controller/podgc/metrics.go diff --git a/pkg/controller/podgc/gc_controller.go b/pkg/controller/podgc/gc_controller.go index afba67b7f48..36140ba891d 100644 --- a/pkg/controller/podgc/gc_controller.go +++ b/pkg/controller/podgc/gc_controller.go @@ -76,6 +76,9 @@ func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer c // This function is only intended for integration tests func NewPodGCInternal(ctx context.Context, kubeClient clientset.Interface, podInformer coreinformers.PodInformer, nodeInformer coreinformers.NodeInformer, terminatedPodThreshold int, gcCheckPeriod, quarantineTime time.Duration) *PodGCController { + // Register prometheus metrics + Register() + gcc := &PodGCController{ kubeClient: kubeClient, terminatedPodThreshold: terminatedPodThreshold, @@ -173,9 +176,11 @@ func (gcc *PodGCController) gcTerminating(ctx context.Context, pods []*v1.Pod) { wait.Add(1) go func(pod *v1.Pod) { defer wait.Done() + deletingPodsTotal.WithLabelValues().Inc() if err := gcc.markFailedAndDeletePod(ctx, pod); err != nil { // ignore not founds utilruntime.HandleError(err) + deletingPodsErrorTotal.WithLabelValues().Inc() } }(terminatingPods[i]) } diff --git a/pkg/controller/podgc/gc_controller_test.go b/pkg/controller/podgc/gc_controller_test.go index 923e2a1a7b9..1dddd4f1588 100644 --- a/pkg/controller/podgc/gc_controller_test.go +++ b/pkg/controller/podgc/gc_controller_test.go @@ -35,6 +35,7 @@ import ( clienttesting "k8s.io/client-go/testing" "k8s.io/client-go/util/workqueue" featuregatetesting "k8s.io/component-base/featuregate/testing" + metricstestutil "k8s.io/component-base/metrics/testutil" "k8s.io/kubernetes/pkg/controller" "k8s.io/kubernetes/pkg/controller/testutil" "k8s.io/kubernetes/pkg/features" @@ -631,6 +632,8 @@ func TestGCTerminating(t *testing.T) { verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames) }) } + // deletingPodsTotal is 7 in this test + testDeletingPodsMetrics(t, 7) } func verifyDeletedAndPatchedPods(t *testing.T, client *fake.Clientset, wantDeletedPodNames, wantPatchedPodNames sets.String) { @@ -645,6 +648,26 @@ func verifyDeletedAndPatchedPods(t *testing.T, client *fake.Clientset, wantDelet } } +func testDeletingPodsMetrics(t *testing.T, inputDeletingPodsTotal int) { + t.Helper() + + actualDeletingPodsTotal, err := metricstestutil.GetCounterMetricValue(deletingPodsTotal.WithLabelValues()) + if err != nil { + t.Errorf("Error getting actualDeletingPodsTotal") + } + if actualDeletingPodsTotal != float64(inputDeletingPodsTotal) { + t.Errorf("Expected desiredDeletingPodsTotal to be %d, got %v", inputDeletingPodsTotal, actualDeletingPodsTotal) + } + + actualDeletingPodsErrorTotal, err := metricstestutil.GetCounterMetricValue(deletingPodsErrorTotal.WithLabelValues()) + if err != nil { + t.Errorf("Error getting actualDeletingPodsErrorTotal") + } + if actualDeletingPodsErrorTotal != float64(0) { + t.Errorf("Expected desiredDeletingPodsTotal to be %d, got %v", 0, actualDeletingPodsErrorTotal) + } +} + func setupNewSimpleClient(nodes []*v1.Node, pods []*v1.Pod) *fake.Clientset { podList := &v1.PodList{} for _, podItem := range pods { diff --git a/pkg/controller/podgc/metrics.go b/pkg/controller/podgc/metrics.go new file mode 100644 index 00000000000..7761aa6ad38 --- /dev/null +++ b/pkg/controller/podgc/metrics.go @@ -0,0 +1,61 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podgc + +import ( + "sync" + + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" +) + +const ( + podGCController = "pod_gc_collector" + deletingPods = "deleting_pods_total" + deletingPodsError = "deleting_pods_error_total" +) + +var ( + deletingPodsTotal = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: podGCController, + Name: deletingPods, + Help: "Number of pods that are being forcefully deleted since the Pod GC Controller started.", + StabilityLevel: metrics.ALPHA, + }, + []string{}, + ) + deletingPodsErrorTotal = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: podGCController, + Name: deletingPodsError, + Help: "Number of errors encountered when forcefully deleting the pods since the Pod GC Controller started.", + StabilityLevel: metrics.ALPHA, + }, + []string{}, + ) +) + +var registerMetrics sync.Once + +// Register the metrics that are to be monitored. +func Register() { + registerMetrics.Do(func() { + legacyregistry.MustRegister(deletingPodsTotal) + legacyregistry.MustRegister(deletingPodsErrorTotal) + }) +} From b4e6bed525fc8fbfe4b43e6cab65db1b5555ac3b Mon Sep 17 00:00:00 2001 From: xing-yang Date: Wed, 2 Nov 2022 19:54:38 +0000 Subject: [PATCH 2/3] Moved Register() to init() --- pkg/controller/podgc/gc_controller.go | 8 +++++--- pkg/controller/podgc/metrics.go | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pkg/controller/podgc/gc_controller.go b/pkg/controller/podgc/gc_controller.go index 36140ba891d..0b376313a4e 100644 --- a/pkg/controller/podgc/gc_controller.go +++ b/pkg/controller/podgc/gc_controller.go @@ -68,6 +68,11 @@ type PodGCController struct { quarantineTime time.Duration } +func init() { + // Register prometheus metrics + Register() +} + func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer coreinformers.PodInformer, nodeInformer coreinformers.NodeInformer, terminatedPodThreshold int) *PodGCController { return NewPodGCInternal(ctx, kubeClient, podInformer, nodeInformer, terminatedPodThreshold, gcCheckPeriod, quarantineTime) @@ -76,9 +81,6 @@ func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer c // This function is only intended for integration tests func NewPodGCInternal(ctx context.Context, kubeClient clientset.Interface, podInformer coreinformers.PodInformer, nodeInformer coreinformers.NodeInformer, terminatedPodThreshold int, gcCheckPeriod, quarantineTime time.Duration) *PodGCController { - // Register prometheus metrics - Register() - gcc := &PodGCController{ kubeClient: kubeClient, terminatedPodThreshold: terminatedPodThreshold, diff --git a/pkg/controller/podgc/metrics.go b/pkg/controller/podgc/metrics.go index 7761aa6ad38..5eaf033bfaa 100644 --- a/pkg/controller/podgc/metrics.go +++ b/pkg/controller/podgc/metrics.go @@ -25,8 +25,8 @@ import ( const ( podGCController = "pod_gc_collector" - deletingPods = "deleting_pods_total" - deletingPodsError = "deleting_pods_error_total" + deletingPods = "force_delete_pods_total" + deletingPodsError = "force_delete_pod_errors_total" ) var ( From 85f5583684f6ada680e4dc1edf782cc0bf67e920 Mon Sep 17 00:00:00 2001 From: xing-yang Date: Wed, 2 Nov 2022 22:37:50 +0000 Subject: [PATCH 3/3] Address review comments --- pkg/controller/podgc/gc_controller.go | 2 +- pkg/controller/podgc/metrics.go | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pkg/controller/podgc/gc_controller.go b/pkg/controller/podgc/gc_controller.go index 0b376313a4e..db1c62d2693 100644 --- a/pkg/controller/podgc/gc_controller.go +++ b/pkg/controller/podgc/gc_controller.go @@ -70,7 +70,7 @@ type PodGCController struct { func init() { // Register prometheus metrics - Register() + RegisterMetrics() } func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer coreinformers.PodInformer, diff --git a/pkg/controller/podgc/metrics.go b/pkg/controller/podgc/metrics.go index 5eaf033bfaa..e0c742a81cb 100644 --- a/pkg/controller/podgc/metrics.go +++ b/pkg/controller/podgc/metrics.go @@ -24,16 +24,14 @@ import ( ) const ( - podGCController = "pod_gc_collector" - deletingPods = "force_delete_pods_total" - deletingPodsError = "force_delete_pod_errors_total" + podGCController = "pod_gc_collector" ) var ( deletingPodsTotal = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: podGCController, - Name: deletingPods, + Name: "force_delete_pods_total", Help: "Number of pods that are being forcefully deleted since the Pod GC Controller started.", StabilityLevel: metrics.ALPHA, }, @@ -42,7 +40,7 @@ var ( deletingPodsErrorTotal = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: podGCController, - Name: deletingPodsError, + Name: "force_delete_pod_errors_total", Help: "Number of errors encountered when forcefully deleting the pods since the Pod GC Controller started.", StabilityLevel: metrics.ALPHA, }, @@ -53,7 +51,7 @@ var ( var registerMetrics sync.Once // Register the metrics that are to be monitored. -func Register() { +func RegisterMetrics() { registerMetrics.Do(func() { legacyregistry.MustRegister(deletingPodsTotal) legacyregistry.MustRegister(deletingPodsErrorTotal)