diff --git a/pkg/controller/podgc/gc_controller.go b/pkg/controller/podgc/gc_controller.go index afba67b7f48..36140ba891d 100644 --- a/pkg/controller/podgc/gc_controller.go +++ b/pkg/controller/podgc/gc_controller.go @@ -76,6 +76,9 @@ func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer c // This function is only intended for integration tests func NewPodGCInternal(ctx context.Context, kubeClient clientset.Interface, podInformer coreinformers.PodInformer, nodeInformer coreinformers.NodeInformer, terminatedPodThreshold int, gcCheckPeriod, quarantineTime time.Duration) *PodGCController { + // Register prometheus metrics + Register() + gcc := &PodGCController{ kubeClient: kubeClient, terminatedPodThreshold: terminatedPodThreshold, @@ -173,9 +176,11 @@ func (gcc *PodGCController) gcTerminating(ctx context.Context, pods []*v1.Pod) { wait.Add(1) go func(pod *v1.Pod) { defer wait.Done() + deletingPodsTotal.WithLabelValues().Inc() if err := gcc.markFailedAndDeletePod(ctx, pod); err != nil { // ignore not founds utilruntime.HandleError(err) + deletingPodsErrorTotal.WithLabelValues().Inc() } }(terminatingPods[i]) } diff --git a/pkg/controller/podgc/gc_controller_test.go b/pkg/controller/podgc/gc_controller_test.go index 923e2a1a7b9..1dddd4f1588 100644 --- a/pkg/controller/podgc/gc_controller_test.go +++ b/pkg/controller/podgc/gc_controller_test.go @@ -35,6 +35,7 @@ import ( clienttesting "k8s.io/client-go/testing" "k8s.io/client-go/util/workqueue" featuregatetesting "k8s.io/component-base/featuregate/testing" + metricstestutil "k8s.io/component-base/metrics/testutil" "k8s.io/kubernetes/pkg/controller" "k8s.io/kubernetes/pkg/controller/testutil" "k8s.io/kubernetes/pkg/features" @@ -631,6 +632,8 @@ func TestGCTerminating(t *testing.T) { verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames) }) } + // deletingPodsTotal is 7 in this test + testDeletingPodsMetrics(t, 7) } func verifyDeletedAndPatchedPods(t *testing.T, client *fake.Clientset, wantDeletedPodNames, wantPatchedPodNames sets.String) { @@ -645,6 +648,26 @@ func verifyDeletedAndPatchedPods(t *testing.T, client *fake.Clientset, wantDelet } } +func testDeletingPodsMetrics(t *testing.T, inputDeletingPodsTotal int) { + t.Helper() + + actualDeletingPodsTotal, err := metricstestutil.GetCounterMetricValue(deletingPodsTotal.WithLabelValues()) + if err != nil { + t.Errorf("Error getting actualDeletingPodsTotal") + } + if actualDeletingPodsTotal != float64(inputDeletingPodsTotal) { + t.Errorf("Expected desiredDeletingPodsTotal to be %d, got %v", inputDeletingPodsTotal, actualDeletingPodsTotal) + } + + actualDeletingPodsErrorTotal, err := metricstestutil.GetCounterMetricValue(deletingPodsErrorTotal.WithLabelValues()) + if err != nil { + t.Errorf("Error getting actualDeletingPodsErrorTotal") + } + if actualDeletingPodsErrorTotal != float64(0) { + t.Errorf("Expected desiredDeletingPodsTotal to be %d, got %v", 0, actualDeletingPodsErrorTotal) + } +} + func setupNewSimpleClient(nodes []*v1.Node, pods []*v1.Pod) *fake.Clientset { podList := &v1.PodList{} for _, podItem := range pods { diff --git a/pkg/controller/podgc/metrics.go b/pkg/controller/podgc/metrics.go new file mode 100644 index 00000000000..7761aa6ad38 --- /dev/null +++ b/pkg/controller/podgc/metrics.go @@ -0,0 +1,61 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podgc + +import ( + "sync" + + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" +) + +const ( + podGCController = "pod_gc_collector" + deletingPods = "deleting_pods_total" + deletingPodsError = "deleting_pods_error_total" +) + +var ( + deletingPodsTotal = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: podGCController, + Name: deletingPods, + Help: "Number of pods that are being forcefully deleted since the Pod GC Controller started.", + StabilityLevel: metrics.ALPHA, + }, + []string{}, + ) + deletingPodsErrorTotal = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: podGCController, + Name: deletingPodsError, + Help: "Number of errors encountered when forcefully deleting the pods since the Pod GC Controller started.", + StabilityLevel: metrics.ALPHA, + }, + []string{}, + ) +) + +var registerMetrics sync.Once + +// Register the metrics that are to be monitored. +func Register() { + registerMetrics.Do(func() { + legacyregistry.MustRegister(deletingPodsTotal) + legacyregistry.MustRegister(deletingPodsErrorTotal) + }) +}