From 6cb28fd1b4562b0d0256cbfb1d713517a95a0c86 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Tue, 16 Feb 2021 13:53:33 +0100 Subject: [PATCH 1/3] generic ephemeral volume: add metrics As discussed during the production readiness review, a metric for the PVC create operations is useful. The "ephemeral_volume" workqueue metrics were already added in the initial implementation. The new code follows the example set by the endpoints controller. --- pkg/controller/volume/ephemeral/controller.go | 12 +++ .../volume/ephemeral/controller_test.go | 88 +++++++++++++++---- .../volume/ephemeral/metrics/metrics.go | 53 +++++++++++ 3 files changed, 137 insertions(+), 16 deletions(-) create mode 100644 pkg/controller/volume/ephemeral/metrics/metrics.go diff --git a/pkg/controller/volume/ephemeral/controller.go b/pkg/controller/volume/ephemeral/controller.go index 59d008a0fff..07c1c200771 100644 --- a/pkg/controller/volume/ephemeral/controller.go +++ b/pkg/controller/volume/ephemeral/controller.go @@ -25,6 +25,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" @@ -38,6 +39,7 @@ import ( "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" "k8s.io/kubernetes/pkg/controller/volume/common" + ephemeralvolumemetrics "k8s.io/kubernetes/pkg/controller/volume/ephemeral/metrics" "k8s.io/kubernetes/pkg/controller/volume/events" "k8s.io/kubernetes/pkg/volume/util" ) @@ -90,6 +92,8 @@ func NewController( queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "ephemeral_volume"), } + ephemeralvolumemetrics.RegisterMetrics() + eventBroadcaster := record.NewBroadcaster() eventBroadcaster.StartLogging(klog.Infof) eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) @@ -279,6 +283,14 @@ func (ec *ephemeralController) handleVolume(pod *v1.Pod, vol v1.Volume) error { Spec: ephemeral.VolumeClaimTemplate.Spec, } _, err = ec.kubeClient.CoreV1().PersistentVolumeClaims(pod.Namespace).Create(context.TODO(), pvc, metav1.CreateOptions{}) + reason := "" + if err != nil { + reason = string(apierrors.ReasonForError(err)) + if reason == "" { + reason = "Unknown" + } + } + ephemeralvolumemetrics.EphemeralVolumeCreate.WithLabelValues(reason).Inc() if err != nil { return fmt.Errorf("create PVC %s: %v", pvcName, err) } diff --git a/pkg/controller/volume/ephemeral/controller_test.go b/pkg/controller/volume/ephemeral/controller_test.go index 370c50d2187..158997001c5 100644 --- a/pkg/controller/volume/ephemeral/controller_test.go +++ b/pkg/controller/volume/ephemeral/controller_test.go @@ -18,6 +18,7 @@ package ephemeral import ( "context" + "errors" "sort" "testing" @@ -25,14 +26,18 @@ import ( // storagev1 "k8s.io/api/storage/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" // "k8s.io/apimachinery/pkg/types" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" "k8s.io/client-go/tools/cache" kcache "k8s.io/client-go/tools/cache" + "k8s.io/component-base/metrics/testutil" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/controller" + ephemeralvolumemetrics "k8s.io/kubernetes/pkg/controller/volume/ephemeral/metrics" "github.com/stretchr/testify/assert" ) @@ -57,18 +62,20 @@ func init() { func TestSyncHandler(t *testing.T) { tests := []struct { - name string - podKey string - pvcs []*v1.PersistentVolumeClaim - pods []*v1.Pod - expectedPVCs []v1.PersistentVolumeClaim - expectedError bool + name string + podKey string + pvcs []*v1.PersistentVolumeClaim + pods []*v1.Pod + expectedPVCs []v1.PersistentVolumeClaim + expectedError bool + expectedMetrics expectedMetrics }{ { - name: "create", - pods: []*v1.Pod{testPodWithEphemeral}, - podKey: podKey(testPodWithEphemeral), - expectedPVCs: []v1.PersistentVolumeClaim{*testPodEphemeralClaim}, + name: "create", + pods: []*v1.Pod{testPodWithEphemeral}, + podKey: podKey(testPodWithEphemeral), + expectedPVCs: []v1.PersistentVolumeClaim{*testPodEphemeralClaim}, + expectedMetrics: expectedMetrics{1, 0}, }, { name: "no-such-pod", @@ -90,11 +97,12 @@ func TestSyncHandler(t *testing.T) { podKey: podKey(testPod), }, { - name: "create-with-other-PVC", - pods: []*v1.Pod{testPodWithEphemeral}, - podKey: podKey(testPodWithEphemeral), - pvcs: []*v1.PersistentVolumeClaim{otherNamespaceClaim}, - expectedPVCs: []v1.PersistentVolumeClaim{*otherNamespaceClaim, *testPodEphemeralClaim}, + name: "create-with-other-PVC", + pods: []*v1.Pod{testPodWithEphemeral}, + podKey: podKey(testPodWithEphemeral), + pvcs: []*v1.PersistentVolumeClaim{otherNamespaceClaim}, + expectedPVCs: []v1.PersistentVolumeClaim{*otherNamespaceClaim, *testPodEphemeralClaim}, + expectedMetrics: expectedMetrics{1, 0}, }, { name: "wrong-PVC-owner", @@ -104,10 +112,17 @@ func TestSyncHandler(t *testing.T) { expectedPVCs: []v1.PersistentVolumeClaim{*conflictingClaim}, expectedError: true, }, + { + name: "create-conflict", + pods: []*v1.Pod{testPodWithEphemeral}, + podKey: podKey(testPodWithEphemeral), + expectedMetrics: expectedMetrics{0, 1}, + expectedError: true, + }, } for _, tc := range tests { - // Run sequentially because of global logging. + // Run sequentially because of global logging and global metrics. t.Run(tc.name, func(t *testing.T) { // There is no good way to shut down the informers. They spawn // various goroutines and some of them (in particular shared informer) @@ -124,6 +139,12 @@ func TestSyncHandler(t *testing.T) { } fakeKubeClient := createTestClient(objects...) + if tc.expectedMetrics.numConflicts > 0 { + fakeKubeClient.PrependReactor("create", "persistentvolumeclaims", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + return true, nil, apierrors.NewConflict(action.GetResource().GroupResource(), "fake name", errors.New("fake conflict")) + }) + } + setupMetrics() informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc()) podInformer := informerFactory.Core().V1().Pods() pvcInformer := informerFactory.Core().V1().PersistentVolumeClaims() @@ -152,6 +173,7 @@ func TestSyncHandler(t *testing.T) { t.Fatalf("unexpected error while listing PVCs: %v", err) } assert.Equal(t, sortPVCs(tc.expectedPVCs), sortPVCs(pvcs.Items)) + expectMetrics(t, tc.expectedMetrics) }) } } @@ -219,3 +241,37 @@ func createTestClient(objects ...runtime.Object) *fake.Clientset { fakeClient := fake.NewSimpleClientset(objects...) return fakeClient } + +// Metrics helpers + +type expectedMetrics struct { + numCreated int + numConflicts int +} + +func expectMetrics(t *testing.T, em expectedMetrics) { + t.Helper() + + actualCreated, err := testutil.GetCounterMetricValue(ephemeralvolumemetrics.EphemeralVolumeCreate.WithLabelValues("")) + handleErr(t, err, "ephemeralVolumeCreate") + if actualCreated != float64(em.numCreated) { + t.Errorf("Expected PVCs to be created %d, got %v", em.numCreated, actualCreated) + } + actualConflicts, err := testutil.GetCounterMetricValue(ephemeralvolumemetrics.EphemeralVolumeCreate.WithLabelValues("Conflict")) + handleErr(t, err, "ephemeralVolumeCreate/Conflict") + if actualConflicts != float64(em.numConflicts) { + t.Errorf("Expected PVCs to have conflicts %d, got %v", em.numConflicts, actualConflicts) + } +} + +func handleErr(t *testing.T, err error, metricName string) { + if err != nil { + t.Errorf("Failed to get %s value, err: %v", metricName, err) + } +} + +func setupMetrics() { + ephemeralvolumemetrics.RegisterMetrics() + ephemeralvolumemetrics.EphemeralVolumeCreate.Delete(map[string]string{"reason": ""}) + ephemeralvolumemetrics.EphemeralVolumeCreate.Delete(map[string]string{"reason": "Conflict"}) +} diff --git a/pkg/controller/volume/ephemeral/metrics/metrics.go b/pkg/controller/volume/ephemeral/metrics/metrics.go new file mode 100644 index 00000000000..813f56ba0a5 --- /dev/null +++ b/pkg/controller/volume/ephemeral/metrics/metrics.go @@ -0,0 +1,53 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "sync" + + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" +) + +// EphemeralVolumeSubsystem - subsystem name used for Endpoint Slices. +const EphemeralVolumeSubsystem = "ephemeral_volume_controller" + +var ( + // EphemeralVolumeCreate tracks the number of + // PersistentVolumeClaims().Create calls for each failure + // reason + // (https://pkg.go.dev/k8s.io/apimachinery@v0.20.2/pkg/apis/meta/v1#StatusReason), + // with empty for successful calls. + EphemeralVolumeCreate = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: EphemeralVolumeSubsystem, + Name: "create", + Help: "Number of PersistenVolumeClaims creation requests", + StabilityLevel: metrics.ALPHA, + }, + []string{"reason"}, + ) +) + +var registerMetrics sync.Once + +// RegisterMetrics registers EphemeralVolume metrics. +func RegisterMetrics() { + registerMetrics.Do(func() { + legacyregistry.MustRegister(EphemeralVolumeCreate) + }) +} From ef6f65beadff3d0de1d0b76671ef6d837b00dc05 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Tue, 23 Feb 2021 21:21:05 +0100 Subject: [PATCH 2/3] k8s.io/component-base: support GetCounterMetricValue for Counter GetCounterMetricValue has an unchecked conversion to metrics.Metric, something that didn't work for a *Counter because it didn't implement that interface. --- staging/src/k8s.io/component-base/metrics/counter.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/staging/src/k8s.io/component-base/metrics/counter.go b/staging/src/k8s.io/component-base/metrics/counter.go index 7344945e177..34c177fe1ac 100644 --- a/staging/src/k8s.io/component-base/metrics/counter.go +++ b/staging/src/k8s.io/component-base/metrics/counter.go @@ -20,6 +20,7 @@ import ( "context" "github.com/blang/semver" "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" ) // Counter is our internal representation for our wrapping struct around prometheus @@ -31,6 +32,9 @@ type Counter struct { selfCollector } +// The implementation of the Metric interface is expected by testutil.GetCounterMetricValue. +var _ Metric = &Counter{} + // NewCounter returns an object which satisfies the kubeCollector and CounterMetric interfaces. // However, the object returned will not measure anything unless the collector is first // registered, since the metric is lazily instantiated. @@ -46,6 +50,14 @@ func NewCounter(opts *CounterOpts) *Counter { return kc } +func (c *Counter) Desc() *prometheus.Desc { + return c.metric.Desc() +} + +func (c *Counter) Write(to *dto.Metric) error { + return c.metric.Write(to) +} + // Reset resets the underlying prometheus Counter to start counting from 0 again func (c *Counter) Reset() { if !c.IsCreated() { From 98f75290ba89f5dc17d9a718763d556b1e8b4b2c Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Tue, 23 Feb 2021 21:23:31 +0100 Subject: [PATCH 3/3] generic ephemeral volume: simpler metrics A CounterVector with status as label may create unnecessary overhead and using the success case with the empty label value wasn't easy. It's better to have two seperate counters, one for total number of calls and one for failed calls. --- pkg/controller/volume/ephemeral/controller.go | 11 ++------ .../volume/ephemeral/controller_test.go | 22 +++++++-------- .../volume/ephemeral/metrics/metrics.go | 27 +++++++++++-------- 3 files changed, 29 insertions(+), 31 deletions(-) diff --git a/pkg/controller/volume/ephemeral/controller.go b/pkg/controller/volume/ephemeral/controller.go index 07c1c200771..0f474e9470f 100644 --- a/pkg/controller/volume/ephemeral/controller.go +++ b/pkg/controller/volume/ephemeral/controller.go @@ -25,7 +25,6 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" @@ -282,16 +281,10 @@ func (ec *ephemeralController) handleVolume(pod *v1.Pod, vol v1.Volume) error { }, Spec: ephemeral.VolumeClaimTemplate.Spec, } + ephemeralvolumemetrics.EphemeralVolumeCreateAttempts.Inc() _, err = ec.kubeClient.CoreV1().PersistentVolumeClaims(pod.Namespace).Create(context.TODO(), pvc, metav1.CreateOptions{}) - reason := "" - if err != nil { - reason = string(apierrors.ReasonForError(err)) - if reason == "" { - reason = "Unknown" - } - } - ephemeralvolumemetrics.EphemeralVolumeCreate.WithLabelValues(reason).Inc() if err != nil { + ephemeralvolumemetrics.EphemeralVolumeCreateFailures.Inc() return fmt.Errorf("create PVC %s: %v", pvcName, err) } return nil diff --git a/pkg/controller/volume/ephemeral/controller_test.go b/pkg/controller/volume/ephemeral/controller_test.go index 158997001c5..7b1c29ea5a7 100644 --- a/pkg/controller/volume/ephemeral/controller_test.go +++ b/pkg/controller/volume/ephemeral/controller_test.go @@ -22,7 +22,7 @@ import ( "sort" "testing" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" // storagev1 "k8s.io/api/storage/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" // "k8s.io/apimachinery/pkg/types" @@ -116,7 +116,7 @@ func TestSyncHandler(t *testing.T) { name: "create-conflict", pods: []*v1.Pod{testPodWithEphemeral}, podKey: podKey(testPodWithEphemeral), - expectedMetrics: expectedMetrics{0, 1}, + expectedMetrics: expectedMetrics{1, 1}, expectedError: true, }, } @@ -139,7 +139,7 @@ func TestSyncHandler(t *testing.T) { } fakeKubeClient := createTestClient(objects...) - if tc.expectedMetrics.numConflicts > 0 { + if tc.expectedMetrics.numFailures > 0 { fakeKubeClient.PrependReactor("create", "persistentvolumeclaims", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { return true, nil, apierrors.NewConflict(action.GetResource().GroupResource(), "fake name", errors.New("fake conflict")) }) @@ -245,22 +245,22 @@ func createTestClient(objects ...runtime.Object) *fake.Clientset { // Metrics helpers type expectedMetrics struct { - numCreated int - numConflicts int + numCreated int + numFailures int } func expectMetrics(t *testing.T, em expectedMetrics) { t.Helper() - actualCreated, err := testutil.GetCounterMetricValue(ephemeralvolumemetrics.EphemeralVolumeCreate.WithLabelValues("")) + actualCreated, err := testutil.GetCounterMetricValue(ephemeralvolumemetrics.EphemeralVolumeCreateAttempts) handleErr(t, err, "ephemeralVolumeCreate") if actualCreated != float64(em.numCreated) { t.Errorf("Expected PVCs to be created %d, got %v", em.numCreated, actualCreated) } - actualConflicts, err := testutil.GetCounterMetricValue(ephemeralvolumemetrics.EphemeralVolumeCreate.WithLabelValues("Conflict")) + actualConflicts, err := testutil.GetCounterMetricValue(ephemeralvolumemetrics.EphemeralVolumeCreateFailures) handleErr(t, err, "ephemeralVolumeCreate/Conflict") - if actualConflicts != float64(em.numConflicts) { - t.Errorf("Expected PVCs to have conflicts %d, got %v", em.numConflicts, actualConflicts) + if actualConflicts != float64(em.numFailures) { + t.Errorf("Expected PVCs to have conflicts %d, got %v", em.numFailures, actualConflicts) } } @@ -272,6 +272,6 @@ func handleErr(t *testing.T, err error, metricName string) { func setupMetrics() { ephemeralvolumemetrics.RegisterMetrics() - ephemeralvolumemetrics.EphemeralVolumeCreate.Delete(map[string]string{"reason": ""}) - ephemeralvolumemetrics.EphemeralVolumeCreate.Delete(map[string]string{"reason": "Conflict"}) + ephemeralvolumemetrics.EphemeralVolumeCreateAttempts.Reset() + ephemeralvolumemetrics.EphemeralVolumeCreateFailures.Reset() } diff --git a/pkg/controller/volume/ephemeral/metrics/metrics.go b/pkg/controller/volume/ephemeral/metrics/metrics.go index 813f56ba0a5..20eb9f1f6e4 100644 --- a/pkg/controller/volume/ephemeral/metrics/metrics.go +++ b/pkg/controller/volume/ephemeral/metrics/metrics.go @@ -27,20 +27,24 @@ import ( const EphemeralVolumeSubsystem = "ephemeral_volume_controller" var ( - // EphemeralVolumeCreate tracks the number of - // PersistentVolumeClaims().Create calls for each failure - // reason - // (https://pkg.go.dev/k8s.io/apimachinery@v0.20.2/pkg/apis/meta/v1#StatusReason), - // with empty for successful calls. - EphemeralVolumeCreate = metrics.NewCounterVec( + // EphemeralVolumeCreateAttempts tracks the number of + // PersistentVolumeClaims().Create calls (both successful and unsuccessful) + EphemeralVolumeCreateAttempts = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: EphemeralVolumeSubsystem, - Name: "create", + Name: "create_total", Help: "Number of PersistenVolumeClaims creation requests", StabilityLevel: metrics.ALPHA, - }, - []string{"reason"}, - ) + }) + // EphemeralVolumeCreateFailures tracks the number of unsuccessful + // PersistentVolumeClaims().Create calls + EphemeralVolumeCreateFailures = metrics.NewCounter( + &metrics.CounterOpts{ + Subsystem: EphemeralVolumeSubsystem, + Name: "create_failures_total", + Help: "Number of PersistenVolumeClaims creation requests", + StabilityLevel: metrics.ALPHA, + }) ) var registerMetrics sync.Once @@ -48,6 +52,7 @@ var registerMetrics sync.Once // RegisterMetrics registers EphemeralVolume metrics. func RegisterMetrics() { registerMetrics.Do(func() { - legacyregistry.MustRegister(EphemeralVolumeCreate) + legacyregistry.MustRegister(EphemeralVolumeCreateAttempts) + legacyregistry.MustRegister(EphemeralVolumeCreateFailures) }) }