From 564e531aa72f48afde659e5ce464d0c35d608889 Mon Sep 17 00:00:00 2001 From: Grant Griffiths Date: Fri, 8 Jan 2021 15:54:59 -0800 Subject: [PATCH] Add Snapshot Controller e2e metric tests Signed-off-by: Grant Griffiths --- ...volume-snapshot-controller-deployment.yaml | 4 +- test/e2e/apimachinery/garbage_collector.go | 2 +- test/e2e/framework/framework.go | 4 +- test/e2e/framework/metrics/kubelet_metrics.go | 2 +- test/e2e/framework/metrics/metrics_grabber.go | 121 ++++++-- .../metrics/snapshot_controller_metrics.go | 40 +++ test/e2e/framework/test_context.go | 9 + test/e2e/framework/timeouts.go | 57 ++-- .../monitoring/metrics_grabber.go | 2 +- test/e2e/storage/csi_mock_volume.go | 282 ++++++++++++++++++ test/e2e/storage/testsuites/base.go | 2 +- test/e2e/storage/volume_metrics.go | 2 +- test/e2e/suites.go | 2 +- 13 files changed, 467 insertions(+), 62 deletions(-) create mode 100644 test/e2e/framework/metrics/snapshot_controller_metrics.go diff --git a/cluster/addons/volumesnapshots/volume-snapshot-controller/volume-snapshot-controller-deployment.yaml b/cluster/addons/volumesnapshots/volume-snapshot-controller/volume-snapshot-controller-deployment.yaml index bad49c60b9c..e583dadb025 100644 --- a/cluster/addons/volumesnapshots/volume-snapshot-controller/volume-snapshot-controller-deployment.yaml +++ b/cluster/addons/volumesnapshots/volume-snapshot-controller/volume-snapshot-controller-deployment.yaml @@ -22,6 +22,8 @@ spec: serviceAccount: volume-snapshot-controller containers: - name: volume-snapshot-controller - image: k8s.gcr.io/sig-storage/snapshot-controller:v3.0.2 + image: k8s.gcr.io/sig-storage/snapshot-controller:v4.0.0 args: - "--v=5" + - "--metrics-path=/metrics" + - "--http-endpoint=:9102" diff --git a/test/e2e/apimachinery/garbage_collector.go b/test/e2e/apimachinery/garbage_collector.go index 30db7bdb463..5b449c1afe6 100644 --- a/test/e2e/apimachinery/garbage_collector.go +++ b/test/e2e/apimachinery/garbage_collector.go @@ -259,7 +259,7 @@ func 
verifyRemainingObjects(f *framework.Framework, objects map[string]int) (boo func gatherMetrics(f *framework.Framework) { ginkgo.By("Gathering metrics") var summary framework.TestDataSummary - grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, false, false, true, false, false) + grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, false, false, true, false, false, false) if err != nil { framework.Logf("Failed to create MetricsGrabber. Skipping metrics gathering.") } else { diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index 25c031bd27c..143ded1781c 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -296,7 +296,7 @@ func (f *Framework) BeforeEach() { gatherMetricsAfterTest := TestContext.GatherMetricsAfterTest == "true" || TestContext.GatherMetricsAfterTest == "master" if gatherMetricsAfterTest && TestContext.IncludeClusterAutoscalerMetrics { - grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, !ProviderIs("kubemark"), false, false, false, TestContext.IncludeClusterAutoscalerMetrics) + grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, !ProviderIs("kubemark"), false, false, false, TestContext.IncludeClusterAutoscalerMetrics, false) if err != nil { Logf("Failed to create MetricsGrabber (skipping ClusterAutoscaler metrics gathering before test): %v", err) } else { @@ -449,7 +449,7 @@ func (f *Framework) AfterEach() { ginkgo.By("Gathering metrics") // Grab apiserver, scheduler, controller-manager metrics and (optionally) nodes' kubelet metrics. 
grabMetricsFromKubelets := TestContext.GatherMetricsAfterTest != "master" && !ProviderIs("kubemark") - grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, grabMetricsFromKubelets, true, true, true, TestContext.IncludeClusterAutoscalerMetrics) + grabber, err := e2emetrics.NewMetricsGrabber(f.ClientSet, f.KubemarkExternalClusterClientSet, grabMetricsFromKubelets, true, true, true, TestContext.IncludeClusterAutoscalerMetrics, false) if err != nil { Logf("Failed to create MetricsGrabber (skipping metrics gathering): %v", err) } else { diff --git a/test/e2e/framework/metrics/kubelet_metrics.go b/test/e2e/framework/metrics/kubelet_metrics.go index 2c1810204ef..a26eeed5604 100644 --- a/test/e2e/framework/metrics/kubelet_metrics.go +++ b/test/e2e/framework/metrics/kubelet_metrics.go @@ -139,7 +139,7 @@ func getKubeletMetricsFromNode(c clientset.Interface, nodeName string) (KubeletM if c == nil { return GrabKubeletMetricsWithoutProxy(nodeName, "/metrics") } - grabber, err := NewMetricsGrabber(c, nil, true, false, false, false, false) + grabber, err := NewMetricsGrabber(c, nil, true, false, false, false, false, false) if err != nil { return KubeletMetrics{}, err } diff --git a/test/e2e/framework/metrics/metrics_grabber.go b/test/e2e/framework/metrics/metrics_grabber.go index a94e70ee61c..6f795317420 100644 --- a/test/e2e/framework/metrics/metrics_grabber.go +++ b/test/e2e/framework/metrics/metrics_grabber.go @@ -38,40 +38,48 @@ const ( // kubeControllerManagerPort is the default port for the controller manager status server. 
kubeControllerManagerPort = 10257 metricsProxyPod = "metrics-proxy" + // snapshotControllerPort is the port for the snapshot controller + snapshotControllerPort = 9102 ) // Collection is metrics collection of components type Collection struct { - APIServerMetrics APIServerMetrics - ControllerManagerMetrics ControllerManagerMetrics - KubeletMetrics map[string]KubeletMetrics - SchedulerMetrics SchedulerMetrics - ClusterAutoscalerMetrics ClusterAutoscalerMetrics + APIServerMetrics APIServerMetrics + ControllerManagerMetrics ControllerManagerMetrics + SnapshotControllerMetrics SnapshotControllerMetrics + KubeletMetrics map[string]KubeletMetrics + SchedulerMetrics SchedulerMetrics + ClusterAutoscalerMetrics ClusterAutoscalerMetrics } // Grabber provides functions which grab metrics from components type Grabber struct { - client clientset.Interface - externalClient clientset.Interface - grabFromAPIServer bool - grabFromControllerManager bool - grabFromKubelets bool - grabFromScheduler bool - grabFromClusterAutoscaler bool - kubeScheduler string - waitForSchedulerReadyOnce sync.Once - kubeControllerManager string - waitForControllerManagerReadyOnce sync.Once + client clientset.Interface + externalClient clientset.Interface + grabFromAPIServer bool + grabFromControllerManager bool + grabFromKubelets bool + grabFromScheduler bool + grabFromClusterAutoscaler bool + grabFromSnapshotController bool + kubeScheduler string + waitForSchedulerReadyOnce sync.Once + kubeControllerManager string + waitForControllerManagerReadyOnce sync.Once + snapshotController string + waitForSnapshotControllerReadyOnce sync.Once } // NewMetricsGrabber returns new metrics which are initialized. 
-func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets bool, scheduler bool, controllers bool, apiServer bool, clusterAutoscaler bool) (*Grabber, error) { +func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets bool, scheduler bool, controllers bool, apiServer bool, clusterAutoscaler bool, snapshotController bool) (*Grabber, error) { kubeScheduler := "" kubeControllerManager := "" + snapshotControllerManager := "" regKubeScheduler := regexp.MustCompile("kube-scheduler-.*") regKubeControllerManager := regexp.MustCompile("kube-controller-manager-.*") + regSnapshotController := regexp.MustCompile("volume-snapshot-controller.*") podList, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(context.TODO(), metav1.ListOptions{}) if err != nil { @@ -87,7 +95,10 @@ func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets b if regKubeControllerManager.MatchString(pod.Name) { kubeControllerManager = pod.Name } - if kubeScheduler != "" && kubeControllerManager != "" { + if regSnapshotController.MatchString(pod.Name) { + snapshotControllerManager = pod.Name + } + if kubeScheduler != "" && kubeControllerManager != "" && snapshotControllerManager != "" { break } } @@ -99,20 +110,26 @@ func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets b controllers = false klog.Warningf("Can't find kube-controller-manager pod. Grabbing metrics from kube-controller-manager is disabled.") } + if snapshotControllerManager == "" { + snapshotController = false + klog.Warningf("Can't find snapshot-controller pod. Grabbing metrics from snapshot-controller is disabled.") + } if ec == nil { klog.Warningf("Did not receive an external client interface. 
Grabbing metrics from ClusterAutoscaler is disabled.") } return &Grabber{ - client: c, - externalClient: ec, - grabFromAPIServer: apiServer, - grabFromControllerManager: controllers, - grabFromKubelets: kubelets, - grabFromScheduler: scheduler, - grabFromClusterAutoscaler: clusterAutoscaler, - kubeScheduler: kubeScheduler, - kubeControllerManager: kubeControllerManager, + client: c, + externalClient: ec, + grabFromAPIServer: apiServer, + grabFromControllerManager: controllers, + grabFromKubelets: kubelets, + grabFromScheduler: scheduler, + grabFromClusterAutoscaler: clusterAutoscaler, + grabFromSnapshotController: snapshotController, + kubeScheduler: kubeScheduler, + kubeControllerManager: kubeControllerManager, + snapshotController: snapshotControllerManager, }, nil } @@ -220,6 +237,48 @@ func (g *Grabber) GrabFromControllerManager() (ControllerManagerMetrics, error) return parseControllerManagerMetrics(output) } +// GrabFromSnapshotController returns metrics from the snapshot controller +func (g *Grabber) GrabFromSnapshotController(podName string, port int) (SnapshotControllerMetrics, error) { + if g.snapshotController == "" { + return SnapshotControllerMetrics{}, fmt.Errorf("SnapshotController pod is not registered. Skipping SnapshotController's metrics gathering") + } + + // Use overrides if provided via test config flags. + // Otherwise, use the default snapshot controller pod name and port. 
+ if podName == "" { + podName = g.snapshotController + } + if port == 0 { + port = snapshotControllerPort + } + + var err error + g.waitForSnapshotControllerReadyOnce.Do(func() { + if readyErr := e2epod.WaitForPodsReady(g.client, metav1.NamespaceSystem, podName, 0); readyErr != nil { + err = fmt.Errorf("error waiting for snapshot controller pod to be ready: %w", readyErr) + return + } + + var lastMetricsFetchErr error + if metricsWaitErr := wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { + _, lastMetricsFetchErr = g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, port) + return lastMetricsFetchErr == nil, nil + }); metricsWaitErr != nil { + err = fmt.Errorf("error waiting for snapshot controller pod to expose metrics: %v; %v", metricsWaitErr, lastMetricsFetchErr) + return + } + }) + if err != nil { + return SnapshotControllerMetrics{}, err + } + + output, err := g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, port) + if err != nil { + return SnapshotControllerMetrics{}, err + } + return parseSnapshotControllerMetrics(output) +} + // GrabFromAPIServer returns metrics from API server func (g *Grabber) GrabFromAPIServer() (APIServerMetrics, error) { output, err := g.getMetricsFromAPIServer() @@ -257,6 +316,14 @@ func (g *Grabber) Grab() (Collection, error) { result.ControllerManagerMetrics = metrics } } + if g.grabFromSnapshotController { + metrics, err := g.GrabFromSnapshotController(g.snapshotController, snapshotControllerPort) + if err != nil { + errs = append(errs, err) + } else { + result.SnapshotControllerMetrics = metrics + } + } if g.grabFromClusterAutoscaler { metrics, err := g.GrabFromClusterAutoscaler() if err != nil { diff --git a/test/e2e/framework/metrics/snapshot_controller_metrics.go b/test/e2e/framework/metrics/snapshot_controller_metrics.go new file mode 100644 index 00000000000..41e381b8102 --- /dev/null +++ b/test/e2e/framework/metrics/snapshot_controller_metrics.go @@ -0,0 +1,40 @@ +/* +Copyright 
2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import "k8s.io/component-base/metrics/testutil" + +// SnapshotControllerMetrics is metrics for the snapshot controller +type SnapshotControllerMetrics testutil.Metrics + +// Equal returns true if all metrics are the same as the arguments. +func (m *SnapshotControllerMetrics) Equal(o SnapshotControllerMetrics) bool { + return (*testutil.Metrics)(m).Equal(testutil.Metrics(o)) +} + +func newSnapshotControllerMetrics() SnapshotControllerMetrics { + result := testutil.NewMetrics() + return SnapshotControllerMetrics(result) +} + +func parseSnapshotControllerMetrics(data string) (SnapshotControllerMetrics, error) { + result := newSnapshotControllerMetrics() + if err := testutil.ParseMetrics(data, (*testutil.Metrics)(&result)); err != nil { + return SnapshotControllerMetrics{}, err + } + return result, nil +} diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go index cce5ad97fec..832e51706bf 100644 --- a/test/e2e/framework/test_context.go +++ b/test/e2e/framework/test_context.go @@ -182,6 +182,12 @@ type TestContextType struct { // DockerConfigFile is a file that contains credentials which can be used to pull images from certain private registries, needed for a test. DockerConfigFile string + + // SnapshotControllerPodName is the name used for identifying the snapshot controller pod. 
+ SnapshotControllerPodName string + + // SnapshotControllerHTTPPort is the port used for communicating with the snapshot controller HTTP endpoint. + SnapshotControllerHTTPPort int } // NodeKillerConfig describes configuration of NodeKiller -- a utility to @@ -315,6 +321,9 @@ func RegisterCommonFlags(flags *flag.FlagSet) { flags.StringVar(&TestContext.ProgressReportURL, "progress-report-url", "", "The URL to POST progress updates to as the suite runs to assist in aiding integrations. If empty, no messages sent.") flags.StringVar(&TestContext.SpecSummaryOutput, "spec-dump", "", "The file to dump all ginkgo.SpecSummary to after tests run. If empty, no objects are saved/printed.") flags.StringVar(&TestContext.DockerConfigFile, "docker-config-file", "", "A file that contains credentials which can be used to pull images from certain private registries, needed for a test.") + + flags.StringVar(&TestContext.SnapshotControllerPodName, "snapshot-controller-pod-name", "", "The pod name to use for identifying the snapshot controller in the kube-system namespace.") + flags.IntVar(&TestContext.SnapshotControllerHTTPPort, "snapshot-controller-http-port", 0, "The port to use for snapshot controller HTTP communication.") } // RegisterClusterFlags registers flags specific to the cluster e2e test suite. 
diff --git a/test/e2e/framework/timeouts.go b/test/e2e/framework/timeouts.go index 5819c54738e..c0aafcce490 100644 --- a/test/e2e/framework/timeouts.go +++ b/test/e2e/framework/timeouts.go @@ -20,19 +20,20 @@ import "time" const ( // Default timeouts to be used in TimeoutContext - podStartTimeout = 5 * time.Minute - podStartShortTimeout = 2 * time.Minute - podStartSlowTimeout = 15 * time.Minute - podDeleteTimeout = 5 * time.Minute - claimProvisionTimeout = 5 * time.Minute - claimProvisionShortTimeout = 1 * time.Minute - claimBoundTimeout = 3 * time.Minute - pvReclaimTimeout = 3 * time.Minute - pvBoundTimeout = 3 * time.Minute - pvDeleteTimeout = 3 * time.Minute - pvDeleteSlowTimeout = 20 * time.Minute - snapshotCreateTimeout = 5 * time.Minute - snapshotDeleteTimeout = 5 * time.Minute + podStartTimeout = 5 * time.Minute + podStartShortTimeout = 2 * time.Minute + podStartSlowTimeout = 15 * time.Minute + podDeleteTimeout = 5 * time.Minute + claimProvisionTimeout = 5 * time.Minute + claimProvisionShortTimeout = 1 * time.Minute + claimBoundTimeout = 3 * time.Minute + pvReclaimTimeout = 3 * time.Minute + pvBoundTimeout = 3 * time.Minute + pvDeleteTimeout = 3 * time.Minute + pvDeleteSlowTimeout = 20 * time.Minute + snapshotCreateTimeout = 5 * time.Minute + snapshotDeleteTimeout = 5 * time.Minute + snapshotControllerMetricsTimeout = 5 * time.Minute ) // TimeoutContext contains timeout settings for several actions. @@ -77,23 +78,27 @@ type TimeoutContext struct { // SnapshotDelete is how long for snapshot to delete snapshotContent. SnapshotDelete time.Duration + + // SnapshotControllerMetrics is how long to wait for snapshot controller metrics. + SnapshotControllerMetrics time.Duration } // NewTimeoutContextWithDefaults returns a TimeoutContext with default values. 
func NewTimeoutContextWithDefaults() *TimeoutContext { return &TimeoutContext{ - PodStart: podStartTimeout, - PodStartShort: podStartShortTimeout, - PodStartSlow: podStartSlowTimeout, - PodDelete: podDeleteTimeout, - ClaimProvision: claimProvisionTimeout, - ClaimProvisionShort: claimProvisionShortTimeout, - ClaimBound: claimBoundTimeout, - PVReclaim: pvReclaimTimeout, - PVBound: pvBoundTimeout, - PVDelete: pvDeleteTimeout, - PVDeleteSlow: pvDeleteSlowTimeout, - SnapshotCreate: snapshotCreateTimeout, - SnapshotDelete: snapshotDeleteTimeout, + PodStart: podStartTimeout, + PodStartShort: podStartShortTimeout, + PodStartSlow: podStartSlowTimeout, + PodDelete: podDeleteTimeout, + ClaimProvision: claimProvisionTimeout, + ClaimProvisionShort: claimProvisionShortTimeout, + ClaimBound: claimBoundTimeout, + PVReclaim: pvReclaimTimeout, + PVBound: pvBoundTimeout, + PVDelete: pvDeleteTimeout, + PVDeleteSlow: pvDeleteSlowTimeout, + SnapshotCreate: snapshotCreateTimeout, + SnapshotDelete: snapshotDeleteTimeout, + SnapshotControllerMetrics: snapshotControllerMetricsTimeout, } } diff --git a/test/e2e/instrumentation/monitoring/metrics_grabber.go b/test/e2e/instrumentation/monitoring/metrics_grabber.go index b4f2fbaab1b..8ccd877f96e 100644 --- a/test/e2e/instrumentation/monitoring/metrics_grabber.go +++ b/test/e2e/instrumentation/monitoring/metrics_grabber.go @@ -51,7 +51,7 @@ var _ = instrumentation.SIGDescribe("MetricsGrabber", func() { } } gomega.Eventually(func() error { - grabber, err = e2emetrics.NewMetricsGrabber(c, ec, true, true, true, true, true) + grabber, err = e2emetrics.NewMetricsGrabber(c, ec, true, true, true, true, true, true) if err != nil { return fmt.Errorf("failed to create metrics grabber: %v", err) } diff --git a/test/e2e/storage/csi_mock_volume.go b/test/e2e/storage/csi_mock_volume.go index e05ed0ee14b..3427c442f87 100644 --- a/test/e2e/storage/csi_mock_volume.go +++ b/test/e2e/storage/csi_mock_volume.go @@ -48,6 +48,7 @@ import ( 
"k8s.io/kubernetes/pkg/kubelet/events" "k8s.io/kubernetes/test/e2e/framework" e2eevents "k8s.io/kubernetes/test/e2e/framework/events" + e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2epv "k8s.io/kubernetes/test/e2e/framework/pv" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" @@ -1621,6 +1622,107 @@ var _ = utils.SIGDescribe("CSI mock volume", func() { }) } }) + + ginkgo.Context("CSI Snapshot Controller metrics [Feature:VolumeSnapshotDataSource]", func() { + tests := []struct { + name string + pattern storageframework.TestPattern + }{ + { + name: "snapshot controller should emit dynamic CreateSnapshot, CreateSnapshotAndReady, and DeleteSnapshot metrics", + pattern: storageframework.DynamicSnapshotDelete, + }, + { + name: "snapshot controller should emit pre-provisioned CreateSnapshot, CreateSnapshotAndReady, and DeleteSnapshot metrics", + pattern: storageframework.PreprovisionedSnapshotDelete, + }, + } + for _, test := range tests { + ginkgo.It(test.name, func() { + init(testParameters{ + disableAttach: true, + registerDriver: true, + enableSnapshot: true, + }) + + sDriver, ok := m.driver.(storageframework.SnapshottableTestDriver) + if !ok { + e2eskipper.Skipf("mock driver does not support snapshots -- skipping") + } + defer cleanup() + + metricsGrabber, err := e2emetrics.NewMetricsGrabber(m.config.Framework.ClientSet, nil, false, false, false, false, false, true) + if err != nil { + framework.Failf("Error creating metrics grabber : %v", err) + } + + // Grab initial metrics - if this fails, snapshot controller metrics are not setup. Skip in this case. 
+ _, err = metricsGrabber.GrabFromSnapshotController(framework.TestContext.SnapshotControllerPodName, framework.TestContext.SnapshotControllerHTTPPort) + if err != nil { + e2eskipper.Skipf("Snapshot controller metrics not found -- skipping") + } + + ginkgo.By("getting all initial metric values") + metricsTestConfig := newSnapshotMetricsTestConfig("snapshot_controller_operation_total_seconds_count", + "count", + m.config.GetUniqueDriverName(), + "CreateSnapshot", + "success", + "", + test.pattern) + createSnapshotMetrics := newSnapshotControllerMetrics(metricsTestConfig, metricsGrabber) + originalCreateSnapshotCount, _ := createSnapshotMetrics.getSnapshotControllerMetricValue() + metricsTestConfig.operationName = "CreateSnapshotAndReady" + createSnapshotAndReadyMetrics := newSnapshotControllerMetrics(metricsTestConfig, metricsGrabber) + originalCreateSnapshotAndReadyCount, _ := createSnapshotAndReadyMetrics.getSnapshotControllerMetricValue() + + metricsTestConfig.operationName = "DeleteSnapshot" + deleteSnapshotMetrics := newSnapshotControllerMetrics(metricsTestConfig, metricsGrabber) + originalDeleteSnapshotCount, _ := deleteSnapshotMetrics.getSnapshotControllerMetricValue() + + ginkgo.By("Creating storage class") + var sc *storagev1.StorageClass + if dDriver, ok := m.driver.(storageframework.DynamicPVTestDriver); ok { + sc = dDriver.GetDynamicProvisionStorageClass(m.config, "") + } + class, err := m.cs.StorageV1().StorageClasses().Create(context.TODO(), sc, metav1.CreateOptions{}) + framework.ExpectNoError(err, "Failed to create storage class: %v", err) + m.sc[class.Name] = class + pvc := e2epv.MakePersistentVolumeClaim(e2epv.PersistentVolumeClaimConfig{ + Name: "snapshot-test-pvc", + StorageClassName: &(class.Name), + }, f.Namespace.Name) + + ginkgo.By(fmt.Sprintf("Creating PVC %s/%s", pvc.Namespace, pvc.Name)) + pvc, err = m.cs.CoreV1().PersistentVolumeClaims(f.Namespace.Name).Create(context.TODO(), pvc, metav1.CreateOptions{}) + framework.ExpectNoError(err, 
"Failed to create claim: %v", err) + + ginkgo.By("Wait for PVC to be Bound") + _, err = e2epv.WaitForPVClaimBoundPhase(m.cs, []*v1.PersistentVolumeClaim{pvc}, 1*time.Minute) + framework.ExpectNoError(err, "Failed to create claim: %v", err) + + ginkgo.By("Creating snapshot") + parameters := map[string]string{} + sr := storageframework.CreateSnapshotResource(sDriver, m.config, test.pattern, pvc.Name, pvc.Namespace, f.Timeouts, parameters) + framework.ExpectNoError(err, "failed to create snapshot") + + ginkgo.By("Checking for CreateSnapshot metrics") + createSnapshotMetrics.waitForSnapshotControllerMetric(originalCreateSnapshotCount+1.0, f.Timeouts.SnapshotControllerMetrics) + + ginkgo.By("Checking for CreateSnapshotAndReady metrics") + err = utils.WaitForSnapshotReady(m.config.Framework.DynamicClient, pvc.Namespace, sr.Vs.GetName(), framework.Poll, f.Timeouts.SnapshotCreate) + framework.ExpectNoError(err, "failed to wait for snapshot ready") + createSnapshotAndReadyMetrics.waitForSnapshotControllerMetric(originalCreateSnapshotAndReadyCount+1.0, f.Timeouts.SnapshotControllerMetrics) + + // delete the snapshot and check if the snapshot is deleted + deleteSnapshot(m.cs, m.config, sr.Vs) + + ginkgo.By("check for delete metrics") + metricsTestConfig.operationName = "DeleteSnapshot" + deleteSnapshotMetrics.waitForSnapshotControllerMetric(originalDeleteSnapshotCount+1.0, f.Timeouts.SnapshotControllerMetrics) + }) + } + }) }) func deleteSnapshot(cs clientset.Interface, config *storageframework.PerTestConfig, snapshot *unstructured.Unstructured) { @@ -2136,3 +2238,183 @@ func createPreHook(method string, callback func(counter int64) error) *drivers.H }(), } } + +type snapshotMetricsTestConfig struct { + // expected values + metricName string + metricType string + driverName string + operationName string + operationStatus string + snapshotType string + le string +} + +type snapshotControllerMetrics struct { + // configuration for metric + cfg snapshotMetricsTestConfig + 
metricsGrabber *e2emetrics.Grabber + + // results + countMetrics map[string]float64 + sumMetrics map[string]float64 + bucketMetrics map[string]float64 +} + +func newSnapshotMetricsTestConfig(metricName, metricType, driverName, operationName, operationStatus, le string, pattern storageframework.TestPattern) snapshotMetricsTestConfig { + var snapshotType string + switch pattern.SnapshotType { + case storageframework.DynamicCreatedSnapshot: + snapshotType = "dynamic" + + case storageframework.PreprovisionedCreatedSnapshot: + snapshotType = "pre-provisioned" + + default: + framework.Failf("invalid snapshotType: %v", pattern.SnapshotType) + } + + return snapshotMetricsTestConfig{ + metricName: metricName, + metricType: metricType, + driverName: driverName, + operationName: operationName, + operationStatus: operationStatus, + snapshotType: snapshotType, + le: le, + } +} + +func newSnapshotControllerMetrics(cfg snapshotMetricsTestConfig, metricsGrabber *e2emetrics.Grabber) *snapshotControllerMetrics { + return &snapshotControllerMetrics{ + cfg: cfg, + metricsGrabber: metricsGrabber, + + countMetrics: make(map[string]float64), + sumMetrics: make(map[string]float64), + bucketMetrics: make(map[string]float64), + } +} + +func (scm *snapshotControllerMetrics) waitForSnapshotControllerMetric(expectedValue float64, timeout time.Duration) { + metricKey := scm.getMetricKey() + if successful := utils.WaitUntil(10*time.Second, timeout, func() bool { + // get metric value + actualValue, err := scm.getSnapshotControllerMetricValue() + if err != nil { + return false + } + + // Another operation could have finished from a previous test, + // so we check if we have at least the expected value. 
+ if actualValue < expectedValue { + return false + } + + return true + }); successful { + return + } + + scm.showMetricsFailure(metricKey) + framework.Failf("Unable to get valid snapshot controller metrics after %v", timeout) +} + +func (scm *snapshotControllerMetrics) getSnapshotControllerMetricValue() (float64, error) { + metricKey := scm.getMetricKey() + + // grab and parse into readable format + err := scm.grabSnapshotControllerMetrics() + if err != nil { + return 0, err + } + + metrics := scm.getMetricsTable() + actual, ok := metrics[metricKey] + if !ok { + return 0, fmt.Errorf("did not find metric for key %s", metricKey) + } + + return actual, nil +} + +func (scm *snapshotControllerMetrics) getMetricsTable() map[string]float64 { + var metrics map[string]float64 + switch scm.cfg.metricType { + case "count": + metrics = scm.countMetrics + + case "sum": + metrics = scm.sumMetrics + + case "bucket": + metrics = scm.bucketMetrics + } + + return metrics +} + +func (scm *snapshotControllerMetrics) showMetricsFailure(metricKey string) { + framework.Logf("failed to find metric key %s inside of the following metrics:", metricKey) + + metrics := scm.getMetricsTable() + for k, v := range metrics { + framework.Logf("%s: %v", k, v) + } +} + +func (scm *snapshotControllerMetrics) grabSnapshotControllerMetrics() error { + // pull all metrics + metrics, err := scm.metricsGrabber.GrabFromSnapshotController(framework.TestContext.SnapshotControllerPodName, framework.TestContext.SnapshotControllerHTTPPort) + if err != nil { + return err + } + + for method, samples := range metrics { + + for _, sample := range samples { + operationName := string(sample.Metric["operation_name"]) + driverName := string(sample.Metric["driver_name"]) + operationStatus := string(sample.Metric["operation_status"]) + snapshotType := string(sample.Metric["snapshot_type"]) + le := string(sample.Metric["le"]) + key := snapshotMetricKey(scm.cfg.metricName, driverName, operationName, operationStatus, 
snapshotType, le) + + switch method { + case "snapshot_controller_operation_total_seconds_count": + for _, sample := range samples { + scm.countMetrics[key] = float64(sample.Value) + } + + case "snapshot_controller_operation_total_seconds_sum": + for _, sample := range samples { + scm.sumMetrics[key] = float64(sample.Value) + } + + case "snapshot_controller_operation_total_seconds_bucket": + for _, sample := range samples { + scm.bucketMetrics[key] = float64(sample.Value) + } + } + } + } + + return nil +} + +func (scm *snapshotControllerMetrics) getMetricKey() string { + return snapshotMetricKey(scm.cfg.metricName, scm.cfg.driverName, scm.cfg.operationName, scm.cfg.operationStatus, scm.cfg.snapshotType, scm.cfg.le) +} + +func snapshotMetricKey(metricName, driverName, operationName, operationStatus, snapshotType, le string) string { + key := driverName + + // build key for shorthand metrics storage + for _, s := range []string{metricName, operationName, operationStatus, snapshotType, le} { + if s != "" { + key = fmt.Sprintf("%s_%s", key, s) + } + } + + return key +} diff --git a/test/e2e/storage/testsuites/base.go b/test/e2e/storage/testsuites/base.go index 4eb34ca8915..01499aff92e 100644 --- a/test/e2e/storage/testsuites/base.go +++ b/test/e2e/storage/testsuites/base.go @@ -107,7 +107,7 @@ func getVolumeOpCounts(c clientset.Interface, pluginName string) opCounts { nodeLimit := 25 - metricsGrabber, err := e2emetrics.NewMetricsGrabber(c, nil, true, false, true, false, false) + metricsGrabber, err := e2emetrics.NewMetricsGrabber(c, nil, true, false, true, false, false, false) if err != nil { framework.ExpectNoError(err, "Error creating metrics grabber: %v", err) diff --git a/test/e2e/storage/volume_metrics.go b/test/e2e/storage/volume_metrics.go index c7de2534308..1bd8ef1f80f 100644 --- a/test/e2e/storage/volume_metrics.go +++ b/test/e2e/storage/volume_metrics.go @@ -72,7 +72,7 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() { VolumeMode: 
&test.VolumeMode, }, ns) - metricsGrabber, err = e2emetrics.NewMetricsGrabber(c, nil, true, false, true, false, false) + metricsGrabber, err = e2emetrics.NewMetricsGrabber(c, nil, true, false, true, false, false, false) if err != nil { framework.Failf("Error creating metrics grabber : %v", err) diff --git a/test/e2e/suites.go b/test/e2e/suites.go index d14da6dfe91..790a58978d6 100644 --- a/test/e2e/suites.go +++ b/test/e2e/suites.go @@ -61,7 +61,7 @@ func gatherTestSuiteMetrics() error { } // Grab metrics for apiserver, scheduler, controller-manager, kubelet (for non-kubemark case) and cluster autoscaler (optionally). - grabber, err := e2emetrics.NewMetricsGrabber(c, nil, !framework.ProviderIs("kubemark"), true, true, true, framework.TestContext.IncludeClusterAutoscalerMetrics) + grabber, err := e2emetrics.NewMetricsGrabber(c, nil, !framework.ProviderIs("kubemark"), true, true, true, framework.TestContext.IncludeClusterAutoscalerMetrics, false) if err != nil { return fmt.Errorf("failed to create MetricsGrabber: %v", err) }