From a4c7e91b591a6704b9355d4cfefeabfaf4eb1d98 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 3 Jun 2021 10:36:02 +0200 Subject: [PATCH] e2e metrics: skip tests when metrics grabbing is disabled The MetricsGrabber checked whether a component supported metrics grabbing, but then tests didn't have an API to use the result of that check. Because metrics grabbing is an optional debug feature, tests must skip checks that depend on metrics data or, when the entire test is about metrics data, skip the test. This is now supported with a special error that gets wrapped and returned by the individual Grab functions. --- test/e2e/framework/metrics/metrics_grabber.go | 29 ++++++++++++++----- .../monitoring/metrics_grabber.go | 18 ++++++++++-- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/test/e2e/framework/metrics/metrics_grabber.go b/test/e2e/framework/metrics/metrics_grabber.go index 8b260c9903f..b794a7a0639 100644 --- a/test/e2e/framework/metrics/metrics_grabber.go +++ b/test/e2e/framework/metrics/metrics_grabber.go @@ -45,6 +45,12 @@ const ( snapshotControllerPort = 9102 ) +// MetricsGrabbingDisabledError is an error that is wrapped by the +// different MetricsGrabber.Wrap functions when metrics grabbing is +// not supported. Tests that check metrics data should then skip +// the check. +var MetricsGrabbingDisabledError = errors.New("metrics grabbing disabled") + // Collection is metrics collection of components type Collection struct { APIServerMetrics APIServerMetrics @@ -74,7 +80,14 @@ type Grabber struct { waitForSnapshotControllerReadyOnce sync.Once } -// NewMetricsGrabber returns new metrics which are initialized. +// NewMetricsGrabber prepares for grabbing metrics data from several different +// components. It should be called when those components are running because +// it needs to communicate with them to determine for which components +// metrics data can be retrieved. +// +// Collecting metrics data is an optional debug feature. Not all clusters will +// support it. If disabled for a component, the corresponding Grab function +// will immediately return an error derived from MetricsGrabbingDisabledError. func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, config *rest.Config, kubelets bool, scheduler bool, controllers bool, apiServer bool, clusterAutoscaler bool, snapshotController bool) (*Grabber, error) { kubeScheduler := "" @@ -183,8 +196,8 @@ func (g *Grabber) grabFromKubeletInternal(nodeName string, kubeletPort int) (Kub // GrabFromScheduler returns metrics from scheduler func (g *Grabber) GrabFromScheduler() (SchedulerMetrics, error) { - if g.kubeScheduler == "" { - return SchedulerMetrics{}, fmt.Errorf("kube-scheduler pod is not registered. Skipping Scheduler's metrics gathering") + if !g.grabFromScheduler { + return SchedulerMetrics{}, fmt.Errorf("kube-scheduler: %w", MetricsGrabbingDisabledError) } var err error @@ -208,7 +221,7 @@ func (g *Grabber) GrabFromScheduler() (SchedulerMetrics, error) { // GrabFromClusterAutoscaler returns metrics from cluster autoscaler func (g *Grabber) GrabFromClusterAutoscaler() (ClusterAutoscalerMetrics, error) { if !g.HasControlPlanePods() && g.externalClient == nil { - return ClusterAutoscalerMetrics{}, fmt.Errorf("Did not find control-plane pods. Skipping ClusterAutoscaler's metrics gathering") + return ClusterAutoscalerMetrics{}, fmt.Errorf("ClusterAutoscaler: %w", MetricsGrabbingDisabledError) } var client clientset.Interface var namespace string @@ -228,8 +241,8 @@ func (g *Grabber) GrabFromClusterAutoscaler() (ClusterAutoscalerMetrics, error) // GrabFromControllerManager returns metrics from controller manager func (g *Grabber) GrabFromControllerManager() (ControllerManagerMetrics, error) { - if g.kubeControllerManager == "" { - return ControllerManagerMetrics{}, fmt.Errorf("kube-controller-manager pod is not registered. Skipping ControllerManager's metrics gathering") + if !g.grabFromControllerManager { + return ControllerManagerMetrics{}, fmt.Errorf("kube-controller-manager: %w", MetricsGrabbingDisabledError) } var err error @@ -258,8 +271,8 @@ func (g *Grabber) GrabFromControllerManager() (ControllerManagerMetrics, error) // GrabFromSnapshotController returns metrics from controller manager func (g *Grabber) GrabFromSnapshotController(podName string, port int) (SnapshotControllerMetrics, error) { - if g.snapshotController == "" { - return SnapshotControllerMetrics{}, fmt.Errorf("SnapshotController pod is not registered. Skipping SnapshotController's metrics gathering") + if !g.grabFromSnapshotController { + return SnapshotControllerMetrics{}, fmt.Errorf("snapshot controller: %w", MetricsGrabbingDisabledError) } // Use overrides if provided via test config flags. diff --git a/test/e2e/instrumentation/monitoring/metrics_grabber.go b/test/e2e/instrumentation/monitoring/metrics_grabber.go index 0237f22f916..e1adb628106 100644 --- a/test/e2e/instrumentation/monitoring/metrics_grabber.go +++ b/test/e2e/instrumentation/monitoring/metrics_grabber.go @@ -18,6 +18,7 @@ package monitoring import ( "context" + "errors" "fmt" "strings" "time" @@ -30,6 +31,7 @@ import ( "k8s.io/kubernetes/test/e2e/framework" e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics" e2enode "k8s.io/kubernetes/test/e2e/framework/node" + e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common" ) @@ -65,6 +67,9 @@ var _ = instrumentation.SIGDescribe("MetricsGrabber", func() { ginkgo.It("should grab all metrics from API server.", func() { ginkgo.By("Connecting to /metrics endpoint") response, err := grabber.GrabFromAPIServer() + if errors.Is(err, e2emetrics.MetricsGrabbingDisabledError) { + e2eskipper.Skipf("%v", err) + } framework.ExpectNoError(err) gomega.Expect(response).NotTo(gomega.BeEmpty()) }) @@ -72,6 +77,9 @@ var _ = instrumentation.SIGDescribe("MetricsGrabber", func() { ginkgo.It("should grab all metrics from a Kubelet.", func() { ginkgo.By("Proxying to Node through the API server") node, err := e2enode.GetRandomReadySchedulableNode(f.ClientSet) + if errors.Is(err, e2emetrics.MetricsGrabbingDisabledError) { + e2eskipper.Skipf("%v", err) + } framework.ExpectNoError(err) response, err := grabber.GrabFromKubelet(node.Name) framework.ExpectNoError(err) @@ -81,10 +89,13 @@ var _ = instrumentation.SIGDescribe("MetricsGrabber", func() { ginkgo.It("should grab all metrics from a Scheduler.", func() { ginkgo.By("Proxying to Pod through the API server") if !masterRegistered { - framework.Logf("Master is node api.Registry. Skipping testing Scheduler metrics.") + e2eskipper.Skipf("Master is node api.Registry. Skipping testing Scheduler metrics.") return } response, err := grabber.GrabFromScheduler() + if errors.Is(err, e2emetrics.MetricsGrabbingDisabledError) { + e2eskipper.Skipf("%v", err) + } framework.ExpectNoError(err) gomega.Expect(response).NotTo(gomega.BeEmpty()) }) @@ -92,10 +103,13 @@ var _ = instrumentation.SIGDescribe("MetricsGrabber", func() { ginkgo.It("should grab all metrics from a ControllerManager.", func() { ginkgo.By("Proxying to Pod through the API server") if !masterRegistered { - framework.Logf("Master is node api.Registry. Skipping testing ControllerManager metrics.") + e2eskipper.Skipf("Master is node api.Registry. Skipping testing ControllerManager metrics.") return } response, err := grabber.GrabFromControllerManager() + if errors.Is(err, e2emetrics.MetricsGrabbingDisabledError) { + e2eskipper.Skipf("%v", err) + } framework.ExpectNoError(err) gomega.Expect(response).NotTo(gomega.BeEmpty()) })