From 41345418cb295202111e6be80b021d7ca85f01b0 Mon Sep 17 00:00:00 2001 From: Andy Goldstein Date: Thu, 25 May 2017 16:27:50 -0400 Subject: [PATCH] Support grabbing test suite metrics Update the "interesting" controller-manager metrics to match the current names for the garbage collector, and add namespace controller metrics to the list. --- hack/verify-flags/known-flags.txt | 1 + test/e2e/e2e.go | 39 ++++++++++++++++++++++++++++++ test/e2e/framework/framework.go | 5 ++-- test/e2e/framework/metrics_util.go | 20 ++++++++++++--- test/e2e/framework/test_context.go | 2 ++ 5 files changed, 61 insertions(+), 6 deletions(-) diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt index cf0b1e31268..f65410506e1 100644 --- a/hack/verify-flags/known-flags.txt +++ b/hack/verify-flags/known-flags.txt @@ -290,6 +290,7 @@ garbage-collector-enabled gather-logs-sizes gather-metrics-at-teardown gather-resource-usage +gather-suite-metrics-at-teardown gce-multizone gce-project gce-service-account diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 37b76c85ae4..349c02adc1b 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -18,6 +18,7 @@ package e2e import ( "fmt" + "io/ioutil" "os" "path" "sync" @@ -39,6 +40,7 @@ import ( "k8s.io/kubernetes/pkg/client/clientset_generated/clientset" "k8s.io/kubernetes/pkg/cloudprovider/providers/azure" gcecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/gce" + "k8s.io/kubernetes/pkg/metrics" "k8s.io/kubernetes/pkg/util/logs" commontest "k8s.io/kubernetes/test/e2e/common" "k8s.io/kubernetes/test/e2e/framework" @@ -266,8 +268,45 @@ var _ = ginkgo.SynchronizedAfterSuite(func() { if framework.TestContext.ReportDir != "" { framework.CoreDump(framework.TestContext.ReportDir) } + if framework.TestContext.GatherSuiteMetricsAfterTest { + if err := gatherTestSuiteMetrics(); err != nil { + framework.Logf("Error gathering metrics: %v", err) + } + } }) +func gatherTestSuiteMetrics() error { + framework.Logf("Gathering metrics") + c, err := framework.LoadClientset() + if err != nil { + return fmt.Errorf("error loading client: %v", err) + } + + // Grab metrics for apiserver, scheduler, controller-manager, kubelet (for non-kubemark case). + grabber, err := metrics.NewMetricsGrabber(c, !framework.ProviderIs("kubemark"), true, true, true) + if err != nil { + return fmt.Errorf("failed to create MetricsGrabber: %v", err) + } + + received, err := grabber.Grab() + if err != nil { + return fmt.Errorf("failed to grab metrics: %v", err) + } + + metricsForE2E := (*framework.MetricsForE2E)(&received) + metricsJson := metricsForE2E.PrintJSON() + if framework.TestContext.ReportDir != "" { + filePath := path.Join(framework.TestContext.ReportDir, "MetricsForE2ESuite_"+time.Now().Format(time.RFC3339)+".json") + if err := ioutil.WriteFile(filePath, []byte(metricsJson), 0644); err != nil { + return fmt.Errorf("error writing to %q: %v", filePath, err) + } + } else { + framework.Logf("\n\nTest Suite Metrics:\n%s\n\n", metricsJson) + } + + return nil +} + // TestE2E checks configuration parameters (specified through flags) and then runs // E2E tests using the Ginkgo runner. // If a "report directory" is specified, one or more JUnit test reports will be diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index d9b0eb4fdc3..cb97e447442 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -325,9 +325,8 @@ func (f *Framework) AfterEach() { if TestContext.GatherMetricsAfterTest { By("Gathering metrics") - // Grab apiserver metrics and nodes' kubelet metrics (for non-kubemark case). - // TODO: enable Scheduler and ControllerManager metrics grabbing when Master's Kubelet will be registered. - grabber, err := metrics.NewMetricsGrabber(f.ClientSet, !ProviderIs("kubemark"), false, false, true) + // Grab apiserver, scheduler, controller-manager metrics and nodes' kubelet metrics (for non-kubemark case). + grabber, err := metrics.NewMetricsGrabber(f.ClientSet, !ProviderIs("kubemark"), true, true, true) if err != nil { Logf("Failed to create MetricsGrabber (skipping metrics gathering): %v", err) } else { diff --git a/test/e2e/framework/metrics_util.go b/test/e2e/framework/metrics_util.go index 71d4b4621ee..f17d0ee2c45 100644 --- a/test/e2e/framework/metrics_util.go +++ b/test/e2e/framework/metrics_util.go @@ -122,9 +122,23 @@ var InterestingApiServerMetrics = []string{ } var InterestingControllerManagerMetrics = []string{ - "garbage_collector_event_queue_latency", - "garbage_collector_dirty_queue_latency", - "garbage_collector_orhan_queue_latency", + "garbage_collector_attempt_to_delete_queue_latency", + "garbage_collector_attempt_to_delete_work_duration", + "garbage_collector_attempt_to_orphan_queue_latency", + "garbage_collector_attempt_to_orphan_work_duration", + "garbage_collector_dirty_processing_latency_microseconds", + "garbage_collector_event_processing_latency_microseconds", + "garbage_collector_graph_changes_queue_latency", + "garbage_collector_graph_changes_work_duration", + "garbage_collector_orphan_processing_latency_microseconds", + + "namespace_queue_latency", + "namespace_queue_latency_sum", + "namespace_queue_latency_count", + "namespace_retries", + "namespace_work_duration", + "namespace_work_duration_sum", + "namespace_work_duration_count", } var InterestingKubeletMetrics = []string{ diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go index 779b5ceaf63..b149142ff16 100644 --- a/test/e2e/framework/test_context.go +++ b/test/e2e/framework/test_context.go @@ -73,6 +73,7 @@ type TestContextType struct { GatherKubeSystemResourceUsageData string GatherLogsSizes bool GatherMetricsAfterTest bool + GatherSuiteMetricsAfterTest bool // Currently supported values are 'hr' for human-readable and 'json'. It's a comma separated list. OutputPrintType string // NodeSchedulableTimeout is the timeout for waiting for all nodes to be schedulable. @@ -162,6 +163,7 @@ func RegisterCommonFlags() { flag.StringVar(&TestContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", "false", "If set to 'true' or 'all' framework will be monitoring resource usage of system all add-ons in (some) e2e tests, if set to 'master' framework will be monitoring master node only, if set to 'none' of 'false' monitoring will be turned off.") flag.BoolVar(&TestContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.") flag.BoolVar(&TestContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", false, "If set to true framwork will gather metrics from all components after each test.") + flag.BoolVar(&TestContext.GatherSuiteMetricsAfterTest, "gather-suite-metrics-at-teardown", false, "If set to true framwork will gather metrics from all components after the whole test suite completes.") flag.StringVar(&TestContext.OutputPrintType, "output-print-type", "json", "Format in which summaries should be printed: 'hr' for human readable, 'json' for JSON ones.") flag.BoolVar(&TestContext.DumpLogsOnFailure, "dump-logs-on-failure", true, "If set to true test will dump data about the namespace in which test was running.") flag.BoolVar(&TestContext.DisableLogDump, "disable-log-dump", false, "If set to true, logs from master and nodes won't be gathered after test run.")