From f8cee91372dc5f973e7063d7445b5dbecdf72708 Mon Sep 17 00:00:00 2001
From: Shyam Jeedigunta
Date: Wed, 24 Jan 2018 20:02:58 +0100
Subject: [PATCH] Introduce apiserver profile-gathering library in testing framework

---
 test/e2e/framework/BUILD               |   1 +
 test/e2e/framework/profile_gatherer.go | 203 +++++++++++++++++++++++++
 test/e2e/framework/test_context.go     |   2 +
 3 files changed, 206 insertions(+)
 create mode 100644 test/e2e/framework/profile_gatherer.go

diff --git a/test/e2e/framework/BUILD b/test/e2e/framework/BUILD
index 6a773a3957b..8b9d740012f 100644
--- a/test/e2e/framework/BUILD
+++ b/test/e2e/framework/BUILD
@@ -26,6 +26,7 @@ go_library(
         "nodes_util.go",
         "perf_util.go",
         "pods.go",
+        "profile_gatherer.go",
         "psp_util.go",
         "pv_util.go",
         "rc_util.go",
diff --git a/test/e2e/framework/profile_gatherer.go b/test/e2e/framework/profile_gatherer.go
new file mode 100644
index 00000000000..cac8a6ab40f
--- /dev/null
+++ b/test/e2e/framework/profile_gatherer.go
@@ -0,0 +1,203 @@
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package framework
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"sync"
+)
+
+const (
+	// DefaultCPUProfileSeconds is the default duration, in seconds, for which
+	// the apiserver CPU profile is gathered.
+	DefaultCPUProfileSeconds = 30
+)
+
+// getProfilesDirectoryPath returns the directory under the report dir in
+// which the rendered profile graphs are stored.
+func getProfilesDirectoryPath() string {
+	return filepath.Join(TestContext.ReportDir, "profiles")
+}
+
+// createProfilesDirectoryIfNeeded makes sure the profiles directory exists,
+// creating it together with any missing parent. os.MkdirAll succeeds when the
+// directory is already there, which makes this safe to call from several
+// profile-gathering goroutines at the same time.
+func createProfilesDirectoryIfNeeded() error {
+	if err := os.MkdirAll(getProfilesDirectoryPath(), 0777); err != nil {
+		return fmt.Errorf("Failed to create profiles dir: %v", err)
+	}
+	return nil
+}
+
+// checkProfileGatheringPrerequisites verifies that gathering profiles is
+// allowed and that a report dir is configured, and ensures that the profiles
+// directory exists.
+func checkProfileGatheringPrerequisites() error {
+	if !TestContext.AllowGatheringProfiles {
+		return fmt.Errorf("Can't gather profiles as --allow-gathering-profiles is false")
+	}
+	if TestContext.ReportDir == "" {
+		return fmt.Errorf("Can't gather profiles as --report-dir is empty")
+	}
+	if err := createProfilesDirectoryIfNeeded(); err != nil {
+		return fmt.Errorf("Failed to ensure profiles dir: %v", err)
+	}
+	return nil
+}
+
+// writeProfileToTempFile stores the raw profile data in a new temp file and
+// returns the path of that file. The caller has to remove the file once it is
+// done with it. On failure no file and no open descriptor is left behind.
+func writeProfileToTempFile(data []byte) (string, error) {
+	tmpfile, err := ioutil.TempFile("", "apiserver-profile")
+	if err != nil {
+		return "", fmt.Errorf("Failed to create temp file for profile data: %v", err)
+	}
+	if _, err := tmpfile.Write(data); err != nil {
+		// Best-effort cleanup; the write error is the one worth reporting.
+		tmpfile.Close()
+		os.Remove(tmpfile.Name())
+		return "", fmt.Errorf("Failed to write temp file with profile data: %v", err)
+	}
+	if err := tmpfile.Close(); err != nil {
+		// Best-effort cleanup; the close error is the one worth reporting.
+		os.Remove(tmpfile.Name())
+		return "", fmt.Errorf("Failed to close temp file: %v", err)
+	}
+	return tmpfile.Name(), nil
+}
+
+// gatherProfileOfKind fetches the pprof profile of the given kind ("heap", or
+// "profile" optionally followed by a query string) from the apiserver by
+// running curl on the master over SSH, renders it with 'go tool pprof' and
+// stores the resulting graph as a pdf file named after profileBaseName in the
+// profiles directory.
+func gatherProfileOfKind(profileBaseName, kind string) error {
+	// Check some prerequisites before gathering the profile.
+	if err := checkProfileGatheringPrerequisites(); err != nil {
+		return err
+	}
+	// Pick the pprof flags and the output file prefix for the profile kind. This
+	// is done before anything else so that an unsupported kind is rejected
+	// without first waiting for the profile to be collected on the master.
+	pprofArgs := []string{"tool", "pprof", "-pdf"}
+	var profilePrefix string
+	switch {
+	// TODO: Support other profile kinds if needed (e.g inuse_space, alloc_objects, mutex, etc)
+	case kind == "heap":
+		pprofArgs = append(pprofArgs, "--alloc_space")
+		profilePrefix = "ApiserverMemoryProfile_"
+	case strings.HasPrefix(kind, "profile"):
+		profilePrefix = "ApiserverCPUProfile_"
+	default:
+		return fmt.Errorf("Unknown profile kind provided: %s", kind)
+	}
+	// Get the profile data over SSH. The URL is quoted so that the '?' of a CPU
+	// profile query string reaches curl verbatim instead of being open to
+	// interpretation as a glob character by a shell on the master.
+	getCommand := fmt.Sprintf("curl -s 'localhost:8080/debug/pprof/%s'", kind)
+	sshResult, err := SSH(getCommand, GetMasterHost()+":22", TestContext.Provider)
+	if err != nil {
+		return fmt.Errorf("Failed to execute curl command on master through SSH: %v", err)
+	}
+	if len(sshResult.Stdout) == 0 {
+		return fmt.Errorf("Failed to get profile data from master: curl command returned no output")
+	}
+	// Write the data to a temp file.
+	tmpfilePath, err := writeProfileToTempFile([]byte(sshResult.Stdout))
+	if err != nil {
+		return err
+	}
+	defer os.Remove(tmpfilePath)
+	// Create a graph from the data and write it to a pdf file.
+	cmd := exec.Command("go", append(pprofArgs, tmpfilePath)...)
+	outfilePath := filepath.Join(getProfilesDirectoryPath(), profilePrefix+profileBaseName+".pdf")
+	outfile, err := os.Create(outfilePath)
+	if err != nil {
+		return fmt.Errorf("Failed to create file for the profile graph: %v", err)
+	}
+	cmd.Stdout = outfile
+	runErr := cmd.Run()
+	closeErr := outfile.Close()
+	if runErr != nil {
+		// Best-effort removal so that no empty or truncated graph is left behind.
+		os.Remove(outfilePath)
+		return fmt.Errorf("Failed to run 'go tool pprof': %v", runErr)
+	}
+	if closeErr != nil {
+		return fmt.Errorf("Failed to close file with the profile graph: %v", closeErr)
+	}
+	return nil
+}
+
+// The below exposed functions can take a while to execute as they SSH to the master,
+// collect and copy the profile over and then graph it. To allow waiting for these to
+// finish before the parent goroutine itself finishes, we accept a sync.WaitGroup
+// argument in these functions. Typically you would use the following pattern:
+//
+// func TestFooBar() {
+//		var wg sync.WaitGroup
+//		wg.Add(3)
+//		go framework.GatherApiserverCPUProfile(&wg, "doing_foo")
+//		go framework.GatherApiserverMemoryProfile(&wg, "doing_foo")
+//		<<<< some code doing foo >>>>>>
+//		go framework.GatherApiserverCPUProfile(&wg, "doing_bar")
+//		<<<< some code doing bar >>>>>>
+//		wg.Wait()
+// }
+//
+// If you do not wish to exercise the waiting logic, pass a nil value for the
+// waitgroup argument instead. However, then you would be responsible for ensuring
+// that the function finishes.
+
+// GatherApiserverCPUProfile gathers the apiserver CPU profile over
+// DefaultCPUProfileSeconds seconds and stores its graph under the given base
+// name. If wg is non-nil, wg.Done is called when the function returns.
+func GatherApiserverCPUProfile(wg *sync.WaitGroup, profileBaseName string) {
+	GatherApiserverCPUProfileForNSeconds(wg, profileBaseName, DefaultCPUProfileSeconds)
+}
+
+// GatherApiserverCPUProfileForNSeconds gathers the apiserver CPU profile over
+// n seconds and stores its graph under the given base name. Failures are
+// logged, not returned. If wg is non-nil, wg.Done is called when the function
+// returns.
+func GatherApiserverCPUProfileForNSeconds(wg *sync.WaitGroup, profileBaseName string, n int) {
+	if wg != nil {
+		defer wg.Done()
+	}
+	if err := gatherProfileOfKind(profileBaseName, fmt.Sprintf("profile?seconds=%v", n)); err != nil {
+		Logf("Failed to gather apiserver CPU profile: %v", err)
+	}
+}
+
+// GatherApiserverMemoryProfile gathers the apiserver heap profile and stores
+// its graph under the given base name. Failures are logged, not returned. If
+// wg is non-nil, wg.Done is called when the function returns.
+func GatherApiserverMemoryProfile(wg *sync.WaitGroup, profileBaseName string) {
+	if wg != nil {
+		defer wg.Done()
+	}
+	if err := gatherProfileOfKind(profileBaseName, "heap"); err != nil {
+		Logf("Failed to gather apiserver memory profile: %v", err)
+	}
+}
diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go
index b6e87ef444a..56af8106ae5 100644
--- a/test/e2e/framework/test_context.go
+++ b/test/e2e/framework/test_context.go
@@ -85,6 +85,7 @@ type TestContextType struct {
 	GatherLogsSizes                   bool
 	GatherMetricsAfterTest            string
 	GatherSuiteMetricsAfterTest       bool
+	AllowGatheringProfiles            bool
 	// If set to 'true' framework will gather ClusterAutoscaler metrics when gathering them for other components.
 	IncludeClusterAutoscalerMetrics bool
 	// Currently supported values are 'hr' for human-readable and 'json'. It's a comma separated list.
@@ -190,6 +191,7 @@ func RegisterCommonFlags() {
 	flag.BoolVar(&TestContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.")
 	flag.StringVar(&TestContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", "false", "If set to 'true' framework will gather metrics from all components after each test. If set to 'master' only master component metrics would be gathered.")
 	flag.BoolVar(&TestContext.GatherSuiteMetricsAfterTest, "gather-suite-metrics-at-teardown", false, "If set to true framwork will gather metrics from all components after the whole test suite completes.")
+	flag.BoolVar(&TestContext.AllowGatheringProfiles, "allow-gathering-profiles", true, "If set to true framework will allow to gather CPU/memory allocation pprof profiles from the master.")
 	flag.BoolVar(&TestContext.IncludeClusterAutoscalerMetrics, "include-cluster-autoscaler", false, "If set to true, framework will include Cluster Autoscaler when gathering metrics.")
 	flag.StringVar(&TestContext.OutputPrintType, "output-print-type", "json", "Format in which summaries should be printed: 'hr' for human readable, 'json' for JSON ones.")
 	flag.BoolVar(&TestContext.DumpLogsOnFailure, "dump-logs-on-failure", true, "If set to true test will dump data about the namespace in which test was running.")