diff --git a/test/e2e/framework/profile_gatherer.go b/test/e2e/framework/profile_gatherer.go index cac8a6ab40f..f643c7ce6a7 100644 --- a/test/e2e/framework/profile_gatherer.go +++ b/test/e2e/framework/profile_gatherer.go @@ -24,6 +24,7 @@ import ( "path" "strings" "sync" + "time" ) const ( @@ -140,6 +141,9 @@ func GatherApiserverCPUProfileForNSeconds(wg *sync.WaitGroup, profileBaseName st if wg != nil { defer wg.Done() } + if profileBaseName == "" { + profileBaseName = time.Now().Format(time.RFC3339) + } if err := gatherProfileOfKind(profileBaseName, fmt.Sprintf("profile?seconds=%v", n)); err != nil { Logf("Failed to gather apiserver CPU profile: %v", err) } @@ -149,7 +153,28 @@ func GatherApiserverMemoryProfile(wg *sync.WaitGroup, profileBaseName string) { if wg != nil { defer wg.Done() } + if profileBaseName == "" { + profileBaseName = time.Now().Format(time.RFC3339) + } if err := gatherProfileOfKind(profileBaseName, "heap"); err != nil { Logf("Failed to gather apiserver memory profile: %v", err) } } + +// StartApiserverCPUProfileGatherer is a polling-based gatherer of the apiserver's +// CPU profile. It takes the delay b/w consecutive gatherings as an argument and +// starts the gathering goroutine. To stop the gatherer, close the returned channel. +func StartApiserverCPUProfileGatherer(delay time.Duration) chan struct{} { + stopCh := make(chan struct{}) + go func() { + for { + select { + case <-time.After(delay): + GatherApiserverCPUProfile(nil, "") + case <-stopCh: + return + } + } + }() + return stopCh +} diff --git a/test/e2e/scalability/density.go b/test/e2e/scalability/density.go index 6e49d068599..618e776abfc 100644 --- a/test/e2e/scalability/density.go +++ b/test/e2e/scalability/density.go @@ -356,9 +356,16 @@ var _ = SIGDescribe("Density", func() { testCaseBaseName := "density" missingMeasurements := 0 var testPhaseDurations *timer.TestPhaseTimer + var profileGathererStopCh chan struct{} // Gathers data prior to framework namespace teardown AfterEach(func() { + // Stop apiserver CPU profile gatherer and gather memory allocations profile. + close(profileGathererStopCh) + wg := sync.WaitGroup{} + framework.GatherApiserverMemoryProfile(&wg, "density") + wg.Wait() + saturationThreshold := time.Duration((totalPods / MinPodsPerSecondThroughput)) * time.Second if saturationThreshold < MinSaturationThreshold { saturationThreshold = MinSaturationThreshold @@ -442,6 +449,10 @@ var _ = SIGDescribe("Density", func() { } framework.Logf("Name: %v, clusterIP: %v, externalIP: %v", node.ObjectMeta.Name, internalIP, externalIP) } + + // Start apiserver CPU profile gatherer with frequency based on cluster size. + profileGatheringDelay := time.Duration(5+nodeCount/100) * time.Minute + profileGathererStopCh = framework.StartApiserverCPUProfileGatherer(profileGatheringDelay) }) type Density struct { diff --git a/test/e2e/scalability/load.go b/test/e2e/scalability/load.go index 9bb6f311df6..2825f3f3084 100644 --- a/test/e2e/scalability/load.go +++ b/test/e2e/scalability/load.go @@ -96,10 +96,17 @@ var _ = SIGDescribe("Load capacity", func() { testCaseBaseName := "load" var testPhaseDurations *timer.TestPhaseTimer + var profileGathererStopCh chan struct{} // Gathers metrics before teardown // TODO add flag that allows to skip cleanup on failure AfterEach(func() { + // Stop apiserver CPU profile gatherer and gather memory allocations profile. + close(profileGathererStopCh) + wg := sync.WaitGroup{} + framework.GatherApiserverMemoryProfile(&wg, "load") + wg.Wait() + // Verify latency metrics highLatencyRequests, metrics, err := framework.HighLatencyRequests(clientset, nodeCount) framework.ExpectNoError(err) @@ -147,6 +154,10 @@ var _ = SIGDescribe("Load capacity", func() { framework.ExpectNoError(err) framework.ExpectNoError(framework.ResetMetrics(clientset)) + + // Start apiserver CPU profile gatherer with frequency based on cluster size. + profileGatheringDelay := time.Duration(5+nodeCount/100) * time.Minute + profileGathererStopCh = framework.StartApiserverCPUProfileGatherer(profileGatheringDelay) }) type Load struct {