Merge pull request #59352 from shyamjvs/add-profiling-to-scalability-tests

Automatic merge from submit-queue (batch tested with PRs 54685, 59352). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Add apiserver profiling to our scalability tests. Follows PR https://github.com/kubernetes/kubernetes/pull/58763. /cc @wojtek-t @porridge @kubernetes/sig-scalability-misc
2025-07-25 12:43:23 +00:00 · 2018-02-08 04:50:31 -08:00 · 2018-02-08 04:50:31 -08:00 · ee8c896737
commit ee8c896737
parent b922ffc491 c3db0828f9
3 changed files with 47 additions and 0 deletions
--- a/test/e2e/framework/profile_gatherer.go
+++ b/test/e2e/framework/profile_gatherer.go
@ -24,6 +24,7 @@ import (
 	"path"
 	"strings"
 	"sync"
+	"time"
 )

 const (
@ -140,6 +141,9 @@ func GatherApiserverCPUProfileForNSeconds(wg *sync.WaitGroup, profileBaseName st
 	if wg != nil {
 		defer wg.Done()
 	}
+	if profileBaseName == "" {
+		profileBaseName = time.Now().Format(time.RFC3339)
+	}
 	if err := gatherProfileOfKind(profileBaseName, fmt.Sprintf("profile?seconds=%v", n)); err != nil {
 		Logf("Failed to gather apiserver CPU profile: %v", err)
 	}
@ -149,7 +153,28 @@ func GatherApiserverMemoryProfile(wg *sync.WaitGroup, profileBaseName string) {
 	if wg != nil {
 		defer wg.Done()
 	}
+	if profileBaseName == "" {
+		profileBaseName = time.Now().Format(time.RFC3339)
+	}
 	if err := gatherProfileOfKind(profileBaseName, "heap"); err != nil {
 		Logf("Failed to gather apiserver memory profile: %v", err)
 	}
 }
+
+// StartApiserverCPUProfileGatherer launches a background goroutine that
+// repeatedly waits for delay and then calls GatherApiserverCPUProfile with a
+// nil WaitGroup and an empty profile base name. The wait is restarted only
+// after a gathering call has returned, so delay is the gap between consecutive
+// gatherings rather than a fixed period. Close the returned channel to make
+// the goroutine exit the next time it is waiting.
+func StartApiserverCPUProfileGatherer(delay time.Duration) chan struct{} {
+	done := make(chan struct{})
+	gatherLoop := func() {
+		for {
+			select {
+			case <-done:
+				// The caller closed the channel: stop polling.
+				return
+			case <-time.After(delay):
+				GatherApiserverCPUProfile(nil, "")
+			}
+		}
+	}
+	go gatherLoop()
+	return done
+}
--- a/test/e2e/scalability/density.go
+++ b/test/e2e/scalability/density.go
@ -356,9 +356,16 @@ var _ = SIGDescribe("Density", func() {
 	testCaseBaseName := "density"
 	missingMeasurements := 0
 	var testPhaseDurations *timer.TestPhaseTimer
+	var profileGathererStopCh chan struct{}

 	// Gathers data prior to framework namespace teardown
 	AfterEach(func() {
+		// Stop the apiserver CPU profile gatherer, if BeforeEach got far enough to
+		// start one. Closing a nil or already-closed channel panics, so guard the
+		// close and clear the variable so a later AfterEach cannot close it again.
+		if profileGathererStopCh != nil {
+			close(profileGathererStopCh)
+			profileGathererStopCh = nil
+		}
+		// Gather the memory allocations profile. GatherApiserverMemoryProfile calls
+		// wg.Done() when given a non-nil WaitGroup, so the counter has to be
+		// incremented first; otherwise Done() panics with a negative counter.
+		wg := sync.WaitGroup{}
+		wg.Add(1)
+		framework.GatherApiserverMemoryProfile(&wg, "density")
+		wg.Wait()
+
 		saturationThreshold := time.Duration((totalPods / MinPodsPerSecondThroughput)) * time.Second
 		if saturationThreshold < MinSaturationThreshold {
 			saturationThreshold = MinSaturationThreshold
@ -442,6 +449,10 @@ var _ = SIGDescribe("Density", func() {
 			}
 			framework.Logf("Name: %v, clusterIP: %v, externalIP: %v", node.ObjectMeta.Name, internalIP, externalIP)
 		}
+
+		// Start apiserver CPU profile gatherer with frequency based on cluster size.
+		profileGatheringDelay := time.Duration(5+nodeCount/100) * time.Minute
+		profileGathererStopCh = framework.StartApiserverCPUProfileGatherer(profileGatheringDelay)
 	})

 	type Density struct {
--- a/test/e2e/scalability/load.go
+++ b/test/e2e/scalability/load.go
@ -96,10 +96,17 @@ var _ = SIGDescribe("Load capacity", func() {

 	testCaseBaseName := "load"
 	var testPhaseDurations *timer.TestPhaseTimer
+	var profileGathererStopCh chan struct{}

 	// Gathers metrics before teardown
 	// TODO add flag that allows to skip cleanup on failure
 	AfterEach(func() {
+		// Stop the apiserver CPU profile gatherer, if BeforeEach got far enough to
+		// start one. Closing a nil or already-closed channel panics, so guard the
+		// close and clear the variable so a later AfterEach cannot close it again.
+		if profileGathererStopCh != nil {
+			close(profileGathererStopCh)
+			profileGathererStopCh = nil
+		}
+		// Gather the memory allocations profile. GatherApiserverMemoryProfile calls
+		// wg.Done() when given a non-nil WaitGroup, so the counter has to be
+		// incremented first; otherwise Done() panics with a negative counter.
+		wg := sync.WaitGroup{}
+		wg.Add(1)
+		framework.GatherApiserverMemoryProfile(&wg, "load")
+		wg.Wait()
+
 		// Verify latency metrics
 		highLatencyRequests, metrics, err := framework.HighLatencyRequests(clientset, nodeCount)
 		framework.ExpectNoError(err)
@ -147,6 +154,10 @@ var _ = SIGDescribe("Load capacity", func() {
 		framework.ExpectNoError(err)

 		framework.ExpectNoError(framework.ResetMetrics(clientset))
+
+		// Start apiserver CPU profile gatherer with frequency based on cluster size.
+		profileGatheringDelay := time.Duration(5+nodeCount/100) * time.Minute
+		profileGathererStopCh = framework.StartApiserverCPUProfileGatherer(profileGatheringDelay)
 	})

 	type Load struct {