mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 11:50:44 +00:00
Merge pull request #66903 from mborsz/repeat
Automatic merge from submit-queue (batch tested with PRs 61389, 66817, 66903, 66675, 66965). If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md

Scalability tests: Increase sample size for pod startup latency measurement

**What this PR does / why we need it**: This PR adds a way to increase the sample size used in the pod startup latency measurement. We found that this phase is flaky and want to check whether increasing the number of samples helps reduce the flakiness.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:

**Special notes for your reviewer**:

**Release note**:
```release-note
NONE
```
commit b43ca0a037
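The core of the change is how the test sizes the latency-pod sample: instead of a single batch of `nodeCount` pods, it now runs `latencyPodsIterations` batches, with the iteration count picked by ceiling division so that `latencyPodsIterations * nodeCount >= MinPodStartupMeasurements` (500). Below is a minimal standalone sketch of that arithmetic, assuming only the constant and formula from the diff; the function name and the example node counts are illustrative, not part of the test.

```go
package main

import "fmt"

// minPodStartupMeasurements mirrors the MinPodStartupMeasurements constant
// introduced by this PR: the minimum number of latency samples to collect.
const minPodStartupMeasurements = 500

// iterations returns how many batches of latency pods are needed so that
// iterations*nodeCount >= minPodStartupMeasurements. This is the same
// ceiling-division formula as latencyPodsIterations in the diff.
func iterations(nodeCount int) int {
	return (minPodStartupMeasurements + nodeCount - 1) / nodeCount
}

func main() {
	for _, nodes := range []int{5, 100, 500, 5000} {
		it := iterations(nodes)
		fmt.Printf("nodes=%-4d iterations=%-3d total latency pods=%d\n", nodes, it, it*nodes)
	}
}
```

On small clusters this schedules several batches to reach at least 500 samples, while on clusters of 500 or more nodes a single batch already satisfies the minimum, so behaviour there stays effectively the same as before.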
```diff
@@ -56,6 +56,7 @@ const (
 	MinSaturationThreshold = 2 * time.Minute
 	MinPodsPerSecondThroughput = 8
 	DensityPollInterval = 10 * time.Second
+	MinPodStartupMeasurements = 500
 )
 
 // Maximum container failures this test tolerates before failing.
```
```diff
@@ -696,7 +697,10 @@ var _ = SIGDescribe("Density", func() {
 			}
 			e2eStartupTime = runDensityTest(dConfig, testPhaseDurations, &scheduleThroughputs)
 			if itArg.runLatencyTest {
-				By("Scheduling additional Pods to measure startup latencies")
+				// Pick latencyPodsIterations so that:
+				// latencyPodsIterations * nodeCount >= MinPodStartupMeasurements.
+				latencyPodsIterations := (MinPodStartupMeasurements + nodeCount - 1) / nodeCount
+				By(fmt.Sprintf("Scheduling additional %d Pods to measure startup latencies", latencyPodsIterations*nodeCount))
 
 				createTimes := make(map[string]metav1.Time, 0)
 				nodeNames := make(map[string]string, 0)
```
```diff
@@ -775,58 +779,76 @@ var _ = SIGDescribe("Density", func() {
 
 					go controller.Run(stopCh)
 				}
-
-				// Create some additional pods with throughput ~5 pods/sec.
-				latencyPodStartupPhase := testPhaseDurations.StartPhase(800, "latency pods creation")
-				defer latencyPodStartupPhase.End()
-				var wg sync.WaitGroup
-				wg.Add(nodeCount)
-				// Explicitly set requests here.
-				// Thanks to it we trigger increasing priority function by scheduling
-				// a pod to a node, which in turn will result in spreading latency pods
-				// more evenly between nodes.
-				cpuRequest := *resource.NewMilliQuantity(nodeCpuCapacity/5, resource.DecimalSI)
-				memRequest := *resource.NewQuantity(nodeMemCapacity/5, resource.DecimalSI)
-				if podsPerNode > 30 {
-					// This is to make them schedulable on high-density tests
-					// (e.g. 100 pods/node kubemark).
-					cpuRequest = *resource.NewMilliQuantity(0, resource.DecimalSI)
-					memRequest = *resource.NewQuantity(0, resource.DecimalSI)
-				}
-				rcNameToNsMap := map[string]string{}
-				for i := 1; i <= nodeCount; i++ {
-					name := additionalPodsPrefix + "-" + strconv.Itoa(i)
-					nsName := namespaces[i%len(namespaces)].Name
-					rcNameToNsMap[name] = nsName
-					go createRunningPodFromRC(&wg, c, name, nsName, imageutils.GetPauseImageName(), additionalPodsPrefix, cpuRequest, memRequest)
-					time.Sleep(200 * time.Millisecond)
-				}
-				wg.Wait()
-				latencyPodStartupPhase.End()
-
-				latencyMeasurementPhase := testPhaseDurations.StartPhase(810, "pod startup latencies measurement")
-				defer latencyMeasurementPhase.End()
-				By("Waiting for all Pods begin observed by the watch...")
-				waitTimeout := 10 * time.Minute
-				for start := time.Now(); len(watchTimes) < nodeCount; time.Sleep(10 * time.Second) {
-					if time.Since(start) < waitTimeout {
-						framework.Failf("Timeout reached waiting for all Pods being observed by the watch.")
-					}
-				}
-				close(stopCh)
-
-				nodeToLatencyPods := make(map[string]int)
-				for i := range latencyPodStores {
-					for _, item := range latencyPodStores[i].List() {
-						pod := item.(*v1.Pod)
-						nodeToLatencyPods[pod.Spec.NodeName]++
-					}
-					for node, count := range nodeToLatencyPods {
-						if count > 1 {
-							framework.Logf("%d latency pods scheduled on %s", count, node)
-						}
-					}
-				}
+				for latencyPodsIteration := 0; latencyPodsIteration < latencyPodsIterations; latencyPodsIteration++ {
+					podIndexOffset := latencyPodsIteration * nodeCount
+					framework.Logf("Creating %d latency pods in range [%d, %d]", nodeCount, podIndexOffset+1, podIndexOffset+nodeCount)
+
+					watchTimesLen := len(watchTimes)
+
+					// Create some additional pods with throughput ~5 pods/sec.
+					latencyPodStartupPhase := testPhaseDurations.StartPhase(800+latencyPodsIteration*10, "latency pods creation")
+					defer latencyPodStartupPhase.End()
+					var wg sync.WaitGroup
+					wg.Add(nodeCount)
+					// Explicitly set requests here.
+					// Thanks to it we trigger increasing priority function by scheduling
+					// a pod to a node, which in turn will result in spreading latency pods
+					// more evenly between nodes.
+					cpuRequest := *resource.NewMilliQuantity(nodeCpuCapacity/5, resource.DecimalSI)
+					memRequest := *resource.NewQuantity(nodeMemCapacity/5, resource.DecimalSI)
+					if podsPerNode > 30 {
+						// This is to make them schedulable on high-density tests
+						// (e.g. 100 pods/node kubemark).
+						cpuRequest = *resource.NewMilliQuantity(0, resource.DecimalSI)
+						memRequest = *resource.NewQuantity(0, resource.DecimalSI)
+					}
+					rcNameToNsMap := map[string]string{}
+					for i := 1; i <= nodeCount; i++ {
+						name := additionalPodsPrefix + "-" + strconv.Itoa(podIndexOffset+i)
+						nsName := namespaces[i%len(namespaces)].Name
+						rcNameToNsMap[name] = nsName
+						go createRunningPodFromRC(&wg, c, name, nsName, imageutils.GetPauseImageName(), additionalPodsPrefix, cpuRequest, memRequest)
+						time.Sleep(200 * time.Millisecond)
+					}
+					wg.Wait()
+					latencyPodStartupPhase.End()
+
+					latencyMeasurementPhase := testPhaseDurations.StartPhase(801+latencyPodsIteration*10, "pod startup latencies measurement")
+					defer latencyMeasurementPhase.End()
+					By("Waiting for all Pods begin observed by the watch...")
+					waitTimeout := 10 * time.Minute
+					for start := time.Now(); len(watchTimes) < watchTimesLen+nodeCount; time.Sleep(10 * time.Second) {
+						if time.Since(start) < waitTimeout {
+							framework.Failf("Timeout reached waiting for all Pods being observed by the watch.")
+						}
+					}
+
+					nodeToLatencyPods := make(map[string]int)
+					for i := range latencyPodStores {
+						for _, item := range latencyPodStores[i].List() {
+							pod := item.(*v1.Pod)
+							nodeToLatencyPods[pod.Spec.NodeName]++
+						}
+						for node, count := range nodeToLatencyPods {
+							if count > 1 {
+								framework.Logf("%d latency pods scheduled on %s", count, node)
+							}
+						}
+					}
+					latencyMeasurementPhase.End()
+
+					By("Removing additional replication controllers")
+					podDeletionPhase := testPhaseDurations.StartPhase(802+latencyPodsIteration*10, "latency pods deletion")
+					defer podDeletionPhase.End()
+					deleteRC := func(i int) {
+						defer GinkgoRecover()
+						name := additionalPodsPrefix + "-" + strconv.Itoa(podIndexOffset+i+1)
+						framework.ExpectNoError(framework.DeleteRCAndWaitForGC(c, rcNameToNsMap[name], name))
+					}
+					workqueue.Parallelize(25, nodeCount, deleteRC)
+					podDeletionPhase.End()
+				}
+				close(stopCh)
 
 				for i := 0; i < len(namespaces); i++ {
 					nsName := namespaces[i].Name
```
```diff
@@ -914,18 +936,6 @@ var _ = SIGDescribe("Density", func() {
 			framework.ExpectNoError(framework.VerifyLatencyWithinThreshold(podStartupLatencyThreshold, podStartupLatency.E2ELatency, "pod startup"))
 
 			framework.LogSuspiciousLatency(startupLag, e2eLag, nodeCount, c)
-			latencyMeasurementPhase.End()
-
-			By("Removing additional replication controllers")
-			podDeletionPhase := testPhaseDurations.StartPhase(820, "latency pods deletion")
-			defer podDeletionPhase.End()
-			deleteRC := func(i int) {
-				defer GinkgoRecover()
-				name := additionalPodsPrefix + "-" + strconv.Itoa(i+1)
-				framework.ExpectNoError(framework.DeleteRCAndWaitForGC(c, rcNameToNsMap[name], name))
-			}
-			workqueue.Parallelize(25, nodeCount, deleteRC)
-			podDeletionPhase.End()
 		}
 		cleanupDensityTest(dConfig, testPhaseDurations)
 	})
```