Merge pull request #9478 from fgrzadkowski/fix_load_test

Refactor load test and reduce the load during the test.
2025-07-28 22:17:14 +00:00 · 2015-06-09 16:54:25 +02:00 · 2015-06-09 16:54:25 +02:00 · dafe79e5de
commit dafe79e5de
parent b68e08f55f ea7a615ce6
2 changed files with 104 additions and 104 deletions
--- a/test/e2e/load.go
+++ b/test/e2e/load.go
@ -34,14 +34,13 @@ import (
 const (
 	image             = "gcr.io/google_containers/serve_hostname:1.1"
 	simulationTime    = 10 * time.Minute
 	smallRCSize       = 5
 	mediumRCSize      = 30
 	bigRCSize         = 250
 	smallRCGroupName  = "load-test-small-rc"
 	mediumRCGroupName = "load-test-medium-rc"
 	bigRCGroupName    = "load-test-big-rc"
-	smallRCBatchSize  = 20
+	smallRCBatchSize  = 30
 	mediumRCBatchSize = 5
 	bigRCBatchSize    = 1
 )
@ -54,7 +53,7 @@ var _ = Describe("Load capacity", func() {
 	var c *client.Client
 	var nodeCount int
 	var ns string
-	var smallRCCount, mediumRCCount, bigRCCount int
+	var configs []*RCConfig
 	BeforeEach(func() {
 		var err error
@ -71,9 +70,7 @@ var _ = Describe("Load capacity", func() {
 	// TODO add flag that allows to skip cleanup on failure
 	AfterEach(func() {
-		cleanRCGroup(c, ns, smallRCGroupName, smallRCSize, smallRCCount)
+		deleteAllRC(configs)
 		cleanRCGroup(c, ns, mediumRCGroupName, mediumRCSize, mediumRCCount)
 		cleanRCGroup(c, ns, bigRCGroupName, bigRCSize, bigRCCount)
 		By(fmt.Sprintf("Destroying namespace for this suite %v", ns))
 		if err := c.Namespaces().Delete(ns); err != nil {
@ -100,27 +97,25 @@ var _ = Describe("Load capacity", func() {
 		name := fmt.Sprintf("[Skipped] should be able to handle %v pods per node", testArg.podsPerNode)
 		It(name, func() {
-			totalPods := testArg.podsPerNode * nodeCount
+			configs = generateRCConfigs(testArg.podsPerNode*nodeCount, c, ns)
 			smallRCCount, mediumRCCount, bigRCCount = computeRCCounts(totalPods)
 			threads := smallRCCount + mediumRCCount + bigRCCount
 			// TODO refactor this code to iterate over slice of RC group description.
 			createRCGroup(c, ns, smallRCGroupName, smallRCSize, smallRCCount, smallRCBatchSize)
 			createRCGroup(c, ns, mediumRCGroupName, mediumRCSize, mediumRCCount, mediumRCBatchSize)
 			createRCGroup(c, ns, bigRCGroupName, bigRCSize, bigRCCount, bigRCBatchSize)
 			// Simulate lifetime of RC:
 			//  * create with initial size
 			//  * scale RC to a random size and list all pods
 			//  * scale RC to a random size and list all pods
 			//  * delete it
 			//
 			// This will generate ~5 creations/deletions per second assuming:
 			//  - 300 small RCs each 5 pods
 			//  - 25 medium RCs each 30 pods
 			//  - 3 big RCs each 250 pods
 			createAllRC(configs)
 			// TODO add reseting latency metrics here, once it would be supported.
-
+			By("============================================================================")
-			var wg sync.WaitGroup
+			scaleAllRC(configs)
-			wg.Add(threads)
+			By("============================================================================")
-
+			scaleAllRC(configs)
-			// Run RC load for all kinds of RC.
+			By("============================================================================")
 			runRCLoad(c, &wg, ns, smallRCGroupName, smallRCSize, smallRCCount)
 			runRCLoad(c, &wg, ns, mediumRCGroupName, mediumRCSize, mediumRCCount)
 			runRCLoad(c, &wg, ns, bigRCGroupName, bigRCSize, bigRCCount)
 			// Wait for all the pods from all the RC's to return.
 			wg.Wait()
 		})
 	}
 })
@ -128,97 +123,102 @@ var _ = Describe("Load capacity", func() {
 func computeRCCounts(total int) (int, int, int) {
 	// Small RCs owns ~0.5 of total number of pods, medium and big RCs ~0.25 each.
 	// For example for 3000 pods (100 nodes, 30 pods per node) there are:
-	//  - 500 small RCs each 5 pods
+	//  - 300 small RCs each 5 pods
 	//  - 25 medium RCs each 30 pods
 	//  - 3 big RCs each 250 pods
 	bigRCCount := total / 4 / bigRCSize
-	mediumRCCount := (total - bigRCCount*bigRCSize) / 3 / mediumRCSize
+	mediumRCCount := total / 4 / mediumRCSize
-	smallRCCount := (total - bigRCCount*bigRCSize - mediumRCCount*mediumRCSize) / smallRCSize
+	smallRCCount := total / 2 / smallRCSize
 	return smallRCCount, mediumRCCount, bigRCCount
 }
-// The function every few second scales RC to a random size and with 0.1 probability deletes it.
+func generateRCConfigs(totalPods int, c *client.Client, ns string) []*RCConfig {
-// Assumes that given RC exists.
+	configs := make([]*RCConfig, 0)
-func playWithRC(c *client.Client, wg *sync.WaitGroup, ns, name string, size int) {
+
-	By(fmt.Sprintf("Playing with Replication Controller %v", name))
+	smallRCCount, mediumRCCount, bigRCCount := computeRCCounts(totalPods)
-	defer GinkgoRecover()
+	configs = append(configs, generateRCConfigsForGroup(c, ns, smallRCGroupName, smallRCSize, smallRCCount)...)
-	defer wg.Done()
+	configs = append(configs, generateRCConfigsForGroup(c, ns, mediumRCGroupName, mediumRCSize, mediumRCCount)...)
-	// Wait some time to prevent from performing all operations at the same time.
+	configs = append(configs, generateRCConfigsForGroup(c, ns, bigRCGroupName, bigRCSize, bigRCCount)...)
-	time.Sleep(time.Duration(rand.Intn(60)) * time.Second)
+
-	rcExist := true
+	return configs
 	// Once every 1-2 minutes perform scale of RC.
 	for start := time.Now(); time.Since(start) < simulationTime; time.Sleep(time.Duration(60+rand.Intn(60)) * time.Second) {
 		if !rcExist {
 			config := RCConfig{Client: c,
 				Name:      name,
 				Namespace: ns,
 				Image:     image,
 				Replicas:  size,
 			}
 			expectNoError(RunRC(config), fmt.Sprintf("creating rc %s in namespace %s", name, ns))
 			rcExist = true
 		}
 		// Scale RC to a random size between 0.5x and 1.5x of the original size.
 		newSize := uint(rand.Intn(size+1) + size/2)
 		expectNoError(ScaleRC(c, ns, name, newSize), fmt.Sprintf("scaling rc %s in namespace %s", name, ns))
 		// List all pods within this RC.
 		_, err := c.Pods(ns).List(labels.SelectorFromSet(labels.Set(map[string]string{"name": name})), fields.Everything())
 		expectNoError(err, fmt.Sprintf("listing pods from rc %v in namespace %v", name, ns))
 		// With probability 0.1 remove this RC.
 		if rand.Intn(10) == 0 {
 			expectNoError(DeleteRC(c, ns, name), fmt.Sprintf("deleting rc %s in namespace %s", name, ns))
 			rcExist = false
 		}
 	}
 	if rcExist {
 		expectNoError(DeleteRC(c, ns, name), fmt.Sprintf("deleting rc %s in namespace %s after test completion", name, ns))
 	}
 }
-func runRCLoad(c *client.Client, wg *sync.WaitGroup, ns, groupName string, size, count int) {
+func generateRCConfigsForGroup(c *client.Client, ns, groupName string, size, count int) []*RCConfig {
 	configs := make([]*RCConfig, 0, count)
 	for i := 1; i <= count; i++ {
-		go playWithRC(c, wg, ns, groupName+"-"+strconv.Itoa(i), size)
+		config := &RCConfig{
-	}
+			Client:    c,
-}
+			Name:      groupName + "-" + strconv.Itoa(i),
-
+			Namespace: ns,
-// Creates <count> RCs with size <size> in namespace <ns>. The requests are sent in batches of size <batchSize>.
+			Image:     image,
-func createRCGroup(c *client.Client, ns, groupName string, size, count, batchSize int) {
+			Replicas:  size,
 	By(fmt.Sprintf("Creating %v Replication Controllers with size %v", count, size))
 	for i := 1; i <= count; {
 		// Create up to <batchSize> RCs in parallel.
 		var wg sync.WaitGroup
 		for j := 1; j <= batchSize && i <= count; i, j = i+1, j+1 {
 			wg.Add(1)
 			go func(i int) {
 				defer GinkgoRecover()
 				defer wg.Done()
 				name := groupName + "-" + strconv.Itoa(i)
 				config := RCConfig{Client: c,
 					Name:      name,
 					Namespace: ns,
 					Image:     image,
 					Replicas:  size,
 				}
 				expectNoError(RunRC(config), fmt.Sprintf("creating rc %s in namespace %s for the first time", name, ns))
 			}(i)
 		}
-		wg.Wait()
+		configs = append(configs, config)
 	}
 	return configs
 }
-// Removes group of RCs if not removed. This function is for cleanup purposes, so ignores errors.
+func sleepUpTo(d time.Duration) {
-func cleanRCGroup(c *client.Client, ns, groupName string, size, count int) {
+	time.Sleep(time.Duration(rand.Int63n(d.Nanoseconds())))
-	By(fmt.Sprintf("Removing %v Replication Controllers with size %v if not removed", count, size))
+}
 func createAllRC(configs []*RCConfig) {
 	var wg sync.WaitGroup
-	wg.Add(count)
+	wg.Add(len(configs))
-	for i := 1; i <= count; i++ {
+	for _, config := range configs {
-		go func(i int) {
+		go createRC(&wg, config)
 			defer GinkgoRecover()
 			defer wg.Done()
 			name := groupName + "-" + strconv.Itoa(i)
 			// Since it is cleanup ignore any error.
 			DeleteRC(c, name, ns)
 		}(i)
 	}
 	wg.Wait()
 }
 func createRC(wg *sync.WaitGroup, config *RCConfig) {
 	defer GinkgoRecover()
 	defer wg.Done()
 	creatingTime := 10 * time.Minute
 	sleepUpTo(creatingTime)
 	expectNoError(RunRC(*config), fmt.Sprintf("creating rc %s", config.Name))
 }
 func scaleAllRC(configs []*RCConfig) {
 	var wg sync.WaitGroup
 	wg.Add(len(configs))
 	for _, config := range configs {
 		go scaleRC(&wg, config)
 	}
 	wg.Wait()
 }
 // Scales RC to a random size within [0.5*size, 1.5*size] and lists all the pods afterwards.
 // Scaling happens always based on original size, not the current size.
 func scaleRC(wg *sync.WaitGroup, config *RCConfig) {
 	defer GinkgoRecover()
 	defer wg.Done()
 	resizingTime := 3 * time.Minute
 	sleepUpTo(resizingTime)
 	newSize := uint(rand.Intn(config.Replicas) + config.Replicas/2)
 	expectNoError(ScaleRC(config.Client, config.Namespace, config.Name, newSize),
 		fmt.Sprintf("scaling rc %s for the first time", config.Name))
 	selector := labels.SelectorFromSet(labels.Set(map[string]string{"name": config.Name}))
 	_, err := config.Client.Pods(config.Namespace).List(selector, fields.Everything())
 	expectNoError(err, fmt.Sprintf("listing pods from rc %v", config.Name))
 }
 func deleteAllRC(configs []*RCConfig) {
 	var wg sync.WaitGroup
 	wg.Add(len(configs))
 	for _, config := range configs {
 		go deleteRC(&wg, config)
 	}
 	wg.Wait()
 }
 func deleteRC(wg *sync.WaitGroup, config *RCConfig) {
 	defer GinkgoRecover()
 	defer wg.Done()
 	deletingTime := 10 * time.Minute
 	sleepUpTo(deletingTime)
 	expectNoError(DeleteRC(config.Client, config.Namespace, config.Name), fmt.Sprintf("deleting rc %s", config.Name))
 }
--- a/test/e2e/util.go
+++ b/test/e2e/util.go
@ -1014,7 +1014,7 @@ func RunRC(config RCConfig) error {
 }
 func ScaleRC(c *client.Client, ns, name string, size uint) error {
-	By(fmt.Sprintf("Scaling replication controller %s in namespace %s to %d", name, ns, size))
+	By(fmt.Sprintf("%v Scaling replication controller %s in namespace %s to %d", time.Now(), name, ns, size))
 	scaler, err := kubectl.ScalerFor("ReplicationController", kubectl.NewScalerClient(c))
 	if err != nil {
 		return err
@ -1050,7 +1050,7 @@ func waitForRCPodsRunning(c *client.Client, ns, rcName string) error {
 // Delete a Replication Controller and all pods it spawned
 func DeleteRC(c *client.Client, ns, name string) error {
-	By(fmt.Sprintf("Deleting replication controller %s in namespace %s", name, ns))
+	By(fmt.Sprintf("%v Deleting replication controller %s in namespace %s", time.Now(), name, ns))
 	reaper, err := kubectl.ReaperForReplicationController(c, 10*time.Minute)
 	if err != nil {
 		return err