Merge pull request #24434 from gmarek/services

Automatic merge from submit-queue

Create multiple RCs in NC - prerequisite for adding services

WIP because I need to make the logs readable again.
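The shape of the change, in brief: instead of building one framework.RCConfig with Replicas = totalPods, the density test now builds a slice of configs whose replica counts are computed by ceiling division, starts them all concurrently, and waits on a sync.WaitGroup. A minimal stand-alone sketch of that pattern (rcConfig and runRC are simplified stand-ins for framework.RCConfig and framework.RunRC, and the numbers are illustrative):

package main

import (
	"fmt"
	"sync"
)

// rcConfig keeps only the fields the sharding logic needs.
type rcConfig struct {
	Name     string
	Replicas int
}

// runRC stands in for framework.RunRC; here it only reports what it would create.
func runRC(c rcConfig) error {
	fmt.Printf("starting %s with %d replicas\n", c.Name, c.Replicas)
	return nil
}

func main() {
	totalPods := 3000
	numberOfRCs := 4 // the diff starts at 1, with a TODO to raise it

	// Ceiling division: shares are rounded up so they always cover totalPods.
	configs := make([]rcConfig, numberOfRCs)
	for i := range configs {
		configs[i] = rcConfig{
			Name:     fmt.Sprintf("density%d-%d", totalPods, i),
			Replicas: (totalPods + numberOfRCs - 1) / numberOfRCs,
		}
	}

	// Start every RC concurrently and wait for all of them, mirroring
	// the WaitGroup pattern in the diff.
	var wg sync.WaitGroup
	wg.Add(len(configs))
	for i := range configs {
		c := configs[i] // copy for the goroutine (pre-1.22 loop-var semantics)
		go func() {
			defer wg.Done()
			if err := runRC(c); err != nil {
				fmt.Println("error:", err)
			}
		}()
	}
	wg.Wait()
}

The ceiling division (totalPods + n - 1) / n rounds each share up, so n shards always cover totalPods even when it is not evenly divisible by n.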
k8s-merge-robot 2016-04-23 15:19:36 -07:00
commit 0057931f4e
2 changed files with 147 additions and 84 deletions

@@ -84,6 +84,26 @@ func density30AddonResourceVerifier() map[string]framework.ResourceConstraint {
return constraints
}
+ func logPodStartupStatus(c *client.Client, expectedPods int, ns string, observedLabels map[string]string, period time.Duration, stopCh chan struct{}) {
+ label := labels.SelectorFromSet(labels.Set(observedLabels))
+ podStore := framework.NewPodStore(c, ns, label, fields.Everything())
+ defer podStore.Stop()
+ ticker := time.NewTicker(period)
+ for {
+ select {
+ case <-ticker.C:
+ pods := podStore.List()
+ startupStatus := framework.ComputeRCStartupStatus(pods, expectedPods)
+ startupStatus.Print("Density")
+ case <-stopCh:
+ pods := podStore.List()
+ startupStatus := framework.ComputeRCStartupStatus(pods, expectedPods)
+ startupStatus.Print("Density")
+ return
+ }
+ }
+ }
// This test suite can take a long time to run, and can affect or be affected by other tests.
// So by default it is added to the ginkgo.skip list (see driver.go).
// To run this suite you must explicitly ask for it by setting the
@@ -185,7 +205,7 @@ var _ = framework.KubeDescribe("Density", func() {
{podsPerNode: 30, runLatencyTest: true, interval: 10 * time.Second},
{podsPerNode: 50, runLatencyTest: false, interval: 10 * time.Second},
{podsPerNode: 95, runLatencyTest: true, interval: 10 * time.Second},
- {podsPerNode: 100, runLatencyTest: false, interval: 1 * time.Second},
+ {podsPerNode: 100, runLatencyTest: false, interval: 10 * time.Second},
}
for _, testArg := range densityTests {
@@ -201,22 +221,29 @@ var _ = framework.KubeDescribe("Density", func() {
}
itArg := testArg
It(name, func() {
- podsPerNode := itArg.podsPerNode
- totalPods = podsPerNode * nodeCount
- RCName = "density" + strconv.Itoa(totalPods) + "-" + uuid
fileHndl, err := os.Create(fmt.Sprintf(framework.TestContext.OutputDir+"/%s/pod_states.csv", uuid))
framework.ExpectNoError(err)
defer fileHndl.Close()
- config := framework.RCConfig{Client: c,
- Image: "gcr.io/google_containers/pause:2.0",
- Name: RCName,
- Namespace: ns,
- PollInterval: itArg.interval,
- PodStatusFile: fileHndl,
- Replicas: totalPods,
- CpuRequest: nodeCpuCapacity / 100,
- MemRequest: nodeMemCapacity / 100,
- MaxContainerFailures: &MaxContainerFailures,
+ podsPerNode := itArg.podsPerNode
+ totalPods = podsPerNode * nodeCount
+ // TODO: loop to podsPerNode instead of 1 when we're ready.
+ numberOrRCs := 1
+ RCConfigs := make([]framework.RCConfig, numberOrRCs)
+ for i := 0; i < numberOrRCs; i++ {
+ RCName = "density" + strconv.Itoa(totalPods) + "-" + strconv.Itoa(i) + "-" + uuid
+ RCConfigs[i] = framework.RCConfig{Client: c,
+ Image: "gcr.io/google_containers/pause:2.0",
+ Name: RCName,
+ Namespace: ns,
+ Labels: map[string]string{"type": "densityPod"},
+ PollInterval: itArg.interval,
+ PodStatusFile: fileHndl,
+ Replicas: (totalPods + numberOrRCs - 1) / numberOrRCs,
+ CpuRequest: nodeCpuCapacity / 100,
+ MemRequest: nodeMemCapacity / 100,
+ MaxContainerFailures: &MaxContainerFailures,
+ Silent: true,
+ }
+ }
// Create a listener for events.
@@ -249,7 +276,7 @@ var _ = framework.KubeDescribe("Density", func() {
// uLock is a lock that protects updateCount
var uLock sync.Mutex
updateCount := 0
label := labels.SelectorFromSet(labels.Set(map[string]string{"name": RCName}))
label := labels.SelectorFromSet(labels.Set(map[string]string{"type": "densityPod"}))
_, updateController := controllerframework.NewInformer(
&cache.ListWatch{
ListFunc: func(options api.ListOptions) (runtime.Object, error) {
@@ -273,10 +300,22 @@ var _ = framework.KubeDescribe("Density", func() {
)
go updateController.Run(stop)
- // Start the replication controller.
+ // Start all replication controllers.
startTime := time.Now()
- framework.ExpectNoError(framework.RunRC(config))
+ wg := sync.WaitGroup{}
+ wg.Add(len(RCConfigs))
+ for i := range RCConfigs {
+ rcConfig := RCConfigs[i]
+ go func() {
+ framework.ExpectNoError(framework.RunRC(rcConfig))
+ wg.Done()
+ }()
+ }
+ logStopCh := make(chan struct{})
+ go logPodStartupStatus(c, totalPods, ns, map[string]string{"type": "densityPod"}, itArg.interval, logStopCh)
+ wg.Wait()
e2eStartupTime = time.Now().Sub(startTime)
+ close(logStopCh)
framework.Logf("E2E startup time for %d pods: %v", totalPods, e2eStartupTime)
framework.Logf("Throughput (pods/s) during cluster saturation phase: %v", float32(totalPods)/float32(e2eStartupTime/time.Second))
@@ -506,11 +545,14 @@ var _ = framework.KubeDescribe("Density", func() {
By("Deleting ReplicationController")
// We explicitly delete all pods to have the API calls necessary for deletion accounted for in metrics.
- rc, err := c.ReplicationControllers(ns).Get(RCName)
- if err == nil && rc.Spec.Replicas != 0 {
- By("Cleaning up the replication controller")
- err := framework.DeleteRC(c, ns, RCName)
- framework.ExpectNoError(err)
+ for i := range RCConfigs {
+ rcName := RCConfigs[i].Name
+ rc, err := c.ReplicationControllers(ns).Get(rcName)
+ if err == nil && rc.Spec.Replicas != 0 {
+ By("Cleaning up the replication controller")
+ err := framework.DeleteRC(c, ns, rcName)
+ framework.ExpectNoError(err)
+ }
}
By("Removing additional replication controllers if any")

@@ -242,6 +242,9 @@ type RCConfig struct {
// Maximum allowable container failures. If exceeded, RunRC returns an error.
// Defaults to replicas*0.1 if unspecified.
MaxContainerFailures *int
+ // If set to false, starting the RC will print progress; otherwise only errors will be printed.
+ Silent bool
}
type DeploymentConfig struct {
@@ -1934,6 +1937,70 @@ func (config *RCConfig) applyTo(template *api.PodTemplateSpec) {
}
}
+ type RCStartupStatus struct {
+ Expected int
+ Terminating int
+ Running int
+ RunningButNotReady int
+ Waiting int
+ Pending int
+ Unknown int
+ Inactive int
+ FailedContainers int
+ Created []*api.Pod
+ ContainerRestartNodes sets.String
+ }
+ func (s *RCStartupStatus) Print(name string) {
+ Logf("%v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown, %d runningButNotReady ",
+ name, len(s.Created), s.Expected, s.Running, s.Pending, s.Waiting, s.Inactive, s.Terminating, s.Unknown, s.RunningButNotReady)
+ }
+ func ComputeRCStartupStatus(pods []*api.Pod, expected int) RCStartupStatus {
+ startupStatus := RCStartupStatus{
+ Expected: expected,
+ Created: make([]*api.Pod, 0, expected),
+ ContainerRestartNodes: sets.NewString(),
+ }
+ for _, p := range pods {
+ if p.DeletionTimestamp != nil {
+ startupStatus.Terminating++
+ continue
+ }
+ startupStatus.Created = append(startupStatus.Created, p)
+ if p.Status.Phase == api.PodRunning {
+ ready := false
+ for _, c := range p.Status.Conditions {
+ if c.Type == api.PodReady && c.Status == api.ConditionTrue {
+ ready = true
+ break
+ }
+ }
+ if ready {
+ // Only count a pod as running when it is also ready.
+ startupStatus.Running++
+ } else {
+ startupStatus.RunningButNotReady++
+ }
+ for _, v := range FailedContainers(p) {
+ startupStatus.FailedContainers = startupStatus.FailedContainers + v.Restarts
+ startupStatus.ContainerRestartNodes.Insert(p.Spec.NodeName)
+ }
+ } else if p.Status.Phase == api.PodPending {
+ if p.Spec.NodeName == "" {
+ startupStatus.Waiting++
+ } else {
+ startupStatus.Pending++
+ }
+ } else if p.Status.Phase == api.PodSucceeded || p.Status.Phase == api.PodFailed {
+ startupStatus.Inactive++
+ } else if p.Status.Phase == api.PodUnknown {
+ startupStatus.Unknown++
+ }
+ }
+ return startupStatus
+ }
func (config *RCConfig) start() error {
// Don't force tests to fail if they don't care about containers restarting.
var maxContainerFailures int
@@ -1962,74 +2029,28 @@ func (config *RCConfig) start() error {
for oldRunning != config.Replicas {
time.Sleep(interval)
- terminating := 0
- running := 0
- runningButNotReady := 0
- waiting := 0
- pending := 0
- unknown := 0
- inactive := 0
- failedContainers := 0
- containerRestartNodes := sets.NewString()
pods := PodStore.List()
- created := []*api.Pod{}
- for _, p := range pods {
- if p.DeletionTimestamp != nil {
- terminating++
- continue
- }
- created = append(created, p)
- if p.Status.Phase == api.PodRunning {
- ready := false
- for _, c := range p.Status.Conditions {
- if c.Type == api.PodReady && c.Status == api.ConditionTrue {
- ready = true
- break
- }
- }
- if ready {
- // Only count a pod is running when it is also ready.
- running++
- } else {
- runningButNotReady++
- }
- for _, v := range FailedContainers(p) {
- failedContainers = failedContainers + v.Restarts
- containerRestartNodes.Insert(p.Spec.NodeName)
- }
- } else if p.Status.Phase == api.PodPending {
- if p.Spec.NodeName == "" {
- waiting++
- } else {
- pending++
- }
- } else if p.Status.Phase == api.PodSucceeded || p.Status.Phase == api.PodFailed {
- inactive++
- } else if p.Status.Phase == api.PodUnknown {
- unknown++
- }
- }
- pods = created
+ startupStatus := ComputeRCStartupStatus(pods, config.Replicas)
+ pods = startupStatus.Created
if config.CreatedPods != nil {
*config.CreatedPods = pods
}
Logf("%v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown, %d runningButNotReady ",
config.Name, len(pods), config.Replicas, running, pending, waiting, inactive, terminating, unknown, runningButNotReady)
promPushRunningPending(running, pending)
if config.PodStatusFile != nil {
fmt.Fprintf(config.PodStatusFile, "%d, running, %d, pending, %d, waiting, %d, inactive, %d, unknown, %d, runningButNotReady\n", running, pending, waiting, inactive, unknown, runningButNotReady)
+ if !config.Silent {
+ startupStatus.Print(config.Name)
+ }
- if failedContainers > maxContainerFailures {
- DumpNodeDebugInfo(config.Client, containerRestartNodes.List())
+ promPushRunningPending(startupStatus.Running, startupStatus.Pending)
+ if config.PodStatusFile != nil {
+ fmt.Fprintf(config.PodStatusFile, "%d, running, %d, pending, %d, waiting, %d, inactive, %d, unknown, %d, runningButNotReady\n", startupStatus.Running, startupStatus.Pending, startupStatus.Waiting, startupStatus.Inactive, startupStatus.Unknown, startupStatus.RunningButNotReady)
}
+ if startupStatus.FailedContainers > maxContainerFailures {
+ DumpNodeDebugInfo(config.Client, startupStatus.ContainerRestartNodes.List())
// Get the logs from the failed containers to help diagnose what caused them to fail
LogFailedContainers(config.Namespace)
return fmt.Errorf("%d containers failed which is more than allowed %d", failedContainers, maxContainerFailures)
return fmt.Errorf("%d containers failed which is more than allowed %d", startupStatus.FailedContainers, maxContainerFailures)
}
if len(pods) < len(oldPods) || len(pods) > config.Replicas {
// This failure mode includes:
@@ -2043,11 +2064,11 @@ func (config *RCConfig) start() error {
return fmt.Errorf(errorStr)
}
- if len(pods) > len(oldPods) || running > oldRunning {
+ if len(pods) > len(oldPods) || startupStatus.Running > oldRunning {
lastChange = time.Now()
}
oldPods = pods
- oldRunning = running
+ oldRunning = startupStatus.Running
if time.Since(lastChange) > timeout {
dumpPodDebugInfo(config.Client, pods)
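For reference, the extracted helper gives both call sites, the density logger and the start() polling loop, the same per-pod bucketing. A pared-down sketch of that classification, with stand-in types in place of api.Pod and its conditions (field names here are illustrative, not the real API):

package main

import "fmt"

// pod keeps just the fields the bucketing in ComputeRCStartupStatus looks at.
type pod struct {
	Phase       string // "Running", "Pending", "Succeeded", "Failed", "Unknown"
	Ready       bool   // the PodReady condition is True
	NodeName    string // empty until the scheduler binds the pod
	Terminating bool   // DeletionTimestamp != nil
}

type startupStatus struct {
	Running, RunningButNotReady, Waiting, Pending int
	Inactive, Unknown, Terminating                int
}

func compute(pods []pod) startupStatus {
	var s startupStatus
	for _, p := range pods {
		switch {
		case p.Terminating:
			s.Terminating++
		case p.Phase == "Running" && p.Ready:
			s.Running++ // only ready pods count as running
		case p.Phase == "Running":
			s.RunningButNotReady++
		case p.Phase == "Pending" && p.NodeName == "":
			s.Waiting++ // created, not yet scheduled
		case p.Phase == "Pending":
			s.Pending++ // scheduled, containers not running yet
		case p.Phase == "Succeeded" || p.Phase == "Failed":
			s.Inactive++
		case p.Phase == "Unknown":
			s.Unknown++
		}
	}
	return s
}

func main() {
	fmt.Printf("%+v\n", compute([]pod{
		{Phase: "Running", Ready: true},
		{Phase: "Running"},
		{Phase: "Pending"},
		{Phase: "Pending", NodeName: "node-1"},
	}))
}

The Waiting/Pending split mirrors the code above: a pending pod with an empty NodeName has not been scheduled yet, while one with a node assigned is waiting for its containers to start.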