From 1b0f981f82ce7ba9fe906d29332aa78f2a0d842d Mon Sep 17 00:00:00 2001
From: Andrzej Wasylkowski
Date: Thu, 1 Jun 2017 13:12:27 +0200
Subject: [PATCH 1/3] Made the GetReplicas function count only ready replicas.

Counting all replicas makes functions dependent on GetReplicas proceed
with trying to use replicas that are not ready yet.
---
 test/e2e/common/autoscaling_utils.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/e2e/common/autoscaling_utils.go b/test/e2e/common/autoscaling_utils.go
index a2abe0a3906..aa16ef4e207 100644
--- a/test/e2e/common/autoscaling_utils.go
+++ b/test/e2e/common/autoscaling_utils.go
@@ -321,21 +321,21 @@ func (rc *ResourceConsumer) GetReplicas() int {
 		if replicationController == nil {
 			framework.Failf(rcIsNil)
 		}
-		return int(replicationController.Status.Replicas)
+		return int(replicationController.Status.ReadyReplicas)
 	case KindDeployment:
 		deployment, err := rc.framework.ClientSet.Extensions().Deployments(rc.framework.Namespace.Name).Get(rc.name, metav1.GetOptions{})
 		framework.ExpectNoError(err)
 		if deployment == nil {
 			framework.Failf(deploymentIsNil)
 		}
-		return int(deployment.Status.Replicas)
+		return int(deployment.Status.ReadyReplicas)
 	case KindReplicaSet:
 		rs, err := rc.framework.ClientSet.Extensions().ReplicaSets(rc.framework.Namespace.Name).Get(rc.name, metav1.GetOptions{})
 		framework.ExpectNoError(err)
 		if rs == nil {
 			framework.Failf(rsIsNil)
 		}
-		return int(rs.Status.Replicas)
+		return int(rs.Status.ReadyReplicas)
 	default:
 		framework.Failf(invalidKind)
 	}

From 38f175f11583d2957eaed28827698600ff4c2d57 Mon Sep 17 00:00:00 2001
From: Andrzej Wasylkowski
Date: Sat, 10 Jun 2017 10:47:31 +0200
Subject: [PATCH 2/3] Made WaitForReplicas take a "how long to wait" parameter
 instead of it being hardcoded.

---
 test/e2e/autoscaling/horizontal_pod_autoscaling.go | 5 +++--
 test/e2e/common/autoscaling_utils.go               | 3 +--
 test/e2e/stackdriver_monitoring.go                 | 2 +-
 test/e2e/upgrades/horizontal_pod_autoscalers.go    | 8 +++++---
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/test/e2e/autoscaling/horizontal_pod_autoscaling.go b/test/e2e/autoscaling/horizontal_pod_autoscaling.go
index 826215ca803..1e76cea54f1 100644
--- a/test/e2e/autoscaling/horizontal_pod_autoscaling.go
+++ b/test/e2e/autoscaling/horizontal_pod_autoscaling.go
@@ -114,17 +114,18 @@ type HPAScaleTest struct {
 // The second state change (optional) is due to the CPU burst parameter, which HPA again responds to.
 // TODO The use of 3 states is arbitrary, we could eventually make this test handle "n" states once this test stabilizes.
 func (scaleTest *HPAScaleTest) run(name, kind string, rc *common.ResourceConsumer, f *framework.Framework) {
+	const timeToWait = 15 * time.Minute
 	rc = common.NewDynamicResourceConsumer(name, kind, int(scaleTest.initPods), int(scaleTest.totalInitialCPUUsage), 0, 0, scaleTest.perPodCPURequest, 200, f)
 	defer rc.CleanUp()
 	hpa := common.CreateCPUHorizontalPodAutoscaler(rc, scaleTest.targetCPUUtilizationPercent, scaleTest.minPods, scaleTest.maxPods)
 	defer common.DeleteHorizontalPodAutoscaler(rc, hpa.Name)
-	rc.WaitForReplicas(int(scaleTest.firstScale))
+	rc.WaitForReplicas(int(scaleTest.firstScale), timeToWait)
 	if scaleTest.firstScaleStasis > 0 {
 		rc.EnsureDesiredReplicas(int(scaleTest.firstScale), scaleTest.firstScaleStasis)
 	}
 	if scaleTest.cpuBurst > 0 && scaleTest.secondScale > 0 {
 		rc.ConsumeCPU(scaleTest.cpuBurst)
-		rc.WaitForReplicas(int(scaleTest.secondScale))
+		rc.WaitForReplicas(int(scaleTest.secondScale), timeToWait)
 	}
 }
 
diff --git a/test/e2e/common/autoscaling_utils.go b/test/e2e/common/autoscaling_utils.go
index aa16ef4e207..8d15bc0295a 100644
--- a/test/e2e/common/autoscaling_utils.go
+++ b/test/e2e/common/autoscaling_utils.go
@@ -342,8 +342,7 @@ func (rc *ResourceConsumer) GetReplicas() int {
 	return 0
 }
 
-func (rc *ResourceConsumer) WaitForReplicas(desiredReplicas int) {
-	duration := 15 * time.Minute
+func (rc *ResourceConsumer) WaitForReplicas(desiredReplicas int, duration time.Duration) {
 	interval := 20 * time.Second
 	err := wait.PollImmediate(interval, duration, func() (bool, error) {
 		replicas := rc.GetReplicas()
diff --git a/test/e2e/stackdriver_monitoring.go b/test/e2e/stackdriver_monitoring.go
index f8060d90854..f175c13e7d3 100644
--- a/test/e2e/stackdriver_monitoring.go
+++ b/test/e2e/stackdriver_monitoring.go
@@ -89,7 +89,7 @@ func testStackdriverMonitoring(f *framework.Framework, pods, allPodsCPU int, per
 	rc := common.NewDynamicResourceConsumer(rcName, common.KindDeployment, pods, allPodsCPU, memoryUsed, 0, perPodCPU, memoryLimit, f)
 	defer rc.CleanUp()
 
-	rc.WaitForReplicas(pods)
+	rc.WaitForReplicas(pods, 15*time.Minute)
 
 	metricsMap := map[string]bool{}
 	pollingFunction := checkForMetrics(projectId, gcmService, time.Now(), metricsMap, allPodsCPU, perPodCPU)
diff --git a/test/e2e/upgrades/horizontal_pod_autoscalers.go b/test/e2e/upgrades/horizontal_pod_autoscalers.go
index 51074e91a98..bb5110ebb38 100644
--- a/test/e2e/upgrades/horizontal_pod_autoscalers.go
+++ b/test/e2e/upgrades/horizontal_pod_autoscalers.go
@@ -18,6 +18,7 @@ package upgrades
 
 import (
 	"fmt"
+	"time"
 
 	autoscalingv1 "k8s.io/kubernetes/pkg/apis/autoscaling/v1"
 	"k8s.io/kubernetes/test/e2e/common"
@@ -72,22 +73,23 @@ func (t *HPAUpgradeTest) Teardown(f *framework.Framework) {
 }
 
 func (t *HPAUpgradeTest) test() {
+	const timeToWait = 15 * time.Minute
 	t.rc.Resume()
 
 	By(fmt.Sprintf("HPA scales to 1 replica: consume 10 millicores, target per pod 100 millicores, min pods 1."))
 	t.rc.ConsumeCPU(10) /* millicores */
 	By(fmt.Sprintf("HPA waits for 1 replica"))
-	t.rc.WaitForReplicas(1)
+	t.rc.WaitForReplicas(1, timeToWait)
 
 	By(fmt.Sprintf("HPA scales to 3 replicas: consume 250 millicores, target per pod 100 millicores."))
 	t.rc.ConsumeCPU(250) /* millicores */
 	By(fmt.Sprintf("HPA waits for 3 replicas"))
-	t.rc.WaitForReplicas(3)
+	t.rc.WaitForReplicas(3, timeToWait)
 
 	By(fmt.Sprintf("HPA scales to 5 replicas: consume 700 millicores, target per pod 100 millicores, max pods 5."))
 	t.rc.ConsumeCPU(700) /* millicores */
 	By(fmt.Sprintf("HPA waits for 5 replicas"))
-	t.rc.WaitForReplicas(5)
+	t.rc.WaitForReplicas(5, timeToWait)
 
 	// We need to pause background goroutines as during upgrade master is unavailable and requests issued by them fail.
 	t.rc.Pause()

From ce9f3bcfefdbbaaec2c2c3f71f81c617dbd1e242 Mon Sep 17 00:00:00 2001
From: Andrzej Wasylkowski
Date: Thu, 1 Jun 2017 13:13:18 +0200
Subject: [PATCH 3/3] Added an end-to-end test measuring autoscaling's
 efficiency.

---
 test/e2e/autoscaling/BUILD                |   1 +
 test/e2e/autoscaling/autoscaling_timer.go | 111 ++++++++++++++++++++++
 2 files changed, 112 insertions(+)
 create mode 100644 test/e2e/autoscaling/autoscaling_timer.go

diff --git a/test/e2e/autoscaling/BUILD b/test/e2e/autoscaling/BUILD
index 1517a3c440f..d257c00fbac 100644
--- a/test/e2e/autoscaling/BUILD
+++ b/test/e2e/autoscaling/BUILD
@@ -10,6 +10,7 @@ load(
 go_library(
     name = "go_default_library",
     srcs = [
+        "autoscaling_timer.go",
         "cluster_size_autoscaling.go",
        "dns_autoscaling.go",
        "horizontal_pod_autoscaling.go",
diff --git a/test/e2e/autoscaling/autoscaling_timer.go b/test/e2e/autoscaling/autoscaling_timer.go
new file mode 100644
index 00000000000..c0b71930c42
--- /dev/null
+++ b/test/e2e/autoscaling/autoscaling_timer.go
@@ -0,0 +1,111 @@
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package autoscaling
+
+import (
+	"strings"
+	"time"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/kubernetes/pkg/api/v1"
+	"k8s.io/kubernetes/test/e2e/common"
+	"k8s.io/kubernetes/test/e2e/framework"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+)
+
+var _ = framework.KubeDescribe("[Feature:ClusterSizeAutoscalingScaleUp] [Slow] Autoscaling", func() {
+	f := framework.NewDefaultFramework("autoscaling")
+
+	framework.KubeDescribe("Autoscaling a service", func() {
+		BeforeEach(func() {
+			// Check if Cloud Autoscaler is enabled by trying to get its ConfigMap.
+			_, err := f.ClientSet.CoreV1().ConfigMaps("kube-system").Get("cluster-autoscaler-status", metav1.GetOptions{})
+			if err != nil {
+				framework.Skipf("test expects Cluster Autoscaler to be enabled")
+			}
+		})
+
+		Context("from 1 pod and 3 nodes to 8 pods and >=4 nodes", func() {
+			const nodesNum = 3       // Expect there to be 3 nodes before and after the test.
+			var nodeGroupName string // Set by BeforeEach, used by AfterEach to scale this node group down after the test.
+			var nodes *v1.NodeList   // Set by BeforeEach, used by Measure to calculate CPU request based on node's sizes.
+
+			BeforeEach(func() {
+				// Make sure there is only 1 node group, otherwise this test becomes useless.
+				nodeGroups := strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",")
+				if len(nodeGroups) != 1 {
+					framework.Skipf("test expects 1 node group, found %d", len(nodeGroups))
+				}
+				nodeGroupName = nodeGroups[0]
+
+				// Make sure the node group has exactly 'nodesNum' nodes, otherwise this test becomes useless.
+				nodeGroupSize, err := framework.GroupSize(nodeGroupName)
+				framework.ExpectNoError(err)
+				if nodeGroupSize != nodesNum {
+					framework.Skipf("test expects %d nodes, found %d", nodesNum, nodeGroupSize)
+				}
+
+				// Make sure all nodes are schedulable, otherwise we are in some kind of a problem state.
+				nodes = framework.GetReadySchedulableNodesOrDie(f.ClientSet)
+				schedulableCount := len(nodes.Items)
+				Expect(schedulableCount).To(Equal(nodeGroupSize), "not all nodes are schedulable")
+			})
+
+			AfterEach(func() {
+				// Scale down back to only 'nodesNum' nodes, as expected at the start of the test.
+				framework.ExpectNoError(framework.ResizeGroup(nodeGroupName, nodesNum))
+				framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, nodesNum, 15*time.Minute))
+			})
+
+			Measure("takes less than 15 minutes", func(b Benchmarker) {
+				// Measured over multiple samples, scaling takes 10 +/- 2 minutes, so 15 minutes should be fully sufficient.
+				const timeToWait = 15 * time.Minute
+
+				// Calculate the CPU request of the service.
+				// This test expects that 8 pods will not fit in 'nodesNum' nodes, but will fit in >='nodesNum'+1 nodes.
+				// Make it so that 'nodesNum' pods fit perfectly per node (in practice other things take space, so less than that will fit).
+				nodeCpus := nodes.Items[0].Status.Capacity[v1.ResourceCPU]
+				nodeCpuMillis := (&nodeCpus).MilliValue()
+				cpuRequestMillis := int64(nodeCpuMillis / nodesNum)
+
+				// Start the service we want to scale and wait for it to be up and running.
+				nodeMemoryBytes := nodes.Items[0].Status.Capacity[v1.ResourceMemory]
+				nodeMemoryMB := (&nodeMemoryBytes).Value() / 1024 / 1024
+				memRequestMB := nodeMemoryMB / 10 // Ensure each pod takes not more than 10% of node's total memory.
+				replicas := 1
+				resourceConsumer := common.NewDynamicResourceConsumer("resource-consumer", common.KindDeployment, replicas, 0, 0, 0, cpuRequestMillis, memRequestMB, f)
+				defer resourceConsumer.CleanUp()
+				resourceConsumer.WaitForReplicas(replicas, 1*time.Minute) // Should finish ~immediately, so 1 minute is more than enough.
+
+				// Enable Horizontal Pod Autoscaler with 50% target utilization and
+				// scale up the CPU usage to trigger autoscaling to 8 pods for target to be satisfied.
+				targetCpuUtilizationPercent := int32(50)
+				hpa := common.CreateCPUHorizontalPodAutoscaler(resourceConsumer, targetCpuUtilizationPercent, 1, 10)
+				defer common.DeleteHorizontalPodAutoscaler(resourceConsumer, hpa.Name)
+				cpuLoad := 8 * cpuRequestMillis * int64(targetCpuUtilizationPercent) / 100 // 8 pods utilized to the target level
+				resourceConsumer.ConsumeCPU(int(cpuLoad))
+
+				// Measure the time it takes for the service to scale to 8 pods with 50% CPU utilization each.
+				b.Time("total scale-up time", func() {
+					resourceConsumer.WaitForReplicas(8, timeToWait)
+				})
+			}, 1) // Increase to run the test more than once.
+		})
+	})
+})
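
Usage note (illustrative sketch, not part of the patch series): after patch 1, WaitForReplicas only returns once the requested number of replicas is ready, and after patch 2 every caller passes its own timeout instead of relying on a hardcoded 15 minutes. The function name exampleScaleUpWait, the consumer parameters, and the 10-minute timeout below are assumptions made for this example, not values taken from the patches.

    package autoscaling

    import (
        "time"

        "k8s.io/kubernetes/test/e2e/common"
        "k8s.io/kubernetes/test/e2e/framework"
    )

    // exampleScaleUpWait sketches the post-patch calling convention.
    func exampleScaleUpWait(f *framework.Framework) {
        // Deployment-backed consumer: 1 replica, 250 millicores total initial CPU,
        // 500m per-pod CPU limit, 200 MB memory limit (illustrative values).
        rc := common.NewDynamicResourceConsumer("example-consumer", common.KindDeployment, 1, 250, 0, 0, 500, 200, f)
        defer rc.CleanUp()

        // Drive CPU usage up, then block until 3 replicas are ready (patch 1)
        // or the caller-chosen timeout expires (patch 2); the old signature
        // took no timeout and always waited up to 15 minutes.
        rc.ConsumeCPU(500)
        rc.WaitForReplicas(3, 10*time.Minute)
    }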