Merge pull request #47332 from wasylkowski/e2e-autoscaling-timer

Automatic merge from submit-queue

Added an e2e test timing HPA + CA scaling up from 1 to 8 pods and from 3 to >=4 nodes

**What this PR does / why we need it**:

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #46847 

**Special notes for your reviewer**:

**Release note**:

```release-note
NONE
```
Kubernetes Submit Queue 2017-06-20 10:06:34 -07:00 committed by GitHub
commit 4fff2fbad3
6 changed files with 125 additions and 11 deletions


@@ -10,6 +10,7 @@ load(
go_library(
    name = "go_default_library",
    srcs = [
        "autoscaling_timer.go",
        "cluster_size_autoscaling.go",
        "dns_autoscaling.go",
        "horizontal_pod_autoscaling.go",


@@ -0,0 +1,111 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package autoscaling

import (
    "strings"
    "time"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/kubernetes/pkg/api/v1"
    "k8s.io/kubernetes/test/e2e/common"
    "k8s.io/kubernetes/test/e2e/framework"

    . "github.com/onsi/ginkgo"
    . "github.com/onsi/gomega"
)

var _ = framework.KubeDescribe("[Feature:ClusterSizeAutoscalingScaleUp] [Slow] Autoscaling", func() {
    f := framework.NewDefaultFramework("autoscaling")

    framework.KubeDescribe("Autoscaling a service", func() {
        BeforeEach(func() {
            // Check if Cluster Autoscaler is enabled by trying to get its ConfigMap.
            _, err := f.ClientSet.CoreV1().ConfigMaps("kube-system").Get("cluster-autoscaler-status", metav1.GetOptions{})
            if err != nil {
                framework.Skipf("test expects Cluster Autoscaler to be enabled")
            }
        })

        Context("from 1 pod and 3 nodes to 8 pods and >=4 nodes", func() {
            const nodesNum = 3       // Expect there to be 3 nodes before and after the test.
            var nodeGroupName string // Set by BeforeEach, used by AfterEach to scale this node group down after the test.
            var nodes *v1.NodeList   // Set by BeforeEach, used by Measure to calculate CPU request based on node sizes.

            BeforeEach(func() {
                // Make sure there is only 1 node group, otherwise this test becomes useless.
                nodeGroups := strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",")
                if len(nodeGroups) != 1 {
                    framework.Skipf("test expects 1 node group, found %d", len(nodeGroups))
                }
                nodeGroupName = nodeGroups[0]

                // Make sure the node group has exactly 'nodesNum' nodes, otherwise this test becomes useless.
                nodeGroupSize, err := framework.GroupSize(nodeGroupName)
                framework.ExpectNoError(err)
                if nodeGroupSize != nodesNum {
                    framework.Skipf("test expects %d nodes, found %d", nodesNum, nodeGroupSize)
                }

                // Make sure all nodes are schedulable, otherwise we are in some kind of a problem state.
                nodes = framework.GetReadySchedulableNodesOrDie(f.ClientSet)
                schedulableCount := len(nodes.Items)
                Expect(schedulableCount).To(Equal(nodeGroupSize), "not all nodes are schedulable")
            })

            AfterEach(func() {
                // Scale down back to only 'nodesNum' nodes, as expected at the start of the test.
                framework.ExpectNoError(framework.ResizeGroup(nodeGroupName, nodesNum))
                framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, nodesNum, 15*time.Minute))
            })

            Measure("takes less than 15 minutes", func(b Benchmarker) {
                // Measured over multiple samples, scaling takes 10 +/- 2 minutes, so 15 minutes should be fully sufficient.
                const timeToWait = 15 * time.Minute

                // Calculate the CPU request of the service.
                // This test expects that 8 pods will not fit in 'nodesNum' nodes, but will fit in >= 'nodesNum'+1 nodes.
                // Make it so that 'nodesNum' pods fit perfectly per node (in practice other things take space, so less than that will fit).
                nodeCpus := nodes.Items[0].Status.Capacity[v1.ResourceCPU]
                nodeCpuMillis := (&nodeCpus).MilliValue()
                cpuRequestMillis := int64(nodeCpuMillis / nodesNum)
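                // Worked example (illustrative numbers, not taken from any particular cluster): on
                // 4-CPU nodes (4000 millicores) with nodesNum = 3, cpuRequestMillis = 4000/3 = 1333m.
                // In theory 3 such pods fit per node, but kube-system pods already request CPU on
                // every node, so in practice only ~2 fit, and 8 replicas cannot be scheduled on 3
                // nodes without the Cluster Autoscaler adding at least one more.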
// Start the service we want to scale and wait for it to be up and running.
nodeMemoryBytes := nodes.Items[0].Status.Capacity[v1.ResourceMemory]
nodeMemoryMB := (&nodeMemoryBytes).Value() / 1024 / 1024
memRequestMB := nodeMemoryMB / 10 // Ensure each pod takes not more than 10% of node's total memory.
replicas := 1
resourceConsumer := common.NewDynamicResourceConsumer("resource-consumer", common.KindDeployment, replicas, 0, 0, 0, cpuRequestMillis, memRequestMB, f)
defer resourceConsumer.CleanUp()
resourceConsumer.WaitForReplicas(replicas, 1*time.Minute) // Should finish ~immediately, so 1 minute is more than enough.
// Enable Horizontal Pod Autoscaler with 50% target utilization and
// scale up the CPU usage to trigger autoscaling to 8 pods for target to be satisfied.
targetCpuUtilizationPercent := int32(50)
hpa := common.CreateCPUHorizontalPodAutoscaler(resourceConsumer, targetCpuUtilizationPercent, 1, 10)
defer common.DeleteHorizontalPodAutoscaler(resourceConsumer, hpa.Name)
cpuLoad := 8 * cpuRequestMillis * int64(targetCpuUtilizationPercent) / 100 // 8 pods utilized to the target level
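                // Worked example (same illustrative numbers): with cpuRequestMillis = 1333m and a 50%
                // target, cpuLoad = 8 * 1333 * 50 / 100 = 5332 millicores in total. The HPA keeps
                // adding replicas until average utilization falls to the target, i.e. until the
                // 5332m is spread across 8 pods running at ~666m (50% of their request) each.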
                resourceConsumer.ConsumeCPU(int(cpuLoad))

                // Measure the time it takes for the service to scale to 8 pods with 50% CPU utilization each.
                b.Time("total scale-up time", func() {
                    resourceConsumer.WaitForReplicas(8, timeToWait)
                })
            }, 1) // Increase to run the test more than once.
        })
    })
})


@@ -114,17 +114,18 @@ type HPAScaleTest struct {
// The second state change (optional) is due to the CPU burst parameter, which HPA again responds to.
// TODO The use of 3 states is arbitrary, we could eventually make this test handle "n" states once this test stabilizes.
func (scaleTest *HPAScaleTest) run(name, kind string, rc *common.ResourceConsumer, f *framework.Framework) {
    const timeToWait = 15 * time.Minute
    rc = common.NewDynamicResourceConsumer(name, kind, int(scaleTest.initPods), int(scaleTest.totalInitialCPUUsage), 0, 0, scaleTest.perPodCPURequest, 200, f)
    defer rc.CleanUp()
    hpa := common.CreateCPUHorizontalPodAutoscaler(rc, scaleTest.targetCPUUtilizationPercent, scaleTest.minPods, scaleTest.maxPods)
    defer common.DeleteHorizontalPodAutoscaler(rc, hpa.Name)
    rc.WaitForReplicas(int(scaleTest.firstScale))
    rc.WaitForReplicas(int(scaleTest.firstScale), timeToWait)
    if scaleTest.firstScaleStasis > 0 {
        rc.EnsureDesiredReplicas(int(scaleTest.firstScale), scaleTest.firstScaleStasis)
    }
    if scaleTest.cpuBurst > 0 && scaleTest.secondScale > 0 {
        rc.ConsumeCPU(scaleTest.cpuBurst)
        rc.WaitForReplicas(int(scaleTest.secondScale))
        rc.WaitForReplicas(int(scaleTest.secondScale), timeToWait)
    }
}
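
For orientation, callers of this helper fill in the `HPAScaleTest` fields referenced above and invoke `run`; the sketch below is illustrative only — the concrete values, the function name `testDeploymentScaleUp`, and the test name `"rc-light"` are assumptions, not part of this diff:

```go
// Illustrative sketch (not in this diff): a scale-up case built on HPAScaleTest.
func testDeploymentScaleUp(rc *common.ResourceConsumer, f *framework.Framework) {
    scaleTest := &HPAScaleTest{
        initPods:                    1,   // start with a single pod
        totalInitialCPUUsage:        250, // millicores consumed before the HPA reacts
        perPodCPURequest:            500, // millicores requested per pod
        targetCPUUtilizationPercent: 20,  // HPA target: 100m per pod
        minPods:                     1,
        maxPods:                     5,
        firstScale:                  3,   // 250m at 100m per pod => 3 replicas
        cpuBurst:                    700, // raise consumption to 700m for the second state change
        secondScale:                 5,   // 700m would need 7 replicas, capped at maxPods
    }
    scaleTest.run("rc-light", common.KindDeployment, rc, f)
}
```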


@@ -321,29 +321,28 @@ func (rc *ResourceConsumer) GetReplicas() int {
        if replicationController == nil {
            framework.Failf(rcIsNil)
        }
        return int(replicationController.Status.Replicas)
        return int(replicationController.Status.ReadyReplicas)
    case KindDeployment:
        deployment, err := rc.framework.ClientSet.Extensions().Deployments(rc.framework.Namespace.Name).Get(rc.name, metav1.GetOptions{})
        framework.ExpectNoError(err)
        if deployment == nil {
            framework.Failf(deploymentIsNil)
        }
        return int(deployment.Status.Replicas)
        return int(deployment.Status.ReadyReplicas)
    case KindReplicaSet:
        rs, err := rc.framework.ClientSet.Extensions().ReplicaSets(rc.framework.Namespace.Name).Get(rc.name, metav1.GetOptions{})
        framework.ExpectNoError(err)
        if rs == nil {
            framework.Failf(rsIsNil)
        }
        return int(rs.Status.Replicas)
        return int(rs.Status.ReadyReplicas)
    default:
        framework.Failf(invalidKind)
    }
    return 0
}

func (rc *ResourceConsumer) WaitForReplicas(desiredReplicas int) {
    duration := 15 * time.Minute
func (rc *ResourceConsumer) WaitForReplicas(desiredReplicas int, duration time.Duration) {
    interval := 20 * time.Second
    err := wait.PollImmediate(interval, duration, func() (bool, error) {
        replicas := rc.GetReplicas()
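
For context, the rest of the updated helper (cut off by the hunk above) follows this general shape; a minimal sketch assuming the framework's standard logging and error helpers, not an exact copy of the file:

```go
func (rc *ResourceConsumer) WaitForReplicas(desiredReplicas int, duration time.Duration) {
    interval := 20 * time.Second
    err := wait.PollImmediate(interval, duration, func() (bool, error) {
        replicas := rc.GetReplicas()
        framework.Logf("waiting for %d replicas (current: %d)", desiredReplicas, replicas)
        // Done once the number of ready replicas matches the target; otherwise keep polling.
        return replicas == desiredReplicas, nil
    })
    framework.ExpectNoError(err, "timeout waiting %v for %d replicas", duration, desiredReplicas)
}
```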


@@ -89,7 +89,7 @@ func testStackdriverMonitoring(f *framework.Framework, pods, allPodsCPU int, per
    rc := common.NewDynamicResourceConsumer(rcName, common.KindDeployment, pods, allPodsCPU, memoryUsed, 0, perPodCPU, memoryLimit, f)
    defer rc.CleanUp()
    rc.WaitForReplicas(pods)
    rc.WaitForReplicas(pods, 15*time.Minute)
    metricsMap := map[string]bool{}
    pollingFunction := checkForMetrics(projectId, gcmService, time.Now(), metricsMap, allPodsCPU, perPodCPU)


@@ -18,6 +18,7 @@ package upgrades

import (
    "fmt"
    "time"

    autoscalingv1 "k8s.io/kubernetes/pkg/apis/autoscaling/v1"
    "k8s.io/kubernetes/test/e2e/common"

@@ -72,22 +73,23 @@ func (t *HPAUpgradeTest) Teardown(f *framework.Framework) {
}

func (t *HPAUpgradeTest) test() {
    const timeToWait = 15 * time.Minute
    t.rc.Resume()

    By(fmt.Sprintf("HPA scales to 1 replica: consume 10 millicores, target per pod 100 millicores, min pods 1."))
    t.rc.ConsumeCPU(10) /* millicores */
    By(fmt.Sprintf("HPA waits for 1 replica"))
    t.rc.WaitForReplicas(1)
    t.rc.WaitForReplicas(1, timeToWait)

    By(fmt.Sprintf("HPA scales to 3 replicas: consume 250 millicores, target per pod 100 millicores."))
    t.rc.ConsumeCPU(250) /* millicores */
    By(fmt.Sprintf("HPA waits for 3 replicas"))
    t.rc.WaitForReplicas(3)
    t.rc.WaitForReplicas(3, timeToWait)

    By(fmt.Sprintf("HPA scales to 5 replicas: consume 700 millicores, target per pod 100 millicores, max pods 5."))
    t.rc.ConsumeCPU(700) /* millicores */
    By(fmt.Sprintf("HPA waits for 5 replicas"))
    t.rc.WaitForReplicas(5)
    t.rc.WaitForReplicas(5, timeToWait)

    // We need to pause the background goroutines, because the master is unavailable during the upgrade and requests issued by them would fail.
    t.rc.Pause()