Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-24 12:15:52 +00:00
Merge pull request #47332 from wasylkowski/e2e-autoscaling-timer
Automatic merge from submit-queue

Added an e2e test timing HPA + CA scaling up from 1 to 8 pods and from 3 to >=4 nodes

**What this PR does / why we need it**:

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #46847

**Special notes for your reviewer**:

**Release note**:
```release-note
NONE
```
This commit is contained in commit 4fff2fbad3.
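For readers who haven't used Ginkgo's benchmarking hooks: the new test relies on `Measure` and `Benchmarker.Time` (Ginkgo v1, as vendored in Kubernetes at the time) to record how long scale-up takes. Below is a minimal, self-contained sketch of that pattern, separate from this PR; the package name and the simulated sleep are placeholders.

```go
package example

import (
    "testing"
    "time"

    . "github.com/onsi/ginkgo"
    "github.com/onsi/gomega"
)

// Hook the Ginkgo suite into `go test`.
func TestTimingExample(t *testing.T) {
    gomega.RegisterFailHandler(Fail)
    RunSpecs(t, "timing example")
}

var _ = Describe("scale-up timing", func() {
    // Measure runs the body the given number of times (1 here) and reports the samples.
    Measure("records how long a step takes", func(b Benchmarker) {
        b.Time("simulated scale-up", func() {
            time.Sleep(10 * time.Millisecond) // stand-in for waiting on replicas
        })
    }, 1)
})
```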
@@ -10,6 +10,7 @@ load(
go_library(
    name = "go_default_library",
    srcs = [
+       "autoscaling_timer.go",
        "cluster_size_autoscaling.go",
        "dns_autoscaling.go",
        "horizontal_pod_autoscaling.go",
test/e2e/autoscaling/autoscaling_timer.go (new file, 111 lines)
@@ -0,0 +1,111 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package autoscaling

import (
    "strings"
    "time"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/kubernetes/pkg/api/v1"
    "k8s.io/kubernetes/test/e2e/common"
    "k8s.io/kubernetes/test/e2e/framework"

    . "github.com/onsi/ginkgo"
    . "github.com/onsi/gomega"
)

var _ = framework.KubeDescribe("[Feature:ClusterSizeAutoscalingScaleUp] [Slow] Autoscaling", func() {
    f := framework.NewDefaultFramework("autoscaling")

    framework.KubeDescribe("Autoscaling a service", func() {
        BeforeEach(func() {
            // Check if Cluster Autoscaler is enabled by trying to get its ConfigMap.
            _, err := f.ClientSet.CoreV1().ConfigMaps("kube-system").Get("cluster-autoscaler-status", metav1.GetOptions{})
            if err != nil {
                framework.Skipf("test expects Cluster Autoscaler to be enabled")
            }
        })

        Context("from 1 pod and 3 nodes to 8 pods and >=4 nodes", func() {
            const nodesNum = 3       // Expect there to be 3 nodes before and after the test.
            var nodeGroupName string // Set by BeforeEach, used by AfterEach to scale this node group down after the test.
            var nodes *v1.NodeList   // Set by BeforeEach, used by Measure to calculate the CPU request based on the nodes' sizes.

            BeforeEach(func() {
                // Make sure there is only 1 node group, otherwise this test becomes useless.
                nodeGroups := strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",")
                if len(nodeGroups) != 1 {
                    framework.Skipf("test expects 1 node group, found %d", len(nodeGroups))
                }
                nodeGroupName = nodeGroups[0]

                // Make sure the node group has exactly 'nodesNum' nodes, otherwise this test becomes useless.
                nodeGroupSize, err := framework.GroupSize(nodeGroupName)
                framework.ExpectNoError(err)
                if nodeGroupSize != nodesNum {
                    framework.Skipf("test expects %d nodes, found %d", nodesNum, nodeGroupSize)
                }

                // Make sure all nodes are schedulable, otherwise we are in some kind of a problem state.
                nodes = framework.GetReadySchedulableNodesOrDie(f.ClientSet)
                schedulableCount := len(nodes.Items)
                Expect(schedulableCount).To(Equal(nodeGroupSize), "not all nodes are schedulable")
            })

            AfterEach(func() {
                // Scale back down to only 'nodesNum' nodes, as expected at the start of the test.
                framework.ExpectNoError(framework.ResizeGroup(nodeGroupName, nodesNum))
                framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, nodesNum, 15*time.Minute))
            })

            Measure("takes less than 15 minutes", func(b Benchmarker) {
                // Measured over multiple samples, scaling takes 10 +/- 2 minutes, so 15 minutes should be fully sufficient.
                const timeToWait = 15 * time.Minute

                // Calculate the CPU request of the service.
                // This test expects that 8 pods will not fit in 'nodesNum' nodes, but will fit in >='nodesNum'+1 nodes.
                // Make it so that 'nodesNum' pods fit perfectly per node (in practice other things take space, so fewer than that will fit).
                nodeCpus := nodes.Items[0].Status.Capacity[v1.ResourceCPU]
                nodeCpuMillis := (&nodeCpus).MilliValue()
                cpuRequestMillis := int64(nodeCpuMillis / nodesNum)

                // Start the service we want to scale and wait for it to be up and running.
                nodeMemoryBytes := nodes.Items[0].Status.Capacity[v1.ResourceMemory]
                nodeMemoryMB := (&nodeMemoryBytes).Value() / 1024 / 1024
                memRequestMB := nodeMemoryMB / 10 // Ensure each pod takes no more than 10% of the node's total memory.
                replicas := 1
                resourceConsumer := common.NewDynamicResourceConsumer("resource-consumer", common.KindDeployment, replicas, 0, 0, 0, cpuRequestMillis, memRequestMB, f)
                defer resourceConsumer.CleanUp()
                resourceConsumer.WaitForReplicas(replicas, 1*time.Minute) // Should finish ~immediately, so 1 minute is more than enough.

                // Enable the Horizontal Pod Autoscaler with 50% target utilization and
                // scale up the CPU usage to trigger autoscaling to 8 pods for the target to be satisfied.
                targetCpuUtilizationPercent := int32(50)
                hpa := common.CreateCPUHorizontalPodAutoscaler(resourceConsumer, targetCpuUtilizationPercent, 1, 10)
                defer common.DeleteHorizontalPodAutoscaler(resourceConsumer, hpa.Name)
                cpuLoad := 8 * cpuRequestMillis * int64(targetCpuUtilizationPercent) / 100 // 8 pods utilized to the target level
                resourceConsumer.ConsumeCPU(int(cpuLoad))

                // Measure the time it takes for the service to scale to 8 pods with 50% CPU utilization each.
                b.Time("total scale-up time", func() {
                    resourceConsumer.WaitForReplicas(8, timeToWait)
                })
            }, 1) // Increase to run the test more than once.
        })
    })
})
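A note on the sizing arithmetic above: each pod requests a third of a node's CPU, so in theory three pods fit per node (nine on three nodes), but system pods also consume CPU, so in practice only about two such pods fit per node; that is why eight replicas cannot be scheduled until the Cluster Autoscaler adds at least a fourth node. Below is a standalone sketch of the numbers, assuming a hypothetical 4-CPU node (the real test reads capacity from the first node).

```go
package main

import "fmt"

func main() {
    const (
        nodesNum      = 3    // nodes in the group before the test
        nodeCpuMillis = 4000 // hypothetical 4-CPU node; the test derives this from node capacity
        targetPercent = 50   // HPA target CPU utilization
        wantPods      = 8    // replicas the HPA should eventually reach
    )

    // Per-pod CPU request: a third of a node, i.e. 3 pods per node in theory,
    // roughly 2 per node once system pods are accounted for.
    cpuRequestMillis := int64(nodeCpuMillis / nodesNum) // 1333m

    // Total CPU the resource consumer must burn so that 8 pods each sit at ~50%
    // of their request, which matches the HPA target and stabilizes at 8 replicas.
    cpuLoadMillis := wantPods * cpuRequestMillis * targetPercent / 100 // 5332m

    fmt.Printf("per-pod request: %dm, total consumed load: %dm\n", cpuRequestMillis, cpuLoadMillis)
}
```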
@@ -114,17 +114,18 @@ type HPAScaleTest struct {
// The second state change (optional) is due to the CPU burst parameter, which HPA again responds to.
// TODO The use of 3 states is arbitrary, we could eventually make this test handle "n" states once this test stabilizes.
func (scaleTest *HPAScaleTest) run(name, kind string, rc *common.ResourceConsumer, f *framework.Framework) {
+   const timeToWait = 15 * time.Minute
    rc = common.NewDynamicResourceConsumer(name, kind, int(scaleTest.initPods), int(scaleTest.totalInitialCPUUsage), 0, 0, scaleTest.perPodCPURequest, 200, f)
    defer rc.CleanUp()
    hpa := common.CreateCPUHorizontalPodAutoscaler(rc, scaleTest.targetCPUUtilizationPercent, scaleTest.minPods, scaleTest.maxPods)
    defer common.DeleteHorizontalPodAutoscaler(rc, hpa.Name)
-   rc.WaitForReplicas(int(scaleTest.firstScale))
+   rc.WaitForReplicas(int(scaleTest.firstScale), timeToWait)
    if scaleTest.firstScaleStasis > 0 {
        rc.EnsureDesiredReplicas(int(scaleTest.firstScale), scaleTest.firstScaleStasis)
    }
    if scaleTest.cpuBurst > 0 && scaleTest.secondScale > 0 {
        rc.ConsumeCPU(scaleTest.cpuBurst)
-       rc.WaitForReplicas(int(scaleTest.secondScale))
+       rc.WaitForReplicas(int(scaleTest.secondScale), timeToWait)
    }
}
@@ -321,29 +321,28 @@ func (rc *ResourceConsumer) GetReplicas() int {
        if replicationController == nil {
            framework.Failf(rcIsNil)
        }
-       return int(replicationController.Status.Replicas)
+       return int(replicationController.Status.ReadyReplicas)
    case KindDeployment:
        deployment, err := rc.framework.ClientSet.Extensions().Deployments(rc.framework.Namespace.Name).Get(rc.name, metav1.GetOptions{})
        framework.ExpectNoError(err)
        if deployment == nil {
            framework.Failf(deploymentIsNil)
        }
-       return int(deployment.Status.Replicas)
+       return int(deployment.Status.ReadyReplicas)
    case KindReplicaSet:
        rs, err := rc.framework.ClientSet.Extensions().ReplicaSets(rc.framework.Namespace.Name).Get(rc.name, metav1.GetOptions{})
        framework.ExpectNoError(err)
        if rs == nil {
            framework.Failf(rsIsNil)
        }
-       return int(rs.Status.Replicas)
+       return int(rs.Status.ReadyReplicas)
    default:
        framework.Failf(invalidKind)
    }
    return 0
}

-func (rc *ResourceConsumer) WaitForReplicas(desiredReplicas int) {
-   duration := 15 * time.Minute
+func (rc *ResourceConsumer) WaitForReplicas(desiredReplicas int, duration time.Duration) {
    interval := 20 * time.Second
    err := wait.PollImmediate(interval, duration, func() (bool, error) {
        replicas := rc.GetReplicas()
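The change above makes the wait timeout a parameter: `WaitForReplicas(desiredReplicas, duration)` hands the caller-supplied duration to `wait.PollImmediate`, and `GetReplicas` now counts only ready replicas. As a rough, standalone illustration of that poll-until-timeout pattern (plain Go, not the apimachinery `wait` package; the condition below is a stand-in for checking ready replicas):

```go
package main

import (
    "errors"
    "fmt"
    "time"
)

// pollImmediate mimics the shape of wait.PollImmediate: run the condition once
// right away, then every interval, until it reports done or the timeout elapses.
func pollImmediate(interval, timeout time.Duration, condition func() (bool, error)) error {
    deadline := time.Now().Add(timeout)
    for {
        done, err := condition()
        if err != nil {
            return err
        }
        if done {
            return nil
        }
        if time.Now().After(deadline) {
            return errors.New("timed out waiting for the condition")
        }
        time.Sleep(interval)
    }
}

func main() {
    readyReplicas := 0
    err := pollImmediate(100*time.Millisecond, 2*time.Second, func() (bool, error) {
        readyReplicas++ // stand-in for querying the deployment's ready replicas
        return readyReplicas >= 3, nil
    })
    fmt.Println("done waiting, err =", err)
}
```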
@@ -89,7 +89,7 @@ func testStackdriverMonitoring(f *framework.Framework, pods, allPodsCPU int, per
    rc := common.NewDynamicResourceConsumer(rcName, common.KindDeployment, pods, allPodsCPU, memoryUsed, 0, perPodCPU, memoryLimit, f)
    defer rc.CleanUp()

-   rc.WaitForReplicas(pods)
+   rc.WaitForReplicas(pods, 15*time.Minute)

    metricsMap := map[string]bool{}
    pollingFunction := checkForMetrics(projectId, gcmService, time.Now(), metricsMap, allPodsCPU, perPodCPU)
@@ -18,6 +18,7 @@ package upgrades

import (
    "fmt"
+   "time"

    autoscalingv1 "k8s.io/kubernetes/pkg/apis/autoscaling/v1"
    "k8s.io/kubernetes/test/e2e/common"
@@ -72,22 +73,23 @@ func (t *HPAUpgradeTest) Teardown(f *framework.Framework) {
}

func (t *HPAUpgradeTest) test() {
+   const timeToWait = 15 * time.Minute
    t.rc.Resume()

    By(fmt.Sprintf("HPA scales to 1 replica: consume 10 millicores, target per pod 100 millicores, min pods 1."))
    t.rc.ConsumeCPU(10) /* millicores */
    By(fmt.Sprintf("HPA waits for 1 replica"))
-   t.rc.WaitForReplicas(1)
+   t.rc.WaitForReplicas(1, timeToWait)

    By(fmt.Sprintf("HPA scales to 3 replicas: consume 250 millicores, target per pod 100 millicores."))
    t.rc.ConsumeCPU(250) /* millicores */
    By(fmt.Sprintf("HPA waits for 3 replicas"))
-   t.rc.WaitForReplicas(3)
+   t.rc.WaitForReplicas(3, timeToWait)

    By(fmt.Sprintf("HPA scales to 5 replicas: consume 700 millicores, target per pod 100 millicores, max pods 5."))
    t.rc.ConsumeCPU(700) /* millicores */
    By(fmt.Sprintf("HPA waits for 5 replicas"))
-   t.rc.WaitForReplicas(5)
+   t.rc.WaitForReplicas(5, timeToWait)

    // We need to pause background goroutines as during upgrade master is unavailable and requests issued by them fail.
    t.rc.Pause()
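The replica counts asserted in the upgrade test follow from the HPA's proportional scaling rule, roughly desiredReplicas = ceil(totalConsumedCPU / targetPerPod), clamped to [minPods, maxPods]. Below is a small illustrative helper (not the HPA controller's actual code) applied to the numbers from the By() messages above, i.e. a 100-millicore per-pod target with min 1 and max 5.

```go
package main

import (
    "fmt"
    "math"
)

// expectedReplicas approximates the HPA rule for a CPU target given in millicores
// per pod: ceil(totalMillicores / targetMillicoresPerPod), clamped to [minPods, maxPods].
func expectedReplicas(totalMillicores, targetMillicoresPerPod, minPods, maxPods int) int {
    n := int(math.Ceil(float64(totalMillicores) / float64(targetMillicoresPerPod)))
    if n < minPods {
        n = minPods
    }
    if n > maxPods {
        n = maxPods
    }
    return n
}

func main() {
    for _, load := range []int{10, 250, 700} {
        fmt.Printf("consume %dm -> expect %d replicas\n", load, expectedReplicas(load, 100, 1, 5))
    }
    // Prints 1, 3, and 5 (700m would suggest 7 replicas, but maxPods caps it at 5).
}
```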