/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package autoscaling

import (
	"context"
	"strings"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/test/e2e/framework"
	e2eautoscaling "k8s.io/kubernetes/test/e2e/framework/autoscaling"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	admissionapi "k8s.io/pod-security-admission/api"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega/gmeasure"
)

var _ = SIGDescribe("[Feature:ClusterSizeAutoscalingScaleUp] [Slow] Autoscaling", func() {
	f := framework.NewDefaultFramework("autoscaling")
	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
	var experiment *gmeasure.Experiment

	ginkgo.Describe("Autoscaling a service", func() {
		ginkgo.BeforeEach(func(ctx context.Context) {
			// Check whether the Cluster Autoscaler is enabled by trying to get its ConfigMap.
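			// (The Cluster Autoscaler publishes its status in the "cluster-autoscaler-status" ConfigMap in
			// kube-system, so the ConfigMap's presence serves as a proxy for the autoscaler being enabled.)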
			_, err := f.ClientSet.CoreV1().ConfigMaps("kube-system").Get(ctx, "cluster-autoscaler-status", metav1.GetOptions{})
			if err != nil {
				e2eskipper.Skipf("test expects Cluster Autoscaler to be enabled")
			}
			experiment = gmeasure.NewExperiment("Autoscaling a service")
			ginkgo.AddReportEntry(experiment.Name, experiment)
		})

		ginkgo.Context("from 1 pod and 3 nodes to 8 pods and >=4 nodes", func() {
			const nodesNum = 3       // Expect there to be 3 nodes before and after the test.
			var nodeGroupName string // Set by BeforeEach, used by AfterEach to scale this node group down after the test.
			var nodes *v1.NodeList   // Set by BeforeEach, used by the measurement below to calculate the CPU request based on node size.

			ginkgo.BeforeEach(func(ctx context.Context) {
				// Make sure there is only 1 node group, otherwise this test becomes useless.
				nodeGroups := strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",")
				if len(nodeGroups) != 1 {
					e2eskipper.Skipf("test expects 1 node group, found %d", len(nodeGroups))
				}
				nodeGroupName = nodeGroups[0]

				// Make sure the node group has exactly 'nodesNum' nodes, otherwise this test becomes useless.
				nodeGroupSize, err := framework.GroupSize(nodeGroupName)
				framework.ExpectNoError(err)
				if nodeGroupSize != nodesNum {
					e2eskipper.Skipf("test expects %d nodes, found %d", nodesNum, nodeGroupSize)
				}

				// Make sure all nodes are schedulable, otherwise we are in some kind of a problem state.
				nodes, err = e2enode.GetReadySchedulableNodes(ctx, f.ClientSet)
				framework.ExpectNoError(err)
				schedulableCount := len(nodes.Items)
				framework.ExpectEqual(schedulableCount, nodeGroupSize, "not all nodes are schedulable")
			})

			ginkgo.AfterEach(func(ctx context.Context) {
				// Attempt cleanup only if a node group was targeted for scale-up.
				// Otherwise the test was probably skipped and we'll get a gcloud error due to invalid parameters.
				if len(nodeGroupName) > 0 {
					// Scale back down to only 'nodesNum' nodes, as expected at the start of the test.
					framework.ExpectNoError(framework.ResizeGroup(nodeGroupName, nodesNum))
					framework.ExpectNoError(e2enode.WaitForReadyNodes(ctx, f.ClientSet, nodesNum, 15*time.Minute))
				}
			})

			ginkgo.It("takes less than 15 minutes", func(ctx context.Context) {
				// Measured over multiple samples, scaling takes 10 +/- 2 minutes, so 15 minutes should be fully sufficient.
				const timeToWait = 15 * time.Minute

				// Calculate the CPU request of the service.
				// This test expects that 8 pods will not fit in 'nodesNum' nodes, but will fit in >='nodesNum'+1 nodes.
				// Make it so that 'nodesNum' pods fit perfectly per node.
				nodeCpus := nodes.Items[0].Status.Allocatable[v1.ResourceCPU]
				nodeCPUMillis := (&nodeCpus).MilliValue()
				cpuRequestMillis := int64(nodeCPUMillis / nodesNum)
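				// Each pod therefore requests a third of a node's allocatable CPU. Since system pods already
				// consume part of allocatable, fewer than three such pods fit on a node in practice, so 8 pods
				// cannot be scheduled on 3 nodes and the Cluster Autoscaler has to add at least one node.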

				// Start the service we want to scale and wait for it to be up and running.
				nodeMemoryBytes := nodes.Items[0].Status.Allocatable[v1.ResourceMemory]
				nodeMemoryMB := (&nodeMemoryBytes).Value() / 1024 / 1024
				memRequestMB := nodeMemoryMB / 10 // Ensure each pod takes no more than 10% of the node's allocatable memory.
				replicas := 1
				resourceConsumer := e2eautoscaling.NewDynamicResourceConsumer(ctx, "resource-consumer", f.Namespace.Name, e2eautoscaling.KindDeployment, replicas, 0, 0, 0, cpuRequestMillis, memRequestMB, f.ClientSet, f.ScalesGetter, e2eautoscaling.Disable, e2eautoscaling.Idle)
				ginkgo.DeferCleanup(resourceConsumer.CleanUp)
				resourceConsumer.WaitForReplicas(ctx, replicas, 1*time.Minute) // Should finish ~immediately, so 1 minute is more than enough.

				// Enable a Horizontal Pod Autoscaler with a 50% target utilization, then
				// generate enough CPU usage that 8 pods are needed to satisfy the target.
				targetCPUUtilizationPercent := int32(50)
				hpa := e2eautoscaling.CreateCPUResourceHorizontalPodAutoscaler(ctx, resourceConsumer, targetCPUUtilizationPercent, 1, 10)
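				// The 1 and 10 passed above are the HPA's replica bounds (min/max), which leaves headroom
				// for the expected scale-up to 8 pods.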
				ginkgo.DeferCleanup(e2eautoscaling.DeleteHorizontalPodAutoscaler, resourceConsumer, hpa.Name)
				cpuLoad := 8 * cpuRequestMillis * int64(targetCPUUtilizationPercent) / 100 // 8 pods utilized to the target level
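				// For example, on nodes with 4000m allocatable CPU, cpuRequestMillis works out to 1333m and
				// cpuLoad to roughly 5330m spread across the consumer pods.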
				resourceConsumer.ConsumeCPU(int(cpuLoad))

				// Measure the time it takes for the service to scale to 8 pods with 50% CPU utilization each.
				experiment.SampleDuration("total scale-up time", func(idx int) {
					resourceConsumer.WaitForReplicas(ctx, 8, timeToWait)
				}, gmeasure.SamplingConfig{N: 1}) // Increase N to run the measurement more than once.
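				// The sampled duration ends up in the report entry registered for the experiment in BeforeEach.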
			})
		})
	})
})