mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-27 05:27:21 +00:00)
Fix cluster autoscaler test to support regional clusters.
This commit is contained in:
parent 0dda5c8a7b
commit 329feee0e9
@@ -246,36 +246,41 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 	It("should increase cluster size if pending pods are small and there is another node pool that is not autoscaled [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		framework.SkipUnlessProviderIs("gke")

-		By("Creating new node-pool with one n1-standard-4 machine")
+		By("Creating new node-pool with n1-standard-4 machines")
 		const extraPoolName = "extra-pool"
-		addNodePool(extraPoolName, "n1-standard-4", 1)
+		extraNodes := addNodePool(extraPoolName, "n1-standard-4", 1)
 		defer deleteNodePool(extraPoolName)
-		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
+		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+extraNodes, resizeTimeout))
 		glog.Infof("Not enabling cluster autoscaler for the node pool (on purpose).")

-		By("Get memory available on new node, so we can account for it when creating RC")
+		By("Getting memory available on new nodes, so we can account for it when creating RC")
 		nodes := getPoolNodes(f, extraPoolName)
-		Expect(len(nodes)).Should(Equal(1))
-		extraMem := nodes[0].Status.Capacity[v1.ResourceMemory]
-		extraMemMb := int((&extraMem).Value() / 1024 / 1024)
+		Expect(len(nodes)).Should(Equal(extraNodes))
+		extraMemMb := 0
+		for _, node := range nodes {
+			mem := node.Status.Capacity[v1.ResourceMemory]
+			extraMemMb += int((&mem).Value() / 1024 / 1024)
+		}

-		ReserveMemory(f, "memory-reservation", 100, nodeCount*memAllocatableMb+extraMemMb, false, defaultTimeout)
+		By("Reserving 0.1x more memory than the cluster holds to trigger scale up")
+		totalMemoryReservation := int(1.1 * float64(nodeCount*memAllocatableMb+extraMemMb))
+		ReserveMemory(f, "memory-reservation", 100, totalMemoryReservation, false, defaultTimeout)
 		defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, "memory-reservation")

 		// Verify, that cluster size is increased
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
-			func(size int) bool { return size >= nodeCount+2 }, scaleUpTimeout))
+			func(size int) bool { return size >= nodeCount+extraNodes+1 }, scaleUpTimeout))
 		framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
 	})

 	It("should disable node pool autoscaling [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		framework.SkipUnlessProviderIs("gke")

-		By("Creating new node-pool with one n1-standard-4 machine")
+		By("Creating new node-pool with n1-standard-4 machines")
 		const extraPoolName = "extra-pool"
-		addNodePool(extraPoolName, "n1-standard-4", 1)
+		extraNodes := addNodePool(extraPoolName, "n1-standard-4", 1)
 		defer deleteNodePool(extraPoolName)
-		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
+		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+extraNodes, resizeTimeout))
 		framework.ExpectNoError(enableAutoscaler(extraPoolName, 1, 2))
 		framework.ExpectNoError(disableAutoscaler(extraPoolName, 1, 2))
 	})
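Note on the hunk above: with a regional cluster the extra pool can come up with one node per zone, so the memory headroom is now summed over every new node and the reservation is padded to 1.1x of the cluster's allocatable memory, which guarantees the pending pods cannot be packed onto existing nodes. A minimal, self-contained sketch of that arithmetic, using made-up numbers rather than values from the test:

	package main

	import "fmt"

	func main() {
		nodeCount := 3           // hypothetical number of default-pool nodes
		memAllocatableMb := 7500 // hypothetical allocatable MB per default-pool node
		extraMemMb := 3 * 15000  // hypothetical total allocatable MB of the new extra-pool nodes

		// Reserving 1.1x of the cluster's total allocatable memory leaves some pods
		// pending, which is what forces the cluster autoscaler to add a node.
		totalMemoryReservation := int(1.1 * float64(nodeCount*memAllocatableMb+extraMemMb))
		fmt.Println(totalMemoryReservation) // 74250
	}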
@@ -505,23 +510,25 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 	It("should scale up correct target pool [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		framework.SkipUnlessProviderIs("gke")

-		By("Creating new node-pool with one n1-standard-4 machine")
+		By("Creating new node-pool with n1-standard-4 machines")
 		const extraPoolName = "extra-pool"
-		addNodePool(extraPoolName, "n1-standard-4", 1)
+		extraNodes := addNodePool(extraPoolName, "n1-standard-4", 1)
 		defer deleteNodePool(extraPoolName)
-		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
+		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+extraNodes, resizeTimeout))
 		framework.ExpectNoError(enableAutoscaler(extraPoolName, 1, 2))
 		defer disableAutoscaler(extraPoolName, 1, 2)

-		By("Creating rc with 2 pods too big to fit default-pool but fitting extra-pool")
-		ReserveMemory(f, "memory-reservation", 2, int(2.5*float64(memAllocatableMb)), false, defaultTimeout)
+		extraPods := extraNodes + 1
+		totalMemoryReservation := int(float64(extraPods) * 1.5 * float64(memAllocatableMb))
+		By(fmt.Sprintf("Creating rc with %v pods too big to fit default-pool but fitting extra-pool", extraPods))
+		ReserveMemory(f, "memory-reservation", extraPods, totalMemoryReservation, false, defaultTimeout)
 		defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, "memory-reservation")

 		// Apparently GKE master is restarted couple minutes after the node pool is added
 		// reseting all the timers in scale down code. Adding 5 extra minutes to workaround
 		// this issue.
 		// TODO: Remove the extra time when GKE restart is fixed.
-		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+2, scaleUpTimeout+5*time.Minute))
+		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+extraNodes+1, scaleUpTimeout+5*time.Minute))
 	})

 	simpleScaleDownTest := func(unready int) {
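The pod sizing in the hunk above follows the same idea: each pod requests 1.5x the allocatable memory of a default-pool node, so it can only land on an n1-standard-4 node, and one more pod is created than there are extra nodes, forcing the autoscaler to grow the extra pool by exactly one node. A sketch with hypothetical values:

	package main

	import "fmt"

	func main() {
		memAllocatableMb := 7500 // hypothetical allocatable MB on a default-pool node
		extraNodes := 3          // hypothetical size of the regional extra pool

		extraPods := extraNodes + 1                      // one pod more than there are extra nodes
		perPodMb := int(1.5 * float64(memAllocatableMb)) // too big for a default-pool node
		totalMemoryReservation := extraPods * perPodMb
		fmt.Println(extraPods, perPodMb, totalMemoryReservation) // 4 11250 45000
	}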
@@ -559,11 +566,11 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 		increasedSize := manuallyIncreaseClusterSize(f, originalSizes)

 		const extraPoolName = "extra-pool"
-		addNodePool(extraPoolName, "n1-standard-1", 3)
+		extraNodes := addNodePool(extraPoolName, "n1-standard-1", 3)
 		defer deleteNodePool(extraPoolName)

 		framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
-			func(size int) bool { return size >= increasedSize+3 }, scaleUpTimeout))
+			func(size int) bool { return size >= increasedSize+extraNodes }, scaleUpTimeout))

 		By("Some node should be removed")
 		// Apparently GKE master is restarted couple minutes after the node pool is added
@@ -571,7 +578,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 		// this issue.
 		// TODO: Remove the extra time when GKE restart is fixed.
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
-			func(size int) bool { return size < increasedSize+3 }, scaleDownTimeout+10*time.Minute))
+			func(size int) bool { return size < increasedSize+extraNodes }, scaleDownTimeout+10*time.Minute))
 	})

 	It("should be able to scale down when rescheduling a pod is required and pdb allows for it[Feature:ClusterSizeAutoscalingScaleDown]", func() {
@@ -672,23 +679,25 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 	// verify the targeted node pool/MIG is of size 0
 	gkeScaleToZero := func() {
 		// GKE-specific setup
-		By("Add a new node pool with 1 node and min size 0")
+		By("Add a new node pool with size 1 and min size 0")
 		const extraPoolName = "extra-pool"
-		addNodePool(extraPoolName, "n1-standard-4", 1)
+		extraNodes := addNodePool(extraPoolName, "n1-standard-4", 1)
 		defer deleteNodePool(extraPoolName)
-		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
+		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+extraNodes, resizeTimeout))
 		framework.ExpectNoError(enableAutoscaler(extraPoolName, 0, 1))
 		defer disableAutoscaler(extraPoolName, 0, 1)

 		ngNodes := getPoolNodes(f, extraPoolName)
-		Expect(len(ngNodes) == 1).To(BeTrue())
-		node := ngNodes[0]
-		By(fmt.Sprintf("Target node for scale-down: %s", node.Name))
+		Expect(len(ngNodes)).To(Equal(extraNodes))
+		for _, node := range ngNodes {
+			By(fmt.Sprintf("Target node for scale-down: %s", node.Name))
+		}

-		// this part is identical
-		drainNode(f, node)
+		for _, node := range ngNodes {
+			drainNode(f, node)
+		}
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
-			func(size int) bool { return size < nodeCount+1 }, scaleDownTimeout))
+			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))

 		// GKE-specific check
 		newSize := getPoolSize(f, extraPoolName)
@@ -1051,7 +1060,7 @@ func getClusterLocation() string {
 	}
 }

-func getGcloudCommand(commandTrack string, args []string) []string {
+func getGcloudCommandFromTrack(commandTrack string, args []string) []string {
 	command := []string{"gcloud"}
 	if commandTrack == "beta" || commandTrack == "alpha" {
 		command = append(command, commandTrack)
@@ -1062,6 +1071,14 @@ func getGcloudCommand(commandTrack string, args []string) []string {
 	return command
 }

+func getGcloudCommand(args []string) []string {
+	track := ""
+	if isRegionalCluster() {
+		track = "beta"
+	}
+	return getGcloudCommandFromTrack(track, args)
+}
+
 func isRegionalCluster() bool {
 	// TODO(bskiba): Use an appropriate indicator that the cluster is regional.
 	return framework.TestContext.CloudConfig.MultiZone
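The new getGcloudCommand wrapper above centralizes the choice of gcloud release track: for regional clusters (detected via framework.TestContext.CloudConfig.MultiZone) commands are routed through the beta track, otherwise the GA track is used. A standalone sketch of that selection, with the regional check reduced to a plain boolean for illustration:

	package main

	import "fmt"

	// gcloudCommand mirrors the track selection added above, simplified so it
	// does not depend on the e2e framework.
	func gcloudCommand(regional bool, args []string) []string {
		command := []string{"gcloud"}
		if regional {
			// Regional clusters are driven through the beta track, as in the change above.
			command = append(command, "beta")
		}
		return append(command, args...)
	}

	func main() {
		args := []string{"container", "clusters", "update", "some-cluster", "--node-pool=extra-pool"}
		fmt.Println(gcloudCommand(false, args)) // [gcloud container clusters update some-cluster --node-pool=extra-pool]
		fmt.Println(gcloudCommand(true, args))  // [gcloud beta container clusters update some-cluster --node-pool=extra-pool]
	}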
@@ -1075,11 +1092,7 @@ func enableAutoscaler(nodePool string, minCount, maxCount int) error {
 		"--min-nodes=" + strconv.Itoa(minCount),
 		"--max-nodes=" + strconv.Itoa(maxCount),
 		"--node-pool=" + nodePool}
-	track := ""
-	if isRegionalCluster() {
-		track = "beta"
-	}
-	output, err := execCmd(getGcloudCommand(track, args)...).CombinedOutput()
+	output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()

 	if err != nil {
 		glog.Errorf("Failed config update result: %s", output)
@@ -1103,11 +1116,7 @@ func disableAutoscaler(nodePool string, minCount, maxCount int) error {
 	args := []string{"container", "clusters", "update", framework.TestContext.CloudConfig.Cluster,
 		"--no-enable-autoscaling",
 		"--node-pool=" + nodePool}
-	track := ""
-	if isRegionalCluster() {
-		track = "beta"
-	}
-	output, err := execCmd(getGcloudCommand(track, args)...).CombinedOutput()
+	output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()

 	if err != nil {
 		glog.Errorf("Failed config update result: %s", output)
@@ -1219,10 +1228,8 @@ func disableAutoprovisioning() error {

 func getNAPNodePools() ([]string, error) {
 	if framework.ProviderIs("gke") {
-		output, err := exec.Command("gcloud", "container", "node-pools", "list",
-			"--project="+framework.TestContext.CloudConfig.ProjectID,
-			"--zone="+framework.TestContext.CloudConfig.Zone,
-			"--cluster="+framework.TestContext.CloudConfig.Cluster).CombinedOutput()
+		args := []string{"container", "node-pools", "list", "--cluster=" + framework.TestContext.CloudConfig.Cluster}
+		output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()
 		if err != nil {
 			glog.Errorf("Failed to get instance groups: %v", string(output))
 			return nil, err
@@ -1278,21 +1285,23 @@ func waitTillAllNAPNodePoolsAreRemoved() error {
 	return err
 }

-func addNodePool(name string, machineType string, numNodes int) {
+// Returns size of the newly added node pool
+func addNodePool(name string, machineType string, numNodes int) int {
 	args := []string{"container", "node-pools", "create", name, "--quiet",
 		"--machine-type=" + machineType,
 		"--num-nodes=" + strconv.Itoa(numNodes),
 		"--cluster=" + framework.TestContext.CloudConfig.Cluster}
-	output, err := execCmd(getGcloudCommand("alpha", args)...).CombinedOutput()
+	output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()
 	glog.Infof("Creating node-pool %s: %s", name, output)
 	framework.ExpectNoError(err)
+	return getPoolInitialSize(name)
 }

 func deleteNodePool(name string) {
 	glog.Infof("Deleting node pool %s", name)
 	args := []string{"container", "node-pools", "delete", name, "--quiet",
 		"--cluster=" + framework.TestContext.CloudConfig.Cluster}
-	output, err := execCmd(getGcloudCommand("alpha", args)...).CombinedOutput()
+	output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()
 	if err != nil {
 		glog.Infof("Error: %v", err)
 	}
@@ -1310,6 +1319,32 @@ func getPoolNodes(f *framework.Framework, poolName string) []*v1.Node {
 	return nodes
 }

+// getPoolInitialSize returns the initial size of the node pool taking into
+// account that it may span multiple zones. In that case, node pool consists of
+// multiple migs all containing initialNodeCount nodes.
+func getPoolInitialSize(poolName string) int {
+	// get initial node count
+	args := []string{"container", "node-pools", "describe", poolName, "--quiet",
+		"--cluster=" + framework.TestContext.CloudConfig.Cluster,
+		"--format=value(initialNodeCount)"}
+	output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()
+	glog.Infof("Node-pool initial size: %s", output)
+	framework.ExpectNoError(err)
+	fields := strings.Fields(string(output))
+	Expect(len(fields)).Should(Equal(1))
+	size, err := strconv.ParseInt(fields[0], 10, 64)
+	framework.ExpectNoError(err)
+
+	// get number of node pools
+	args = []string{"container", "node-pools", "describe", poolName, "--quiet",
+		"--cluster=" + framework.TestContext.CloudConfig.Cluster,
+		"--format=value(instanceGroupUrls)"}
+	output, err = execCmd(getGcloudCommand(args)...).CombinedOutput()
+	framework.ExpectNoError(err)
+	nodeGroupCount := len(strings.Split(string(output), ";"))
+	return int(size) * nodeGroupCount
+}
+
 func getPoolSize(f *framework.Framework, poolName string) int {
 	size := 0
 	nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
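getPoolInitialSize is what lets addNodePool report how many nodes a pool actually contributes: a regional node pool is backed by one managed instance group per zone, each created with initialNodeCount nodes, and gcloud's --format=value(...) output joins the instanceGroupUrls list with ";". A self-contained sketch of the same parsing, fed with hypothetical gcloud output for a three-zone pool:

	package main

	import (
		"fmt"
		"strconv"
		"strings"
	)

	// poolInitialSize reproduces the parsing done by getPoolInitialSize above;
	// the real function obtains both strings from `gcloud container node-pools describe`.
	func poolInitialSize(initialNodeCount, instanceGroupUrls string) int {
		// --format=value(initialNodeCount) prints a single number.
		fields := strings.Fields(initialNodeCount)
		size, err := strconv.ParseInt(fields[0], 10, 64)
		if err != nil {
			panic(err)
		}
		// --format=value(instanceGroupUrls) joins the MIG URLs with ";"; a regional
		// pool has one MIG per zone, each holding initialNodeCount nodes.
		groups := len(strings.Split(strings.TrimSpace(instanceGroupUrls), ";"))
		return int(size) * groups
	}

	func main() {
		// Hypothetical output for a pool spanning three zones.
		fmt.Println(poolInitialSize("1\n", "mig-zone-a;mig-zone-b;mig-zone-c")) // 3
	}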