Fix cluster autoscaler test to support regional clusters.

Beata Skiba 2018-01-15 10:27:56 +01:00
parent 0dda5c8a7b
commit 329feee0e9


@@ -246,36 +246,41 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 	It("should increase cluster size if pending pods are small and there is another node pool that is not autoscaled [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		framework.SkipUnlessProviderIs("gke")
 
-		By("Creating new node-pool with one n1-standard-4 machine")
+		By("Creating new node-pool with n1-standard-4 machines")
 		const extraPoolName = "extra-pool"
-		addNodePool(extraPoolName, "n1-standard-4", 1)
+		extraNodes := addNodePool(extraPoolName, "n1-standard-4", 1)
 		defer deleteNodePool(extraPoolName)
-		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
+		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+extraNodes, resizeTimeout))
 		glog.Infof("Not enabling cluster autoscaler for the node pool (on purpose).")
 
-		By("Get memory available on new node, so we can account for it when creating RC")
+		By("Getting memory available on new nodes, so we can account for it when creating RC")
 		nodes := getPoolNodes(f, extraPoolName)
-		Expect(len(nodes)).Should(Equal(1))
-		extraMem := nodes[0].Status.Capacity[v1.ResourceMemory]
-		extraMemMb := int((&extraMem).Value() / 1024 / 1024)
+		Expect(len(nodes)).Should(Equal(extraNodes))
+		extraMemMb := 0
+		for _, node := range nodes {
+			mem := node.Status.Capacity[v1.ResourceMemory]
+			extraMemMb += int((&mem).Value() / 1024 / 1024)
+		}
 
-		ReserveMemory(f, "memory-reservation", 100, nodeCount*memAllocatableMb+extraMemMb, false, defaultTimeout)
+		By("Reserving 0.1x more memory than the cluster holds to trigger scale up")
+		totalMemoryReservation := int(1.1 * float64(nodeCount*memAllocatableMb+extraMemMb))
+		ReserveMemory(f, "memory-reservation", 100, totalMemoryReservation, false, defaultTimeout)
 		defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, "memory-reservation")
 
 		// Verify, that cluster size is increased
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
-			func(size int) bool { return size >= nodeCount+2 }, scaleUpTimeout))
+			func(size int) bool { return size >= nodeCount+extraNodes+1 }, scaleUpTimeout))
 		framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
 	})
 
 	It("should disable node pool autoscaling [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		framework.SkipUnlessProviderIs("gke")
 
-		By("Creating new node-pool with one n1-standard-4 machine")
+		By("Creating new node-pool with n1-standard-4 machines")
 		const extraPoolName = "extra-pool"
-		addNodePool(extraPoolName, "n1-standard-4", 1)
+		extraNodes := addNodePool(extraPoolName, "n1-standard-4", 1)
 		defer deleteNodePool(extraPoolName)
-		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
+		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+extraNodes, resizeTimeout))
 		framework.ExpectNoError(enableAutoscaler(extraPoolName, 1, 2))
 		framework.ExpectNoError(disableAutoscaler(extraPoolName, 1, 2))
 	})
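The per-node loop introduced above matters because on a regional cluster addNodePool brings up one node per zone, so the extra pool contributes several nodes' worth of memory rather than one. A minimal, standalone sketch of the same capacity arithmetic (node count and memory values are hypothetical, not measured; it uses the upstream k8s.io/api and k8s.io/apimachinery types):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Hypothetical regional pool: one n1-standard-4 node per zone across 3 zones.
	// The 15Gi figure is illustrative only.
	var nodes []v1.Node
	for i := 0; i < 3; i++ {
		nodes = append(nodes, v1.Node{
			Status: v1.NodeStatus{
				Capacity: v1.ResourceList{
					v1.ResourceMemory: resource.MustParse("15Gi"),
				},
			},
		})
	}

	// Same summation as the test: total the pool's memory capacity in MB.
	extraMemMb := 0
	for _, node := range nodes {
		mem := node.Status.Capacity[v1.ResourceMemory]
		extraMemMb += int(mem.Value() / 1024 / 1024)
	}
	fmt.Printf("extra pool: %d nodes, %d MB total\n", len(nodes), extraMemMb)
}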
@@ -505,23 +510,25 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 	It("should scale up correct target pool [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		framework.SkipUnlessProviderIs("gke")
 
-		By("Creating new node-pool with one n1-standard-4 machine")
+		By("Creating new node-pool with n1-standard-4 machines")
 		const extraPoolName = "extra-pool"
-		addNodePool(extraPoolName, "n1-standard-4", 1)
+		extraNodes := addNodePool(extraPoolName, "n1-standard-4", 1)
 		defer deleteNodePool(extraPoolName)
-		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
+		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+extraNodes, resizeTimeout))
 		framework.ExpectNoError(enableAutoscaler(extraPoolName, 1, 2))
 		defer disableAutoscaler(extraPoolName, 1, 2)
 
-		By("Creating rc with 2 pods too big to fit default-pool but fitting extra-pool")
-		ReserveMemory(f, "memory-reservation", 2, int(2.5*float64(memAllocatableMb)), false, defaultTimeout)
+		extraPods := extraNodes + 1
+		totalMemoryReservation := int(float64(extraPods) * 1.5 * float64(memAllocatableMb))
+		By(fmt.Sprintf("Creating rc with %v pods too big to fit default-pool but fitting extra-pool", extraPods))
+		ReserveMemory(f, "memory-reservation", extraPods, totalMemoryReservation, false, defaultTimeout)
 		defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, "memory-reservation")
 
 		// Apparently GKE master is restarted couple minutes after the node pool is added
 		// reseting all the timers in scale down code. Adding 5 extra minutes to workaround
 		// this issue.
 		// TODO: Remove the extra time when GKE restart is fixed.
-		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+2, scaleUpTimeout+5*time.Minute))
+		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+extraNodes+1, scaleUpTimeout+5*time.Minute))
 	})
 
 	simpleScaleDownTest := func(unready int) {
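The pod count and reservation above are chosen so that, assuming the test helper splits the reservation evenly across the pods, each of the extraNodes+1 pods asks for roughly 1.5x a default node's allocatable memory: too big for the default pool, small enough for an n1-standard-4 node, forcing the autoscaler to grow the extra pool by one node. A small sketch of that arithmetic with hypothetical numbers:

package main

import "fmt"

func main() {
	// Hypothetical values: ~5600 MB allocatable per default-pool node, and a
	// regional extra pool that came up with 3 nodes (one per zone).
	memAllocatableMb := 5600
	extraNodes := 3

	extraPods := extraNodes + 1
	totalMemoryReservation := int(float64(extraPods) * 1.5 * float64(memAllocatableMb))
	perPodMb := totalMemoryReservation / extraPods

	// Each pod requests ~1.5x a default node's allocatable memory, so it cannot
	// be scheduled on the default pool and must land on the larger extra-pool nodes.
	fmt.Printf("%d pods, %d MB total, ~%d MB per pod\n",
		extraPods, totalMemoryReservation, perPodMb)
}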
@@ -559,11 +566,11 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 		increasedSize := manuallyIncreaseClusterSize(f, originalSizes)
 
 		const extraPoolName = "extra-pool"
-		addNodePool(extraPoolName, "n1-standard-1", 3)
+		extraNodes := addNodePool(extraPoolName, "n1-standard-1", 3)
 		defer deleteNodePool(extraPoolName)
 
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
-			func(size int) bool { return size >= increasedSize+3 }, scaleUpTimeout))
+			func(size int) bool { return size >= increasedSize+extraNodes }, scaleUpTimeout))
 
 		By("Some node should be removed")
 		// Apparently GKE master is restarted couple minutes after the node pool is added
@@ -571,7 +578,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 		// this issue.
 		// TODO: Remove the extra time when GKE restart is fixed.
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
-			func(size int) bool { return size < increasedSize+3 }, scaleDownTimeout+10*time.Minute))
+			func(size int) bool { return size < increasedSize+extraNodes }, scaleDownTimeout+10*time.Minute))
 	})
 
 	It("should be able to scale down when rescheduling a pod is required and pdb allows for it[Feature:ClusterSizeAutoscalingScaleDown]", func() {
@@ -672,23 +679,25 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 	// verify the targeted node pool/MIG is of size 0
 	gkeScaleToZero := func() {
 		// GKE-specific setup
-		By("Add a new node pool with 1 node and min size 0")
+		By("Add a new node pool with size 1 and min size 0")
 		const extraPoolName = "extra-pool"
-		addNodePool(extraPoolName, "n1-standard-4", 1)
+		extraNodes := addNodePool(extraPoolName, "n1-standard-4", 1)
 		defer deleteNodePool(extraPoolName)
-		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
+		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+extraNodes, resizeTimeout))
 		framework.ExpectNoError(enableAutoscaler(extraPoolName, 0, 1))
 		defer disableAutoscaler(extraPoolName, 0, 1)
 
 		ngNodes := getPoolNodes(f, extraPoolName)
-		Expect(len(ngNodes) == 1).To(BeTrue())
-		node := ngNodes[0]
-		By(fmt.Sprintf("Target node for scale-down: %s", node.Name))
+		Expect(len(ngNodes)).To(Equal(extraNodes))
+		for _, node := range ngNodes {
+			By(fmt.Sprintf("Target node for scale-down: %s", node.Name))
+		}
 
-		// this part is identical
-		drainNode(f, node)
+		for _, node := range ngNodes {
+			drainNode(f, node)
+		}
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
-			func(size int) bool { return size < nodeCount+1 }, scaleDownTimeout))
+			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))
 
 		// GKE-specific check
 		newSize := getPoolSize(f, extraPoolName)
@@ -1051,7 +1060,7 @@ func getClusterLocation() string {
 	}
 }
 
-func getGcloudCommand(commandTrack string, args []string) []string {
+func getGcloudCommandFromTrack(commandTrack string, args []string) []string {
 	command := []string{"gcloud"}
 	if commandTrack == "beta" || commandTrack == "alpha" {
 		command = append(command, commandTrack)
@@ -1062,6 +1071,14 @@ func getGcloudCommand(commandTrack string, args []string) []string {
 	return command
 }
 
+func getGcloudCommand(args []string) []string {
+	track := ""
+	if isRegionalCluster() {
+		track = "beta"
+	}
+	return getGcloudCommandFromTrack(track, args)
+}
+
 func isRegionalCluster() bool {
 	// TODO(bskiba): Use an appropriate indicator that the cluster is regional.
 	return framework.TestContext.CloudConfig.MultiZone
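This wrapper is what makes the rest of the diff mechanical: callers pass only the gcloud arguments, and the helper picks the beta track for regional clusters and the GA track otherwise. A standalone sketch of just the track selection (isRegionalCluster is stubbed with a plain variable, and the cluster name is hypothetical; the real check reads framework.TestContext.CloudConfig.MultiZone):

package main

import "fmt"

// regional stands in for isRegionalCluster(), which in the test reads
// framework.TestContext.CloudConfig.MultiZone.
var regional = true

// getGcloudCommandFromTrack mirrors the renamed helper: prepend the release
// track (if any) to a gcloud invocation.
func getGcloudCommandFromTrack(commandTrack string, args []string) []string {
	command := []string{"gcloud"}
	if commandTrack == "beta" || commandTrack == "alpha" {
		command = append(command, commandTrack)
	}
	command = append(command, args...)
	return command
}

// getGcloudCommand mirrors the new wrapper: regional clusters get the beta
// track, zonal clusters the GA track.
func getGcloudCommand(args []string) []string {
	track := ""
	if regional {
		track = "beta"
	}
	return getGcloudCommandFromTrack(track, args)
}

func main() {
	args := []string{"container", "node-pools", "list", "--cluster=my-cluster"}
	fmt.Println(getGcloudCommand(args))
	// regional == true  -> [gcloud beta container node-pools list --cluster=my-cluster]
	// regional == false -> [gcloud container node-pools list --cluster=my-cluster]
}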
@@ -1075,11 +1092,7 @@ func enableAutoscaler(nodePool string, minCount, maxCount int) error {
 		"--min-nodes=" + strconv.Itoa(minCount),
 		"--max-nodes=" + strconv.Itoa(maxCount),
 		"--node-pool=" + nodePool}
-	track := ""
-	if isRegionalCluster() {
-		track = "beta"
-	}
-	output, err := execCmd(getGcloudCommand(track, args)...).CombinedOutput()
+	output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()
 
 	if err != nil {
 		glog.Errorf("Failed config update result: %s", output)
@@ -1103,11 +1116,7 @@ func disableAutoscaler(nodePool string, minCount, maxCount int) error {
 	args := []string{"container", "clusters", "update", framework.TestContext.CloudConfig.Cluster,
 		"--no-enable-autoscaling",
 		"--node-pool=" + nodePool}
-	track := ""
-	if isRegionalCluster() {
-		track = "beta"
-	}
-	output, err := execCmd(getGcloudCommand(track, args)...).CombinedOutput()
+	output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()
 
 	if err != nil {
 		glog.Errorf("Failed config update result: %s", output)
@@ -1219,10 +1228,8 @@ func disableAutoprovisioning() error {
 
 func getNAPNodePools() ([]string, error) {
 	if framework.ProviderIs("gke") {
-		output, err := exec.Command("gcloud", "container", "node-pools", "list",
-			"--project="+framework.TestContext.CloudConfig.ProjectID,
-			"--zone="+framework.TestContext.CloudConfig.Zone,
-			"--cluster="+framework.TestContext.CloudConfig.Cluster).CombinedOutput()
+		args := []string{"container", "node-pools", "list", "--cluster=" + framework.TestContext.CloudConfig.Cluster}
+		output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()
 		if err != nil {
 			glog.Errorf("Failed to get instance groups: %v", string(output))
 			return nil, err
@@ -1278,21 +1285,23 @@ func waitTillAllNAPNodePoolsAreRemoved() error {
 	return err
 }
 
-func addNodePool(name string, machineType string, numNodes int) {
+// Returns size of the newly added node pool
+func addNodePool(name string, machineType string, numNodes int) int {
 	args := []string{"container", "node-pools", "create", name, "--quiet",
 		"--machine-type=" + machineType,
 		"--num-nodes=" + strconv.Itoa(numNodes),
 		"--cluster=" + framework.TestContext.CloudConfig.Cluster}
-	output, err := execCmd(getGcloudCommand("alpha", args)...).CombinedOutput()
+	output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()
 	glog.Infof("Creating node-pool %s: %s", name, output)
 	framework.ExpectNoError(err)
+	return getPoolInitialSize(name)
 }
 
 func deleteNodePool(name string) {
 	glog.Infof("Deleting node pool %s", name)
 	args := []string{"container", "node-pools", "delete", name, "--quiet",
 		"--cluster=" + framework.TestContext.CloudConfig.Cluster}
-	output, err := execCmd(getGcloudCommand("alpha", args)...).CombinedOutput()
+	output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()
 	if err != nil {
 		glog.Infof("Error: %v", err)
 	}
@@ -1310,6 +1319,32 @@ func getPoolNodes(f *framework.Framework, poolName string) []*v1.Node {
 	return nodes
 }
 
+// getPoolInitialSize returns the initial size of the node pool taking into
+// account that it may span multiple zones. In that case, node pool consists of
+// multiple migs all containing initialNodeCount nodes.
+func getPoolInitialSize(poolName string) int {
+	// get initial node count
+	args := []string{"container", "node-pools", "describe", poolName, "--quiet",
+		"--cluster=" + framework.TestContext.CloudConfig.Cluster,
+		"--format=value(initialNodeCount)"}
+	output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()
+	glog.Infof("Node-pool initial size: %s", output)
+	framework.ExpectNoError(err)
+	fields := strings.Fields(string(output))
+	Expect(len(fields)).Should(Equal(1))
+	size, err := strconv.ParseInt(fields[0], 10, 64)
+	framework.ExpectNoError(err)
+
+	// get number of node pools
+	args = []string{"container", "node-pools", "describe", poolName, "--quiet",
+		"--cluster=" + framework.TestContext.CloudConfig.Cluster,
+		"--format=value(instanceGroupUrls)"}
+	output, err = execCmd(getGcloudCommand(args)...).CombinedOutput()
+	framework.ExpectNoError(err)
+	nodeGroupCount := len(strings.Split(string(output), ";"))
+	return int(size) * nodeGroupCount
+}
+
 func getPoolSize(f *framework.Framework, poolName string) int {
 	size := 0
 	nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
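The new getPoolInitialSize above derives the pool size as initialNodeCount multiplied by the number of managed instance groups, because a regional pool creates one MIG per zone and the --format=value(instanceGroupUrls) output joins the URLs into a single semicolon-separated field. A small sketch of that parsing against hypothetical gcloud output (the URLs and zone names are made up for illustration):

package main

import (
	"fmt"
	"strconv"
	"strings"
)

func main() {
	// Hypothetical outputs of the two `gcloud container node-pools describe` calls.
	initialNodeCountOutput := "1\n"
	instanceGroupUrlsOutput := "https://www.googleapis.com/compute/v1/projects/p/zones/us-central1-a/instanceGroupManagers/gke-extra-pool-grp-a;" +
		"https://www.googleapis.com/compute/v1/projects/p/zones/us-central1-b/instanceGroupManagers/gke-extra-pool-grp-b;" +
		"https://www.googleapis.com/compute/v1/projects/p/zones/us-central1-c/instanceGroupManagers/gke-extra-pool-grp-c\n"

	// Nodes per MIG: the single field printed for initialNodeCount.
	fields := strings.Fields(initialNodeCountOutput)
	size, err := strconv.ParseInt(fields[0], 10, 64)
	if err != nil {
		panic(err)
	}

	// Number of MIGs: one URL per zone, joined with ";".
	nodeGroupCount := len(strings.Split(strings.TrimSpace(instanceGroupUrlsOutput), ";"))

	fmt.Printf("pool size: %d (= %d nodes per MIG x %d MIGs)\n",
		int(size)*nodeGroupCount, size, nodeGroupCount)
}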