Merge pull request #53326 from aleksandra-malinowska/scale-to-0-test-gke

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Remake cluster size autoscaling scale to zero test

This PR affects only the cluster size autoscaling test suite. Changes:
* check whether autoscaling is enabled by looking for a node group with a given max number of nodes instead of min, since the min field is omitted from the API response when its value is 0 (see the sketch below)
* split the scale-to-zero test into GKE & GCE versions, and add GKE-specific setup and verification
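
For context on the first bullet, here is a minimal sketch (not part of this PR) of why matching on the min count breaks at 0: serializers that omit empty fields drop a zero-valued `minNodeCount` from the response entirely, while `maxNodeCount` is always present for an enabled autoscaler. The `autoscaling` struct below is an illustrative stand-in for the relevant fragment of the GKE API response, not the real client type.

```go
package main

import (
	"encoding/json"
	"fmt"
	"strconv"
	"strings"
)

// Illustrative stand-in for the autoscaling fragment of a GKE node pool
// response; with omitempty, zero-valued fields vanish from the output.
type autoscaling struct {
	Enabled      bool `json:"enabled,omitempty"`
	MinNodeCount int  `json:"minNodeCount,omitempty"`
	MaxNodeCount int  `json:"maxNodeCount,omitempty"`
}

func main() {
	body, _ := json.Marshal(autoscaling{Enabled: true, MinNodeCount: 0, MaxNodeCount: 1})
	strBody := string(body)
	fmt.Println(strBody) // {"enabled":true,"maxNodeCount":1} -- no minNodeCount key at all

	// Old check: looking for the configured min never matches when min is 0.
	fmt.Println(strings.Contains(strBody, `"minNodeCount":`+strconv.Itoa(0))) // false
	// New check: max is never 0 for an enabled autoscaler, so it is always
	// present in the response and safe to match on.
	fmt.Println(strings.Contains(strBody, `"maxNodeCount":`+strconv.Itoa(1))) // true
}
```

This is why `isAutoscalerEnabled` in the diff below now takes the expected max node count (5 for the default pool, 1 for the extra pool) rather than the min.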
This commit is contained in:
commit 83d3137e25 (Kubernetes Submit Queue, 2017-10-02 08:36:28 -07:00, committed by GitHub)


@@ -66,6 +66,7 @@ const (
gkeEndpoint = "https://test-container.sandbox.googleapis.com"
gkeUpdateTimeout = 15 * time.Minute
gkeNodepoolNameKey = "cloud.google.com/gke-nodepool"
disabledTaint = "DisabledForAutoscalingTest"
criticalAddonsOnlyTaint = "CriticalAddonsOnly"
@@ -110,7 +111,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
Expect(nodeCount).Should(Equal(sum))
if framework.ProviderIs("gke") {
val, err := isAutoscalerEnabled(3)
val, err := isAutoscalerEnabled(5)
framework.ExpectNoError(err)
if !val {
err = enableAutoscaler("default-pool", 3, 5)
@@ -619,21 +620,43 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
})
It("Should be able to scale a node group down to 0[Feature:ClusterSizeAutoscalingScaleDown]", func() {
// Determine whether we want to run & adjust the setup if necessary
if len(originalSizes) < 2 {
if framework.ProviderIs("gke") {
By("Adding a new node pool")
// Scale to 0 test is split into two functions (for GKE & GCE).
// The reason for it is that the scenario is exactly the same,
// but setup & verification use different APIs.
//
// Scenario:
// (GKE only) add an extra node pool with size 1 & enable autoscaling for it
// (GCE only) find the smallest MIG & resize it to 1
// manually drain the single node from this node pool/MIG
// wait for cluster size to decrease
// verify the targeted node pool/MIG is of size 0
gkeScaleToZero := func() {
// GKE-specific setup
By("Add a new node pool with 1 node and min size 0")
const extraPoolName = "extra-pool"
addNodePool(extraPoolName, "n1-standard-4", 1)
defer deleteNodePool(extraPoolName)
framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
framework.ExpectNoError(enableAutoscaler(extraPoolName, 0, 1))
} else {
framework.Skipf("At least 2 node groups are needed for scale-to-0 tests")
}
err := enableAutoscaler(extraPoolName, 0, 1)
framework.ExpectNoError(err)
ngNodes := getPoolNodes(f, extraPoolName)
Expect(len(ngNodes) == 1).To(BeTrue())
node := ngNodes[0]
By(fmt.Sprintf("Target node for scale-down: %s", node.Name))
// this part is identical
drainNode(f, node)
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size < nodeCount+1 }, scaleDownTimeout))
// GKE-specific check
newSize := getPoolSize(f, extraPoolName)
Expect(newSize).Should(Equal(0))
}
gceScaleToZero := func() {
// non-GKE only
By("Find smallest node group and manually scale it to a single node")
minMig := ""
minSize := nodeCount
@@ -646,35 +669,32 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
err := framework.ResizeGroup(minMig, int32(1))
framework.ExpectNoError(err)
framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount-minSize+1, resizeTimeout))
By("Make the single node unschedulable")
allNodes, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
"spec.unschedulable": "false",
}.AsSelector().String()})
framework.ExpectNoError(err)
ngNodes, err := framework.GetGroupNodes(minMig)
framework.ExpectNoError(err)
By(fmt.Sprintf("Target nodes for scale-down: %s", ngNodes))
Expect(len(ngNodes) == 1).To(BeTrue())
node, err := f.ClientSet.Core().Nodes().Get(ngNodes[0], metav1.GetOptions{})
By(fmt.Sprintf("Target node for scale-down: %s", node.Name))
framework.ExpectNoError(err)
makeNodeUnschedulable(f.ClientSet, node)
By("Manually drain the single node")
podOpts := metav1.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, node.Name).String()}
pods, err := c.Core().Pods(metav1.NamespaceAll).List(podOpts)
framework.ExpectNoError(err)
for _, pod := range pods.Items {
err = f.ClientSet.Core().Pods(pod.Namespace).Delete(pod.Name, metav1.NewDeleteOptions(0))
// this part is identical
drainNode(f, node)
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size < nodeCount-minSize+1 }, scaleDownTimeout))
// non-GKE only
newSize, err := framework.GroupSize(minMig)
framework.ExpectNoError(err)
Expect(newSize).Should(Equal(0))
}
By("The node should be removed")
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size < len(allNodes.Items) }, scaleDownTimeout))
minSize, err = framework.GroupSize(minMig)
framework.ExpectNoError(err)
Expect(minSize).Should(Equal(0))
It("Should be able to scale a node group down to 0[Feature:ClusterSizeAutoscalingScaleDown]", func() {
if framework.ProviderIs("gke") { // In GKE, we can just add a node pool
gkeScaleToZero()
} else if len(originalSizes) >= 2 {
gceScaleToZero()
} else {
framework.Skipf("At least 2 node groups are needed for scale-to-0 tests")
}
})
It("Shouldn't perform scale up operation and should list unhealthy status if most of the cluster is broken[Feature:ClusterSizeAutoscalingScaleUp]", func() {
@@ -775,7 +795,7 @@ func getGKEClusterUrl() string {
token)
}
func isAutoscalerEnabled(expectedMinNodeCountInTargetPool int) (bool, error) {
func isAutoscalerEnabled(expectedMaxNodeCountInTargetPool int) (bool, error) {
resp, err := http.Get(getGKEClusterUrl())
if err != nil {
return false, err
@@ -786,7 +806,7 @@ func isAutoscalerEnabled(expectedMinNodeCountInTargetPool int) (bool, error) {
return false, err
}
strBody := string(body)
if strings.Contains(strBody, "\"minNodeCount\": "+strconv.Itoa(expectedMinNodeCountInTargetPool)) {
if strings.Contains(strBody, "\"maxNodeCount\": "+strconv.Itoa(expectedMaxNodeCountInTargetPool)) {
return true, nil
}
return false, nil
@@ -833,7 +853,7 @@ func enableAutoscaler(nodePool string, minCount, maxCount int) error {
var finalErr error
for startTime := time.Now(); startTime.Add(gkeUpdateTimeout).After(time.Now()); time.Sleep(30 * time.Second) {
val, err := isAutoscalerEnabled(minCount)
val, err := isAutoscalerEnabled(maxCount)
if err == nil && val {
return nil
}
@@ -880,7 +900,7 @@ func disableAutoscaler(nodePool string, minCount, maxCount int) error {
}
for startTime := time.Now(); startTime.Add(gkeUpdateTimeout).After(time.Now()); time.Sleep(30 * time.Second) {
if val, err := isAutoscalerEnabled(minCount); err == nil && !val {
if val, err := isAutoscalerEnabled(maxCount); err == nil && !val {
return nil
}
}
@@ -914,13 +934,24 @@ func getPoolNodes(f *framework.Framework, poolName string) []*v1.Node {
nodes := make([]*v1.Node, 0, 1)
nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
for _, node := range nodeList.Items {
if poolLabel := node.Labels["cloud.google.com/gke-nodepool"]; poolLabel == poolName {
if node.Labels[gkeNodepoolNameKey] == poolName {
nodes = append(nodes, &node)
}
}
return nodes
}
func getPoolSize(f *framework.Framework, poolName string) int {
size := 0
nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
for _, node := range nodeList.Items {
if node.Labels[gkeNodepoolNameKey] == poolName {
size++
}
}
return size
}
func doPut(url, content string) (string, error) {
req, err := http.NewRequest("PUT", url, bytes.NewBuffer([]byte(content)))
if err != nil {
@@ -1075,6 +1106,20 @@ func setMigSizes(sizes map[string]int) bool {
return madeChanges
}
func drainNode(f *framework.Framework, node *v1.Node) {
By("Make the single node unschedulable")
makeNodeUnschedulable(f.ClientSet, node)
By("Manually drain the single node")
podOpts := metav1.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, node.Name).String()}
pods, err := f.ClientSet.Core().Pods(metav1.NamespaceAll).List(podOpts)
framework.ExpectNoError(err)
for _, pod := range pods.Items {
err = f.ClientSet.Core().Pods(pod.Namespace).Delete(pod.Name, metav1.NewDeleteOptions(0))
framework.ExpectNoError(err)
}
}
func makeNodeUnschedulable(c clientset.Interface, node *v1.Node) error {
By(fmt.Sprintf("Taint node %s", node.Name))
for j := 0; j < 3; j++ {