Merge pull request #50910 from wasylkowski/autoscaler-test-6

Automatic merge from submit-queue

Added an end-to-end test ensuring that Cluster Autoscaler does not scale up when all pending pods are unschedulable

**What this PR does / why we need it**:

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #

**Special notes for your reviewer**:

**Release note**:

```release-note
NONE
```
This commit is contained in:
Kubernetes Submit Queue 2017-08-29 06:00:03 -07:00 committed by GitHub
commit 28f6b3fcc0
6 changed files with 105 additions and 44 deletions

View File

@ -70,7 +70,7 @@ var _ = SIGDescribe("[Feature:ClusterSizeAutoscalingScaleUp] [Slow] Autoscaling"
AfterEach(func() { AfterEach(func() {
// Scale down back to only 'nodesNum' nodes, as expected at the start of the test. // Scale down back to only 'nodesNum' nodes, as expected at the start of the test.
framework.ExpectNoError(framework.ResizeGroup(nodeGroupName, nodesNum)) framework.ExpectNoError(framework.ResizeGroup(nodeGroupName, nodesNum))
framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, nodesNum, 15*time.Minute)) framework.ExpectNoError(framework.WaitForReadyNodes(f.ClientSet, nodesNum, 15*time.Minute))
}) })
Measure("takes less than 15 minutes", func(b Benchmarker) { Measure("takes less than 15 minutes", func(b Benchmarker) {

View File

@ -19,6 +19,7 @@ package autoscaling
import ( import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"math"
"strings" "strings"
"time" "time"
@ -88,7 +89,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
} }
} }
framework.ExpectNoError(framework.WaitForClusterSize(c, sum, scaleUpTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, sum, scaleUpTimeout))
nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet) nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
nodeCount = len(nodes.Items) nodeCount = len(nodes.Items)
@ -113,7 +114,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
AfterEach(func() { AfterEach(func() {
By(fmt.Sprintf("Restoring initial size of the cluster")) By(fmt.Sprintf("Restoring initial size of the cluster"))
setMigSizes(originalSizes) setMigSizes(originalSizes)
framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount, scaleDownTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount, scaleDownTimeout))
nodes, err := c.Core().Nodes().List(metav1.ListOptions{}) nodes, err := c.Core().Nodes().List(metav1.ListOptions{})
framework.ExpectNoError(err) framework.ExpectNoError(err)
s := time.Now() s := time.Now()
@ -159,8 +160,11 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
It("should scale up twice [Feature:ClusterAutoscalerScalability2]", func() { It("should scale up twice [Feature:ClusterAutoscalerScalability2]", func() {
perNodeReservation := int(float64(memCapacityMb) * 0.95) perNodeReservation := int(float64(memCapacityMb) * 0.95)
replicasPerNode := 10 replicasPerNode := 10
additionalNodes1 := int(0.7 * maxNodes) additionalNodes1 := int(math.Ceil(0.7 * maxNodes))
additionalNodes2 := int(0.25 * maxNodes) additionalNodes2 := int(math.Ceil(0.25 * maxNodes))
if additionalNodes1+additionalNodes2 > maxNodes {
additionalNodes2 = maxNodes - additionalNodes1
}
replicas1 := additionalNodes1 * replicasPerNode replicas1 := additionalNodes1 * replicasPerNode
replicas2 := additionalNodes2 * replicasPerNode replicas2 := additionalNodes2 * replicasPerNode
@ -168,7 +172,8 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
glog.Infof("cores per node: %v", coresPerNode) glog.Infof("cores per node: %v", coresPerNode)
// saturate cluster // saturate cluster
reservationCleanup := ReserveMemory(f, "some-pod", nodeCount, nodeCount*perNodeReservation, true, memoryReservationTimeout) initialReplicas := nodeCount
reservationCleanup := ReserveMemory(f, "some-pod", initialReplicas, nodeCount*perNodeReservation, true, memoryReservationTimeout)
defer reservationCleanup() defer reservationCleanup()
framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c)) framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
@ -179,10 +184,10 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
expectedResult := createClusterPredicates(nodeCount + additionalNodes1) expectedResult := createClusterPredicates(nodeCount + additionalNodes1)
config := createScaleUpTestConfig(nodeCount, nodeCount, rcConfig, expectedResult) config := createScaleUpTestConfig(nodeCount, nodeCount, rcConfig, expectedResult)
epsilon := 0.05
// run test #1 // run test #1
testCleanup1 := simpleScaleUpTestWithEpsilon(f, config, epsilon) tolerateUnreadyNodes := additionalNodes1 / 20
tolerateUnreadyPods := (initialReplicas + replicas1) / 20
testCleanup1 := simpleScaleUpTestWithTolerance(f, config, tolerateUnreadyNodes, tolerateUnreadyPods)
defer testCleanup1() defer testCleanup1()
glog.Infof("Scaled up once") glog.Infof("Scaled up once")
@ -193,7 +198,9 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
config2 := createScaleUpTestConfig(nodeCount+additionalNodes1, nodeCount+additionalNodes2, rcConfig2, expectedResult2) config2 := createScaleUpTestConfig(nodeCount+additionalNodes1, nodeCount+additionalNodes2, rcConfig2, expectedResult2)
// run test #2 // run test #2
testCleanup2 := simpleScaleUpTestWithEpsilon(f, config2, epsilon) tolerateUnreadyNodes = maxNodes / 20
tolerateUnreadyPods = (initialReplicas + replicas1 + replicas2) / 20
testCleanup2 := simpleScaleUpTestWithTolerance(f, config2, tolerateUnreadyNodes, tolerateUnreadyPods)
defer testCleanup2() defer testCleanup2()
glog.Infof("Scaled up twice") glog.Infof("Scaled up twice")
@ -201,7 +208,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
It("should scale down empty nodes [Feature:ClusterAutoscalerScalability3]", func() { It("should scale down empty nodes [Feature:ClusterAutoscalerScalability3]", func() {
perNodeReservation := int(float64(memCapacityMb) * 0.7) perNodeReservation := int(float64(memCapacityMb) * 0.7)
replicas := int(float64(maxNodes) * 0.7) replicas := int(math.Ceil(maxNodes * 0.7))
totalNodes := maxNodes totalNodes := maxNodes
// resize cluster to totalNodes // resize cluster to totalNodes
@ -209,13 +216,15 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
anyKey(originalSizes): totalNodes, anyKey(originalSizes): totalNodes,
} }
setMigSizes(newSizes) setMigSizes(newSizes)
framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, totalNodes, largeResizeTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(f.ClientSet, totalNodes, largeResizeTimeout))
// run replicas // run replicas
rcConfig := reserveMemoryRCConfig(f, "some-pod", replicas, replicas*perNodeReservation, largeScaleUpTimeout) rcConfig := reserveMemoryRCConfig(f, "some-pod", replicas, replicas*perNodeReservation, largeScaleUpTimeout)
expectedResult := createClusterPredicates(totalNodes) expectedResult := createClusterPredicates(totalNodes)
config := createScaleUpTestConfig(totalNodes, totalNodes, rcConfig, expectedResult) config := createScaleUpTestConfig(totalNodes, totalNodes, rcConfig, expectedResult)
testCleanup := simpleScaleUpTestWithEpsilon(f, config, 0.1) tolerateUnreadyNodes := totalNodes / 10
tolerateUnreadyPods := replicas / 10
testCleanup := simpleScaleUpTestWithTolerance(f, config, tolerateUnreadyNodes, tolerateUnreadyPods)
defer testCleanup() defer testCleanup()
// check if empty nodes are scaled down // check if empty nodes are scaled down
@ -241,7 +250,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
} }
setMigSizes(newSizes) setMigSizes(newSizes)
framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, totalNodes, largeResizeTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(f.ClientSet, totalNodes, largeResizeTimeout))
// annotate all nodes with no-scale-down // annotate all nodes with no-scale-down
ScaleDownDisabledKey := "cluster-autoscaler.kubernetes.io/scale-down-disabled" ScaleDownDisabledKey := "cluster-autoscaler.kubernetes.io/scale-down-disabled"
@ -295,7 +304,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
anyKey(originalSizes): totalNodes, anyKey(originalSizes): totalNodes,
} }
setMigSizes(newSizes) setMigSizes(newSizes)
framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, totalNodes, largeResizeTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(f.ClientSet, totalNodes, largeResizeTimeout))
divider := int(float64(totalNodes) * 0.7) divider := int(float64(totalNodes) * 0.7)
fullNodesCount := divider fullNodesCount := divider
underutilizedNodesCount := totalNodes - fullNodesCount underutilizedNodesCount := totalNodes - fullNodesCount
@ -321,6 +330,41 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
Expect(len(nodes.Items)).Should(Equal(totalNodes)) Expect(len(nodes.Items)).Should(Equal(totalNodes))
}) })
Specify("CA ignores unschedulable pods while scheduling schedulable pods [Feature:ClusterAutoscalerScalability6]", func() {
// Start a number of pods saturating existing nodes.
// Each node gets replicasPerNode pods whose combined request is 80% of node memory.
perNodeReservation := int(float64(memCapacityMb) * 0.80)
replicasPerNode := 10
initialPodReplicas := nodeCount * replicasPerNode
initialPodsTotalMemory := nodeCount * perNodeReservation
reservationCleanup := ReserveMemory(f, "initial-pod", initialPodReplicas, initialPodsTotalMemory, true /* wait for pods to run */, memoryReservationTimeout)
defer reservationCleanup()
framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
// Configure a number of unschedulable pods.
// Each pod requests twice a node's memory capacity, so no node (existing or
// newly added) can ever fit one — the autoscaler must not scale up for them.
unschedulableMemReservation := memCapacityMb * 2
unschedulablePodReplicas := 1000
totalMemReservation := unschedulableMemReservation * unschedulablePodReplicas
timeToWait := 5 * time.Minute
podsConfig := reserveMemoryRCConfig(f, "unschedulable-pod", unschedulablePodReplicas, totalMemReservation, timeToWait)
framework.RunRC(*podsConfig) // Ignore error (it will occur because pods are unschedulable)
defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, podsConfig.Name)
// Ensure that no new nodes have been added so far.
Expect(framework.NumberOfReadyNodes(f.ClientSet)).To(Equal(nodeCount))
// Start a number of schedulable pods to ensure CA reacts.
// These pods fit on regular nodes, so CA should add additionalNodes nodes
// despite the 1000 permanently-pending unschedulable pods above.
additionalNodes := maxNodes - nodeCount
replicas := additionalNodes * replicasPerNode
totalMemory := additionalNodes * perNodeReservation
rcConfig := reserveMemoryRCConfig(f, "extra-pod", replicas, totalMemory, largeScaleUpTimeout)
expectedResult := createClusterPredicates(nodeCount + additionalNodes)
config := createScaleUpTestConfig(nodeCount, initialPodReplicas, rcConfig, expectedResult)
// Test that scale up happens, allowing 1000 unschedulable pods not to be scheduled.
testCleanup := simpleScaleUpTestWithTolerance(f, config, 0, unschedulablePodReplicas)
defer testCleanup()
})
}) })
func makeUnschedulable(f *framework.Framework, nodes []v1.Node) error { func makeUnschedulable(f *framework.Framework, nodes []v1.Node) error {
@ -350,24 +394,24 @@ func anyKey(input map[string]int) string {
return "" return ""
} }
func simpleScaleUpTestWithEpsilon(f *framework.Framework, config *scaleUpTestConfig, epsilon float64) func() error { func simpleScaleUpTestWithTolerance(f *framework.Framework, config *scaleUpTestConfig, tolerateMissingNodeCount int, tolerateMissingPodCount int) func() error {
// resize cluster to start size // resize cluster to start size
// run rc based on config // run rc based on config
By(fmt.Sprintf("Running RC %v from config", config.extraPods.Name)) By(fmt.Sprintf("Running RC %v from config", config.extraPods.Name))
start := time.Now() start := time.Now()
framework.ExpectNoError(framework.RunRC(*config.extraPods)) framework.ExpectNoError(framework.RunRC(*config.extraPods))
// check results // check results
if epsilon > 0 && epsilon < 1 { if tolerateMissingNodeCount > 0 {
// Tolerate some number of nodes not to be created. // Tolerate some number of nodes not to be created.
minExpectedNodeCount := int(float64(config.expectedResult.nodes) - epsilon*float64(config.expectedResult.nodes)) minExpectedNodeCount := config.expectedResult.nodes - tolerateMissingNodeCount
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet, framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size >= minExpectedNodeCount }, scaleUpTimeout)) func(size int) bool { return size >= minExpectedNodeCount }, scaleUpTimeout))
} else { } else {
framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, config.expectedResult.nodes, scaleUpTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(f.ClientSet, config.expectedResult.nodes, scaleUpTimeout))
} }
glog.Infof("cluster is increased") glog.Infof("cluster is increased")
if epsilon > 0 && epsilon < 0 { if tolerateMissingPodCount > 0 {
framework.ExpectNoError(waitForCaPodsReadyInNamespace(f, f.ClientSet, int(epsilon*float64(config.extraPods.Replicas)+1))) framework.ExpectNoError(waitForCaPodsReadyInNamespace(f, f.ClientSet, tolerateMissingPodCount))
} else { } else {
framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, f.ClientSet)) framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, f.ClientSet))
} }
@ -378,7 +422,7 @@ func simpleScaleUpTestWithEpsilon(f *framework.Framework, config *scaleUpTestCon
} }
func simpleScaleUpTest(f *framework.Framework, config *scaleUpTestConfig) func() error { func simpleScaleUpTest(f *framework.Framework, config *scaleUpTestConfig) func() error {
return simpleScaleUpTestWithEpsilon(f, config, 0) return simpleScaleUpTestWithTolerance(f, config, 0, 0)
} }
func reserveMemoryRCConfig(f *framework.Framework, id string, replicas, megabytes int, timeout time.Duration) *testutils.RCConfig { func reserveMemoryRCConfig(f *framework.Framework, id string, replicas, megabytes int, timeout time.Duration) *testutils.RCConfig {

View File

@ -97,7 +97,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
sum += size sum += size
} }
// Give instances time to spin up // Give instances time to spin up
framework.ExpectNoError(framework.WaitForClusterSize(c, sum, scaleUpTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, sum, scaleUpTimeout))
nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet) nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
nodeCount = len(nodes.Items) nodeCount = len(nodes.Items)
@ -127,7 +127,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
for _, size := range originalSizes { for _, size := range originalSizes {
expectedNodes += size expectedNodes += size
} }
framework.ExpectNoError(framework.WaitForClusterSize(c, expectedNodes, scaleDownTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, expectedNodes, scaleDownTimeout))
nodes, err := c.Core().Nodes().List(metav1.ListOptions{}) nodes, err := c.Core().Nodes().List(metav1.ListOptions{})
framework.ExpectNoError(err) framework.ExpectNoError(err)
@ -226,7 +226,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
const extraPoolName = "extra-pool" const extraPoolName = "extra-pool"
addNodePool(extraPoolName, "n1-standard-4", 1) addNodePool(extraPoolName, "n1-standard-4", 1)
defer deleteNodePool(extraPoolName) defer deleteNodePool(extraPoolName)
framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+1, resizeTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
glog.Infof("Not enabling cluster autoscaler for the node pool (on purpose).") glog.Infof("Not enabling cluster autoscaler for the node pool (on purpose).")
By("Get memory available on new node, so we can account for it when creating RC") By("Get memory available on new node, so we can account for it when creating RC")
@ -253,7 +253,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
const extraPoolName = "extra-pool" const extraPoolName = "extra-pool"
addNodePool(extraPoolName, "n1-standard-4", 1) addNodePool(extraPoolName, "n1-standard-4", 1)
defer deleteNodePool(extraPoolName) defer deleteNodePool(extraPoolName)
framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+1, resizeTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
framework.ExpectNoError(enableAutoscaler(extraPoolName, 1, 2)) framework.ExpectNoError(enableAutoscaler(extraPoolName, 1, 2))
framework.ExpectNoError(disableAutoscaler(extraPoolName, 1, 2)) framework.ExpectNoError(disableAutoscaler(extraPoolName, 1, 2))
}) })
@ -283,7 +283,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, "extra-pod") defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, "extra-pod")
framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c)) framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+newPods, scaleUpTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+newPods, scaleUpTimeout))
}) })
It("should increase cluster size if pod requesting EmptyDir volume is pending [Feature:ClusterSizeAutoscalingScaleUp]", func() { It("should increase cluster size if pod requesting EmptyDir volume is pending [Feature:ClusterSizeAutoscalingScaleUp]", func() {
@ -304,7 +304,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, "extra-pod") defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, "extra-pod")
framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c)) framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+newPods, scaleUpTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+newPods, scaleUpTimeout))
}) })
It("should increase cluster size if pod requesting volume is pending [Feature:ClusterSizeAutoscalingScaleUp]", func() { It("should increase cluster size if pod requesting volume is pending [Feature:ClusterSizeAutoscalingScaleUp]", func() {
@ -377,7 +377,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
}() }()
framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c)) framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+newPods, scaleUpTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+newPods, scaleUpTimeout))
}) })
It("should add node to the particular mig [Feature:ClusterSizeAutoscalingScaleUp]", func() { It("should add node to the particular mig [Feature:ClusterSizeAutoscalingScaleUp]", func() {
@ -478,7 +478,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
const extraPoolName = "extra-pool" const extraPoolName = "extra-pool"
addNodePool(extraPoolName, "n1-standard-4", 1) addNodePool(extraPoolName, "n1-standard-4", 1)
defer deleteNodePool(extraPoolName) defer deleteNodePool(extraPoolName)
framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+1, resizeTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
framework.ExpectNoError(enableAutoscaler(extraPoolName, 1, 2)) framework.ExpectNoError(enableAutoscaler(extraPoolName, 1, 2))
By("Creating rc with 2 pods too big to fit default-pool but fitting extra-pool") By("Creating rc with 2 pods too big to fit default-pool but fitting extra-pool")
@ -489,7 +489,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
// reseting all the timers in scale down code. Adding 5 extra minutes to workaround // reseting all the timers in scale down code. Adding 5 extra minutes to workaround
// this issue. // this issue.
// TODO: Remove the extra time when GKE restart is fixed. // TODO: Remove the extra time when GKE restart is fixed.
framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+2, scaleUpTimeout+5*time.Minute)) framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+2, scaleUpTimeout+5*time.Minute))
}) })
simpleScaleDownTest := func(unready int) { simpleScaleDownTest := func(unready int) {
@ -588,7 +588,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
} }
err := framework.ResizeGroup(minMig, int32(0)) err := framework.ResizeGroup(minMig, int32(0))
framework.ExpectNoError(err) framework.ExpectNoError(err)
framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount-minSize, resizeTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount-minSize, resizeTimeout))
By("Make remaining nodes unschedulable") By("Make remaining nodes unschedulable")
nodes, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{ nodes, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
@ -628,7 +628,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
} }
err := framework.ResizeGroup(minMig, int32(1)) err := framework.ResizeGroup(minMig, int32(1))
framework.ExpectNoError(err) framework.ExpectNoError(err)
framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount-minSize+1, resizeTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount-minSize+1, resizeTimeout))
By("Make the single node unschedulable") By("Make the single node unschedulable")
allNodes, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{ allNodes, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
@ -699,7 +699,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
} }
testFunction() testFunction()
// Give nodes time to recover from network failure // Give nodes time to recover from network failure
framework.ExpectNoError(framework.WaitForClusterSize(c, len(nodes.Items), nodesRecoverTimeout)) framework.ExpectNoError(framework.WaitForReadyNodes(c, len(nodes.Items), nodesRecoverTimeout))
}) })
}) })
@ -937,7 +937,7 @@ func ReserveMemory(f *framework.Framework, id string, replicas, megabytes int, e
return nil return nil
} }
// WaitForClusterSize waits until the cluster size matches the given function. // WaitForClusterSizeFunc waits until the cluster size matches the given function.
func WaitForClusterSizeFunc(c clientset.Interface, sizeFunc func(int) bool, timeout time.Duration) error { func WaitForClusterSizeFunc(c clientset.Interface, sizeFunc func(int) bool, timeout time.Duration) error {
return WaitForClusterSizeFuncWithUnready(c, sizeFunc, timeout, 0) return WaitForClusterSizeFuncWithUnready(c, sizeFunc, timeout, 0)
} }

View File

@ -151,7 +151,7 @@ var _ = SIGDescribe("DNS horizontal autoscaling", func() {
By("Restoring cluster size") By("Restoring cluster size")
setMigSizes(originalSizes) setMigSizes(originalSizes)
Expect(framework.WaitForClusterSize(c, sum, scaleDownTimeout)).NotTo(HaveOccurred()) Expect(framework.WaitForReadyNodes(c, sum, scaleDownTimeout)).NotTo(HaveOccurred())
By("Wait for kube-dns scaled to expected number") By("Wait for kube-dns scaled to expected number")
Expect(waitForDNSReplicasSatisfied(c, getExpectReplicasLinear, DNSdefaultTimeout)).NotTo(HaveOccurred()) Expect(waitForDNSReplicasSatisfied(c, getExpectReplicasLinear, DNSdefaultTimeout)).NotTo(HaveOccurred())

View File

@ -3892,9 +3892,26 @@ func WaitForControllerManagerUp() error {
return fmt.Errorf("waiting for controller-manager timed out") return fmt.Errorf("waiting for controller-manager timed out")
} }
// WaitForClusterSize waits until the cluster has desired size and there is no not-ready nodes in it. // Returns number of ready Nodes excluding Master Node.
// NumberOfReadyNodes returns the number of schedulable nodes currently
// reporting the Ready condition. Nodes marked spec.unschedulable (which
// typically includes the master) are excluded by the field selector.
// Returns 0 and the error if listing nodes fails.
func NumberOfReadyNodes(c clientset.Interface) (int, error) {
nodes, err := c.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
"spec.unschedulable": "false",
}.AsSelector().String()})
if err != nil {
Logf("Failed to list nodes: %v", err)
return 0, err
}
// Filter out not-ready nodes.
FilterNodes(nodes, func(node v1.Node) bool {
return IsNodeConditionSetAsExpected(&node, v1.NodeReady, true)
})
return len(nodes.Items), nil
}
// WaitForReadyNodes waits until the cluster has desired size and there is no not-ready nodes in it.
// By cluster size we mean number of Nodes excluding Master Node. // By cluster size we mean number of Nodes excluding Master Node.
func WaitForClusterSize(c clientset.Interface, size int, timeout time.Duration) error { func WaitForReadyNodes(c clientset.Interface, size int, timeout time.Duration) error {
for start := time.Now(); time.Since(start) < timeout; time.Sleep(20 * time.Second) { for start := time.Now(); time.Since(start) < timeout; time.Sleep(20 * time.Second) {
nodes, err := c.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{ nodes, err := c.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
"spec.unschedulable": "false", "spec.unschedulable": "false",
@ -3912,12 +3929,12 @@ func WaitForClusterSize(c clientset.Interface, size int, timeout time.Duration)
numReady := len(nodes.Items) numReady := len(nodes.Items)
if numNodes == size && numReady == size { if numNodes == size && numReady == size {
Logf("Cluster has reached the desired size %d", size) Logf("Cluster has reached the desired number of ready nodes %d", size)
return nil return nil
} }
Logf("Waiting for cluster size %d, current size %d, not ready nodes %d", size, numNodes, numNodes-numReady) Logf("Waiting for ready nodes %d, current ready %d, not ready nodes %d", size, numNodes, numNodes-numReady)
} }
return fmt.Errorf("timeout waiting %v for cluster size to be %d", timeout, size) return fmt.Errorf("timeout waiting %v for number of ready nodes to be %d", timeout, size)
} }
func GenerateMasterRegexp(prefix string) string { func GenerateMasterRegexp(prefix string) string {

View File

@ -98,7 +98,7 @@ var _ = SIGDescribe("Nodes [Disruptive]", func() {
if err := framework.WaitForGroupSize(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil { if err := framework.WaitForGroupSize(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
framework.Failf("Couldn't restore the original node instance group size: %v", err) framework.Failf("Couldn't restore the original node instance group size: %v", err)
} }
if err := framework.WaitForClusterSize(c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute); err != nil { if err := framework.WaitForReadyNodes(c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute); err != nil {
framework.Failf("Couldn't restore the original cluster size: %v", err) framework.Failf("Couldn't restore the original cluster size: %v", err)
} }
// Many e2e tests assume that the cluster is fully healthy before they start. Wait until // Many e2e tests assume that the cluster is fully healthy before they start. Wait until
@ -124,7 +124,7 @@ var _ = SIGDescribe("Nodes [Disruptive]", func() {
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
err = framework.WaitForGroupSize(group, replicas-1) err = framework.WaitForGroupSize(group, replicas-1)
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
err = framework.WaitForClusterSize(c, int(replicas-1), 10*time.Minute) err = framework.WaitForReadyNodes(c, int(replicas-1), 10*time.Minute)
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
By("waiting 1 minute for the watch in the podGC to catch up, remove any pods scheduled on " + By("waiting 1 minute for the watch in the podGC to catch up, remove any pods scheduled on " +
@ -152,7 +152,7 @@ var _ = SIGDescribe("Nodes [Disruptive]", func() {
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
err = framework.WaitForGroupSize(group, replicas+1) err = framework.WaitForGroupSize(group, replicas+1)
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
err = framework.WaitForClusterSize(c, int(replicas+1), 10*time.Minute) err = framework.WaitForReadyNodes(c, int(replicas+1), 10*time.Minute)
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
By(fmt.Sprintf("increasing size of the replication controller to %d and verifying all pods are running", replicas+1)) By(fmt.Sprintf("increasing size of the replication controller to %d and verifying all pods are running", replicas+1))