Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-23 03:41:45 +00:00
Merge pull request #53326 from aleksandra-malinowska/scale-to-0-test-gke
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions at https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md.

Remake cluster size autoscaling scale to zero test

This PR affects only the cluster size autoscaling test suite. Changes:
* check whether autoscaling is enabled by looking for a node group with a given max number of nodes instead of min, as the field is omitted if its value is 0
* split the scale-to-zero test into GKE & GCE versions, and add GKE-specific setup and verification
This commit is contained in commit 83d3137e25.
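The first bullet is the crux: isAutoscalerEnabled detects autoscaling by matching a field in the raw GKE API response, and a minNodeCount of 0 never appears there because zero-valued fields are omitted from the JSON. A minimal standalone sketch of that failure mode, with a hypothetical autoscaling struct standing in for the API response shape and Go's omitempty modeling the omission:

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// autoscaling is a hypothetical stand-in for the node pool autoscaling
// block in a GKE API response; the real test matches raw response text.
type autoscaling struct {
	Enabled      bool `json:"enabled,omitempty"`
	MinNodeCount int  `json:"minNodeCount,omitempty"`
	MaxNodeCount int  `json:"maxNodeCount,omitempty"`
}

func main() {
	// enableAutoscaler(extraPoolName, 0, 1) sets min=0, max=1.
	body, _ := json.Marshal(autoscaling{Enabled: true, MinNodeCount: 0, MaxNodeCount: 1})
	fmt.Println(string(body)) // {"enabled":true,"maxNodeCount":1}

	// The old check looked for the min field, which is simply absent at 0...
	fmt.Println(strings.Contains(string(body), `"minNodeCount"`)) // false
	// ...while the max field is always present for an enabled autoscaler.
	fmt.Println(strings.Contains(string(body), `"maxNodeCount"`)) // true
}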
@@ -64,8 +64,9 @@ const (
 	makeSchedulableDelay = 20 * time.Second
 	freshStatusLimit = 20 * time.Second

 	gkeEndpoint      = "https://test-container.sandbox.googleapis.com"
 	gkeUpdateTimeout = 15 * time.Minute
+	gkeNodepoolNameKey = "cloud.google.com/gke-nodepool"

 	disabledTaint           = "DisabledForAutoscalingTest"
 	criticalAddonsOnlyTaint = "CriticalAddonsOnly"
@@ -110,7 +111,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 		Expect(nodeCount).Should(Equal(sum))

 		if framework.ProviderIs("gke") {
-			val, err := isAutoscalerEnabled(3)
+			val, err := isAutoscalerEnabled(5)
 			framework.ExpectNoError(err)
 			if !val {
 				err = enableAutoscaler("default-pool", 3, 5)
@@ -619,21 +620,43 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 		framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
 	})

-	It("Should be able to scale a node group down to 0[Feature:ClusterSizeAutoscalingScaleDown]", func() {
-		// Determine whether we want to run & adjust the setup if necessary
-		if len(originalSizes) < 2 {
-			if framework.ProviderIs("gke") {
-				By("Adding a new node pool")
-				const extraPoolName = "extra-pool"
-				addNodePool(extraPoolName, "n1-standard-4", 1)
-				defer deleteNodePool(extraPoolName)
-				framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
-				framework.ExpectNoError(enableAutoscaler(extraPoolName, 0, 1))
-			} else {
-				framework.Skipf("At least 2 node groups are needed for scale-to-0 tests")
-			}
-		}
+	// Scale to 0 test is split into two functions (for GKE & GCE.)
+	// The reason for it is that scenario is exactly the same,
+	// but setup & verification use different APIs.
+	//
+	// Scenario:
+	// (GKE only) add an extra node pool with size 1 & enable autoscaling for it
+	// (GCE only) find the smallest MIG & resize it to 1
+	// manually drain the single node from this node pool/MIG
+	// wait for cluster size to decrease
+	// verify the targeted node pool/MIG is of size 0
+	gkeScaleToZero := func() {
+		// GKE-specific setup
+		By("Add a new node pool with 1 node and min size 0")
+		const extraPoolName = "extra-pool"
+		addNodePool(extraPoolName, "n1-standard-4", 1)
+		defer deleteNodePool(extraPoolName)
+		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
+		err := enableAutoscaler(extraPoolName, 0, 1)
+		framework.ExpectNoError(err)
+
+		ngNodes := getPoolNodes(f, extraPoolName)
+		Expect(len(ngNodes) == 1).To(BeTrue())
+		node := ngNodes[0]
+		By(fmt.Sprintf("Target node for scale-down: %s", node.Name))
+
+		// this part is identical
+		drainNode(f, node)
+		framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
+			func(size int) bool { return size < nodeCount+1 }, scaleDownTimeout))
+
+		// GKE-specific check
+		newSize := getPoolSize(f, extraPoolName)
+		Expect(newSize).Should(Equal(0))
+	}

+	gceScaleToZero := func() {
+		// non-GKE only
 		By("Find smallest node group and manually scale it to a single node")
 		minMig := ""
 		minSize := nodeCount
@@ -646,35 +669,32 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
 		err := framework.ResizeGroup(minMig, int32(1))
 		framework.ExpectNoError(err)
 		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount-minSize+1, resizeTimeout))

-		By("Make the single node unschedulable")
-		allNodes, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
-			"spec.unschedulable": "false",
-		}.AsSelector().String()})
-		framework.ExpectNoError(err)
 		ngNodes, err := framework.GetGroupNodes(minMig)
 		framework.ExpectNoError(err)
-		By(fmt.Sprintf("Target nodes for scale-down: %s", ngNodes))
 		Expect(len(ngNodes) == 1).To(BeTrue())
 		node, err := f.ClientSet.Core().Nodes().Get(ngNodes[0], metav1.GetOptions{})
+		By(fmt.Sprintf("Target node for scale-down: %s", node.Name))
 		framework.ExpectNoError(err)
-		makeNodeUnschedulable(f.ClientSet, node)

-		By("Manually drain the single node")
-		podOpts := metav1.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, node.Name).String()}
-		pods, err := c.Core().Pods(metav1.NamespaceAll).List(podOpts)
-		framework.ExpectNoError(err)
-		for _, pod := range pods.Items {
-			err = f.ClientSet.Core().Pods(pod.Namespace).Delete(pod.Name, metav1.NewDeleteOptions(0))
-			framework.ExpectNoError(err)
-		}
+		// this part is identical
+		drainNode(f, node)

-		By("The node should be removed")
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
-			func(size int) bool { return size < len(allNodes.Items) }, scaleDownTimeout))
-		minSize, err = framework.GroupSize(minMig)
+			func(size int) bool { return size < nodeCount-minSize+1 }, scaleDownTimeout))
+		// non-GKE only
+		newSize, err := framework.GroupSize(minMig)
 		framework.ExpectNoError(err)
-		Expect(minSize).Should(Equal(0))
+		Expect(newSize).Should(Equal(0))
+	}

+	It("Should be able to scale a node group down to 0[Feature:ClusterSizeAutoscalingScaleDown]", func() {
+		if framework.ProviderIs("gke") { // In GKE, we can just add a node pool
+			gkeScaleToZero()
+		} else if len(originalSizes) >= 2 {
+			gceScaleToZero()
+		} else {
+			framework.Skipf("At least 2 node groups are needed for scale-to-0 tests")
+		}
 	})

 	It("Shouldn't perform scale up operation and should list unhealthy status if most of the cluster is broken[Feature:ClusterSizeAutoscalingScaleUp]", func() {
@@ -775,7 +795,7 @@ func getGKEClusterUrl() string {
 		token)
 }

-func isAutoscalerEnabled(expectedMinNodeCountInTargetPool int) (bool, error) {
+func isAutoscalerEnabled(expectedMaxNodeCountInTargetPool int) (bool, error) {
 	resp, err := http.Get(getGKEClusterUrl())
 	if err != nil {
 		return false, err
@@ -786,7 +806,7 @@ func isAutoscalerEnabled(expectedMinNodeCountInTargetPool int) (bool, error) {
 		return false, err
 	}
 	strBody := string(body)
-	if strings.Contains(strBody, "\"minNodeCount\": "+strconv.Itoa(expectedMinNodeCountInTargetPool)) {
+	if strings.Contains(strBody, "\"maxNodeCount\": "+strconv.Itoa(expectedMaxNodeCountInTargetPool)) {
 		return true, nil
 	}
 	return false, nil
@@ -833,7 +853,7 @@ func enableAutoscaler(nodePool string, minCount, maxCount int) error {

 	var finalErr error
 	for startTime := time.Now(); startTime.Add(gkeUpdateTimeout).After(time.Now()); time.Sleep(30 * time.Second) {
-		val, err := isAutoscalerEnabled(minCount)
+		val, err := isAutoscalerEnabled(maxCount)
 		if err == nil && val {
 			return nil
 		}
@@ -880,7 +900,7 @@ func disableAutoscaler(nodePool string, minCount, maxCount int) error {
 	}

 	for startTime := time.Now(); startTime.Add(gkeUpdateTimeout).After(time.Now()); time.Sleep(30 * time.Second) {
-		if val, err := isAutoscalerEnabled(minCount); err == nil && !val {
+		if val, err := isAutoscalerEnabled(maxCount); err == nil && !val {
 			return nil
 		}
 	}
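Both enableAutoscaler and disableAutoscaler reuse the same poll-until-deadline loop shown in the hunks above. A self-contained sketch of that idiom, with hypothetical names (pollUntil, check), for readers unfamiliar with the time.Now()/time.Sleep loop header:

package main

import (
	"fmt"
	"time"
)

// pollUntil is a hypothetical helper mirroring the loop header used by
// enableAutoscaler and disableAutoscaler: run check every interval until
// it succeeds or timeout elapses.
func pollUntil(timeout, interval time.Duration, check func() bool) bool {
	for start := time.Now(); start.Add(timeout).After(time.Now()); time.Sleep(interval) {
		if check() {
			return true
		}
	}
	return false
}

func main() {
	ready := time.Now().Add(120 * time.Millisecond)
	// Succeeds on a later iteration, once the fake condition comes true.
	fmt.Println(pollUntil(time.Second, 40*time.Millisecond, func() bool {
		return time.Now().After(ready)
	}))
}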
@@ -914,13 +934,24 @@ func getPoolNodes(f *framework.Framework, poolName string) []*v1.Node {
 	nodes := make([]*v1.Node, 0, 1)
 	nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
 	for _, node := range nodeList.Items {
-		if poolLabel := node.Labels["cloud.google.com/gke-nodepool"]; poolLabel == poolName {
+		if node.Labels[gkeNodepoolNameKey] == poolName {
 			nodes = append(nodes, &node)
 		}
 	}
 	return nodes
 }

+func getPoolSize(f *framework.Framework, poolName string) int {
+	size := 0
+	nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
+	for _, node := range nodeList.Items {
+		if node.Labels[gkeNodepoolNameKey] == poolName {
+			size++
+		}
+	}
+	return size
+}
+
 func doPut(url, content string) (string, error) {
 	req, err := http.NewRequest("PUT", url, bytes.NewBuffer([]byte(content)))
 	if err != nil {
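getPoolNodes and the new getPoolSize both key off the gkeNodepoolNameKey label on ready schedulable nodes. A stripped-down sketch of that matching logic with a plain struct in place of v1.Node (node and countPool are illustrative names, not from the PR):

package main

import "fmt"

const gkeNodepoolNameKey = "cloud.google.com/gke-nodepool"

// node is a stand-in for v1.Node; only the labels matter here.
type node struct {
	labels map[string]string
}

// countPool applies the same label match that getPoolSize uses.
func countPool(nodes []node, poolName string) int {
	size := 0
	for _, n := range nodes {
		if n.labels[gkeNodepoolNameKey] == poolName {
			size++
		}
	}
	return size
}

func main() {
	nodes := []node{
		{labels: map[string]string{gkeNodepoolNameKey: "default-pool"}},
		{labels: map[string]string{gkeNodepoolNameKey: "default-pool"}},
		{labels: map[string]string{gkeNodepoolNameKey: "extra-pool"}},
	}
	fmt.Println(countPool(nodes, "extra-pool")) // 1
}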
@@ -1075,6 +1106,20 @@ func setMigSizes(sizes map[string]int) bool {
 	return madeChanges
 }

+func drainNode(f *framework.Framework, node *v1.Node) {
+	By("Make the single node unschedulable")
+	makeNodeUnschedulable(f.ClientSet, node)
+
+	By("Manually drain the single node")
+	podOpts := metav1.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, node.Name).String()}
+	pods, err := f.ClientSet.Core().Pods(metav1.NamespaceAll).List(podOpts)
+	framework.ExpectNoError(err)
+	for _, pod := range pods.Items {
+		err = f.ClientSet.Core().Pods(pod.Namespace).Delete(pod.Name, metav1.NewDeleteOptions(0))
+		framework.ExpectNoError(err)
+	}
+}
+
 func makeNodeUnschedulable(c clientset.Interface, node *v1.Node) error {
 	By(fmt.Sprintf("Taint node %s", node.Name))
 	for j := 0; j < 3; j++ {