mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 12:15:52 +00:00
Merge pull request #66504 from aleksandra-malinowska/remove-nap-tests
Automatic merge from submit-queue (batch tested with PRs 66352, 66504). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Remove outdated autoscaling e2e tests. Remove outdated autoscaling e2e tests. /cc @bskiba @MaciekPytel /sig-autoscaling ```release-note NONE ```
This commit is contained in:
commit
c91d441343
@ -130,16 +130,10 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
|
||||
err = enableAutoscaler("default-pool", 3, 5)
|
||||
framework.ExpectNoError(err)
|
||||
}
|
||||
Expect(getNAPNodePoolsNumber()).Should(Equal(0))
|
||||
}
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
if framework.ProviderIs("gke") {
|
||||
By("Remove changes introduced by NAP tests")
|
||||
removeNAPNodePools()
|
||||
disableAutoprovisioning()
|
||||
}
|
||||
By(fmt.Sprintf("Restoring initial size of the cluster"))
|
||||
setMigSizes(originalSizes)
|
||||
expectedNodes := 0
|
||||
@ -326,29 +320,6 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
|
||||
Expect(len(getPoolNodes(f, gpuPoolName))).Should(Equal(0))
|
||||
})
|
||||
|
||||
// TODO consider moving to [Feature:ClusterSizeAutoscalingGpu] as soon as NAP goes out of beta. Currently
|
||||
// project needed to run the NAP tests require whitelisting for NAP alpha
|
||||
It("NAP should add a pool with GPUs if unschedulable POD which require GPU exists [Feature:ClusterSizeAutoscalingScaleWithNAP]", func() {
|
||||
framework.SkipUnlessProviderIs("gke")
|
||||
|
||||
installNvidiaDriversDaemonSet()
|
||||
framework.ExpectNoError(enableAutoprovisioning(`
|
||||
"resource_limits":{"resource_type":"nvidia-tesla-k80", "minimum":0, "maximum": 3},
|
||||
"resource_limits":{"resource_type":"cpu", "minimum":0, "maximum":64},
|
||||
"resource_limits":{"resource_type":"memory", "minimum":0, "maximum":1000000000000}`))
|
||||
|
||||
By("Schedule a pod which requires GPU and wait until it is started")
|
||||
framework.ExpectNoError(scheduleGpuPod(f, "gpu-pod-rc"))
|
||||
defer framework.DeleteRCAndWaitForGC(f.ClientSet, f.Namespace.Name, "gpu-pod-rc")
|
||||
|
||||
By("Verify cluster size increased")
|
||||
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
|
||||
func(size int) bool { return size == nodeCount+1 }, scaleUpTimeout))
|
||||
|
||||
By("Check if NAP group was created")
|
||||
Expect(getNAPNodePoolsNumber()).Should(Equal(1))
|
||||
})
|
||||
|
||||
It("should increase cluster size if pending pods are small and one node is broken [Feature:ClusterSizeAutoscalingScaleUp]",
|
||||
func() {
|
||||
framework.TestUnderTemporaryNetworkFailure(c, "default", getAnyNode(c), func() { simpleScaleUpTest(1) })
|
||||
@ -934,103 +905,6 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
|
||||
framework.ExpectNoError(framework.WaitForReadyNodes(c, len(nodes.Items), nodesRecoverTimeout))
|
||||
})
|
||||
|
||||
It("should add new node and new node pool on too big pod, scale down to 1 and scale down to 0 [Feature:ClusterSizeAutoscalingScaleWithNAP]", func() {
|
||||
framework.SkipUnlessProviderIs("gke")
|
||||
framework.ExpectNoError(enableAutoprovisioning(""))
|
||||
By("Create first pod")
|
||||
cleanupFunc1 := ReserveMemory(f, "memory-reservation1", 1, int(1.1*float64(memAllocatableMb)), true, defaultTimeout)
|
||||
defer func() {
|
||||
if cleanupFunc1 != nil {
|
||||
cleanupFunc1()
|
||||
}
|
||||
}()
|
||||
By("Waiting for scale up")
|
||||
// Verify that cluster size increased.
|
||||
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
|
||||
func(size int) bool { return size == nodeCount+1 }, defaultTimeout))
|
||||
By("Check if NAP group was created")
|
||||
Expect(getNAPNodePoolsNumber()).Should(Equal(1))
|
||||
By("Create second pod")
|
||||
cleanupFunc2 := ReserveMemory(f, "memory-reservation2", 1, int(1.1*float64(memAllocatableMb)), true, defaultTimeout)
|
||||
defer func() {
|
||||
if cleanupFunc2 != nil {
|
||||
cleanupFunc2()
|
||||
}
|
||||
}()
|
||||
By("Waiting for scale up")
|
||||
// Verify that cluster size increased.
|
||||
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
|
||||
func(size int) bool { return size == nodeCount+2 }, defaultTimeout))
|
||||
By("Delete first pod")
|
||||
cleanupFunc1()
|
||||
cleanupFunc1 = nil
|
||||
By("Waiting for scale down to 1")
|
||||
// Verify that cluster size decreased.
|
||||
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
|
||||
func(size int) bool { return size == nodeCount+1 }, scaleDownTimeout))
|
||||
By("Delete second pod")
|
||||
cleanupFunc2()
|
||||
cleanupFunc2 = nil
|
||||
By("Waiting for scale down to 0")
|
||||
// Verify that cluster size decreased.
|
||||
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
|
||||
func(size int) bool { return size == nodeCount }, scaleDownTimeout))
|
||||
By("Waiting for NAP group remove")
|
||||
framework.ExpectNoError(waitTillAllNAPNodePoolsAreRemoved())
|
||||
By("Check if NAP group was removeed")
|
||||
Expect(getNAPNodePoolsNumber()).Should(Equal(0))
|
||||
})
|
||||
|
||||
It("shouldn't add new node group if not needed [Feature:ClusterSizeAutoscalingScaleWithNAP]", func() {
|
||||
framework.SkipUnlessProviderIs("gke")
|
||||
framework.ExpectNoError(enableAutoprovisioning(""))
|
||||
By("Create pods")
|
||||
// Create nodesCountAfterResize+1 pods allocating 0.7 allocatable on present nodes. One more node will have to be created.
|
||||
cleanupFunc := ReserveMemory(f, "memory-reservation", nodeCount+1, int(float64(nodeCount+1)*float64(0.7)*float64(memAllocatableMb)), true, scaleUpTimeout)
|
||||
defer cleanupFunc()
|
||||
By("Waiting for scale up")
|
||||
// Verify that cluster size increased.
|
||||
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
|
||||
func(size int) bool { return size >= nodeCount+1 }, scaleUpTimeout))
|
||||
By("Check if NAP group was created hoping id didn't happen")
|
||||
Expect(getNAPNodePoolsNumber()).Should(Equal(0))
|
||||
})
|
||||
|
||||
It("shouldn't scale up if cores limit too low, should scale up after limit is changed [Feature:ClusterSizeAutoscalingScaleWithNAP]", func() {
|
||||
framework.SkipUnlessProviderIs("gke")
|
||||
By(fmt.Sprintf("Set core limit to %d", coreCount))
|
||||
framework.ExpectNoError(enableAutoprovisioning(fmt.Sprintf(`"resource_limits":{"resource_type":"cpu", "minimum":2, "maximum":%d}, "resource_limits":{"resource_type":"memory", "minimum":0, "maximum":10000000}`, coreCount)))
|
||||
// Create pod allocating 1.1 allocatable for present nodes. Bigger node will have to be created.
|
||||
cleanupFunc := ReserveMemory(f, "memory-reservation", 1, int(1.1*float64(memAllocatableMb)), false, time.Second)
|
||||
defer cleanupFunc()
|
||||
By(fmt.Sprintf("Waiting for scale up hoping it won't happen, sleep for %s", scaleUpTimeout.String()))
|
||||
time.Sleep(scaleUpTimeout)
|
||||
// Verify that cluster size is not changed
|
||||
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
|
||||
func(size int) bool { return size == nodeCount }, time.Second))
|
||||
By("Change resource limits")
|
||||
framework.ExpectNoError(enableAutoprovisioning(fmt.Sprintf(`"resource_limits":{"resource_type":"cpu", "minimum":2, "maximum":%d}, "resource_limits":{"resource_type":"memory", "minimum":0, "maximum":10000000}`, coreCount+5)))
|
||||
By("Wait for scale up")
|
||||
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
|
||||
func(size int) bool { return size == nodeCount+1 }, scaleUpTimeout))
|
||||
By("Check if NAP group was created")
|
||||
Expect(getNAPNodePoolsNumber()).Should(Equal(1))
|
||||
})
|
||||
|
||||
It("should create new node if there is no node for node selector [Feature:ClusterSizeAutoscalingScaleWithNAP]", func() {
|
||||
framework.SkipUnlessProviderIs("gke")
|
||||
framework.ExpectNoError(enableAutoprovisioning(""))
|
||||
// Create pod allocating 0.7 allocatable for present nodes with node selector.
|
||||
cleanupFunc := ReserveMemoryWithSelector(f, "memory-reservation", 1, int(0.7*float64(memAllocatableMb)), true, scaleUpTimeout, map[string]string{"test": "test"})
|
||||
defer cleanupFunc()
|
||||
By("Waiting for scale up")
|
||||
// Verify that cluster size increased.
|
||||
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
|
||||
func(size int) bool { return size == nodeCount+1 }, defaultTimeout))
|
||||
By("Check if NAP group was created")
|
||||
Expect(getNAPNodePoolsNumber()).Should(Equal(1))
|
||||
})
|
||||
|
||||
It("shouldn't scale up when expendable pod is created [Feature:ClusterSizeAutoscalingScaleUp]", func() {
|
||||
// TODO(krzysztof_jastrzebski): Start running this test on GKE when Pod Priority and Preemption is in beta.
|
||||
framework.SkipUnlessProviderIs("gce")
|
||||
@ -1288,17 +1162,6 @@ func disableAutoscaler(nodePool string, minCount, maxCount int) error {
|
||||
return fmt.Errorf("autoscaler still enabled, last error: %v", finalErr)
|
||||
}
|
||||
|
||||
func isAutoprovisioningEnabled() (bool, error) {
|
||||
strBody, err := getCluster("v1alpha1")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if strings.Contains(strBody, "\"enableNodeAutoprovisioning\": true") {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func executeHTTPRequest(method string, url string, body string) (string, error) {
|
||||
client := &http.Client{}
|
||||
req, err := http.NewRequest(method, url, strings.NewReader(body))
|
||||
@ -1318,126 +1181,6 @@ func executeHTTPRequest(method string, url string, body string) (string, error)
|
||||
return string(respBody), nil
|
||||
}
|
||||
|
||||
func enableAutoprovisioning(resourceLimits string) error {
|
||||
By("Using API to enable autoprovisioning.")
|
||||
var body string
|
||||
if resourceLimits != "" {
|
||||
body = fmt.Sprintf(`{"update": {"desired_cluster_autoscaling": {"enable_node_autoprovisioning": true, %s}}}`, resourceLimits)
|
||||
} else {
|
||||
body = `{"update": {"desired_cluster_autoscaling": {"enable_node_autoprovisioning": true, "resource_limits":{"resource_type":"cpu", "minimum":0, "maximum":100}, "resource_limits":{"resource_type":"memory", "minimum":0, "maximum":10000000}}}}`
|
||||
}
|
||||
_, err := executeHTTPRequest(http.MethodPut, getGKEClusterURL("v1alpha1"), body)
|
||||
if err != nil {
|
||||
glog.Errorf("Request error: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
glog.Infof("Wait for enabling autoprovisioning.")
|
||||
for start := time.Now(); time.Since(start) < gkeUpdateTimeout; time.Sleep(30 * time.Second) {
|
||||
enabled, err := isAutoprovisioningEnabled()
|
||||
if err != nil {
|
||||
glog.Errorf("Error: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
if enabled {
|
||||
By("Autoprovisioning enabled.")
|
||||
return nil
|
||||
}
|
||||
glog.Infof("Waiting for enabling autoprovisioning")
|
||||
}
|
||||
return fmt.Errorf("autoprovisioning wasn't enabled (timeout).")
|
||||
}
|
||||
|
||||
func disableAutoprovisioning() error {
|
||||
enabled, err := isAutoprovisioningEnabled()
|
||||
if err != nil {
|
||||
glog.Errorf("Error: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
if !enabled {
|
||||
By("Autoprovisioning disabled.")
|
||||
return nil
|
||||
}
|
||||
By("Using API to disable autoprovisioning.")
|
||||
_, err = executeHTTPRequest(http.MethodPut, getGKEClusterURL("v1alpha1"), "{\"update\": {\"desired_cluster_autoscaling\": {}}}")
|
||||
if err != nil {
|
||||
glog.Errorf("Request error: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
By("Wait for disabling autoprovisioning.")
|
||||
for start := time.Now(); time.Since(start) < gkeUpdateTimeout; time.Sleep(30 * time.Second) {
|
||||
enabled, err := isAutoprovisioningEnabled()
|
||||
if err != nil {
|
||||
glog.Errorf("Error: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
if !enabled {
|
||||
By("Autoprovisioning disabled.")
|
||||
return nil
|
||||
}
|
||||
By("Waiting for disabling autoprovisioning")
|
||||
}
|
||||
return fmt.Errorf("autoprovisioning wasn't disabled (timeout).")
|
||||
}
|
||||
|
||||
func getNAPNodePools() ([]string, error) {
|
||||
if framework.ProviderIs("gke") {
|
||||
args := []string{"container", "node-pools", "list", "--cluster=" + framework.TestContext.CloudConfig.Cluster}
|
||||
output, err := execCmd(getGcloudCommand(args)...).CombinedOutput()
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to get instance groups: %v", string(output))
|
||||
return nil, err
|
||||
}
|
||||
re := regexp.MustCompile("nap.* ")
|
||||
lines := re.FindAllString(string(output), -1)
|
||||
for i, line := range lines {
|
||||
lines[i] = line[:strings.Index(line, " ")]
|
||||
}
|
||||
return lines, nil
|
||||
} else {
|
||||
return nil, fmt.Errorf("provider does not support NAP")
|
||||
}
|
||||
}
|
||||
|
||||
func removeNAPNodePools() error {
|
||||
By("Remove NAP node pools")
|
||||
pools, err := getNAPNodePools()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, pool := range pools {
|
||||
By("Remove node pool: " + pool)
|
||||
suffix := fmt.Sprintf("projects/%s/zones/%s/clusters/%s/nodePools/%s",
|
||||
framework.TestContext.CloudConfig.ProjectID,
|
||||
framework.TestContext.CloudConfig.Zone,
|
||||
framework.TestContext.CloudConfig.Cluster,
|
||||
pool)
|
||||
_, err := executeHTTPRequest(http.MethodDelete, getGKEURL("v1alpha1", suffix), "")
|
||||
if err != nil {
|
||||
glog.Errorf("Request error: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
}
|
||||
err = waitTillAllNAPNodePoolsAreRemoved()
|
||||
if err != nil {
|
||||
glog.Errorf(fmt.Sprintf("Couldn't remove NAP groups: %s", err.Error()))
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func getNAPNodePoolsNumber() int {
|
||||
groups, err := getNAPNodePools()
|
||||
framework.ExpectNoError(err)
|
||||
return len(groups)
|
||||
}
|
||||
|
||||
func waitTillAllNAPNodePoolsAreRemoved() error {
|
||||
By("Wait till all NAP node pools are removed")
|
||||
err := wait.PollImmediate(5*time.Second, defaultTimeout, func() (bool, error) {
|
||||
return getNAPNodePoolsNumber() == 0, nil
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
func addNodePool(name string, machineType string, numNodes int) {
|
||||
args := []string{"container", "node-pools", "create", name, "--quiet",
|
||||
"--machine-type=" + machineType,
|
||||
|
Loading…
Reference in New Issue
Block a user