From 9f84841d8a6dde8233ab649089cace40fea873d9 Mon Sep 17 00:00:00 2001 From: Shyam Jeedigunta Date: Thu, 22 Feb 2018 14:13:23 +0100 Subject: [PATCH] Add smart retries to resource creations in testing framework --- test/e2e/framework/util.go | 1 + test/e2e/scalability/BUILD | 1 - test/e2e/scalability/density.go | 10 +- test/e2e/scalability/load.go | 25 +--- test/utils/BUILD | 1 + test/utils/create_resources.go | 224 ++++++++++++++++++++++++++++++++ test/utils/runners.go | 58 +++------ 7 files changed, 250 insertions(+), 70 deletions(-) create mode 100644 test/utils/create_resources.go diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index 7f6cb443c97..00762f328c0 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -5078,6 +5078,7 @@ func DumpDebugInfo(c clientset.Interface, ns string) { } } +// TODO: Get rid of this duplicate function in favour of the one in test/utils. func IsRetryableAPIError(err error) bool { return apierrs.IsTimeout(err) || apierrs.IsServerTimeout(err) || apierrs.IsTooManyRequests(err) } diff --git a/test/e2e/scalability/BUILD b/test/e2e/scalability/BUILD index fc6e3cee361..798bad237fc 100644 --- a/test/e2e/scalability/BUILD +++ b/test/e2e/scalability/BUILD @@ -22,7 +22,6 @@ go_library( "//vendor/github.com/onsi/ginkgo:go_default_library", "//vendor/github.com/onsi/gomega:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library", "//vendor/k8s.io/apimachinery/pkg/api/meta:go_default_library", "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", diff --git a/test/e2e/scalability/density.go b/test/e2e/scalability/density.go index c136b5c0cde..dfb2ccfa1ba 100644 --- a/test/e2e/scalability/density.go +++ b/test/e2e/scalability/density.go @@ -26,7 +26,6 @@ import ( "time" "k8s.io/api/core/v1" - apierrs "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" @@ -55,7 +54,6 @@ const ( MinSaturationThreshold = 2 * time.Minute MinPodsPerSecondThroughput = 8 DensityPollInterval = 10 * time.Second - MaxLatencyPodCreationTries = 5 ) // Maximum container failures this test tolerates before failing. @@ -901,13 +899,7 @@ func createRunningPodFromRC(wg *sync.WaitGroup, c clientset.Interface, name, ns, }, }, } - for attempt := 1; attempt <= MaxLatencyPodCreationTries; attempt++ { - _, err := c.CoreV1().ReplicationControllers(ns).Create(rc) - if err == nil || apierrs.IsAlreadyExists(err) { - break - } - Expect(attempt < MaxLatencyPodCreationTries && framework.IsRetryableAPIError(err)).To(Equal(true)) - } + framework.ExpectNoError(testutils.CreateRCWithRetries(c, ns, rc)) framework.ExpectNoError(framework.WaitForControlledPodsRunning(c, ns, name, api.Kind("ReplicationController"))) framework.Logf("Found pod '%s' running", name) } diff --git a/test/e2e/scalability/load.go b/test/e2e/scalability/load.go index 9246e2a9d63..897cb24a41d 100644 --- a/test/e2e/scalability/load.go +++ b/test/e2e/scalability/load.go @@ -28,7 +28,6 @@ import ( "time" "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -229,8 +228,7 @@ var _ = SIGDescribe("Load capacity", func() { configs, secretConfigs, configMapConfigs = generateConfigs(totalPods, itArg.image, itArg.command, namespaces, itArg.kind, itArg.secretsPerPod, itArg.configMapsPerPod) if itArg.quotas { - err := CreateQuotas(f, namespaces, 2*totalPods, testPhaseDurations.StartPhase(115, "quota creation")) - framework.ExpectNoError(err) + framework.ExpectNoError(CreateQuotas(f, namespaces, 2*totalPods, testPhaseDurations.StartPhase(115, "quota creation"))) } serviceCreationPhase := testPhaseDurations.StartPhase(120, "services creation") @@ -240,8 +238,7 @@ var _ = SIGDescribe("Load capacity", func() { services := generateServicesForConfigs(configs) createService := func(i int) { defer GinkgoRecover() - _, err := clientset.CoreV1().Services(services[i].Namespace).Create(services[i]) - framework.ExpectNoError(err) + framework.ExpectNoError(testutils.CreateServiceWithRetries(clientset, services[i].Namespace, services[i])) } workqueue.Parallelize(serviceOperationsParallelism, len(services), createService) framework.Logf("%v Services created.", len(services)) @@ -251,8 +248,7 @@ var _ = SIGDescribe("Load capacity", func() { framework.Logf("Starting to delete services...") deleteService := func(i int) { defer GinkgoRecover() - err := clientset.CoreV1().Services(services[i].Namespace).Delete(services[i].Name, nil) - framework.ExpectNoError(err) + framework.ExpectNoError(clientset.CoreV1().Services(services[i].Namespace).Delete(services[i].Name, nil)) } workqueue.Parallelize(serviceOperationsParallelism, len(services), deleteService) framework.Logf("Services deleted") @@ -729,20 +725,11 @@ func CreateQuotas(f *framework.Framework, namespaces []*v1.Namespace, podCount i Hard: v1.ResourceList{"pods": *resource.NewQuantity(int64(podCount), resource.DecimalSI)}, }, } - for _, ns := range namespaces { - if err := wait.PollImmediate(2*time.Second, 30*time.Second, func() (bool, error) { - quotaTemplate.Name = ns.Name + "-quota" - _, err := f.ClientSet.CoreV1().ResourceQuotas(ns.Name).Create(quotaTemplate) - if err != nil && !errors.IsAlreadyExists(err) { - framework.Logf("Unexpected error while creating resource quota: %v", err) - return false, nil - } - return true, nil - }); err != nil { - return fmt.Errorf("Failed to create quota: %v", err) + quotaTemplate.Name = ns.Name + "-quota" + if err := testutils.CreateResourceQuotaWithRetries(f.ClientSet, ns.Name, quotaTemplate); err != nil { + return fmt.Errorf("Error creating quota: %v", err) } } - return nil } diff --git a/test/utils/BUILD b/test/utils/BUILD index da7eeadab95..5fc3753d4d8 100644 --- a/test/utils/BUILD +++ b/test/utils/BUILD @@ -9,6 +9,7 @@ go_library( name = "go_default_library", srcs = [ "conditions.go", + "create_resources.go", "density_utils.go", "deployment.go", "pod_store.go", diff --git a/test/utils/create_resources.go b/test/utils/create_resources.go new file mode 100644 index 00000000000..9ed33a734aa --- /dev/null +++ b/test/utils/create_resources.go @@ -0,0 +1,224 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// TODO: Refactor common part of functions in this file for generic object kinds. + +package utils + +import ( + "fmt" + "time" + + batch "k8s.io/api/batch/v1" + "k8s.io/api/core/v1" + extensions "k8s.io/api/extensions/v1beta1" + apierrs "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" +) + +const ( + // Parameters for retrying with exponential backoff. + retryBackoffInitialDuration = 100 * time.Millisecond + retryBackoffFactor = 3 + retryBackoffJitter = 0 + retryBackoffSteps = 6 +) + +// Utility for retrying the given function with exponential backoff. +func RetryWithExponentialBackOff(fn wait.ConditionFunc) error { + backoff := wait.Backoff{ + Duration: retryBackoffInitialDuration, + Factor: retryBackoffFactor, + Jitter: retryBackoffJitter, + Steps: retryBackoffSteps, + } + return wait.ExponentialBackoff(backoff, fn) +} + +func IsRetryableAPIError(err error) bool { + return apierrs.IsTimeout(err) || apierrs.IsServerTimeout(err) || apierrs.IsTooManyRequests(err) +} + +func CreatePodWithRetries(c clientset.Interface, namespace string, obj *v1.Pod) error { + if obj == nil { + return fmt.Errorf("Object provided to create is empty") + } + createFunc := func() (bool, error) { + _, err := c.CoreV1().Pods(namespace).Create(obj) + if err == nil || apierrs.IsAlreadyExists(err) { + return true, nil + } + if IsRetryableAPIError(err) { + return false, nil + } + return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err) + } + return RetryWithExponentialBackOff(createFunc) +} + +func CreateRCWithRetries(c clientset.Interface, namespace string, obj *v1.ReplicationController) error { + if obj == nil { + return fmt.Errorf("Object provided to create is empty") + } + createFunc := func() (bool, error) { + _, err := c.CoreV1().ReplicationControllers(namespace).Create(obj) + if err == nil || apierrs.IsAlreadyExists(err) { + return true, nil + } + if IsRetryableAPIError(err) { + return false, nil + } + return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err) + } + return RetryWithExponentialBackOff(createFunc) +} + +func CreateReplicaSetWithRetries(c clientset.Interface, namespace string, obj *extensions.ReplicaSet) error { + if obj == nil { + return fmt.Errorf("Object provided to create is empty") + } + createFunc := func() (bool, error) { + _, err := c.ExtensionsV1beta1().ReplicaSets(namespace).Create(obj) + if err == nil || apierrs.IsAlreadyExists(err) { + return true, nil + } + if IsRetryableAPIError(err) { + return false, nil + } + return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err) + } + return RetryWithExponentialBackOff(createFunc) +} + +func CreateDeploymentWithRetries(c clientset.Interface, namespace string, obj *extensions.Deployment) error { + if obj == nil { + return fmt.Errorf("Object provided to create is empty") + } + createFunc := func() (bool, error) { + _, err := c.ExtensionsV1beta1().Deployments(namespace).Create(obj) + if err == nil || apierrs.IsAlreadyExists(err) { + return true, nil + } + if IsRetryableAPIError(err) { + return false, nil + } + return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err) + } + return RetryWithExponentialBackOff(createFunc) +} + +func CreateDaemonSetWithRetries(c clientset.Interface, namespace string, obj *extensions.DaemonSet) error { + if obj == nil { + return fmt.Errorf("Object provided to create is empty") + } + createFunc := func() (bool, error) { + _, err := c.ExtensionsV1beta1().DaemonSets(namespace).Create(obj) + if err == nil || apierrs.IsAlreadyExists(err) { + return true, nil + } + if IsRetryableAPIError(err) { + return false, nil + } + return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err) + } + return RetryWithExponentialBackOff(createFunc) +} + +func CreateJobWithRetries(c clientset.Interface, namespace string, obj *batch.Job) error { + if obj == nil { + return fmt.Errorf("Object provided to create is empty") + } + createFunc := func() (bool, error) { + _, err := c.BatchV1().Jobs(namespace).Create(obj) + if err == nil || apierrs.IsAlreadyExists(err) { + return true, nil + } + if IsRetryableAPIError(err) { + return false, nil + } + return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err) + } + return RetryWithExponentialBackOff(createFunc) +} + +func CreateSecretWithRetries(c clientset.Interface, namespace string, obj *v1.Secret) error { + if obj == nil { + return fmt.Errorf("Object provided to create is empty") + } + createFunc := func() (bool, error) { + _, err := c.CoreV1().Secrets(namespace).Create(obj) + if err == nil || apierrs.IsAlreadyExists(err) { + return true, nil + } + if IsRetryableAPIError(err) { + return false, nil + } + return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err) + } + return RetryWithExponentialBackOff(createFunc) +} + +func CreateConfigMapWithRetries(c clientset.Interface, namespace string, obj *v1.ConfigMap) error { + if obj == nil { + return fmt.Errorf("Object provided to create is empty") + } + createFunc := func() (bool, error) { + _, err := c.CoreV1().ConfigMaps(namespace).Create(obj) + if err == nil || apierrs.IsAlreadyExists(err) { + return true, nil + } + if IsRetryableAPIError(err) { + return false, nil + } + return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err) + } + return RetryWithExponentialBackOff(createFunc) +} + +func CreateServiceWithRetries(c clientset.Interface, namespace string, obj *v1.Service) error { + if obj == nil { + return fmt.Errorf("Object provided to create is empty") + } + createFunc := func() (bool, error) { + _, err := c.CoreV1().Services(namespace).Create(obj) + if err == nil || apierrs.IsAlreadyExists(err) { + return true, nil + } + if IsRetryableAPIError(err) { + return false, nil + } + return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err) + } + return RetryWithExponentialBackOff(createFunc) +} + +func CreateResourceQuotaWithRetries(c clientset.Interface, namespace string, obj *v1.ResourceQuota) error { + if obj == nil { + return fmt.Errorf("Object provided to create is empty") + } + createFunc := func() (bool, error) { + _, err := c.CoreV1().ResourceQuotas(namespace).Create(obj) + if err == nil || apierrs.IsAlreadyExists(err) { + return true, nil + } + if IsRetryableAPIError(err) { + return false, nil + } + return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err) + } + return RetryWithExponentialBackOff(createFunc) +} diff --git a/test/utils/runners.go b/test/utils/runners.go index 1d71a3eeb62..43d77bf79ca 100644 --- a/test/utils/runners.go +++ b/test/utils/runners.go @@ -81,15 +81,7 @@ func WaitUntilPodIsScheduled(c clientset.Interface, name, namespace string, time func RunPodAndGetNodeName(c clientset.Interface, pod *v1.Pod, timeout time.Duration) (string, error) { name := pod.Name namespace := pod.Namespace - var err error - // Create a Pod - for i := 0; i < retries; i++ { - _, err = c.CoreV1().Pods(namespace).Create(pod) - if err == nil || apierrs.IsAlreadyExists(err) { - break - } - } - if err != nil && !apierrs.IsAlreadyExists(err) { + if err := CreatePodWithRetries(c, namespace, pod); err != nil { return "", err } p, err := WaitUntilPodIsScheduled(c, name, namespace, timeout) @@ -325,8 +317,7 @@ func (config *DeploymentConfig) create() error { config.applyTo(&deployment.Spec.Template) - _, err := config.Client.ExtensionsV1beta1().Deployments(config.Namespace).Create(deployment) - if err != nil { + if err := CreateDeploymentWithRetries(config.Client, config.Namespace, deployment); err != nil { return fmt.Errorf("Error creating deployment: %v", err) } config.RCConfigLog("Created deployment with name: %v, namespace: %v, replica count: %v", deployment.Name, config.Namespace, removePtr(deployment.Spec.Replicas)) @@ -396,8 +387,7 @@ func (config *ReplicaSetConfig) create() error { config.applyTo(&rs.Spec.Template) - _, err := config.Client.ExtensionsV1beta1().ReplicaSets(config.Namespace).Create(rs) - if err != nil { + if err := CreateReplicaSetWithRetries(config.Client, config.Namespace, rs); err != nil { return fmt.Errorf("Error creating replica set: %v", err) } config.RCConfigLog("Created replica set with name: %v, namespace: %v, replica count: %v", rs.Name, config.Namespace, removePtr(rs.Spec.Replicas)) @@ -463,8 +453,7 @@ func (config *JobConfig) create() error { config.applyTo(&job.Spec.Template) - _, err := config.Client.BatchV1().Jobs(config.Namespace).Create(job) - if err != nil { + if err := CreateJobWithRetries(config.Client, config.Namespace, job); err != nil { return fmt.Errorf("Error creating job: %v", err) } config.RCConfigLog("Created job with name: %v, namespace: %v, parallelism/completions: %v", job.Name, config.Namespace, job.Spec.Parallelism) @@ -584,8 +573,7 @@ func (config *RCConfig) create() error { config.applyTo(rc.Spec.Template) - _, err := config.Client.CoreV1().ReplicationControllers(config.Namespace).Create(rc) - if err != nil { + if err := CreateRCWithRetries(config.Client, config.Namespace, rc); err != nil { return fmt.Errorf("Error creating replication controller: %v", err) } config.RCConfigLog("Created replication controller with name: %v, namespace: %v, replica count: %v", rc.Name, config.Namespace, removePtr(rc.Spec.Replicas)) @@ -795,7 +783,6 @@ func (config *RCConfig) start() error { // List only pods from a given replication controller. options := metav1.ListOptions{LabelSelector: label.String()} if pods, err := config.Client.CoreV1().Pods(metav1.NamespaceAll).List(options); err == nil { - for _, pod := range pods.Items { config.RCConfigLog("Pod %s\t%s\t%s\t%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase, pod.DeletionTimestamp) } @@ -823,8 +810,7 @@ func StartPods(c clientset.Interface, replicas int, namespace string, podNamePre pod.ObjectMeta.Labels["name"] = podName pod.ObjectMeta.Labels["startPodsID"] = startPodsID pod.Spec.Containers[0].Name = podName - _, err := c.CoreV1().Pods(namespace).Create(&pod) - if err != nil { + if err := CreatePodWithRetries(c, namespace, &pod); err != nil { return err } } @@ -1020,14 +1006,10 @@ func MakePodSpec() v1.PodSpec { } func makeCreatePod(client clientset.Interface, namespace string, podTemplate *v1.Pod) error { - var err error - for attempt := 0; attempt < retries; attempt++ { - if _, err := client.CoreV1().Pods(namespace).Create(podTemplate); err == nil { - return nil - } - glog.Errorf("Error while creating pod, maybe retry: %v", err) + if err := CreatePodWithRetries(client, namespace, podTemplate); err != nil { + return fmt.Errorf("Error creating pod: %v", err) } - return fmt.Errorf("Terminal error while creating pod, won't retry: %v", err) + return nil } func CreatePod(client clientset.Interface, namespace string, podCount int, podTemplate *v1.Pod) error { @@ -1065,14 +1047,10 @@ func createController(client clientset.Interface, controllerName, namespace stri }, }, } - var err error - for attempt := 0; attempt < retries; attempt++ { - if _, err := client.CoreV1().ReplicationControllers(namespace).Create(rc); err == nil { - return nil - } - glog.Errorf("Error while creating rc, maybe retry: %v", err) + if err := CreateRCWithRetries(client, namespace, rc); err != nil { + return fmt.Errorf("Error creating replication controller: %v", err) } - return fmt.Errorf("Terminal error while creating rc, won't retry: %v", err) + return nil } func NewCustomCreatePodStrategy(podTemplate *v1.Pod) TestPodCreateStrategy { @@ -1127,8 +1105,7 @@ func (config *SecretConfig) Run() error { secret.StringData[k] = v } - _, err := config.Client.CoreV1().Secrets(config.Namespace).Create(secret) - if err != nil { + if err := CreateSecretWithRetries(config.Client, config.Namespace, secret); err != nil { return fmt.Errorf("Error creating secret: %v", err) } config.LogFunc("Created secret %v/%v", config.Namespace, config.Name) @@ -1186,8 +1163,7 @@ func (config *ConfigMapConfig) Run() error { configMap.Data[k] = v } - _, err := config.Client.CoreV1().ConfigMaps(config.Namespace).Create(configMap) - if err != nil { + if err := CreateConfigMapWithRetries(config.Client, config.Namespace, configMap); err != nil { return fmt.Errorf("Error creating configmap: %v", err) } config.LogFunc("Created configmap %v/%v", config.Namespace, config.Name) @@ -1266,12 +1242,12 @@ func (config *DaemonConfig) Run() error { }, } - _, err := config.Client.ExtensionsV1beta1().DaemonSets(config.Namespace).Create(daemon) - if err != nil { - return fmt.Errorf("Error creating DaemonSet %v: %v", config.Name, err) + if err := CreateDaemonSetWithRetries(config.Client, config.Namespace, daemon); err != nil { + return fmt.Errorf("Error creating daemonset: %v", err) } var nodes *v1.NodeList + var err error for i := 0; i < retries; i++ { // Wait for all daemons to be running nodes, err = config.Client.CoreV1().Nodes().List(metav1.ListOptions{ResourceVersion: "0"})