diff --git a/test/e2e/apimachinery/garbage_collector.go b/test/e2e/apimachinery/garbage_collector.go
index e240312f207..11b22e67e3c 100644
--- a/test/e2e/apimachinery/garbage_collector.go
+++ b/test/e2e/apimachinery/garbage_collector.go
@@ -38,6 +38,7 @@ import (
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/apiserver/pkg/storage/names"
 	clientset "k8s.io/client-go/kubernetes"
+	clientv1 "k8s.io/client-go/kubernetes/typed/core/v1"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2edeployment "k8s.io/kubernetes/test/e2e/framework/deployment"
 	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
@@ -98,7 +99,9 @@ func getOrphanOptions() metav1.DeleteOptions {
 var (
 	zero       = int64(0)
 	lablecount = int64(0)
+)
 
+const (
 	// The GC controller periodically rediscovers available APIs and syncs running informers for those resources.
 	// If previously available APIs are removed during that resync process, the sync process can fail and need to be retried.
 	//
@@ -108,6 +111,12 @@ var (
 	// This timeout covers two resync/retry periods, and should be added to wait timeouts to account for delays
 	// to the GC controller caused by API changes in other tests.
 	gcInformerResyncRetryTimeout = time.Minute
+
+	// Many operations in these tests are per-replica and may require 100 mutating requests. The
+	// default client QPS of a controller is 5. If the QPS is saturated, it will take 20s to
+	// complete 100 requests. The e2e tests run in parallel, so a controller might be stuck
+	// processing other tests.
+	replicaSyncTimeout = 2 * time.Minute
 )
 
 func getPodTemplateSpec(labels map[string]string) v1.PodTemplateSpec {
@@ -380,19 +389,8 @@ var _ = SIGDescribe("Garbage collector", func() {
 			framework.Failf("Failed to create replication controller: %v", err)
 		}
 		// wait for rc to create pods
-		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
-			rc, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
-			if err != nil {
-				return false, fmt.Errorf("failed to get rc: %w", err)
-			}
-			if rc.Status.Replicas == *rc.Spec.Replicas {
-				return true, nil
-			}
-			return false, nil
-		}); err != nil {
-			framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
-		}
+		waitForReplicas(ctx, rc, rcClient)
 		ginkgo.By("delete the rc")
 		deleteOptions := getOrphanOptions()
 		deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(rc.UID))
@@ -449,18 +447,8 @@ var _ = SIGDescribe("Garbage collector", func() {
 			framework.Failf("Failed to create replication controller: %v", err)
 		}
 		// wait for rc to create some pods
-		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
-			rc, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
-			if err != nil {
-				return false, fmt.Errorf("failed to get rc: %w", err)
-			}
-			if rc.Status.Replicas == *rc.Spec.Replicas {
-				return true, nil
-			}
-			return false, nil
-		}); err != nil {
-			framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
-		}
+		waitForReplicas(ctx, rc, rcClient)
+
 		ginkgo.By("delete the rc")
 		deleteOptions := metav1.DeleteOptions{
 			Preconditions: metav1.NewUIDPreconditions(string(rc.UID)),
@@ -660,18 +648,8 @@ var _ = SIGDescribe("Garbage collector", func() {
 			framework.Failf("Failed to create replication controller: %v", err)
 		}
 		// wait for rc to create pods
-		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
-			rc, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
-			if err != nil {
-				return false, fmt.Errorf("failed to get rc: %w", err)
-			}
-			if rc.Status.Replicas == *rc.Spec.Replicas {
-				return true, nil
-			}
-			return false, nil
-		}); err != nil {
-			framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
-		}
+		waitForReplicas(ctx, rc, rcClient)
+
 		ginkgo.By("delete the rc")
 		deleteOptions := getForegroundOptions()
 		deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(rc.UID))
@@ -755,18 +733,8 @@ var _ = SIGDescribe("Garbage collector", func() {
 			framework.Failf("Failed to create replication controller: %v", err)
 		}
 		// wait for rc1 to be stable
-		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
-			rc1, err := rcClient.Get(ctx, rc1.Name, metav1.GetOptions{})
-			if err != nil {
-				return false, fmt.Errorf("failed to get rc: %w", err)
-			}
-			if rc1.Status.Replicas == *rc1.Spec.Replicas {
-				return true, nil
-			}
-			return false, nil
-		}); err != nil {
-			framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
-		}
+		waitForReplicas(ctx, rc1, rcClient)
+
 		ginkgo.By(fmt.Sprintf("set half of pods created by rc %s to have rc %s as owner as well", rc1Name, rc2Name))
 		pods, err := podClient.List(ctx, metav1.ListOptions{})
 		framework.ExpectNoError(err, "failed to list pods in namespace: %s", f.Namespace.Name)
@@ -1134,7 +1102,7 @@ var _ = SIGDescribe("Garbage collector", func() {
 		// Wait 30s and ensure the dependent is not deleted.
 		ginkgo.By("wait for 30 seconds to see if the garbage collector mistakenly deletes the dependent crd")
-		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
+		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second+gcInformerResyncRetryTimeout, func(ctx context.Context) (bool, error) {
 			_, err := resourceClient.Get(ctx, dependentName, metav1.GetOptions{})
 			return false, err
 		}); err != nil && err != wait.ErrWaitTimeout {
@@ -1177,3 +1145,28 @@ var _ = SIGDescribe("Garbage collector", func() {
 		gatherMetrics(ctx, f)
 	})
 })
+
+// TODO(106575): Migrate away from generic polling function.
+func waitForReplicas(ctx context.Context, rc *v1.ReplicationController, rcClient clientv1.ReplicationControllerInterface) {
+	var (
+		lastObservedRC *v1.ReplicationController
+		err            error
+	)
+	if err := wait.PollWithContext(ctx, framework.Poll, replicaSyncTimeout, func(ctx context.Context) (bool, error) {
+		lastObservedRC, err = rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
+		if err != nil {
+			return false, err
+		}
+		if lastObservedRC.Status.Replicas == *rc.Spec.Replicas {
+			return true, nil
+		}
+		return false, nil
+	}); err != nil {
+		if lastObservedRC == nil {
+			framework.Failf("Failed to get ReplicationController %q: %v", rc.Name, err)
+		} else {
+			framework.Failf("failed to wait for the rc.Status.Replicas (%d) to reach rc.Spec.Replicas (%d): %v",
+				lastObservedRC.Status.Replicas, *lastObservedRC.Spec.Replicas, err)
+		}
+	}
+}
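
For context, the pattern this patch factors out (poll until the ReplicationController's observed replica count matches its spec, under a timeout budgeted for saturated client QPS) can be sketched as a standalone, error-returning helper. The sketch below is illustrative only and not part of the patch: the package name, the waitForRCReplicas name and signature, and the 2-second poll interval are assumptions; the client-go and wait calls mirror the ones the diff itself uses.

// Illustrative sketch, not part of the patch: an error-returning variant of
// the waitForReplicas helper introduced above.
package gcexample // hypothetical package name

import (
	"context"
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	clientset "k8s.io/client-go/kubernetes"
)

// waitForRCReplicas polls until the ReplicationController's observed replica
// count (status.replicas) matches its desired count (spec.replicas), or the
// timeout expires. Callers would pass a generous timeout (the role played by
// replicaSyncTimeout in the patch) when client QPS may be saturated by
// parallel tests.
func waitForRCReplicas(ctx context.Context, c clientset.Interface, namespace, name string, timeout time.Duration) error {
	const pollInterval = 2 * time.Second // assumed interval; framework.Poll plays this role in the patch
	return wait.PollWithContext(ctx, pollInterval, timeout, func(ctx context.Context) (bool, error) {
		rc, err := c.CoreV1().ReplicationControllers(namespace).Get(ctx, name, metav1.GetOptions{})
		if err != nil {
			// Returning the error aborts the poll immediately, matching the
			// patched helper's behavior on Get failures.
			return false, fmt.Errorf("failed to get rc %q: %w", name, err)
		}
		return rc.Spec.Replicas != nil && rc.Status.Replicas == *rc.Spec.Replicas, nil
	})
}

Returning an error rather than calling framework.Failf would let callers attach their own context to the failure; the patch keeps Failf so each test fails in place, and records the last observed ReplicationController so the failure message can report both the observed and desired replica counts.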