Merge pull request #116724 from tallclair/gc-test-flake

Deflake GC e2e test
Kubernetes Prow Robot 2023-03-21 08:19:08 -07:00, committed by GitHub
commit 42f54ccf9c
GPG Key ID: 4AEE18F83AFDEB23

@@ -38,6 +38,7 @@ import (
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/apiserver/pkg/storage/names"
 	clientset "k8s.io/client-go/kubernetes"
+	clientv1 "k8s.io/client-go/kubernetes/typed/core/v1"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2edeployment "k8s.io/kubernetes/test/e2e/framework/deployment"
 	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
@@ -98,7 +99,9 @@ func getOrphanOptions() metav1.DeleteOptions {
 var (
 	zero       = int64(0)
 	lablecount = int64(0)
+)
+const (
 	// The GC controller periodically rediscovers available APIs and syncs running informers for those resources.
 	// If previously available APIs are removed during that resync process, the sync process can fail and need to be retried.
 	//
@@ -108,6 +111,12 @@ var (
 	// This timeout covers two resync/retry periods, and should be added to wait timeouts to account for delays
 	// to the GC controller caused by API changes in other tests.
 	gcInformerResyncRetryTimeout = time.Minute
+
+	// Many operations in these tests are per-replica and may require 100 mutating requests. The
+	// default client QPS of a controller is 5. If the QPS is saturated, it will take 20s to complete
+	// 100 requests. The e2e tests run in parallel, so a controller might be stuck processing
+	// other tests.
+	replicaSyncTimeout = 2 * time.Minute
 )
 
 func getPodTemplateSpec(labels map[string]string) v1.PodTemplateSpec {
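
For reference, a minimal standalone sketch of the arithmetic behind replicaSyncTimeout, using only the figures stated in the comment above (100 mutating requests at the default controller client QPS of 5); it is illustration only, not part of the change:

package main

import (
	"fmt"
	"time"
)

func main() {
	const (
		mutatingRequests = 100 // worst-case per-replica mutating requests in these tests
		controllerQPS    = 5   // default client QPS of a controller
	)
	// With a saturated rate limiter, requests complete at controllerQPS per second,
	// so the floor for 100 requests is 20s; replicaSyncTimeout = 2m leaves headroom
	// for parallel e2e tests competing for the same controller.
	floor := time.Duration(mutatingRequests/controllerQPS) * time.Second
	fmt.Println(floor) // 20s
}
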
@@ -380,19 +389,8 @@ var _ = SIGDescribe("Garbage collector", func() {
 			framework.Failf("Failed to create replication controller: %v", err)
 		}
 		// wait for rc to create pods
-		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
-			rc, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
-			if err != nil {
-				return false, fmt.Errorf("failed to get rc: %w", err)
-			}
-			if rc.Status.Replicas == *rc.Spec.Replicas {
-				return true, nil
-			}
-			return false, nil
-		}); err != nil {
-			framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
-		}
+		waitForReplicas(ctx, rc, rcClient)
 		ginkgo.By("delete the rc")
 		deleteOptions := getOrphanOptions()
 		deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(rc.UID))
@@ -449,18 +447,8 @@ var _ = SIGDescribe("Garbage collector", func() {
 			framework.Failf("Failed to create replication controller: %v", err)
 		}
 		// wait for rc to create some pods
-		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
-			rc, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
-			if err != nil {
-				return false, fmt.Errorf("failed to get rc: %w", err)
-			}
-			if rc.Status.Replicas == *rc.Spec.Replicas {
-				return true, nil
-			}
-			return false, nil
-		}); err != nil {
-			framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
-		}
+		waitForReplicas(ctx, rc, rcClient)
 		ginkgo.By("delete the rc")
 		deleteOptions := metav1.DeleteOptions{
 			Preconditions: metav1.NewUIDPreconditions(string(rc.UID)),
@@ -660,18 +648,8 @@ var _ = SIGDescribe("Garbage collector", func() {
 			framework.Failf("Failed to create replication controller: %v", err)
 		}
 		// wait for rc to create pods
-		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
-			rc, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
-			if err != nil {
-				return false, fmt.Errorf("failed to get rc: %w", err)
-			}
-			if rc.Status.Replicas == *rc.Spec.Replicas {
-				return true, nil
-			}
-			return false, nil
-		}); err != nil {
-			framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
-		}
+		waitForReplicas(ctx, rc, rcClient)
 		ginkgo.By("delete the rc")
 		deleteOptions := getForegroundOptions()
 		deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(rc.UID))
@@ -755,18 +733,8 @@ var _ = SIGDescribe("Garbage collector", func() {
 			framework.Failf("Failed to create replication controller: %v", err)
 		}
 		// wait for rc1 to be stable
-		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
-			rc1, err := rcClient.Get(ctx, rc1.Name, metav1.GetOptions{})
-			if err != nil {
-				return false, fmt.Errorf("failed to get rc: %w", err)
-			}
-			if rc1.Status.Replicas == *rc1.Spec.Replicas {
-				return true, nil
-			}
-			return false, nil
-		}); err != nil {
-			framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
-		}
+		waitForReplicas(ctx, rc1, rcClient)
 		ginkgo.By(fmt.Sprintf("set half of pods created by rc %s to have rc %s as owner as well", rc1Name, rc2Name))
 		pods, err := podClient.List(ctx, metav1.ListOptions{})
 		framework.ExpectNoError(err, "failed to list pods in namespace: %s", f.Namespace.Name)
@@ -1134,7 +1102,7 @@ var _ = SIGDescribe("Garbage collector", func() {
 		// Wait 30s and ensure the dependent is not deleted.
 		ginkgo.By("wait for 30 seconds to see if the garbage collector mistakenly deletes the dependent crd")
-		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
+		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second+gcInformerResyncRetryTimeout, func(ctx context.Context) (bool, error) {
 			_, err := resourceClient.Get(ctx, dependentName, metav1.GetOptions{})
 			return false, err
 		}); err != nil && err != wait.ErrWaitTimeout {
@@ -1177,3 +1145,28 @@ var _ = SIGDescribe("Garbage collector", func() {
 		gatherMetrics(ctx, f)
 	})
 })
+
+// TODO(106575): Migrate away from generic polling function.
+func waitForReplicas(ctx context.Context, rc *v1.ReplicationController, rcClient clientv1.ReplicationControllerInterface) {
+	var (
+		lastObservedRC *v1.ReplicationController
+		err            error
+	)
+	if err := wait.PollWithContext(ctx, framework.Poll, replicaSyncTimeout, func(ctx context.Context) (bool, error) {
+		lastObservedRC, err = rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
+		if err != nil {
+			return false, err
+		}
+		if lastObservedRC.Status.Replicas == *rc.Spec.Replicas {
+			return true, nil
+		}
+		return false, nil
+	}); err != nil {
+		if lastObservedRC == nil {
+			framework.Failf("Failed to get ReplicationController %q: %v", rc.Name, err)
+		} else {
+			framework.Failf("failed to wait for the rc.Status.Replicas (%d) to reach rc.Spec.Replicas (%d): %v",
+				lastObservedRC.Status.Replicas, *lastObservedRC.Spec.Replicas, err)
+		}
+	}
+}
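
For reference, a self-contained sketch of the Get-and-compare loop that waitForReplicas performs, exercised here against client-go's fake clientset rather than a live cluster; the object name, namespace, and timings are illustrative only and not part of the change:

package main

import (
	"context"
	"fmt"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes/fake"
)

func main() {
	ctx := context.Background()
	replicas := int32(2)
	// A ReplicationController whose status already matches its spec.
	rc := &v1.ReplicationController{
		ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"},
		Spec:       v1.ReplicationControllerSpec{Replicas: &replicas},
		Status:     v1.ReplicationControllerStatus{Replicas: 2},
	}
	rcClient := fake.NewSimpleClientset(rc).CoreV1().ReplicationControllers("default")

	// The same observation loop as waitForReplicas, with short illustrative timings.
	err := wait.PollWithContext(ctx, 100*time.Millisecond, 2*time.Second, func(ctx context.Context) (bool, error) {
		observed, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		return observed.Status.Replicas == *rc.Spec.Replicas, nil
	})
	fmt.Println("replicas synced:", err == nil)
}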