Deflake GC e2e test

Tim Allclair 2023-03-20 16:03:25 -07:00
parent c9ff286668
commit 9cce3fa107


@@ -38,6 +38,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/storage/names"
clientset "k8s.io/client-go/kubernetes"
clientv1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/kubernetes/test/e2e/framework"
e2edeployment "k8s.io/kubernetes/test/e2e/framework/deployment"
e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
@@ -98,7 +99,9 @@ func getOrphanOptions() metav1.DeleteOptions {
var (
zero = int64(0)
lablecount = int64(0)
)
const (
// The GC controller periodically rediscovers available APIs and syncs running informers for those resources.
// If previously available APIs are removed during that resync process, the sync process can fail and need to be retried.
//
@@ -108,6 +111,12 @@ var (
// This timeout covers two resync/retry periods, and should be added to wait timeouts to account for delays
// to the GC controller caused by API changes in other tests.
gcInformerResyncRetryTimeout = time.Minute
// Many operations in these tests are per-replica and may require 100 mutating requests. The
// default client QPS of a controller is 5. If the QPS is saturated, it will take 20s to complete
// 100 requests. The e2e tests are running in parallel, so a controller might be stuck
// processing requests from other tests.
replicaSyncTimeout = 2 * time.Minute
)
func getPodTemplateSpec(labels map[string]string) v1.PodTemplateSpec {
@@ -380,19 +389,8 @@ var _ = SIGDescribe("Garbage collector", func() {
framework.Failf("Failed to create replication controller: %v", err)
}
// wait for rc to create pods
if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
rc, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("failed to get rc: %w", err)
}
if rc.Status.Replicas == *rc.Spec.Replicas {
return true, nil
}
return false, nil
}); err != nil {
framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
}
waitForReplicas(ctx, rc, rcClient)
ginkgo.By("delete the rc")
deleteOptions := getOrphanOptions()
deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(rc.UID))
@@ -449,18 +447,8 @@ var _ = SIGDescribe("Garbage collector", func() {
framework.Failf("Failed to create replication controller: %v", err)
}
// wait for rc to create some pods
if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
rc, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("failed to get rc: %w", err)
}
if rc.Status.Replicas == *rc.Spec.Replicas {
return true, nil
}
return false, nil
}); err != nil {
framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
}
waitForReplicas(ctx, rc, rcClient)
ginkgo.By("delete the rc")
deleteOptions := metav1.DeleteOptions{
Preconditions: metav1.NewUIDPreconditions(string(rc.UID)),
@@ -660,18 +648,8 @@ var _ = SIGDescribe("Garbage collector", func() {
framework.Failf("Failed to create replication controller: %v", err)
}
// wait for rc to create pods
if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
rc, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("failed to get rc: %w", err)
}
if rc.Status.Replicas == *rc.Spec.Replicas {
return true, nil
}
return false, nil
}); err != nil {
framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
}
waitForReplicas(ctx, rc, rcClient)
ginkgo.By("delete the rc")
deleteOptions := getForegroundOptions()
deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(rc.UID))
@@ -755,18 +733,8 @@ var _ = SIGDescribe("Garbage collector", func() {
framework.Failf("Failed to create replication controller: %v", err)
}
// wait for rc1 to be stable
if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
rc1, err := rcClient.Get(ctx, rc1.Name, metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("failed to get rc: %w", err)
}
if rc1.Status.Replicas == *rc1.Spec.Replicas {
return true, nil
}
return false, nil
}); err != nil {
framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
}
waitForReplicas(ctx, rc1, rcClient)
ginkgo.By(fmt.Sprintf("set half of pods created by rc %s to have rc %s as owner as well", rc1Name, rc2Name))
pods, err := podClient.List(ctx, metav1.ListOptions{})
framework.ExpectNoError(err, "failed to list pods in namespace: %s", f.Namespace.Name)
@@ -1134,7 +1102,7 @@ var _ = SIGDescribe("Garbage collector", func() {
// Wait 30s and ensure the dependent is not deleted.
ginkgo.By("wait for 30 seconds to see if the garbage collector mistakenly deletes the dependent crd")
if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second+gcInformerResyncRetryTimeout, func(ctx context.Context) (bool, error) {
_, err := resourceClient.Get(ctx, dependentName, metav1.GetOptions{})
return false, err
}); err != nil && err != wait.ErrWaitTimeout {
@@ -1177,3 +1145,28 @@ var _ = SIGDescribe("Garbage collector", func() {
gatherMetrics(ctx, f)
})
})
// TODO(106575): Migrate away from generic polling function.
func waitForReplicas(ctx context.Context, rc *v1.ReplicationController, rcClient clientv1.ReplicationControllerInterface) {
var (
lastObservedRC *v1.ReplicationController
err error
)
if err := wait.PollWithContext(ctx, framework.Poll, replicaSyncTimeout, func(ctx context.Context) (bool, error) {
lastObservedRC, err = rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
if err != nil {
return false, err
}
if lastObservedRC.Status.Replicas == *rc.Spec.Replicas {
return true, nil
}
return false, nil
}); err != nil {
if lastObservedRC == nil {
framework.Failf("Failed to get ReplicationController %q: %v", rc.Name, err)
} else {
framework.Failf("failed to wait for the rc.Status.Replicas (%d) to reach rc.Spec.Replicas (%d): %v",
lastObservedRC.Status.Replicas, *lastObservedRC.Spec.Replicas, err)
}
}
}
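A note on the TODO(106575) above: the sketch below shows one possible shape for that migration, assuming the context-aware wait.PollUntilContextTimeout helper from k8s.io/apimachinery/pkg/util/wait is available in the vendored apimachinery and is an acceptable replacement (the issue may prescribe a different helper). This is an illustration, not part of the commit, and waitForReplicasSketch is a hypothetical name.
// Sketch only (not part of this commit): a possible follow-up for TODO(106575),
// assuming wait.PollUntilContextTimeout is the intended replacement for the
// generic polling helper used above.
func waitForReplicasSketch(ctx context.Context, rc *v1.ReplicationController, rcClient clientv1.ReplicationControllerInterface) {
	var lastObservedRC *v1.ReplicationController
	err := wait.PollUntilContextTimeout(ctx, framework.Poll, replicaSyncTimeout, true /* poll immediately */, func(ctx context.Context) (bool, error) {
		var getErr error
		lastObservedRC, getErr = rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
		if getErr != nil {
			return false, getErr
		}
		// Done once the observed replica count matches the desired count.
		return lastObservedRC.Status.Replicas == *rc.Spec.Replicas, nil
	})
	if err != nil {
		if lastObservedRC == nil {
			framework.Failf("Failed to get ReplicationController %q: %v", rc.Name, err)
		} else {
			framework.Failf("failed to wait for rc.Status.Replicas (%d) to reach rc.Spec.Replicas (%d): %v",
				lastObservedRC.Status.Replicas, *lastObservedRC.Spec.Replicas, err)
		}
	}
}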