From bcf42255bbc09ce7d37dfef3b18000c238107e15 Mon Sep 17 00:00:00 2001
From: Matt Karrmann
Date: Sat, 13 Apr 2024 18:52:27 -0500
Subject: [PATCH] Refactor and simplify WaitForPodsRunningReady; update callers to use new interface

---
 test/e2e/cloud/gcp/node_lease.go        |  6 +--
 test/e2e/cloud/gcp/resize_nodes.go      |  6 +--
 test/e2e/common/node/container_probe.go |  8 ++--
 test/e2e/common/node/pods.go            |  2 +-
 test/e2e/framework/pod/wait.go          | 55 +++++--------------------
 test/e2e/scheduling/priorities.go       |  2 +-
 test/e2e/windows/host_process.go        |  4 +-
 test/e2e/windows/hyperv.go              |  2 +-
 test/e2e/windows/kubelet_stats.go       |  4 +-
 9 files changed, 28 insertions(+), 61 deletions(-)

diff --git a/test/e2e/cloud/gcp/node_lease.go b/test/e2e/cloud/gcp/node_lease.go
index 58aab52d89c..b0d6d8a90bf 100644
--- a/test/e2e/cloud/gcp/node_lease.go
+++ b/test/e2e/cloud/gcp/node_lease.go
@@ -38,7 +38,7 @@ import (
 var _ = SIGDescribe(framework.WithDisruptive(), "NodeLease", func() {
 	f := framework.NewDefaultFramework("node-lease-test")
 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
-	var systemPodsNo int32
+	var systemPodsNo int
 	var c clientset.Interface
 	var ns string
 	var group string
@@ -48,7 +48,7 @@ var _ = SIGDescribe(framework.WithDisruptive(), "NodeLease", func() {
 		ns = f.Namespace.Name
 		systemPods, err := e2epod.GetPodsInNamespace(ctx, c, ns, map[string]string{})
 		framework.ExpectNoError(err)
-		systemPodsNo = int32(len(systemPods))
+		systemPodsNo = len(systemPods)
 		if strings.Contains(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
 			framework.Failf("Test dose not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
 		} else {
@@ -97,7 +97,7 @@ var _ = SIGDescribe(framework.WithDisruptive(), "NodeLease", func() {
 		// Many e2e tests assume that the cluster is fully healthy before they start. Wait until
 		// the cluster is restored to health.
 		ginkgo.By("waiting for system pods to successfully restart")
-		err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout)
+		err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout)
 		framework.ExpectNoError(err)
 	})

diff --git a/test/e2e/cloud/gcp/resize_nodes.go b/test/e2e/cloud/gcp/resize_nodes.go
index 916d761fbcd..0d32f93f719 100644
--- a/test/e2e/cloud/gcp/resize_nodes.go
+++ b/test/e2e/cloud/gcp/resize_nodes.go
@@ -47,7 +47,7 @@ func resizeRC(ctx context.Context, c clientset.Interface, ns, name string, repli
 var _ = SIGDescribe("Nodes", framework.WithDisruptive(), func() {
 	f := framework.NewDefaultFramework("resize-nodes")
 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
-	var systemPodsNo int32
+	var systemPodsNo int
 	var c clientset.Interface
 	var ns string
 	var group string
@@ -57,7 +57,7 @@ var _ = SIGDescribe("Nodes", framework.WithDisruptive(), func() {
 		ns = f.Namespace.Name
 		systemPods, err := e2epod.GetPodsInNamespace(ctx, c, ns, map[string]string{})
 		framework.ExpectNoError(err)
-		systemPodsNo = int32(len(systemPods))
+		systemPodsNo = len(systemPods)
 		if strings.Contains(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
 			framework.Failf("Test dose not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
 		} else {
@@ -99,7 +99,7 @@ var _ = SIGDescribe("Nodes", framework.WithDisruptive(), func() {
 		// Many e2e tests assume that the cluster is fully healthy before they start. Wait until
 		// the cluster is restored to health.
ginkgo.By("waiting for system pods to successfully restart") - err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout) + err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout) framework.ExpectNoError(err) }) }) diff --git a/test/e2e/common/node/container_probe.go b/test/e2e/common/node/container_probe.go index 31122ccc581..534fc26bf97 100644 --- a/test/e2e/common/node/container_probe.go +++ b/test/e2e/common/node/container_probe.go @@ -612,7 +612,7 @@ done }) // verify pods are running and ready - err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart) + err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart) framework.ExpectNoError(err) // Shutdown pod. Readiness should change to false @@ -694,7 +694,7 @@ done }) // verify pods are running and ready - err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart) + err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart) framework.ExpectNoError(err) // Shutdown pod. Readiness should change to false @@ -1359,7 +1359,7 @@ done }) // verify pods are running and ready - err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart) + err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart) framework.ExpectNoError(err) // Shutdown pod. Readiness should change to false @@ -1452,7 +1452,7 @@ done }) // verify pods are running and ready - err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart) + err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart) framework.ExpectNoError(err) // Shutdown pod. Readiness should change to false diff --git a/test/e2e/common/node/pods.go b/test/e2e/common/node/pods.go index 69037c6e65f..7aa1521b81c 100644 --- a/test/e2e/common/node/pods.go +++ b/test/e2e/common/node/pods.go @@ -873,7 +873,7 @@ var _ = SIGDescribe("Pods", func() { // wait as required for all 3 pods to be running ginkgo.By("waiting for all 3 pods to be running") - err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, 0, f.Timeouts.PodStart) + err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, f.Timeouts.PodStart) framework.ExpectNoError(err, "3 pods not found running.") // delete Collection of pods with a label in the current namespace diff --git a/test/e2e/framework/pod/wait.go b/test/e2e/framework/pod/wait.go index 924f6058605..f4b43ff523a 100644 --- a/test/e2e/framework/pod/wait.go +++ b/test/e2e/framework/pod/wait.go @@ -226,66 +226,35 @@ func WaitForAlmostAllPodsReady(ctx context.Context, c clientset.Interface, ns st // WaitForPodsRunningReady waits up to timeout for the following conditions: // 1. At least minPods Pods in Namespace ns are Running and Ready -// 2. No more than allowedNotReadyPods Pods in Namespace ns are not either -// Ready, Succeeded, or Failed with a Controller. +// 2. No Pods in Namespace ns are not either Ready, Succeeded, or Failed with a Controller // -// # An error is returned if either of these conditions are not met within the timeout +// An error is returned if either of these conditions are not met within the timeout. 
 //
 // It has separate behavior from other 'wait for' pods functions in
 // that it requests the list of pods on every iteration. This is useful, for
 // example, in cluster startup, because the number of pods increases while
 // waiting. All pods that are in SUCCESS state are not counted.
-//
-// If minPods or allowedNotReadyPods are -1, this method returns immediately
-// without waiting.
-func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration) error {
-	if minPods == -1 || allowedNotReadyPods == -1 {
-		return nil
-	}
+func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns string, minPods int, timeout time.Duration) error {

-	// We get the new list of pods, replication controllers, and replica
-	// sets in every iteration because more pods come online during startup
-	// and we want to ensure they are also checked.
-	//
-	// This struct gets populated while polling, then gets checked, and in
-	// case of a timeout is included in the failure message.
-	type state struct {
-		ReplicationControllers []v1.ReplicationController
-		ReplicaSets            []appsv1.ReplicaSet
-		Pods                   []v1.Pod
-	}
-
-	nOk := int32(0)
+	nOk := 0
 	badPods := []v1.Pod{}
 	otherPods := []v1.Pod{}
 	succeededPods := []string{}

-	return framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*state, error) {
+	return framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) ([]v1.Pod, error) {

-		rcList, err := c.CoreV1().ReplicationControllers(ns).List(ctx, metav1.ListOptions{})
-		if err != nil {
-			return nil, fmt.Errorf("listing replication controllers in namespace %s: %w", ns, err)
-		}
-		rsList, err := c.AppsV1().ReplicaSets(ns).List(ctx, metav1.ListOptions{})
-		if err != nil {
-			return nil, fmt.Errorf("listing replication sets in namespace %s: %w", ns, err)
-		}
 		podList, err := c.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{})
 		if err != nil {
 			return nil, fmt.Errorf("listing pods in namespace %s: %w", ns, err)
 		}
-		return &state{
-			ReplicationControllers: rcList.Items,
-			ReplicaSets:            rsList.Items,
-			Pods:                   podList.Items,
-		}, nil
-	})).WithTimeout(timeout).Should(framework.MakeMatcher(func(s *state) (func() string, error) {
+		return podList.Items, nil
+	})).WithTimeout(timeout).Should(framework.MakeMatcher(func(pods []v1.Pod) (func() string, error) {
 		nOk = 0
 		badPods = []v1.Pod{}
 		otherPods = []v1.Pod{}
 		succeededPods = []string{}
-		for _, pod := range s.Pods {
+		for _, pod := range pods {
 			res, err := testutils.PodRunningReady(&pod)
 			switch {
 			case res && err == nil:
@@ -295,8 +264,6 @@ func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns stri
 				succeededPods = append(succeededPods, pod.Name)
 			case pod.Status.Phase == v1.PodFailed:
 				// ignore failed pods that are controlled by some controller
-				// TODO either document why failures with controllers are allowed while
-				// failures without controllers are not, or remove this check
 				if metav1.GetControllerOf(&pod) == nil {
 					badPods = append(badPods, pod)
 				}
@@ -304,15 +271,15 @@ func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns stri
 				otherPods = append(otherPods, pod)
 			}
 		}
-		if nOk >= minPods && len(badPods)+len(otherPods) <= int(allowedNotReadyPods) {
+		if nOk >= minPods && len(badPods)+len(otherPods) <= 0 {
 			return nil, nil
 		}

 		// Delayed formatting of a failure message.
 		return func() string {
 			var buffer strings.Builder
-			buffer.WriteString(fmt.Sprintf("Expected all pods (need at least %d) in namespace %q to be running and ready (except for %d).\n", minPods, ns, allowedNotReadyPods))
-			buffer.WriteString(fmt.Sprintf("%d / %d pods were running and ready.\n", nOk, len(s.Pods)))
+			buffer.WriteString(fmt.Sprintf("Expected all pods (need at least %d) in namespace %q to be running and ready.\n", minPods, ns))
+			buffer.WriteString(fmt.Sprintf("%d / %d pods were running and ready.\n", nOk, len(pods)))
 			if len(succeededPods) > 0 {
 				buffer.WriteString(fmt.Sprintf("Pods that completed successfully:\n%s", format.Object(succeededPods, 1)))
 			}
diff --git a/test/e2e/scheduling/priorities.go b/test/e2e/scheduling/priorities.go
index 21f83d39784..636ec4da34b 100644
--- a/test/e2e/scheduling/priorities.go
+++ b/test/e2e/scheduling/priorities.go
@@ -109,7 +109,7 @@ var _ = SIGDescribe("SchedulerPriorities", framework.WithSerial(), func() {

 		err = framework.CheckTestingNSDeletedExcept(ctx, cs, ns)
 		framework.ExpectNoError(err)
-		err = e2epod.WaitForPodsRunningReady(ctx, cs, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout)
+		err = e2epod.WaitForPodsRunningReady(ctx, cs, metav1.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout)
 		framework.ExpectNoError(err)

 		// skip if the most utilized node has less than the cri-o minMemLimit available
diff --git a/test/e2e/windows/host_process.go b/test/e2e/windows/host_process.go
index 1d794313ba4..4af3c2c79c2 100644
--- a/test/e2e/windows/host_process.go
+++ b/test/e2e/windows/host_process.go
@@ -657,7 +657,7 @@ var _ = sigDescribe(feature.WindowsHostProcessContainers, "[MinimumKubeletVersio

 		ginkgo.By("Waiting for the pod to start running")
 		timeout := 3 * time.Minute
-		e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, timeout)
+		e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, timeout)

 		ginkgo.By("Getting container stats for pod")
 		statsChecked := false
@@ -711,7 +711,7 @@ var _ = sigDescribe(feature.WindowsHostProcessContainers, "[MinimumKubeletVersio

 		pc.Create(ctx, pod)
 		ginkgo.By("Waiting for pod to run")
-		e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, 3*time.Minute)
+		e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 3*time.Minute)

 		ginkgo.By("Waiting for 60 seconds")
 		// We wait an additional 60 seconds after the pod is Running because the
diff --git a/test/e2e/windows/hyperv.go b/test/e2e/windows/hyperv.go
index f066e24dedb..1b304936f35 100644
--- a/test/e2e/windows/hyperv.go
+++ b/test/e2e/windows/hyperv.go
@@ -95,7 +95,7 @@ var _ = sigDescribe(feature.WindowsHyperVContainers, "HyperV containers", skipUn
 		pc.Create(ctx, hypervPod)
 		ginkgo.By("waiting for the pod to be running")
 		timeout := 3 * time.Minute
-		e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, timeout)
+		e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, timeout)

 		ginkgo.By("creating a host process container in another pod to verify the pod is running hyperv isolated containers")

diff --git a/test/e2e/windows/kubelet_stats.go b/test/e2e/windows/kubelet_stats.go
index 3570bb01daf..64ac0a8e59e 100644
--- a/test/e2e/windows/kubelet_stats.go
+++ b/test/e2e/windows/kubelet_stats.go
@@ -60,7 +60,7 @@ var _ = sigDescribe(feature.Windows, "Kubelet-Stats", framework.WithSerial(), sk

 		ginkgo.By("Waiting up to 3 minutes for pods to be running")
 		timeout := 3 * time.Minute
-		err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 10, 0, timeout)
+		err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 10, timeout)
 		framework.ExpectNoError(err)

 		ginkgo.By("Getting kubelet stats 5 times and checking average duration")
@@ -152,7 +152,7 @@ var _ = sigDescribe(feature.Windows, "Kubelet-Stats", skipUnlessWindows(func() {

 		ginkgo.By("Waiting up to 3 minutes for pods to be running")
 		timeout := 3 * time.Minute
-		err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, 0, timeout)
+		err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, timeout)
 		framework.ExpectNoError(err)

 		ginkgo.By("Getting kubelet stats 1 time")
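
The sketch below is not part of the patch; it illustrates how an existing call site migrates to the new signature, reusing names that appear in the call sites above (c, systemPodsNo, framework.PodReadyBeforeTimeout).

	// Before this patch: minPods was an int32 and a separate allowedNotReadyPods
	// count (here 0) was passed explicitly:
	//   err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout)

	// After this patch: minPods is a plain int and no not-ready pods are tolerated.
	err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout)
	framework.ExpectNoError(err)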