Refactor and simplify WaitForPodsRunningReady; update callers to use new interface

Matt Karrmann 2024-04-13 18:52:27 -05:00
parent 272a055a46
commit bcf42255bb
9 changed files with 28 additions and 61 deletions
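
In practice the call-site migration looks like the sketch below (a hypothetical wrapper, not code from this commit; import paths assume the usual k8s.io e2e framework layout): minPods is now a plain int and the separate allowedNotReadyPods argument is gone.

package migration

import (
	"context"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
)

// waitForSystemPods is a hypothetical helper showing a typical caller after
// this commit. Before, the call was:
//   e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, int32(systemPodsNo), 0, timeout)
func waitForSystemPods(ctx context.Context, c clientset.Interface, systemPodsNo int, timeout time.Duration) error {
	// New interface: minPods is an int, allowedNotReadyPods is dropped.
	return e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, timeout)
}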

View File

@@ -38,7 +38,7 @@ import (
var _ = SIGDescribe(framework.WithDisruptive(), "NodeLease", func() {
f := framework.NewDefaultFramework("node-lease-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
var systemPodsNo int32
var systemPodsNo int
var c clientset.Interface
var ns string
var group string
@@ -48,7 +48,7 @@ var _ = SIGDescribe(framework.WithDisruptive(), "NodeLease", func() {
ns = f.Namespace.Name
systemPods, err := e2epod.GetPodsInNamespace(ctx, c, ns, map[string]string{})
framework.ExpectNoError(err)
systemPodsNo = int32(len(systemPods))
systemPodsNo = len(systemPods)
if strings.Contains(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
framework.Failf("Test does not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
} else {
@@ -97,7 +97,7 @@ var _ = SIGDescribe(framework.WithDisruptive(), "NodeLease", func() {
// Many e2e tests assume that the cluster is fully healthy before they start. Wait until
// the cluster is restored to health.
ginkgo.By("waiting for system pods to successfully restart")
err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout)
err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout)
framework.ExpectNoError(err)
})

View File

@@ -47,7 +47,7 @@ func resizeRC(ctx context.Context, c clientset.Interface, ns, name string, repli
var _ = SIGDescribe("Nodes", framework.WithDisruptive(), func() {
f := framework.NewDefaultFramework("resize-nodes")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
var systemPodsNo int32
var systemPodsNo int
var c clientset.Interface
var ns string
var group string
@@ -57,7 +57,7 @@ var _ = SIGDescribe("Nodes", framework.WithDisruptive(), func() {
ns = f.Namespace.Name
systemPods, err := e2epod.GetPodsInNamespace(ctx, c, ns, map[string]string{})
framework.ExpectNoError(err)
systemPodsNo = int32(len(systemPods))
systemPodsNo = len(systemPods)
if strings.Contains(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
framework.Failf("Test does not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
} else {
@@ -99,7 +99,7 @@ var _ = SIGDescribe("Nodes", framework.WithDisruptive(), func() {
// Many e2e tests assume that the cluster is fully healthy before they start. Wait until
// the cluster is restored to health.
ginkgo.By("waiting for system pods to successfully restart")
err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout)
err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout)
framework.ExpectNoError(err)
})
})

View File

@@ -612,7 +612,7 @@ done
})
// verify pods are running and ready
err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart)
err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart)
framework.ExpectNoError(err)
// Shutdown pod. Readiness should change to false
@@ -694,7 +694,7 @@ done
})
// verify pods are running and ready
err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart)
err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart)
framework.ExpectNoError(err)
// Shutdown pod. Readiness should change to false
@@ -1359,7 +1359,7 @@ done
})
// verify pods are running and ready
err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart)
err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart)
framework.ExpectNoError(err)
// Shutdown pod. Readiness should change to false
@@ -1452,7 +1452,7 @@ done
})
// verify pods are running and ready
err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart)
err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart)
framework.ExpectNoError(err)
// Shutdown pod. Readiness should change to false

View File

@@ -873,7 +873,7 @@ var _ = SIGDescribe("Pods", func() {
// wait as required for all 3 pods to be running
ginkgo.By("waiting for all 3 pods to be running")
err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, 0, f.Timeouts.PodStart)
err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, f.Timeouts.PodStart)
framework.ExpectNoError(err, "3 pods not found running.")
// delete Collection of pods with a label in the current namespace

View File

@@ -226,66 +226,35 @@ func WaitForAlmostAllPodsReady(ctx context.Context, c clientset.Interface, ns st
// WaitForPodsRunningReady waits up to timeout for the following conditions:
// 1. At least minPods Pods in Namespace ns are Running and Ready
// 2. No more than allowedNotReadyPods Pods in Namespace ns are not either
// Ready, Succeeded, or Failed with a Controller.
// 2. All Pods in Namespace ns are either Ready, Succeeded, or Failed with a Controller
//
// # An error is returned if either of these conditions are not met within the timeout
// An error is returned if either of these conditions is not met within the timeout.
//
// It has separate behavior from other 'wait for' pods functions in
// that it requests the list of pods on every iteration. This is useful, for
// example, in cluster startup, because the number of pods increases while
// waiting. All pods that are in SUCCESS state are not counted.
//
// If minPods or allowedNotReadyPods are -1, this method returns immediately
// without waiting.
func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration) error {
if minPods == -1 || allowedNotReadyPods == -1 {
return nil
}
func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns string, minPods int, timeout time.Duration) error {
// We get the new list of pods, replication controllers, and replica
// sets in every iteration because more pods come online during startup
// and we want to ensure they are also checked.
//
// This struct gets populated while polling, then gets checked, and in
// case of a timeout is included in the failure message.
type state struct {
ReplicationControllers []v1.ReplicationController
ReplicaSets []appsv1.ReplicaSet
Pods []v1.Pod
}
nOk := int32(0)
nOk := 0
badPods := []v1.Pod{}
otherPods := []v1.Pod{}
succeededPods := []string{}
return framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*state, error) {
return framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) ([]v1.Pod, error) {
rcList, err := c.CoreV1().ReplicationControllers(ns).List(ctx, metav1.ListOptions{})
if err != nil {
return nil, fmt.Errorf("listing replication controllers in namespace %s: %w", ns, err)
}
rsList, err := c.AppsV1().ReplicaSets(ns).List(ctx, metav1.ListOptions{})
if err != nil {
return nil, fmt.Errorf("listing replication sets in namespace %s: %w", ns, err)
}
podList, err := c.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{})
if err != nil {
return nil, fmt.Errorf("listing pods in namespace %s: %w", ns, err)
}
return &state{
ReplicationControllers: rcList.Items,
ReplicaSets: rsList.Items,
Pods: podList.Items,
}, nil
})).WithTimeout(timeout).Should(framework.MakeMatcher(func(s *state) (func() string, error) {
return podList.Items, nil
})).WithTimeout(timeout).Should(framework.MakeMatcher(func(pods []v1.Pod) (func() string, error) {
nOk = 0
badPods = []v1.Pod{}
otherPods = []v1.Pod{}
succeededPods = []string{}
for _, pod := range s.Pods {
for _, pod := range pods {
res, err := testutils.PodRunningReady(&pod)
switch {
case res && err == nil:
@@ -295,8 +264,6 @@ func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns stri
succeededPods = append(succeededPods, pod.Name)
case pod.Status.Phase == v1.PodFailed:
// ignore failed pods that are controlled by some controller
// TODO either document why failures with controllers are allowed while
// failures without controllers are not, or remove this check
if metav1.GetControllerOf(&pod) == nil {
badPods = append(badPods, pod)
}
@@ -304,15 +271,15 @@ func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns stri
otherPods = append(otherPods, pod)
}
}
if nOk >= minPods && len(badPods)+len(otherPods) <= int(allowedNotReadyPods) {
if nOk >= minPods && len(badPods)+len(otherPods) <= 0 {
return nil, nil
}
// Delayed formatting of a failure message.
return func() string {
var buffer strings.Builder
buffer.WriteString(fmt.Sprintf("Expected all pods (need at least %d) in namespace %q to be running and ready (except for %d).\n", minPods, ns, allowedNotReadyPods))
buffer.WriteString(fmt.Sprintf("%d / %d pods were running and ready.\n", nOk, len(s.Pods)))
buffer.WriteString(fmt.Sprintf("Expected all pods (need at least %d) in namespace %q to be running and ready.\n", minPods, ns))
buffer.WriteString(fmt.Sprintf("%d / %d pods were running and ready.\n", nOk, len(pods)))
if len(succeededPods) > 0 {
buffer.WriteString(fmt.Sprintf("Pods that completed successfully:\n%s", format.Object(succeededPods, 1)))
}
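
Condensing the new per-poll success check into one place (a hypothetical helper, not part of this commit; nOk, badPods, and otherPods are the counters built in the loop above, and the == 0 comparison is equivalent to the <= 0 check in the diff since slice lengths are never negative):

package migration

import v1 "k8s.io/api/core/v1"

// podsRunningReady restates the refactored condition: at least minPods pods
// are Running and Ready, and no pod remains that is neither Ready, Succeeded,
// nor Failed while owned by a controller.
func podsRunningReady(nOk, minPods int, badPods, otherPods []v1.Pod) bool {
	return nOk >= minPods && len(badPods)+len(otherPods) == 0
}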

View File

@@ -109,7 +109,7 @@ var _ = SIGDescribe("SchedulerPriorities", framework.WithSerial(), func() {
err = framework.CheckTestingNSDeletedExcept(ctx, cs, ns)
framework.ExpectNoError(err)
err = e2epod.WaitForPodsRunningReady(ctx, cs, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout)
err = e2epod.WaitForPodsRunningReady(ctx, cs, metav1.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout)
framework.ExpectNoError(err)
// skip if the most utilized node has less than the cri-o minMemLimit available

View File

@@ -657,7 +657,7 @@ var _ = sigDescribe(feature.WindowsHostProcessContainers, "[MinimumKubeletVersio
ginkgo.By("Waiting for the pod to start running")
timeout := 3 * time.Minute
e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, timeout)
e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, timeout)
ginkgo.By("Getting container stats for pod")
statsChecked := false
@@ -711,7 +711,7 @@ var _ = sigDescribe(feature.WindowsHostProcessContainers, "[MinimumKubeletVersio
pc.Create(ctx, pod)
ginkgo.By("Waiting for pod to run")
e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, 3*time.Minute)
e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 3*time.Minute)
ginkgo.By("Waiting for 60 seconds")
// We wait an additional 60 seconds after the pod is Running because the

View File

@@ -95,7 +95,7 @@ var _ = sigDescribe(feature.WindowsHyperVContainers, "HyperV containers", skipUn
pc.Create(ctx, hypervPod)
ginkgo.By("waiting for the pod to be running")
timeout := 3 * time.Minute
e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, timeout)
e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, timeout)
ginkgo.By("creating a host process container in another pod to verify the pod is running hyperv isolated containers")

View File

@@ -60,7 +60,7 @@ var _ = sigDescribe(feature.Windows, "Kubelet-Stats", framework.WithSerial(), sk
ginkgo.By("Waiting up to 3 minutes for pods to be running")
timeout := 3 * time.Minute
err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 10, 0, timeout)
err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 10, timeout)
framework.ExpectNoError(err)
ginkgo.By("Getting kubelet stats 5 times and checking average duration")
@@ -152,7 +152,7 @@ var _ = sigDescribe(feature.Windows, "Kubelet-Stats", skipUnlessWindows(func() {
ginkgo.By("Waiting up to 3 minutes for pods to be running")
timeout := 3 * time.Minute
err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, 0, timeout)
err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, timeout)
framework.ExpectNoError(err)
ginkgo.By("Getting kubelet stats 1 time")