Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-08-09 03:57:41 +00:00)
Merge pull request #124205 from mkarrmann/wait-for-pods-e2e-cleanup-111092
chore/refactor(e2e tests): Solidify Contract for and Cleanup WaitForPodsRunningReady
This commit is contained in: commit cb3bd5bc41
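
In short, the change removes the allowedNotReadyPods parameter from WaitForPodsRunningReady (and switches its pod counts from int32 to int), while the previous tolerant behavior moves to a new WaitForAlmostAllPodsReady helper. A minimal before/after sketch of a call site, assembled from the call sites in the diff below (variable names are the ones used there):

// Before: callers passed an explicit allowedNotReadyPods (usually 0) and int32 counts.
err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout)

// After: the allowedNotReadyPods parameter is gone and the counts are plain ints.
err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout)

// Callers that relied on tolerating some not-ready pods (cluster startup) switch to the new helper.
if err := e2epod.WaitForAlmostAllPodsReady(ctx, c, metav1.NamespaceSystem, framework.TestContext.MinStartupPods, framework.TestContext.AllowedNotReadyNodes, timeouts.SystemPodsStartup); err != nil {
	framework.Failf("Error waiting for all pods to be running and ready: %v", err)
}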
@@ -38,7 +38,7 @@ import (
 var _ = SIGDescribe(framework.WithDisruptive(), "NodeLease", func() {
 	f := framework.NewDefaultFramework("node-lease-test")
 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
-	var systemPodsNo int32
+	var systemPodsNo int
 	var c clientset.Interface
 	var ns string
 	var group string
@@ -49,7 +49,7 @@ var _ = SIGDescribe(framework.WithDisruptive(), "NodeLease", func() {
 		ns = f.Namespace.Name
 		systemPods, err := e2epod.GetPodsInNamespace(ctx, c, ns, map[string]string{})
 		framework.ExpectNoError(err)
-		systemPodsNo = int32(len(systemPods))
+		systemPodsNo = len(systemPods)
 		if strings.Contains(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
 			framework.Failf("Test dose not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
 		} else {
@@ -98,7 +98,7 @@ var _ = SIGDescribe(framework.WithDisruptive(), "NodeLease", func() {
 		// Many e2e tests assume that the cluster is fully healthy before they start. Wait until
 		// the cluster is restored to health.
 		ginkgo.By("waiting for system pods to successfully restart")
-		err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout)
+		err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout)
 		framework.ExpectNoError(err)
 	})

@@ -47,7 +47,7 @@ func resizeRC(ctx context.Context, c clientset.Interface, ns, name string, repli
 var _ = SIGDescribe("Nodes", framework.WithDisruptive(), func() {
 	f := framework.NewDefaultFramework("resize-nodes")
 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
-	var systemPodsNo int32
+	var systemPodsNo int
 	var c clientset.Interface
 	var ns string
 	var group string
@@ -57,7 +57,7 @@ var _ = SIGDescribe("Nodes", framework.WithDisruptive(), func() {
 		ns = f.Namespace.Name
 		systemPods, err := e2epod.GetPodsInNamespace(ctx, c, ns, map[string]string{})
 		framework.ExpectNoError(err)
-		systemPodsNo = int32(len(systemPods))
+		systemPodsNo = len(systemPods)
 		if strings.Contains(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
 			framework.Failf("Test dose not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
 		} else {
@@ -99,7 +99,7 @@ var _ = SIGDescribe("Nodes", framework.WithDisruptive(), func() {
 		// Many e2e tests assume that the cluster is fully healthy before they start. Wait until
 		// the cluster is restored to health.
 		ginkgo.By("waiting for system pods to successfully restart")
-		err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout)
+		err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout)
 		framework.ExpectNoError(err)
 	})
 })

@@ -612,7 +612,7 @@ done
 		})

 		// verify pods are running and ready
-		err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart)
+		err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart)
 		framework.ExpectNoError(err)

 		// Shutdown pod. Readiness should change to false
@@ -694,7 +694,7 @@ done
 		})

 		// verify pods are running and ready
-		err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart)
+		err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart)
 		framework.ExpectNoError(err)

 		// Shutdown pod. Readiness should change to false
@@ -1359,7 +1359,7 @@ done
 		})

 		// verify pods are running and ready
-		err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart)
+		err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart)
 		framework.ExpectNoError(err)

 		// Shutdown pod. Readiness should change to false
@@ -1452,7 +1452,7 @@ done
 		})

 		// verify pods are running and ready
-		err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart)
+		err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart)
 		framework.ExpectNoError(err)

 		// Shutdown pod. Readiness should change to false

@@ -873,7 +873,7 @@ var _ = SIGDescribe("Pods", func() {

 		// wait as required for all 3 pods to be running
 		ginkgo.By("waiting for all 3 pods to be running")
-		err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, 0, f.Timeouts.PodStart)
+		err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, f.Timeouts.PodStart)
 		framework.ExpectNoError(err, "3 pods not found running.")

 		// delete Collection of pods with a label in the current namespace

@@ -226,7 +226,7 @@ func setupSuite(ctx context.Context) {
 	// #41007. To avoid those pods preventing the whole test runs (and just
 	// wasting the whole run), we allow for some not-ready pods (with the
 	// number equal to the number of allowed not-ready nodes).
-	if err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, int32(framework.TestContext.MinStartupPods), int32(framework.TestContext.AllowedNotReadyNodes), timeouts.SystemPodsStartup); err != nil {
+	if err := e2epod.WaitForAlmostAllPodsReady(ctx, c, metav1.NamespaceSystem, framework.TestContext.MinStartupPods, framework.TestContext.AllowedNotReadyNodes, timeouts.SystemPodsStartup); err != nil {
 		e2edebug.DumpAllNamespaceInfo(ctx, c, metav1.NamespaceSystem)
 		e2ekubectl.LogFailedContainers(ctx, c, metav1.NamespaceSystem, framework.Logf)
 		framework.Failf("Error waiting for all pods to be running and ready: %v", err)

@@ -99,17 +99,22 @@ func BeInPhase(phase v1.PodPhase) types.GomegaMatcher {
 	}).WithTemplate("Expected Pod {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(phase)
 }

-// WaitForPodsRunningReady waits up to timeout to ensure that all pods in
-// namespace ns are either running and ready, or failed but controlled by a
-// controller. Also, it ensures that at least minPods are running and
-// ready. It has separate behavior from other 'wait for' pods functions in
-// that it requests the list of pods on every iteration. This is useful, for
-// example, in cluster startup, because the number of pods increases while
-// waiting. All pods that are in SUCCESS state are not counted.
-func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration) error {
+// WaitForAlmostAllReady waits up to timeout for the following conditions:
+// 1. At least minPods Pods in Namespace ns are Running and Ready
+// 2. All Pods in Namespace ns are either Ready or Succeeded
+// 3. All Pods part of a ReplicaSet or ReplicationController in Namespace ns are Ready
+//
+// After the timeout has elapsed, an error is returned if the number of Pods in a Pending Phase
+// is greater than allowedNotReadyPods.
+//
+// It is generally recommended to use WaitForPodsRunningReady instead of this function
+// whenever possible, because its behavior is more intuitive. Similar to WaitForPodsRunningReady,
+// this function requests the list of pods on every iteration, making it useful for situations
+// where the set of Pods is likely changing, such as during cluster startup.
+//
+// If minPods or allowedNotReadyPods are -1, this method returns immediately
+// without waiting.
+func WaitForAlmostAllPodsReady(ctx context.Context, c clientset.Interface, ns string, minPods, allowedNotReadyPods int, timeout time.Duration) error {
 	if minPods == -1 || allowedNotReadyPods == -1 {
 		return nil
 	}
@@ -126,14 +131,12 @@ func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns stri
 		Pods []v1.Pod
 	}

-	// notReady is -1 for any failure other than a timeout.
-	// Otherwise it is the number of pods that we were still
-	// waiting for.
-	notReady := int32(-1)
+	nOk := 0
+	badPods := []v1.Pod{}
+	otherPods := []v1.Pod{}
+	succeededPods := []string{}

 	err := framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*state, error) {
-		// Reset notReady at the start of a poll attempt.
-		notReady = -1

 		rcList, err := c.CoreV1().ReplicationControllers(ns).List(ctx, metav1.ListOptions{})
 		if err != nil {
@@ -163,11 +166,10 @@ func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns stri
 			replicaOk += rs.Status.ReadyReplicas
 		}

-		nOk := int32(0)
-		notReady = int32(0)
-		failedPods := []v1.Pod{}
-		otherPods := []v1.Pod{}
-		succeededPods := []string{}
+		nOk = 0
+		badPods = []v1.Pod{}
+		otherPods = []v1.Pod{}
+		succeededPods = []string{}
 		for _, pod := range s.Pods {
 			res, err := testutils.PodRunningReady(&pod)
 			switch {
@@ -179,14 +181,13 @@ func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns stri
 			case pod.Status.Phase == v1.PodFailed:
 				// ignore failed pods that are controlled by some controller
 				if metav1.GetControllerOf(&pod) == nil {
-					failedPods = append(failedPods, pod)
+					badPods = append(badPods, pod)
 				}
 			default:
-				notReady++
 				otherPods = append(otherPods, pod)
 			}
 		}
-		done := replicaOk == replicas && nOk >= minPods && (len(failedPods)+len(otherPods)) == 0
+		done := replicaOk == replicas && nOk >= minPods && (len(badPods)+len(otherPods)) == 0
 		if done {
 			return nil, nil
 		}
@@ -200,8 +201,8 @@ func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns stri
 		if len(succeededPods) > 0 {
 			buffer.WriteString(fmt.Sprintf("Pods that completed successfully:\n%s", format.Object(succeededPods, 1)))
 		}
-		if len(failedPods) > 0 {
-			buffer.WriteString(fmt.Sprintf("Pods that failed and were not controlled by some controller:\n%s", format.Object(failedPods, 1)))
+		if len(badPods) > 0 {
+			buffer.WriteString(fmt.Sprintf("Pods that failed and were not controlled by some controller:\n%s", format.Object(badPods, 1)))
 		}
 		if len(otherPods) > 0 {
 			buffer.WriteString(fmt.Sprintf("Pods that were neither completed nor running:\n%s", format.Object(otherPods, 1)))
@@ -211,13 +212,79 @@ func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns stri
 	}))

-	// An error might not be fatal.
-	if err != nil && notReady >= 0 && notReady <= allowedNotReadyPods {
-		framework.Logf("Number of not-ready pods (%d) is below the allowed threshold (%d).", notReady, allowedNotReadyPods)
+	if len(otherPods) <= allowedNotReadyPods {
 		return nil
 	}
 	return err
 }

+// WaitForPodsRunningReady waits up to timeout for the following conditions:
+// 1. At least minPods Pods in Namespace ns are Running and Ready
+// 2. No Pods in Namespace ns are Failed and not owned by a controller or Pending
+//
+// An error is returned if either of these conditions are not met within the timeout.
+//
+// It has separate behavior from other 'wait for' pods functions in
+// that it requests the list of pods on every iteration. This is useful, for
+// example, in cluster startup, because the number of pods increases while
+// waiting. All pods that are in SUCCESS state are not counted.
+func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns string, minPods int, timeout time.Duration) error {
+
+	return framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) ([]v1.Pod, error) {
+
+		podList, err := c.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{})
+		if err != nil {
+			return nil, fmt.Errorf("listing pods in namespace %s: %w", ns, err)
+		}
+		return podList.Items, nil
+	})).WithTimeout(timeout).Should(framework.MakeMatcher(func(pods []v1.Pod) (func() string, error) {
+
+		nOk := 0
+		badPods := []v1.Pod{}
+		otherPods := []v1.Pod{}
+		succeededPods := []string{}
+
+		for _, pod := range pods {
+			res, err := testutils.PodRunningReady(&pod)
+			switch {
+			case res && err == nil:
+				nOk++
+			case pod.Status.Phase == v1.PodSucceeded:
+				// ignore succeeded pods
+				succeededPods = append(succeededPods, pod.Name)
+			case pod.Status.Phase == v1.PodFailed:
+				// ignore failed pods that are controlled by some controller
+				if metav1.GetControllerOf(&pod) == nil {
+					badPods = append(badPods, pod)
+				}
+			default:
+				otherPods = append(otherPods, pod)
+			}
+		}
+		if nOk >= minPods && len(badPods)+len(otherPods) == 0 {
+			return nil, nil
+		}
+
+		// Delayed formatting of a failure message.
+		return func() string {
+			var buffer strings.Builder
+			buffer.WriteString(fmt.Sprintf("Expected all pods (need at least %d) in namespace %q to be running and ready \n", minPods, ns))
+			buffer.WriteString(fmt.Sprintf("%d / %d pods were running and ready.\n", nOk, len(pods)))
+			if len(succeededPods) > 0 {
+				buffer.WriteString(fmt.Sprintf("Pods that completed successfully:\n%s", format.Object(succeededPods, 1)))
+			}
+			if len(badPods) > 0 {
+				buffer.WriteString(fmt.Sprintf("Pods that failed and were not controlled by some controller:\n%s", format.Object(badPods, 1)))
+			}
+			if len(otherPods) > 0 {
+				buffer.WriteString(fmt.Sprintf("Pods that were neither completed nor running:\n%s", format.Object(otherPods, 1)))
+			}
+			return buffer.String()
+		}, nil
+	}))
+
+}

 // WaitForPodCondition waits a pods to be matched to the given condition.
 // The condition callback may use gomega.StopTrying to abort early.
 func WaitForPodCondition(ctx context.Context, c clientset.Interface, ns, podName, conditionDesc string, timeout time.Duration, condition podCondition) error {

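Taken together with the doc comments above, a short usage sketch of the two helpers under the new contract (the namespaces, counts, and timeouts here are illustrative, not prescribed by this change):

// Strict wait: at least minPods pods are Running and Ready, and there are no
// Pending pods or uncontrolled Failed pods, within the timeout.
err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, f.Timeouts.PodStart)
framework.ExpectNoError(err)

// Tolerant wait: intended for cluster startup, where up to allowedNotReadyPods
// Pending pods are still acceptable when the timeout expires.
err = e2epod.WaitForAlmostAllPodsReady(ctx, c, metav1.NamespaceSystem, minPods, allowedNotReadyPods, timeout)
framework.ExpectNoError(err)
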
@@ -109,7 +109,7 @@ var _ = SIGDescribe("SchedulerPriorities", framework.WithSerial(), func() {

 		err = framework.CheckTestingNSDeletedExcept(ctx, cs, ns)
 		framework.ExpectNoError(err)
-		err = e2epod.WaitForPodsRunningReady(ctx, cs, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout)
+		err = e2epod.WaitForPodsRunningReady(ctx, cs, metav1.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout)
 		framework.ExpectNoError(err)

 		// skip if the most utilized node has less than the cri-o minMemLimit available

@@ -657,7 +657,8 @@ var _ = sigDescribe(feature.WindowsHostProcessContainers, "[MinimumKubeletVersio

 		ginkgo.By("Waiting for the pod to start running")
 		timeout := 3 * time.Minute
-		e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, timeout)
+		err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, timeout)
+		framework.ExpectNoError(err)

 		ginkgo.By("Getting container stats for pod")
 		statsChecked := false
@@ -711,7 +712,8 @@ var _ = sigDescribe(feature.WindowsHostProcessContainers, "[MinimumKubeletVersio
 		pc.Create(ctx, pod)

 		ginkgo.By("Waiting for pod to run")
-		e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, 3*time.Minute)
+		err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 3*time.Minute)
+		framework.ExpectNoError(err)

 		ginkgo.By("Waiting for 60 seconds")
 		// We wait an additional 60 seconds after the pod is Running because the

@@ -95,7 +95,8 @@ var _ = sigDescribe(feature.WindowsHyperVContainers, "HyperV containers", skipUn
 		pc.Create(ctx, hypervPod)
 		ginkgo.By("waiting for the pod to be running")
 		timeout := 3 * time.Minute
-		e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, timeout)
+		err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, timeout)
+		framework.ExpectNoError(err)

 		ginkgo.By("creating a host process container in another pod to verify the pod is running hyperv isolated containers")

@@ -60,7 +60,7 @@ var _ = sigDescribe(feature.Windows, "Kubelet-Stats", framework.WithSerial(), sk

 		ginkgo.By("Waiting up to 3 minutes for pods to be running")
 		timeout := 3 * time.Minute
-		err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 10, 0, timeout)
+		err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 10, timeout)
 		framework.ExpectNoError(err)

 		ginkgo.By("Getting kubelet stats 5 times and checking average duration")
@@ -152,7 +152,7 @@ var _ = sigDescribe(feature.Windows, "Kubelet-Stats", skipUnlessWindows(func() {

 		ginkgo.By("Waiting up to 3 minutes for pods to be running")
 		timeout := 3 * time.Minute
-		err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, 0, timeout)
+		err = e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 3, timeout)
 		framework.ExpectNoError(err)

 		ginkgo.By("Getting kubelet stats 1 time")