Ensure pods both running and ready before starting e2e tests

@@ -22,7 +22,9 @@ import (
 	"path"
 	"strings"
 	"testing"
+	"time"

+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
@@ -33,6 +35,28 @@ import (
 	"github.com/onsi/gomega"
 )

+const (
+	// podStartupTimeout is the time to allow all pods in the cluster to become
+	// running and ready before any e2e tests run. It includes pulling all of
+	// the pods (as of 5/18/15 this is 8 pods).
+	podStartupTimeout = 10 * time.Minute
+
+	// minStartupPods is the minimum number of pods that will allow
+	// waitForPodsRunningReady(...) to succeed. More verbosely, that function
+	// checks that all pods in the cluster are both in a phase of "running" and
+	// have a condition of "ready": "true". It aims to ensure that the cluster's
+	// pods are fully healthy before beginning e2e tests. Without a required
+	// minimum, though, a cluster with 0 pods would technically pass. We expect
+	// every cluster to come up with some number of pods (which in practice is
+	// more than this number), so this minimum is a sanity check that there are
+	// actually pods on the cluster (i.e. preventing a possible race with
+	// kube-addons). This does *not* mean that the function succeeds as soon as
+	// minStartupPods are found to be running and ready; it ensures that *all*
+	// pods it finds are running and ready. This is simply the minimum number it
+	// must find.
+	minStartupPods = 1
+)
+
 var (
 	cloudConfig = &testContext.CloudConfig
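As a concrete reading of the minStartupPods comment above (an illustration, not code from this commit): the acceptance check requires every pod returned by a single List call to be running and ready, and additionally that at least minStartupPods pods were returned, so an empty namespace cannot vacuously pass.

	// Sketch of the acceptance predicate; nOk and total are hypothetical names
	// for the counts taken from one List of the namespace.
	ok := nOk == total && nOk >= minStartupPods // 0/0 fails; 7/8 fails; 8/8 passes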
@@ -92,6 +116,15 @@ func TestE2E(t *testing.T) {
 	}

 	gomega.RegisterFailHandler(ginkgo.Fail)
+
+	// Ensure all pods are running and ready before starting tests (otherwise,
+	// cluster infrastructure pods that are being pulled or started can block
+	// test pods from running, and tests that ensure all pods are running and
+	// ready will fail).
+	if err := waitForPodsRunningReady(api.NamespaceDefault, minStartupPods, podStartupTimeout); err != nil {
+		glog.Fatalf("Error waiting for all pods to be running and ready: %v", err)
+	}
+
 	// Run tests through the Ginkgo runner with output to console + JUnit for Jenkins
 	var r []ginkgo.Reporter
 	if *reportDir != "" {
@@ -191,25 +191,6 @@ func rebootNode(c *client.Client, provider, name string, result chan bool) {
 	result <- true
 }

-// podRunningReady is the checker function passed to waitForPodCondition(...)
-// (found in util.go). It ensures that the pod's phase is running and that the
-// ready condition is true.
-func podRunningReady(p *api.Pod) (bool, error) {
-	// Check the phase is running.
-	if p.Status.Phase != api.PodRunning {
-		return false, fmt.Errorf("want pod %s on %s to be %v but was %v",
-			p.ObjectMeta.Name, p.Spec.Host, api.PodRunning, p.Status.Phase)
-	}
-	// Check the ready condition is true.
-	for _, cond := range p.Status.Conditions {
-		if cond.Type == api.PodReady && cond.Status == api.ConditionTrue {
-			return true, nil
-		}
-	}
-	return false, fmt.Errorf("pod %s on %s didn't have condition %v, %v; conditions: %v",
-		p.ObjectMeta.Name, p.Spec.Host, api.PodReady, api.ConditionTrue, p.Status.Conditions)
-}
-
 // checkPodsRunning returns whether all pods whose names are listed in podNames
 // are running.
 func checkPodsRunning(c *client.Client, podNames []string, timeout time.Duration) bool {
@@ -106,6 +106,83 @@ func providerIs(providers ...string) bool {

 type podCondition func(pod *api.Pod) (bool, error)

+// podReady returns whether pod has a condition of Ready with a status of true.
+func podReady(pod *api.Pod) bool {
+	for _, cond := range pod.Status.Conditions {
+		if cond.Type == api.PodReady && cond.Status == api.ConditionTrue {
+			return true
+		}
+	}
+	return false
+}
+
+// logPodStates logs all pod states for debugging.
+func logPodStates(c *client.Client, ns string) {
+	podList, err := c.Pods(ns).List(labels.Everything(), fields.Everything())
+	if err != nil {
+		Logf("Error getting pods for logPodStates(...): %v", err)
+		return
+	}
+	Logf("Phase and conditions for all pods in namespace '%s':", ns)
+	for _, pod := range podList.Items {
+		Logf("- pod '%s' on '%s' has phase '%v' and conditions %v",
+			pod.ObjectMeta.Name, pod.Spec.Host, pod.Status.Phase, pod.Status.Conditions)
+	}
+}
+
+// podRunningReady checks whether pod p's phase is running and it has a ready
+// condition of status true.
+func podRunningReady(p *api.Pod) (bool, error) {
+	// Check the phase is running.
+	if p.Status.Phase != api.PodRunning {
+		return false, fmt.Errorf("want pod '%s' on '%s' to be '%v' but was '%v'",
+			p.ObjectMeta.Name, p.Spec.Host, api.PodRunning, p.Status.Phase)
+	}
+	// Check the ready condition is true.
+	if !podReady(p) {
+		return false, fmt.Errorf("pod '%s' on '%s' didn't have condition {%v %v}; conditions: %v",
+			p.ObjectMeta.Name, p.Spec.Host, api.PodReady, api.ConditionTrue, p.Status.Conditions)
+	}
+	return true, nil
+}
+
+// waitForPodsRunningReady waits up to timeout to ensure that all pods in
+// namespace ns are running and ready, requiring that it finds at least
+// minPods. Unlike the other 'wait for pods' functions, it re-queries the
+// list of pods on every iteration; this is useful, for example, during
+// cluster startup, when the number of pods is still increasing.
+func waitForPodsRunningReady(ns string, minPods int, timeout time.Duration) error {
+	c, err := loadClient()
+	if err != nil {
+		return err
+	}
+	Logf("Waiting up to %v for all pods (need at least %d) in namespace '%s' to be running and ready",
+		timeout, minPods, ns)
+	for start := time.Now(); time.Since(start) < timeout; time.Sleep(podPoll) {
+		// We get the new list of pods in every iteration because more pods come
+		// online during startup and we want to ensure they are also checked.
+		podList, err := c.Pods(ns).List(labels.Everything(), fields.Everything())
+		if err != nil {
+			Logf("Error getting pods in namespace '%s': %v", ns, err)
+			continue
+		}
+		nOk := 0
+		for _, pod := range podList.Items {
+			if res, err := podRunningReady(&pod); res && err == nil {
+				nOk++
+			}
+		}
+		Logf("%d / %d pods in namespace '%s' are running and ready (%v elapsed)",
+			nOk, len(podList.Items), ns, time.Since(start))
+		if nOk == len(podList.Items) && nOk >= minPods {
+			return nil
+		}
+	}
+	logPodStates(c, ns)
+	return fmt.Errorf("Not all pods in namespace '%s' running and ready within %v", ns, timeout)
+}
+
 func waitForPodCondition(c *client.Client, ns, podName, desc string, poll, timeout time.Duration, condition podCondition) error {
 	Logf("Waiting up to %v for pod %s status to be %s", timeout, podName, desc)
 	for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {
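The polling pattern above is self-contained enough to lift out. Below is a minimal standalone sketch of the same idea (re-list on every iteration, require every listed item healthy, plus a floor on the count); pollAllHealthy, list, and healthy are hypothetical stand-ins for the e2e client calls, not names from this commit.

	package main

	import (
		"errors"
		"fmt"
		"time"
	)

	// pollAllHealthy re-lists items on every iteration (so items that appear
	// while waiting are also checked) and succeeds only when every listed
	// item is healthy and at least minItems items were found.
	func pollAllHealthy(list func() ([]string, error), healthy func(string) bool,
		minItems int, poll, timeout time.Duration) error {
		for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {
			items, err := list()
			if err != nil {
				continue // transient list errors are retried, as in the diff above
			}
			nOk := 0
			for _, it := range items {
				if healthy(it) {
					nOk++
				}
			}
			if nOk == len(items) && nOk >= minItems {
				return nil
			}
		}
		return errors.New("timed out waiting for all items to be healthy")
	}

	func main() {
		pods := []string{"kube-dns", "fluentd-elasticsearch"}
		err := pollAllHealthy(
			func() ([]string, error) { return pods, nil },
			func(string) bool { return true },
			1, 10*time.Millisecond, time.Second)
		fmt.Println(err) // prints <nil>: both pods report healthy on the first pass
	}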
@@ -118,9 +195,10 @@ func waitForPodCondition(c *client.Client, ns, podName, desc string, poll, timeo
 		if done {
 			return err
 		}
-		Logf("Waiting for pod %s in namespace %s status to be %q (found %q) (%v)", podName, ns, desc, pod.Status.Phase, time.Since(start))
+		Logf("Waiting for pod '%s' in namespace '%s' status to be '%q' (found phase: '%q', readiness: %t) (%v)",
+			podName, ns, desc, pod.Status.Phase, podReady(pod), time.Since(start))
 	}
-	return fmt.Errorf("gave up waiting for pod %s to be %s after %v", podName, desc, timeout)
+	return fmt.Errorf("gave up waiting for pod '%s' to be '%s' after %v", podName, desc, timeout)
 }

 // createNS should be used by every test, note that we append a common prefix to the provided test name.
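Because podRunningReady matches the podCondition signature exactly, it can be handed to waitForPodCondition directly. A sketch of such a wrapper (the waitForPodRunningReady name below is hypothetical and not part of this commit; podPoll is the poll interval already used elsewhere in util.go):

	// Wait for a single named pod to be running and ready, reusing the
	// helpers added in this commit.
	func waitForPodRunningReady(c *client.Client, ns, podName string, timeout time.Duration) error {
		return waitForPodCondition(c, ns, podName, "running and ready",
			podPoll, timeout, podRunningReady)
	}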
@@ -150,7 +228,7 @@ func waitForPodRunning(c *client.Client, podName string) error {
 func waitForPodNotPending(c *client.Client, ns, podName string) error {
 	return waitForPodCondition(c, ns, podName, "!pending", podPoll, podStartTimeout, func(pod *api.Pod) (bool, error) {
 		if pod.Status.Phase != api.PodPending {
-			Logf("Saw pod %s in namespace %s out of pending state (found %q)", podName, ns, pod.Status.Phase)
+			Logf("Saw pod '%s' in namespace '%s' out of pending state (found '%q')", podName, ns, pod.Status.Phase)
 			return true, nil
 		}
 		return false, nil
@@ -163,17 +241,17 @@ func waitForPodSuccessInNamespace(c *client.Client, podName string, contName str
 		// Cannot use pod.Status.Phase == api.PodSucceeded/api.PodFailed due to #2632
 		ci, ok := api.GetContainerStatus(pod.Status.ContainerStatuses, contName)
 		if !ok {
-			Logf("No Status.Info for container %s in pod %s yet", contName, podName)
+			Logf("No Status.Info for container '%s' in pod '%s' yet", contName, podName)
 		} else {
 			if ci.State.Termination != nil {
 				if ci.State.Termination.ExitCode == 0 {
 					By("Saw pod success")
 					return true, nil
 				} else {
-					return true, fmt.Errorf("pod %s terminated with failure: %+v", podName, ci.State.Termination)
+					return true, fmt.Errorf("pod '%s' terminated with failure: %+v", podName, ci.State.Termination)
 				}
 			} else {
-				Logf("Nil State.Termination for container %s in pod %s in namespace %s so far", contName, podName, namespace)
+				Logf("Nil State.Termination for container '%s' in pod '%s' in namespace '%s' so far", contName, podName, namespace)
 			}
 		}
 		return false, nil