From 96b04bfeacd09cbdf8e31537c51d1fa0455e3a06 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Thu, 9 May 2019 21:35:02 -0700 Subject: [PATCH] test/e2e/upgrades/apps/job: List Pods in failure message Currently, this test can fail with the not-very-helpful [1,2]: fail [k8s.io/kubernetes/test/e2e/upgrades/apps/job.go:58]: Expected : false to be true Since this test is the only CheckForAllJobPodsRunning consumer, and has been since CheckForAllJobPodsRunning landed in 116eda0909 (Implements an upgrade test for Job, 2017-02-22, #41271), this commit refactors the function to EnsureJobPodsRunning, dropping the opaque boolean, and constructing a useful error summarizing the divergence from the expected parallelism and the status of listed Pods. Thanks to Maciej Szulik for the fixups [3] :). [1]: https://storage.googleapis.com/origin-ci-test/logs/release-openshift-origin-installer-e2e-aws-upgrade/1434/build-log.txt [2]: https://bugzilla.redhat.com/show_bug.cgi?id=1708454#c0 [3]: https://github.com/wking/kubernetes/pull/1 --- test/e2e/framework/job/wait.go | 18 +++++++++++++----- test/e2e/upgrades/apps/job.go | 3 +-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/test/e2e/framework/job/wait.go b/test/e2e/framework/job/wait.go index d74509e6cb7..8df3b60cd6c 100644 --- a/test/e2e/framework/job/wait.go +++ b/test/e2e/framework/job/wait.go @@ -17,6 +17,8 @@ limitations under the License. package job import ( + "fmt" + "strings" "time" batchv1 "k8s.io/api/batch/v1" @@ -99,22 +101,28 @@ func WaitForJobGone(c clientset.Interface, ns, jobName string, timeout time.Dura }) } -// CheckForAllJobPodsRunning uses c to check in the Job named jobName in ns is running. If the returned error is not -// nil the returned bool is true if the Job is running. -func CheckForAllJobPodsRunning(c clientset.Interface, ns, jobName string, parallelism int32) (bool, error) { +// EnsureAllJobPodsRunning uses c to check in the Job named jobName in ns +// is running, returning an error if the expected parallelism is not +// satisfied. +func EnsureAllJobPodsRunning(c clientset.Interface, ns, jobName string, parallelism int32) error { label := labels.SelectorFromSet(labels.Set(map[string]string{JobSelectorKey: jobName})) options := metav1.ListOptions{LabelSelector: label.String()} pods, err := c.CoreV1().Pods(ns).List(options) if err != nil { - return false, err + return err } + podsSummary := make([]string, 0, parallelism) count := int32(0) for _, p := range pods.Items { if p.Status.Phase == v1.PodRunning { count++ } + podsSummary = append(podsSummary, fmt.Sprintf("%s (%s: %s)", p.ObjectMeta.Name, p.Status.Phase, p.Status.Message)) } - return count == parallelism, nil + if count != parallelism { + return fmt.Errorf("job has %d of %d expected running pods: %s", count, parallelism, strings.Join(podsSummary, ", ")) + } + return nil } // WaitForAllJobPodsGone waits for all pods for the Job named jobName in namespace ns diff --git a/test/e2e/upgrades/apps/job.go b/test/e2e/upgrades/apps/job.go index 15eacd96356..cd836e75c05 100644 --- a/test/e2e/upgrades/apps/job.go +++ b/test/e2e/upgrades/apps/job.go @@ -55,9 +55,8 @@ func (t *JobUpgradeTest) Setup(f *framework.Framework) { func (t *JobUpgradeTest) Test(f *framework.Framework, done <-chan struct{}, upgrade upgrades.UpgradeType) { <-done ginkgo.By("Ensuring active pods == parallelism") - running, err := jobutil.CheckForAllJobPodsRunning(f.ClientSet, t.namespace, t.job.Name, 2) + err := jobutil.EnsureAllJobPodsRunning(f.ClientSet, t.namespace, t.job.Name, 2) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(running).To(gomega.BeTrue()) } // Teardown cleans up any remaining resources.