Number of failed jobs should exceed the backoff limit and not be equal to it.

Remove the patch in the backoff-limit e2e test that was needed due to the usage of NumRequeues.
yodarshafrir1 2020-08-11 11:06:09 +03:00
parent ca420ddada
commit 24010022ef
3 changed files with 10 additions and 16 deletions
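
In plain terms, the controller change below flips the backoff check from `>=` to `>`: with spec.backoffLimit = 1, the old comparison already marked the job failed after a single pod failure, i.e. the limit effectively allowed zero retries. A throwaway snippet (not controller code; plain int32 values stand in for the job's status and spec fields) showing the boundary the operator change moves:

```go
package main

import "fmt"

func main() {
	backoffLimit := int32(1)
	for failed := int32(0); failed <= 2; failed++ {
		oldRule := failed >= backoffLimit // comparison before this commit
		newRule := failed > backoffLimit  // comparison after this commit
		fmt.Printf("failed=%d old: %v new: %v\n", failed, oldRule, newRule)
	}
	// Prints:
	// failed=0 old: false new: false
	// failed=1 old: true new: false   <- the case this commit changes
	// failed=2 old: true new: true
}
```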

pkg/controller/job/job_controller.go

@@ -503,7 +503,7 @@ func (jm *Controller) syncJob(key string) (bool, error) {
 	// is different than parallelism, otherwise the previous controller loop
 	// failed updating status so even if we pick up failure it is not a new one
 	exceedsBackoffLimit := jobHaveNewFailure && (active != *job.Spec.Parallelism) &&
-		(failed >= *job.Spec.BackoffLimit)
+		(failed > *job.Spec.BackoffLimit)
 	if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) {
 		// check if the number of pod restart exceeds backoff (for restart OnFailure only)
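
The pastBackoffLimitOnFailure call in the last context line covers the restartPolicy: OnFailure case, where pods are restarted in place rather than replaced, so failed pods cannot simply be counted. Roughly, it sums container restart counts across the job's pods and compares the total to spec.backoffLimit. A simplified sketch of that idea, reconstructed from memory rather than copied from the source (the real function also looks at init containers and has its own edge cases, e.g. a zero backoff limit):

```go
package job

import (
	batch "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
)

// pastBackoffLimitOnFailureSketch approximates the OnFailure-side check:
// count container restarts on still-live pods and compare to the limit.
// Only the k8s.io/api types are real; the function itself is illustrative.
func pastBackoffLimitOnFailureSketch(job *batch.Job, pods []*v1.Pod) bool {
	if job.Spec.Template.Spec.RestartPolicy != v1.RestartPolicyOnFailure {
		return false
	}
	restarts := int32(0)
	for _, pod := range pods {
		// only running/pending pods are restarted in place under OnFailure
		if pod.Status.Phase != v1.PodRunning && pod.Status.Phase != v1.PodPending {
			continue
		}
		for _, cs := range pod.Status.ContainerStatuses {
			restarts += cs.RestartCount
		}
	}
	return restarts >= *job.Spec.BackoffLimit
}
```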

pkg/controller/job/job_controller_test.go

@@ -1552,23 +1552,23 @@ func TestJobBackoffOnRestartPolicyNever(t *testing.T) {
 		},
 		"not enough failures with backoffLimit 1 - single pod": {
 			1, 1, 1,
-			v1.PodPending, 1, 0,
-			false, true, 1, 0, 0, nil, "",
+			"", 0, 1,
+			true, false, 1, 0, 1, nil, "",
 		},
 		"too many failures with backoffLimit 1 - single pod": {
 			1, 1, 1,
-			"", 0, 1,
-			false, true, 0, 0, 1, &jobConditionFailed, "BackoffLimitExceeded",
+			"", 0, 2,
+			false, true, 0, 0, 2, &jobConditionFailed, "BackoffLimitExceeded",
 		},
 		"not enough failures with backoffLimit 6 - multiple pods": {
 			2, 2, 6,
-			v1.PodRunning, 1, 5,
-			true, false, 2, 0, 5, nil, "",
+			v1.PodRunning, 1, 6,
+			true, false, 2, 0, 6, nil, "",
 		},
 		"too many failures with backoffLimit 6 - multiple pods": {
 			2, 2, 6,
-			"", 0, 6,
-			false, true, 0, 0, 6, &jobConditionFailed, "BackoffLimitExceeded",
+			"", 0, 7,
+			false, true, 0, 0, 7, &jobConditionFailed, "BackoffLimitExceeded",
 		},
 	}
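
The literals in the table above are hard to read without the struct they initialize. The shape is clear from the values (three int32 job-spec fields, a pod fixture, then the expected sync outcome), but the field names below are guesses for readability, not identifiers copied from job_controller_test.go:

```go
package job

import (
	batch "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
)

// backoffCase mirrors the assumed layout of the anonymous test-case struct.
type backoffCase struct {
	// job spec: e.g. 1, 1, 1 or 2, 2, 6
	parallelism, completions, backoffLimit int32

	// simulated pods fed to syncJob: phase of the active pods, how many
	// are active, how many have failed
	activePodsPhase v1.PodPhase
	activePods      int32
	failedPods      int32

	// expected outcome of one syncJob pass; the two booleans control the
	// expected error / workqueue-forget behavior (exact meaning may differ)
	expectError             bool
	forgetKey               bool
	expectedActive          int32
	expectedSucceeded       int32
	expectedFailed          int32
	expectedCondition       *batch.JobConditionType
	expectedConditionReason string
}
```

Read this way, each updated entry simply moves the failed-pod count up by one, so that "not enough failures" now sits exactly at the limit and "too many failures" sits one above it.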

test/e2e/apps/job.go

@@ -246,13 +246,7 @@ var _ = SIGDescribe("Job", func() {
 		ginkgo.By(fmt.Sprintf("Checking that %d pod created and status is failed", backoff+1))
 		pods, err := e2ejob.GetJobPods(f.ClientSet, f.Namespace.Name, job.Name)
 		framework.ExpectNoError(err, "failed to get PodList for job %s in namespace: %s", job.Name, f.Namespace.Name)
-		// gomega.Expect(pods.Items).To(gomega.HaveLen(backoff + 1))
-		// due to NumRequeus not being stable enough, especially with failed status
-		// updates we need to allow more than backoff+1
-		// TODO revert this back to above when https://github.com/kubernetes/kubernetes/issues/64787 gets fixed
-		if len(pods.Items) < backoff+1 {
-			framework.Failf("Not enough pod created expected at least %d, got %#v", backoff+1, pods.Items)
-		}
+		gomega.Expect(pods.Items).To(gomega.HaveLen(backoff + 1))
 		for _, pod := range pods.Items {
 			framework.ExpectEqual(pod.Status.Phase, v1.PodFailed)
 		}