Number of failed jobs should exceed the backoff limit and not be equal to it.

Remove the patch in the backoff-limit e2e test that was needed due to the usage of NumRequeues.
yodarshafrir1 2020-08-11 11:06:09 +03:00
parent ca420ddada
commit 24010022ef
3 changed files with 10 additions and 16 deletions
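
In plain terms, the controller change below flips the backoff check from `>=` to `>`: with spec.backoffLimit = 1, the old comparison already marked the job failed after a single pod failure, i.e. the limit effectively allowed zero retries. A throwaway snippet (not controller code; plain int32 values stand in for the job's status and spec fields) showing the boundary the operator change moves:

```go
package main

import "fmt"

func main() {
	backoffLimit := int32(1)
	for failed := int32(0); failed <= 2; failed++ {
		oldRule := failed >= backoffLimit // comparison before this commit
		newRule := failed > backoffLimit  // comparison after this commit
		fmt.Printf("failed=%d old: %v new: %v\n", failed, oldRule, newRule)
	}
	// Prints:
	// failed=0 old: false new: false
	// failed=1 old: true new: false   <- the case this commit changes
	// failed=2 old: true new: true
}
```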

pkg/controller/job/job_controller.go

@@ -503,7 +503,7 @@ func (jm *Controller) syncJob(key string) (bool, error) {
 	// is different than parallelism, otherwise the previous controller loop
 	// failed updating status so even if we pick up failure it is not a new one
 	exceedsBackoffLimit := jobHaveNewFailure && (active != *job.Spec.Parallelism) &&
-		(failed >= *job.Spec.BackoffLimit)
+		(failed > *job.Spec.BackoffLimit)
 	if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) {
 		// check if the number of pod restart exceeds backoff (for restart OnFailure only)
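
The pastBackoffLimitOnFailure call in the last context line covers the restartPolicy: OnFailure case, where pods are restarted in place rather than replaced, so failed pods cannot simply be counted. Roughly, it sums container restart counts across the job's pods and compares the total to spec.backoffLimit. A simplified sketch of that idea, reconstructed from memory rather than copied from the source (the real function also looks at init containers and has its own edge cases, e.g. a zero backoff limit):

```go
package job

import (
	batch "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
)

// pastBackoffLimitOnFailureSketch approximates the OnFailure-side check:
// count container restarts on still-live pods and compare to the limit.
// Only the k8s.io/api types are real; the function itself is illustrative.
func pastBackoffLimitOnFailureSketch(job *batch.Job, pods []*v1.Pod) bool {
	if job.Spec.Template.Spec.RestartPolicy != v1.RestartPolicyOnFailure {
		return false
	}
	restarts := int32(0)
	for _, pod := range pods {
		// only running/pending pods are restarted in place under OnFailure
		if pod.Status.Phase != v1.PodRunning && pod.Status.Phase != v1.PodPending {
			continue
		}
		for _, cs := range pod.Status.ContainerStatuses {
			restarts += cs.RestartCount
		}
	}
	return restarts >= *job.Spec.BackoffLimit
}
```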

pkg/controller/job/job_controller_test.go

@@ -1552,23 +1552,23 @@ func TestJobBackoffOnRestartPolicyNever(t *testing.T) {
 		},
 		"not enough failures with backoffLimit 1 - single pod": {
 			1, 1, 1,
-			v1.PodPending, 1, 0,
-			false, true, 1, 0, 0, nil, "",
+			"", 0, 1,
+			true, false, 1, 0, 1, nil, "",
 		},
 		"too many failures with backoffLimit 1 - single pod": {
 			1, 1, 1,
-			"", 0, 1,
-			false, true, 0, 0, 1, &jobConditionFailed, "BackoffLimitExceeded",
+			"", 0, 2,
+			false, true, 0, 0, 2, &jobConditionFailed, "BackoffLimitExceeded",
 		},
 		"not enough failures with backoffLimit 6 - multiple pods": {
 			2, 2, 6,
-			v1.PodRunning, 1, 5,
-			true, false, 2, 0, 5, nil, "",
+			v1.PodRunning, 1, 6,
+			true, false, 2, 0, 6, nil, "",
 		},
 		"too many failures with backoffLimit 6 - multiple pods": {
 			2, 2, 6,
-			"", 0, 6,
-			false, true, 0, 0, 6, &jobConditionFailed, "BackoffLimitExceeded",
+			"", 0, 7,
+			false, true, 0, 0, 7, &jobConditionFailed, "BackoffLimitExceeded",
 		},
 	}
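
The literals in the table above are hard to read without the struct they initialize. The shape is clear from the values (three int32 job-spec fields, a pod fixture, then the expected sync outcome), but the field names below are guesses for readability, not identifiers copied from job_controller_test.go:

```go
package job

import (
	batch "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
)

// backoffCase mirrors the assumed layout of the anonymous test-case struct.
type backoffCase struct {
	// job spec: e.g. 1, 1, 1 or 2, 2, 6
	parallelism, completions, backoffLimit int32

	// simulated pods fed to syncJob: phase of the active pods, how many
	// are active, how many have failed
	activePodsPhase v1.PodPhase
	activePods      int32
	failedPods      int32

	// expected outcome of one syncJob pass; the two booleans control the
	// expected error / workqueue-forget behavior (exact meaning may differ)
	expectError             bool
	forgetKey               bool
	expectedActive          int32
	expectedSucceeded       int32
	expectedFailed          int32
	expectedCondition       *batch.JobConditionType
	expectedConditionReason string
}
```

Read this way, each updated entry simply moves the failed-pod count up by one, so that "not enough failures" now sits exactly at the limit and "too many failures" sits one above it.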

test/e2e/apps/job.go

@@ -246,13 +246,7 @@ var _ = SIGDescribe("Job", func() {
 		ginkgo.By(fmt.Sprintf("Checking that %d pod created and status is failed", backoff+1))
 		pods, err := e2ejob.GetJobPods(f.ClientSet, f.Namespace.Name, job.Name)
 		framework.ExpectNoError(err, "failed to get PodList for job %s in namespace: %s", job.Name, f.Namespace.Name)
-		// gomega.Expect(pods.Items).To(gomega.HaveLen(backoff + 1))
-		// due to NumRequeus not being stable enough, especially with failed status
-		// updates we need to allow more than backoff+1
-		// TODO revert this back to above when https://github.com/kubernetes/kubernetes/issues/64787 gets fixed
-		if len(pods.Items) < backoff+1 {
-			framework.Failf("Not enough pod created expected at least %d, got %#v", backoff+1, pods.Items)
-		}
+		gomega.Expect(pods.Items).To(gomega.HaveLen(backoff + 1))
 		for _, pod := range pods.Items {
 			framework.ExpectEqual(pod.Status.Phase, v1.PodFailed)
 		}