diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go
index e3cecd9fc2d..665c726f77b 100644
--- a/pkg/controller/job/job_controller.go
+++ b/pkg/controller/job/job_controller.go
@@ -631,16 +631,15 @@ func pastBackoffLimitOnFailure(job *batch.Job, pods []*v1.Pod) bool {
 	result := int32(0)
 	for i := range pods {
 		po := pods[i]
-		if po.Status.Phase != v1.PodRunning {
-			continue
-		}
-		for j := range po.Status.InitContainerStatuses {
-			stat := po.Status.InitContainerStatuses[j]
-			result += stat.RestartCount
-		}
-		for j := range po.Status.ContainerStatuses {
-			stat := po.Status.ContainerStatuses[j]
-			result += stat.RestartCount
+		if po.Status.Phase == v1.PodRunning || po.Status.Phase == v1.PodPending {
+			for j := range po.Status.InitContainerStatuses {
+				stat := po.Status.InitContainerStatuses[j]
+				result += stat.RestartCount
+			}
+			for j := range po.Status.ContainerStatuses {
+				stat := po.Status.ContainerStatuses[j]
+				result += stat.RestartCount
+			}
 		}
 	}
 	if *job.Spec.BackoffLimit == 0 {
diff --git a/pkg/controller/job/job_controller_test.go b/pkg/controller/job/job_controller_test.go
index 8be2deed3e9..bc5a493cefb 100644
--- a/pkg/controller/job/job_controller_test.go
+++ b/pkg/controller/job/job_controller_test.go
@@ -1414,6 +1414,7 @@ func TestJobBackoffForOnFailure(t *testing.T) {
 		// pod setup
 		jobKeyForget  bool
 		restartCounts []int32
+		podPhase      v1.PodPhase
 
 		// expectations
 		expectedActive          int32
@@ -1424,32 +1425,47 @@ func TestJobBackoffForOnFailure(t *testing.T) {
 	}{
 		"backoffLimit 0 should have 1 pod active": {
 			1, 1, 0,
-			true, []int32{0},
+			true, []int32{0}, v1.PodRunning,
 			1, 0, 0, nil, "",
 		},
 		"backoffLimit 1 with restartCount 0 should have 1 pod active": {
 			1, 1, 1,
-			true, []int32{0},
+			true, []int32{0}, v1.PodRunning,
 			1, 0, 0, nil, "",
 		},
-		"backoffLimit 1 with restartCount 1 should have 0 pod active": {
+		"backoffLimit 1 with restartCount 1 and podRunning should have 0 pod active": {
 			1, 1, 1,
-			true, []int32{1},
+			true, []int32{1}, v1.PodRunning,
 			0, 0, 1, &jobConditionFailed, "BackoffLimitExceeded",
 		},
-		"too many job failures - single pod": {
+		"backoffLimit 1 with restartCount 1 and podPending should have 0 pod active": {
+			1, 1, 1,
+			true, []int32{1}, v1.PodPending,
+			0, 0, 1, &jobConditionFailed, "BackoffLimitExceeded",
+		},
+		"too many job failures with podRunning - single pod": {
 			1, 5, 2,
-			true, []int32{2},
+			true, []int32{2}, v1.PodRunning,
 			0, 0, 1, &jobConditionFailed, "BackoffLimitExceeded",
 		},
-		"too many job failures - multiple pods": {
+		"too many job failures with podPending - single pod": {
+			1, 5, 2,
+			true, []int32{2}, v1.PodPending,
+			0, 0, 1, &jobConditionFailed, "BackoffLimitExceeded",
+		},
+		"too many job failures with podRunning - multiple pods": {
 			2, 5, 2,
-			true, []int32{1, 1},
+			true, []int32{1, 1}, v1.PodRunning,
+			0, 0, 2, &jobConditionFailed, "BackoffLimitExceeded",
+		},
+		"too many job failures with podPending - multiple pods": {
+			2, 5, 2,
+			true, []int32{1, 1}, v1.PodPending,
 			0, 0, 2, &jobConditionFailed, "BackoffLimitExceeded",
 		},
 		"not enough failures": {
 			2, 5, 3,
-			true, []int32{1, 1},
+			true, []int32{1, 1}, v1.PodRunning,
 			2, 0, 0, nil, "",
 		},
 	}
@@ -1474,7 +1490,7 @@ func TestJobBackoffForOnFailure(t *testing.T) {
 			job.Spec.Template.Spec.RestartPolicy = v1.RestartPolicyOnFailure
 			sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
 			podIndexer := sharedInformerFactory.Core().V1().Pods().Informer().GetIndexer()
-			for i, pod := range newPodList(int32(len(tc.restartCounts)), v1.PodRunning, job) {
+			for i, pod := range newPodList(int32(len(tc.restartCounts)), tc.podPhase, job) {
 				pod.Status.ContainerStatuses = []v1.ContainerStatus{{RestartCount: tc.restartCounts[i]}}
 				podIndexer.Add(&pod)
 			}
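
Below is a minimal, self-contained sketch (not part of the patch) of the behavioral change: container restart counts on a Pending pod now contribute to the backoff-limit check, whereas before only Running pods were inspected. The countRestarts helper, the hand-built pod, and the main harness are illustrative only and do not exist in the controller.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// countRestarts mirrors the patched loop in pastBackoffLimitOnFailure:
// restart counts are summed for pods that are Running or Pending.
func countRestarts(pods []*v1.Pod) int32 {
	result := int32(0)
	for _, po := range pods {
		if po.Status.Phase == v1.PodRunning || po.Status.Phase == v1.PodPending {
			for _, stat := range po.Status.InitContainerStatuses {
				result += stat.RestartCount
			}
			for _, stat := range po.Status.ContainerStatuses {
				result += stat.RestartCount
			}
		}
	}
	return result
}

func main() {
	// A Pending pod whose container has already restarted twice.
	// Before the patch this pod was skipped entirely, so its restarts
	// never counted toward the Job's backoff limit.
	pendingPod := &v1.Pod{
		Status: v1.PodStatus{
			Phase:             v1.PodPending,
			ContainerStatuses: []v1.ContainerStatus{{RestartCount: 2}},
		},
	}
	fmt.Println(countRestarts([]*v1.Pod{pendingPod})) // prints 2
}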