diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go index 34aae78e93d..01ecd6304bd 100644 --- a/pkg/controller/job/job_controller.go +++ b/pkg/controller/job/job_controller.go @@ -1871,7 +1871,10 @@ func countReadyPods(pods []*v1.Pod) int32 { // PodReplacementPolicy controls when we recreate pods if they are marked as terminating // Failed means that we recreate only once the pod has terminated. func onlyReplaceFailedPods(job *batch.Job) bool { - if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) && *job.Spec.PodReplacementPolicy == batch.Failed { + // Guard against a nil PodReplacementPolicy before dereferencing it and + // comparing it to Failed: the field may not have been defaulted when the + // `JobPodReplacementPolicy` feature gate is disabled on the API server. + if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) && job.Spec.PodReplacementPolicy != nil && *job.Spec.PodReplacementPolicy == batch.Failed { return true } return feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil diff --git a/pkg/controller/job/job_controller_test.go b/pkg/controller/job/job_controller_test.go index 79b637e8af6..683ff84c0bc 100644 --- a/pkg/controller/job/job_controller_test.go +++ b/pkg/controller/job/job_controller_test.go @@ -361,6 +361,19 @@ func TestControllerSyncJob(t *testing.T) { expectedDeletions: 1, expectedFailed: 1, }, + "WQ job: turn on PodReplacementPolicy but not set PodReplacementPolicy": { + parallelism: 1, + completions: 1, + backoffLimit: 6, + activePods: 1, + failedPods: 1, + jobPodReplacementPolicy: true, + expectedTerminating: ptr.To[int32](1), + terminatingPods: 1, + expectedActive: 1, + expectedPodPatches: 2, + expectedFailed: 2, + }, "WQ job: recreate pods when terminating or failed": { parallelism: 1, completions: -1,