diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go index 2dce2d773f7..68eb105c97d 100644 --- a/pkg/controller/job/job_controller.go +++ b/pkg/controller/job/job_controller.go @@ -1875,7 +1875,10 @@ func countReadyPods(pods []*v1.Pod) int32 { // PodReplacementPolicy controls when we recreate pods if they are marked as terminating // Failed means that we recreate only once the pod has terminated. func onlyReplaceFailedPods(job *batch.Job) bool { - if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) && *job.Spec.PodReplacementPolicy == batch.Failed { + // Guard against a nil PodReplacementPolicy before comparing it to Failed: + // the field may be left undefaulted when the `JobPodReplacementPolicy` + // feature gate is disabled for the API server. + if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) && job.Spec.PodReplacementPolicy != nil && *job.Spec.PodReplacementPolicy == batch.Failed { return true } return feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil diff --git a/pkg/controller/job/job_controller_test.go b/pkg/controller/job/job_controller_test.go index 01b9222ad31..0d8dded06da 100644 --- a/pkg/controller/job/job_controller_test.go +++ b/pkg/controller/job/job_controller_test.go @@ -366,6 +366,19 @@ func TestControllerSyncJob(t *testing.T) { expectedDeletions: 1, expectedFailed: 1, }, + "WQ job: turn on PodReplacementPolicy but not set PodReplacementPolicy": { + parallelism: 1, + completions: 1, + backoffLimit: 6, + activePods: 1, + failedPods: 1, + jobPodReplacementPolicy: true, + expectedTerminating: ptr.To[int32](1), + terminatingPods: 1, + expectedActive: 1, + expectedPodPatches: 2, + expectedFailed: 2, + }, "WQ job: recreate pods when terminating or failed": { parallelism: 1, completions: -1,