Fix possible panic when the JobPodReplacementPolicy feature gate is enabled and a Job does not set PodReplacementPolicy

This commit is contained in:
Kevin Hannon 2023-10-05 13:13:09 -04:00
parent 755644a169
commit d7ee6b9d1b
2 changed files with 17 additions and 1 deletions

View File

@ -1871,7 +1871,10 @@ func countReadyPods(pods []*v1.Pod) int32 {
// PodReplacementPolicy controls when we recreate pods if they are marked as terminating.
// Failed means that we recreate a pod only once it has terminated.
func onlyReplaceFailedPods(job *batch.Job) bool {
if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) && *job.Spec.PodReplacementPolicy == batch.Failed {
// We check PodReplacementPolicy for both nil and Failed,
// because it is possible that `PodReplacementPolicy` is not defaulted
// when the `JobPodReplacementPolicy` feature gate is disabled for the API server.
if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) && job.Spec.PodReplacementPolicy != nil && *job.Spec.PodReplacementPolicy == batch.Failed {
return true
}
return feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil

View File

@ -361,6 +361,19 @@ func TestControllerSyncJob(t *testing.T) {
expectedDeletions: 1,
expectedFailed: 1,
},
"WQ job: turn on PodReplacementPolicy but not set PodReplacementPolicy": {
parallelism: 1,
completions: 1,
backoffLimit: 6,
activePods: 1,
failedPods: 1,
jobPodReplacementPolicy: true,
expectedTerminating: ptr.To[int32](1),
terminatingPods: 1,
expectedActive: 1,
expectedPodPatches: 2,
expectedFailed: 2,
},
"WQ job: recreate pods when terminating or failed": {
parallelism: 1,
completions: -1,