Merge pull request #125985 from kaisoz/fix-failureTarget-manually-added

job_controller: Ignore FailureTarget JobCondition with Status != True
This commit is contained in:
Kubernetes Prow Robot 2024-07-10 09:02:59 -07:00 committed by GitHub
commit 1608dc2b09
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 40 additions and 5 deletions

View File

@ -845,7 +845,8 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
// Given that the Job already has the SuccessCriteriaMet condition, the termination condition had already been confirmed in another cycle.
// So, the job-controller evaluates the podFailurePolicy only when the Job doesn't have the SuccessCriteriaMet condition.
if jobCtx.finishedCondition == nil && feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
if failureTargetCondition := findConditionByType(job.Status.Conditions, batch.JobFailureTarget); failureTargetCondition != nil {
failureTargetCondition := findConditionByType(job.Status.Conditions, batch.JobFailureTarget)
if failureTargetCondition != nil && failureTargetCondition.Status == v1.ConditionTrue {
jobCtx.finishedCondition = newFailedConditionForFailureTarget(failureTargetCondition, jm.clock.Now())
} else if failJobMessage := getFailJobMessage(&job, pods); failJobMessage != nil {
// Prepare the interim FailureTarget condition to record the failure message before the finalizers (allowing removal of the pods) are removed.

View File

@ -232,10 +232,11 @@ func setPodsStatusesWithIndexes(podIndexer cache.Indexer, job *batch.Job, status
}
// jobInitialStatus holds the status values a test case pre-populates on the Job
// during test setup (startTime is copied to job.Status.StartTime and conditions
// are appended to job.Status.Conditions).
// NOTE(review): this is a rendered diff hunk without +/- markers, so the field
// list appears twice — presumably the first four lines are the removed fields and
// the following lines are their re-aligned replacements plus the new `conditions`
// field; confirm against the actual file.
type jobInitialStatus struct {
active int
succeed int
failed int
startTime *time.Time
active int
succeed int
failed int
startTime *time.Time
// conditions are the JobConditions appended to job.Status.Conditions in setup.
conditions []batch.JobCondition
}
func TestControllerSyncJob(t *testing.T) {
@ -1154,6 +1155,38 @@ func TestControllerSyncJob(t *testing.T) {
podIndexLabelDisabled: true,
expectedReady: ptr.To[int32](0),
},
"FailureTarget=False condition added manually is ignored": {
jobPodFailurePolicy: true,
parallelism: 1,
completions: 1,
activePods: 1,
readyPods: 1,
initialStatus: &jobInitialStatus{
active: 1,
startTime: func() *time.Time {
now := time.Now()
return &now
}(),
conditions: []batch.JobCondition{
{
Type: batch.JobFailureTarget,
Status: v1.ConditionFalse,
Reason: "ConditionAddedManually",
Message: "Testing",
},
},
},
expectedActive: 1,
expectedReady: ptr.To[int32](1),
expectedConditions: []batch.JobCondition{
{
Type: batch.JobFailureTarget,
Status: v1.ConditionFalse,
Reason: "ConditionAddedManually",
Message: "Testing",
},
},
},
}
for name, tc := range testCases {
@ -1197,6 +1230,7 @@ func TestControllerSyncJob(t *testing.T) {
startTime := metav1.NewTime(*tc.initialStatus.startTime)
job.Status.StartTime = &startTime
}
job.Status.Conditions = append(job.Status.Conditions, tc.initialStatus.conditions...)
}
key, err := controller.KeyFunc(job)