Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-08-03 00:57:27 +00:00)

Merge pull request #125510 from mimowo/extend-job-conditions

Delay setting terminal Job conditions until all pods are terminal

Commit 0a3330d6c9
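In short: with this change the job controller records the interim conditions first (FailureTarget for failure, SuccessCriteriaMet for success) and withholds the terminal Failed/Complete conditions until no pods are still terminating, whenever the JobManagedBy or JobPodReplacementPolicy feature gate is enabled. Below is a minimal standalone sketch of that gating, condensed from the delayTerminalCondition/newFailureCondition/newSuccessCondition helpers added in the diff; the feature-gate lookups are stubbed here, while the real helpers live on the job controller and use its clock and feature-gate plumbing.

package main

import "fmt"

// Stand-ins for the JobManagedBy and JobPodReplacementPolicy feature gates.
var jobManagedBy, jobPodReplacementPolicy bool

// delayTerminalCondition mirrors the helper added in the diff: the terminal
// condition is delayed whenever either gate is enabled.
func delayTerminalCondition() bool {
	return jobManagedBy || jobPodReplacementPolicy
}

// terminalFailureType condenses newFailureCondition: while the delay is active,
// the controller records the interim FailureTarget condition instead of Failed.
func terminalFailureType() string {
	if delayTerminalCondition() {
		return "FailureTarget"
	}
	return "Failed"
}

func main() {
	fmt.Println(terminalFailureType()) // "Failed" with both gates off
	jobManagedBy = true
	fmt.Println(terminalFailureType()) // "FailureTarget": Failed is added only once all pods are terminal
}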
@@ -518,6 +518,16 @@ func validateJobStatus(job *batch.Job, fldPath *field.Path, opts JobStatusValida
 			allErrs = append(allErrs, field.Invalid(fldPath.Child("completionTime"), status.CompletionTime, "completionTime cannot be set before startTime"))
 		}
 	}
+	if opts.RejectFailedJobWithoutFailureTarget {
+		if IsJobFailed(job) && !isJobFailureTarget(job) {
+			allErrs = append(allErrs, field.Invalid(fldPath.Child("conditions"), field.OmitValueType{}, "cannot set Failed=True condition without the FailureTarget=true condition"))
+		}
+	}
+	if opts.RejectCompleteJobWithoutSuccessCriteriaMet {
+		if IsJobComplete(job) && !isJobSuccessCriteriaMet(job) {
+			allErrs = append(allErrs, field.Invalid(fldPath.Child("conditions"), field.OmitValueType{}, "cannot set Complete=True condition without the SuccessCriteriaMet=true condition"))
+		}
+	}
 	isJobFinished := IsJobFinished(job)
 	if opts.RejectFinishedJobWithActivePods {
 		if status.Active > 0 && isJobFinished {
@@ -568,7 +578,17 @@ func validateJobStatus(job *batch.Job, fldPath *field.Path, opts JobStatusValida
 			}
 		}
 	}
-	if ptr.Deref(job.Spec.CompletionMode, batch.NonIndexedCompletion) != batch.IndexedCompletion && isJobSuccessCriteriaMet(job) {
+	if opts.RejectFinishedJobWithTerminatingPods {
+		if status.Terminating != nil && *status.Terminating > 0 && isJobFinished {
+			allErrs = append(allErrs, field.Invalid(fldPath.Child("terminating"), status.Terminating, "terminating>0 is invalid for finished job"))
+		}
+	}
+	if opts.RejectMoreReadyThanActivePods {
+		if status.Ready != nil && *status.Ready > status.Active {
+			allErrs = append(allErrs, field.Invalid(fldPath.Child("ready"), *status.Ready, "cannot set more ready pods than active"))
+		}
+	}
+	if !opts.AllowForSuccessCriteriaMetInExtendedScope && ptr.Deref(job.Spec.CompletionMode, batch.NonIndexedCompletion) != batch.IndexedCompletion && isJobSuccessCriteriaMet(job) {
 		allErrs = append(allErrs, field.Invalid(fldPath.Child("conditions"), field.OmitValueType{}, "cannot set SuccessCriteriaMet to NonIndexed Job"))
 	}
 	if isJobSuccessCriteriaMet(job) && IsJobFailed(job) {
@@ -577,7 +597,7 @@ func validateJobStatus(job *batch.Job, fldPath *field.Path, opts JobStatusValida
 	if isJobSuccessCriteriaMet(job) && isJobFailureTarget(job) {
 		allErrs = append(allErrs, field.Invalid(fldPath.Child("conditions"), field.OmitValueType{}, "cannot set SuccessCriteriaMet=True and FailureTarget=true conditions"))
 	}
-	if job.Spec.SuccessPolicy == nil && isJobSuccessCriteriaMet(job) {
+	if !opts.AllowForSuccessCriteriaMetInExtendedScope && job.Spec.SuccessPolicy == nil && isJobSuccessCriteriaMet(job) {
 		allErrs = append(allErrs, field.Invalid(fldPath.Child("conditions"), field.OmitValueType{}, "cannot set SuccessCriteriaMet=True for Job without SuccessPolicy"))
 	}
 	if job.Spec.SuccessPolicy != nil && !isJobSuccessCriteriaMet(job) && IsJobComplete(job) {
@@ -1005,6 +1025,8 @@ type JobStatusValidationOptions struct {
 	RejectFailedIndexesOverlappingCompleted      bool
 	RejectCompletedIndexesForNonIndexedJob       bool
 	RejectFailedIndexesForNoBackoffLimitPerIndex bool
+	RejectFailedJobWithoutFailureTarget          bool
+	RejectCompleteJobWithoutSuccessCriteriaMet   bool
 	RejectFinishedJobWithActivePods              bool
 	RejectFinishedJobWithoutStartTime            bool
 	RejectFinishedJobWithUncountedTerminatedPods bool
@@ -1015,4 +1037,7 @@ type JobStatusValidationOptions struct {
 	RejectNotCompleteJobWithCompletionTime      bool
 	RejectCompleteJobWithFailedCondition        bool
 	RejectCompleteJobWithFailureTargetCondition bool
+	AllowForSuccessCriteriaMetInExtendedScope   bool
+	RejectMoreReadyThanActivePods               bool
+	RejectFinishedJobWithTerminatingPods        bool
 }
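For readers of the validation hunks above: the two new reject options mean a Job status may only carry Failed=True if it also carries FailureTarget=True, and Complete=True only alongside SuccessCriteriaMet=True. The sketch below illustrates that pairing using the published batch/v1 API types; the checker is ours and purely illustrative, since the real validateJobStatus works on internal types and is gated by JobStatusValidationOptions.

package main

import (
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
)

// hasTrueCondition reports whether a condition of the given type is present with Status=True.
func hasTrueCondition(conds []batchv1.JobCondition, t batchv1.JobConditionType) bool {
	for _, c := range conds {
		if c.Type == t && c.Status == corev1.ConditionTrue {
			return true
		}
	}
	return false
}

// checkTerminalConditionPairs mirrors the two new validation rules above:
// Failed=True requires FailureTarget=True, and Complete=True requires
// SuccessCriteriaMet=True. Illustrative stand-in only, not the real validator.
func checkTerminalConditionPairs(conds []batchv1.JobCondition) error {
	if hasTrueCondition(conds, batchv1.JobFailed) && !hasTrueCondition(conds, batchv1.JobFailureTarget) {
		return fmt.Errorf("cannot set Failed=True condition without the FailureTarget=true condition")
	}
	if hasTrueCondition(conds, batchv1.JobComplete) && !hasTrueCondition(conds, batchv1.JobSuccessCriteriaMet) {
		return fmt.Errorf("cannot set Complete=True condition without the SuccessCriteriaMet=true condition")
	}
	return nil
}

func main() {
	conds := []batchv1.JobCondition{
		{Type: batchv1.JobFailureTarget, Status: corev1.ConditionTrue},
		{Type: batchv1.JobFailed, Status: corev1.ConditionTrue},
	}
	fmt.Println(checkTerminalConditionPairs(conds)) // <nil>: the required pairing is present
}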
@@ -844,7 +844,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {

 	// Given that the Job already has the SuccessCriteriaMet condition, the termination condition already had confirmed in another cycle.
 	// So, the job-controller evaluates the podFailurePolicy only when the Job doesn't have the SuccessCriteriaMet condition.
-	if jobCtx.finishedCondition == nil && feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
+	if jobCtx.finishedCondition == nil && (feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) || delayTerminalCondition()) {
 		failureTargetCondition := findConditionByType(job.Status.Conditions, batch.JobFailureTarget)
 		if failureTargetCondition != nil && failureTargetCondition.Status == v1.ConditionTrue {
 			jobCtx.finishedCondition = newFailedConditionForFailureTarget(failureTargetCondition, jm.clock.Now())
@@ -857,9 +857,9 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
 	if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) {
 		// check if the number of pod restart exceeds backoff (for restart OnFailure only)
 		// OR if the number of failed jobs increased since the last syncJob
-		jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonBackoffLimitExceeded, "Job has reached the specified backoff limit", jm.clock.Now())
+		jobCtx.finishedCondition = jm.newFailureCondition(batch.JobReasonBackoffLimitExceeded, "Job has reached the specified backoff limit")
 	} else if jm.pastActiveDeadline(&job) {
-		jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonDeadlineExceeded, "Job was active longer than specified deadline", jm.clock.Now())
+		jobCtx.finishedCondition = jm.newFailureCondition(batch.JobReasonDeadlineExceeded, "Job was active longer than specified deadline")
 	} else if job.Spec.ActiveDeadlineSeconds != nil && !jobSuspended(&job) {
 		syncDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds)*time.Second - jm.clock.Since(job.Status.StartTime.Time)
 		logger.V(2).Info("Job has activeDeadlineSeconds configuration. Will sync this job again", "key", key, "nextSyncIn", syncDuration)
@@ -874,9 +874,9 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
 		jobCtx.failedIndexes = calculateFailedIndexes(logger, &job, pods)
 		if jobCtx.finishedCondition == nil {
 			if job.Spec.MaxFailedIndexes != nil && jobCtx.failedIndexes.total() > int(*job.Spec.MaxFailedIndexes) {
-				jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonMaxFailedIndexesExceeded, "Job has exceeded the specified maximal number of failed indexes", jm.clock.Now())
+				jobCtx.finishedCondition = jm.newFailureCondition(batch.JobReasonMaxFailedIndexesExceeded, "Job has exceeded the specified maximal number of failed indexes")
 			} else if jobCtx.failedIndexes.total() > 0 && jobCtx.failedIndexes.total()+jobCtx.succeededIndexes.total() >= int(*job.Spec.Completions) {
-				jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonFailedIndexes, "Job has failed indexes", jm.clock.Now())
+				jobCtx.finishedCondition = jm.newFailureCondition(batch.JobReasonFailedIndexes, "Job has failed indexes")
 			}
 		}
 		jobCtx.podsWithDelayedDeletionPerIndex = getPodsWithDelayedDeletionPerIndex(logger, jobCtx)
@@ -925,7 +925,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
 		complete = jobCtx.succeeded >= *job.Spec.Completions && active == 0
 	}
 	if complete {
-		jobCtx.finishedCondition = newCondition(batch.JobComplete, v1.ConditionTrue, "", "", jm.clock.Now())
+		jobCtx.finishedCondition = jm.newSuccessCondition()
 	} else if manageJobCalled {
 		// Update the conditions / emit events only if manageJob was called in
 		// this syncJob. Otherwise wait for the right syncJob call to make
@@ -975,6 +975,27 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
 	return manageJobErr
 }

+func (jm *Controller) newFailureCondition(reason, message string) *batch.JobCondition {
+	cType := batch.JobFailed
+	if delayTerminalCondition() {
+		cType = batch.JobFailureTarget
+	}
+	return newCondition(cType, v1.ConditionTrue, reason, message, jm.clock.Now())
+}
+
+func (jm *Controller) newSuccessCondition() *batch.JobCondition {
+	cType := batch.JobComplete
+	if delayTerminalCondition() {
+		cType = batch.JobSuccessCriteriaMet
+	}
+	return newCondition(cType, v1.ConditionTrue, "", "", jm.clock.Now())
+}
+
+func delayTerminalCondition() bool {
+	return feature.DefaultFeatureGate.Enabled(features.JobManagedBy) ||
+		feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy)
+}
+
 // deleteActivePods issues deletion for active Pods, preserving finalizers.
 // This is done through DELETE calls that set deletion timestamps.
 // The method trackJobStatusAndRemoveFinalizers removes the finalizers, after
@@ -1165,7 +1186,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
 			needsFlush = true
 		}
 	}
-	if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
+	if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) || delayTerminalCondition() {
 		if jobCtx.finishedCondition != nil && jobCtx.finishedCondition.Type == batch.JobFailureTarget {

 			// Append the interim FailureTarget condition to update the job status with before finalizers are removed.
@@ -1378,6 +1399,12 @@ func (jm *Controller) enactJobFinished(logger klog.Logger, jobCtx *syncJobCtx) b
 			return false
 		}
 	}
+	if delayTerminalCondition() {
+		if *jobCtx.terminating > 0 {
+			logger.V(4).Info("Delaying marking the Job as finished, because there are still terminating pod(s)", "job", klog.KObj(job), "condition", jobCtx.finishedCondition.Type, "count", *jobCtx.terminating)
+			return false
+		}
+	}
 	finishedCond := jobCtx.finishedCondition
 	job.Status.Conditions, _ = ensureJobConditionStatus(job.Status.Conditions, finishedCond.Type, finishedCond.Status, finishedCond.Reason, finishedCond.Message, jm.clock.Now())
 	if finishedCond.Type == batch.JobComplete {
@@ -1964,12 +1991,17 @@ func countReadyPods(pods []*v1.Pod) int32 {

 // trackTerminatingPods checks if the count of terminating pods is tracked.
 // They are tracked when any the following is true:
-// - JobPodReplacementPolicy is enabled to be returned in the status field,
-// - only failed pods are replaced, because pod failure policy is used
+// - JobPodReplacementPolicy is enabled to be returned in the status field;
+//   and to delay setting the Job terminal condition,
+// - JobManagedBy is enabled to delay setting Job terminal condition,
+// - only failed pods are replaced, because pod failure policy is used
 func trackTerminatingPods(job *batch.Job) bool {
 	if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) {
 		return true
 	}
+	if feature.DefaultFeatureGate.Enabled(features.JobManagedBy) {
+		return true
+	}
 	return feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil
 }

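One practical consequence of the controller changes above: with either gate enabled, a watcher now observes an interim window in which the Job's fate is already decided (FailureTarget=True or SuccessCriteriaMet=True) but the terminal Failed/Complete condition has not yet been added because pods are still terminating. The small helper below (our naming, not part of the Kubernetes API) detects that window from the published batch/v1 status; it is a hedged illustration of the new condition ordering, not controller code.

package main

import (
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
)

// condTrue reports whether the given condition type is present with Status=True.
func condTrue(job *batchv1.Job, t batchv1.JobConditionType) bool {
	for _, c := range job.Status.Conditions {
		if c.Type == t && c.Status == corev1.ConditionTrue {
			return true
		}
	}
	return false
}

// failureDecidedButNotTerminal captures the interim state this change introduces:
// FailureTarget=True has been recorded, but Failed=True is withheld until
// status.terminating drops to zero.
func failureDecidedButNotTerminal(job *batchv1.Job) bool {
	return condTrue(job, batchv1.JobFailureTarget) && !condTrue(job, batchv1.JobFailed)
}

func main() {
	job := &batchv1.Job{Status: batchv1.JobStatus{Conditions: []batchv1.JobCondition{
		{Type: batchv1.JobFailureTarget, Status: corev1.ConditionTrue},
	}}}
	fmt.Println(failureDecidedButNotTerminal(job)) // true: pods may still be terminating
}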
@@ -292,6 +292,7 @@ func TestControllerSyncJob(t *testing.T) {
 		jobPodReplacementPolicy bool
 		jobPodFailurePolicy     bool
 		jobSuccessPolicy        bool
+		jobManagedBy            bool
 	}{
 		"job start": {
 			parallelism: 2,
@@ -827,7 +828,7 @@ func TestControllerSyncJob(t *testing.T) {
 			expectedCompletedIdxs: "0",
 			expectedConditions: []batch.JobCondition{
 				{
-					Type:    batch.JobFailed,
+					Type:    batch.JobFailureTarget,
 					Status:  v1.ConditionTrue,
 					Reason:  batch.JobReasonBackoffLimitExceeded,
 					Message: "Job has reached the specified backoff limit",
@@ -1220,6 +1221,32 @@ func TestControllerSyncJob(t *testing.T) {
 				},
 			},
 		},
+		"backoff limit exceeded; JobManagedBy enabled": {
+			jobManagedBy:   true,
+			parallelism:    2,
+			completions:    3,
+			backoffLimit:   0,
+			completionMode: batch.IndexedCompletion,
+			podsWithIndexes: []indexPhase{
+				{"0", v1.PodSucceeded},
+				{"1", v1.PodFailed},
+				{"2", v1.PodRunning},
+			},
+			expectedSucceeded:     1,
+			expectedFailed:        2,
+			expectedCompletedIdxs: "0",
+			expectedConditions: []batch.JobCondition{
+				{
+					Type:    batch.JobFailureTarget,
+					Status:  v1.ConditionTrue,
+					Reason:  batch.JobReasonBackoffLimitExceeded,
+					Message: "Job has reached the specified backoff limit",
+				},
+			},
+			expectedPodPatches: 3,
+			expectedReady:      ptr.To[int32](0),
+			expectedDeletions:  1,
+		},
 	}

 	for name, tc := range testCases {
@@ -1229,6 +1256,7 @@ func TestControllerSyncJob(t *testing.T) {
 			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodReplacementPolicy, tc.jobPodReplacementPolicy)
 			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodFailurePolicy, tc.jobPodFailurePolicy)
 			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobSuccessPolicy, tc.jobSuccessPolicy)
+			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobManagedBy, tc.jobManagedBy)
 			// job manager setup
 			clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})

@@ -1546,6 +1574,7 @@ func TestTrackJobStatusAndRemoveFinalizers(t *testing.T) {
 	completedCond := newCondition(batch.JobComplete, v1.ConditionTrue, "", "", now)
 	succeededCond := newCondition(batch.JobSuccessCriteriaMet, v1.ConditionTrue, "", "", minuteAgo)
 	failedCond := newCondition(batch.JobFailed, v1.ConditionTrue, "", "", now)
+	failureTargetCond := newCondition(batch.JobFailureTarget, v1.ConditionTrue, "", "", now)
 	indexedCompletion := batch.IndexedCompletion
 	mockErr := errors.New("mock error")
 	cases := map[string]struct {
@@ -1565,6 +1594,8 @@ func TestTrackJobStatusAndRemoveFinalizers(t *testing.T) {
 		// features
 		enableJobBackoffLimitPerIndex bool
 		enableJobSuccessPolicy        bool
+		enableJobPodReplacementPolicy bool
+		enableJobManagedBy            bool
 	}{
 		"no updates": {},
 		"new active": {
@@ -2114,6 +2145,215 @@ func TestTrackJobStatusAndRemoveFinalizers(t *testing.T) {
 			},
 			wantFailedPodsMetric: 2,
 		},
+		"pod is terminal; JobFailed condition is set": {
+			job: batch.Job{
+				Spec: batch.JobSpec{
+					Completions: ptr.To[int32](1),
+					Parallelism: ptr.To[int32](1),
+				},
+				Status: batch.JobStatus{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{
+						Failed: []types.UID{"a"},
+					},
+				},
+			},
+			pods: []*v1.Pod{
+				buildPod().uid("a").phase(v1.PodFailed).trackingFinalizer().Pod,
+			},
+			finishedCond:     failedCond,
+			wantRmFinalizers: 1,
+			wantStatusUpdates: []batch.JobStatus{
+				{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+					Failed:                  1,
+					Conditions:              []batch.JobCondition{*failedCond},
+				},
+			},
+			wantFailedPodsMetric: 1,
+		},
+		"pod is terminating; counted as failed, but the JobFailed condition is delayed; JobPodReplacementPolicy enabled": {
+			enableJobPodReplacementPolicy: true,
+			job: batch.Job{
+				Spec: batch.JobSpec{
+					Completions: ptr.To[int32](1),
+					Parallelism: ptr.To[int32](1),
+				},
+				Status: batch.JobStatus{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{
+						Failed: []types.UID{"a"},
+					},
+					Conditions: []batch.JobCondition{*failureTargetCond},
+				},
+			},
+			pods: []*v1.Pod{
+				buildPod().uid("a").phase(v1.PodRunning).deletionTimestamp().Pod,
+			},
+			finishedCond: failedCond,
+			wantStatusUpdates: []batch.JobStatus{
+				{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+					Failed:                  1,
+					Conditions:              []batch.JobCondition{*failureTargetCond},
+				},
+			},
+			wantFailedPodsMetric: 1,
+		},
+		"pod is terminating; counted as failed, but the JobFailed condition is delayed; JobManagedBy enabled": {
+			enableJobManagedBy: true,
+			job: batch.Job{
+				Spec: batch.JobSpec{
+					Completions: ptr.To[int32](1),
+					Parallelism: ptr.To[int32](1),
+				},
+				Status: batch.JobStatus{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{
+						Failed: []types.UID{"a"},
+					},
+					Conditions: []batch.JobCondition{*failureTargetCond},
+				},
+			},
+			pods: []*v1.Pod{
+				buildPod().uid("a").phase(v1.PodRunning).deletionTimestamp().Pod,
+			},
+			finishedCond: failedCond,
+			wantStatusUpdates: []batch.JobStatus{
+				{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+					Failed:                  1,
+					Conditions:              []batch.JobCondition{*failureTargetCond},
+				},
+			},
+			wantFailedPodsMetric: 1,
+		},
+		"pod is terminating; counted as failed, JobFailed condition is not delayed; JobPodReplacementPolicy and JobManagedBy disabled": {
+			job: batch.Job{
+				Spec: batch.JobSpec{
+					Completions: ptr.To[int32](1),
+					Parallelism: ptr.To[int32](1),
+				},
+				Status: batch.JobStatus{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{
+						Failed: []types.UID{"a"},
+					},
+					Conditions: []batch.JobCondition{*failureTargetCond},
+				},
+			},
+			pods: []*v1.Pod{
+				buildPod().uid("a").phase(v1.PodRunning).deletionTimestamp().Pod,
+			},
+			finishedCond: failedCond,
+			wantStatusUpdates: []batch.JobStatus{
+				{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+					Failed:                  1,
+					Conditions:              []batch.JobCondition{*failureTargetCond},
+				},
+				{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+					Failed:                  1,
+					Conditions:              []batch.JobCondition{*failureTargetCond, *failedCond},
+				},
+			},
+			wantFailedPodsMetric: 1,
+		},
+		"pod is terminating; JobSuccessCriteriaMet, but JobComplete condition is delayed; JobPodReplacementPolicy enabled": {
+			enableJobPodReplacementPolicy: true,
+			job: batch.Job{
+				Spec: batch.JobSpec{
+					Completions: ptr.To[int32](1),
+					Parallelism: ptr.To[int32](1),
+				},
+				Status: batch.JobStatus{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{
+						Failed:    []types.UID{"a"},
+						Succeeded: []types.UID{"b"},
+					},
+					Conditions: []batch.JobCondition{*succeededCond},
+				},
+			},
+			pods: []*v1.Pod{
+				buildPod().uid("a").phase(v1.PodRunning).deletionTimestamp().Pod,
+			},
+			finishedCond: completedCond,
+			wantStatusUpdates: []batch.JobStatus{
+				{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+					Succeeded:               1,
+					Failed:                  1,
+					Conditions:              []batch.JobCondition{*succeededCond},
+				},
+			},
+			wantFailedPodsMetric:    1,
+			wantSucceededPodsMetric: 1,
+		},
+		"pod is terminating; JobSuccessCriteriaMet, but JobComplete condition is delayed; JobManagedBy enabled": {
+			enableJobManagedBy: true,
+			job: batch.Job{
+				Spec: batch.JobSpec{
+					Completions: ptr.To[int32](1),
+					Parallelism: ptr.To[int32](1),
+				},
+				Status: batch.JobStatus{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{
+						Failed:    []types.UID{"a"},
+						Succeeded: []types.UID{"b"},
+					},
+					Conditions: []batch.JobCondition{*succeededCond},
+				},
+			},
+			pods: []*v1.Pod{
+				buildPod().uid("a").phase(v1.PodRunning).deletionTimestamp().Pod,
+			},
+			finishedCond: completedCond,
+			wantStatusUpdates: []batch.JobStatus{
+				{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+					Succeeded:               1,
+					Failed:                  1,
+					Conditions:              []batch.JobCondition{*succeededCond},
+				},
+			},
+			wantFailedPodsMetric:    1,
+			wantSucceededPodsMetric: 1,
+		},
+		"pod is terminating; JobSuccessCriteriaMet, JobComplete condition is not delayed; JobPodReplacementPolicy and JobManagedBy disabled": {
+			enableJobSuccessPolicy: true,
+			job: batch.Job{
+				Spec: batch.JobSpec{
+					Completions: ptr.To[int32](1),
+					Parallelism: ptr.To[int32](1),
+				},
+				Status: batch.JobStatus{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{
+						Failed:    []types.UID{"a"},
+						Succeeded: []types.UID{"b"},
+					},
+					Conditions: []batch.JobCondition{*succeededCond},
+				},
+			},
+			pods: []*v1.Pod{
+				buildPod().uid("a").phase(v1.PodRunning).deletionTimestamp().Pod,
+			},
+			finishedCond: completedCond,
+			wantStatusUpdates: []batch.JobStatus{
+				{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+					Succeeded:               1,
+					Failed:                  1,
+					Conditions:              []batch.JobCondition{*succeededCond},
+				},
+				{
+					UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+					Succeeded:               1,
+					Failed:                  1,
+					Conditions:              []batch.JobCondition{*succeededCond, *completedCond},
+					CompletionTime:          ptr.To(metav1.NewTime(now)),
+				},
+			},
+			wantFailedPodsMetric:    1,
+			wantSucceededPodsMetric: 1,
+		},
+
 		"indexed job with a failed pod with delayed finalizer removal; the pod is not counted": {
 			enableJobBackoffLimitPerIndex: true,
 			job: batch.Job{
@@ -2200,6 +2440,8 @@ func TestTrackJobStatusAndRemoveFinalizers(t *testing.T) {
 		t.Run(name, func(t *testing.T) {
 			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableJobBackoffLimitPerIndex)
 			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobSuccessPolicy, tc.enableJobSuccessPolicy)
+			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodReplacementPolicy, tc.enableJobPodReplacementPolicy)
+			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobManagedBy, tc.enableJobManagedBy)

 			clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
 			manager, _ := newControllerFromClientWithClock(ctx, t, clientSet, controller.NoResyncPeriodFunc, fakeClock)
@@ -2222,14 +2464,15 @@ func TestTrackJobStatusAndRemoveFinalizers(t *testing.T) {
 				expectedRmFinalizers: tc.expectedRmFinalizers,
 				finishedCondition:    tc.finishedCond,
 			}
+			jobCtx.activePods = controller.FilterActivePods(logger, tc.pods)
 			if isIndexedJob(job) {
 				jobCtx.succeededIndexes = parseIndexesFromString(logger, job.Status.CompletedIndexes, int(*job.Spec.Completions))
 				if tc.enableJobBackoffLimitPerIndex && job.Spec.BackoffLimitPerIndex != nil {
 					jobCtx.failedIndexes = calculateFailedIndexes(logger, job, tc.pods)
-					jobCtx.activePods = controller.FilterActivePods(logger, tc.pods)
 					jobCtx.podsWithDelayedDeletionPerIndex = getPodsWithDelayedDeletionPerIndex(logger, jobCtx)
 				}
 			}
+			jobCtx.terminating = ptr.To(controller.CountTerminatingPods(tc.pods))

 			err := manager.trackJobStatusAndRemoveFinalizers(ctx, jobCtx, tc.needsFlush)
 			if !errors.Is(err, tc.wantErr) {
@@ -2287,6 +2530,10 @@ func TestSyncJobPastDeadline(t *testing.T) {
 		expectedSucceeded  int32
 		expectedFailed     int32
 		expectedConditions []batch.JobCondition
+
+		// features
+		enableJobPodReplacementPolicy bool
+		enableJobManagedBy            bool
 	}{
 		"activeDeadlineSeconds less than single pod execution": {
 			parallelism: 1,
@@ -2373,10 +2620,79 @@ func TestSyncJobPastDeadline(t *testing.T) {
 			},
 		},
 	},
+		"activeDeadlineSeconds exceeded; JobPodReplacementPolicy enabled": {
+			enableJobPodReplacementPolicy: true,
+			parallelism:           1,
+			completions:           2,
+			activeDeadlineSeconds: 10,
+			startTime:             15,
+			backoffLimit:          6,
+			activePods:            1,
+			succeededPods:         1,
+			expectedDeletions:     1,
+			expectedSucceeded:     1,
+			expectedFailed:        1,
+			expectedConditions: []batch.JobCondition{
+				{
+					Type:    batch.JobFailureTarget,
+					Status:  v1.ConditionTrue,
+					Reason:  batch.JobReasonDeadlineExceeded,
+					Message: "Job was active longer than specified deadline",
+				},
+			},
+		},
+		"activeDeadlineSeconds exceeded; JobManagedBy enabled": {
+			enableJobManagedBy: true,
+
+			parallelism:           1,
+			completions:           2,
+			activeDeadlineSeconds: 10,
+			startTime:             15,
+			backoffLimit:          6,
+			activePods:            1,
+			succeededPods:         1,
+			expectedDeletions:     1,
+			expectedSucceeded:     1,
+			expectedFailed:        1,
+			expectedConditions: []batch.JobCondition{
+				{
+					Type:    batch.JobFailureTarget,
+					Status:  v1.ConditionTrue,
+					Reason:  batch.JobReasonDeadlineExceeded,
+					Message: "Job was active longer than specified deadline",
+				},
+			},
+		},
+		"activeDeadlineSeconds exceeded and backofflimit reached; JobManagedBy enabled": {
+			enableJobManagedBy: true,
+
+			parallelism:           1,
+			completions:           1,
+			activeDeadlineSeconds: 1,
+			startTime:             10,
+			failedPods:            1,
+			expectedFailed:        1,
+			expectedConditions: []batch.JobCondition{
+				{
+					Type:    batch.JobFailureTarget,
+					Status:  v1.ConditionTrue,
+					Reason:  batch.JobReasonBackoffLimitExceeded,
+					Message: "Job has reached the specified backoff limit",
+				},
+				{
+					Type:    batch.JobFailed,
+					Status:  v1.ConditionTrue,
+					Reason:  batch.JobReasonBackoffLimitExceeded,
+					Message: "Job has reached the specified backoff limit",
+				},
+			},
+		},
 	}

 	for name, tc := range testCases {
 		t.Run(name, func(t *testing.T) {
+			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobManagedBy, tc.enableJobManagedBy)
+			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodReplacementPolicy, tc.enableJobPodReplacementPolicy)
 			// job manager setup
 			clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
 			manager, sharedInformerFactory := newControllerFromClient(ctx, t, clientSet, controller.NoResyncPeriodFunc)
@@ -3894,6 +4210,7 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
 		enableBackoffLimitPerIndex    bool
 		enableJobSuccessPolicy        bool
 		enableJobPodReplacementPolicy bool
+		enableJobManagedBy            bool
 		job        batch.Job
 		pods       []v1.Pod
 		wantStatus batch.JobStatus
@@ -3938,12 +4255,6 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
 						Reason:  batch.JobReasonSuccessPolicy,
 						Message: "Matched rules at index 0",
 					},
-					{
-						Type:    batch.JobComplete,
-						Status:  v1.ConditionTrue,
-						Reason:  batch.JobReasonSuccessPolicy,
-						Message: "Matched rules at index 0",
-					},
 				},
 			},
 		},
@@ -4044,12 +4355,6 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
 						Reason:  batch.JobReasonSuccessPolicy,
 						Message: "Matched rules at index 0",
 					},
-					{
-						Type:    batch.JobComplete,
-						Status:  v1.ConditionTrue,
-						Reason:  batch.JobReasonSuccessPolicy,
-						Message: "Matched rules at index 0",
-					},
 				},
 			},
 		},
@@ -4152,12 +4457,6 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
 						Reason:  batch.JobReasonSuccessPolicy,
 						Message: "Matched rules at index 0",
 					},
-					{
-						Type:    batch.JobComplete,
-						Status:  v1.ConditionTrue,
-						Reason:  batch.JobReasonSuccessPolicy,
-						Message: "Matched rules at index 0",
-					},
 				},
 			},
 		},
@@ -4219,12 +4518,8 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
 				},
 			},
 		},
-		// In the current mechanism, the job controller adds Complete condition to Job
-		// even if some running pods still remain.
-		// So, we need to revisit here before we graduate the JobSuccessPolicy to beta.
-		// TODO(#123775): A Job might finish with ready!=0
 		// REF: https://github.com/kubernetes/kubernetes/issues/123775
-		"job with successPolicy; jobPodReplacementPolicy feature enabled; job has SuccessCriteriaMet and Complete condition when job meets to successPolicy and some pods still are running": {
+		"job with successPolicy; jobPodReplacementPolicy feature enabled; job has SuccessCriteriaMet condition when job meets to successPolicy and some pods still are running": {
 			enableJobSuccessPolicy:        true,
 			enableJobPodReplacementPolicy: true,
 			job: batch.Job{
@@ -4275,8 +4570,58 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
 						Reason:  batch.JobReasonSuccessPolicy,
 						Message: "Matched rules at index 0",
 					},
+				},
+			},
+		},
+		// REF: https://github.com/kubernetes/kubernetes/issues/123775
+		"job with successPolicy; JobManagedBy feature enabled; job has SuccessCriteriaMet condition when job meets to successPolicy and some pods still are running": {
+			enableJobSuccessPolicy:        true,
+			enableJobPodReplacementPolicy: false,
+			enableJobManagedBy:            true,
+			job: batch.Job{
+				TypeMeta:   validTypeMeta,
+				ObjectMeta: validObjectMeta,
+				Spec: batch.JobSpec{
+					Selector:             validSelector,
+					Template:             validTemplate,
+					CompletionMode:       ptr.To(batch.IndexedCompletion),
+					Parallelism:          ptr.To[int32](3),
+					Completions:          ptr.To[int32](3),
+					BackoffLimit:         ptr.To[int32](math.MaxInt32),
+					BackoffLimitPerIndex: ptr.To[int32](3),
+					SuccessPolicy: &batch.SuccessPolicy{
+						Rules: []batch.SuccessPolicyRule{{
+							SucceededIndexes: ptr.To("0,1"),
+							SucceededCount:   ptr.To[int32](1),
+						}},
+					},
+				},
+				Status: batch.JobStatus{
+					Conditions: []batch.JobCondition{
+						{
+							Type:    batch.JobSuccessCriteriaMet,
+							Status:  v1.ConditionTrue,
+							Reason:  batch.JobReasonSuccessPolicy,
+							Message: "Matched rules at index 0",
+						},
+					},
+				},
+			},
+			pods: []v1.Pod{
+				*buildPod().uid("a1").index("0").phase(v1.PodFailed).trackingFinalizer().Pod,
+				*buildPod().uid("a2").index("1").phase(v1.PodRunning).trackingFinalizer().Pod,
+				*buildPod().uid("b").index("1").phase(v1.PodSucceeded).trackingFinalizer().Pod,
+				*buildPod().uid("c").index("2").phase(v1.PodRunning).trackingFinalizer().Pod,
+			},
+			wantStatus: batch.JobStatus{
+				Failed:                  1,
+				Succeeded:               1,
+				Terminating:             nil,
+				CompletedIndexes:        "1",
+				UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+				Conditions: []batch.JobCondition{
 					{
-						Type:    batch.JobComplete,
+						Type:    batch.JobSuccessCriteriaMet,
 						Status:  v1.ConditionTrue,
 						Reason:  batch.JobReasonSuccessPolicy,
 						Message: "Matched rules at index 0",
@@ -4339,12 +4684,6 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
 						Reason:  batch.JobReasonPodFailurePolicy,
 						Message: "Pod default/mypod-0 has condition DisruptionTarget matching FailJob rule at index 0",
 					},
-					{
-						Type:    batch.JobFailed,
-						Status:  v1.ConditionTrue,
-						Reason:  batch.JobReasonPodFailurePolicy,
-						Message: "Pod default/mypod-0 has condition DisruptionTarget matching FailJob rule at index 0",
-					},
 				},
 			},
 		},
@@ -4383,6 +4722,12 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
 				FailedIndexes:           ptr.To("0"),
 				UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
 				Conditions: []batch.JobCondition{
+					{
+						Type:    batch.JobFailureTarget,
+						Status:  v1.ConditionTrue,
+						Reason:  batch.JobReasonFailedIndexes,
+						Message: "Job has failed indexes",
+					},
 					{
 						Type:   batch.JobFailed,
 						Status: v1.ConditionTrue,
@@ -4425,6 +4770,12 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
 				CompletedIndexes:        "1",
 				UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
 				Conditions: []batch.JobCondition{
+					{
+						Type:    batch.JobFailureTarget,
+						Status:  v1.ConditionTrue,
+						Reason:  batch.JobReasonBackoffLimitExceeded,
+						Message: "Job has reached the specified backoff limit",
+					},
 					{
 						Type:   batch.JobFailed,
 						Status: v1.ConditionTrue,
@@ -4753,6 +5104,7 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
 			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableBackoffLimitPerIndex)
 			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobSuccessPolicy, tc.enableJobSuccessPolicy)
 			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodReplacementPolicy, tc.enableJobPodReplacementPolicy)
+			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobManagedBy, tc.enableJobManagedBy)

 			clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
 			fakeClock := clocktesting.NewFakeClock(now)
@ -4821,12 +5173,15 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
testCases := map[string]struct {
|
testCases := map[string]struct {
|
||||||
enableJobBackoffLimitPerIndex bool
|
enableJobBackoffLimitPerIndex bool
|
||||||
enableJobPodFailurePolicy bool
|
enableJobPodFailurePolicy bool
|
||||||
|
enableJobPodReplacementPolicy bool
|
||||||
|
enableJobManagedBy bool
|
||||||
job batch.Job
|
job batch.Job
|
||||||
pods []v1.Pod
|
pods []v1.Pod
|
||||||
wantStatus batch.JobStatus
|
wantStatus batch.JobStatus
|
||||||
}{
|
}{
|
||||||
"successful job after a single failure within index": {
|
"successful job after a single failure within index": {
|
||||||
enableJobBackoffLimitPerIndex: true,
|
enableJobBackoffLimitPerIndex: true,
|
||||||
|
enableJobPodReplacementPolicy: true,
|
||||||
job: batch.Job{
|
job: batch.Job{
|
||||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
ObjectMeta: validObjectMeta,
|
ObjectMeta: validObjectMeta,
|
||||||
@ -4853,6 +5208,10 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
FailedIndexes: ptr.To(""),
|
FailedIndexes: ptr.To(""),
|
||||||
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
|
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
|
||||||
Conditions: []batch.JobCondition{
|
Conditions: []batch.JobCondition{
|
||||||
|
{
|
||||||
|
Type: batch.JobSuccessCriteriaMet,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
Type: batch.JobComplete,
|
Type: batch.JobComplete,
|
||||||
Status: v1.ConditionTrue,
|
Status: v1.ConditionTrue,
|
||||||
@ -4862,6 +5221,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
},
|
},
|
||||||
"single failed pod, not counted as the replacement pod creation is delayed": {
|
"single failed pod, not counted as the replacement pod creation is delayed": {
|
||||||
enableJobBackoffLimitPerIndex: true,
|
enableJobBackoffLimitPerIndex: true,
|
||||||
|
enableJobPodReplacementPolicy: true,
|
||||||
job: batch.Job{
|
job: batch.Job{
|
||||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
ObjectMeta: validObjectMeta,
|
ObjectMeta: validObjectMeta,
|
||||||
@ -4887,6 +5247,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
},
|
},
|
||||||
"single failed pod replaced already": {
|
"single failed pod replaced already": {
|
||||||
enableJobBackoffLimitPerIndex: true,
|
enableJobBackoffLimitPerIndex: true,
|
||||||
|
enableJobPodReplacementPolicy: true,
|
||||||
job: batch.Job{
|
job: batch.Job{
|
||||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
ObjectMeta: validObjectMeta,
|
ObjectMeta: validObjectMeta,
|
||||||
@ -4914,6 +5275,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
},
|
},
|
||||||
"single failed index due to exceeding the backoff limit per index, the job continues": {
|
"single failed index due to exceeding the backoff limit per index, the job continues": {
|
||||||
enableJobBackoffLimitPerIndex: true,
|
enableJobBackoffLimitPerIndex: true,
|
||||||
|
enableJobPodReplacementPolicy: true,
|
||||||
job: batch.Job{
|
job: batch.Job{
|
||||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
ObjectMeta: validObjectMeta,
|
ObjectMeta: validObjectMeta,
|
||||||
@ -4941,6 +5303,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
"single failed index due to FailIndex action, the job continues": {
|
"single failed index due to FailIndex action, the job continues": {
|
||||||
enableJobBackoffLimitPerIndex: true,
|
enableJobBackoffLimitPerIndex: true,
|
||||||
enableJobPodFailurePolicy: true,
|
enableJobPodFailurePolicy: true,
|
||||||
|
enableJobPodReplacementPolicy: true,
|
||||||
job: batch.Job{
|
job: batch.Job{
|
||||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
ObjectMeta: validObjectMeta,
|
ObjectMeta: validObjectMeta,
|
||||||
@ -4990,6 +5353,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
"job failed index due to FailJob action": {
|
"job failed index due to FailJob action": {
|
||||||
enableJobBackoffLimitPerIndex: true,
|
enableJobBackoffLimitPerIndex: true,
|
||||||
enableJobPodFailurePolicy: true,
|
enableJobPodFailurePolicy: true,
|
||||||
|
enableJobPodReplacementPolicy: true,
|
||||||
job: batch.Job{
|
job: batch.Job{
|
||||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
ObjectMeta: validObjectMeta,
|
ObjectMeta: validObjectMeta,
|
||||||
@ -5054,6 +5418,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
"job pod failure ignored due to matching Ignore action": {
|
"job pod failure ignored due to matching Ignore action": {
|
||||||
enableJobBackoffLimitPerIndex: true,
|
enableJobBackoffLimitPerIndex: true,
|
||||||
enableJobPodFailurePolicy: true,
|
enableJobPodFailurePolicy: true,
|
||||||
|
enableJobPodReplacementPolicy: true,
|
||||||
job: batch.Job{
|
job: batch.Job{
|
||||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
ObjectMeta: validObjectMeta,
|
ObjectMeta: validObjectMeta,
|
||||||
@ -5103,6 +5468,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
},
|
},
|
||||||
"job failed due to exceeding backoffLimit before backoffLimitPerIndex": {
|
"job failed due to exceeding backoffLimit before backoffLimitPerIndex": {
|
||||||
enableJobBackoffLimitPerIndex: true,
|
enableJobBackoffLimitPerIndex: true,
|
||||||
|
enableJobPodReplacementPolicy: true,
|
||||||
job: batch.Job{
|
job: batch.Job{
|
||||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
ObjectMeta: validObjectMeta,
|
ObjectMeta: validObjectMeta,
|
||||||
@ -5127,6 +5493,12 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
Terminating: ptr.To[int32](0),
|
Terminating: ptr.To[int32](0),
|
||||||
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
|
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
|
||||||
Conditions: []batch.JobCondition{
|
Conditions: []batch.JobCondition{
|
||||||
|
{
|
||||||
|
Type: batch.JobFailureTarget,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
Reason: batch.JobReasonBackoffLimitExceeded,
|
||||||
|
Message: "Job has reached the specified backoff limit",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
Type: batch.JobFailed,
|
Type: batch.JobFailed,
|
||||||
Status: v1.ConditionTrue,
|
Status: v1.ConditionTrue,
|
||||||
@ -5138,6 +5510,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
},
|
},
|
||||||
"job failed due to failed indexes": {
|
"job failed due to failed indexes": {
|
||||||
enableJobBackoffLimitPerIndex: true,
|
enableJobBackoffLimitPerIndex: true,
|
||||||
|
enableJobPodReplacementPolicy: true,
|
||||||
job: batch.Job{
|
job: batch.Job{
|
||||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
ObjectMeta: validObjectMeta,
|
ObjectMeta: validObjectMeta,
|
||||||
@ -5163,6 +5536,12 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
CompletedIndexes: "1",
|
CompletedIndexes: "1",
|
||||||
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
|
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
|
||||||
Conditions: []batch.JobCondition{
|
Conditions: []batch.JobCondition{
|
||||||
|
{
|
||||||
|
Type: batch.JobFailureTarget,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
Reason: batch.JobReasonFailedIndexes,
|
||||||
|
Message: "Job has failed indexes",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
Type: batch.JobFailed,
|
Type: batch.JobFailed,
|
||||||
Status: v1.ConditionTrue,
|
Status: v1.ConditionTrue,
|
||||||
@ -5174,6 +5553,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
},
|
},
|
||||||
"job failed due to exceeding max failed indexes": {
|
"job failed due to exceeding max failed indexes": {
|
||||||
enableJobBackoffLimitPerIndex: true,
|
enableJobBackoffLimitPerIndex: true,
|
||||||
|
enableJobPodReplacementPolicy: true,
|
||||||
job: batch.Job{
|
job: batch.Job{
|
||||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
ObjectMeta: validObjectMeta,
|
ObjectMeta: validObjectMeta,
|
||||||
@ -5203,7 +5583,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
|
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
|
||||||
Conditions: []batch.JobCondition{
|
Conditions: []batch.JobCondition{
|
||||||
{
|
{
|
||||||
Type: batch.JobFailed,
|
Type: batch.JobFailureTarget,
|
||||||
Status: v1.ConditionTrue,
|
Status: v1.ConditionTrue,
|
||||||
Reason: batch.JobReasonMaxFailedIndexesExceeded,
|
Reason: batch.JobReasonMaxFailedIndexesExceeded,
|
||||||
Message: "Job has exceeded the specified maximal number of failed indexes",
|
Message: "Job has exceeded the specified maximal number of failed indexes",
|
||||||
@ -5213,6 +5593,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||||||
},
|
},
|
||||||
"job with finished indexes; failedIndexes are cleaned when JobBackoffLimitPerIndex disabled": {
|
"job with finished indexes; failedIndexes are cleaned when JobBackoffLimitPerIndex disabled": {
|
||||||
enableJobBackoffLimitPerIndex: false,
|
enableJobBackoffLimitPerIndex: false,
|
||||||
|
enableJobPodReplacementPolicy: true,
|
||||||
job: batch.Job{
|
job: batch.Job{
|
||||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
ObjectMeta: validObjectMeta,
|
ObjectMeta: validObjectMeta,
|
||||||
@@ -5241,11 +5622,167 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
},
},
+ "job failed due to failed indexes; JobPodReplacementPolicy and JobManagedBy disabled": {
+ enableJobBackoffLimitPerIndex: true,
+ job: batch.Job{
+ TypeMeta: metav1.TypeMeta{Kind: "Job"},
+ ObjectMeta: validObjectMeta,
+ Spec: batch.JobSpec{
+ Selector: validSelector,
+ Template: validTemplate,
+ Parallelism: ptr.To[int32](2),
+ Completions: ptr.To[int32](2),
+ BackoffLimit: ptr.To[int32](math.MaxInt32),
+ CompletionMode: ptr.To(batch.IndexedCompletion),
+ BackoffLimitPerIndex: ptr.To[int32](1),
+ },
+ },
+ pods: []v1.Pod{
+ *buildPod().uid("a").index("0").phase(v1.PodFailed).indexFailureCount("1").trackingFinalizer().Pod,
+ *buildPod().uid("b").index("1").phase(v1.PodSucceeded).indexFailureCount("0").trackingFinalizer().Pod,
+ },
+ wantStatus: batch.JobStatus{
+ Failed: 1,
+ Succeeded: 1,
+ FailedIndexes: ptr.To("0"),
+ CompletedIndexes: "1",
+ UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailed,
+ Status: v1.ConditionTrue,
+ Reason: batch.JobReasonFailedIndexes,
+ Message: "Job has failed indexes",
+ },
+ },
+ },
+ },
+ "job failed due to failed indexes; JobManagedBy enabled": {
+ enableJobBackoffLimitPerIndex: true,
+ enableJobManagedBy: true,
+ job: batch.Job{
+ TypeMeta: metav1.TypeMeta{Kind: "Job"},
+ ObjectMeta: validObjectMeta,
+ Spec: batch.JobSpec{
+ Selector: validSelector,
+ Template: validTemplate,
+ Parallelism: ptr.To[int32](2),
+ Completions: ptr.To[int32](2),
+ BackoffLimit: ptr.To[int32](math.MaxInt32),
+ CompletionMode: ptr.To(batch.IndexedCompletion),
+ BackoffLimitPerIndex: ptr.To[int32](1),
+ },
+ },
+ pods: []v1.Pod{
+ *buildPod().uid("a").index("0").phase(v1.PodFailed).indexFailureCount("1").trackingFinalizer().Pod,
+ *buildPod().uid("b").index("1").phase(v1.PodSucceeded).indexFailureCount("0").trackingFinalizer().Pod,
+ },
+ wantStatus: batch.JobStatus{
+ Failed: 1,
+ Succeeded: 1,
+ FailedIndexes: ptr.To("0"),
+ CompletedIndexes: "1",
+ UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: v1.ConditionTrue,
+ Reason: batch.JobReasonFailedIndexes,
+ Message: "Job has failed indexes",
+ },
+ {
+ Type: batch.JobFailed,
+ Status: v1.ConditionTrue,
+ Reason: batch.JobReasonFailedIndexes,
+ Message: "Job has failed indexes",
+ },
+ },
+ },
+ },
+ "job failed due to exceeding max failed indexes; JobPodReplacementPolicy and JobManagedBy disabled": {
+ enableJobBackoffLimitPerIndex: true,
+ job: batch.Job{
+ TypeMeta: metav1.TypeMeta{Kind: "Job"},
+ ObjectMeta: validObjectMeta,
+ Spec: batch.JobSpec{
+ Selector: validSelector,
+ Template: validTemplate,
+ Parallelism: ptr.To[int32](4),
+ Completions: ptr.To[int32](4),
+ BackoffLimit: ptr.To[int32](math.MaxInt32),
+ CompletionMode: ptr.To(batch.IndexedCompletion),
+ BackoffLimitPerIndex: ptr.To[int32](1),
+ MaxFailedIndexes: ptr.To[int32](1),
+ },
+ },
+ pods: []v1.Pod{
+ *buildPod().uid("a").index("0").phase(v1.PodFailed).indexFailureCount("1").trackingFinalizer().Pod,
+ *buildPod().uid("b").index("1").phase(v1.PodSucceeded).indexFailureCount("0").trackingFinalizer().Pod,
+ *buildPod().uid("c").index("2").phase(v1.PodFailed).indexFailureCount("1").trackingFinalizer().Pod,
+ *buildPod().uid("d").index("3").phase(v1.PodRunning).indexFailureCount("0").trackingFinalizer().Pod,
+ },
+ wantStatus: batch.JobStatus{
+ Failed: 3,
+ Succeeded: 1,
+ FailedIndexes: ptr.To("0,2"),
+ CompletedIndexes: "1",
+ UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailed,
+ Status: v1.ConditionTrue,
+ Reason: batch.JobReasonMaxFailedIndexesExceeded,
+ Message: "Job has exceeded the specified maximal number of failed indexes",
+ },
+ },
+ },
+ },
+ "job failed due to exceeding max failed indexes; JobManagedBy enabled": {
+ enableJobBackoffLimitPerIndex: true,
+ enableJobManagedBy: true,
+ job: batch.Job{
+ TypeMeta: metav1.TypeMeta{Kind: "Job"},
+ ObjectMeta: validObjectMeta,
+ Spec: batch.JobSpec{
+ Selector: validSelector,
+ Template: validTemplate,
+ Parallelism: ptr.To[int32](4),
+ Completions: ptr.To[int32](4),
+ BackoffLimit: ptr.To[int32](math.MaxInt32),
+ CompletionMode: ptr.To(batch.IndexedCompletion),
+ BackoffLimitPerIndex: ptr.To[int32](1),
+ MaxFailedIndexes: ptr.To[int32](1),
+ },
+ },
+ pods: []v1.Pod{
+ *buildPod().uid("a").index("0").phase(v1.PodFailed).indexFailureCount("1").trackingFinalizer().Pod,
+ *buildPod().uid("b").index("1").phase(v1.PodSucceeded).indexFailureCount("0").trackingFinalizer().Pod,
+ *buildPod().uid("c").index("2").phase(v1.PodFailed).indexFailureCount("1").trackingFinalizer().Pod,
+ *buildPod().uid("d").index("3").phase(v1.PodRunning).indexFailureCount("0").trackingFinalizer().Pod,
+ },
+ wantStatus: batch.JobStatus{
+ Failed: 3,
+ Succeeded: 1,
+ FailedIndexes: ptr.To("0,2"),
+ CompletedIndexes: "1",
+ UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: v1.ConditionTrue,
+ Reason: batch.JobReasonMaxFailedIndexesExceeded,
+ Message: "Job has exceeded the specified maximal number of failed indexes",
+ },
+ },
+ },
+ },
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableJobBackoffLimitPerIndex)
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodFailurePolicy, tc.enableJobPodFailurePolicy)
+ featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodReplacementPolicy, tc.enableJobPodReplacementPolicy)
+ featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobManagedBy, tc.enableJobManagedBy)
clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
fakeClock := clocktesting.NewFakeClock(now)
manager, sharedInformerFactory := newControllerFromClientWithClock(ctx, t, clientset, controller.NoResyncPeriodFunc, fakeClock)
@@ -6431,6 +6968,10 @@ func TestJobBackoffForOnFailure(t *testing.T) {
expectedSucceeded int32
expectedFailed int32
expectedConditions []batch.JobCondition
+
+ // features
+ enableJobManagedBy bool
+ enableJobPodReplacementPolicy bool
}{
"backoffLimit 0 should have 1 pod active": {
parallelism: 1,
@@ -6618,10 +7159,100 @@ func TestJobBackoffForOnFailure(t *testing.T) {
},
},
},
+ "finished job; JobPodReplacementPolicy enabled": {
+ enableJobPodReplacementPolicy: true,
+
+ parallelism: 2,
+ completions: 4,
+ backoffLimit: 6,
+ suspend: true,
+ restartCounts: []int32{1, 1, 2, 0},
+ podPhase: v1.PodSucceeded,
+ expectedActive: 0,
+ expectedSucceeded: 4,
+ expectedFailed: 0,
+ expectedConditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: v1.ConditionTrue,
+ },
+ {
+ Type: batch.JobComplete,
+ Status: v1.ConditionTrue,
+ },
+ },
+ },
+ "finished job; JobManagedBy enabled": {
+ enableJobManagedBy: true,
+
+ parallelism: 2,
+ completions: 4,
+ backoffLimit: 6,
+ suspend: true,
+ restartCounts: []int32{1, 1, 2, 0},
+ podPhase: v1.PodSucceeded,
+ expectedActive: 0,
+ expectedSucceeded: 4,
+ expectedFailed: 0,
+ expectedConditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: v1.ConditionTrue,
+ },
+ {
+ Type: batch.JobComplete,
+ Status: v1.ConditionTrue,
+ },
+ },
+ },
+ "too many job failures with podRunning - multiple pods; JobPodReplacementPolicy enabled": {
+ enableJobPodReplacementPolicy: true,
+
+ parallelism: 2,
+ completions: 5,
+ backoffLimit: 2,
+ suspend: false,
+ restartCounts: []int32{1, 1},
+ podPhase: v1.PodRunning,
+ expectedActive: 0,
+ expectedSucceeded: 0,
+ expectedFailed: 2,
+ expectedConditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: v1.ConditionTrue,
+ Reason: batch.JobReasonBackoffLimitExceeded,
+ Message: "Job has reached the specified backoff limit",
+ },
+ },
+ },
+ "too many job failures with podRunning - multiple pods; JobManagedBy enabled": {
+ enableJobManagedBy: true,
+
+ parallelism: 2,
+ completions: 5,
+ backoffLimit: 2,
+ suspend: false,
+ restartCounts: []int32{1, 1},
+ podPhase: v1.PodRunning,
+ expectedActive: 0,
+ expectedSucceeded: 0,
+ expectedFailed: 2,
+ expectedConditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: v1.ConditionTrue,
+ Reason: batch.JobReasonBackoffLimitExceeded,
+ Message: "Job has reached the specified backoff limit",
+ },
+ },
+ },
}

for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
+ featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodReplacementPolicy, tc.enableJobPodReplacementPolicy)
+ featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobManagedBy, tc.enableJobManagedBy)
// job manager setup
clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
manager, sharedInformerFactory := newControllerFromClient(ctx, t, clientset, controller.NoResyncPeriodFunc)
@@ -6723,6 +7354,12 @@ func TestJobBackoffOnRestartPolicyNever(t *testing.T) {
expectedSucceeded: 0,
expectedFailed: 2,
expectedConditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: v1.ConditionTrue,
+ Reason: batch.JobReasonBackoffLimitExceeded,
+ Message: "Job has reached the specified backoff limit",
+ },
{
Type: batch.JobFailed,
Status: v1.ConditionTrue,
@@ -6754,6 +7391,12 @@ func TestJobBackoffOnRestartPolicyNever(t *testing.T) {
expectedSucceeded: 0,
expectedFailed: 7,
expectedConditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: v1.ConditionTrue,
+ Reason: batch.JobReasonBackoffLimitExceeded,
+ Message: "Job has reached the specified backoff limit",
+ },
{
Type: batch.JobFailed,
Status: v1.ConditionTrue,
@@ -50,7 +50,7 @@ func matchSuccessPolicy(logger klog.Logger, successPolicy *batch.SuccessPolicy,
}

func hasSuccessCriteriaMetCondition(job *batch.Job) *batch.JobCondition {
- if feature.DefaultFeatureGate.Enabled(features.JobSuccessPolicy) {
+ if feature.DefaultFeatureGate.Enabled(features.JobSuccessPolicy) || delayTerminalCondition() {
successCriteriaMet := findConditionByType(job.Status.Conditions, batch.JobSuccessCriteriaMet)
if successCriteriaMet != nil && successCriteriaMet.Status == v1.ConditionTrue {
return successCriteriaMet
@@ -60,7 +60,7 @@ func hasSuccessCriteriaMetCondition(job *batch.Job) *batch.JobCondition {
}

func isSuccessCriteriaMetCondition(cond *batch.JobCondition) bool {
- return feature.DefaultFeatureGate.Enabled(features.JobSuccessPolicy) &&
+ return (feature.DefaultFeatureGate.Enabled(features.JobSuccessPolicy) || delayTerminalCondition()) &&
cond != nil && cond.Type == batch.JobSuccessCriteriaMet && cond.Status == v1.ConditionTrue
}

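Both branches above now also consult delayTerminalCondition(), a helper introduced elsewhere in this change and not shown in these hunks. Judging from the feature-gate combinations that the tests below toggle to get the delayed-terminal behavior, it presumably reports whether either of the two gates that extend the delayed-condition scope is enabled; a rough sketch under that assumption only (the actual body in the PR may differ):

package job

import (
	"k8s.io/apiserver/pkg/util/feature"
	"k8s.io/kubernetes/pkg/features"
)

// Assumed sketch only; delayTerminalCondition is defined elsewhere in this PR
// and is not shown in these hunks.
func delayTerminalCondition() bool {
	return feature.DefaultFeatureGate.Enabled(features.JobManagedBy) ||
		feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy)
}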
@@ -376,12 +376,15 @@ func getStatusValidationOptions(newJob, oldJob *batch.Job) batchvalidation.JobSt
isJobCompleteChanged := batchvalidation.IsJobComplete(oldJob) != batchvalidation.IsJobComplete(newJob)
isJobFailedChanged := batchvalidation.IsJobFailed(oldJob) != batchvalidation.IsJobFailed(newJob)
isJobFailureTargetChanged := batchvalidation.IsConditionTrue(oldJob.Status.Conditions, batch.JobFailureTarget) != batchvalidation.IsConditionTrue(newJob.Status.Conditions, batch.JobFailureTarget)
+ isJobSuccessCriteriaMetChanged := batchvalidation.IsConditionTrue(oldJob.Status.Conditions, batch.JobSuccessCriteriaMet) != batchvalidation.IsConditionTrue(newJob.Status.Conditions, batch.JobSuccessCriteriaMet)
isCompletedIndexesChanged := oldJob.Status.CompletedIndexes != newJob.Status.CompletedIndexes
isFailedIndexesChanged := !ptr.Equal(oldJob.Status.FailedIndexes, newJob.Status.FailedIndexes)
isActiveChanged := oldJob.Status.Active != newJob.Status.Active
isStartTimeChanged := !ptr.Equal(oldJob.Status.StartTime, newJob.Status.StartTime)
isCompletionTimeChanged := !ptr.Equal(oldJob.Status.CompletionTime, newJob.Status.CompletionTime)
isUncountedTerminatedPodsChanged := !apiequality.Semantic.DeepEqual(oldJob.Status.UncountedTerminatedPods, newJob.Status.UncountedTerminatedPods)
+ isReadyChanged := !ptr.Equal(oldJob.Status.Ready, newJob.Status.Ready)
+ isTerminatingChanged := !ptr.Equal(oldJob.Status.Terminating, newJob.Status.Terminating)

return batchvalidation.JobStatusValidationOptions{
// We allow to decrease the counter for succeeded pods for jobs which
@@ -394,6 +397,8 @@ func getStatusValidationOptions(newJob, oldJob *batch.Job) batchvalidation.JobSt
RejectCompletedIndexesForNonIndexedJob: isCompletedIndexesChanged,
RejectFailedIndexesForNoBackoffLimitPerIndex: isFailedIndexesChanged,
RejectFailedIndexesOverlappingCompleted: isFailedIndexesChanged || isCompletedIndexesChanged,
+ RejectFailedJobWithoutFailureTarget: isJobFailedChanged || isFailedIndexesChanged,
+ RejectCompleteJobWithoutSuccessCriteriaMet: isJobCompleteChanged || isJobSuccessCriteriaMetChanged,
RejectFinishedJobWithActivePods: isJobFinishedChanged || isActiveChanged,
RejectFinishedJobWithoutStartTime: isJobFinishedChanged || isStartTimeChanged,
RejectFinishedJobWithUncountedTerminatedPods: isJobFinishedChanged || isUncountedTerminatedPodsChanged,
@@ -404,9 +409,19 @@ func getStatusValidationOptions(newJob, oldJob *batch.Job) batchvalidation.JobSt
RejectCompleteJobWithoutCompletionTime: isJobCompleteChanged || isCompletionTimeChanged,
RejectCompleteJobWithFailedCondition: isJobCompleteChanged || isJobFailedChanged,
RejectCompleteJobWithFailureTargetCondition: isJobCompleteChanged || isJobFailureTargetChanged,
+ AllowForSuccessCriteriaMetInExtendedScope: true,
+ RejectMoreReadyThanActivePods: isReadyChanged || isActiveChanged,
+ RejectFinishedJobWithTerminatingPods: isJobFinishedChanged || isTerminatingChanged,
}
}
- return batchvalidation.JobStatusValidationOptions{}
+ if utilfeature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) {
+ return batchvalidation.JobStatusValidationOptions{
+ AllowForSuccessCriteriaMetInExtendedScope: true,
+ }
+ }
+ return batchvalidation.JobStatusValidationOptions{
+ AllowForSuccessCriteriaMetInExtendedScope: batchvalidation.IsConditionTrue(oldJob.Status.Conditions, batch.JobSuccessCriteriaMet),
+ }
}

// WarningsOnUpdate returns warnings for the given update.
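The practical consequence of the two new Reject options, as exercised by the strategy tests that follow: when the extended validation applies, the interim condition has to be present before (or together with) the matching terminal condition. A minimal illustrative sketch of the accepted condition sequences, using the internal API packages the tests themselves import (this is an illustration, not code from the PR):

package example

import (
	batch "k8s.io/kubernetes/pkg/apis/batch"
	api "k8s.io/kubernetes/pkg/apis/core"
)

// Accepted ordering: FailureTarget=True is set first (interim), Failed=True only
// once all pods are terminal. A status update adding Failed=True without
// FailureTarget=True is rejected with a field.Invalid error on status.conditions.
var acceptedFailureSequence = []batch.JobCondition{
	{Type: batch.JobFailureTarget, Status: api.ConditionTrue},
	{Type: batch.JobFailed, Status: api.ConditionTrue},
}

// Likewise, Complete=True must be accompanied by SuccessCriteriaMet=True.
var acceptedCompleteSequence = []batch.JobCondition{
	{Type: batch.JobSuccessCriteriaMet, Status: api.ConditionTrue},
	{Type: batch.JobComplete, Status: api.ConditionTrue},
}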
@@ -2063,8 +2063,9 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
nowPlusMinute := metav1.Time{Time: now.Add(time.Minute)}

cases := map[string]struct {
enableJobManagedBy bool
enableJobSuccessPolicy bool
+ enableJobPodReplacementPolicy bool

job *batch.Job
newJob *batch.Job
@@ -2154,6 +2155,51 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
},
},
},
+ wantErrs: field.ErrorList{
+ {Type: field.ErrorTypeInvalid, Field: "status.conditions"},
+ {Type: field.ErrorTypeInvalid, Field: "status.conditions"},
+ {Type: field.ErrorTypeInvalid, Field: "status.conditions"},
+ },
+ },
+ "invalid addition of Complete=True without SuccessCriteriaMet=True": {
+ enableJobManagedBy: true,
+ job: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ },
+ newJob: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ StartTime: &now,
+ CompletionTime: &now,
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobComplete,
+ Status: api.ConditionTrue,
+ },
+ },
+ },
+ },
+ wantErrs: field.ErrorList{
+ {Type: field.ErrorTypeInvalid, Field: "status.conditions"},
+ },
+ },
+ "invalid addition of Failed=True without FailureTarget=True": {
+ enableJobManagedBy: true,
+ job: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ },
+ newJob: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ StartTime: &now,
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailed,
+ Status: api.ConditionTrue,
+ },
+ },
+ },
+ },
wantErrs: field.ErrorList{
{Type: field.ErrorTypeInvalid, Field: "status.conditions"},
},
@@ -2178,11 +2224,23 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
enableJobManagedBy: true,
job: &batch.Job{
ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: api.ConditionTrue,
+ },
+ },
+ },
},
newJob: &batch.Job{
ObjectMeta: validObjectMeta,
Status: batch.JobStatus{
Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: api.ConditionTrue,
+ },
{
Type: batch.JobFailed,
Status: api.ConditionTrue,
@@ -2198,12 +2256,24 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
enableJobManagedBy: true,
job: &batch.Job{
ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ },
+ },
+ },
},
newJob: &batch.Job{
ObjectMeta: validObjectMeta,
Status: batch.JobStatus{
CompletionTime: &now,
Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ },
{
Type: batch.JobComplete,
Status: api.ConditionTrue,
@@ -2219,6 +2289,16 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
enableJobManagedBy: true,
job: &batch.Job{
ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ StartTime: &now,
+ Active: 1,
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ },
+ },
+ },
},
newJob: &batch.Job{
ObjectMeta: validObjectMeta,
@@ -2227,6 +2307,10 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
CompletionTime: &now,
Active: 1,
Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ },
{
Type: batch.JobComplete,
Status: api.ConditionTrue,
@@ -2238,30 +2322,94 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
{Type: field.ErrorTypeInvalid, Field: "status.active"},
},
},
- "transition to Failed condition with terminating>0 and ready>0": {
+ "invalid attempt to transition to Failed=True with terminating > 0": {
enableJobManagedBy: true,
job: &batch.Job{
ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ StartTime: &now,
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: api.ConditionTrue,
+ },
+ },
+ Terminating: ptr.To[int32](1),
+ },
},
newJob: &batch.Job{
ObjectMeta: validObjectMeta,
Status: batch.JobStatus{
StartTime: &now,
Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: api.ConditionTrue,
+ },
{
Type: batch.JobFailed,
Status: api.ConditionTrue,
},
},
Terminating: ptr.To[int32](1),
- Ready: ptr.To[int32](1),
},
},
+ wantErrs: field.ErrorList{
+ {Type: field.ErrorTypeInvalid, Field: "status.terminating"},
+ },
+ },
+ "invalid attempt to transition to Failed=True with active > 0": {
+ enableJobManagedBy: true,
+ job: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ StartTime: &now,
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: api.ConditionTrue,
+ },
+ },
+ Active: 1,
+ },
+ },
+ newJob: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ StartTime: &now,
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: api.ConditionTrue,
+ },
+ {
+ Type: batch.JobFailed,
+ Status: api.ConditionTrue,
+ },
+ },
+ Active: 1,
+ },
+ },
+ wantErrs: field.ErrorList{
+ {Type: field.ErrorTypeInvalid, Field: "status.active"},
+ },
},
"invalid attempt to transition to Failed=True with uncountedTerminatedPods.Failed>0": {
enableJobManagedBy: true,
job: &batch.Job{
ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ StartTime: &now,
+ UncountedTerminatedPods: &batch.UncountedTerminatedPods{
+ Failed: []types.UID{"a"},
+ },
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: api.ConditionTrue,
+ },
+ },
+ },
},
newJob: &batch.Job{
ObjectMeta: validObjectMeta,
@@ -2271,6 +2419,10 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
Failed: []types.UID{"a"},
},
Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobFailureTarget,
+ Status: api.ConditionTrue,
+ },
{
Type: batch.JobFailed,
Status: api.ConditionTrue,
@@ -2363,6 +2515,18 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
enableJobManagedBy: true,
job: &batch.Job{
ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ StartTime: &now,
+ UncountedTerminatedPods: &batch.UncountedTerminatedPods{
+ Succeeded: []types.UID{"a"},
+ },
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ },
+ },
+ },
},
newJob: &batch.Job{
ObjectMeta: validObjectMeta,
@@ -2373,6 +2537,10 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
Succeeded: []types.UID{"a"},
},
Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ },
{
Type: batch.JobComplete,
Status: api.ConditionTrue,
@@ -2388,12 +2556,25 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
enableJobManagedBy: true,
job: &batch.Job{
ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ StartTime: &now,
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ },
+ },
+ },
},
newJob: &batch.Job{
ObjectMeta: validObjectMeta,
Status: batch.JobStatus{
StartTime: &now,
Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ },
{
Type: batch.JobComplete,
Status: api.ConditionTrue,
@@ -2499,6 +2680,12 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
ObjectMeta: validObjectMeta,
Status: batch.JobStatus{
StartTime: &nowPlusMinute,
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ },
+ },
},
},
newJob: &batch.Job{
@@ -2507,6 +2694,10 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
StartTime: &nowPlusMinute,
CompletionTime: &now,
Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ },
{
Type: batch.JobComplete,
Status: api.ConditionTrue,
@@ -2941,6 +3132,7 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
},
wantErrs: field.ErrorList{
{Type: field.ErrorTypeInvalid, Field: "status.conditions"},
+ {Type: field.ErrorTypeInvalid, Field: "status.conditions"},
},
},
"invalid failedIndexes, which overlap with completedIndexes": {
@@ -3021,6 +3213,37 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
{Type: field.ErrorTypeInvalid, Field: "status.conditions"},
},
},
+ "valid update of Job if SuccessCriteriaMet already present for NonIndexed Jobs; JobSuccessPolicy enabled, while JobManagedBy and JobPodReplacementPolicy disabled": {
+ enableJobSuccessPolicy: true,
+ enableJobManagedBy: false,
+ enableJobPodReplacementPolicy: false,
+ job: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ Spec: batch.JobSpec{},
+ Status: batch.JobStatus{
+ Conditions: []batch.JobCondition{{
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ }},
+ },
+ },
+ newJob: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ Spec: batch.JobSpec{},
+ Status: batch.JobStatus{
+ Conditions: []batch.JobCondition{
+ {
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ },
+ {
+ Type: batch.JobComplete,
+ Status: api.ConditionTrue,
+ },
+ },
+ },
+ },
+ },
"invalid addition of SuccessCriteriaMet for Job with Failed": {
enableJobSuccessPolicy: true,
job: &batch.Job{
@@ -3378,11 +3601,64 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
{Type: field.ErrorTypeInvalid, Field: "status.conditions"},
},
},
+ "valid addition of SuccessCriteriaMet when JobManagedBy is enabled": {
+ enableJobManagedBy: true,
+ job: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ },
+ newJob: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ Conditions: []batch.JobCondition{{
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ }},
+ },
+ },
+ },
+ "valid addition of SuccessCriteriaMet when JobPodReplacementPolicy is enabled": {
+ enableJobPodReplacementPolicy: true,
+ job: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ },
+ newJob: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ Status: batch.JobStatus{
+ Conditions: []batch.JobCondition{{
+ Type: batch.JobSuccessCriteriaMet,
+ Status: api.ConditionTrue,
+ }},
+ },
+ },
+ },
+ "invalid attempt to set more ready pods than active": {
+ enableJobManagedBy: true,
+ job: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ Spec: batch.JobSpec{
+ Completions: ptr.To[int32](5),
+ },
+ },
+ newJob: &batch.Job{
+ ObjectMeta: validObjectMeta,
+ Spec: batch.JobSpec{
+ Completions: ptr.To[int32](5),
+ },
+ Status: batch.JobStatus{
+ Active: 1,
+ Ready: ptr.To[int32](2),
+ },
+ },
+ wantErrs: field.ErrorList{
+ {Type: field.ErrorTypeInvalid, Field: "status.ready"},
+ },
+ },
}
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobManagedBy, tc.enableJobManagedBy)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobSuccessPolicy, tc.enableJobSuccessPolicy)
+ featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, tc.enableJobPodReplacementPolicy)

errs := StatusStrategy.ValidateUpdate(ctx, tc.newJob, tc.job)
if diff := cmp.Diff(tc.wantErrs, errs, ignoreErrValueDetail); diff != "" {
@@ -21,7 +21,6 @@ import (
"encoding/json"
"fmt"
"strconv"
- "time"

batchv1 "k8s.io/api/batch/v1"
v1 "k8s.io/api/core/v1"
@@ -35,9 +34,7 @@ import (
"k8s.io/apimachinery/pkg/types"
utilrand "k8s.io/apimachinery/pkg/util/rand"
"k8s.io/apimachinery/pkg/util/sets"
- "k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/watch"
- clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
watchtools "k8s.io/client-go/tools/watch"
"k8s.io/client-go/util/retry"
@@ -705,9 +702,20 @@ done`}
job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)

+ ginkgo.By("Awaiting for the job to have the interim success condition")
+ err = e2ejob.WaitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobSuccessCriteriaMet, "")
+ framework.ExpectNoError(err, "failed to ensure job has the interim success condition: %s", f.Namespace.Name)
+
ginkgo.By("Ensuring job reaches completions")
err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, *job.Spec.Completions)
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
+
+ ginkgo.By("Verifying the Job status fields to ensure correct final state")
+ job, err = e2ejob.GetJob(ctx, f.ClientSet, f.Namespace.Name, job.Name)
+ framework.ExpectNoError(err, "failed to retrieve latest job object")
+ gomega.Expect(job.Status.Active).Should(gomega.Equal(int32(0)))
+ gomega.Expect(job.Status.Ready).Should(gomega.Equal(ptr.To[int32](0)))
+ gomega.Expect(job.Status.Terminating).Should(gomega.Equal(ptr.To[int32](0)))
})

ginkgo.It("should fail when exceeds active deadline", func(ctx context.Context) {
@@ -720,9 +728,21 @@ done`}
job := e2ejob.NewTestJob("notTerminate", "exceed-active-deadline", v1.RestartPolicyNever, parallelism, completions, &activeDeadlineSeconds, backoffLimit)
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
+
+ ginkgo.By("Awaiting for the job to have the interim failure condition")
+ err = e2ejob.WaitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonDeadlineExceeded)
+ framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)
+
ginkgo.By("Ensuring job past active deadline")
- err = waitForJobFailure(ctx, f.ClientSet, f.Namespace.Name, job.Name, time.Duration(activeDeadlineSeconds+15)*time.Second, "DeadlineExceeded")
+ err = e2ejob.WaitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailed, batchv1.JobReasonDeadlineExceeded)
framework.ExpectNoError(err, "failed to ensure job past active deadline in namespace: %s", f.Namespace.Name)
+
+ ginkgo.By("Verifying the Job status fields to ensure correct final state")
+ job, err = e2ejob.GetJob(ctx, f.ClientSet, f.Namespace.Name, job.Name)
+ framework.ExpectNoError(err, "failed to retrieve latest job object")
+ gomega.Expect(job.Status.Active).Should(gomega.Equal(int32(0)))
+ gomega.Expect(job.Status.Ready).Should(gomega.Equal(ptr.To[int32](0)))
+ gomega.Expect(job.Status.Terminating).Should(gomega.Equal(ptr.To[int32](0)))
})

/*
@@ -823,9 +843,13 @@ done`}
job := e2ejob.NewTestJob("fail", "backofflimit", v1.RestartPolicyNever, 1, 1, nil, int32(backoff))
job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
- ginkgo.By("Ensuring job exceed backofflimit")

- err = waitForJobFailure(ctx, f.ClientSet, f.Namespace.Name, job.Name, e2ejob.JobTimeout, "BackoffLimitExceeded")
+ ginkgo.By("Awaiting for the job to have the interim failure condition")
+ err = e2ejob.WaitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailureTarget, batchv1.JobReasonBackoffLimitExceeded)
+ framework.ExpectNoError(err, "failed to ensure job has the interim failure condition: %s", f.Namespace.Name)
+
+ ginkgo.By("Ensuring job exceed backofflimit")
+ err = e2ejob.WaitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, job.Name, batchv1.JobFailed, batchv1.JobReasonBackoffLimitExceeded)
framework.ExpectNoError(err, "failed to ensure job exceed backofflimit in namespace: %s", f.Namespace.Name)

ginkgo.By(fmt.Sprintf("Checking that %d pod created and status is failed", backoff+1))
@@ -835,6 +859,13 @@ done`}
for _, pod := range pods.Items {
gomega.Expect(pod.Status.Phase).To(gomega.Equal(v1.PodFailed))
}
+
+ ginkgo.By("Verifying the Job status fields to ensure correct final state")
+ job, err = e2ejob.GetJob(ctx, f.ClientSet, f.Namespace.Name, job.Name)
+ framework.ExpectNoError(err, "failed to retrieve latest job object")
+ gomega.Expect(job.Status.Active).Should(gomega.Equal(int32(0)))
+ gomega.Expect(job.Status.Ready).Should(gomega.Equal(ptr.To[int32](0)))
+ gomega.Expect(job.Status.Terminating).Should(gomega.Equal(ptr.To[int32](0)))
})

f.It("should run a job to completion with CPU requests", f.WithSerial(), func(ctx context.Context) {
@@ -1186,24 +1217,6 @@ func waitForJobEvent(ctx context.Context, config watchEventConfig) {
}
}

- // waitForJobFailure uses c to wait for up to timeout for the Job named jobName in namespace ns to fail.
- func waitForJobFailure(ctx context.Context, c clientset.Interface, ns, jobName string, timeout time.Duration, reason string) error {
- return wait.Poll(framework.Poll, timeout, func() (bool, error) {
- curr, err := c.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{})
- if err != nil {
- return false, err
- }
- for _, c := range curr.Status.Conditions {
- if c.Type == batchv1.JobFailed && c.Status == v1.ConditionTrue {
- if reason == "" || reason == c.Reason {
- return true, nil
- }
- }
- }
- return false, nil
- })
- }
-
func findConditionByType(list []batchv1.JobCondition, cType batchv1.JobConditionType) *batchv1.JobCondition {
for i := range list {
if list[i].Type == cType {
@@ -69,13 +69,16 @@ func waitForJobPodsInPhase(ctx context.Context, c clientset.Interface, ns, jobNa

// WaitForJobComplete uses c to wait for completions to complete for the Job jobName in namespace ns.
func WaitForJobComplete(ctx context.Context, c clientset.Interface, ns, jobName string, completions int32) error {
- return wait.PollUntilContextTimeout(ctx, framework.Poll, JobTimeout, false, func(ctx context.Context) (bool, error) {
+ if err := wait.PollUntilContextTimeout(ctx, framework.Poll, JobTimeout, false, func(ctx context.Context) (bool, error) {
curr, err := c.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{})
if err != nil {
return false, err
}
return curr.Status.Succeeded == completions, nil
- })
+ }); err != nil {
+ return nil
+ }
+ return WaitForJobCondition(ctx, c, ns, jobName, batchv1.JobComplete, "")
}

// WaitForJobReady waits for particular value of the Job .status.ready field
@@ -112,6 +115,28 @@ func WaitForJobFailed(c clientset.Interface, ns, jobName string) error {
})
}

+ // waitForJobCondition waits for the specified Job to have the expected condition with the specific reason.
+ func WaitForJobCondition(ctx context.Context, c clientset.Interface, ns, jobName string, cType batchv1.JobConditionType, reason string) error {
+ err := wait.PollUntilContextTimeout(ctx, framework.Poll, JobTimeout, false, func(ctx context.Context) (bool, error) {
+ curr, err := c.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{})
+ if err != nil {
+ return false, err
+ }
+ for _, c := range curr.Status.Conditions {
+ if c.Type == cType && c.Status == v1.ConditionTrue {
+ if reason == c.Reason {
+ return true, nil
+ }
+ }
+ }
+ return false, nil
+ })
+ if err != nil {
+ return fmt.Errorf("waiting for Job %q to have the condition %q with reason: %q: %w", jobName, cType, reason, err)
+ }
+ return nil
+ }
+
func isJobFailed(j *batchv1.Job) bool {
for _, c := range j.Status.Conditions {
if (c.Type == batchv1.JobFailed) && c.Status == v1.ConditionTrue {
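For context, this is how the e2e tests above consume the new helper: wait for the interim condition first, then for the terminal one, with the same reason. A minimal, self-contained sketch of that call pattern (function and package names here are illustrative only; the imports and helper calls are the ones used in the diff above):

package e2eexample

import (
	"context"

	batchv1 "k8s.io/api/batch/v1"
	"k8s.io/kubernetes/test/e2e/framework"
	e2ejob "k8s.io/kubernetes/test/e2e/framework/job"
)

// waitForBackoffLimitFailure first waits for the interim FailureTarget
// condition (set while pods may still be terminating) and then for the
// terminal Failed condition, which is only added once all pods are terminal.
func waitForBackoffLimitFailure(ctx context.Context, f *framework.Framework, jobName string) {
	err := e2ejob.WaitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, jobName,
		batchv1.JobFailureTarget, batchv1.JobReasonBackoffLimitExceeded)
	framework.ExpectNoError(err, "job did not get the interim FailureTarget condition")

	err = e2ejob.WaitForJobCondition(ctx, f.ClientSet, f.Namespace.Name, jobName,
		batchv1.JobFailed, batchv1.JobReasonBackoffLimitExceeded)
	framework.ExpectNoError(err, "job did not reach the terminal Failed condition")
}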
@@ -29,6 +29,7 @@ import (
"time"

"github.com/google/go-cmp/cmp"
+ "github.com/google/go-cmp/cmp/cmpopts"
batchv1 "k8s.io/api/batch/v1"
v1 "k8s.io/api/core/v1"
eventsv1 "k8s.io/api/events/v1"
@@ -1160,6 +1161,301 @@ func TestBackoffLimitPerIndex_JobPodsCreatedWithExponentialBackoff(t *testing.T)
}
}

+ // TestDelayTerminalPhaseCondition tests the fix for Job controller to delay
+ // setting the terminal phase conditions (Failed and Complete) until all Pods
+ // are terminal. The fate of the Job is indicated by the interim Job conditions:
+ // FailureTarget, or SuccessCriteriaMet.
+ func TestDelayTerminalPhaseCondition(t *testing.T) {
+ t.Cleanup(setDurationDuringTest(&jobcontroller.DefaultJobPodFailureBackOff, fastPodFailureBackoff))
+
+ podTemplateSpec := v1.PodTemplateSpec{
+ ObjectMeta: metav1.ObjectMeta{
+ Finalizers: []string{"fake.example.com/blockDeletion"},
+ },
+ Spec: v1.PodSpec{
+ Containers: []v1.Container{
+ {
+ Name: "main-container",
+ Image: "foo",
+ ImagePullPolicy: v1.PullIfNotPresent,
+ TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError,
+ },
+ },
+ },
+ }
+ failOnePod := func(ctx context.Context, clientSet clientset.Interface, jobObj *batchv1.Job) {
+ if _, err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 1); err != nil {
+ t.Fatalf("Failed setting phase %q on Job Pod: %v", v1.PodFailed, err)
+ }
+ }
+ succeedOnePodAndScaleDown := func(ctx context.Context, clientSet clientset.Interface, jobObj *batchv1.Job) {
+ // mark one pod as succeeded
+ if err := setJobPhaseForIndex(ctx, clientSet, jobObj, v1.PodSucceeded, 0); err != nil {
+ t.Fatalf("Failed setting phase %q on Job Pod: %v", v1.PodSucceeded, err)
+ }
+ jobClient := clientSet.BatchV1().Jobs(jobObj.Namespace)
+ if _, err := updateJob(ctx, jobClient, jobObj.Name, func(j *batchv1.Job) {
+ j.Spec.Parallelism = ptr.To[int32](1)
+ j.Spec.Completions = ptr.To[int32](1)
+ }); err != nil {
+ t.Fatalf("Unexpected error when scaling down the job: %v", err)
+ }
+ }
+
+ testCases := map[string]struct {
+ enableJobManagedBy bool
+ enableJobPodReplacementPolicy bool
+
+ job batchv1.Job
+ action func(context.Context, clientset.Interface, *batchv1.Job)
+ wantInterimStatus *batchv1.JobStatus
+ wantTerminalStatus batchv1.JobStatus
+ }{
+ "job backoff limit exceeded; JobPodReplacementPolicy and JobManagedBy disabled": {
+ job: batchv1.Job{
+ Spec: batchv1.JobSpec{
+ Parallelism: ptr.To[int32](2),
+ Completions: ptr.To[int32](2),
+ Template: podTemplateSpec,
+ BackoffLimit: ptr.To[int32](0),
+ },
+ },
+ action: failOnePod,
+ wantTerminalStatus: batchv1.JobStatus{
+ Failed: 2,
+ Ready: ptr.To[int32](0),
+ Conditions: []batchv1.JobCondition{
+ {
+ Type: batchv1.JobFailed,
+ Status: v1.ConditionTrue,
+ Reason: batchv1.JobReasonBackoffLimitExceeded,
+ },
+ },
+ },
+ },
+ "job backoff limit exceeded; JobPodReplacementPolicy enabled": {
+ enableJobPodReplacementPolicy: true,
+ job: batchv1.Job{
+ Spec: batchv1.JobSpec{
+ Parallelism: ptr.To[int32](2),
+ Completions: ptr.To[int32](2),
+ Template: podTemplateSpec,
+ BackoffLimit: ptr.To[int32](0),
+ },
+ },
+ action: failOnePod,
+ wantInterimStatus: &batchv1.JobStatus{
+ Failed: 2,
+ Ready: ptr.To[int32](0),
+ Terminating: ptr.To[int32](1),
+ Conditions: []batchv1.JobCondition{
+ {
+ Type: batchv1.JobFailureTarget,
+ Status: v1.ConditionTrue,
+ Reason: batchv1.JobReasonBackoffLimitExceeded,
+ },
+ },
+ },
+ wantTerminalStatus: batchv1.JobStatus{
+ Failed: 2,
+ Ready: ptr.To[int32](0),
+ Terminating: ptr.To[int32](0),
+ Conditions: []batchv1.JobCondition{
+ {
+ Type: batchv1.JobFailureTarget,
+ Status: v1.ConditionTrue,
+ Reason: batchv1.JobReasonBackoffLimitExceeded,
+ },
+ {
+ Type: batchv1.JobFailed,
+ Status: v1.ConditionTrue,
+ Reason: batchv1.JobReasonBackoffLimitExceeded,
+ },
+ },
+ },
+ },
+ "job backoff limit exceeded; JobManagedBy enabled": {
+ enableJobManagedBy: true,
+ job: batchv1.Job{
+ Spec: batchv1.JobSpec{
+ Parallelism: ptr.To[int32](2),
+ Completions: ptr.To[int32](2),
+ Template: podTemplateSpec,
+ BackoffLimit: ptr.To[int32](0),
+ },
+ },
+ action: failOnePod,
+ wantInterimStatus: &batchv1.JobStatus{
+ Failed: 2,
+ Ready: ptr.To[int32](0),
+ Conditions: []batchv1.JobCondition{
+ {
+ Type: batchv1.JobFailureTarget,
+ Status: v1.ConditionTrue,
+ Reason: batchv1.JobReasonBackoffLimitExceeded,
+ },
+ },
+ },
+ wantTerminalStatus: batchv1.JobStatus{
+ Failed: 2,
+ Ready: ptr.To[int32](0),
+ Conditions: []batchv1.JobCondition{
+ {
+ Type: batchv1.JobFailureTarget,
+ Status: v1.ConditionTrue,
+ Reason: batchv1.JobReasonBackoffLimitExceeded,
+ },
+ {
+ Type: batchv1.JobFailed,
+ Status: v1.ConditionTrue,
+ Reason: batchv1.JobReasonBackoffLimitExceeded,
+ },
+ },
+ },
+ },
+ "job scale down to meet completions; JobPodReplacementPolicy and JobManagedBy disabled": {
+ job: batchv1.Job{
+ Spec: batchv1.JobSpec{
+ Parallelism: ptr.To[int32](2),
+ Completions: ptr.To[int32](2),
+ CompletionMode: ptr.To(batchv1.IndexedCompletion),
+ Template: podTemplateSpec,
+ },
+ },
+ action: succeedOnePodAndScaleDown,
+ wantTerminalStatus: batchv1.JobStatus{
+ Succeeded: 1,
+ Ready: ptr.To[int32](0),
+ CompletedIndexes: "0",
+ Conditions: []batchv1.JobCondition{
+ {
+ Type: batchv1.JobComplete,
+ Status: v1.ConditionTrue,
+ },
+ },
+ },
+ },
+ "job scale down to meet completions; JobPodReplacementPolicy enabled": {
+ enableJobPodReplacementPolicy: true,
+ job: batchv1.Job{
+ Spec: batchv1.JobSpec{
+ Parallelism: ptr.To[int32](2),
+ Completions: ptr.To[int32](2),
+ CompletionMode: ptr.To(batchv1.IndexedCompletion),
+ Template: podTemplateSpec,
+ },
+ },
+ action: succeedOnePodAndScaleDown,
+ wantInterimStatus: &batchv1.JobStatus{
+ Succeeded: 1,
+ Ready: ptr.To[int32](0),
+ Terminating: ptr.To[int32](1),
+ CompletedIndexes: "0",
+ Conditions: []batchv1.JobCondition{
+ {
+ Type: batchv1.JobSuccessCriteriaMet,
+ Status: v1.ConditionTrue,
+ },
+ },
+ },
+ wantTerminalStatus: batchv1.JobStatus{
|
||||||
|
Succeeded: 1,
|
||||||
|
Ready: ptr.To[int32](0),
|
||||||
|
Terminating: ptr.To[int32](0),
|
||||||
|
CompletedIndexes: "0",
|
||||||
|
Conditions: []batchv1.JobCondition{
|
||||||
|
{
|
||||||
|
Type: batchv1.JobSuccessCriteriaMet,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: batchv1.JobComplete,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"job scale down to meet completions; JobManagedBy enabled": {
|
||||||
|
enableJobManagedBy: true,
|
||||||
|
job: batchv1.Job{
|
||||||
|
Spec: batchv1.JobSpec{
|
||||||
|
Parallelism: ptr.To[int32](2),
|
||||||
|
Completions: ptr.To[int32](2),
|
||||||
|
CompletionMode: ptr.To(batchv1.IndexedCompletion),
|
||||||
|
Template: podTemplateSpec,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
action: succeedOnePodAndScaleDown,
|
||||||
|
wantInterimStatus: &batchv1.JobStatus{
|
||||||
|
Succeeded: 1,
|
||||||
|
Ready: ptr.To[int32](0),
|
||||||
|
CompletedIndexes: "0",
|
||||||
|
Conditions: []batchv1.JobCondition{
|
||||||
|
{
|
||||||
|
Type: batchv1.JobSuccessCriteriaMet,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantTerminalStatus: batchv1.JobStatus{
|
||||||
|
Succeeded: 1,
|
||||||
|
Ready: ptr.To[int32](0),
|
||||||
|
CompletedIndexes: "0",
|
||||||
|
Conditions: []batchv1.JobCondition{
|
||||||
|
{
|
||||||
|
Type: batchv1.JobSuccessCriteriaMet,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: batchv1.JobComplete,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for name, test := range testCases {
|
||||||
|
t.Run(name, func(t *testing.T) {
|
||||||
|
resetMetrics()
|
||||||
|
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)
|
||||||
|
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobManagedBy, test.enableJobManagedBy)
|
||||||
|
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.ElasticIndexedJob, true)
|
||||||
|
|
||||||
|
closeFn, restConfig, clientSet, ns := setup(t, "delay-terminal-condition")
|
||||||
|
t.Cleanup(closeFn)
|
||||||
|
ctx, cancel := startJobControllerAndWaitForCaches(t, restConfig)
|
||||||
|
t.Cleanup(cancel)
|
||||||
|
|
||||||
|
jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &test.job)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Error %q while creating the job %q", err, jobObj.Name)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { removePodsFinalizer(ctx, t, clientSet, ns.Name) })
|
||||||
|
jobClient := clientSet.BatchV1().Jobs(jobObj.Namespace)
|
||||||
|
|
||||||
|
waitForPodsToBeActive(ctx, t, jobClient, *jobObj.Spec.Parallelism, jobObj)
|
||||||
|
|
||||||
|
test.action(ctx, clientSet, jobObj)
|
||||||
|
if test.wantInterimStatus != nil {
|
||||||
|
validateJobStatus(ctx, t, clientSet, jobObj, *test.wantInterimStatus)
|
||||||
|
|
||||||
|
// Set terminal phase to all the remaining pods to simulate
|
||||||
|
// Kubelet (or other components like PodGC).
|
||||||
|
jobPods, err := getJobPods(ctx, t, clientSet, jobObj, func(s v1.PodStatus) bool {
|
||||||
|
return (s.Phase == v1.PodPending || s.Phase == v1.PodRunning)
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to list Job Pods: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, len(jobPods)); err != nil {
|
||||||
|
t.Fatalf("Failed setting phase %q on Job Pod: %v", v1.PodSucceeded, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
validateJobStatus(ctx, t, clientSet, jobObj, test.wantTerminalStatus)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
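The test above encodes the ordering this change introduces: with JobPodReplacementPolicy or JobManagedBy enabled, the Job first carries only the interim FailureTarget or SuccessCriteriaMet condition while pods are still terminating, and the terminal Failed or Complete condition is appended only once no pods remain active or terminating. The sketch below is not part of the commit; it is a minimal, hypothetical client-side helper (the names jobwatch, hasTrueCondition, and waitForJobCondition are made up) showing how an external caller could wait for either condition using standard client-go and apimachinery calls.

// Hypothetical sketch, shown only to illustrate the condition ordering verified above.
package jobwatch

import (
	"context"
	"time"

	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

// hasTrueCondition reports whether the Job carries the given condition with Status=True.
func hasTrueCondition(job *batchv1.Job, condType batchv1.JobConditionType) bool {
	for _, c := range job.Status.Conditions {
		if c.Type == condType && c.Status == corev1.ConditionTrue {
			return true
		}
	}
	return false
}

// waitForJobCondition polls the Job until the requested condition is True.
// With delayed terminal conditions, a caller would typically observe
// JobFailureTarget (or JobSuccessCriteriaMet) first, and only after all pods
// are terminal the JobFailed (or JobComplete) condition.
func waitForJobCondition(ctx context.Context, c kubernetes.Interface, namespace, name string, condType batchv1.JobConditionType) error {
	return wait.PollUntilContextTimeout(ctx, time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
		job, err := c.BatchV1().Jobs(namespace).Get(ctx, name, metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		return hasTrueCondition(job, condType), nil
	})
}

In the integration tests above, validateJobStatus plays the analogous role, diffing the whole JobStatus rather than a single condition.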
// TestBackoffLimitPerIndex tests handling of job and its pods when
// backoff limit per index is used.
func TestBackoffLimitPerIndex(t *testing.T) {
@ -1966,16 +2262,14 @@ func TestManagedBy_UsingReservedJobFinalizers(t *testing.T) {
		t.Fatalf("Error %v when marking the %q pod as succeeded", err, klog.KObj(podObj))
	}

-	// Mark the job as finished so that the built-in controller receives the
+	// Trigger termination for the Job so that the built-in controller receives the
	// UpdateJob event in reaction to which it would remove the pod's finalizer,
	// if not for the custom managedBy field.
	jobObj.Status.Conditions = append(jobObj.Status.Conditions, batchv1.JobCondition{
-		Type:   batchv1.JobComplete,
+		Type:   batchv1.JobSuccessCriteriaMet,
		Status: v1.ConditionTrue,
	})
	jobObj.Status.StartTime = ptr.To(metav1.Now())
-	jobObj.Status.CompletionTime = ptr.To(metav1.Now())

	if jobObj, err = clientSet.BatchV1().Jobs(jobObj.Namespace).UpdateStatus(ctx, jobObj, metav1.UpdateOptions{}); err != nil {
		t.Fatalf("Error %v when updating the job as finished %v", err, klog.KObj(jobObj))
	}
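The hunk above swaps the terminal Complete condition for the interim SuccessCriteriaMet condition and drops completionTime, which is the pattern a controller referenced by spec.managedBy follows under the delayed-terminal-condition semantics: declare SuccessCriteriaMet first, and add Complete together with completionTime only once the pods are terminal. Below is a rough, hypothetical sketch of those two status updates (the package and function names are made up; conflict retries and error handling are omitted):

// Hypothetical sketch, not part of this commit.
package jobmanager

import (
	"context"

	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// completeManagedJob finishes a managed Job in two UpdateStatus calls.
func completeManagedJob(ctx context.Context, c kubernetes.Interface, job *batchv1.Job) error {
	// Step 1: declare the success criteria as met while pods may still be terminating.
	job.Status.Conditions = append(job.Status.Conditions, batchv1.JobCondition{
		Type:   batchv1.JobSuccessCriteriaMet,
		Status: corev1.ConditionTrue,
	})
	job, err := c.BatchV1().Jobs(job.Namespace).UpdateStatus(ctx, job, metav1.UpdateOptions{})
	if err != nil {
		return err
	}

	// ... wait until status.active == 0 and status.terminating == 0 ...

	// Step 2: only now add the terminal Complete condition and completionTime.
	job.Status.Conditions = append(job.Status.Conditions, batchv1.JobCondition{
		Type:   batchv1.JobComplete,
		Status: corev1.ConditionTrue,
	})
	job.Status.CompletionTime = ptr.To(metav1.Now())
	_, err = c.BatchV1().Jobs(job.Namespace).UpdateStatus(ctx, job, metav1.UpdateOptions{})
	return err
}

(ptr.To is the k8s.io/utils/ptr helper already used throughout this file.)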
@ -2821,7 +3115,7 @@ func TestElasticIndexedJob(t *testing.T) {
			jobUpdates: []jobUpdate{
				{
					completions:     ptr.To[int32](0),
-					wantTerminating: ptr.To[int32](3),
+					wantTerminating: ptr.To[int32](0),
				},
			},
		},
@ -3595,6 +3889,25 @@ func validateJobsPodsStatusOnlyWithTimeout(ctx context.Context, t testing.TB, cl
	}
}

func validateJobStatus(ctx context.Context, t testing.TB, clientSet clientset.Interface, jobObj *batchv1.Job, wantStatus batchv1.JobStatus) {
	t.Helper()
	diff := ""
	if err := wait.PollUntilContextTimeout(ctx, waitInterval, wait.ForeverTestTimeout, true, func(ctx context.Context) (bool, error) {
		gotJob, err := clientSet.BatchV1().Jobs(jobObj.Namespace).Get(ctx, jobObj.Name, metav1.GetOptions{})
		if err != nil {
			t.Fatalf("Failed to get updated Job: %v, last status diff (-want,+got):\n%s", err, diff)
		}
		diff = cmp.Diff(wantStatus, gotJob.Status,
			cmpopts.EquateEmpty(),
			cmpopts.IgnoreFields(batchv1.JobStatus{}, "StartTime", "UncountedTerminatedPods", "CompletionTime"),
			cmpopts.IgnoreFields(batchv1.JobCondition{}, "LastProbeTime", "LastTransitionTime", "Message"),
		)
		return diff == "", nil
	}); err != nil {
		t.Fatalf("Waiting for Job Status: %v\n, Status diff (-want,+got):\n%s", err, diff)
	}
}
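A usage sketch for the new helper, written as if it lived in this test file so it reuses the file's existing imports and harness (the wrapper name expectInterimFailureTarget is made up); the expected status mirrors the interim expectation of the backoff-limit case earlier in this diff. validateJobStatus polls until the observed JobStatus matches, ignoring StartTime, CompletionTime, and UncountedTerminatedPods.

// Hypothetical wrapper around validateJobStatus, for illustration only.
func expectInterimFailureTarget(ctx context.Context, t testing.TB, clientSet clientset.Interface, jobObj *batchv1.Job) {
	t.Helper()
	// Wait for the interim state: one replacement pod still terminating and
	// only the FailureTarget condition set, before Failed is added.
	validateJobStatus(ctx, t, clientSet, jobObj, batchv1.JobStatus{
		Failed:      2,
		Ready:       ptr.To[int32](0),
		Terminating: ptr.To[int32](1),
		Conditions: []batchv1.JobCondition{
			{Type: batchv1.JobFailureTarget, Status: v1.ConditionTrue, Reason: batchv1.JobReasonBackoffLimitExceeded},
		},
	})
}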
func validateJobPodsStatus(ctx context.Context, t testing.TB, clientSet clientset.Interface, jobObj *batchv1.Job, desired podsByStatus) {
	t.Helper()
	validateJobsPodsStatusOnly(ctx, t, clientSet, jobObj, desired)