move reasons to api package for job controller

This commit is contained in:
Kevin Hannon
2023-08-25 10:38:23 -04:00
parent b8ef1a0791
commit c6e9fba79b
4 changed files with 65 additions and 32 deletions

View File

@@ -56,12 +56,6 @@ import (
"k8s.io/utils/pointer"
)
const (
// PodFailurePolicy reason indicates a job failure condition is added due to
// a failed pod matching a pod failure policy rule
jobConditionReasonPodFailurePolicy = "PodFailurePolicy"
)
// controllerKind contains the schema.GroupVersionKind for this controller type.
var controllerKind = batch.SchemeGroupVersion.WithKind("Job")
@@ -85,6 +79,11 @@ var (
MaxPodCreateDeletePerSync = 500
)
// maxFailedIndexesExceeded indicates that an index of a job failed
// https://kep.k8s.io/3850
// In Beta, this should be moved to staging as an API field.
const maxFailedIndexesExceeded string = "MaxFailedIndexesExceeded"
// Controller ensures that all Job objects have corresponding pods to
// run their configured workload.
type Controller struct {
@@ -816,16 +815,16 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
jobCtx.finishedCondition = newFailedConditionForFailureTarget(failureTargetCondition, jm.clock.Now())
} else if failJobMessage := getFailJobMessage(&job, pods); failJobMessage != nil {
// Prepare the interim FailureTarget condition to record the failure message before the finalizers (allowing removal of the pods) are removed.
jobCtx.finishedCondition = newCondition(batch.JobFailureTarget, v1.ConditionTrue, jobConditionReasonPodFailurePolicy, *failJobMessage, jm.clock.Now())
jobCtx.finishedCondition = newCondition(batch.JobFailureTarget, v1.ConditionTrue, string(batch.PodFailurePolicyMatched), *failJobMessage, jm.clock.Now())
}
}
if jobCtx.finishedCondition == nil {
if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) {
// check if the number of pod restart exceeds backoff (for restart OnFailure only)
// OR if the number of failed jobs increased since the last syncJob
jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, "BackoffLimitExceeded", "Job has reached the specified backoff limit", jm.clock.Now())
jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, string(batch.BackoffLimitExceeded), "Job has reached the specified backoff limit", jm.clock.Now())
} else if jm.pastActiveDeadline(&job) {
jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, "DeadlineExceeded", "Job was active longer than specified deadline", jm.clock.Now())
jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, string(batch.DeadlineExceeded), "Job was active longer than specified deadline", jm.clock.Now())
} else if job.Spec.ActiveDeadlineSeconds != nil && !jobSuspended(&job) {
syncDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds)*time.Second - jm.clock.Since(job.Status.StartTime.Time)
logger.V(2).Info("Job has activeDeadlineSeconds configuration. Will sync this job again", "key", key, "nextSyncIn", syncDuration)
@@ -840,9 +839,9 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
jobCtx.failedIndexes = calculateFailedIndexes(logger, &job, pods)
if jobCtx.finishedCondition == nil {
if job.Spec.MaxFailedIndexes != nil && jobCtx.failedIndexes.total() > int(*job.Spec.MaxFailedIndexes) {
jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, "MaxFailedIndexesExceeded", "Job has exceeded the specified maximal number of failed indexes", jm.clock.Now())
jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, maxFailedIndexesExceeded, "Job has exceeded the specified maximal number of failed indexes", jm.clock.Now())
} else if jobCtx.failedIndexes.total() > 0 && jobCtx.failedIndexes.total()+jobCtx.succeededIndexes.total() >= int(*job.Spec.Completions) {
jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, "FailedIndexes", "Job has failed indexes", jm.clock.Now())
jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, string(batch.FailedIndexes), "Job has failed indexes", jm.clock.Now())
}
}
jobCtx.podsWithDelayedDeletionPerIndex = getPodsWithDelayedDeletionPerIndex(logger, jobCtx)