Rename job reasons to JobReasons as part of api review

This commit is contained in:
Kevin Hannon 2023-09-14 13:24:12 -04:00
parent c6e9fba79b
commit a62eb45ae2
4 changed files with 40 additions and 57 deletions

View File

@ -533,23 +533,6 @@ const (
JobFailureTarget JobConditionType = "FailureTarget" JobFailureTarget JobConditionType = "FailureTarget"
) )
type JobReasonType string
const (
// PodFailurePolicy reason indicates a job failure condition is added due to
// a failed pod matching a pod failure policy rule
// https://kep.k8s.io/3329
// This is currently a beta field.
PodFailurePolicyMatched JobReasonType = "PodFailurePolicy"
// BackoffLimitExceeded reason indicates that pods within a job have failed a number of
// times higher than backOffLimit times.
BackoffLimitExceeded JobReasonType = "BackoffLimitExceeded"
// DeadlineExceeded means job duration is past ActiveDeadline
DeadlineExceeded JobReasonType = "DeadlineExceeded"
// FailedIndexes means Job has failed indexes.
FailedIndexes JobReasonType = "FailedIndexes"
)
// JobCondition describes current state of a job. // JobCondition describes current state of a job.
type JobCondition struct { type JobCondition struct {
// Type of job condition. // Type of job condition.

View File

@ -79,10 +79,14 @@ var (
MaxPodCreateDeletePerSync = 500 MaxPodCreateDeletePerSync = 500
) )
// MaxFailedIndexesExceeded indicates that an index of a job failed const (
// https://kep.k8s.io/3850 // MaxFailedIndexesExceeded indicates that an index of a job failed
// In Beta, this should be moved to staging as an API field. // https://kep.k8s.io/3850
const maxFailedIndexesExceeded string = "MaxFailedIndexesExceeded" // In Beta, this should be moved to staging as an API field.
jobReasonMaxFailedIndexesExceeded string = "MaxFailedIndexesExceeded"
// FailedIndexes means Job has failed indexes.
jobReasonFailedIndexes string = "FailedIndexes"
)
// Controller ensures that all Job objects have corresponding pods to // Controller ensures that all Job objects have corresponding pods to
// run their configured workload. // run their configured workload.
@ -815,16 +819,16 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
jobCtx.finishedCondition = newFailedConditionForFailureTarget(failureTargetCondition, jm.clock.Now()) jobCtx.finishedCondition = newFailedConditionForFailureTarget(failureTargetCondition, jm.clock.Now())
} else if failJobMessage := getFailJobMessage(&job, pods); failJobMessage != nil { } else if failJobMessage := getFailJobMessage(&job, pods); failJobMessage != nil {
// Prepare the interim FailureTarget condition to record the failure message before the finalizers (allowing removal of the pods) are removed. // Prepare the interim FailureTarget condition to record the failure message before the finalizers (allowing removal of the pods) are removed.
jobCtx.finishedCondition = newCondition(batch.JobFailureTarget, v1.ConditionTrue, string(batch.PodFailurePolicyMatched), *failJobMessage, jm.clock.Now()) jobCtx.finishedCondition = newCondition(batch.JobFailureTarget, v1.ConditionTrue, batch.JobReasonPodFailurePolicy, *failJobMessage, jm.clock.Now())
} }
} }
if jobCtx.finishedCondition == nil { if jobCtx.finishedCondition == nil {
if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) { if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) {
// check if the number of pod restart exceeds backoff (for restart OnFailure only) // check if the number of pod restart exceeds backoff (for restart OnFailure only)
// OR if the number of failed jobs increased since the last syncJob // OR if the number of failed jobs increased since the last syncJob
jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, string(batch.BackoffLimitExceeded), "Job has reached the specified backoff limit", jm.clock.Now()) jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonBackoffLimitExceeded, "Job has reached the specified backoff limit", jm.clock.Now())
} else if jm.pastActiveDeadline(&job) { } else if jm.pastActiveDeadline(&job) {
jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, string(batch.DeadlineExceeded), "Job was active longer than specified deadline", jm.clock.Now()) jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonDeadlineExceeded, "Job was active longer than specified deadline", jm.clock.Now())
} else if job.Spec.ActiveDeadlineSeconds != nil && !jobSuspended(&job) { } else if job.Spec.ActiveDeadlineSeconds != nil && !jobSuspended(&job) {
syncDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds)*time.Second - jm.clock.Since(job.Status.StartTime.Time) syncDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds)*time.Second - jm.clock.Since(job.Status.StartTime.Time)
logger.V(2).Info("Job has activeDeadlineSeconds configuration. Will sync this job again", "key", key, "nextSyncIn", syncDuration) logger.V(2).Info("Job has activeDeadlineSeconds configuration. Will sync this job again", "key", key, "nextSyncIn", syncDuration)
@ -839,9 +843,9 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
jobCtx.failedIndexes = calculateFailedIndexes(logger, &job, pods) jobCtx.failedIndexes = calculateFailedIndexes(logger, &job, pods)
if jobCtx.finishedCondition == nil { if jobCtx.finishedCondition == nil {
if job.Spec.MaxFailedIndexes != nil && jobCtx.failedIndexes.total() > int(*job.Spec.MaxFailedIndexes) { if job.Spec.MaxFailedIndexes != nil && jobCtx.failedIndexes.total() > int(*job.Spec.MaxFailedIndexes) {
jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, maxFailedIndexesExceeded, "Job has exceeded the specified maximal number of failed indexes", jm.clock.Now()) jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, jobReasonMaxFailedIndexesExceeded, "Job has exceeded the specified maximal number of failed indexes", jm.clock.Now())
} else if jobCtx.failedIndexes.total() > 0 && jobCtx.failedIndexes.total()+jobCtx.succeededIndexes.total() >= int(*job.Spec.Completions) { } else if jobCtx.failedIndexes.total() > 0 && jobCtx.failedIndexes.total()+jobCtx.succeededIndexes.total() >= int(*job.Spec.Completions) {
jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, string(batch.FailedIndexes), "Job has failed indexes", jm.clock.Now()) jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, jobReasonFailedIndexes, "Job has failed indexes", jm.clock.Now())
} }
} }
jobCtx.podsWithDelayedDeletionPerIndex = getPodsWithDelayedDeletionPerIndex(logger, jobCtx) jobCtx.podsWithDelayedDeletionPerIndex = getPodsWithDelayedDeletionPerIndex(logger, jobCtx)

View File

@ -1909,7 +1909,7 @@ func TestSyncJobPastDeadline(t *testing.T) {
expectedDeletions: 1, expectedDeletions: 1,
expectedFailed: 1, expectedFailed: 1,
expectedCondition: batch.JobFailed, expectedCondition: batch.JobFailed,
expectedConditionReason: string(batch.DeadlineExceeded), expectedConditionReason: batch.JobReasonDeadlineExceeded,
}, },
"activeDeadlineSeconds bigger than single pod execution": { "activeDeadlineSeconds bigger than single pod execution": {
parallelism: 1, parallelism: 1,
@ -1923,7 +1923,7 @@ func TestSyncJobPastDeadline(t *testing.T) {
expectedSucceeded: 1, expectedSucceeded: 1,
expectedFailed: 1, expectedFailed: 1,
expectedCondition: batch.JobFailed, expectedCondition: batch.JobFailed,
expectedConditionReason: string(batch.DeadlineExceeded), expectedConditionReason: batch.JobReasonDeadlineExceeded,
}, },
"activeDeadlineSeconds times-out before any pod starts": { "activeDeadlineSeconds times-out before any pod starts": {
parallelism: 1, parallelism: 1,
@ -1932,7 +1932,7 @@ func TestSyncJobPastDeadline(t *testing.T) {
startTime: 10, startTime: 10,
backoffLimit: 6, backoffLimit: 6,
expectedCondition: batch.JobFailed, expectedCondition: batch.JobFailed,
expectedConditionReason: string(batch.DeadlineExceeded), expectedConditionReason: batch.JobReasonDeadlineExceeded,
}, },
"activeDeadlineSeconds with backofflimit reach": { "activeDeadlineSeconds with backofflimit reach": {
parallelism: 1, parallelism: 1,
@ -1942,7 +1942,7 @@ func TestSyncJobPastDeadline(t *testing.T) {
failedPods: 1, failedPods: 1,
expectedFailed: 1, expectedFailed: 1,
expectedCondition: batch.JobFailed, expectedCondition: batch.JobFailed,
expectedConditionReason: string(batch.BackoffLimitExceeded), expectedConditionReason: batch.JobReasonBackoffLimitExceeded,
}, },
"activeDeadlineSeconds is not triggered when Job is suspended": { "activeDeadlineSeconds is not triggered when Job is suspended": {
suspend: true, suspend: true,
@ -2098,7 +2098,7 @@ func TestPastDeadlineJobFinished(t *testing.T) {
if err != nil { if err != nil {
return false, nil return false, nil
} }
if getCondition(j, batch.JobFailed, v1.ConditionTrue, string(batch.DeadlineExceeded)) { if getCondition(j, batch.JobFailed, v1.ConditionTrue, batch.JobReasonDeadlineExceeded) {
if manager.clock.Since(j.Status.StartTime.Time) < time.Duration(*j.Spec.ActiveDeadlineSeconds)*time.Second { if manager.clock.Since(j.Status.StartTime.Time) < time.Duration(*j.Spec.ActiveDeadlineSeconds)*time.Second {
return true, errors.New("Job contains DeadlineExceeded condition earlier than expected") return true, errors.New("Job contains DeadlineExceeded condition earlier than expected")
} }
@ -2397,7 +2397,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.PodFailurePolicyMatched), Reason: batch.JobReasonPodFailurePolicy,
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1", Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
}, },
}, },
@ -2425,7 +2425,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
{ {
Type: batch.JobFailureTarget, Type: batch.JobFailureTarget,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.PodFailurePolicyMatched), Reason: batch.JobReasonPodFailurePolicy,
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1", Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
}, },
}, },
@ -2452,7 +2452,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.PodFailurePolicyMatched), Reason: batch.JobReasonPodFailurePolicy,
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1", Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
}, },
}, },
@ -2480,7 +2480,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
{ {
Type: batch.JobFailureTarget, Type: batch.JobFailureTarget,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.PodFailurePolicyMatched), Reason: batch.JobReasonPodFailurePolicy,
Message: "Container main-container for pod default/already-deleted-pod failed with exit code 5 matching FailJob rule at index 1", Message: "Container main-container for pod default/already-deleted-pod failed with exit code 5 matching FailJob rule at index 1",
}, },
}, },
@ -2507,7 +2507,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.PodFailurePolicyMatched), Reason: batch.JobReasonPodFailurePolicy,
Message: "Container main-container for pod default/already-deleted-pod failed with exit code 5 matching FailJob rule at index 1", Message: "Container main-container for pod default/already-deleted-pod failed with exit code 5 matching FailJob rule at index 1",
}, },
}, },
@ -2596,7 +2596,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.PodFailurePolicyMatched), Reason: batch.JobReasonPodFailurePolicy,
Message: "Container main-container for pod default/mypod-1 failed with exit code 5 matching FailJob rule at index 1", Message: "Container main-container for pod default/mypod-1 failed with exit code 5 matching FailJob rule at index 1",
}, },
}, },
@ -2642,7 +2642,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.PodFailurePolicyMatched), Reason: batch.JobReasonPodFailurePolicy,
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1", Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
}, },
}, },
@ -2695,7 +2695,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.PodFailurePolicyMatched), Reason: batch.JobReasonPodFailurePolicy,
Message: "Container main-container for pod default/mypod-0 failed with exit code 42 matching FailJob rule at index 0", Message: "Container main-container for pod default/mypod-0 failed with exit code 42 matching FailJob rule at index 0",
}, },
}, },
@ -2797,7 +2797,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.PodFailurePolicyMatched), Reason: batch.JobReasonPodFailurePolicy,
Message: "Container init-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1", Message: "Container init-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
}, },
}, },
@ -2924,7 +2924,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.BackoffLimitExceeded), Reason: batch.JobReasonBackoffLimitExceeded,
Message: "Job has reached the specified backoff limit", Message: "Job has reached the specified backoff limit",
}, },
}, },
@ -3185,7 +3185,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.PodFailurePolicyMatched), Reason: batch.JobReasonPodFailurePolicy,
Message: "Pod default/mypod-0 has condition DisruptionTarget matching FailJob rule at index 0", Message: "Pod default/mypod-0 has condition DisruptionTarget matching FailJob rule at index 0",
}, },
}, },
@ -3571,13 +3571,13 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
{ {
Type: batch.JobFailureTarget, Type: batch.JobFailureTarget,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.PodFailurePolicyMatched), Reason: batch.JobReasonPodFailurePolicy,
Message: "Container x for pod default/mypod-0 failed with exit code 3 matching FailJob rule at index 0", Message: "Container x for pod default/mypod-0 failed with exit code 3 matching FailJob rule at index 0",
}, },
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.PodFailurePolicyMatched), Reason: batch.JobReasonPodFailurePolicy,
Message: "Container x for pod default/mypod-0 failed with exit code 3 matching FailJob rule at index 0", Message: "Container x for pod default/mypod-0 failed with exit code 3 matching FailJob rule at index 0",
}, },
}, },
@ -3660,7 +3660,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.BackoffLimitExceeded), Reason: batch.JobReasonBackoffLimitExceeded,
Message: "Job has reached the specified backoff limit", Message: "Job has reached the specified backoff limit",
}, },
}, },
@ -3695,7 +3695,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: string(batch.FailedIndexes), Reason: jobReasonFailedIndexes,
Message: "Job has failed indexes", Message: "Job has failed indexes",
}, },
}, },
@ -3733,7 +3733,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
{ {
Type: batch.JobFailed, Type: batch.JobFailed,
Status: v1.ConditionTrue, Status: v1.ConditionTrue,
Reason: maxFailedIndexesExceeded, Reason: jobReasonMaxFailedIndexesExceeded,
Message: "Job has exceeded the specified maximal number of failed indexes", Message: "Job has exceeded the specified maximal number of failed indexes",
}, },
}, },

View File

@ -535,21 +535,17 @@ const (
JobFailureTarget JobConditionType = "FailureTarget" JobFailureTarget JobConditionType = "FailureTarget"
) )
type JobReasonType string
const ( const (
// PodFailurePolicy reason indicates a job failure condition is added due to // JobReasonPodFailurePolicy reason indicates a job failure condition is added due to
// a failed pod matching a pod failure policy rule // a failed pod matching a pod failure policy rule
// https://kep.k8s.io/3329 // https://kep.k8s.io/3329
// This is currently a beta field. // This is currently a beta field.
PodFailurePolicyMatched JobReasonType = "PodFailurePolicy" JobReasonPodFailurePolicy string = "PodFailurePolicy"
// BackoffLimitExceeded reason indicates that pods within a job have failed a number of // JobReasonBackoffLimitExceeded reason indicates that pods within a job have failed a number of
// times higher than backOffLimit times. // times higher than backOffLimit times.
BackoffLimitExceeded JobReasonType = "BackoffLimitExceeded" JobReasonBackoffLimitExceeded string = "BackoffLimitExceeded"
// DeadlineExceeded means job duration is past ActiveDeadline // JobReasonDeadlineExceeded means job duration is past ActiveDeadline
DeadlineExceeded JobReasonType = "DeadlineExceeded" JobReasonDeadlineExceeded string = "DeadlineExceeded"
// FailedIndexes means Job has failed indexes.
FailedIndexes JobReasonType = "FailedIndexes"
) )
// JobCondition describes current state of a job. // JobCondition describes current state of a job.