diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go index 2cd1705debb..28d713a3a87 100644 --- a/pkg/controller/job/job_controller.go +++ b/pkg/controller/job/job_controller.go @@ -1424,7 +1424,7 @@ func (jm *Controller) recordJobFinished(job *batch.Job, finishedCond *batch.JobC jm.recorder.Event(job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached") } jm.recorder.Event(job, v1.EventTypeNormal, "Completed", "Job completed") - metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded", "").Inc() + metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded", finishedCond.Reason).Inc() } else { jm.recorder.Event(job, v1.EventTypeWarning, finishedCond.Reason, finishedCond.Message) metrics.JobFinishedNum.WithLabelValues(completionMode, "failed", finishedCond.Reason).Inc() diff --git a/pkg/controller/job/metrics/metrics.go b/pkg/controller/job/metrics/metrics.go index 2a184ad2721..a52067ada7f 100644 --- a/pkg/controller/job/metrics/metrics.go +++ b/pkg/controller/job/metrics/metrics.go @@ -55,12 +55,14 @@ var ( }, []string{"completion_mode", "result", "action"}, ) - // JobFinishedNum tracks the number of Jobs that finish. Empty reason label - // is used to count successful jobs. + // JobFinishedNum tracks the number of Jobs that finish. + // TODO: Once the JobSuccessPolicy feature gate is removed, drop the "" (empty) reason label from this comment. + // When the JobSuccessPolicy feature gate is disabled, the empty reason label is used to count successful jobs. + // Otherwise, the "CompletionsReached" or "SuccessPolicy" reason label is used to count successful jobs. 
// Possible label values: // completion_mode: Indexed, NonIndexed // result: failed, succeeded - // reason: "BackoffLimitExceeded", "DeadlineExceeded", "PodFailurePolicy", "FailedIndexes", "MaxFailedIndexesExceeded", "" + // reason: "BackoffLimitExceeded", "DeadlineExceeded", "PodFailurePolicy", "FailedIndexes", "MaxFailedIndexesExceeded", "SuccessPolicy", "CompletionsReached", "" JobFinishedNum = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: JobControllerSubsystem, diff --git a/test/integration/job/job_test.go b/test/integration/job/job_test.go index 0e3a17b7466..9908f6ca882 100644 --- a/test/integration/job/job_test.go +++ b/test/integration/job/job_test.go @@ -511,10 +511,10 @@ func TestSuccessPolicy(t *testing.T) { testCases := map[string]struct { enableJobSuccessPolicy bool enableBackoffLimitPerIndex bool - job batchv1.Job - podTerminations []podTerminationWithExpectations - wantConditionTypes []batchv1.JobConditionType - wantJobFinishedNumMetric []metricLabelsWithValue + job batchv1.Job + podTerminations []podTerminationWithExpectations + wantConditionTypes []batchv1.JobConditionType + wantJobFinishedNumMetric []metricLabelsWithValue }{ "all indexes succeeded; JobSuccessPolicy is enabled": { enableJobSuccessPolicy: true, @@ -547,7 +547,7 @@ func TestSuccessPolicy(t *testing.T) { wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete}, wantJobFinishedNumMetric: []metricLabelsWithValue{ { - Labels: []string{"Indexed", "succeeded", ""}, + Labels: []string{"Indexed", "succeeded", "SuccessPolicy"}, Value: 1, }, }, @@ -587,6 +587,37 @@ func TestSuccessPolicy(t *testing.T) { }, }, }, + "job without successPolicy; incremented the jobs_finished_total metric with CompletionsReached reason": { + enableJobSuccessPolicy: true, + job: batchv1.Job{ + Spec: batchv1.JobSpec{ + Parallelism: ptr.To[int32](1), + Completions: ptr.To[int32](1), + CompletionMode: completionModePtr(batchv1.IndexedCompletion), + Template: 
podTemplateSpec, + }, + }, + podTerminations: []podTerminationWithExpectations{ + { + index: 0, + status: v1.PodStatus{ + Phase: v1.PodSucceeded, + }, + wantActive: 0, + wantFailed: 0, + wantSucceeded: 1, + wantCompletedIndexes: "0", + wantTerminating: ptr.To[int32](0), + }, + }, + wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete}, + wantJobFinishedNumMetric: []metricLabelsWithValue{ + { + Labels: []string{"Indexed", "succeeded", "CompletionsReached"}, + Value: 1, + }, + }, + }, "job with successPolicy with succeededIndexes; job has SuccessCriteriaMet and Complete conditions even if some indexes remain pending": { enableJobSuccessPolicy: true, job: batchv1.Job{ @@ -629,7 +660,7 @@ func TestSuccessPolicy(t *testing.T) { wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete}, wantJobFinishedNumMetric: []metricLabelsWithValue{ { - Labels: []string{"Indexed", "succeeded", ""}, + Labels: []string{"Indexed", "succeeded", "SuccessPolicy"}, Value: 1, }, }, @@ -676,7 +707,7 @@ func TestSuccessPolicy(t *testing.T) { wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete}, wantJobFinishedNumMetric: []metricLabelsWithValue{ { - Labels: []string{"Indexed", "succeeded", ""}, + Labels: []string{"Indexed", "succeeded", "SuccessPolicy"}, Value: 1, }, },