mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-29 06:27:05 +00:00
Job: Extend the jobs_finished_total metric reason label with SuccessPolicy and CompletionsReached
Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>
This commit is contained in:
parent
594490fd77
commit
6e8dc2c250
@@ -1424,7 +1424,7 @@ func (jm *Controller) recordJobFinished(job *batch.Job, finishedCond *batch.JobC
|
|||||||
jm.recorder.Event(job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
|
jm.recorder.Event(job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
|
||||||
}
|
}
|
||||||
jm.recorder.Event(job, v1.EventTypeNormal, "Completed", "Job completed")
|
jm.recorder.Event(job, v1.EventTypeNormal, "Completed", "Job completed")
|
||||||
metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded", "").Inc()
|
metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded", finishedCond.Reason).Inc()
|
||||||
} else {
|
} else {
|
||||||
jm.recorder.Event(job, v1.EventTypeWarning, finishedCond.Reason, finishedCond.Message)
|
jm.recorder.Event(job, v1.EventTypeWarning, finishedCond.Reason, finishedCond.Message)
|
||||||
metrics.JobFinishedNum.WithLabelValues(completionMode, "failed", finishedCond.Reason).Inc()
|
metrics.JobFinishedNum.WithLabelValues(completionMode, "failed", finishedCond.Reason).Inc()
|
||||||
|
@@ -55,12 +55,14 @@ var (
|
|||||||
},
|
},
|
||||||
[]string{"completion_mode", "result", "action"},
|
[]string{"completion_mode", "result", "action"},
|
||||||
)
|
)
|
||||||
// JobFinishedNum tracks the number of Jobs that finish. Empty reason label
|
// JobFinishedNum tracks the number of Jobs that finish.
|
||||||
// is used to count successful jobs.
|
// TODO: Once the JobSuccessPolicy feature gate is removed, drop the mention of the empty ("") reason label from this comment.
|
||||||
|
// When the JobSuccessPolicy feature gate is disabled, empty reason label is used to count successful jobs.
|
||||||
|
// Otherwise, "CompletionsReached" reason label is used to count successful jobs.
|
||||||
// Possible label values:
|
// Possible label values:
|
||||||
// completion_mode: Indexed, NonIndexed
|
// completion_mode: Indexed, NonIndexed
|
||||||
// result: failed, succeeded
|
// result: failed, succeeded
|
||||||
// reason: "BackoffLimitExceeded", "DeadlineExceeded", "PodFailurePolicy", "FailedIndexes", "MaxFailedIndexesExceeded", ""
|
// reason: "BackoffLimitExceeded", "DeadlineExceeded", "PodFailurePolicy", "FailedIndexes", "MaxFailedIndexesExceeded", "SuccessPolicy", "CompletionsReached", ""
|
||||||
JobFinishedNum = metrics.NewCounterVec(
|
JobFinishedNum = metrics.NewCounterVec(
|
||||||
&metrics.CounterOpts{
|
&metrics.CounterOpts{
|
||||||
Subsystem: JobControllerSubsystem,
|
Subsystem: JobControllerSubsystem,
|
||||||
|
@@ -511,10 +511,10 @@ func TestSuccessPolicy(t *testing.T) {
|
|||||||
testCases := map[string]struct {
|
testCases := map[string]struct {
|
||||||
enableJobSuccessPolicy bool
|
enableJobSuccessPolicy bool
|
||||||
enableBackoffLimitPerIndex bool
|
enableBackoffLimitPerIndex bool
|
||||||
job batchv1.Job
|
job batchv1.Job
|
||||||
podTerminations []podTerminationWithExpectations
|
podTerminations []podTerminationWithExpectations
|
||||||
wantConditionTypes []batchv1.JobConditionType
|
wantConditionTypes []batchv1.JobConditionType
|
||||||
wantJobFinishedNumMetric []metricLabelsWithValue
|
wantJobFinishedNumMetric []metricLabelsWithValue
|
||||||
}{
|
}{
|
||||||
"all indexes succeeded; JobSuccessPolicy is enabled": {
|
"all indexes succeeded; JobSuccessPolicy is enabled": {
|
||||||
enableJobSuccessPolicy: true,
|
enableJobSuccessPolicy: true,
|
||||||
@@ -547,7 +547,7 @@ func TestSuccessPolicy(t *testing.T) {
|
|||||||
wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
|
wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
|
||||||
wantJobFinishedNumMetric: []metricLabelsWithValue{
|
wantJobFinishedNumMetric: []metricLabelsWithValue{
|
||||||
{
|
{
|
||||||
Labels: []string{"Indexed", "succeeded", ""},
|
Labels: []string{"Indexed", "succeeded", "SuccessPolicy"},
|
||||||
Value: 1,
|
Value: 1,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -587,6 +587,37 @@ func TestSuccessPolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
"job without successPolicy; incremented the jobs_finished_total metric with CompletionsReached reason": {
|
||||||
|
enableJobSuccessPolicy: true,
|
||||||
|
job: batchv1.Job{
|
||||||
|
Spec: batchv1.JobSpec{
|
||||||
|
Parallelism: ptr.To[int32](1),
|
||||||
|
Completions: ptr.To[int32](1),
|
||||||
|
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
|
||||||
|
Template: podTemplateSpec,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
podTerminations: []podTerminationWithExpectations{
|
||||||
|
{
|
||||||
|
index: 0,
|
||||||
|
status: v1.PodStatus{
|
||||||
|
Phase: v1.PodSucceeded,
|
||||||
|
},
|
||||||
|
wantActive: 0,
|
||||||
|
wantFailed: 0,
|
||||||
|
wantSucceeded: 1,
|
||||||
|
wantCompletedIndexes: "0",
|
||||||
|
wantTerminating: ptr.To[int32](0),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
|
||||||
|
wantJobFinishedNumMetric: []metricLabelsWithValue{
|
||||||
|
{
|
||||||
|
Labels: []string{"Indexed", "succeeded", "CompletionsReached"},
|
||||||
|
Value: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
"job with successPolicy with succeededIndexes; job has SuccessCriteriaMet and Complete conditions even if some indexes remain pending": {
|
"job with successPolicy with succeededIndexes; job has SuccessCriteriaMet and Complete conditions even if some indexes remain pending": {
|
||||||
enableJobSuccessPolicy: true,
|
enableJobSuccessPolicy: true,
|
||||||
job: batchv1.Job{
|
job: batchv1.Job{
|
||||||
@@ -629,7 +660,7 @@ func TestSuccessPolicy(t *testing.T) {
|
|||||||
wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
|
wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
|
||||||
wantJobFinishedNumMetric: []metricLabelsWithValue{
|
wantJobFinishedNumMetric: []metricLabelsWithValue{
|
||||||
{
|
{
|
||||||
Labels: []string{"Indexed", "succeeded", ""},
|
Labels: []string{"Indexed", "succeeded", "SuccessPolicy"},
|
||||||
Value: 1,
|
Value: 1,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -676,7 +707,7 @@ func TestSuccessPolicy(t *testing.T) {
|
|||||||
wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
|
wantConditionTypes: []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete},
|
||||||
wantJobFinishedNumMetric: []metricLabelsWithValue{
|
wantJobFinishedNumMetric: []metricLabelsWithValue{
|
||||||
{
|
{
|
||||||
Labels: []string{"Indexed", "succeeded", ""},
|
Labels: []string{"Indexed", "succeeded", "SuccessPolicy"},
|
||||||
Value: 1,
|
Value: 1,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
Loading…
Reference in New Issue
Block a user