Graduate Backoff Limit Per Index as stable

Reenable the JobBackoffLimitPerIndex_Reenabling integration test
This commit is contained in:
Michal Wozniak 2025-02-10 17:25:58 +01:00
parent 50ba48369d
commit a91ed902fe
13 changed files with 69 additions and 42 deletions

View File

@ -4690,7 +4690,7 @@
"type": "integer"
},
"backoffLimitPerIndex": {
"description": "Specifies the limit for the number of retries within an index before marking this index as failed. When enabled the number of failures per index is kept in the pod's batch.kubernetes.io/job-index-failure-count annotation. It can only be set when Job's completionMode=Indexed, and the Pod's restart policy is Never. The field is immutable. This field is beta-level. It can be used when the `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).",
"description": "Specifies the limit for the number of retries within an index before marking this index as failed. When enabled the number of failures per index is kept in the pod's batch.kubernetes.io/job-index-failure-count annotation. It can only be set when Job's completionMode=Indexed, and the Pod's restart policy is Never. The field is immutable.",
"format": "int32",
"type": "integer"
},
@ -4712,7 +4712,7 @@
"type": "boolean"
},
"maxFailedIndexes": {
"description": "Specifies the maximal number of failed indexes before marking the Job as failed, when backoffLimitPerIndex is set. Once the number of failed indexes exceeds this number the entire Job is marked as Failed and its execution is terminated. When left as null the job continues execution of all of its indexes and is marked with the `Complete` Job condition. It can only be specified when backoffLimitPerIndex is set. It can be null or up to completions. It is required and must be less than or equal to 10^4 when is completions greater than 10^5. This field is beta-level. It can be used when the `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).",
"description": "Specifies the maximal number of failed indexes before marking the Job as failed, when backoffLimitPerIndex is set. Once the number of failed indexes exceeds this number the entire Job is marked as Failed and its execution is terminated. When left as null the job continues execution of all of its indexes and is marked with the `Complete` Job condition. It can only be specified when backoffLimitPerIndex is set. It can be null or up to completions. It is required and must be less than or equal to 10^4 when is completions greater than 10^5.",
"format": "int32",
"type": "integer"
},
@ -4788,7 +4788,7 @@
"type": "integer"
},
"failedIndexes": {
"description": "FailedIndexes holds the failed indexes when spec.backoffLimitPerIndex is set. The indexes are represented in the text format analogous as for the `completedIndexes` field, ie. they are kept as decimal integers separated by commas. The numbers are listed in increasing order. Three or more consecutive numbers are compressed and represented by the first and last element of the series, separated by a hyphen. For example, if the failed indexes are 1, 3, 4, 5 and 7, they are represented as \"1,3-5,7\". The set of failed indexes cannot overlap with the set of completed indexes.\n\nThis field is beta-level. It can be used when the `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).",
"description": "FailedIndexes holds the failed indexes when spec.backoffLimitPerIndex is set. The indexes are represented in the text format analogous as for the `completedIndexes` field, ie. they are kept as decimal integers separated by commas. The numbers are listed in increasing order. Three or more consecutive numbers are compressed and represented by the first and last element of the series, separated by a hyphen. For example, if the failed indexes are 1, 3, 4, 5 and 7, they are represented as \"1,3-5,7\". The set of failed indexes cannot overlap with the set of completed indexes.",
"type": "string"
},
"ready": {
@ -4897,7 +4897,7 @@
"description": "PodFailurePolicyRule describes how a pod failure is handled when the requirements are met. One of onExitCodes and onPodConditions, but not both, can be used in each rule.",
"properties": {
"action": {
"description": "Specifies the action taken on a pod failure when the requirements are satisfied. Possible values are:\n\n- FailJob: indicates that the pod's job is marked as Failed and all\n running pods are terminated.\n- FailIndex: indicates that the pod's index is marked as Failed and will\n not be restarted.\n This value is beta-level. It can be used when the\n `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).\n- Ignore: indicates that the counter towards the .backoffLimit is not\n incremented and a replacement pod is created.\n- Count: indicates that the pod is handled in the default way - the\n counter towards the .backoffLimit is incremented.\nAdditional values are considered to be added in the future. Clients should react to an unknown action by skipping the rule.",
"description": "Specifies the action taken on a pod failure when the requirements are satisfied. Possible values are:\n\n- FailJob: indicates that the pod's job is marked as Failed and all\n running pods are terminated.\n- FailIndex: indicates that the pod's index is marked as Failed and will\n not be restarted.\n- Ignore: indicates that the counter towards the .backoffLimit is not\n incremented and a replacement pod is created.\n- Count: indicates that the pod is handled in the default way - the\n counter towards the .backoffLimit is incremented.\nAdditional values are considered to be added in the future. Clients should react to an unknown action by skipping the rule.",
"type": "string"
},
"onExitCodes": {

View File

@ -331,7 +331,7 @@
"type": "integer"
},
"backoffLimitPerIndex": {
"description": "Specifies the limit for the number of retries within an index before marking this index as failed. When enabled the number of failures per index is kept in the pod's batch.kubernetes.io/job-index-failure-count annotation. It can only be set when Job's completionMode=Indexed, and the Pod's restart policy is Never. The field is immutable. This field is beta-level. It can be used when the `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).",
"description": "Specifies the limit for the number of retries within an index before marking this index as failed. When enabled the number of failures per index is kept in the pod's batch.kubernetes.io/job-index-failure-count annotation. It can only be set when Job's completionMode=Indexed, and the Pod's restart policy is Never. The field is immutable.",
"format": "int32",
"type": "integer"
},
@ -353,7 +353,7 @@
"type": "boolean"
},
"maxFailedIndexes": {
"description": "Specifies the maximal number of failed indexes before marking the Job as failed, when backoffLimitPerIndex is set. Once the number of failed indexes exceeds this number the entire Job is marked as Failed and its execution is terminated. When left as null the job continues execution of all of its indexes and is marked with the `Complete` Job condition. It can only be specified when backoffLimitPerIndex is set. It can be null or up to completions. It is required and must be less than or equal to 10^4 when is completions greater than 10^5. This field is beta-level. It can be used when the `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).",
"description": "Specifies the maximal number of failed indexes before marking the Job as failed, when backoffLimitPerIndex is set. Once the number of failed indexes exceeds this number the entire Job is marked as Failed and its execution is terminated. When left as null the job continues execution of all of its indexes and is marked with the `Complete` Job condition. It can only be specified when backoffLimitPerIndex is set. It can be null or up to completions. It is required and must be less than or equal to 10^4 when is completions greater than 10^5.",
"format": "int32",
"type": "integer"
},
@ -455,7 +455,7 @@
"type": "integer"
},
"failedIndexes": {
"description": "FailedIndexes holds the failed indexes when spec.backoffLimitPerIndex is set. The indexes are represented in the text format analogous as for the `completedIndexes` field, ie. they are kept as decimal integers separated by commas. The numbers are listed in increasing order. Three or more consecutive numbers are compressed and represented by the first and last element of the series, separated by a hyphen. For example, if the failed indexes are 1, 3, 4, 5 and 7, they are represented as \"1,3-5,7\". The set of failed indexes cannot overlap with the set of completed indexes.\n\nThis field is beta-level. It can be used when the `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).",
"description": "FailedIndexes holds the failed indexes when spec.backoffLimitPerIndex is set. The indexes are represented in the text format analogous as for the `completedIndexes` field, ie. they are kept as decimal integers separated by commas. The numbers are listed in increasing order. Three or more consecutive numbers are compressed and represented by the first and last element of the series, separated by a hyphen. For example, if the failed indexes are 1, 3, 4, 5 and 7, they are represented as \"1,3-5,7\". The set of failed indexes cannot overlap with the set of completed indexes.",
"type": "string"
},
"ready": {
@ -592,7 +592,7 @@
"properties": {
"action": {
"default": "",
"description": "Specifies the action taken on a pod failure when the requirements are satisfied. Possible values are:\n\n- FailJob: indicates that the pod's job is marked as Failed and all\n running pods are terminated.\n- FailIndex: indicates that the pod's index is marked as Failed and will\n not be restarted.\n This value is beta-level. It can be used when the\n `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).\n- Ignore: indicates that the counter towards the .backoffLimit is not\n incremented and a replacement pod is created.\n- Count: indicates that the pod is handled in the default way - the\n counter towards the .backoffLimit is incremented.\nAdditional values are considered to be added in the future. Clients should react to an unknown action by skipping the rule.",
"description": "Specifies the action taken on a pod failure when the requirements are satisfied. Possible values are:\n\n- FailJob: indicates that the pod's job is marked as Failed and all\n running pods are terminated.\n- FailIndex: indicates that the pod's index is marked as Failed and will\n not be restarted.\n- Ignore: indicates that the counter towards the .backoffLimit is not\n incremented and a replacement pod is created.\n- Count: indicates that the pod is handled in the default way - the\n counter towards the .backoffLimit is incremented.\nAdditional values are considered to be added in the future. Clients should react to an unknown action by skipping the rule.",
"type": "string"
},
"onExitCodes": {

View File

@ -132,7 +132,6 @@ const (
// This is an action which might be taken on a pod failure - mark the
// Job's index as failed to avoid restarts within this index. This action
// can only be used when backoffLimitPerIndex is set.
// This value is beta-level.
PodFailurePolicyActionFailIndex PodFailurePolicyAction = "FailIndex"
// This is an action which might be taken on a pod failure - the counter towards
@ -226,8 +225,6 @@ type PodFailurePolicyRule struct {
// running pods are terminated.
// - FailIndex: indicates that the pod's index is marked as Failed and will
// not be restarted.
// This value is beta-level. It can be used when the
// `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).
// - Ignore: indicates that the counter towards the .backoffLimit is not
// incremented and a replacement pod is created.
// - Count: indicates that the pod is handled in the default way - the
@ -363,8 +360,6 @@ type JobSpec struct {
// batch.kubernetes.io/job-index-failure-count annotation. It can only
// be set when Job's completionMode=Indexed, and the Pod's restart
// policy is Never. The field is immutable.
// This field is beta-level. It can be used when the `JobBackoffLimitPerIndex`
// feature gate is enabled (enabled by default).
// +optional
BackoffLimitPerIndex *int32
@ -376,8 +371,6 @@ type JobSpec struct {
// It can only be specified when backoffLimitPerIndex is set.
// It can be null or up to completions. It is required and must be
// less than or equal to 10^4 when is completions greater than 10^5.
// This field is beta-level. It can be used when the `JobBackoffLimitPerIndex`
// feature gate is enabled (enabled by default).
// +optional
MaxFailedIndexes *int32
@ -571,8 +564,6 @@ type JobStatus struct {
// represented as "1,3-5,7".
// The set of failed indexes cannot overlap with the set of completed indexes.
//
// This field is beta-level. It can be used when the `JobBackoffLimitPerIndex`
// feature gate is enabled (enabled by default).
// +optional
FailedIndexes *string

View File

@ -2437,6 +2437,10 @@ func TestTrackJobStatusAndRemoveFinalizers(t *testing.T) {
}
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
if !tc.enableJobBackoffLimitPerIndex {
// TODO: this will be removed in 1.36
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, feature.DefaultFeatureGate, utilversion.MustParse("1.32"))
}
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableJobBackoffLimitPerIndex)
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobSuccessPolicy, tc.enableJobSuccessPolicy)
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodReplacementPolicy, tc.enableJobPodReplacementPolicy)
@ -5171,6 +5175,10 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
if !tc.enableBackoffLimitPerIndex {
// TODO: this will be removed in 1.36
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, feature.DefaultFeatureGate, utilversion.MustParse("1.32"))
}
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableBackoffLimitPerIndex)
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobSuccessPolicy, tc.enableJobSuccessPolicy)
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodReplacementPolicy, tc.enableJobPodReplacementPolicy)
@ -5849,6 +5857,10 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
if !tc.enableJobBackoffLimitPerIndex {
// TODO: this will be removed in 1.36
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, feature.DefaultFeatureGate, utilversion.MustParse("1.32"))
}
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableJobBackoffLimitPerIndex)
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodReplacementPolicy, tc.enableJobPodReplacementPolicy)
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobManagedBy, tc.enableJobManagedBy)

View File

@ -23,6 +23,7 @@ import (
batch "k8s.io/api/batch/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilversion "k8s.io/apimachinery/pkg/util/version"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
_ "k8s.io/kubernetes/pkg/apis/core/install"
@ -867,6 +868,10 @@ func TestMatchPodFailurePolicy(t *testing.T) {
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
if !tc.enableJobBackoffLimitPerIndex {
// TODO: this will be removed in 1.36
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, utilfeature.DefaultFeatureGate, utilversion.MustParse("1.32"))
}
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableJobBackoffLimitPerIndex)
jobFailMessage, countFailed, action := matchPodFailurePolicy(tc.podFailurePolicy, tc.failedPod)
if diff := cmp.Diff(tc.wantJobFailureMessage, jobFailMessage); diff != "" {

View File

@ -414,6 +414,7 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
JobBackoffLimitPerIndex: {
{Version: version.MustParse("1.28"), Default: false, PreRelease: featuregate.Alpha},
{Version: version.MustParse("1.29"), Default: true, PreRelease: featuregate.Beta},
{Version: version.MustParse("1.33"), Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.36
},
JobManagedBy: {

View File

@ -17728,14 +17728,14 @@ func schema_k8sio_api_batch_v1_JobSpec(ref common.ReferenceCallback) common.Open
},
"backoffLimitPerIndex": {
SchemaProps: spec.SchemaProps{
Description: "Specifies the limit for the number of retries within an index before marking this index as failed. When enabled the number of failures per index is kept in the pod's batch.kubernetes.io/job-index-failure-count annotation. It can only be set when Job's completionMode=Indexed, and the Pod's restart policy is Never. The field is immutable. This field is beta-level. It can be used when the `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).",
Description: "Specifies the limit for the number of retries within an index before marking this index as failed. When enabled the number of failures per index is kept in the pod's batch.kubernetes.io/job-index-failure-count annotation. It can only be set when Job's completionMode=Indexed, and the Pod's restart policy is Never. The field is immutable.",
Type: []string{"integer"},
Format: "int32",
},
},
"maxFailedIndexes": {
SchemaProps: spec.SchemaProps{
Description: "Specifies the maximal number of failed indexes before marking the Job as failed, when backoffLimitPerIndex is set. Once the number of failed indexes exceeds this number the entire Job is marked as Failed and its execution is terminated. When left as null the job continues execution of all of its indexes and is marked with the `Complete` Job condition. It can only be specified when backoffLimitPerIndex is set. It can be null or up to completions. It is required and must be less than or equal to 10^4 when is completions greater than 10^5. This field is beta-level. It can be used when the `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).",
Description: "Specifies the maximal number of failed indexes before marking the Job as failed, when backoffLimitPerIndex is set. Once the number of failed indexes exceeds this number the entire Job is marked as Failed and its execution is terminated. When left as null the job continues execution of all of its indexes and is marked with the `Complete` Job condition. It can only be specified when backoffLimitPerIndex is set. It can be null or up to completions. It is required and must be less than or equal to 10^4 when is completions greater than 10^5.",
Type: []string{"integer"},
Format: "int32",
},
@ -17883,7 +17883,7 @@ func schema_k8sio_api_batch_v1_JobStatus(ref common.ReferenceCallback) common.Op
},
"failedIndexes": {
SchemaProps: spec.SchemaProps{
Description: "FailedIndexes holds the failed indexes when spec.backoffLimitPerIndex is set. The indexes are represented in the text format analogous as for the `completedIndexes` field, ie. they are kept as decimal integers separated by commas. The numbers are listed in increasing order. Three or more consecutive numbers are compressed and represented by the first and last element of the series, separated by a hyphen. For example, if the failed indexes are 1, 3, 4, 5 and 7, they are represented as \"1,3-5,7\". The set of failed indexes cannot overlap with the set of completed indexes.\n\nThis field is beta-level. It can be used when the `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).",
Description: "FailedIndexes holds the failed indexes when spec.backoffLimitPerIndex is set. The indexes are represented in the text format analogous as for the `completedIndexes` field, ie. they are kept as decimal integers separated by commas. The numbers are listed in increasing order. Three or more consecutive numbers are compressed and represented by the first and last element of the series, separated by a hyphen. For example, if the failed indexes are 1, 3, 4, 5 and 7, they are represented as \"1,3-5,7\". The set of failed indexes cannot overlap with the set of completed indexes.",
Type: []string{"string"},
Format: "",
},
@ -18062,7 +18062,7 @@ func schema_k8sio_api_batch_v1_PodFailurePolicyRule(ref common.ReferenceCallback
Properties: map[string]spec.Schema{
"action": {
SchemaProps: spec.SchemaProps{
Description: "Specifies the action taken on a pod failure when the requirements are satisfied. Possible values are:\n\n- FailJob: indicates that the pod's job is marked as Failed and all\n running pods are terminated.\n- FailIndex: indicates that the pod's index is marked as Failed and will\n not be restarted.\n This value is beta-level. It can be used when the\n `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).\n- Ignore: indicates that the counter towards the .backoffLimit is not\n incremented and a replacement pod is created.\n- Count: indicates that the pod is handled in the default way - the\n counter towards the .backoffLimit is incremented.\nAdditional values are considered to be added in the future. Clients should react to an unknown action by skipping the rule.\n\nPossible enum values:\n - `\"Count\"` This is an action which might be taken on a pod failure - the pod failure is handled in the default way - the counter towards .backoffLimit, represented by the job's .status.failed field, is incremented.\n - `\"FailIndex\"` This is an action which might be taken on a pod failure - mark the Job's index as failed to avoid restarts within this index. This action can only be used when backoffLimitPerIndex is set. This value is beta-level.\n - `\"FailJob\"` This is an action which might be taken on a pod failure - mark the pod's job as Failed and terminate all running pods.\n - `\"Ignore\"` This is an action which might be taken on a pod failure - the counter towards .backoffLimit, represented by the job's .status.failed field, is not incremented and a replacement pod is created.",
Description: "Specifies the action taken on a pod failure when the requirements are satisfied. Possible values are:\n\n- FailJob: indicates that the pod's job is marked as Failed and all\n running pods are terminated.\n- FailIndex: indicates that the pod's index is marked as Failed and will\n not be restarted.\n- Ignore: indicates that the counter towards the .backoffLimit is not\n incremented and a replacement pod is created.\n- Count: indicates that the pod is handled in the default way - the\n counter towards the .backoffLimit is incremented.\nAdditional values are considered to be added in the future. Clients should react to an unknown action by skipping the rule.\n\nPossible enum values:\n - `\"Count\"` This is an action which might be taken on a pod failure - the pod failure is handled in the default way - the counter towards .backoffLimit, represented by the job's .status.failed field, is incremented.\n - `\"FailIndex\"` This is an action which might be taken on a pod failure - mark the Job's index as failed to avoid restarts within this index. This action can only be used when backoffLimitPerIndex is set.\n - `\"FailJob\"` This is an action which might be taken on a pod failure - mark the pod's job as Failed and terminate all running pods.\n - `\"Ignore\"` This is an action which might be taken on a pod failure - the counter towards .backoffLimit, represented by the job's .status.failed field, is not incremented and a replacement pod is created.",
Default: "",
Type: []string{"string"},
Format: "",

View File

@ -26,6 +26,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/validation/field"
utilversion "k8s.io/apimachinery/pkg/util/version"
genericapirequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/registry/rest"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -512,6 +513,10 @@ func TestJobStrategy_PrepareForUpdate(t *testing.T) {
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
if !tc.enableJobBackoffLimitPerIndex {
// TODO: this will be removed in 1.36
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, utilfeature.DefaultFeatureGate, utilversion.MustParse("1.32"))
}
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableJobBackoffLimitPerIndex)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, tc.enableJobPodReplacementPolicy)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobSuccessPolicy, tc.enableJobSuccessPolicy)
@ -893,6 +898,10 @@ func TestJobStrategy_PrepareForCreate(t *testing.T) {
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
if !tc.enableJobBackoffLimitPerIndex {
// TODO: this will be removed in 1.36
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, utilfeature.DefaultFeatureGate, utilversion.MustParse("1.32"))
}
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableJobBackoffLimitPerIndex)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, tc.enableJobPodReplacementPolicy)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobManagedBy, tc.enableJobManageBy)
@ -1165,6 +1174,10 @@ func TestJobStrategy_ValidateUpdate(t *testing.T) {
}
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
if !tc.enableJobBackoffLimitPerIndex {
// TODO: this will be removed in 1.36
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, utilfeature.DefaultFeatureGate, utilversion.MustParse("1.32"))
}
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableJobBackoffLimitPerIndex)
newJob := tc.job.DeepCopy()
tc.update(newJob)
@ -1775,6 +1788,10 @@ func TestJobStrategy_Validate(t *testing.T) {
}
for name, tc := range testcases {
t.Run(name, func(t *testing.T) {
if !tc.enableJobBackoffLimitPerIndex {
// TODO: this will be removed in 1.36
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, utilfeature.DefaultFeatureGate, utilversion.MustParse("1.32"))
}
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableJobBackoffLimitPerIndex)
errs := Strategy.Validate(ctx, tc.job)
if len(errs) != int(tc.wantWarningCount) {

View File

@ -238,8 +238,6 @@ message JobSpec {
// batch.kubernetes.io/job-index-failure-count annotation. It can only
// be set when Job's completionMode=Indexed, and the Pod's restart
// policy is Never. The field is immutable.
// This field is beta-level. It can be used when the `JobBackoffLimitPerIndex`
// feature gate is enabled (enabled by default).
// +optional
optional int32 backoffLimitPerIndex = 12;
@ -251,8 +249,6 @@ message JobSpec {
// It can only be specified when backoffLimitPerIndex is set.
// It can be null or up to completions. It is required and must be
// less than or equal to 10^4 when is completions greater than 10^5.
// This field is beta-level. It can be used when the `JobBackoffLimitPerIndex`
// feature gate is enabled (enabled by default).
// +optional
optional int32 maxFailedIndexes = 13;
@ -442,8 +438,6 @@ message JobStatus {
// represented as "1,3-5,7".
// The set of failed indexes cannot overlap with the set of completed indexes.
//
// This field is beta-level. It can be used when the `JobBackoffLimitPerIndex`
// feature gate is enabled (enabled by default).
// +optional
optional string failedIndexes = 10;
@ -554,8 +548,6 @@ message PodFailurePolicyRule {
// running pods are terminated.
// - FailIndex: indicates that the pod's index is marked as Failed and will
// not be restarted.
// This value is beta-level. It can be used when the
// `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).
// - Ignore: indicates that the counter towards the .backoffLimit is not
// incremented and a replacement pod is created.
// - Count: indicates that the pod is handled in the default way - the

View File

@ -128,7 +128,6 @@ const (
// This is an action which might be taken on a pod failure - mark the
// Job's index as failed to avoid restarts within this index. This action
// can only be used when backoffLimitPerIndex is set.
// This value is beta-level.
PodFailurePolicyActionFailIndex PodFailurePolicyAction = "FailIndex"
// This is an action which might be taken on a pod failure - the counter towards
@ -223,8 +222,6 @@ type PodFailurePolicyRule struct {
// running pods are terminated.
// - FailIndex: indicates that the pod's index is marked as Failed and will
// not be restarted.
// This value is beta-level. It can be used when the
// `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).
// - Ignore: indicates that the counter towards the .backoffLimit is not
// incremented and a replacement pod is created.
// - Count: indicates that the pod is handled in the default way - the
@ -362,8 +359,6 @@ type JobSpec struct {
// batch.kubernetes.io/job-index-failure-count annotation. It can only
// be set when Job's completionMode=Indexed, and the Pod's restart
// policy is Never. The field is immutable.
// This field is beta-level. It can be used when the `JobBackoffLimitPerIndex`
// feature gate is enabled (enabled by default).
// +optional
BackoffLimitPerIndex *int32 `json:"backoffLimitPerIndex,omitempty" protobuf:"varint,12,opt,name=backoffLimitPerIndex"`
@ -375,8 +370,6 @@ type JobSpec struct {
// It can only be specified when backoffLimitPerIndex is set.
// It can be null or up to completions. It is required and must be
// less than or equal to 10^4 when is completions greater than 10^5.
// This field is beta-level. It can be used when the `JobBackoffLimitPerIndex`
// feature gate is enabled (enabled by default).
// +optional
MaxFailedIndexes *int32 `json:"maxFailedIndexes,omitempty" protobuf:"varint,13,opt,name=maxFailedIndexes"`
@ -571,8 +564,6 @@ type JobStatus struct {
// represented as "1,3-5,7".
// The set of failed indexes cannot overlap with the set of completed indexes.
//
// This field is beta-level. It can be used when the `JobBackoffLimitPerIndex`
// feature gate is enabled (enabled by default).
// +optional
FailedIndexes *string `json:"failedIndexes,omitempty" protobuf:"bytes,10,opt,name=failedIndexes"`

View File

@ -118,8 +118,8 @@ var map_JobSpec = map[string]string{
"podFailurePolicy": "Specifies the policy of handling failed pods. In particular, it allows to specify the set of actions and conditions which need to be satisfied to take the associated action. If empty, the default behaviour applies - the counter of failed pods, represented by the jobs's .status.failed field, is incremented and it is checked against the backoffLimit. This field cannot be used in combination with restartPolicy=OnFailure.",
"successPolicy": "successPolicy specifies the policy when the Job can be declared as succeeded. If empty, the default behavior applies - the Job is declared as succeeded only when the number of succeeded pods equals to the completions. When the field is specified, it must be immutable and works only for the Indexed Jobs. Once the Job meets the SuccessPolicy, the lingering pods are terminated.\n\nThis field is beta-level. To use this field, you must enable the `JobSuccessPolicy` feature gate (enabled by default).",
"backoffLimit": "Specifies the number of retries before marking this job failed. Defaults to 6",
"backoffLimitPerIndex": "Specifies the limit for the number of retries within an index before marking this index as failed. When enabled the number of failures per index is kept in the pod's batch.kubernetes.io/job-index-failure-count annotation. It can only be set when Job's completionMode=Indexed, and the Pod's restart policy is Never. The field is immutable. This field is beta-level. It can be used when the `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).",
"maxFailedIndexes": "Specifies the maximal number of failed indexes before marking the Job as failed, when backoffLimitPerIndex is set. Once the number of failed indexes exceeds this number the entire Job is marked as Failed and its execution is terminated. When left as null the job continues execution of all of its indexes and is marked with the `Complete` Job condition. It can only be specified when backoffLimitPerIndex is set. It can be null or up to completions. It is required and must be less than or equal to 10^4 when is completions greater than 10^5. This field is beta-level. It can be used when the `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).",
"backoffLimitPerIndex": "Specifies the limit for the number of retries within an index before marking this index as failed. When enabled the number of failures per index is kept in the pod's batch.kubernetes.io/job-index-failure-count annotation. It can only be set when Job's completionMode=Indexed, and the Pod's restart policy is Never. The field is immutable.",
"maxFailedIndexes": "Specifies the maximal number of failed indexes before marking the Job as failed, when backoffLimitPerIndex is set. Once the number of failed indexes exceeds this number the entire Job is marked as Failed and its execution is terminated. When left as null the job continues execution of all of its indexes and is marked with the `Complete` Job condition. It can only be specified when backoffLimitPerIndex is set. It can be null or up to completions. It is required and must be less than or equal to 10^4 when is completions greater than 10^5.",
"selector": "A label query over pods that should match the pod count. Normally, the system sets this field for you. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors",
"manualSelector": "manualSelector controls generation of pod labels and pod selectors. Leave `manualSelector` unset unless you are certain what you are doing. When false or unset, the system pick labels unique to this job and appends those labels to the pod template. When true, the user is responsible for picking unique labels and specifying the selector. Failure to pick a unique label may cause this and other jobs to not function correctly. However, You may see `manualSelector=true` in jobs that were created with the old `extensions/v1beta1` API. More info: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/#specifying-your-own-pod-selector",
"template": "Describes the pod that will be created when executing a job. The only allowed template.spec.restartPolicy values are \"Never\" or \"OnFailure\". More info: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/",
@ -144,7 +144,7 @@ var map_JobStatus = map[string]string{
"failed": "The number of pods which reached phase Failed. The value increases monotonically.",
"terminating": "The number of pods which are terminating (in phase Pending or Running and have a deletionTimestamp).\n\nThis field is beta-level. The job controller populates the field when the feature gate JobPodReplacementPolicy is enabled (enabled by default).",
"completedIndexes": "completedIndexes holds the completed indexes when .spec.completionMode = \"Indexed\" in a text format. The indexes are represented as decimal integers separated by commas. The numbers are listed in increasing order. Three or more consecutive numbers are compressed and represented by the first and last element of the series, separated by a hyphen. For example, if the completed indexes are 1, 3, 4, 5 and 7, they are represented as \"1,3-5,7\".",
"failedIndexes": "FailedIndexes holds the failed indexes when spec.backoffLimitPerIndex is set. The indexes are represented in the text format analogous as for the `completedIndexes` field, ie. they are kept as decimal integers separated by commas. The numbers are listed in increasing order. Three or more consecutive numbers are compressed and represented by the first and last element of the series, separated by a hyphen. For example, if the failed indexes are 1, 3, 4, 5 and 7, they are represented as \"1,3-5,7\". The set of failed indexes cannot overlap with the set of completed indexes.\n\nThis field is beta-level. It can be used when the `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).",
"failedIndexes": "FailedIndexes holds the failed indexes when spec.backoffLimitPerIndex is set. The indexes are represented in the text format analogous as for the `completedIndexes` field, ie. they are kept as decimal integers separated by commas. The numbers are listed in increasing order. Three or more consecutive numbers are compressed and represented by the first and last element of the series, separated by a hyphen. For example, if the failed indexes are 1, 3, 4, 5 and 7, they are represented as \"1,3-5,7\". The set of failed indexes cannot overlap with the set of completed indexes.",
"uncountedTerminatedPods": "uncountedTerminatedPods holds the UIDs of Pods that have terminated but the job controller hasn't yet accounted for in the status counters.\n\nThe job controller creates pods with a finalizer. When a pod terminates (succeeded or failed), the controller does three steps to account for it in the job status:\n\n1. Add the pod UID to the arrays in this field. 2. Remove the pod finalizer. 3. Remove the pod UID from the arrays while increasing the corresponding\n counter.\n\nOld jobs might not be tracked using this field, in which case the field remains null. The structure is empty for finished jobs.",
"ready": "The number of active pods which have a Ready condition and are not terminating (without a deletionTimestamp).",
}
@ -195,7 +195,7 @@ func (PodFailurePolicyOnPodConditionsPattern) SwaggerDoc() map[string]string {
var map_PodFailurePolicyRule = map[string]string{
"": "PodFailurePolicyRule describes how a pod failure is handled when the requirements are met. One of onExitCodes and onPodConditions, but not both, can be used in each rule.",
"action": "Specifies the action taken on a pod failure when the requirements are satisfied. Possible values are:\n\n- FailJob: indicates that the pod's job is marked as Failed and all\n running pods are terminated.\n- FailIndex: indicates that the pod's index is marked as Failed and will\n not be restarted.\n This value is beta-level. It can be used when the\n `JobBackoffLimitPerIndex` feature gate is enabled (enabled by default).\n- Ignore: indicates that the counter towards the .backoffLimit is not\n incremented and a replacement pod is created.\n- Count: indicates that the pod is handled in the default way - the\n counter towards the .backoffLimit is incremented.\nAdditional values are considered to be added in the future. Clients should react to an unknown action by skipping the rule.",
"action": "Specifies the action taken on a pod failure when the requirements are satisfied. Possible values are:\n\n- FailJob: indicates that the pod's job is marked as Failed and all\n running pods are terminated.\n- FailIndex: indicates that the pod's index is marked as Failed and will\n not be restarted.\n- Ignore: indicates that the counter towards the .backoffLimit is not\n incremented and a replacement pod is created.\n- Count: indicates that the pod is handled in the default way - the\n counter towards the .backoffLimit is incremented.\nAdditional values are considered to be added in the future. Clients should react to an unknown action by skipping the rule.",
"onExitCodes": "Represents the requirement on the container exit codes.",
"onPodConditions": "Represents the requirement on the pod conditions. The requirement is represented as a list of pod condition patterns. The requirement is satisfied if at least one pattern matches an actual pod condition. At most 20 elements are allowed.",
}

View File

@ -556,6 +556,10 @@
lockToDefault: false
preRelease: Beta
version: "1.29"
- default: true
lockToDefault: true
preRelease: GA
version: "1.33"
- name: JobManagedBy
versionedSpecs:
- default: false

View File

@ -37,6 +37,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
utilversion "k8s.io/apimachinery/pkg/util/version"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/apiserver/pkg/util/feature"
@ -824,6 +825,10 @@ func TestSuccessPolicy(t *testing.T) {
t.Run(name, func(t *testing.T) {
resetMetrics()
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobSuccessPolicy, tc.enableJobSuccessPolicy)
if !tc.enableBackoffLimitPerIndex {
// TODO: this will be removed in 1.36
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, feature.DefaultFeatureGate, utilversion.MustParse("1.32"))
}
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableBackoffLimitPerIndex)
ctx, cancel := startJobControllerAndWaitForCaches(t, restConfig)
@ -1028,6 +1033,7 @@ func TestBackoffLimitPerIndex_DelayedPodDeletion(t *testing.T) {
// TestBackoffLimitPerIndex_Reenabling tests handling of pod failures when
// reenabling the BackoffLimitPerIndex feature.
func TestBackoffLimitPerIndex_Reenabling(t *testing.T) {
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(t, feature.DefaultFeatureGate, utilversion.MustParse("1.32"))
t.Cleanup(setDurationDuringTest(&jobcontroller.DefaultJobPodFailureBackOff, fastPodFailureBackoff))
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, true)
@ -3425,6 +3431,10 @@ func BenchmarkLargeIndexedJob(b *testing.B) {
for name, tc := range cases {
b.Run(name, func(b *testing.B) {
enableJobBackoffLimitPerIndex := tc.backoffLimitPerIndex != nil
if !enableJobBackoffLimitPerIndex {
// TODO: this will be removed in 1.36
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(b, feature.DefaultFeatureGate, utilversion.MustParse("1.32"))
}
featuregatetesting.SetFeatureGateDuringTest(b, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, enableJobBackoffLimitPerIndex)
b.ResetTimer()
for n := 0; n < b.N; n++ {
@ -3512,6 +3522,10 @@ func BenchmarkLargeFailureHandling(b *testing.B) {
b.Run(name, func(b *testing.B) {
enableJobBackoffLimitPerIndex := tc.backoffLimitPerIndex != nil
timeout := ptr.Deref(tc.customTimeout, wait.ForeverTestTimeout)
if !enableJobBackoffLimitPerIndex {
// TODO: this will be removed in 1.36
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(b, feature.DefaultFeatureGate, utilversion.MustParse("1.32"))
}
featuregatetesting.SetFeatureGateDuringTest(b, feature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, enableJobBackoffLimitPerIndex)
b.ResetTimer()
for n := 0; n < b.N; n++ {