Support handling of pod failures with respect to the specified rules
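The diff below adds a table-driven test for the Job controller's pod failure policy: each rule pairs an action (Ignore, FailJob, or Count) with a requirement on container exit codes or on pod conditions, and rules are evaluated in order, with only the first matching rule applied. As a rough illustration of the OnExitCodes semantics the test cases rely on, here is a minimal, self-contained Go sketch; the types are simplified stand-ins rather than the real batch/v1 API, and this is not the controller's actual matching code:

package main

import "fmt"

// Simplified stand-ins for the batch API types exercised in the diff below.
type ExitCodesOperator string

const (
	OpIn    ExitCodesOperator = "In"
	OpNotIn ExitCodesOperator = "NotIn"
)

type OnExitCodesRequirement struct {
	ContainerName *string // nil means "any container"
	Operator      ExitCodesOperator
	Values        []int32
}

// matches reports whether a terminated container's exit code satisfies the
// requirement. A ContainerName restriction naming a different container makes
// the rule not apply at all.
func matches(req OnExitCodesRequirement, containerName string, exitCode int32) bool {
	if req.ContainerName != nil && *req.ContainerName != containerName {
		return false
	}
	inValues := false
	for _, v := range req.Values {
		if v == exitCode {
			inValues = true
			break
		}
	}
	if req.Operator == OpNotIn {
		return !inValues
	}
	return inValues
}

func main() {
	failJob := OnExitCodesRequirement{Operator: OpIn, Values: []int32{5, 6, 7}}
	fmt.Println(matches(failJob, "main-container", 5))  // true: the FailJob rule applies
	fmt.Println(matches(failJob, "main-container", 42)) // false: fall through to default handling
}

With In, the rule applies when the exit code is listed; with NotIn, when it is not. The ContainerName restriction is why the first test case below falls back to default failure handling even though an exit code matches.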
@@ -2019,6 +2019,962 @@ func TestSyncJobDeleted(t *testing.T) {
	}
}

func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
	indexedCompletionMode := batch.IndexedCompletion
	validObjectMeta := metav1.ObjectMeta{
		Name:      "foobar",
		UID:       uuid.NewUUID(),
		Namespace: metav1.NamespaceDefault,
	}
	validSelector := &metav1.LabelSelector{
		MatchLabels: map[string]string{"foo": "bar"},
	}
	validTemplate := v1.PodTemplateSpec{
		ObjectMeta: metav1.ObjectMeta{
			Labels: map[string]string{
				"foo": "bar",
			},
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{Image: "foo/bar"},
			},
		},
	}

	onExitCodeRules := []batch.PodFailurePolicyRule{
		{
			Action: batch.PodFailurePolicyActionIgnore,
			OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
				Operator: batch.PodFailurePolicyOnExitCodesOpIn,
				Values:   []int32{1, 2, 3},
			},
		},
		{
			Action: batch.PodFailurePolicyActionFailJob,
			OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
				Operator: batch.PodFailurePolicyOnExitCodesOpIn,
				Values:   []int32{5, 6, 7},
			},
		},
	}

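	// Each case drives syncJob with a fixed set of pod statuses and then
	// asserts on the resulting job conditions and status counters.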
	testCases := map[string]struct {
		enableJobPodFailurePolicy bool
		job                       batch.Job
		pods                      []v1.PodStatus
		wantConditions            *[]batch.JobCondition
		wantStatusFailed          int32
		wantStatusActive          int32
		wantStatusSucceeded       int32
	}{
"default handling for pod failure if the container matching the exit codes does not match the containerName restriction": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: []batch.PodFailurePolicyRule{
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionIgnore,
|
||||
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
|
||||
ContainerName: pointer.String("main-container"),
|
||||
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
|
||||
Values: []int32{1, 2, 3},
|
||||
},
|
||||
},
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionFailJob,
|
||||
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
|
||||
ContainerName: pointer.String("main-container"),
|
||||
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
|
||||
Values: []int32{5, 6, 7},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "monitoring-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "main-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 42,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: nil,
|
||||
wantStatusActive: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
wantStatusFailed: 1,
|
||||
},
|
||||
"running pod should not result in job fail based on OnExitCodes": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: onExitCodeRules,
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "main-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: nil,
|
||||
wantStatusActive: 1,
|
||||
wantStatusFailed: 0,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"fail job based on OnExitCodes": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: onExitCodeRules,
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "main-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: &[]batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobFailed,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "PodFailurePolicy",
|
||||
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
|
||||
},
|
||||
},
|
||||
wantStatusActive: 0,
|
||||
wantStatusFailed: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"job marked already as failure target with failed pod": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: onExitCodeRules,
|
||||
},
|
||||
},
|
||||
Status: batch.JobStatus{
|
||||
Conditions: []batch.JobCondition{
|
||||
{
|
||||
Type: batch.AlphaNoCompatGuaranteeJobFailureTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "PodFailurePolicy",
|
||||
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "main-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: &[]batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobFailed,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "PodFailurePolicy",
|
||||
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
|
||||
},
|
||||
},
|
||||
wantStatusActive: 0,
|
||||
wantStatusFailed: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"job marked already as failure target with failed pod, message based on already deleted pod": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: onExitCodeRules,
|
||||
},
|
||||
},
|
||||
Status: batch.JobStatus{
|
||||
Conditions: []batch.JobCondition{
|
||||
{
|
||||
Type: batch.AlphaNoCompatGuaranteeJobFailureTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "PodFailurePolicy",
|
||||
Message: "Container main-container for pod default/already-deleted-pod failed with exit code 5 matching FailJob rule at index 1",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "main-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: &[]batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobFailed,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "PodFailurePolicy",
|
||||
Message: "Container main-container for pod default/already-deleted-pod failed with exit code 5 matching FailJob rule at index 1",
|
||||
},
|
||||
},
|
||||
wantStatusActive: 0,
|
||||
wantStatusFailed: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"default handling for a failed pod when the feature is disabled even, despite matching rule": {
|
||||
enableJobPodFailurePolicy: false,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: onExitCodeRules,
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "main-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: nil,
|
||||
wantStatusActive: 1,
|
||||
wantStatusFailed: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"fail job with multiple pods": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(2),
|
||||
Completions: pointer.Int32(2),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: onExitCodeRules,
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodRunning,
|
||||
},
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "main-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: &[]batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobFailed,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "PodFailurePolicy",
|
||||
Message: "Container main-container for pod default/mypod-1 failed with exit code 5 matching FailJob rule at index 1",
|
||||
},
|
||||
},
|
||||
wantStatusActive: 0,
|
||||
wantStatusFailed: 2,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"fail indexed job based on OnExitCodes": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
CompletionMode: &indexedCompletionMode,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: onExitCodeRules,
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "main-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: &[]batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobFailed,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "PodFailurePolicy",
|
||||
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
|
||||
},
|
||||
},
|
||||
wantStatusActive: 0,
|
||||
wantStatusFailed: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"fail job based on OnExitCodes with NotIn operator": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: []batch.PodFailurePolicyRule{
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionFailJob,
|
||||
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
|
||||
Operator: batch.PodFailurePolicyOnExitCodesOpNotIn,
|
||||
Values: []int32{5, 6, 7},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "main-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 42,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: &[]batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobFailed,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "PodFailurePolicy",
|
||||
Message: "Container main-container for pod default/mypod-0 failed with exit code 42 matching FailJob rule at index 0",
|
||||
},
|
||||
},
|
||||
wantStatusActive: 0,
|
||||
wantStatusFailed: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"default handling job based on OnExitCodes with NotIn operator": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: []batch.PodFailurePolicyRule{
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionFailJob,
|
||||
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
|
||||
Operator: batch.PodFailurePolicyOnExitCodesOpNotIn,
|
||||
Values: []int32{5, 6, 7},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "main-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: nil,
|
||||
wantStatusActive: 1,
|
||||
wantStatusFailed: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"fail job based on OnExitCodes for InitContainer": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: onExitCodeRules,
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
InitContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "init-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "main-container",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 143,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: &[]batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobFailed,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "PodFailurePolicy",
|
||||
Message: "Container init-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
|
||||
},
|
||||
},
|
||||
wantStatusActive: 0,
|
||||
wantStatusFailed: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"ignore pod failure; both rules are matching, the first is executed only": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(0),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: onExitCodeRules,
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "container1",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 2,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "container2",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 6,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: nil,
|
||||
wantStatusActive: 1,
|
||||
wantStatusFailed: 0,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"ignore pod failure based on OnExitCodes": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(0),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: onExitCodeRules,
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: nil,
|
||||
wantStatusActive: 1,
|
||||
wantStatusFailed: 0,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"default job based on OnExitCodes": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(0),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: onExitCodeRules,
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 10,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: &[]batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobFailed,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "BackoffLimitExceeded",
|
||||
Message: "Job has reached the specified backoff limit",
|
||||
},
|
||||
},
|
||||
wantStatusActive: 0,
|
||||
wantStatusFailed: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"count pod failure based on OnExitCodes; both rules are matching, the first is executed only": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: []batch.PodFailurePolicyRule{
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionCount,
|
||||
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
|
||||
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
|
||||
Values: []int32{1, 2},
|
||||
},
|
||||
},
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionIgnore,
|
||||
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
|
||||
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
|
||||
Values: []int32{2, 3},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 2,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: nil,
|
||||
wantStatusActive: 1,
|
||||
wantStatusFailed: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"count pod failure based on OnPodConditions; both rules are matching, the first is executed only": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: []batch.PodFailurePolicyRule{
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionCount,
|
||||
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
|
||||
{
|
||||
Type: v1.PodConditionType("ResourceLimitExceeded"),
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionIgnore,
|
||||
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodConditionType("ResourceLimitExceeded"),
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: nil,
|
||||
wantStatusActive: 1,
|
||||
wantStatusFailed: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"ignore pod failure based on OnPodConditions": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(0),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: []batch.PodFailurePolicyRule{
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionIgnore,
|
||||
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: nil,
|
||||
wantStatusActive: 1,
|
||||
wantStatusFailed: 0,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
"fail job based on OnPodConditions": {
|
||||
enableJobPodFailurePolicy: true,
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: pointer.Int32(1),
|
||||
Completions: pointer.Int32(1),
|
||||
BackoffLimit: pointer.Int32(6),
|
||||
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||
Rules: []batch.PodFailurePolicyRule{
|
||||
{
|
||||
Action: batch.PodFailurePolicyActionFailJob,
|
||||
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
pods: []v1.PodStatus{
|
||||
{
|
||||
Phase: v1.PodFailed,
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantConditions: &[]batch.JobCondition{
|
||||
{
|
||||
Type: batch.JobFailed,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "PodFailurePolicy",
|
||||
Message: "Pod default/mypod-0 has condition DisruptionTarget matching FailJob rule at index 0",
|
||||
},
|
||||
},
|
||||
wantStatusActive: 0,
|
||||
wantStatusFailed: 1,
|
||||
wantStatusSucceeded: 0,
|
||||
},
|
||||
}
|
||||
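	// Run every case twice: with and without finalizer-based job tracking
	// (the JobTrackingWithFinalizers feature gate).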
	for _, wFinalizers := range []bool{false, true} {
		for name, tc := range testCases {
			t.Run(fmt.Sprintf("%s; finalizers=%t", name, wFinalizers), func(t *testing.T) {
				defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, wFinalizers)()
				defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodFailurePolicy, tc.enableJobPodFailurePolicy)()
				clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
				manager, sharedInformerFactory := newControllerFromClient(clientset, controller.NoResyncPeriodFunc)
				fakePodControl := controller.FakePodControl{}
				manager.podControl = &fakePodControl
				manager.podStoreSynced = alwaysReady
				manager.jobStoreSynced = alwaysReady
				job := &tc.job

				if wFinalizers {
					job.Annotations = map[string]string{
						batch.JobTrackingFinalizer: "",
					}
				}

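				// Capture the job passed to the stubbed status update so the
				// assertions below can inspect what syncJob would have written.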
				actual := job
				manager.updateStatusHandler = func(ctx context.Context, job *batch.Job) (*batch.Job, error) {
					actual = job
					return job, nil
				}
				sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
				for i, podStatus := range tc.pods {
					pb := buildPod().name(fmt.Sprintf("mypod-%d", i)).job(job).status(podStatus)
					if job.Spec.CompletionMode != nil && *job.Spec.CompletionMode == batch.IndexedCompletion {
						pb.index(fmt.Sprintf("%v", i))
					}
					if wFinalizers {
						pb.trackingFinalizer()
					}
					sharedInformerFactory.Core().V1().Pods().Informer().GetIndexer().Add(pb.Pod)
				}

				manager.syncJob(context.TODO(), testutil.GetKey(job, t))

				if tc.wantConditions != nil {
					for _, wantCondition := range *tc.wantConditions {
						conditions := getConditionsByType(actual.Status.Conditions, wantCondition.Type)
						if len(conditions) != 1 {
							t.Fatalf("Expected a single completion condition. Got %#v for type: %q", conditions, wantCondition.Type)
						}
						condition := *conditions[0]
						if diff := cmp.Diff(wantCondition, condition, cmpopts.IgnoreFields(batch.JobCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
							t.Errorf("Unexpected job condition (-want,+got):\n%s", diff)
						}
					}
				} else {
					if cond := hasTrueCondition(actual); cond != nil {
						t.Errorf("Got condition %s, want none", *cond)
					}
				}
				// validate status
				if actual.Status.Active != tc.wantStatusActive {
					t.Errorf("unexpected number of active pods. Expected %d, saw %d\n", tc.wantStatusActive, actual.Status.Active)
				}
				if actual.Status.Succeeded != tc.wantStatusSucceeded {
					t.Errorf("unexpected number of succeeded pods. Expected %d, saw %d\n", tc.wantStatusSucceeded, actual.Status.Succeeded)
				}
				if actual.Status.Failed != tc.wantStatusFailed {
					t.Errorf("unexpected number of failed pods. Expected %d, saw %d\n", tc.wantStatusFailed, actual.Status.Failed)
				}
			})
		}
	}
}

func TestSyncJobUpdateRequeue(t *testing.T) {
	clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
	DefaultJobBackOff = time.Duration(0) // overwrite the default value for testing
@@ -3449,6 +4405,11 @@ func (pb podBuilder) index(ix string) podBuilder {
	return pb
}

func (pb podBuilder) status(s v1.PodStatus) podBuilder {
	pb.Status = s
	return pb
}

func (pb podBuilder) phase(p v1.PodPhase) podBuilder {
	pb.Status.Phase = p
	return pb
}
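The OnPodConditions cases in the test above follow the same first-match-wins ordering; a rule matches when the failed pod carries a condition whose type and status equal one of the rule's patterns. A minimal sketch under the same caveat as before (simplified stand-in types, not the controller's actual matching code):

package main

import "fmt"

type PodCondition struct {
	Type   string
	Status string
}

// conditionsMatch reports whether any pattern is satisfied by any of the
// pod's conditions, comparing both type and status.
func conditionsMatch(patterns, podConditions []PodCondition) bool {
	for _, pattern := range patterns {
		for _, cond := range podConditions {
			if cond.Type == pattern.Type && cond.Status == pattern.Status {
				return true
			}
		}
	}
	return false
}

func main() {
	rulePatterns := []PodCondition{{Type: "DisruptionTarget", Status: "True"}}
	pod := []PodCondition{{Type: "DisruptionTarget", Status: "True"}}
	fmt.Println(conditionsMatch(rulePatterns, pod)) // true: the rule's action applies
}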