Support handling of pod failures with respect to the specified rules
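
The job controller now matches failed pods against the Job's spec.podFailurePolicy rules before falling back to the default backoffLimit handling. Rules are evaluated in order and support three actions: Ignore (the failure does not count), Count (the failure counts as usual), and FailJob (the whole Job fails immediately). A minimal sketch of such a policy, using the batch types exercised by the tests below (the concrete exit code and the DisruptionTarget rule are illustrative, not part of this commit's tests verbatim):

policy := &batch.PodFailurePolicy{
    Rules: []batch.PodFailurePolicyRule{
        {
            // Do not count failures caused by a disruption (e.g. node drain).
            Action: batch.PodFailurePolicyActionIgnore,
            OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
                {Type: v1.AlphaNoCompatGuaranteeDisruptionTarget, Status: v1.ConditionTrue},
            },
        },
        {
            // Fail the whole Job on a non-retriable exit code.
            Action: batch.PodFailurePolicyActionFailJob,
            OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
                Operator: batch.PodFailurePolicyOnExitCodesOpIn,
                Values:   []int32{42},
            },
        },
    },
}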

Michal Wozniak
2022-08-04 08:21:32 +02:00
parent c8edeab234
commit bf9ce70de3
43 changed files with 5934 additions and 127 deletions

@@ -2019,6 +2019,962 @@ func TestSyncJobDeleted(t *testing.T) {
}
}
func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
indexedCompletionMode := batch.IndexedCompletion
validObjectMeta := metav1.ObjectMeta{
Name: "foobar",
UID: uuid.NewUUID(),
Namespace: metav1.NamespaceDefault,
}
validSelector := &metav1.LabelSelector{
MatchLabels: map[string]string{"foo": "bar"},
}
validTemplate := v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"foo": "bar",
},
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{Image: "foo/bar"},
},
},
}
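// onExitCodeRules is shared by most cases below: exit codes 1-3 are ignored
// (the failure does not count against backoffLimit) and exit codes 5-7 fail
// the Job immediately. Rules are evaluated in order; only the first matching
// rule applies.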
onExitCodeRules := []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{1, 2, 3},
},
},
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{5, 6, 7},
},
},
}
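// Each case runs syncJob once and verifies the resulting Job conditions
// (wantConditions) together with the Active/Failed/Succeeded status counters.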
testCases := map[string]struct {
enableJobPodFailurePolicy bool
job batch.Job
pods []v1.PodStatus
wantConditions *[]batch.JobCondition
wantStatusFailed int32
wantStatusActive int32
wantStatusSucceeded int32
}{
"default handling for pod failure if the container matching the exit codes does not match the containerName restriction": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
ContainerName: pointer.String("main-container"),
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{1, 2, 3},
},
},
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
ContainerName: pointer.String("main-container"),
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{5, 6, 7},
},
},
},
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "monitoring-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 5,
},
},
},
{
Name: "main-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 42,
},
},
},
},
},
},
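// Exit code 5 occurs in monitoring-container, which the rules exclude via the
// containerName restriction, and main-container's exit code 42 matches no rule,
// so default handling counts the failure and starts a replacement pod.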
wantConditions: nil,
wantStatusActive: 1,
wantStatusSucceeded: 0,
wantStatusFailed: 1,
},
"running pod should not result in job fail based on OnExitCodes": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: onExitCodeRules,
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "main-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 5,
},
},
},
},
},
},
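// Rules are evaluated only for pods in the Failed phase; a terminated
// container in a running pod does not trigger the FailJob rule.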
wantConditions: nil,
wantStatusActive: 1,
wantStatusFailed: 0,
wantStatusSucceeded: 0,
},
"fail job based on OnExitCodes": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: onExitCodeRules,
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "main-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 5,
},
},
},
},
},
},
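// The JobFailed condition message names the container, pod, exit code, and
// the index of the matching rule.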
wantConditions: &[]batch.JobCondition{
{
Type: batch.JobFailed,
Status: v1.ConditionTrue,
Reason: "PodFailurePolicy",
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
},
},
wantStatusActive: 0,
wantStatusFailed: 1,
wantStatusSucceeded: 0,
},
"job marked already as failure target with failed pod": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: onExitCodeRules,
},
},
Status: batch.JobStatus{
Conditions: []batch.JobCondition{
{
Type: batch.AlphaNoCompatGuaranteeJobFailureTarget,
Status: v1.ConditionTrue,
Reason: "PodFailurePolicy",
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
},
},
},
},
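// The interim FailureTarget condition recorded by a previous sync is promoted
// to a terminal JobFailed condition with the same reason and message.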
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "main-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 5,
},
},
},
},
},
},
wantConditions: &[]batch.JobCondition{
{
Type: batch.JobFailed,
Status: v1.ConditionTrue,
Reason: "PodFailurePolicy",
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
},
},
wantStatusActive: 0,
wantStatusFailed: 1,
wantStatusSucceeded: 0,
},
"job marked already as failure target with failed pod, message based on already deleted pod": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: onExitCodeRules,
},
},
Status: batch.JobStatus{
Conditions: []batch.JobCondition{
{
Type: batch.AlphaNoCompatGuaranteeJobFailureTarget,
Status: v1.ConditionTrue,
Reason: "PodFailurePolicy",
Message: "Container main-container for pod default/already-deleted-pod failed with exit code 5 matching FailJob rule at index 1",
},
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "main-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 5,
},
},
},
},
},
},
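// The message is carried over from the interim condition, so it can reference
// a pod that no longer exists.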
wantConditions: &[]batch.JobCondition{
{
Type: batch.JobFailed,
Status: v1.ConditionTrue,
Reason: "PodFailurePolicy",
Message: "Container main-container for pod default/already-deleted-pod failed with exit code 5 matching FailJob rule at index 1",
},
},
wantStatusActive: 0,
wantStatusFailed: 1,
wantStatusSucceeded: 0,
},
"default handling for a failed pod when the feature is disabled even, despite matching rule": {
enableJobPodFailurePolicy: false,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: onExitCodeRules,
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "main-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 5,
},
},
},
},
},
},
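// With the JobPodFailurePolicy feature gate disabled the rules are ignored:
// the failure counts against backoffLimit and a replacement pod is started.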
wantConditions: nil,
wantStatusActive: 1,
wantStatusFailed: 1,
wantStatusSucceeded: 0,
},
"fail job with multiple pods": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(2),
Completions: pointer.Int32(2),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: onExitCodeRules,
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodRunning,
},
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "main-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 5,
},
},
},
},
},
},
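// Failing the Job terminates the remaining running pod as well, so both pods
// are counted as failed.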
wantConditions: &[]batch.JobCondition{
{
Type: batch.JobFailed,
Status: v1.ConditionTrue,
Reason: "PodFailurePolicy",
Message: "Container main-container for pod default/mypod-1 failed with exit code 5 matching FailJob rule at index 1",
},
},
wantStatusActive: 0,
wantStatusFailed: 2,
wantStatusSucceeded: 0,
},
"fail indexed job based on OnExitCodes": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
CompletionMode: &indexedCompletionMode,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: onExitCodeRules,
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "main-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 5,
},
},
},
},
},
},
wantConditions: &[]batch.JobCondition{
{
Type: batch.JobFailed,
Status: v1.ConditionTrue,
Reason: "PodFailurePolicy",
Message: "Container main-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
},
},
wantStatusActive: 0,
wantStatusFailed: 1,
wantStatusSucceeded: 0,
},
"fail job based on OnExitCodes with NotIn operator": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpNotIn,
Values: []int32{5, 6, 7},
},
},
},
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "main-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 42,
},
},
},
},
},
},
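// Exit code 42 is not in {5, 6, 7}, so the NotIn rule matches and fails the Job.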
wantConditions: &[]batch.JobCondition{
{
Type: batch.JobFailed,
Status: v1.ConditionTrue,
Reason: "PodFailurePolicy",
Message: "Container main-container for pod default/mypod-0 failed with exit code 42 matching FailJob rule at index 0",
},
},
wantStatusActive: 0,
wantStatusFailed: 1,
wantStatusSucceeded: 0,
},
"default handling job based on OnExitCodes with NotIn operator": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionFailJob,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpNotIn,
Values: []int32{5, 6, 7},
},
},
},
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "main-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 5,
},
},
},
},
},
},
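// Exit code 5 is in the NotIn set, so the rule does not match and default
// handling applies.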
wantConditions: nil,
wantStatusActive: 1,
wantStatusFailed: 1,
wantStatusSucceeded: 0,
},
"fail job based on OnExitCodes for InitContainer": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: onExitCodeRules,
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
InitContainerStatuses: []v1.ContainerStatus{
{
Name: "init-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 5,
},
},
},
},
ContainerStatuses: []v1.ContainerStatus{
{
Name: "main-container",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 143,
},
},
},
},
},
},
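// Init container statuses are matched against the rules too; init-container's
// exit code 5 triggers the FailJob rule even though main-container exited
// with 143.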
wantConditions: &[]batch.JobCondition{
{
Type: batch.JobFailed,
Status: v1.ConditionTrue,
Reason: "PodFailurePolicy",
Message: "Container init-container for pod default/mypod-0 failed with exit code 5 matching FailJob rule at index 1",
},
},
wantStatusActive: 0,
wantStatusFailed: 1,
wantStatusSucceeded: 0,
},
"ignore pod failure; both rules are matching, the first is executed only": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(0),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: onExitCodeRules,
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "container1",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 2,
},
},
},
{
Name: "container2",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 6,
},
},
},
},
},
},
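// container1's exit code 2 matches the Ignore rule at index 0 before
// container2's exit code 6 can match FailJob, so despite backoffLimit: 0 the
// failure is ignored and a replacement pod is started.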
wantConditions: nil,
wantStatusActive: 1,
wantStatusFailed: 0,
wantStatusSucceeded: 0,
},
"ignore pod failure based on OnExitCodes": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(0),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: onExitCodeRules,
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 1,
},
},
},
},
},
},
wantConditions: nil,
wantStatusActive: 1,
wantStatusFailed: 0,
wantStatusSucceeded: 0,
},
"default job based on OnExitCodes": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(0),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: onExitCodeRules,
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 10,
},
},
},
},
},
},
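// Exit code 10 matches no rule, so default handling applies and, with
// backoffLimit: 0, the Job fails with BackoffLimitExceeded rather than
// PodFailurePolicy.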
wantConditions: &[]batch.JobCondition{
{
Type: batch.JobFailed,
Status: v1.ConditionTrue,
Reason: "BackoffLimitExceeded",
Message: "Job has reached the specified backoff limit",
},
},
wantStatusActive: 0,
wantStatusFailed: 1,
wantStatusSucceeded: 0,
},
"count pod failure based on OnExitCodes; both rules are matching, the first is executed only": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionCount,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{1, 2},
},
},
{
Action: batch.PodFailurePolicyActionIgnore,
OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
Operator: batch.PodFailurePolicyOnExitCodesOpIn,
Values: []int32{2, 3},
},
},
},
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 2,
},
},
},
},
},
},
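// Exit code 2 matches both rules, but the Count rule at index 0 wins: the
// failure is counted against backoffLimit and the Job keeps running.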
wantConditions: nil,
wantStatusActive: 1,
wantStatusFailed: 1,
wantStatusSucceeded: 0,
},
"count pod failure based on OnPodConditions; both rules are matching, the first is executed only": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionCount,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.PodConditionType("ResourceLimitExceeded"),
Status: v1.ConditionTrue,
},
},
},
{
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Status: v1.ConditionTrue,
},
},
},
},
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.PodConditionType("ResourceLimitExceeded"),
Status: v1.ConditionTrue,
},
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Status: v1.ConditionTrue,
},
},
},
},
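// Both pod conditions match a rule, but the Count rule at index 0 is applied
// first, so the failure is counted rather than ignored.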
wantConditions: nil,
wantStatusActive: 1,
wantStatusFailed: 1,
wantStatusSucceeded: 0,
},
"ignore pod failure based on OnPodConditions": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(0),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionIgnore,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Status: v1.ConditionTrue,
},
},
},
},
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Status: v1.ConditionTrue,
},
},
},
},
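// The DisruptionTarget condition matches the Ignore rule, so the failure is
// not counted even though backoffLimit is 0.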
wantConditions: nil,
wantStatusActive: 1,
wantStatusFailed: 0,
wantStatusSucceeded: 0,
},
"fail job based on OnPodConditions": {
enableJobPodFailurePolicy: true,
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: pointer.Int32(1),
Completions: pointer.Int32(1),
BackoffLimit: pointer.Int32(6),
PodFailurePolicy: &batch.PodFailurePolicy{
Rules: []batch.PodFailurePolicyRule{
{
Action: batch.PodFailurePolicyActionFailJob,
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Status: v1.ConditionTrue,
},
},
},
},
},
},
},
pods: []v1.PodStatus{
{
Phase: v1.PodFailed,
Conditions: []v1.PodCondition{
{
Type: v1.AlphaNoCompatGuaranteeDisruptionTarget,
Status: v1.ConditionTrue,
},
},
},
},
wantConditions: &[]batch.JobCondition{
{
Type: batch.JobFailed,
Status: v1.ConditionTrue,
Reason: "PodFailurePolicy",
Message: "Pod default/mypod-0 has condition DisruptionTarget matching FailJob rule at index 0",
},
},
wantStatusActive: 0,
wantStatusFailed: 1,
wantStatusSucceeded: 0,
},
}
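// Every case is exercised with and without the JobTrackingWithFinalizers
// feature, since pod accounting differs between the two tracking modes.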
for _, wFinalizers := range []bool{false, true} {
for name, tc := range testCases {
t.Run(fmt.Sprintf("%s; finalizers=%t", name, wFinalizers), func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, wFinalizers)()
defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodFailurePolicy, tc.enableJobPodFailurePolicy)()
clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
manager, sharedInformerFactory := newControllerFromClient(clientset, controller.NoResyncPeriodFunc)
fakePodControl := controller.FakePodControl{}
manager.podControl = &fakePodControl
manager.podStoreSynced = alwaysReady
manager.jobStoreSynced = alwaysReady
job := &tc.job
if wFinalizers {
job.Annotations = map[string]string{
batch.JobTrackingFinalizer: "",
}
}
actual := job
manager.updateStatusHandler = func(ctx context.Context, job *batch.Job) (*batch.Job, error) {
actual = job
return job, nil
}
sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
for i, podStatus := range tc.pods {
pb := buildPod().name(fmt.Sprintf("mypod-%d", i)).job(job).status(podStatus)
if job.Spec.CompletionMode != nil && *job.Spec.CompletionMode == batch.IndexedCompletion {
pb.index(fmt.Sprintf("%v", i))
}
if wFinalizers {
pb.trackingFinalizer()
}
sharedInformerFactory.Core().V1().Pods().Informer().GetIndexer().Add(pb.Pod)
}
manager.syncJob(context.TODO(), testutil.GetKey(job, t))
if tc.wantConditions != nil {
for _, wantCondition := range *tc.wantConditions {
conditions := getConditionsByType(actual.Status.Conditions, wantCondition.Type)
if len(conditions) != 1 {
t.Fatalf("Expected a single completion condition. Got %#v for type: %q", conditions, wantCondition.Type)
}
condition := *conditions[0]
if diff := cmp.Diff(wantCondition, condition, cmpopts.IgnoreFields(batch.JobCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
t.Errorf("Unexpected job condition (-want,+got):\n%s", diff)
}
}
} else {
if cond := hasTrueCondition(actual); cond != nil {
t.Errorf("Got condition %s, want none", *cond)
}
}
// validate status
if actual.Status.Active != tc.wantStatusActive {
t.Errorf("unexpected number of active pods. Expected %d, saw %d\n", tc.wantStatusActive, actual.Status.Active)
}
if actual.Status.Succeeded != tc.wantStatusSucceeded {
t.Errorf("unexpected number of succeeded pods. Expected %d, saw %d\n", tc.wantStatusSucceeded, actual.Status.Succeeded)
}
if actual.Status.Failed != tc.wantStatusFailed {
t.Errorf("unexpected number of failed pods. Expected %d, saw %d\n", tc.wantStatusFailed, actual.Status.Failed)
}
})
}
}
}
func TestSyncJobUpdateRequeue(t *testing.T) {
clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
DefaultJobBackOff = time.Duration(0) // overwrite the default value for testing
@@ -3449,6 +4405,11 @@ func (pb podBuilder) index(ix string) podBuilder {
return pb
}
func (pb podBuilder) status(s v1.PodStatus) podBuilder {
pb.Status = s
return pb
}
func (pb podBuilder) phase(p v1.PodPhase) podBuilder {
pb.Status.Phase = p
return pb
}