mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-19 18:02:01 +00:00
Wait for Pods to finish before considering Failed in Job (#113860)
* Wait for Pods to finish before considering Failed Limit behavior to feature gates PodDisruptionConditions and JobPodFailurePolicy and jobs with a podFailurePolicy. Change-Id: I926391cc2521b389c8e52962afb0d4a6a845ab8f * Remove check for unscheduled terminating pod Change-Id: I3dc05bb4ea3738604f01bf8cb5fc8cc0f6ea54ec
This commit is contained in:
parent
dc7fd39eb7
commit
7dc36bdf82
@ -1030,15 +1030,26 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
|
|||||||
podFailureCountByPolicyAction := map[string]int{}
|
podFailureCountByPolicyAction := map[string]int{}
|
||||||
for _, pod := range pods {
|
for _, pod := range pods {
|
||||||
if !hasJobTrackingFinalizer(pod) || expectedRmFinalizers.Has(string(pod.UID)) {
|
if !hasJobTrackingFinalizer(pod) || expectedRmFinalizers.Has(string(pod.UID)) {
|
||||||
|
// This pod was processed in a previous sync.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
podFinished := pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed
|
|
||||||
// Terminating pods are counted as failed. This guarantees that orphan Pods
|
// Terminating pods are counted as failed. This guarantees that orphan Pods
|
||||||
// count as failures.
|
// count as failures.
|
||||||
// Active pods are terminated when the job has completed, thus they count as
|
// Active pods are terminated when the job has completed, thus they count as
|
||||||
// failures as well.
|
// failures as well.
|
||||||
podTerminating := pod.DeletionTimestamp != nil || finishedCond != nil
|
considerTerminated := pod.DeletionTimestamp != nil || finishedCond != nil
|
||||||
if podFinished || podTerminating || job.DeletionTimestamp != nil {
|
|
||||||
|
if feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) && feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
|
||||||
|
// TODO(#113855): Stop limiting this behavior to Jobs with podFailurePolicy.
|
||||||
|
// For now, we do so to avoid affecting all running Jobs without the
|
||||||
|
// availability to opt out into the old behavior.
|
||||||
|
// We can also simplify the check to remove finalizers to:
|
||||||
|
// considerTerminated || job.DeletionTimestamp != nil
|
||||||
|
considerTerminated = podutil.IsPodTerminal(pod) ||
|
||||||
|
finishedCond != nil || // The Job is terminating. Any running Pod is considered failed.
|
||||||
|
isPodFailed(pod, job, true /* using finalizers */)
|
||||||
|
}
|
||||||
|
if podutil.IsPodTerminal(pod) || considerTerminated || job.DeletionTimestamp != nil {
|
||||||
podsToRemoveFinalizer = append(podsToRemoveFinalizer, pod)
|
podsToRemoveFinalizer = append(podsToRemoveFinalizer, pod)
|
||||||
}
|
}
|
||||||
if pod.Status.Phase == v1.PodSucceeded && !uncounted.failed.Has(string(pod.UID)) {
|
if pod.Status.Phase == v1.PodSucceeded && !uncounted.failed.Has(string(pod.UID)) {
|
||||||
@ -1054,7 +1065,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
|
|||||||
needsFlush = true
|
needsFlush = true
|
||||||
uncountedStatus.Succeeded = append(uncountedStatus.Succeeded, pod.UID)
|
uncountedStatus.Succeeded = append(uncountedStatus.Succeeded, pod.UID)
|
||||||
}
|
}
|
||||||
} else if pod.Status.Phase == v1.PodFailed || podTerminating {
|
} else if pod.Status.Phase == v1.PodFailed || considerTerminated {
|
||||||
ix := getCompletionIndex(pod.Annotations)
|
ix := getCompletionIndex(pod.Annotations)
|
||||||
if !uncounted.failed.Has(string(pod.UID)) && (!isIndexed || (ix != unknownCompletionIndex && ix < int(*job.Spec.Completions))) {
|
if !uncounted.failed.Has(string(pod.UID)) && (!isIndexed || (ix != unknownCompletionIndex && ix < int(*job.Spec.Completions))) {
|
||||||
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
|
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
|
||||||
@ -1346,7 +1357,7 @@ func getFailJobMessage(job *batch.Job, pods []*v1.Pod, uncounted sets.String) *s
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
for _, p := range pods {
|
for _, p := range pods {
|
||||||
if isPodFailed(p, uncounted != nil) {
|
if isPodFailed(p, job, uncounted != nil) {
|
||||||
jobFailureMessage, _, _ := matchPodFailurePolicy(job.Spec.PodFailurePolicy, p)
|
jobFailureMessage, _, _ := matchPodFailurePolicy(job.Spec.PodFailurePolicy, p)
|
||||||
if jobFailureMessage != nil {
|
if jobFailureMessage != nil {
|
||||||
return jobFailureMessage
|
return jobFailureMessage
|
||||||
@ -1368,13 +1379,13 @@ func getStatus(job *batch.Job, pods []*v1.Pod, uncounted *uncountedTerminatedPod
|
|||||||
}))
|
}))
|
||||||
failed += int32(countValidPodsWithFilter(job, pods, uncounted.Failed(), expectedRmFinalizers, func(p *v1.Pod) bool {
|
failed += int32(countValidPodsWithFilter(job, pods, uncounted.Failed(), expectedRmFinalizers, func(p *v1.Pod) bool {
|
||||||
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
|
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
|
||||||
if !isPodFailed(p, uncounted != nil) {
|
if !isPodFailed(p, job, uncounted != nil) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
_, countFailed, _ := matchPodFailurePolicy(job.Spec.PodFailurePolicy, p)
|
_, countFailed, _ := matchPodFailurePolicy(job.Spec.PodFailurePolicy, p)
|
||||||
return countFailed
|
return countFailed
|
||||||
} else {
|
} else {
|
||||||
return isPodFailed(p, uncounted != nil)
|
return isPodFailed(p, job, uncounted != nil)
|
||||||
}
|
}
|
||||||
}))
|
}))
|
||||||
return succeeded, failed
|
return succeeded, failed
|
||||||
@ -1724,7 +1735,15 @@ func ensureJobConditionStatus(list []batch.JobCondition, cType batch.JobConditio
|
|||||||
return list, false
|
return list, false
|
||||||
}
|
}
|
||||||
|
|
||||||
func isPodFailed(p *v1.Pod, wFinalizers bool) bool {
|
func isPodFailed(p *v1.Pod, job *batch.Job, wFinalizers bool) bool {
|
||||||
|
if feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) && feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
|
||||||
|
// When PodDisruptionConditions is enabled, orphan Pods and unschedulable
|
||||||
|
// terminating Pods are marked as Failed. So we only need to check the phase.
|
||||||
|
// TODO(#113855): Stop limiting this behavior to Jobs with podFailurePolicy.
|
||||||
|
// For now, we do so to avoid affecting all running Jobs without the
|
||||||
|
// availability to opt out into the old behavior.
|
||||||
|
return p.Status.Phase == v1.PodFailed
|
||||||
|
}
|
||||||
if p.Status.Phase == v1.PodFailed {
|
if p.Status.Phase == v1.PodFailed {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
@ -1986,6 +1986,7 @@ func TestSyncJobDeleted(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
||||||
|
now := metav1.Now()
|
||||||
indexedCompletionMode := batch.IndexedCompletion
|
indexedCompletionMode := batch.IndexedCompletion
|
||||||
validObjectMeta := metav1.ObjectMeta{
|
validObjectMeta := metav1.ObjectMeta{
|
||||||
Name: "foobar",
|
Name: "foobar",
|
||||||
@ -2026,13 +2027,15 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
testCases := map[string]struct {
|
testCases := map[string]struct {
|
||||||
enableJobPodFailurePolicy bool
|
wFinalizersExclusive *bool
|
||||||
job batch.Job
|
enableJobPodFailurePolicy bool
|
||||||
pods []v1.PodStatus
|
enablePodDisruptionConditions bool
|
||||||
wantConditions *[]batch.JobCondition
|
job batch.Job
|
||||||
wantStatusFailed int32
|
pods []v1.Pod
|
||||||
wantStatusActive int32
|
wantConditions *[]batch.JobCondition
|
||||||
wantStatusSucceeded int32
|
wantStatusFailed int32
|
||||||
|
wantStatusActive int32
|
||||||
|
wantStatusSucceeded int32
|
||||||
}{
|
}{
|
||||||
"default handling for pod failure if the container matching the exit codes does not match the containerName restriction": {
|
"default handling for pod failure if the container matching the exit codes does not match the containerName restriction": {
|
||||||
enableJobPodFailurePolicy: true,
|
enableJobPodFailurePolicy: true,
|
||||||
@ -2067,23 +2070,25 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
Name: "monitoring-container",
|
{
|
||||||
State: v1.ContainerState{
|
Name: "monitoring-container",
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 5,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 5,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
{
|
||||||
{
|
Name: "main-container",
|
||||||
Name: "main-container",
|
State: v1.ContainerState{
|
||||||
State: v1.ContainerState{
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
ExitCode: 42,
|
||||||
ExitCode: 42,
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2111,15 +2116,17 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodRunning,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodRunning,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
Name: "main-container",
|
{
|
||||||
State: v1.ContainerState{
|
Name: "main-container",
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 5,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 5,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2147,15 +2154,17 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
Name: "main-container",
|
{
|
||||||
State: v1.ContainerState{
|
Name: "main-container",
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 5,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 5,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2200,15 +2209,17 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
Name: "main-container",
|
{
|
||||||
State: v1.ContainerState{
|
Name: "main-container",
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 5,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 5,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2253,15 +2264,17 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
Name: "main-container",
|
{
|
||||||
State: v1.ContainerState{
|
Name: "main-container",
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 5,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 5,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2296,15 +2309,17 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
Name: "main-container",
|
{
|
||||||
State: v1.ContainerState{
|
Name: "main-container",
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 5,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 5,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2332,18 +2347,22 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodRunning,
|
Status: v1.PodStatus{
|
||||||
|
Phase: v1.PodRunning,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
Name: "main-container",
|
{
|
||||||
State: v1.ContainerState{
|
Name: "main-container",
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 5,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 5,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2379,15 +2398,17 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
Name: "main-container",
|
{
|
||||||
State: v1.ContainerState{
|
Name: "main-container",
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 5,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 5,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2430,15 +2451,17 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
Name: "main-container",
|
{
|
||||||
State: v1.ContainerState{
|
Name: "main-container",
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 42,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 42,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2481,15 +2504,17 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
Name: "main-container",
|
{
|
||||||
State: v1.ContainerState{
|
Name: "main-container",
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 5,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 5,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2517,25 +2542,27 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
InitContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
InitContainerStatuses: []v1.ContainerStatus{
|
||||||
Name: "init-container",
|
{
|
||||||
State: v1.ContainerState{
|
Name: "init-container",
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 5,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 5,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
{
|
||||||
{
|
Name: "main-container",
|
||||||
Name: "main-container",
|
State: v1.ContainerState{
|
||||||
State: v1.ContainerState{
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
ExitCode: 143,
|
||||||
ExitCode: 143,
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2570,23 +2597,25 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
Name: "container1",
|
{
|
||||||
State: v1.ContainerState{
|
Name: "container1",
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 2,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 2,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
{
|
||||||
{
|
Name: "container2",
|
||||||
Name: "container2",
|
State: v1.ContainerState{
|
||||||
State: v1.ContainerState{
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
ExitCode: 6,
|
||||||
ExitCode: 6,
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2614,14 +2643,16 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
State: v1.ContainerState{
|
{
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 1,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 1,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2649,14 +2680,16 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
State: v1.ContainerState{
|
{
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 10,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 10,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2706,14 +2739,16 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
ContainerStatuses: []v1.ContainerStatus{
|
Phase: v1.PodFailed,
|
||||||
{
|
ContainerStatuses: []v1.ContainerStatus{
|
||||||
State: v1.ContainerState{
|
{
|
||||||
Terminated: &v1.ContainerStateTerminated{
|
State: v1.ContainerState{
|
||||||
ExitCode: 2,
|
Terminated: &v1.ContainerStateTerminated{
|
||||||
|
ExitCode: 2,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2760,17 +2795,19 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
Conditions: []v1.PodCondition{
|
Phase: v1.PodFailed,
|
||||||
{
|
Conditions: []v1.PodCondition{
|
||||||
Type: v1.PodConditionType("ResourceLimitExceeded"),
|
{
|
||||||
Status: v1.ConditionTrue,
|
Type: v1.PodConditionType("ResourceLimitExceeded"),
|
||||||
},
|
Status: v1.ConditionTrue,
|
||||||
{
|
},
|
||||||
Type: v1.DisruptionTarget,
|
{
|
||||||
Status: v1.ConditionTrue,
|
Type: v1.DisruptionTarget,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2806,13 +2843,15 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
Conditions: []v1.PodCondition{
|
Phase: v1.PodFailed,
|
||||||
{
|
Conditions: []v1.PodCondition{
|
||||||
Type: v1.DisruptionTarget,
|
{
|
||||||
Status: v1.ConditionTrue,
|
Type: v1.DisruptionTarget,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2848,13 +2887,15 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pods: []v1.PodStatus{
|
pods: []v1.Pod{
|
||||||
{
|
{
|
||||||
Phase: v1.PodFailed,
|
Status: v1.PodStatus{
|
||||||
Conditions: []v1.PodCondition{
|
Phase: v1.PodFailed,
|
||||||
{
|
Conditions: []v1.PodCondition{
|
||||||
Type: v1.DisruptionTarget,
|
{
|
||||||
Status: v1.ConditionTrue,
|
Type: v1.DisruptionTarget,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2871,11 +2912,96 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
wantStatusFailed: 1,
|
wantStatusFailed: 1,
|
||||||
wantStatusSucceeded: 0,
|
wantStatusSucceeded: 0,
|
||||||
},
|
},
|
||||||
|
"terminating Pod considered failed when PodDisruptionConditions is disabled": {
|
||||||
|
wFinalizersExclusive: pointer.Bool(true),
|
||||||
|
enableJobPodFailurePolicy: true,
|
||||||
|
job: batch.Job{
|
||||||
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
|
ObjectMeta: validObjectMeta,
|
||||||
|
Spec: batch.JobSpec{
|
||||||
|
Parallelism: pointer.Int32(1),
|
||||||
|
Selector: validSelector,
|
||||||
|
Template: validTemplate,
|
||||||
|
BackoffLimit: pointer.Int32(0),
|
||||||
|
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||||
|
Rules: []batch.PodFailurePolicyRule{
|
||||||
|
{
|
||||||
|
Action: batch.PodFailurePolicyActionCount,
|
||||||
|
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
|
||||||
|
{
|
||||||
|
Type: v1.DisruptionTarget,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
pods: []v1.Pod{
|
||||||
|
{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
DeletionTimestamp: &now,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantConditions: &[]batch.JobCondition{
|
||||||
|
{
|
||||||
|
Type: batch.JobFailed,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
Reason: "BackoffLimitExceeded",
|
||||||
|
Message: "Job has reached the specified backoff limit",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantStatusFailed: 1,
|
||||||
|
},
|
||||||
|
"terminating Pod not considered failed when PodDisruptionConditions is enabled": {
|
||||||
|
enableJobPodFailurePolicy: true,
|
||||||
|
enablePodDisruptionConditions: true,
|
||||||
|
job: batch.Job{
|
||||||
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
|
ObjectMeta: validObjectMeta,
|
||||||
|
Spec: batch.JobSpec{
|
||||||
|
Parallelism: pointer.Int32(1),
|
||||||
|
Selector: validSelector,
|
||||||
|
Template: validTemplate,
|
||||||
|
BackoffLimit: pointer.Int32(0),
|
||||||
|
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||||
|
Rules: []batch.PodFailurePolicyRule{
|
||||||
|
{
|
||||||
|
Action: batch.PodFailurePolicyActionCount,
|
||||||
|
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
|
||||||
|
{
|
||||||
|
Type: v1.DisruptionTarget,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
pods: []v1.Pod{
|
||||||
|
{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
DeletionTimestamp: &now,
|
||||||
|
},
|
||||||
|
Status: v1.PodStatus{
|
||||||
|
Phase: v1.PodRunning,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantStatusActive: 1, // This is a replacement Pod: the terminating Pod is neither active nor failed.
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, wFinalizers := range []bool{false, true} {
|
for _, wFinalizers := range []bool{false, true} {
|
||||||
for name, tc := range testCases {
|
for name, tc := range testCases {
|
||||||
t.Run(fmt.Sprintf("%s; finalizers=%t", name, wFinalizers), func(t *testing.T) {
|
t.Run(fmt.Sprintf("%s; finalizers=%t", name, wFinalizers), func(t *testing.T) {
|
||||||
|
if tc.wFinalizersExclusive != nil && *tc.wFinalizersExclusive != wFinalizers {
|
||||||
|
t.Skipf("Test is exclusive for wFinalizers=%t", *tc.wFinalizersExclusive)
|
||||||
|
}
|
||||||
defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodFailurePolicy, tc.enableJobPodFailurePolicy)()
|
defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodFailurePolicy, tc.enableJobPodFailurePolicy)()
|
||||||
|
defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, tc.enablePodDisruptionConditions)()
|
||||||
clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
|
clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
|
||||||
manager, sharedInformerFactory := newControllerFromClient(clientset, controller.NoResyncPeriodFunc)
|
manager, sharedInformerFactory := newControllerFromClient(clientset, controller.NoResyncPeriodFunc)
|
||||||
fakePodControl := controller.FakePodControl{}
|
fakePodControl := controller.FakePodControl{}
|
||||||
@ -2896,8 +3022,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
return job, nil
|
return job, nil
|
||||||
}
|
}
|
||||||
sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
|
sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
|
||||||
for i, podStatus := range tc.pods {
|
for i, pod := range tc.pods {
|
||||||
pb := buildPod().name(fmt.Sprintf("mypod-%d", i)).job(job).status(podStatus)
|
pod := pod
|
||||||
|
pb := podBuilder{Pod: &pod}.name(fmt.Sprintf("mypod-%d", i)).job(job)
|
||||||
if job.Spec.CompletionMode != nil && *job.Spec.CompletionMode == batch.IndexedCompletion {
|
if job.Spec.CompletionMode != nil && *job.Spec.CompletionMode == batch.IndexedCompletion {
|
||||||
pb.index(fmt.Sprintf("%v", i))
|
pb.index(fmt.Sprintf("%v", i))
|
||||||
}
|
}
|
||||||
@ -4363,11 +4490,6 @@ func (pb podBuilder) index(ix string) podBuilder {
|
|||||||
return pb
|
return pb
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pb podBuilder) status(s v1.PodStatus) podBuilder {
|
|
||||||
pb.Status = s
|
|
||||||
return pb
|
|
||||||
}
|
|
||||||
|
|
||||||
func (pb podBuilder) phase(p v1.PodPhase) podBuilder {
|
func (pb podBuilder) phase(p v1.PodPhase) podBuilder {
|
||||||
pb.Status.Phase = p
|
pb.Status.Phase = p
|
||||||
return pb
|
return pb
|
||||||
|
@ -94,8 +94,6 @@ func getMatchingContainerFromList(containerStatuses []v1.ContainerStatus, requir
|
|||||||
for _, containerStatus := range containerStatuses {
|
for _, containerStatus := range containerStatuses {
|
||||||
if containerStatus.State.Terminated == nil {
|
if containerStatus.State.Terminated == nil {
|
||||||
// This container is still terminating. There is no exit code to match.
|
// This container is still terminating. There is no exit code to match.
|
||||||
// TODO(#113855): Remove this check when it's guaranteed that the
|
|
||||||
// container is terminated.
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if requirement.ContainerName == nil || *requirement.ContainerName == containerStatus.Name {
|
if requirement.ContainerName == nil || *requirement.ContainerName == containerStatus.Name {
|
||||||
|
@ -538,6 +538,7 @@ func CreateZoneID(region, zone string) string {
|
|||||||
// GetKey is a helper function used by controllers unit tests to get the
|
// GetKey is a helper function used by controllers unit tests to get the
|
||||||
// key for a given kubernetes resource.
|
// key for a given kubernetes resource.
|
||||||
func GetKey(obj interface{}, t *testing.T) string {
|
func GetKey(obj interface{}, t *testing.T) string {
|
||||||
|
t.Helper()
|
||||||
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
|
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
|
||||||
if ok {
|
if ok {
|
||||||
// if tombstone, try getting the value from tombstone.Obj
|
// if tombstone, try getting the value from tombstone.Obj
|
||||||
|
Loading…
Reference in New Issue
Block a user