Mirror of https://github.com/k3s-io/kubernetes.git
Wait for Pods to finish before considering Failed in Job (#113860)
* Wait for Pods to finish before considering Failed

  Limit the new behavior to the PodDisruptionConditions and JobPodFailurePolicy
  feature gates, and to Jobs that set a podFailurePolicy.

  Change-Id: I926391cc2521b389c8e52962afb0d4a6a845ab8f

* Remove check for unscheduled terminating pod

  Change-Id: I3dc05bb4ea3738604f01bf8cb5fc8cc0f6ea54ec
parent dc7fd39eb7
commit 7dc36bdf82
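The change in a nutshell: when both feature gates are enabled and the Job sets a podFailurePolicy, a deleted-but-still-running Pod is no longer counted as failed immediately; the controller waits until the Pod reaches a terminal phase. The sketch below is an editor's illustration of that rule with simplified stand-in types; it is not code from this commit.

    package main

    import "fmt"

    type podPhase string

    const (
        podRunning podPhase = "Running"
        podFailed  podPhase = "Failed"
    )

    // pod is a simplified stand-in for v1.Pod; terminating corresponds to
    // DeletionTimestamp != nil.
    type pod struct {
        phase       podPhase
        terminating bool
    }

    // considerFailed mirrors the commit's intent. gatedAndPolicySet stands for
    // "PodDisruptionConditions and JobPodFailurePolicy gates on, and the Job
    // has a podFailurePolicy".
    func considerFailed(p pod, gatedAndPolicySet bool) bool {
        if gatedAndPolicySet {
            // New behavior: only a terminal phase counts; wait for the kubelet
            // to finish the Pod before accounting it as failed.
            return p.phase == podFailed
        }
        // Old behavior: a terminating Pod counts as failed right away.
        return p.phase == podFailed || p.terminating
    }

    func main() {
        deletedButRunning := pod{phase: podRunning, terminating: true}
        fmt.Println(considerFailed(deletedButRunning, false)) // true: old accounting
        fmt.Println(considerFailed(deletedButRunning, true))  // false: wait for the Pod to finish
    }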
@@ -1030,15 +1030,26 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
     podFailureCountByPolicyAction := map[string]int{}
     for _, pod := range pods {
         if !hasJobTrackingFinalizer(pod) || expectedRmFinalizers.Has(string(pod.UID)) {
+            // This pod was processed in a previous sync.
             continue
         }
-        podFinished := pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed
         // Terminating pods are counted as failed. This guarantees that orphan Pods
         // count as failures.
         // Active pods are terminated when the job has completed, thus they count as
         // failures as well.
-        podTerminating := pod.DeletionTimestamp != nil || finishedCond != nil
-        if podFinished || podTerminating || job.DeletionTimestamp != nil {
+        considerTerminated := pod.DeletionTimestamp != nil || finishedCond != nil
+        if feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) && feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
+            // TODO(#113855): Stop limiting this behavior to Jobs with podFailurePolicy.
+            // For now, we do so to avoid affecting all running Jobs without the
+            // ability to opt out into the old behavior.
+            // We can also simplify the check to remove finalizers to:
+            // considerTerminated || job.DeletionTimestamp != nil
+            considerTerminated = podutil.IsPodTerminal(pod) ||
+                finishedCond != nil || // The Job is terminating. Any running Pod is considered failed.
+                isPodFailed(pod, job, true /* using finalizers */)
+        }
+        if podutil.IsPodTerminal(pod) || considerTerminated || job.DeletionTimestamp != nil {
             podsToRemoveFinalizer = append(podsToRemoveFinalizer, pod)
         }
         if pod.Status.Phase == v1.PodSucceeded && !uncounted.failed.Has(string(pod.UID)) {
@@ -1054,7 +1065,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
                 needsFlush = true
                 uncountedStatus.Succeeded = append(uncountedStatus.Succeeded, pod.UID)
             }
-        } else if pod.Status.Phase == v1.PodFailed || podTerminating {
+        } else if pod.Status.Phase == v1.PodFailed || considerTerminated {
             ix := getCompletionIndex(pod.Annotations)
             if !uncounted.failed.Has(string(pod.UID)) && (!isIndexed || (ix != unknownCompletionIndex && ix < int(*job.Spec.Completions))) {
                 if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
@@ -1346,7 +1357,7 @@ func getFailJobMessage(job *batch.Job, pods []*v1.Pod, uncounted sets.String) *s
         return nil
     }
     for _, p := range pods {
-        if isPodFailed(p, uncounted != nil) {
+        if isPodFailed(p, job, uncounted != nil) {
             jobFailureMessage, _, _ := matchPodFailurePolicy(job.Spec.PodFailurePolicy, p)
             if jobFailureMessage != nil {
                 return jobFailureMessage
@@ -1368,13 +1379,13 @@ func getStatus(job *batch.Job, pods []*v1.Pod, uncounted *uncountedTerminatedPod
     }))
     failed += int32(countValidPodsWithFilter(job, pods, uncounted.Failed(), expectedRmFinalizers, func(p *v1.Pod) bool {
         if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
-            if !isPodFailed(p, uncounted != nil) {
+            if !isPodFailed(p, job, uncounted != nil) {
                 return false
             }
             _, countFailed, _ := matchPodFailurePolicy(job.Spec.PodFailurePolicy, p)
             return countFailed
         } else {
-            return isPodFailed(p, uncounted != nil)
+            return isPodFailed(p, job, uncounted != nil)
         }
     }))
     return succeeded, failed
@@ -1724,7 +1735,15 @@ func ensureJobConditionStatus(list []batch.JobCondition, cType batch.JobConditio
     return list, false
 }
 
-func isPodFailed(p *v1.Pod, wFinalizers bool) bool {
+func isPodFailed(p *v1.Pod, job *batch.Job, wFinalizers bool) bool {
+    if feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) && feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
+        // When PodDisruptionConditions is enabled, orphan Pods and unschedulable
+        // terminating Pods are marked as Failed. So we only need to check the phase.
+        // TODO(#113855): Stop limiting this behavior to Jobs with podFailurePolicy.
+        // For now, we do so to avoid affecting all running Jobs without the
+        // ability to opt out into the old behavior.
+        return p.Status.Phase == v1.PodFailed
+    }
     if p.Status.Phase == v1.PodFailed {
         return true
     }
@@ -1986,6 +1986,7 @@ func TestSyncJobDeleted(t *testing.T) {
 }
 
 func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
+    now := metav1.Now()
     indexedCompletionMode := batch.IndexedCompletion
     validObjectMeta := metav1.ObjectMeta{
         Name: "foobar",
@@ -2026,9 +2027,11 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
     }
 
     testCases := map[string]struct {
+        wFinalizersExclusive          *bool
         enableJobPodFailurePolicy     bool
+        enablePodDisruptionConditions bool
         job                           batch.Job
-        pods                          []v1.PodStatus
+        pods                          []v1.Pod
         wantConditions                *[]batch.JobCondition
         wantStatusFailed              int32
         wantStatusActive              int32
@@ -2067,8 +2070,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2090,6 +2094,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions:      nil,
         wantStatusActive:    1,
         wantStatusSucceeded: 0,
@@ -2111,8 +2116,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodRunning,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2126,6 +2132,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions:   nil,
         wantStatusActive: 1,
         wantStatusFailed: 0,
@@ -2147,8 +2154,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2162,6 +2170,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions: &[]batch.JobCondition{
             {
                 Type: batch.JobFailed,
@@ -2200,8 +2209,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2215,6 +2225,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions: &[]batch.JobCondition{
             {
                 Type: batch.JobFailed,
@@ -2253,8 +2264,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2268,6 +2280,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions: &[]batch.JobCondition{
             {
                 Type: batch.JobFailed,
@@ -2296,8 +2309,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2311,6 +2325,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions:   nil,
         wantStatusActive: 1,
         wantStatusFailed: 1,
@@ -2332,11 +2347,14 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodRunning,
                 },
+            },
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2350,6 +2368,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions: &[]batch.JobCondition{
             {
                 Type: batch.JobFailed,
@@ -2379,8 +2398,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2394,6 +2414,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions: &[]batch.JobCondition{
             {
                 Type: batch.JobFailed,
@@ -2430,8 +2451,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2445,6 +2467,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions: &[]batch.JobCondition{
             {
                 Type: batch.JobFailed,
@@ -2481,8 +2504,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2496,6 +2520,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions:   nil,
         wantStatusActive: 1,
         wantStatusFailed: 1,
@@ -2517,8 +2542,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     InitContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2542,6 +2568,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions: &[]batch.JobCondition{
             {
                 Type: batch.JobFailed,
@@ -2570,8 +2597,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2593,6 +2621,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions:   nil,
         wantStatusActive: 1,
         wantStatusFailed: 0,
@@ -2614,8 +2643,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2628,6 +2658,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions:   nil,
         wantStatusActive: 1,
         wantStatusFailed: 0,
@@ -2649,8 +2680,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2663,6 +2695,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions: &[]batch.JobCondition{
             {
                 Type: batch.JobFailed,
@@ -2706,8 +2739,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     ContainerStatuses: []v1.ContainerStatus{
                         {
@@ -2720,6 +2754,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions:   nil,
         wantStatusActive: 1,
         wantStatusFailed: 1,
@@ -2760,8 +2795,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     Conditions: []v1.PodCondition{
                         {
@@ -2775,6 +2811,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions:   nil,
         wantStatusActive: 1,
         wantStatusFailed: 1,
@@ -2806,8 +2843,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     Conditions: []v1.PodCondition{
                         {
@@ -2817,6 +2855,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions:   nil,
         wantStatusActive: 1,
         wantStatusFailed: 0,
@@ -2848,8 +2887,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 },
             },
         },
-        pods: []v1.PodStatus{
+        pods: []v1.Pod{
             {
+                Status: v1.PodStatus{
                     Phase: v1.PodFailed,
                     Conditions: []v1.PodCondition{
                         {
@@ -2859,6 +2899,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                     },
                 },
             },
+        },
         wantConditions: &[]batch.JobCondition{
             {
                 Type: batch.JobFailed,
@@ -2871,11 +2912,96 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
         wantStatusFailed:    1,
         wantStatusSucceeded: 0,
     },
+    "terminating Pod considered failed when PodDisruptionConditions is disabled": {
+        wFinalizersExclusive:      pointer.Bool(true),
+        enableJobPodFailurePolicy: true,
+        job: batch.Job{
+            TypeMeta:   metav1.TypeMeta{Kind: "Job"},
+            ObjectMeta: validObjectMeta,
+            Spec: batch.JobSpec{
+                Parallelism:  pointer.Int32(1),
+                Selector:     validSelector,
+                Template:     validTemplate,
+                BackoffLimit: pointer.Int32(0),
+                PodFailurePolicy: &batch.PodFailurePolicy{
+                    Rules: []batch.PodFailurePolicyRule{
+                        {
+                            Action: batch.PodFailurePolicyActionCount,
+                            OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
+                                {
+                                    Type:   v1.DisruptionTarget,
+                                    Status: v1.ConditionTrue,
+                                },
+                            },
+                        },
+                    },
+                },
+            },
+        },
+        pods: []v1.Pod{
+            {
+                ObjectMeta: metav1.ObjectMeta{
+                    DeletionTimestamp: &now,
+                },
+            },
+        },
+        wantConditions: &[]batch.JobCondition{
+            {
+                Type:    batch.JobFailed,
+                Status:  v1.ConditionTrue,
+                Reason:  "BackoffLimitExceeded",
+                Message: "Job has reached the specified backoff limit",
+            },
+        },
+        wantStatusFailed: 1,
+    },
+    "terminating Pod not considered failed when PodDisruptionConditions is enabled": {
+        enableJobPodFailurePolicy:     true,
+        enablePodDisruptionConditions: true,
+        job: batch.Job{
+            TypeMeta:   metav1.TypeMeta{Kind: "Job"},
+            ObjectMeta: validObjectMeta,
+            Spec: batch.JobSpec{
+                Parallelism:  pointer.Int32(1),
+                Selector:     validSelector,
+                Template:     validTemplate,
+                BackoffLimit: pointer.Int32(0),
+                PodFailurePolicy: &batch.PodFailurePolicy{
+                    Rules: []batch.PodFailurePolicyRule{
+                        {
+                            Action: batch.PodFailurePolicyActionCount,
+                            OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
+                                {
+                                    Type:   v1.DisruptionTarget,
+                                    Status: v1.ConditionTrue,
+                                },
+                            },
+                        },
+                    },
+                },
+            },
+        },
+        pods: []v1.Pod{
+            {
+                ObjectMeta: metav1.ObjectMeta{
+                    DeletionTimestamp: &now,
+                },
+                Status: v1.PodStatus{
+                    Phase: v1.PodRunning,
+                },
+            },
+        },
+        wantStatusActive: 1, // This is a replacement Pod: the terminating Pod is neither active nor failed.
+    },
 }
 for _, wFinalizers := range []bool{false, true} {
     for name, tc := range testCases {
         t.Run(fmt.Sprintf("%s; finalizers=%t", name, wFinalizers), func(t *testing.T) {
+            if tc.wFinalizersExclusive != nil && *tc.wFinalizersExclusive != wFinalizers {
+                t.Skipf("Test is exclusive for wFinalizers=%t", *tc.wFinalizersExclusive)
+            }
             defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobPodFailurePolicy, tc.enableJobPodFailurePolicy)()
+            defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, tc.enablePodDisruptionConditions)()
             clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
             manager, sharedInformerFactory := newControllerFromClient(clientset, controller.NoResyncPeriodFunc)
             fakePodControl := controller.FakePodControl{}
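The new wFinalizersExclusive field pins a test case to one arm of the outer finalizer loop via t.Skipf. A minimal standalone sketch of that table-driven skip pattern is below; the names (exclusiveTo, TestVariants) are hypothetical, not the controller's test code.

    package sketch

    import (
        "fmt"
        "testing"
    )

    func boolPtr(b bool) *bool { return &b }

    func TestVariants(t *testing.T) {
        cases := map[string]struct {
            exclusiveTo *bool // nil: run under both variants
        }{
            "runs under both variants":    {},
            "only when finalizers are on": {exclusiveTo: boolPtr(true)},
        }
        for _, wFinalizers := range []bool{false, true} {
            for name, tc := range cases {
                t.Run(fmt.Sprintf("%s; finalizers=%t", name, wFinalizers), func(t *testing.T) {
                    // Skip the case unless it matches the variant it is pinned to.
                    if tc.exclusiveTo != nil && *tc.exclusiveTo != wFinalizers {
                        t.Skipf("exclusive to wFinalizers=%t", *tc.exclusiveTo)
                    }
                    // ... exercise the system under the chosen variant ...
                })
            }
        }
    }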
@@ -2896,8 +3022,9 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
                 return job, nil
             }
             sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
-            for i, podStatus := range tc.pods {
-                pb := buildPod().name(fmt.Sprintf("mypod-%d", i)).job(job).status(podStatus)
+            for i, pod := range tc.pods {
+                pod := pod
+                pb := podBuilder{Pod: &pod}.name(fmt.Sprintf("mypod-%d", i)).job(job)
                 if job.Spec.CompletionMode != nil && *job.Spec.CompletionMode == batch.IndexedCompletion {
                     pb.index(fmt.Sprintf("%v", i))
                 }
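The pod := pod line is the classic pre-Go-1.22 loop-variable copy: podBuilder now holds a *v1.Pod, so without the copy every builder would alias the single range variable. The following is an editor's demonstration of the pitfall, not project code.

    package main

    import "fmt"

    func main() {
        nums := []int{1, 2, 3}
        var direct, copied []*int
        for _, n := range nums {
            direct = append(direct, &n) // pre-Go 1.22: &n aliases one reused variable
            n := n                      // shadowing copy, the same trick as `pod := pod`
            copied = append(copied, &n)
        }
        // Pre-Go 1.22 the first line prints "3 3 3"; since Go 1.22 range
        // variables are per-iteration and both lines print "1 2 3".
        fmt.Println(*direct[0], *direct[1], *direct[2])
        fmt.Println(*copied[0], *copied[1], *copied[2])
    }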
@@ -4363,11 +4490,6 @@ func (pb podBuilder) index(ix string) podBuilder {
     return pb
 }
 
-func (pb podBuilder) status(s v1.PodStatus) podBuilder {
-    pb.Status = s
-    return pb
-}
-
 func (pb podBuilder) phase(p v1.PodPhase) podBuilder {
     pb.Status.Phase = p
     return pb
@@ -94,8 +94,6 @@ func getMatchingContainerFromList(containerStatuses []v1.ContainerStatus, requir
     for _, containerStatus := range containerStatuses {
         if containerStatus.State.Terminated == nil {
             // This container is still terminating. There is no exit code to match.
-            // TODO(#113855): Remove this check when it's guaranteed that the
-            // container is terminated.
             continue
         }
         if requirement.ContainerName == nil || *requirement.ContainerName == containerStatus.Name {
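The surviving check skips containers whose State.Terminated is nil, since a still-terminating container has no exit code to match. A standalone sketch of that matching loop with simplified stand-in types (not the project's code) follows.

    package main

    import "fmt"

    type terminated struct{ exitCode int32 }

    type containerStatus struct {
        name       string
        terminated *terminated // nil while the container is still running or terminating
    }

    // matchingContainer skips non-terminated containers, then matches by the
    // optional container name, mirroring the shape of getMatchingContainerFromList.
    func matchingContainer(statuses []containerStatus, requiredName *string) *containerStatus {
        for i, cs := range statuses {
            if cs.terminated == nil {
                // Still terminating; no exit code to match yet.
                continue
            }
            if requiredName == nil || *requiredName == cs.name {
                return &statuses[i]
            }
        }
        return nil
    }

    func main() {
        name := "main"
        statuses := []containerStatus{
            {name: "sidecar", terminated: nil},
            {name: "main", terminated: &terminated{exitCode: 42}},
        }
        if cs := matchingContainer(statuses, &name); cs != nil {
            fmt.Println(cs.terminated.exitCode) // 42
        }
    }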
@@ -538,6 +538,7 @@ func CreateZoneID(region, zone string) string {
 // GetKey is a helper function used by controllers unit tests to get the
 // key for a given kubernetes resource.
 func GetKey(obj interface{}, t *testing.T) string {
+    t.Helper()
     tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
     if ok {
         // if tombstone, try getting the value from tombstone.Obj
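The added t.Helper() marks GetKey as a test helper, so failures it reports are attributed to the calling test's line rather than to the helper itself. A minimal illustration (editor's sketch with a hypothetical helper, not project code):

    package sketch

    import "testing"

    // mustPositive fails the calling test. t.Helper() makes the failure be
    // reported at the caller's line instead of inside this function.
    func mustPositive(t *testing.T, v int) {
        t.Helper()
        if v <= 0 {
            t.Fatalf("want positive, got %d", v)
        }
    }

    func TestExample(t *testing.T) {
        mustPositive(t, 1)
    }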