From c6e34e58c53a4aac3cb9f34af20cdb94e90ce18f Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Sun, 20 Oct 2019 16:25:44 -0400 Subject: [PATCH] job: Ignore namespace termination errors when creating pods or jobs Instead of reporting an event or displaying an error, simply exit when the namespace is being terminated. This reduces the amount of controller churn on namespace shutdown. While we could technically exit the entire processing loop early for very large jobs, we should wait for more evidence that is an issue before changing that logic substantially. --- pkg/controller/cronjob/BUILD | 1 + pkg/controller/cronjob/cronjob_controller.go | 7 +++++- pkg/controller/job/job_controller.go | 25 +++++++++++++------- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/pkg/controller/cronjob/BUILD b/pkg/controller/cronjob/BUILD index 7f6c6269f11..6c5af9982d5 100644 --- a/pkg/controller/cronjob/BUILD +++ b/pkg/controller/cronjob/BUILD @@ -20,6 +20,7 @@ go_library( "//staging/src/k8s.io/api/batch/v1:go_default_library", "//staging/src/k8s.io/api/batch/v1beta1:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library", diff --git a/pkg/controller/cronjob/cronjob_controller.go b/pkg/controller/cronjob/cronjob_controller.go index 755d5ef9b58..f778079571f 100644 --- a/pkg/controller/cronjob/cronjob_controller.go +++ b/pkg/controller/cronjob/cronjob_controller.go @@ -39,6 +39,7 @@ import ( batchv1 "k8s.io/api/batch/v1" batchv1beta1 "k8s.io/api/batch/v1beta1" "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -333,7 +334,11 @@ func syncOne(sj *batchv1beta1.CronJob, js []batchv1.Job, now time.Time, jc jobCo } jobResp, err := jc.CreateJob(sj.Namespace, jobReq) if err != nil { - recorder.Eventf(sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err) + // If the namespace is being torn down, we can safely ignore + // this error since all subsequent creations will fail. + if !errors.HasStatusCause(err, v1.NamespaceTerminatingCause) { + recorder.Eventf(sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err) + } return } klog.V(4).Infof("Created Job %s for %s", jobResp.Name, nameForLog) diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go index fdd84e431c1..252a6ec23ca 100644 --- a/pkg/controller/job/job_controller.go +++ b/pkg/controller/job/job_controller.go @@ -771,15 +771,22 @@ func (jm *JobController) manageJob(activePods []*v1.Pod, succeeded int32, job *b go func() { defer wait.Done() err := jm.podControl.CreatePodsWithControllerRef(job.Namespace, &job.Spec.Template, job, metav1.NewControllerRef(job, controllerKind)) - if err != nil && errors.IsTimeout(err) { - // Pod is created but its initialization has timed out. - // If the initialization is successful eventually, the - // controller will observe the creation via the informer. - // If the initialization fails, or if the pod keeps - // uninitialized for a long time, the informer will not - // receive any update, and the controller will create a new - // pod when the expectation expires. - return + if err != nil { + if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) { + // If the namespace is being torn down, we can safely ignore + // this error since all subsequent creations will fail. + return + } + if errors.IsTimeout(err) { + // Pod is created but its initialization has timed out. + // If the initialization is successful eventually, the + // controller will observe the creation via the informer. + // If the initialization fails, or if the pod keeps + // uninitialized for a long time, the informer will not + // receive any update, and the controller will create a new + // pod when the expectation expires. + return + } } if err != nil { defer utilruntime.HandleError(err)