job: Ignore namespace termination errors when creating pods or jobs

Instead of reporting an event or displaying an error, simply exit
when the namespace is being terminated. This reduces the amount of
controller churn on namespace shutdown. While we could technically
exit the entire processing loop early for very large jobs,
we should wait for more evidence that this is an issue before changing
that logic substantially.
This commit is contained in:
Clayton Coleman 2019-10-20 16:25:44 -04:00
parent 8f74c8970b
commit c6e34e58c5
No known key found for this signature in database
GPG Key ID: 3D16906B4F1C5CB3
3 changed files with 23 additions and 10 deletions

View File

@ -20,6 +20,7 @@ go_library(
"//staging/src/k8s.io/api/batch/v1:go_default_library",
"//staging/src/k8s.io/api/batch/v1beta1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",

View File

@ -39,6 +39,7 @@ import (
batchv1 "k8s.io/api/batch/v1"
batchv1beta1 "k8s.io/api/batch/v1beta1"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
@ -333,7 +334,11 @@ func syncOne(sj *batchv1beta1.CronJob, js []batchv1.Job, now time.Time, jc jobCo
}
jobResp, err := jc.CreateJob(sj.Namespace, jobReq)
if err != nil {
recorder.Eventf(sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err)
// If the namespace is being torn down, we can safely ignore
// this error since all subsequent creations will fail.
if !errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
recorder.Eventf(sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err)
}
return
}
klog.V(4).Infof("Created Job %s for %s", jobResp.Name, nameForLog)

View File

@ -771,15 +771,22 @@ func (jm *JobController) manageJob(activePods []*v1.Pod, succeeded int32, job *b
go func() {
defer wait.Done()
err := jm.podControl.CreatePodsWithControllerRef(job.Namespace, &job.Spec.Template, job, metav1.NewControllerRef(job, controllerKind))
if err != nil && errors.IsTimeout(err) {
// Pod is created but its initialization has timed out.
// If the initialization is successful eventually, the
// controller will observe the creation via the informer.
// If the initialization fails, or if the pod keeps
// uninitialized for a long time, the informer will not
// receive any update, and the controller will create a new
// pod when the expectation expires.
return
if err != nil {
if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
// If the namespace is being torn down, we can safely ignore
// this error since all subsequent creations will fail.
return
}
if errors.IsTimeout(err) {
// Pod is created but its initialization has timed out.
// If the initialization is successful eventually, the
// controller will observe the creation via the informer.
// If the initialization fails, or if the pod remains
// uninitialized for a long time, the informer will not
// receive any update, and the controller will create a new
// pod when the expectation expires.
return
}
}
if err != nil {
defer utilruntime.HandleError(err)