diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go
index 1619c3fffd9..ec876e1eb2b 100644
--- a/pkg/controller/controller_utils.go
+++ b/pkg/controller/controller_utils.go
@@ -578,7 +578,10 @@ func (r RealPodControl) createPods(nodeName, namespace string, template *v1.PodT
 	}
 	newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(pod)
 	if err != nil {
-		r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err)
+		// only send an event if the namespace isn't terminating
+		if !apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
+			r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err)
+		}
 		return err
 	}
 	accessor, err := meta.Accessor(object)
diff --git a/pkg/controller/cronjob/BUILD b/pkg/controller/cronjob/BUILD
index 7f6c6269f11..6c5af9982d5 100644
--- a/pkg/controller/cronjob/BUILD
+++ b/pkg/controller/cronjob/BUILD
@@ -20,6 +20,7 @@ go_library(
         "//staging/src/k8s.io/api/batch/v1:go_default_library",
         "//staging/src/k8s.io/api/batch/v1beta1:go_default_library",
         "//staging/src/k8s.io/api/core/v1:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
diff --git a/pkg/controller/cronjob/cronjob_controller.go b/pkg/controller/cronjob/cronjob_controller.go
index 755d5ef9b58..f778079571f 100644
--- a/pkg/controller/cronjob/cronjob_controller.go
+++ b/pkg/controller/cronjob/cronjob_controller.go
@@ -39,6 +39,7 @@ import (
 	batchv1 "k8s.io/api/batch/v1"
 	batchv1beta1 "k8s.io/api/batch/v1beta1"
 	"k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
@@ -333,7 +334,11 @@ func syncOne(sj *batchv1beta1.CronJob, js []batchv1.Job, now time.Time, jc jobCo
 	}
 	jobResp, err := jc.CreateJob(sj.Namespace, jobReq)
 	if err != nil {
-		recorder.Eventf(sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err)
+		// If the namespace is being torn down, we can safely ignore
+		// this error since all subsequent creations will fail.
+		if !errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
+			recorder.Eventf(sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err)
+		}
 		return
 	}
 	klog.V(4).Infof("Created Job %s for %s", jobResp.Name, nameForLog)
diff --git a/pkg/controller/daemon/daemon_controller.go b/pkg/controller/daemon/daemon_controller.go
index fd2b93f4f6e..443e4b79752 100644
--- a/pkg/controller/daemon/daemon_controller.go
+++ b/pkg/controller/daemon/daemon_controller.go
@@ -951,15 +951,22 @@ func (dsc *DaemonSetsController) syncNodes(ds *apps.DaemonSet, podsToDelete, nod
 
 			err := dsc.podControl.CreatePodsWithControllerRef(ds.Namespace, podTemplate, ds, metav1.NewControllerRef(ds, controllerKind))
 
-			if err != nil && errors.IsTimeout(err) {
-				// Pod is created but its initialization has timed out.
-				// If the initialization is successful eventually, the
-				// controller will observe the creation via the informer.
-				// If the initialization fails, or if the pod keeps
-				// uninitialized for a long time, the informer will not
-				// receive any update, and the controller will create a new
-				// pod when the expectation expires.
-				return
+			if err != nil {
+				if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
+					// If the namespace is being torn down, we can safely ignore
+					// this error since all subsequent creations will fail.
+					return
+				}
+				if errors.IsTimeout(err) {
+					// Pod is created but its initialization has timed out.
+					// If the initialization is successful eventually, the
+					// controller will observe the creation via the informer.
+					// If the initialization fails, or if the pod keeps
+					// uninitialized for a long time, the informer will not
+					// receive any update, and the controller will create a new
+					// pod when the expectation expires.
+					return
+				}
 			}
 			if err != nil {
 				klog.V(2).Infof("Failed creation, decrementing expectations for set %q/%q", ds.Namespace, ds.Name)
diff --git a/pkg/controller/deployment/deployment_controller.go b/pkg/controller/deployment/deployment_controller.go
index b04ded1b288..10068fd6cd5 100644
--- a/pkg/controller/deployment/deployment_controller.go
+++ b/pkg/controller/deployment/deployment_controller.go
@@ -475,7 +475,7 @@ func (dc *DeploymentController) processNextWorkItem() bool {
 }
 
 func (dc *DeploymentController) handleErr(err error, key interface{}) {
-	if err == nil {
+	if err == nil || errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
 		dc.queue.Forget(key)
 		return
 	}
diff --git a/pkg/controller/deployment/sync.go b/pkg/controller/deployment/sync.go
index b0a6e7e3f14..a208fd0cfb5 100644
--- a/pkg/controller/deployment/sync.go
+++ b/pkg/controller/deployment/sync.go
@@ -256,6 +256,9 @@ func (dc *DeploymentController) getNewReplicaSet(d *apps.Deployment, rsList, old
 			klog.V(2).Infof("Found a hash collision for deployment %q - bumping collisionCount (%d->%d) to resolve it", d.Name, preCollisionCount, *d.Status.CollisionCount)
 		}
 		return nil, err
+	case errors.HasStatusCause(err, v1.NamespaceTerminatingCause):
+		// if the namespace is terminating, all subsequent creates will fail and we can safely do nothing
+		return nil, err
 	case err != nil:
 		msg := fmt.Sprintf("Failed to create new replica set %q: %v", newRS.Name, err)
 		if deploymentutil.HasProgressDeadline(d) {
diff --git a/pkg/controller/endpoint/endpoints_controller.go b/pkg/controller/endpoint/endpoints_controller.go
index 77db49dfa68..625f68da217 100644
--- a/pkg/controller/endpoint/endpoints_controller.go
+++ b/pkg/controller/endpoint/endpoints_controller.go
@@ -506,6 +506,11 @@ func (e *EndpointController) syncService(key string) error {
 			// 2. policy is misconfigured, in which case no service would function anywhere.
 			// Given the frequency of 1, we log at a lower level.
 			klog.V(5).Infof("Forbidden from creating endpoints: %v", err)
+
+			// If the namespace is terminating, creates will continue to fail. Simply drop the item.
+			if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
+				return nil
+			}
 		}
 
 		if createEndpoints {
diff --git a/pkg/controller/endpointslice/reconciler.go b/pkg/controller/endpointslice/reconciler.go
index a275896713e..4e9c6a6502a 100644
--- a/pkg/controller/endpointslice/reconciler.go
+++ b/pkg/controller/endpointslice/reconciler.go
@@ -23,6 +23,7 @@ import (
 
 	corev1 "k8s.io/api/core/v1"
 	discovery "k8s.io/api/discovery/v1alpha1"
+	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	utilerrors "k8s.io/apimachinery/pkg/util/errors"
 	"k8s.io/apimachinery/pkg/util/sets"
@@ -150,6 +151,10 @@ func (r *reconciler) finalize(
 		addTriggerTimeAnnotation(endpointSlice, triggerTime)
 		_, err := r.client.DiscoveryV1alpha1().EndpointSlices(service.Namespace).Create(endpointSlice)
 		if err != nil {
+			// If the namespace is terminating, creates will continue to fail. Simply drop the item.
+			if errors.HasStatusCause(err, corev1.NamespaceTerminatingCause) {
+				return nil
+			}
 			errs = append(errs, fmt.Errorf("Error creating EndpointSlice for Service %s/%s: %v", service.Namespace, service.Name, err))
 		}
 	}
diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go
index fdd84e431c1..252a6ec23ca 100644
--- a/pkg/controller/job/job_controller.go
+++ b/pkg/controller/job/job_controller.go
@@ -771,15 +771,22 @@ func (jm *JobController) manageJob(activePods []*v1.Pod, succeeded int32, job *b
 				go func() {
 					defer wait.Done()
 					err := jm.podControl.CreatePodsWithControllerRef(job.Namespace, &job.Spec.Template, job, metav1.NewControllerRef(job, controllerKind))
-					if err != nil && errors.IsTimeout(err) {
-						// Pod is created but its initialization has timed out.
-						// If the initialization is successful eventually, the
-						// controller will observe the creation via the informer.
-						// If the initialization fails, or if the pod keeps
-						// uninitialized for a long time, the informer will not
-						// receive any update, and the controller will create a new
-						// pod when the expectation expires.
-						return
+					if err != nil {
+						if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
+							// If the namespace is being torn down, we can safely ignore
+							// this error since all subsequent creations will fail.
+							return
+						}
+						if errors.IsTimeout(err) {
+							// Pod is created but its initialization has timed out.
+							// If the initialization is successful eventually, the
+							// controller will observe the creation via the informer.
+							// If the initialization fails, or if the pod keeps
+							// uninitialized for a long time, the informer will not
+							// receive any update, and the controller will create a new
+							// pod when the expectation expires.
+							return
+						}
 					}
 					if err != nil {
 						defer utilruntime.HandleError(err)
diff --git a/pkg/controller/replicaset/replica_set.go b/pkg/controller/replicaset/replica_set.go
index 6727df358bb..37b51df9158 100644
--- a/pkg/controller/replicaset/replica_set.go
+++ b/pkg/controller/replicaset/replica_set.go
@@ -523,15 +523,22 @@ func (rsc *ReplicaSetController) manageReplicas(filteredPods []*v1.Pod, rs *apps
 		// event spam that those failures would generate.
 		successfulCreations, err := slowStartBatch(diff, controller.SlowStartInitialBatchSize, func() error {
 			err := rsc.podControl.CreatePodsWithControllerRef(rs.Namespace, &rs.Spec.Template, rs, metav1.NewControllerRef(rs, rsc.GroupVersionKind))
-			if err != nil && errors.IsTimeout(err) {
-				// Pod is created but its initialization has timed out.
-				// If the initialization is successful eventually, the
-				// controller will observe the creation via the informer.
-				// If the initialization fails, or if the pod keeps
-				// uninitialized for a long time, the informer will not
-				// receive any update, and the controller will create a new
-				// pod when the expectation expires.
-				return nil
+			if err != nil {
+				if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
+					// if the namespace is being terminated, we don't have to do
+					// anything because any creation will fail
+					return nil
+				}
+				if errors.IsTimeout(err) {
+					// Pod is created but its initialization has timed out.
+					// If the initialization is successful eventually, the
+					// controller will observe the creation via the informer.
+					// If the initialization fails, or if the pod keeps
+					// uninitialized for a long time, the informer will not
+					// receive any update, and the controller will create a new
+					// pod when the expectation expires.
+					return nil
+				}
 			}
 			return err
 		})
diff --git a/pkg/controller/serviceaccount/serviceaccounts_controller.go b/pkg/controller/serviceaccount/serviceaccounts_controller.go
index c775706a605..83214feb9df 100644
--- a/pkg/controller/serviceaccount/serviceaccounts_controller.go
+++ b/pkg/controller/serviceaccount/serviceaccounts_controller.go
@@ -213,7 +213,10 @@ func (c *ServiceAccountsController) syncNamespace(key string) error {
 		sa.Namespace = ns.Name
 
 		if _, err := c.client.CoreV1().ServiceAccounts(ns.Name).Create(&sa); err != nil && !apierrs.IsAlreadyExists(err) {
-			createFailures = append(createFailures, err)
+			// we can safely ignore terminating namespace errors
+			if !apierrs.HasStatusCause(err, v1.NamespaceTerminatingCause) {
+				createFailures = append(createFailures, err)
+			}
 		}
 	}
 
diff --git a/pkg/controller/serviceaccount/tokens_controller.go b/pkg/controller/serviceaccount/tokens_controller.go
index 6e128a8da03..62dab048695 100644
--- a/pkg/controller/serviceaccount/tokens_controller.go
+++ b/pkg/controller/serviceaccount/tokens_controller.go
@@ -408,6 +408,10 @@ func (e *TokensController) ensureReferencedToken(serviceAccount *v1.ServiceAccou
 	// Save the secret
 	createdToken, err := e.client.CoreV1().Secrets(serviceAccount.Namespace).Create(secret)
 	if err != nil {
+		// if the namespace is being terminated, create will fail no matter what
+		if apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
+			return false, err
+		}
 		// retriable error
 		return true, err
 	}
diff --git a/staging/src/k8s.io/api/core/v1/types.go b/staging/src/k8s.io/api/core/v1/types.go
index a284d627148..90fb0670816 100644
--- a/staging/src/k8s.io/api/core/v1/types.go
+++ b/staging/src/k8s.io/api/core/v1/types.go
@@ -4659,6 +4659,12 @@ const (
 	NamespaceTerminating NamespacePhase = "Terminating"
 )
 
+const (
+	// NamespaceTerminatingCause is returned as a defaults.cause item when a change is
+	// forbidden due to the namespace being terminated.
+	NamespaceTerminatingCause metav1.CauseType = "NamespaceTerminating"
+)
+
 type NamespaceConditionType string
 
 // These are valid conditions of a namespace.
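
Note: the controller hunks above all follow the same pattern: when a create fails, check whether the error carries the new NamespaceTerminating status cause and, if it does, skip the warning event (and any pointless retry), since a terminating namespace will reject every further create. Below is a minimal sketch of that pattern, assuming the errors.HasStatusCause helper added later in this diff; the createAndRecord helper, its create callback, and the "FailedCreate" reason are illustrative stand-ins, not code from this change.

package controllerutil

import (
	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/tools/record"
)

// createAndRecord is a hypothetical helper mirroring the pattern used in the
// controller hunks above: attempt a create, and only emit a warning event when
// the failure was not caused by the namespace being terminated.
func createAndRecord(recorder record.EventRecorder, owner runtime.Object, create func() error) error {
	err := create()
	if err == nil {
		return nil
	}
	// A terminating namespace rejects every create, so an event (or a retry)
	// would only generate noise; the error is still returned to the caller.
	if apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
		return err
	}
	recorder.Eventf(owner, v1.EventTypeWarning, "FailedCreate", "Error creating: %v", err)
	return err
}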
diff --git a/staging/src/k8s.io/apimachinery/pkg/api/errors/errors.go b/staging/src/k8s.io/apimachinery/pkg/api/errors/errors.go
index 95d5c7a3553..e9012b5cc53 100644
--- a/staging/src/k8s.io/apimachinery/pkg/api/errors/errors.go
+++ b/staging/src/k8s.io/apimachinery/pkg/api/errors/errors.go
@@ -70,6 +70,28 @@ func (e *StatusError) DebugError() (string, []interface{}) {
 	return "server response object: %#v", []interface{}{e.ErrStatus}
 }
 
+// HasStatusCause returns true if the provided error has a details cause
+// with the provided type name.
+func HasStatusCause(err error, name metav1.CauseType) bool {
+	_, ok := StatusCause(err, name)
+	return ok
+}
+
+// StatusCause returns the named cause from the provided error if it exists and
+// the error is of the type APIStatus. Otherwise it returns false.
+func StatusCause(err error, name metav1.CauseType) (metav1.StatusCause, bool) {
+	apierr, ok := err.(APIStatus)
+	if !ok || apierr == nil || apierr.Status().Details == nil {
+		return metav1.StatusCause{}, false
+	}
+	for _, cause := range apierr.Status().Details.Causes {
+		if cause.Type == name {
+			return cause, true
+		}
+	}
+	return metav1.StatusCause{}, false
+}
+
 // UnexpectedObjectError can be returned by FromObject if it's passed a non-status object.
 type UnexpectedObjectError struct {
 	Object runtime.Object
diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/BUILD b/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/BUILD
index 5e605f4af59..21217f11e05 100644
--- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/BUILD
+++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/BUILD
@@ -34,10 +34,12 @@ go_test(
     embed = [":go_default_library"],
     deps = [
         "//staging/src/k8s.io/api/core/v1:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
         "//staging/src/k8s.io/apiserver/pkg/admission:go_default_library",
diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission.go b/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission.go
index 1fbac451742..5db82404583 100644
--- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission.go
+++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission.go
@@ -170,8 +170,15 @@ func (l *Lifecycle) Admit(ctx context.Context, a admission.Attributes, o admissi
 			return nil
 		}
 
-		// TODO: This should probably not be a 403
-		return admission.NewForbidden(a, fmt.Errorf("unable to create new content in namespace %s because it is being terminated", a.GetNamespace()))
+		err := admission.NewForbidden(a, fmt.Errorf("unable to create new content in namespace %s because it is being terminated", a.GetNamespace()))
+		if apierr, ok := err.(*errors.StatusError); ok {
+			apierr.ErrStatus.Details.Causes = append(apierr.ErrStatus.Details.Causes, metav1.StatusCause{
+				Type:    v1.NamespaceTerminatingCause,
+				Message: fmt.Sprintf("namespace %s is being terminated", a.GetNamespace()),
+				Field:   "metadata.namespace",
+			})
+		}
+		return err
 	}
 
 	return nil
diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission_test.go b/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission_test.go
index 0ab9bf3e797..1948996c776 100644
--- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission_test.go
+++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission_test.go
@@ -19,14 +19,17 @@ package lifecycle
 import (
 	"context"
 	"fmt"
+	"reflect"
 	"testing"
 	"time"
 
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/util/clock"
+	"k8s.io/apimachinery/pkg/util/diff"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/apiserver/pkg/admission"
@@ -192,6 +195,14 @@ func TestAdmissionNamespaceTerminating(t *testing.T) {
 	if err == nil {
 		t.Errorf("Expected error rejecting creates in a namespace when it is terminating")
 	}
+	expectedCause := metav1.StatusCause{
+		Type:    v1.NamespaceTerminatingCause,
+		Message: fmt.Sprintf("namespace %s is being terminated", namespace),
+		Field:   "metadata.namespace",
+	}
+	if cause, ok := errors.StatusCause(err, v1.NamespaceTerminatingCause); !ok || !reflect.DeepEqual(expectedCause, cause) {
+		t.Errorf("Expected status cause indicating the namespace is terminating: %t %s", ok, diff.ObjectReflectDiff(expectedCause, cause))
+	}
 
 	// verify update operations in the namespace can proceed
 	err = handler.Admit(context.TODO(), admission.NewAttributesRecord(&pod, nil, v1.SchemeGroupVersion.WithKind("Pod").GroupKind().WithVersion("version"), pod.Namespace, pod.Name, v1.Resource("pods").WithVersion("version"), "", admission.Update, &metav1.UpdateOptions{}, false, nil), nil)
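
For reference, a small end-to-end sketch of how the new cause is consumed: the lifecycle admission plugin attaches a NamespaceTerminating cause to the 403 it returns, and callers can branch on that structured cause via the new StatusCause/HasStatusCause helpers instead of matching on the error message. The hand-built StatusError below only approximates what the plugin produces; the namespace name and message text are illustrative.

package main

import (
	"fmt"
	"net/http"

	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// A hand-built Forbidden error carrying the new cause, similar in shape to
	// what the lifecycle admission plugin returns for a terminating namespace.
	err := &apierrors.StatusError{ErrStatus: metav1.Status{
		Status: metav1.StatusFailure,
		Code:   http.StatusForbidden,
		Reason: metav1.StatusReasonForbidden,
		Details: &metav1.StatusDetails{
			Causes: []metav1.StatusCause{{
				Type:    v1.NamespaceTerminatingCause,
				Message: "namespace demo is being terminated",
				Field:   "metadata.namespace",
			}},
		},
	}}

	// Callers branch on the structured cause rather than the message text.
	if cause, ok := apierrors.StatusCause(err, v1.NamespaceTerminatingCause); ok {
		fmt.Println("skip events/retries:", cause.Message)
	}
	fmt.Println(apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause)) // true
}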