From a62c5b282fda7c0832d329cde45e5e0a836924e8 Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Sat, 19 Oct 2019 22:57:21 -0400 Subject: [PATCH 1/7] namespace: Provide a special status cause when a namespace is terminating Clients should be able to identify when a namespace is being terminated and take special action such as backing off or giving up. Add a helper for getting the cause of an error and then add a special cause to the forbidden error that namespace lifecycle admission returns. We can't change the forbidden reason without potentially breaking older clients and so cause is the appropriate tool. Add `StatusCause` and `HasStatusCause` to the errors package to make checking for causes simpler. Add `NamespaceTerminatingCause` to the v1 API as a constant. --- staging/src/k8s.io/api/core/v1/types.go | 6 +++++ .../apimachinery/pkg/api/errors/errors.go | 22 +++++++++++++++++++ .../plugin/namespace/lifecycle/BUILD | 2 ++ .../plugin/namespace/lifecycle/admission.go | 11 ++++++++-- .../namespace/lifecycle/admission_test.go | 11 ++++++++++ 5 files changed, 50 insertions(+), 2 deletions(-) diff --git a/staging/src/k8s.io/api/core/v1/types.go b/staging/src/k8s.io/api/core/v1/types.go index d785c13aeb5..da2ab92279d 100644 --- a/staging/src/k8s.io/api/core/v1/types.go +++ b/staging/src/k8s.io/api/core/v1/types.go @@ -4659,6 +4659,12 @@ const ( NamespaceTerminating NamespacePhase = "Terminating" ) +const ( + // NamespaceTerminatingCause is returned as a defaults.cause item when a change is + // forbidden due to the namespace being terminated. + NamespaceTerminatingCause metav1.CauseType = "NamespaceTerminating" +) + type NamespaceConditionType string // These are valid conditions of a namespace. diff --git a/staging/src/k8s.io/apimachinery/pkg/api/errors/errors.go b/staging/src/k8s.io/apimachinery/pkg/api/errors/errors.go index 95d5c7a3553..e9012b5cc53 100644 --- a/staging/src/k8s.io/apimachinery/pkg/api/errors/errors.go +++ b/staging/src/k8s.io/apimachinery/pkg/api/errors/errors.go @@ -70,6 +70,28 @@ func (e *StatusError) DebugError() (string, []interface{}) { return "server response object: %#v", []interface{}{e.ErrStatus} } +// HasStatusCause returns true if the provided error has a details cause +// with the provided type name. +func HasStatusCause(err error, name metav1.CauseType) bool { + _, ok := StatusCause(err, name) + return ok +} + +// StatusCause returns the named cause from the provided error if it exists and +// the error is of the type APIStatus. Otherwise it returns false. +func StatusCause(err error, name metav1.CauseType) (metav1.StatusCause, bool) { + apierr, ok := err.(APIStatus) + if !ok || apierr == nil || apierr.Status().Details == nil { + return metav1.StatusCause{}, false + } + for _, cause := range apierr.Status().Details.Causes { + if cause.Type == name { + return cause, true + } + } + return metav1.StatusCause{}, false +} + // UnexpectedObjectError can be returned by FromObject if it's passed a non-status object. type UnexpectedObjectError struct { Object runtime.Object diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/BUILD b/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/BUILD index 5e605f4af59..21217f11e05 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/BUILD +++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/BUILD @@ -34,10 +34,12 @@ go_test( embed = [":go_default_library"], deps = [ "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library", "//staging/src/k8s.io/apiserver/pkg/admission:go_default_library", diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission.go b/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission.go index 1fbac451742..5db82404583 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission.go +++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission.go @@ -170,8 +170,15 @@ func (l *Lifecycle) Admit(ctx context.Context, a admission.Attributes, o admissi return nil } - // TODO: This should probably not be a 403 - return admission.NewForbidden(a, fmt.Errorf("unable to create new content in namespace %s because it is being terminated", a.GetNamespace())) + err := admission.NewForbidden(a, fmt.Errorf("unable to create new content in namespace %s because it is being terminated", a.GetNamespace())) + if apierr, ok := err.(*errors.StatusError); ok { + apierr.ErrStatus.Details.Causes = append(apierr.ErrStatus.Details.Causes, metav1.StatusCause{ + Type: v1.NamespaceTerminatingCause, + Message: fmt.Sprintf("namespace %s is being terminated", a.GetNamespace()), + Field: "metadata.namespace", + }) + } + return err } return nil diff --git a/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission_test.go b/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission_test.go index 0ab9bf3e797..1948996c776 100644 --- a/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission_test.go +++ b/staging/src/k8s.io/apiserver/pkg/admission/plugin/namespace/lifecycle/admission_test.go @@ -19,14 +19,17 @@ package lifecycle import ( "context" "fmt" + "reflect" "testing" "time" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/clock" + "k8s.io/apimachinery/pkg/util/diff" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apiserver/pkg/admission" @@ -192,6 +195,14 @@ func TestAdmissionNamespaceTerminating(t *testing.T) { if err == nil { t.Errorf("Expected error rejecting creates in a namespace when it is terminating") } + expectedCause := metav1.StatusCause{ + Type: v1.NamespaceTerminatingCause, + Message: fmt.Sprintf("namespace %s is being terminated", namespace), + Field: "metadata.namespace", + } + if cause, ok := errors.StatusCause(err, v1.NamespaceTerminatingCause); !ok || !reflect.DeepEqual(expectedCause, cause) { + t.Errorf("Expected status cause indicating the namespace is terminating: %t %s", ok, diff.ObjectReflectDiff(expectedCause, cause)) + } // verify update operations in the namespace can proceed err = handler.Admit(context.TODO(), admission.NewAttributesRecord(&pod, nil, v1.SchemeGroupVersion.WithKind("Pod").GroupKind().WithVersion("version"), pod.Namespace, pod.Name, v1.Resource("pods").WithVersion("version"), "", admission.Update, &metav1.UpdateOptions{}, false, nil), nil) From 937ef77257f8a8a7059916243497de7eee4fc764 Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Sun, 20 Oct 2019 16:04:07 -0400 Subject: [PATCH 2/7] endpoints: If namespace is terminating, drop item immediately Avoid sending an event to the namespace that is being terminated, since it will be rejected. --- pkg/controller/endpoint/endpoints_controller.go | 5 +++++ pkg/controller/endpointslice/reconciler.go | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/pkg/controller/endpoint/endpoints_controller.go b/pkg/controller/endpoint/endpoints_controller.go index 77db49dfa68..625f68da217 100644 --- a/pkg/controller/endpoint/endpoints_controller.go +++ b/pkg/controller/endpoint/endpoints_controller.go @@ -506,6 +506,11 @@ func (e *EndpointController) syncService(key string) error { // 2. policy is misconfigured, in which case no service would function anywhere. // Given the frequency of 1, we log at a lower level. klog.V(5).Infof("Forbidden from creating endpoints: %v", err) + + // If the namespace is terminating, creates will continue to fail. Simply drop the item. + if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) { + return nil + } } if createEndpoints { diff --git a/pkg/controller/endpointslice/reconciler.go b/pkg/controller/endpointslice/reconciler.go index a275896713e..4e9c6a6502a 100644 --- a/pkg/controller/endpointslice/reconciler.go +++ b/pkg/controller/endpointslice/reconciler.go @@ -23,6 +23,7 @@ import ( corev1 "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1alpha1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" @@ -150,6 +151,10 @@ func (r *reconciler) finalize( addTriggerTimeAnnotation(endpointSlice, triggerTime) _, err := r.client.DiscoveryV1alpha1().EndpointSlices(service.Namespace).Create(endpointSlice) if err != nil { + // If the namespace is terminating, creates will continue to fail. Simply drop the item. + if errors.HasStatusCause(err, corev1.NamespaceTerminatingCause) { + return nil + } errs = append(errs, fmt.Errorf("Error creating EndpointSlice for Service %s/%s: %v", service.Namespace, service.Name, err)) } } From dc0c21c7d7b264db2afb95db681505e341eb00ae Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Sun, 20 Oct 2019 16:09:19 -0400 Subject: [PATCH 3/7] serviceaccount: If namespace is terminating, ignore create errors In some scenarios the service account and token controllers can race with namespace deletion, causing a burst of errors as they attempt to recreate secrets being deleted. Instead, detect these errors and do not retry. --- pkg/controller/serviceaccount/serviceaccounts_controller.go | 5 ++++- pkg/controller/serviceaccount/tokens_controller.go | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pkg/controller/serviceaccount/serviceaccounts_controller.go b/pkg/controller/serviceaccount/serviceaccounts_controller.go index c775706a605..83214feb9df 100644 --- a/pkg/controller/serviceaccount/serviceaccounts_controller.go +++ b/pkg/controller/serviceaccount/serviceaccounts_controller.go @@ -213,7 +213,10 @@ func (c *ServiceAccountsController) syncNamespace(key string) error { sa.Namespace = ns.Name if _, err := c.client.CoreV1().ServiceAccounts(ns.Name).Create(&sa); err != nil && !apierrs.IsAlreadyExists(err) { - createFailures = append(createFailures, err) + // we can safely ignore terminating namespace errors + if !apierrs.HasStatusCause(err, v1.NamespaceTerminatingCause) { + createFailures = append(createFailures, err) + } } } diff --git a/pkg/controller/serviceaccount/tokens_controller.go b/pkg/controller/serviceaccount/tokens_controller.go index 6e128a8da03..62dab048695 100644 --- a/pkg/controller/serviceaccount/tokens_controller.go +++ b/pkg/controller/serviceaccount/tokens_controller.go @@ -408,6 +408,10 @@ func (e *TokensController) ensureReferencedToken(serviceAccount *v1.ServiceAccou // Save the secret createdToken, err := e.client.CoreV1().Secrets(serviceAccount.Namespace).Create(secret) if err != nil { + // if the namespace is being terminated, create will fail no matter what + if apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) { + return false, err + } // retriable error return true, err } From 2e8ace82ebd4d088bb9004173b24b0ace5530982 Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Sun, 20 Oct 2019 16:14:49 -0400 Subject: [PATCH 4/7] replicaset: Ignore namespace termination errors when creating pods Instead of reporting an event or displaying an error, simply exit when the namespace is being terminated. This reduces the amount of controller churn on namespace shutdown. While we could technically exit the entire processing loop early for very large replica sets, we should wait for more evidence that is an issue before changing that logic substantially. --- pkg/controller/controller_utils.go | 5 ++++- pkg/controller/replicaset/replica_set.go | 25 +++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index 17fcf98f9a7..9e57264875a 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -577,7 +577,10 @@ func (r RealPodControl) createPods(nodeName, namespace string, template *v1.PodT } newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(pod) if err != nil { - r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err) + // only send an event if the namespace isn't terminating + if !apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) { + r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err) + } return err } accessor, err := meta.Accessor(object) diff --git a/pkg/controller/replicaset/replica_set.go b/pkg/controller/replicaset/replica_set.go index 6727df358bb..37b51df9158 100644 --- a/pkg/controller/replicaset/replica_set.go +++ b/pkg/controller/replicaset/replica_set.go @@ -523,15 +523,22 @@ func (rsc *ReplicaSetController) manageReplicas(filteredPods []*v1.Pod, rs *apps // event spam that those failures would generate. successfulCreations, err := slowStartBatch(diff, controller.SlowStartInitialBatchSize, func() error { err := rsc.podControl.CreatePodsWithControllerRef(rs.Namespace, &rs.Spec.Template, rs, metav1.NewControllerRef(rs, rsc.GroupVersionKind)) - if err != nil && errors.IsTimeout(err) { - // Pod is created but its initialization has timed out. - // If the initialization is successful eventually, the - // controller will observe the creation via the informer. - // If the initialization fails, or if the pod keeps - // uninitialized for a long time, the informer will not - // receive any update, and the controller will create a new - // pod when the expectation expires. - return nil + if err != nil { + if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) { + // if the namespace is being terminated, we don't have to do + // anything because any creation will fail + return nil + } + if errors.IsTimeout(err) { + // Pod is created but its initialization has timed out. + // If the initialization is successful eventually, the + // controller will observe the creation via the informer. + // If the initialization fails, or if the pod keeps + // uninitialized for a long time, the informer will not + // receive any update, and the controller will create a new + // pod when the expectation expires. + return nil + } } return err }) From 8f74c8970bb891b0487514d456e337c5afe16837 Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Sun, 20 Oct 2019 16:24:59 -0400 Subject: [PATCH 5/7] daemonset: Ignore namespace termination errors when creating pods Instead of reporting an event or displaying an error, simply exit when the namespace is being terminated. This reduces the amount of controller churn on namespace shutdown. While we could technically exit the entire processing loop early for very large daemon sets, we should wait for more evidence that is an issue before changing that logic substantially. --- pkg/controller/daemon/daemon_controller.go | 25 ++++++++++++++-------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/pkg/controller/daemon/daemon_controller.go b/pkg/controller/daemon/daemon_controller.go index bac4ddc4c20..2d42a2020a3 100644 --- a/pkg/controller/daemon/daemon_controller.go +++ b/pkg/controller/daemon/daemon_controller.go @@ -1053,15 +1053,22 @@ func (dsc *DaemonSetsController) syncNodes(ds *apps.DaemonSet, podsToDelete, nod ds, metav1.NewControllerRef(ds, controllerKind)) } - if err != nil && errors.IsTimeout(err) { - // Pod is created but its initialization has timed out. - // If the initialization is successful eventually, the - // controller will observe the creation via the informer. - // If the initialization fails, or if the pod keeps - // uninitialized for a long time, the informer will not - // receive any update, and the controller will create a new - // pod when the expectation expires. - return + if err != nil { + if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) { + // If the namespace is being torn down, we can safely ignore + // this error since all subsequent creations will fail. + return + } + if errors.IsTimeout(err) { + // Pod is created but its initialization has timed out. + // If the initialization is successful eventually, the + // controller will observe the creation via the informer. + // If the initialization fails, or if the pod keeps + // uninitialized for a long time, the informer will not + // receive any update, and the controller will create a new + // pod when the expectation expires. + return + } } if err != nil { klog.V(2).Infof("Failed creation, decrementing expectations for set %q/%q", ds.Namespace, ds.Name) From c6e34e58c53a4aac3cb9f34af20cdb94e90ce18f Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Sun, 20 Oct 2019 16:25:44 -0400 Subject: [PATCH 6/7] job: Ignore namespace termination errors when creating pods or jobs Instead of reporting an event or displaying an error, simply exit when the namespace is being terminated. This reduces the amount of controller churn on namespace shutdown. While we could technically exit the entire processing loop early for very large jobs, we should wait for more evidence that is an issue before changing that logic substantially. --- pkg/controller/cronjob/BUILD | 1 + pkg/controller/cronjob/cronjob_controller.go | 7 +++++- pkg/controller/job/job_controller.go | 25 +++++++++++++------- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/pkg/controller/cronjob/BUILD b/pkg/controller/cronjob/BUILD index 7f6c6269f11..6c5af9982d5 100644 --- a/pkg/controller/cronjob/BUILD +++ b/pkg/controller/cronjob/BUILD @@ -20,6 +20,7 @@ go_library( "//staging/src/k8s.io/api/batch/v1:go_default_library", "//staging/src/k8s.io/api/batch/v1beta1:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library", diff --git a/pkg/controller/cronjob/cronjob_controller.go b/pkg/controller/cronjob/cronjob_controller.go index 755d5ef9b58..f778079571f 100644 --- a/pkg/controller/cronjob/cronjob_controller.go +++ b/pkg/controller/cronjob/cronjob_controller.go @@ -39,6 +39,7 @@ import ( batchv1 "k8s.io/api/batch/v1" batchv1beta1 "k8s.io/api/batch/v1beta1" "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -333,7 +334,11 @@ func syncOne(sj *batchv1beta1.CronJob, js []batchv1.Job, now time.Time, jc jobCo } jobResp, err := jc.CreateJob(sj.Namespace, jobReq) if err != nil { - recorder.Eventf(sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err) + // If the namespace is being torn down, we can safely ignore + // this error since all subsequent creations will fail. + if !errors.HasStatusCause(err, v1.NamespaceTerminatingCause) { + recorder.Eventf(sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err) + } return } klog.V(4).Infof("Created Job %s for %s", jobResp.Name, nameForLog) diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go index fdd84e431c1..252a6ec23ca 100644 --- a/pkg/controller/job/job_controller.go +++ b/pkg/controller/job/job_controller.go @@ -771,15 +771,22 @@ func (jm *JobController) manageJob(activePods []*v1.Pod, succeeded int32, job *b go func() { defer wait.Done() err := jm.podControl.CreatePodsWithControllerRef(job.Namespace, &job.Spec.Template, job, metav1.NewControllerRef(job, controllerKind)) - if err != nil && errors.IsTimeout(err) { - // Pod is created but its initialization has timed out. - // If the initialization is successful eventually, the - // controller will observe the creation via the informer. - // If the initialization fails, or if the pod keeps - // uninitialized for a long time, the informer will not - // receive any update, and the controller will create a new - // pod when the expectation expires. - return + if err != nil { + if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) { + // If the namespace is being torn down, we can safely ignore + // this error since all subsequent creations will fail. + return + } + if errors.IsTimeout(err) { + // Pod is created but its initialization has timed out. + // If the initialization is successful eventually, the + // controller will observe the creation via the informer. + // If the initialization fails, or if the pod keeps + // uninitialized for a long time, the informer will not + // receive any update, and the controller will create a new + // pod when the expectation expires. + return + } } if err != nil { defer utilruntime.HandleError(err) From bd9260711fa09bfdfe95a94c27ab14b95a95cb8a Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Sun, 20 Oct 2019 16:26:18 -0400 Subject: [PATCH 7/7] deployment: Ignore namespace termination errors when creating replicasets Instead of reporting an event or displaying an error, simply exit when the namespace is being terminated. This reduces the amount of controller churn on namespace shutdown. Unlike other controllers, we drop the replica set create error very late (in the queue handleErr) in order to avoid changing the structure of the controller substantially. --- pkg/controller/deployment/deployment_controller.go | 2 +- pkg/controller/deployment/sync.go | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/controller/deployment/deployment_controller.go b/pkg/controller/deployment/deployment_controller.go index b04ded1b288..10068fd6cd5 100644 --- a/pkg/controller/deployment/deployment_controller.go +++ b/pkg/controller/deployment/deployment_controller.go @@ -475,7 +475,7 @@ func (dc *DeploymentController) processNextWorkItem() bool { } func (dc *DeploymentController) handleErr(err error, key interface{}) { - if err == nil { + if err == nil || errors.HasStatusCause(err, v1.NamespaceTerminatingCause) { dc.queue.Forget(key) return } diff --git a/pkg/controller/deployment/sync.go b/pkg/controller/deployment/sync.go index b0a6e7e3f14..a208fd0cfb5 100644 --- a/pkg/controller/deployment/sync.go +++ b/pkg/controller/deployment/sync.go @@ -256,6 +256,9 @@ func (dc *DeploymentController) getNewReplicaSet(d *apps.Deployment, rsList, old klog.V(2).Infof("Found a hash collision for deployment %q - bumping collisionCount (%d->%d) to resolve it", d.Name, preCollisionCount, *d.Status.CollisionCount) } return nil, err + case errors.HasStatusCause(err, v1.NamespaceTerminatingCause): + // if the namespace is terminating, all subsequent creates will fail and we can safely do nothing + return nil, err case err != nil: msg := fmt.Sprintf("Failed to create new replica set %q: %v", newRS.Name, err) if deploymentutil.HasProgressDeadline(d) {