diff --git a/pkg/controller/namespace/deletion/namespaced_resources_deleter.go b/pkg/controller/namespace/deletion/namespaced_resources_deleter.go index 6f414ddf9e2..81f0d9b13c3 100644 --- a/pkg/controller/namespace/deletion/namespaced_resources_deleter.go +++ b/pkg/controller/namespace/deletion/namespaced_resources_deleter.go @@ -32,9 +32,11 @@ import ( utilerrors "k8s.io/apimachinery/pkg/util/errors" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" + utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/discovery" v1clientset "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/metadata" + "k8s.io/kubernetes/pkg/features" ) // NamespacedResourcesDeleterInterface is the interface to delete a namespace with all resources in it. @@ -526,7 +528,41 @@ func (d *namespacedResourcesDeleter) deleteAllContent(ctx context.Context, ns *v gvrToNumRemaining: map[schema.GroupVersionResource]int{}, finalizersToNumRemaining: map[string]int{}, } + + podsGVR := schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"} + if _, hasPods := groupVersionResources[podsGVR]; hasPods && utilfeature.DefaultFeatureGate.Enabled(features.OrderedNamespaceDeletion) { + // Ensure all pods in the namespace are deleted first + gvrDeletionMetadata, err := d.deleteAllContentForGroupVersionResource(ctx, podsGVR, namespace, namespaceDeletedAt) + if err != nil { + errs = append(errs, fmt.Errorf("failed to delete pods for namespace: %s, err: %w", namespace, err)) + conditionUpdater.ProcessDeleteContentErr(err) + } + if gvrDeletionMetadata.finalizerEstimateSeconds > estimate { + estimate = gvrDeletionMetadata.finalizerEstimateSeconds + } + if gvrDeletionMetadata.numRemaining > 0 { + numRemainingTotals.gvrToNumRemaining[podsGVR] = gvrDeletionMetadata.numRemaining + for finalizer, numRemaining := range gvrDeletionMetadata.finalizersToNumRemaining { + if numRemaining == 0 { + continue + } + numRemainingTotals.finalizersToNumRemaining[finalizer] += numRemaining + } + } + + // Check if any pods remain before proceeding to delete other resources + if numRemainingTotals.gvrToNumRemaining[podsGVR] > 0 { + logger.V(5).Info("Namespace controller - pods still remain, delaying deletion of other resources", "namespace", namespace) + return estimate, utilerrors.NewAggregate(errs) + } + } + + // Proceed with deleting other resources in the namespace for gvr := range groupVersionResources { + if utilfeature.DefaultFeatureGate.Enabled(features.OrderedNamespaceDeletion) && gvr.Group == podsGVR.Group && + gvr.Version == podsGVR.Version && gvr.Resource == podsGVR.Resource { + continue + } gvrDeletionMetadata, err := d.deleteAllContentForGroupVersionResource(ctx, gvr, namespace, namespaceDeletedAt) if err != nil { // If there is an error, hold on to it but proceed with all the remaining diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 11564428025..66adabcd8b4 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -546,6 +546,12 @@ const ( // Permits kubelet to run with swap enabled. NodeSwap featuregate.Feature = "NodeSwap" + // owner: @cici37 + // kep: https://kep.k8s.io/5080 + // + // Enables ordered namespace deletion. + OrderedNamespaceDeletion featuregate.Feature = "OrderedNamespaceDeletion" + // owner: @mortent, @atiratree, @ravig // kep: http://kep.k8s.io/3018 // alpha: v1.26 @@ -1137,6 +1143,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS NodeSwap: {Default: true, PreRelease: featuregate.Beta}, + OrderedNamespaceDeletion: {Default: false, PreRelease: featuregate.Beta}, + PDBUnhealthyPodEvictionPolicy: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.33 PersistentVolumeLastPhaseTransitionTime: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.33 diff --git a/test/e2e/apimachinery/namespace.go b/test/e2e/apimachinery/namespace.go index 121f4be1cc3..f99922abee5 100644 --- a/test/e2e/apimachinery/namespace.go +++ b/test/e2e/apimachinery/namespace.go @@ -20,6 +20,7 @@ import ( "context" "encoding/json" "fmt" + "k8s.io/kubernetes/pkg/features" "strings" "sync" "time" @@ -475,3 +476,106 @@ func unstructuredToNamespace(obj *unstructured.Unstructured) (*v1.Namespace, err return ns, err } + +var _ = SIGDescribe("OrderedNamespaceDeletion", func() { + f := framework.NewDefaultFramework("namespacedeletion") + f.NamespacePodSecurityLevel = admissionapi.LevelBaseline + + f.It("namespace deletion should delete pod first", feature.OrderedNamespaceDeletion, framework.WithFeatureGate(features.OrderedNamespaceDeletion), func(ctx context.Context) { + ensurePodsAreRemovedFirstInOrderedNamespaceDeletion(ctx, f) + }) +}) + +func ensurePodsAreRemovedFirstInOrderedNamespaceDeletion(ctx context.Context, f *framework.Framework) { + ginkgo.By("Creating a test namespace") + namespaceName := "nsdeletetest" + namespace, err := f.CreateNamespace(ctx, namespaceName, nil) + framework.ExpectNoError(err, "failed to create namespace: %s", namespaceName) + nsName := namespace.Name + + ginkgo.By("Waiting for a default service account to be provisioned in namespace") + err = framework.WaitForDefaultServiceAccountInNamespace(ctx, f.ClientSet, nsName) + framework.ExpectNoError(err, "failure while waiting for a default service account to be provisioned in namespace: %s", nsName) + + ginkgo.By("Creating a pod with finalizer in the namespace") + podName := "test-pod" + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Finalizers: []string{ + "e2e.example.com/finalizer", + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "nginx", + Image: imageutils.GetPauseImageName(), + }, + }, + }, + } + pod, err = f.ClientSet.CoreV1().Pods(nsName).Create(ctx, pod, metav1.CreateOptions{}) + framework.ExpectNoError(err, "failed to create pod %s in namespace: %s", podName, nsName) + + ginkgo.By("Waiting for the pod to have running status") + framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod)) + + configMapName := "test-configmap" + ginkgo.By(fmt.Sprintf("Creating a configmap %q in namespace %q", configMapName, nsName)) + configMap := &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: configMapName, + Namespace: nsName, + }, + Data: map[string]string{ + "key": "value", + }, + } + _, err = f.ClientSet.CoreV1().ConfigMaps(nsName).Create(ctx, configMap, metav1.CreateOptions{}) + framework.ExpectNoError(err, "failed to create configmap %q in namespace %q", configMapName, nsName) + + ginkgo.By("Deleting the namespace") + err = f.ClientSet.CoreV1().Namespaces().Delete(ctx, nsName, metav1.DeleteOptions{}) + framework.ExpectNoError(err, "failed to delete namespace: %s", nsName) + // wait 10 seconds to allow the namespace controller to process + time.Sleep(10 * time.Second) + ginkgo.By("the pod should be deleted before processing deletion for other resources") + framework.ExpectNoError(wait.PollUntilContextTimeout(ctx, 2*time.Second, 60*time.Second, true, + func(ctx context.Context) (bool, error) { + _, err = f.ClientSet.CoreV1().ConfigMaps(nsName).Get(ctx, configMapName, metav1.GetOptions{}) + framework.ExpectNoError(err, "configmap %q should still exist in namespace %q", configMapName, nsName) + // the pod should exist and has a deletionTimestamp set + pod, err = f.ClientSet.CoreV1().Pods(nsName).Get(ctx, pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get pod %q in namespace %q", pod.Name, nsName) + if pod.DeletionTimestamp == nil { + framework.Failf("Pod %q in namespace %q does not have a metadata.deletionTimestamp set", pod.Name, nsName) + } + _, err = f.ClientSet.CoreV1().Namespaces().Get(ctx, nsName, metav1.GetOptions{}) + if err != nil && apierrors.IsNotFound(err) { + return false, fmt.Errorf("namespace %s was deleted unexpectedly", nsName) + } + return true, nil + })) + + ginkgo.By(fmt.Sprintf("Removing finalizer from pod %q in namespace %q", podName, nsName)) + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + pod, err = f.ClientSet.CoreV1().Pods(nsName).Get(ctx, podName, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get pod %q in namespace %q", pod.Name, nsName) + pod.Finalizers = []string{} + _, err = f.ClientSet.CoreV1().Pods(nsName).Update(ctx, pod, metav1.UpdateOptions{}) + return err + }) + framework.ExpectNoError(err, "failed to update pod %q and remove finalizer in namespace %q", podName, nsName) + + ginkgo.By("Waiting for the namespace to be removed.") + maxWaitSeconds := int64(60) + *pod.Spec.TerminationGracePeriodSeconds + framework.ExpectNoError(wait.PollUntilContextTimeout(ctx, 1*time.Second, time.Duration(maxWaitSeconds)*time.Second, true, + func(ctx context.Context) (bool, error) { + _, err = f.ClientSet.CoreV1().Namespaces().Get(ctx, namespace.Name, metav1.GetOptions{}) + if err != nil && apierrors.IsNotFound(err) { + return true, nil + } + return false, nil + })) +} diff --git a/test/e2e/feature/feature.go b/test/e2e/feature/feature.go index a3aaf165093..24c6a93b266 100644 --- a/test/e2e/feature/feature.go +++ b/test/e2e/feature/feature.go @@ -251,6 +251,10 @@ var ( // TODO: document the feature (owning SIG, when to use this feature for a test) NodeOutOfServiceVolumeDetach = framework.WithFeature(framework.ValidFeatures.Add("NodeOutOfServiceVolumeDetach")) + // Owner: sig-api-machinery + // Marks tests that enforce ordered namespace deletion. + OrderedNamespaceDeletion = framework.WithFeature(framework.ValidFeatures.Add("OrderedNamespaceDeletion")) + // Owner: sig-network // Marks a single test that tests cluster DNS performance with many services. PerformanceDNS = framework.WithFeature(framework.ValidFeatures.Add("PerformanceDNS"))