From 83cc808ffb74974b1f2684a24d58b3bc689647c2 Mon Sep 17 00:00:00 2001 From: derekwaynecarr Date: Mon, 29 Aug 2016 16:33:55 -0400 Subject: [PATCH 1/2] Fix duplicate namespace deletion errors, improve namespace deletion error output --- test/e2e/framework/framework.go | 22 +++-- test/e2e/framework/util.go | 168 +++++++++++++++++++++++++++----- 2 files changed, 159 insertions(+), 31 deletions(-) diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index d16bd03d630..c6f0e1b0917 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -37,6 +37,7 @@ import ( "k8s.io/kubernetes/pkg/client/clientset_generated/release_1_2" "k8s.io/kubernetes/pkg/client/clientset_generated/release_1_3" "k8s.io/kubernetes/pkg/client/restclient" + "k8s.io/kubernetes/pkg/client/typed/dynamic" client "k8s.io/kubernetes/pkg/client/unversioned" "k8s.io/kubernetes/pkg/fields" "k8s.io/kubernetes/pkg/labels" @@ -62,6 +63,7 @@ type Framework struct { Clientset_1_2 *release_1_2.Clientset Clientset_1_3 *release_1_3.Clientset StagingClient *release_1_4.Clientset + ClientPool dynamic.ClientPool Namespace *api.Namespace // Every test has at least one namespace namespacesToDelete []*api.Namespace // Some tests have more than one. @@ -192,6 +194,7 @@ func (f *Framework) BeforeEach() { clientRepoConfig := getClientRepoConfig(config) f.StagingClient, err = release_1_4.NewForConfig(clientRepoConfig) Expect(err).NotTo(HaveOccurred()) + f.ClientPool = dynamic.NewClientPool(config, dynamic.LegacyAPIPathResolverFunc) } if f.federated { @@ -295,30 +298,27 @@ func (f *Framework) AfterEach() { // DeleteNamespace at the very end in defer, to avoid any // expectation failures preventing deleting the namespace. defer func() { + nsDeletionErrors := map[string]error{} if TestContext.DeleteNamespace { for _, ns := range f.namespacesToDelete { By(fmt.Sprintf("Destroying namespace %q for this suite.", ns.Name)) - timeout := 5 * time.Minute if f.NamespaceDeletionTimeout != 0 { timeout = f.NamespaceDeletionTimeout } - if err := deleteNS(f.Client, ns.Name, timeout); err != nil { + if err := deleteNS(f.Client, f.ClientPool, ns.Name, timeout); err != nil { if !apierrs.IsNotFound(err) { - Failf("Couldn't delete ns %q: %s", ns.Name, err) + nsDeletionErrors[ns.Name] = err } else { Logf("Namespace %v was already deleted", ns.Name) } } } - f.namespacesToDelete = nil - // Delete the federation namespace. // TODO(nikhiljindal): Uncomment this, once https://github.com/kubernetes/kubernetes/issues/31077 is fixed. // In the meantime, we will have these extra namespaces in all clusters. // Note: this will not cause any failure since we create a new namespace for each test in BeforeEach(). // f.deleteFederationNs() - } else { Logf("Found DeleteNamespace=false, skipping namespace deletion!") } @@ -327,6 +327,16 @@ func (f *Framework) AfterEach() { f.Namespace = nil f.FederationNamespace = nil f.Client = nil + f.namespacesToDelete = nil + + // if we had errors deleting, report them now. + if len(nsDeletionErrors) != 0 { + messages := []string{} + for namespaceKey, namespaceErr := range nsDeletionErrors { + messages = append(messages, fmt.Sprintf("Couldn't delete ns: %q: %s", namespaceKey, namespaceErr)) + } + Failf(strings.Join(messages, ",")) + } }() if f.federated { diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index d54cf362b61..9d80701e350 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -44,11 +44,13 @@ import ( apierrs "k8s.io/kubernetes/pkg/api/errors" "k8s.io/kubernetes/pkg/api/resource" "k8s.io/kubernetes/pkg/api/unversioned" + "k8s.io/kubernetes/pkg/api/v1" "k8s.io/kubernetes/pkg/apis/extensions" "k8s.io/kubernetes/pkg/client/cache" clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset" "k8s.io/kubernetes/pkg/client/restclient" "k8s.io/kubernetes/pkg/client/typed/discovery" + "k8s.io/kubernetes/pkg/client/typed/dynamic" client "k8s.io/kubernetes/pkg/client/unversioned" "k8s.io/kubernetes/pkg/client/unversioned/clientcmd" clientcmdapi "k8s.io/kubernetes/pkg/client/unversioned/clientcmd/api" @@ -1033,11 +1035,12 @@ func CheckTestingNSDeletedExcept(c *client.Client, skip string) error { // deleteNS deletes the provided namespace, waits for it to be completely deleted, and then checks // whether there are any pods remaining in a non-terminating state. -func deleteNS(c *client.Client, namespace string, timeout time.Duration) error { +func deleteNS(c *client.Client, clientPool dynamic.ClientPool, namespace string, timeout time.Duration) error { if err := c.Namespaces().Delete(namespace); err != nil { return err } + // wait for namespace to delete or timeout. err := wait.PollImmediate(5*time.Second, timeout, func() (bool, error) { if _, err := c.Namespaces().Get(namespace); err != nil { if apierrs.IsNotFound(err) { @@ -1049,40 +1052,155 @@ func deleteNS(c *client.Client, namespace string, timeout time.Duration) error { return false, nil }) - // check for pods that were not deleted - remaining := []string{} - remainingPods := []api.Pod{} - missingTimestamp := false - if pods, perr := c.Pods(namespace).List(api.ListOptions{}); perr == nil { - for _, pod := range pods.Items { - Logf("Pod %s %s on node %s remains, has deletion timestamp %s", namespace, pod.Name, pod.Spec.NodeName, pod.DeletionTimestamp) - remaining = append(remaining, fmt.Sprintf("%s{Reason=%s}", pod.Name, pod.Status.Reason)) - remainingPods = append(remainingPods, pod) - if pod.DeletionTimestamp == nil { - missingTimestamp = true - } - } + // verify there is no more remaining content in the namespace + remainingContent, cerr := hasRemainingContent(c, clientPool, namespace) + if cerr != nil { + return cerr } - // log pod status - if len(remainingPods) > 0 { - logPodStates(remainingPods) + // if content remains, let's dump information about the namespace, and system for flake debugging. + remainingPods := 0 + missingTimestamp := 0 + if remainingContent { + // log information about namespace, and set of namespaces in api server to help flake detection + logNamespace(c, namespace) + logNamespaces(c, namespace) + + // if we can, check if there were pods remaining with no timestamp. + remainingPods, missingTimestamp, _ = countRemainingPods(c, namespace) } - // a timeout occurred + // a timeout waiting for namespace deletion happened! if err != nil { - if missingTimestamp { - return fmt.Errorf("namespace %s was not deleted within limit: %v, some pods were not marked with a deletion timestamp, pods remaining: %v", namespace, err, remaining) + // some content remains in the namespace + if remainingContent { + // pods remain + if remainingPods > 0 { + // but they were all undergoing deletion (kubelet is probably culprit) + if missingTimestamp == 0 { + return fmt.Errorf("namespace %v was not deleted with limit: %v, pods remaining: %v, pods missing deletion timestamp: %v", namespace, err, remainingPods, missingTimestamp) + } + // pods remained, but were not undergoing deletion (namespace controller is probably culprit) + return fmt.Errorf("namespace %v was not deleted with limit: %v, pods remaining: %v", namespace, err, remainingPods) + } + // other content remains (namespace controller is probably screwed up) + return fmt.Errorf("namespace %v was not deleted with limit: %v, namespaced content other than pods remain", namespace, err) } - return fmt.Errorf("namespace %s was not deleted within limit: %v, pods remaining: %v", namespace, err, remaining) - } - // pods were not deleted but the namespace was deleted - if len(remaining) > 0 { - return fmt.Errorf("pods remained within namespace %s after deletion: %v", namespace, remaining) + // no remaining content, but namespace was not deleted (namespace controller is probably wedged) + return fmt.Errorf("namespace %v was not deleted with limit: %v, namespace is empty but is not yet removed", namespace, err) } return nil } +// logNamespaces logs the number of namespaces by phase +// namespace is the namespace the test was operating against that failed to delete so it can be grepped in logs +func logNamespaces(c *client.Client, namespace string) { + namespaceList, err := c.Namespaces().List(api.ListOptions{}) + if err != nil { + Logf("namespace: %v, unable to list namespaces: %v", namespace, err) + return + } + + numActive := 0 + numTerminating := 0 + for _, namespace := range namespaceList.Items { + if namespace.Status.Phase == api.NamespaceActive { + numActive++ + } else { + numTerminating++ + } + } + Logf("namespace: %v, total namespaces: %v, active: %v, terminating: %v", namespace, len(namespaceList.Items), numActive, numTerminating) +} + +// logNamespace logs detail about a namespace +func logNamespace(c *client.Client, namespace string) { + ns, err := c.Namespaces().Get(namespace) + if err != nil { + if apierrs.IsNotFound(err) { + Logf("namespace: %v no longer exists", namespace) + return + } + Logf("namespace: %v, unable to get namespace due to error: %v", namespace, err) + return + } + Logf("namespace: %v, DeletionTimetamp: %v, Finalizers: %v, Phase: %v", ns.Name, ns.DeletionTimestamp, ns.Spec.Finalizers, ns.Status.Phase) +} + +// countRemainingPods queries the server to count number of remaining pods, and number of pods that had a missing deletion timestamp. +func countRemainingPods(c *client.Client, namespace string) (int, int, error) { + // check for remaining pods + pods, err := c.Pods(namespace).List(api.ListOptions{}) + if err != nil { + return 0, 0, err + } + + // nothing remains! + if len(pods.Items) == 0 { + return 0, 0, nil + } + + // stuff remains, log about it + logPodStates(pods.Items) + + // check if there were any pods with missing deletion timestamp + numPods := len(pods.Items) + missingTimestamp := 0 + for _, pod := range pods.Items { + if pod.DeletionTimestamp == nil { + missingTimestamp++ + } + } + return numPods, missingTimestamp, nil +} + +// hasRemainingContent checks if there is remaining content in the namespace via API discovery +func hasRemainingContent(c *client.Client, clientPool dynamic.ClientPool, namespace string) (bool, error) { + // some tests generate their own framework.Client rather than the default + // TODO: ensure every test call has a configured clientPool + if clientPool == nil { + return false, nil + } + + // find out what content is supported on the server + groupVersionResources, err := c.Discovery().ServerPreferredNamespacedResources() + if err != nil { + return false, err + } + + contentRemaining := false + + // dump how many of resource type is on the server in a log. + for _, gvr := range groupVersionResources { + // get a client for this group version... + dynamicClient, err := clientPool.ClientForGroupVersion(gvr.GroupVersion()) + if err != nil { + // not all resource types support list, so some errors here are normal depending on the resource type. + Logf("namespace: %s, unable to get client - gvr: %v, error: %v", namespace, gvr, err) + continue + } + // get the api resource + apiResource := unversioned.APIResource{Name: gvr.Resource, Namespaced: true} + obj, err := dynamicClient.Resource(&apiResource, namespace).List(&v1.ListOptions{}) + if err != nil { + // not all resources support list, so we ignore those + if apierrs.IsMethodNotSupported(err) || apierrs.IsNotFound(err) || apierrs.IsForbidden(err) { + continue + } + return false, err + } + unstructuredList, ok := obj.(*runtime.UnstructuredList) + if !ok { + return false, fmt.Errorf("namespace: %s, resource: %s, expected *runtime.UnstructuredList, got %#v", namespace, apiResource.Name, obj) + } + if len(unstructuredList.Items) > 0 { + Logf("namespace: %s, resource: %s, items remaining: %v", namespace, apiResource.Name, len(unstructuredList.Items)) + contentRemaining = true + } + } + return contentRemaining, nil +} + func ContainerInitInvariant(older, newer runtime.Object) error { oldPod := older.(*api.Pod) newPod := newer.(*api.Pod) From 367cc42541e0045055e893dc45f798b9f0bb8803 Mon Sep 17 00:00:00 2001 From: derekwaynecarr Date: Wed, 31 Aug 2016 13:41:14 -0400 Subject: [PATCH 2/2] E2E framework ignores whitelisted resources when verifying namespace deletion --- test/e2e/framework/util.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index 9d80701e350..c3244219565 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -1168,6 +1168,9 @@ func hasRemainingContent(c *client.Client, clientPool dynamic.ClientPool, namesp return false, err } + // TODO: temporary hack for https://github.com/kubernetes/kubernetes/issues/31798 + ignoredResources := sets.NewString("bindings") + contentRemaining := false // dump how many of resource type is on the server in a log. @@ -1181,6 +1184,11 @@ func hasRemainingContent(c *client.Client, clientPool dynamic.ClientPool, namesp } // get the api resource apiResource := unversioned.APIResource{Name: gvr.Resource, Namespaced: true} + // TODO: temporary hack for https://github.com/kubernetes/kubernetes/issues/31798 + if ignoredResources.Has(apiResource.Name) { + Logf("namespace: %s, resource: %s, ignored listing per whitelist", namespace, apiResource.Name) + continue + } obj, err := dynamicClient.Resource(&apiResource, namespace).List(&v1.ListOptions{}) if err != nil { // not all resources support list, so we ignore those