Fix duplicate namespace deletion errors, improve namespace deletion error output

This commit is contained in:
derekwaynecarr
2016-08-29 16:33:55 -04:00
parent 39fbfd00ea
commit 83cc808ffb
2 changed files with 159 additions and 31 deletions

View File

@@ -37,6 +37,7 @@ import (
"k8s.io/kubernetes/pkg/client/clientset_generated/release_1_2"
"k8s.io/kubernetes/pkg/client/clientset_generated/release_1_3"
"k8s.io/kubernetes/pkg/client/restclient"
"k8s.io/kubernetes/pkg/client/typed/dynamic"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/fields"
"k8s.io/kubernetes/pkg/labels"
@@ -62,6 +63,7 @@ type Framework struct {
Clientset_1_2 *release_1_2.Clientset
Clientset_1_3 *release_1_3.Clientset
StagingClient *release_1_4.Clientset
ClientPool dynamic.ClientPool
Namespace *api.Namespace // Every test has at least one namespace
namespacesToDelete []*api.Namespace // Some tests have more than one.
@@ -192,6 +194,7 @@ func (f *Framework) BeforeEach() {
clientRepoConfig := getClientRepoConfig(config)
f.StagingClient, err = release_1_4.NewForConfig(clientRepoConfig)
Expect(err).NotTo(HaveOccurred())
f.ClientPool = dynamic.NewClientPool(config, dynamic.LegacyAPIPathResolverFunc)
}
if f.federated {
@@ -295,30 +298,27 @@ func (f *Framework) AfterEach() {
// DeleteNamespace at the very end in defer, to avoid any
// expectation failures preventing deleting the namespace.
defer func() {
nsDeletionErrors := map[string]error{}
if TestContext.DeleteNamespace {
for _, ns := range f.namespacesToDelete {
By(fmt.Sprintf("Destroying namespace %q for this suite.", ns.Name))
timeout := 5 * time.Minute
if f.NamespaceDeletionTimeout != 0 {
timeout = f.NamespaceDeletionTimeout
}
if err := deleteNS(f.Client, ns.Name, timeout); err != nil {
if err := deleteNS(f.Client, f.ClientPool, ns.Name, timeout); err != nil {
if !apierrs.IsNotFound(err) {
Failf("Couldn't delete ns %q: %s", ns.Name, err)
nsDeletionErrors[ns.Name] = err
} else {
Logf("Namespace %v was already deleted", ns.Name)
}
}
}
f.namespacesToDelete = nil
// Delete the federation namespace.
// TODO(nikhiljindal): Uncomment this, once https://github.com/kubernetes/kubernetes/issues/31077 is fixed.
// In the meantime, we will have these extra namespaces in all clusters.
// Note: this will not cause any failure since we create a new namespace for each test in BeforeEach().
// f.deleteFederationNs()
} else {
Logf("Found DeleteNamespace=false, skipping namespace deletion!")
}
@@ -327,6 +327,16 @@ func (f *Framework) AfterEach() {
f.Namespace = nil
f.FederationNamespace = nil
f.Client = nil
f.namespacesToDelete = nil
// if we had errors deleting, report them now.
if len(nsDeletionErrors) != 0 {
messages := []string{}
for namespaceKey, namespaceErr := range nsDeletionErrors {
messages = append(messages, fmt.Sprintf("Couldn't delete ns: %q: %s", namespaceKey, namespaceErr))
}
Failf(strings.Join(messages, ","))
}
}()
if f.federated {

View File

@@ -44,11 +44,13 @@ import (
apierrs "k8s.io/kubernetes/pkg/api/errors"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/unversioned"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/apis/extensions"
"k8s.io/kubernetes/pkg/client/cache"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
"k8s.io/kubernetes/pkg/client/restclient"
"k8s.io/kubernetes/pkg/client/typed/discovery"
"k8s.io/kubernetes/pkg/client/typed/dynamic"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/client/unversioned/clientcmd"
clientcmdapi "k8s.io/kubernetes/pkg/client/unversioned/clientcmd/api"
@@ -1033,11 +1035,12 @@ func CheckTestingNSDeletedExcept(c *client.Client, skip string) error {
// deleteNS deletes the provided namespace, waits for it to be completely deleted, and then checks
// whether there are any pods remaining in a non-terminating state.
func deleteNS(c *client.Client, namespace string, timeout time.Duration) error {
func deleteNS(c *client.Client, clientPool dynamic.ClientPool, namespace string, timeout time.Duration) error {
if err := c.Namespaces().Delete(namespace); err != nil {
return err
}
// wait for namespace to delete or timeout.
err := wait.PollImmediate(5*time.Second, timeout, func() (bool, error) {
if _, err := c.Namespaces().Get(namespace); err != nil {
if apierrs.IsNotFound(err) {
@@ -1049,40 +1052,155 @@ func deleteNS(c *client.Client, namespace string, timeout time.Duration) error {
return false, nil
})
// check for pods that were not deleted
remaining := []string{}
remainingPods := []api.Pod{}
missingTimestamp := false
if pods, perr := c.Pods(namespace).List(api.ListOptions{}); perr == nil {
for _, pod := range pods.Items {
Logf("Pod %s %s on node %s remains, has deletion timestamp %s", namespace, pod.Name, pod.Spec.NodeName, pod.DeletionTimestamp)
remaining = append(remaining, fmt.Sprintf("%s{Reason=%s}", pod.Name, pod.Status.Reason))
remainingPods = append(remainingPods, pod)
if pod.DeletionTimestamp == nil {
missingTimestamp = true
}
}
// verify there is no more remaining content in the namespace
remainingContent, cerr := hasRemainingContent(c, clientPool, namespace)
if cerr != nil {
return cerr
}
// log pod status
if len(remainingPods) > 0 {
logPodStates(remainingPods)
// if content remains, let's dump information about the namespace, and system for flake debugging.
remainingPods := 0
missingTimestamp := 0
if remainingContent {
// log information about namespace, and set of namespaces in api server to help flake detection
logNamespace(c, namespace)
logNamespaces(c, namespace)
// if we can, check if there were pods remaining with no timestamp.
remainingPods, missingTimestamp, _ = countRemainingPods(c, namespace)
}
// a timeout occurred
// a timeout waiting for namespace deletion happened!
if err != nil {
if missingTimestamp {
return fmt.Errorf("namespace %s was not deleted within limit: %v, some pods were not marked with a deletion timestamp, pods remaining: %v", namespace, err, remaining)
// some content remains in the namespace
if remainingContent {
// pods remain
if remainingPods > 0 {
// but they were all undergoing deletion (kubelet is probably culprit)
if missingTimestamp == 0 {
return fmt.Errorf("namespace %v was not deleted with limit: %v, pods remaining: %v, pods missing deletion timestamp: %v", namespace, err, remainingPods, missingTimestamp)
}
// pods remained, but were not undergoing deletion (namespace controller is probably culprit)
return fmt.Errorf("namespace %v was not deleted with limit: %v, pods remaining: %v", namespace, err, remainingPods)
}
// other content remains (namespace controller is probably screwed up)
return fmt.Errorf("namespace %v was not deleted with limit: %v, namespaced content other than pods remain", namespace, err)
}
return fmt.Errorf("namespace %s was not deleted within limit: %v, pods remaining: %v", namespace, err, remaining)
}
// pods were not deleted but the namespace was deleted
if len(remaining) > 0 {
return fmt.Errorf("pods remained within namespace %s after deletion: %v", namespace, remaining)
// no remaining content, but namespace was not deleted (namespace controller is probably wedged)
return fmt.Errorf("namespace %v was not deleted with limit: %v, namespace is empty but is not yet removed", namespace, err)
}
return nil
}
// logNamespaces logs the number of namespaces by phase
// namespace is the namespace the test was operating against that failed to delete so it can be grepped in logs
func logNamespaces(c *client.Client, namespace string) {
namespaceList, err := c.Namespaces().List(api.ListOptions{})
if err != nil {
Logf("namespace: %v, unable to list namespaces: %v", namespace, err)
return
}
numActive := 0
numTerminating := 0
for _, namespace := range namespaceList.Items {
if namespace.Status.Phase == api.NamespaceActive {
numActive++
} else {
numTerminating++
}
}
Logf("namespace: %v, total namespaces: %v, active: %v, terminating: %v", namespace, len(namespaceList.Items), numActive, numTerminating)
}
// logNamespace logs detail about a namespace
func logNamespace(c *client.Client, namespace string) {
ns, err := c.Namespaces().Get(namespace)
if err != nil {
if apierrs.IsNotFound(err) {
Logf("namespace: %v no longer exists", namespace)
return
}
Logf("namespace: %v, unable to get namespace due to error: %v", namespace, err)
return
}
Logf("namespace: %v, DeletionTimetamp: %v, Finalizers: %v, Phase: %v", ns.Name, ns.DeletionTimestamp, ns.Spec.Finalizers, ns.Status.Phase)
}
// countRemainingPods queries the server to count number of remaining pods, and number of pods that had a missing deletion timestamp.
func countRemainingPods(c *client.Client, namespace string) (int, int, error) {
// check for remaining pods
pods, err := c.Pods(namespace).List(api.ListOptions{})
if err != nil {
return 0, 0, err
}
// nothing remains!
if len(pods.Items) == 0 {
return 0, 0, nil
}
// stuff remains, log about it
logPodStates(pods.Items)
// check if there were any pods with missing deletion timestamp
numPods := len(pods.Items)
missingTimestamp := 0
for _, pod := range pods.Items {
if pod.DeletionTimestamp == nil {
missingTimestamp++
}
}
return numPods, missingTimestamp, nil
}
// hasRemainingContent checks if there is remaining content in the namespace via API discovery
func hasRemainingContent(c *client.Client, clientPool dynamic.ClientPool, namespace string) (bool, error) {
// some tests generate their own framework.Client rather than the default
// TODO: ensure every test call has a configured clientPool
if clientPool == nil {
return false, nil
}
// find out what content is supported on the server
groupVersionResources, err := c.Discovery().ServerPreferredNamespacedResources()
if err != nil {
return false, err
}
contentRemaining := false
// dump how many of resource type is on the server in a log.
for _, gvr := range groupVersionResources {
// get a client for this group version...
dynamicClient, err := clientPool.ClientForGroupVersion(gvr.GroupVersion())
if err != nil {
// not all resource types support list, so some errors here are normal depending on the resource type.
Logf("namespace: %s, unable to get client - gvr: %v, error: %v", namespace, gvr, err)
continue
}
// get the api resource
apiResource := unversioned.APIResource{Name: gvr.Resource, Namespaced: true}
obj, err := dynamicClient.Resource(&apiResource, namespace).List(&v1.ListOptions{})
if err != nil {
// not all resources support list, so we ignore those
if apierrs.IsMethodNotSupported(err) || apierrs.IsNotFound(err) || apierrs.IsForbidden(err) {
continue
}
return false, err
}
unstructuredList, ok := obj.(*runtime.UnstructuredList)
if !ok {
return false, fmt.Errorf("namespace: %s, resource: %s, expected *runtime.UnstructuredList, got %#v", namespace, apiResource.Name, obj)
}
if len(unstructuredList.Items) > 0 {
Logf("namespace: %s, resource: %s, items remaining: %v", namespace, apiResource.Name, len(unstructuredList.Items))
contentRemaining = true
}
}
return contentRemaining, nil
}
func ContainerInitInvariant(older, newer runtime.Object) error {
oldPod := older.(*api.Pod)
newPod := newer.(*api.Pod)