test/e2e: Delete test namespaces asynchronously

Now that namespace deletion is reliable, use the suite teardown to
catch non-terminated namespaces and stop waiting for deletion within
each test.

Serial tests that need a clean set of namespaces must use the
appropriate flag to control whether they wait for clean namespaces
on startup.
Clayton Coleman 2019-09-06 16:04:58 -04:00
parent a5135ecdc5
commit 67283da28b
3 changed files with 1 addition and 216 deletions
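
The core of the change is the AfterEach hunk below: the blocking deleteNS helper is replaced with a plain delete request against the API server, so each test returns immediately and namespace termination is verified later at suite teardown. A minimal sketch of that fire-and-forget pattern, using the same pre-1.17 client-go Delete signature that appears in the diff (the helper name and package are illustrative):

package e2e

import (
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	clientset "k8s.io/client-go/kubernetes"
)

// deleteNamespaceAsync issues the namespace delete and returns at once;
// unlike the removed deleteNS helper, it never polls for full termination.
func deleteNamespaceAsync(c clientset.Interface, name string) error {
	// Delete(name, options) is the client-go signature at the time of this
	// commit; a NotFound error just means the namespace is already gone.
	if err := c.CoreV1().Namespaces().Delete(name, nil); err != nil && !apierrors.IsNotFound(err) {
		return err
	}
	return nil
}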


@@ -87,7 +87,6 @@ go_library(
         "//test/e2e/system:go_default_library",
         "//test/utils:go_default_library",
         "//test/utils/image:go_default_library",
-        "//vendor/github.com/davecgh/go-spew/spew:go_default_library",
         "//vendor/github.com/onsi/ginkgo:go_default_library",
         "//vendor/github.com/onsi/ginkgo/config:go_default_library",
         "//vendor/github.com/onsi/gomega:go_default_library",


@@ -303,11 +303,7 @@ func (f *Framework) AfterEach() {
 	if TestContext.DeleteNamespace && (TestContext.DeleteNamespaceOnFailure || !ginkgo.CurrentGinkgoTestDescription().Failed) {
 		for _, ns := range f.namespacesToDelete {
 			ginkgo.By(fmt.Sprintf("Destroying namespace %q for this suite.", ns.Name))
-			timeout := DefaultNamespaceDeletionTimeout
-			if f.NamespaceDeletionTimeout != 0 {
-				timeout = f.NamespaceDeletionTimeout
-			}
-			if err := deleteNS(f.ClientSet, f.DynamicClient, ns.Name, timeout); err != nil {
+			if err := f.ClientSet.CoreV1().Namespaces().Delete(ns.Name, nil); err != nil {
 				if !apierrors.IsNotFound(err) {
 					nsDeletionErrors[ns.Name] = err
 				} else {

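With the per-test wait gone, namespaces that never finish terminating now surface at suite teardown instead. The retained CheckTestingNSDeletedExcept helper, whose signature appears as context in the last file's hunk below, is the kind of check such a hook can run. The wiring here is a hedged sketch rather than the commit's exact teardown code; LoadClientset and Failf are framework helpers assumed to be available:

package e2e

import (
	"github.com/onsi/ginkgo"

	"k8s.io/kubernetes/test/e2e/framework"
)

// Illustrative only: the second function passed to SynchronizedAfterSuite
// runs once, on node 1, after all parallel nodes have finished. Fail the
// suite if any test namespace is still terminating; the empty skip argument
// means no namespace is exempt from the check.
var _ = ginkgo.SynchronizedAfterSuite(func() {}, func() {
	c, err := framework.LoadClientset()
	if err != nil {
		framework.Failf("unable to load clientset: %v", err)
	}
	if err := framework.CheckTestingNSDeletedExcept(c, ""); err != nil {
		framework.Failf("leftover test namespaces: %v", err)
	}
})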

@@ -40,7 +40,6 @@ import (
 	"syscall"
 	"time"
-	"github.com/davecgh/go-spew/spew"
 	"golang.org/x/net/websocket"
 	"k8s.io/klog"
@@ -795,193 +794,6 @@ func CheckTestingNSDeletedExcept(c clientset.Interface, skip string) error {
return fmt.Errorf("Waiting for terminating namespaces to be deleted timed out")
}
// deleteNS deletes the provided namespace, waits for it to be completely deleted, and then checks
// whether there are any pods remaining in a non-terminating state.
func deleteNS(c clientset.Interface, dynamicClient dynamic.Interface, namespace string, timeout time.Duration) error {
startTime := time.Now()
if err := c.CoreV1().Namespaces().Delete(namespace, nil); err != nil {
return err
}
// wait for namespace to delete or timeout.
var lastNamespace *v1.Namespace
err := wait.PollImmediate(2*time.Second, timeout, func() (bool, error) {
var err error
lastNamespace, err = c.CoreV1().Namespaces().Get(namespace, metav1.GetOptions{})
if err != nil {
if apierrs.IsNotFound(err) {
return true, nil
}
Logf("Error while waiting for namespace to be terminated: %v", err)
return false, nil
}
return false, nil
})
// verify there is no more remaining content in the namespace
remainingContent, cerr := hasRemainingContent(c, dynamicClient, namespace)
if cerr != nil {
return cerr
}
// if content remains, let's dump information about the namespace, and system for flake debugging.
remainingPods := 0
missingTimestamp := 0
if remainingContent {
// log information about namespace, and set of namespaces in api server to help flake detection
logNamespace(c, namespace)
logNamespaces(c, namespace)
// if we can, check if there were pods remaining with no timestamp.
remainingPods, missingTimestamp, _ = e2epod.CountRemainingPods(c, namespace)
}
// a timeout waiting for namespace deletion happened!
if err != nil {
// namespaces now have conditions that are useful for debugging generic resources and finalizers
Logf("namespace did not cleanup: %s", spew.Sdump(lastNamespace))
// some content remains in the namespace
if remainingContent {
// pods remain
if remainingPods > 0 {
if missingTimestamp != 0 {
// pods remained, but were not undergoing deletion (namespace controller is probably culprit)
return fmt.Errorf("namespace %v was not deleted with limit: %v, pods remaining: %v, pods missing deletion timestamp: %v", namespace, err, remainingPods, missingTimestamp)
}
// but they were all undergoing deletion (kubelet is probably culprit, check NodeLost)
return fmt.Errorf("namespace %v was not deleted with limit: %v, pods remaining: %v", namespace, err, remainingPods)
}
// other content remains (namespace controller is probably screwed up)
return fmt.Errorf("namespace %v was not deleted with limit: %v, namespaced content other than pods remain", namespace, err)
}
// no remaining content, but namespace was not deleted (namespace controller is probably wedged)
return fmt.Errorf("namespace %v was not deleted with limit: %v, namespace is empty but is not yet removed", namespace, err)
}
Logf("namespace %v deletion completed in %s", namespace, time.Since(startTime))
return nil
}
// logNamespaces logs the number of namespaces by phase
// namespace is the namespace the test was operating against that failed to delete so it can be grepped in logs
func logNamespaces(c clientset.Interface, namespace string) {
namespaceList, err := c.CoreV1().Namespaces().List(metav1.ListOptions{})
if err != nil {
Logf("namespace: %v, unable to list namespaces: %v", namespace, err)
return
}
numActive := 0
numTerminating := 0
for _, namespace := range namespaceList.Items {
if namespace.Status.Phase == v1.NamespaceActive {
numActive++
} else {
numTerminating++
}
}
Logf("namespace: %v, total namespaces: %v, active: %v, terminating: %v", namespace, len(namespaceList.Items), numActive, numTerminating)
}
// logNamespace logs detail about a namespace
func logNamespace(c clientset.Interface, namespace string) {
ns, err := c.CoreV1().Namespaces().Get(namespace, metav1.GetOptions{})
if err != nil {
if apierrs.IsNotFound(err) {
Logf("namespace: %v no longer exists", namespace)
return
}
Logf("namespace: %v, unable to get namespace due to error: %v", namespace, err)
return
}
Logf("namespace: %v, DeletionTimetamp: %v, Finalizers: %v, Phase: %v", ns.Name, ns.DeletionTimestamp, ns.Spec.Finalizers, ns.Status.Phase)
}
// isDynamicDiscoveryError returns true if the error is a group discovery error
// only for groups expected to be created/deleted dynamically during e2e tests
func isDynamicDiscoveryError(err error) bool {
if !discovery.IsGroupDiscoveryFailedError(err) {
return false
}
discoveryErr := err.(*discovery.ErrGroupDiscoveryFailed)
for gv := range discoveryErr.Groups {
switch gv.Group {
case "mygroup.example.com":
// custom_resource_definition
// garbage_collector
case "wardle.k8s.io":
// aggregator
case "metrics.k8s.io":
// aggregated metrics server add-on, no persisted resources
default:
Logf("discovery error for unexpected group: %#v", gv)
return false
}
}
return true
}
// hasRemainingContent checks if there is remaining content in the namespace via API discovery
func hasRemainingContent(c clientset.Interface, dynamicClient dynamic.Interface, namespace string) (bool, error) {
// some tests generate their own framework.Client rather than the default
// TODO: ensure every test call has a configured dynamicClient
if dynamicClient == nil {
return false, nil
}
// find out what content is supported on the server
// Since extension apiserver is not always available, e.g. metrics server sometimes goes down,
// add retry here.
resources, err := waitForServerPreferredNamespacedResources(c.Discovery(), 30*time.Second)
if err != nil {
return false, err
}
resources = discovery.FilteredBy(discovery.SupportsAllVerbs{Verbs: []string{"list", "delete"}}, resources)
groupVersionResources, err := discovery.GroupVersionResources(resources)
if err != nil {
return false, err
}
// TODO: temporary hack for https://github.com/kubernetes/kubernetes/issues/31798
ignoredResources := sets.NewString("bindings")
contentRemaining := false
// dump how many of resource type is on the server in a log.
for gvr := range groupVersionResources {
// get a client for this group version...
dynamicClient := dynamicClient.Resource(gvr).Namespace(namespace)
if err != nil {
// not all resource types support list, so some errors here are normal depending on the resource type.
Logf("namespace: %s, unable to get client - gvr: %v, error: %v", namespace, gvr, err)
continue
}
// get the api resource
apiResource := metav1.APIResource{Name: gvr.Resource, Namespaced: true}
if ignoredResources.Has(gvr.Resource) {
Logf("namespace: %s, resource: %s, ignored listing per whitelist", namespace, apiResource.Name)
continue
}
unstructuredList, err := dynamicClient.List(metav1.ListOptions{})
if err != nil {
// not all resources support list, so we ignore those
if apierrs.IsMethodNotSupported(err) || apierrs.IsNotFound(err) || apierrs.IsForbidden(err) {
continue
}
// skip unavailable servers
if apierrs.IsServiceUnavailable(err) {
continue
}
return false, err
}
if len(unstructuredList.Items) > 0 {
Logf("namespace: %s, resource: %s, items remaining: %v", namespace, apiResource.Name, len(unstructuredList.Items))
contentRemaining = true
}
}
return contentRemaining, nil
}
// ContainerInitInvariant checks that init containers are initialized and invariant on both the older and newer pods.
func ContainerInitInvariant(older, newer runtime.Object) error {
oldPod := older.(*v1.Pod)
@@ -3180,28 +2992,6 @@ func DsFromManifest(url string) (*appsv1.DaemonSet, error) {
return &ds, nil
}
// waitForServerPreferredNamespacedResources waits until server preferred namespaced resources could be successfully discovered.
// TODO: Fix https://github.com/kubernetes/kubernetes/issues/55768 and remove the following retry.
func waitForServerPreferredNamespacedResources(d discovery.DiscoveryInterface, timeout time.Duration) ([]*metav1.APIResourceList, error) {
Logf("Waiting up to %v for server preferred namespaced resources to be successfully discovered", timeout)
var resources []*metav1.APIResourceList
if err := wait.PollImmediate(Poll, timeout, func() (bool, error) {
var err error
resources, err = d.ServerPreferredNamespacedResources()
if err == nil || isDynamicDiscoveryError(err) {
return true, nil
}
if !discovery.IsGroupDiscoveryFailedError(err) {
return false, err
}
Logf("Error discoverying server preferred namespaced resources: %v, retrying in %v.", err, Poll)
return false, nil
}); err != nil {
return nil, err
}
return resources, nil
}
// WaitForPersistentVolumeClaimDeleted waits for a PersistentVolumeClaim to be removed from the system until timeout occurs, whichever comes first.
func WaitForPersistentVolumeClaimDeleted(c clientset.Interface, ns string, pvcName string, Poll, timeout time.Duration) error {
Logf("Waiting up to %v for PersistentVolumeClaim %s to be removed", timeout, pvcName)