Merge pull request #82439 from smarterclayton/async_ns_cleanup

test/e2e: Delete test namespaces asynchronously
This commit is contained in:
Kubernetes Prow Robot 2019-09-30 14:35:31 -07:00 committed by GitHub
commit 2d491ace8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 1 additions and 216 deletions

View File

@ -88,7 +88,6 @@ go_library(
"//test/e2e/system:go_default_library",
"//test/utils:go_default_library",
"//test/utils/image:go_default_library",
"//vendor/github.com/davecgh/go-spew/spew:go_default_library",
"//vendor/github.com/onsi/ginkgo:go_default_library",
"//vendor/github.com/onsi/ginkgo/config:go_default_library",
"//vendor/github.com/onsi/gomega:go_default_library",

View File

@ -303,11 +303,7 @@ func (f *Framework) AfterEach() {
if TestContext.DeleteNamespace && (TestContext.DeleteNamespaceOnFailure || !ginkgo.CurrentGinkgoTestDescription().Failed) {
for _, ns := range f.namespacesToDelete {
ginkgo.By(fmt.Sprintf("Destroying namespace %q for this suite.", ns.Name))
timeout := DefaultNamespaceDeletionTimeout
if f.NamespaceDeletionTimeout != 0 {
timeout = f.NamespaceDeletionTimeout
}
if err := deleteNS(f.ClientSet, f.DynamicClient, ns.Name, timeout); err != nil {
if err := f.ClientSet.CoreV1().Namespaces().Delete(ns.Name, nil); err != nil {
if !apierrors.IsNotFound(err) {
nsDeletionErrors[ns.Name] = err
} else {

View File

@ -40,7 +40,6 @@ import (
"syscall"
"time"
"github.com/davecgh/go-spew/spew"
"golang.org/x/net/websocket"
"k8s.io/klog"
@ -666,193 +665,6 @@ func CheckTestingNSDeletedExcept(c clientset.Interface, skip string) error {
return fmt.Errorf("Waiting for terminating namespaces to be deleted timed out")
}
// deleteNS deletes the provided namespace, waits for it to be completely deleted, and then checks
// whether there are any pods remaining in a non-terminating state.
func deleteNS(c clientset.Interface, dynamicClient dynamic.Interface, namespace string, timeout time.Duration) error {
startTime := time.Now()
if err := c.CoreV1().Namespaces().Delete(namespace, nil); err != nil {
return err
}
// wait for namespace to delete or timeout.
var lastNamespace *v1.Namespace
err := wait.PollImmediate(2*time.Second, timeout, func() (bool, error) {
var err error
lastNamespace, err = c.CoreV1().Namespaces().Get(namespace, metav1.GetOptions{})
if err != nil {
if apierrs.IsNotFound(err) {
return true, nil
}
Logf("Error while waiting for namespace to be terminated: %v", err)
return false, nil
}
return false, nil
})
// verify there is no more remaining content in the namespace
remainingContent, cerr := hasRemainingContent(c, dynamicClient, namespace)
if cerr != nil {
return cerr
}
// if content remains, let's dump information about the namespace, and system for flake debugging.
remainingPods := 0
missingTimestamp := 0
if remainingContent {
// log information about namespace, and set of namespaces in api server to help flake detection
logNamespace(c, namespace)
logNamespaces(c, namespace)
// if we can, check if there were pods remaining with no timestamp.
remainingPods, missingTimestamp, _ = e2epod.CountRemainingPods(c, namespace)
}
// a timeout waiting for namespace deletion happened!
if err != nil {
// namespaces now have conditions that are useful for debugging generic resources and finalizers
Logf("namespace did not cleanup: %s", spew.Sdump(lastNamespace))
// some content remains in the namespace
if remainingContent {
// pods remain
if remainingPods > 0 {
if missingTimestamp != 0 {
// pods remained, but were not undergoing deletion (namespace controller is probably culprit)
return fmt.Errorf("namespace %v was not deleted with limit: %v, pods remaining: %v, pods missing deletion timestamp: %v", namespace, err, remainingPods, missingTimestamp)
}
// but they were all undergoing deletion (kubelet is probably culprit, check NodeLost)
return fmt.Errorf("namespace %v was not deleted with limit: %v, pods remaining: %v", namespace, err, remainingPods)
}
// other content remains (namespace controller is probably screwed up)
return fmt.Errorf("namespace %v was not deleted with limit: %v, namespaced content other than pods remain", namespace, err)
}
// no remaining content, but namespace was not deleted (namespace controller is probably wedged)
return fmt.Errorf("namespace %v was not deleted with limit: %v, namespace is empty but is not yet removed", namespace, err)
}
Logf("namespace %v deletion completed in %s", namespace, time.Since(startTime))
return nil
}
// logNamespaces logs the number of namespaces by phase
// namespace is the namespace the test was operating against that failed to delete so it can be grepped in logs
func logNamespaces(c clientset.Interface, namespace string) {
namespaceList, err := c.CoreV1().Namespaces().List(metav1.ListOptions{})
if err != nil {
Logf("namespace: %v, unable to list namespaces: %v", namespace, err)
return
}
numActive := 0
numTerminating := 0
for _, namespace := range namespaceList.Items {
if namespace.Status.Phase == v1.NamespaceActive {
numActive++
} else {
numTerminating++
}
}
Logf("namespace: %v, total namespaces: %v, active: %v, terminating: %v", namespace, len(namespaceList.Items), numActive, numTerminating)
}
// logNamespace logs detail about a namespace
func logNamespace(c clientset.Interface, namespace string) {
ns, err := c.CoreV1().Namespaces().Get(namespace, metav1.GetOptions{})
if err != nil {
if apierrs.IsNotFound(err) {
Logf("namespace: %v no longer exists", namespace)
return
}
Logf("namespace: %v, unable to get namespace due to error: %v", namespace, err)
return
}
Logf("namespace: %v, DeletionTimetamp: %v, Finalizers: %v, Phase: %v", ns.Name, ns.DeletionTimestamp, ns.Spec.Finalizers, ns.Status.Phase)
}
// isDynamicDiscoveryError returns true if the error is a group discovery error
// only for groups expected to be created/deleted dynamically during e2e tests
func isDynamicDiscoveryError(err error) bool {
if !discovery.IsGroupDiscoveryFailedError(err) {
return false
}
discoveryErr := err.(*discovery.ErrGroupDiscoveryFailed)
for gv := range discoveryErr.Groups {
switch gv.Group {
case "mygroup.example.com":
// custom_resource_definition
// garbage_collector
case "wardle.k8s.io":
// aggregator
case "metrics.k8s.io":
// aggregated metrics server add-on, no persisted resources
default:
Logf("discovery error for unexpected group: %#v", gv)
return false
}
}
return true
}
// hasRemainingContent checks if there is remaining content in the namespace via API discovery
func hasRemainingContent(c clientset.Interface, dynamicClient dynamic.Interface, namespace string) (bool, error) {
// some tests generate their own framework.Client rather than the default
// TODO: ensure every test call has a configured dynamicClient
if dynamicClient == nil {
return false, nil
}
// find out what content is supported on the server
// Since extension apiserver is not always available, e.g. metrics server sometimes goes down,
// add retry here.
resources, err := waitForServerPreferredNamespacedResources(c.Discovery(), 30*time.Second)
if err != nil {
return false, err
}
resources = discovery.FilteredBy(discovery.SupportsAllVerbs{Verbs: []string{"list", "delete"}}, resources)
groupVersionResources, err := discovery.GroupVersionResources(resources)
if err != nil {
return false, err
}
// TODO: temporary hack for https://github.com/kubernetes/kubernetes/issues/31798
ignoredResources := sets.NewString("bindings")
contentRemaining := false
// dump how many of resource type is on the server in a log.
for gvr := range groupVersionResources {
// get a client for this group version...
dynamicClient := dynamicClient.Resource(gvr).Namespace(namespace)
if err != nil {
// not all resource types support list, so some errors here are normal depending on the resource type.
Logf("namespace: %s, unable to get client - gvr: %v, error: %v", namespace, gvr, err)
continue
}
// get the api resource
apiResource := metav1.APIResource{Name: gvr.Resource, Namespaced: true}
if ignoredResources.Has(gvr.Resource) {
Logf("namespace: %s, resource: %s, ignored listing per whitelist", namespace, apiResource.Name)
continue
}
unstructuredList, err := dynamicClient.List(metav1.ListOptions{})
if err != nil {
// not all resources support list, so we ignore those
if apierrs.IsMethodNotSupported(err) || apierrs.IsNotFound(err) || apierrs.IsForbidden(err) {
continue
}
// skip unavailable servers
if apierrs.IsServiceUnavailable(err) {
continue
}
return false, err
}
if len(unstructuredList.Items) > 0 {
Logf("namespace: %s, resource: %s, items remaining: %v", namespace, apiResource.Name, len(unstructuredList.Items))
contentRemaining = true
}
}
return contentRemaining, nil
}
// ContainerInitInvariant checks for an init containers are initialized and invariant on both older and newer.
func ContainerInitInvariant(older, newer runtime.Object) error {
oldPod := older.(*v1.Pod)
@ -3051,28 +2863,6 @@ func DsFromManifest(url string) (*appsv1.DaemonSet, error) {
return &ds, nil
}
// waitForServerPreferredNamespacedResources waits until server preferred namespaced resources could be successfully discovered.
// TODO: Fix https://github.com/kubernetes/kubernetes/issues/55768 and remove the following retry.
func waitForServerPreferredNamespacedResources(d discovery.DiscoveryInterface, timeout time.Duration) ([]*metav1.APIResourceList, error) {
Logf("Waiting up to %v for server preferred namespaced resources to be successfully discovered", timeout)
var resources []*metav1.APIResourceList
if err := wait.PollImmediate(Poll, timeout, func() (bool, error) {
var err error
resources, err = d.ServerPreferredNamespacedResources()
if err == nil || isDynamicDiscoveryError(err) {
return true, nil
}
if !discovery.IsGroupDiscoveryFailedError(err) {
return false, err
}
Logf("Error discoverying server preferred namespaced resources: %v, retrying in %v.", err, Poll)
return false, nil
}); err != nil {
return nil, err
}
return resources, nil
}
// WaitForPersistentVolumeClaimDeleted waits for a PersistentVolumeClaim to be removed from the system until timeout occurs, whichever comes first.
func WaitForPersistentVolumeClaimDeleted(c clientset.Interface, ns string, pvcName string, Poll, timeout time.Duration) error {
Logf("Waiting up to %v for PersistentVolumeClaim %s to be removed", timeout, pvcName)