diff --git a/hack/ginkgo-e2e.sh b/hack/ginkgo-e2e.sh
index d2f892f58b9..3cffaab44af 100755
--- a/hack/ginkgo-e2e.sh
+++ b/hack/ginkgo-e2e.sh
@@ -104,6 +104,7 @@ export PATH=$(dirname "${e2e_test}"):"${PATH}"
   --node-instance-group="${NODE_INSTANCE_GROUP:-}" \
   --num-nodes="${NUM_MINIONS:-}" \
   --prefix="${KUBE_GCE_INSTANCE_PREFIX:-e2e}" \
+  ${E2E_CLEAN_START:+"--clean-start=true"} \
   ${E2E_MIN_STARTUP_PODS:+"--minStartupPods=${E2E_MIN_STARTUP_PODS}"} \
   ${E2E_REPORT_DIR:+"--report-dir=${E2E_REPORT_DIR}"} \
   "${@:-}"
diff --git a/hack/jenkins/e2e.sh b/hack/jenkins/e2e.sh
index afa66752230..78154a8cc0e 100755
--- a/hack/jenkins/e2e.sh
+++ b/hack/jenkins/e2e.sh
@@ -474,6 +474,8 @@ case ${JOB_NAME} in
     : ${E2E_DOWN:="false"}
     : ${E2E_NETWORK:="gce-soak-weekly"}
     : ${E2E_UP:="false"}
+    # Clear out any orphaned namespaces in case the previous run was interrupted.
+    : ${E2E_CLEAN_START:="true"}
     : ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \
           ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
           ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
@@ -502,6 +504,8 @@ case ${JOB_NAME} in
     : ${E2E_DOWN:="false"}
     : ${E2E_NETWORK:="gce-soak-weekly-current-release"}
     : ${E2E_UP:="false"}
+    # Clear out any orphaned namespaces in case the previous run was interrupted.
+    : ${E2E_CLEAN_START:="true"}
     : ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \
           ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
           ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
@@ -790,6 +794,8 @@ case ${JOB_NAME} in
     : ${E2E_NETWORK:="gke-soak-weekly"}
     : ${E2E_DOWN:="false"}
     : ${E2E_UP:="false"}
+    # Clear out any orphaned namespaces in case the previous run was interrupted.
+    : ${E2E_CLEAN_START:="true"}
     : ${PROJECT:="kubernetes-jenkins"}
     : ${E2E_OPT:="--check_version_skew=false"}
     : ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \
@@ -1489,6 +1495,7 @@ export KUBE_SKIP_CONFIRMATIONS=y
 export E2E_UP="${E2E_UP:-true}"
 export E2E_TEST="${E2E_TEST:-true}"
 export E2E_DOWN="${E2E_DOWN:-true}"
+export E2E_CLEAN_START="${E2E_CLEAN_START:-}"
 # Used by hack/ginkgo-e2e.sh to enable ginkgo's parallel test runner.
 export GINKGO_PARALLEL=${GINKGO_PARALLEL:-}
diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt
index 8e9587303a0..1a825661c3a 100644
--- a/hack/verify-flags/known-flags.txt
+++ b/hack/verify-flags/known-flags.txt
@@ -33,6 +33,7 @@ cert-dir
 certificate-authority
 cgroup-root
 chaos-chance
+clean-start
 cleanup-iptables
 client-ca-file
 client-certificate
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index c07697f568d..aa0e5a32a81 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -83,6 +83,7 @@ func init() {
 	flag.StringVar(&testContext.PrometheusPushGateway, "prom-push-gateway", "", "The URL to prometheus gateway, so that metrics can be pushed during e2es and scraped by prometheus. Typically something like 127.0.0.1:9091.")
 	flag.BoolVar(&testContext.VerifyServiceAccount, "e2e-verify-service-account", true, "If true tests will verify the service account before running.")
 	flag.BoolVar(&testContext.DeleteNamespace, "delete-namespace", true, "If true tests will delete namespace after completion. It is only designed to make debugging easier, DO NOT turn it off by default.")
+	flag.BoolVar(&testContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to clean up test namespaces from failed/interrupted e2e runs in a long-lived cluster.")
 	flag.BoolVar(&testContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", true, "If set to true framework will be monitoring resource usage of system add-ons in (some) e2e tests.")
 }
 
@@ -126,6 +127,24 @@ func TestE2E(t *testing.T) {
 	}
 	gomega.RegisterFailHandler(ginkgo.Fail)
 
+	c, err := loadClient()
+	if err != nil {
+		glog.Fatal("Error loading client: ", err)
+	}
+
+	// Delete any namespaces except default and kube-system. This ensures no
+	// lingering resources are left over from a previous test run.
+	if testContext.CleanStart {
+		deleted, err := deleteNamespaces(c, nil /* deleteFilter */, []string{api.NamespaceSystem, api.NamespaceDefault})
+		if err != nil {
+			t.Errorf("Error deleting orphaned namespaces: %v", err)
+		}
+		glog.Infof("Waiting for deletion of the following namespaces: %v", deleted)
+		if err := waitForNamespacesDeleted(c, deleted, namespaceCleanupTimeout); err != nil {
+			glog.Fatalf("Failed to delete orphaned namespaces %v: %v", deleted, err)
+		}
+	}
+
 	// Ensure all pods are running and ready before starting tests (otherwise,
 	// cluster infrastructure pods that are being pulled or started can block
 	// test pods from running, and tests that ensure all pods are running and
diff --git a/test/e2e/namespace.go b/test/e2e/namespace.go
index fbaa89ee60c..af07c23f1aa 100644
--- a/test/e2e/namespace.go
+++ b/test/e2e/namespace.go
@@ -32,17 +32,6 @@ import (
 	. "github.com/onsi/gomega"
 )
 
-func countRemaining(c *client.Client, withName string) (int, error) {
-	var cnt = 0
-	nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
-	for _, item := range nsList.Items {
-		if strings.Contains(item.Name, "nslifetest") {
-			cnt++
-		}
-	}
-	return cnt, err
-}
-
 func extinguish(c *client.Client, totalNS int, maxAllowedAfterDel int, maxSeconds int) {
 	var err error
 
@@ -59,40 +48,33 @@ func extinguish(c *client.Client, totalNS int, maxAllowedAfterDel int, maxSecond
 	}
 	wg.Wait()
 
-	By("Waiting 10 seconds")
 	//Wait 10 seconds, then SEND delete requests for all the namespaces.
+	By("Waiting 10 seconds")
 	time.Sleep(time.Duration(10 * time.Second))
-	By("Deleting namespaces")
-	nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
+	deleted, err := deleteNamespaces(c, []string{"nslifetest"}, nil /* skipFilter */)
 	Expect(err).NotTo(HaveOccurred())
-	var nsCount = 0
-	for _, item := range nsList.Items {
-		if strings.Contains(item.Name, "nslifetest") {
-			wg.Add(1)
-			nsCount++
-			go func(nsName string) {
-				defer wg.Done()
-				defer GinkgoRecover()
-				Expect(c.Namespaces().Delete(nsName)).To(Succeed())
-				Logf("namespace : %v api call to delete is complete ", nsName)
-			}(item.Name)
-		}
-	}
-	Expect(nsCount).To(Equal(totalNS))
-	wg.Wait()
+	Expect(len(deleted)).To(Equal(totalNS))
 
 	By("Waiting for namespaces to vanish")
 	//Now POLL until all namespaces have been eradicated.
 	expectNoError(wait.Poll(2*time.Second, time.Duration(maxSeconds)*time.Second,
 		func() (bool, error) {
-			if rem, err := countRemaining(c, "nslifetest"); err != nil || rem > maxAllowedAfterDel {
-				Logf("Remaining namespaces : %v", rem)
+			var cnt = 0
+			nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
+			if err != nil {
 				return false, err
-			} else {
-				return true, nil
 			}
+			for _, item := range nsList.Items {
+				if strings.Contains(item.Name, "nslifetest") {
+					cnt++
+				}
+			}
+			if cnt > maxAllowedAfterDel {
+				Logf("Remaining namespaces : %v", cnt)
+				return false, nil
+			}
+			return true, nil
 		}))
-
 }
 
 var _ = Describe("Namespaces", func() {
diff --git a/test/e2e/util.go b/test/e2e/util.go
index f45c7dfd81b..a05a1945a57 100644
--- a/test/e2e/util.go
+++ b/test/e2e/util.go
@@ -30,6 +30,7 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"k8s.io/kubernetes/pkg/api"
@@ -68,6 +69,11 @@ const (
 	// TODO: Make this 30 seconds once #4566 is resolved.
 	podStartTimeout = 5 * time.Minute
 
+	// If there are any orphaned namespaces to clean up, this test is running
+	// on a long-lived cluster. A long wait here is preferable to spurious test
+	// failures caused by leaked resources from a previous test run.
+	namespaceCleanupTimeout = 15 * time.Minute
+
 	// Some pods can take much longer to get ready due to volume attach/detach latency.
 	slowPodStartTimeout = 15 * time.Minute
 
@@ -128,6 +134,7 @@ type TestContextType struct {
 	PrometheusPushGateway             string
 	VerifyServiceAccount              bool
 	DeleteNamespace                   bool
+	CleanStart                        bool
 	GatherKubeSystemResourceUsageData bool
 }
 
@@ -402,6 +409,71 @@ func waitForPodsRunningReady(ns string, minPods int, timeout time.Duration) erro
 	return nil
 }
 
+// deleteNamespaces deletes all namespaces that match the given delete and skip filters.
+// Filtering is by simple strings.Contains; the skip filter is applied first, then the delete filter.
+// Returns the list of deleted namespaces or an error.
+func deleteNamespaces(c *client.Client, deleteFilter, skipFilter []string) ([]string, error) {
+	By("Deleting namespaces")
+	nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
+	Expect(err).NotTo(HaveOccurred())
+	var deleted []string
+	var wg sync.WaitGroup
+OUTER:
+	for _, item := range nsList.Items {
+		if skipFilter != nil {
+			for _, pattern := range skipFilter {
+				if strings.Contains(item.Name, pattern) {
+					continue OUTER
+				}
+			}
+		}
+		if deleteFilter != nil {
+			var shouldDelete bool
+			for _, pattern := range deleteFilter {
+				if strings.Contains(item.Name, pattern) {
+					shouldDelete = true
+					break
+				}
+			}
+			if !shouldDelete {
+				continue OUTER
+			}
+		}
+		wg.Add(1)
+		deleted = append(deleted, item.Name)
+		go func(nsName string) {
+			defer wg.Done()
+			defer GinkgoRecover()
+			Expect(c.Namespaces().Delete(nsName)).To(Succeed())
+			Logf("namespace : %v api call to delete is complete ", nsName)
+		}(item.Name)
+	}
+	wg.Wait()
+	return deleted, nil
+}
+
+func waitForNamespacesDeleted(c *client.Client, namespaces []string, timeout time.Duration) error {
+	By("Waiting for namespaces to vanish")
+	nsMap := map[string]bool{}
+	for _, ns := range namespaces {
+		nsMap[ns] = true
+	}
+	//Now POLL until all namespaces have been eradicated.
+	return wait.Poll(2*time.Second, timeout,
+		func() (bool, error) {
+			nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
+			if err != nil {
+				return false, err
+			}
+			for _, item := range nsList.Items {
+				if _, ok := nsMap[item.Name]; ok {
+					return false, nil
+				}
+			}
+			return true, nil
+		})
+}
+
 func waitForServiceAccountInNamespace(c *client.Client, ns, serviceAccountName string, timeout time.Duration) error {
 	Logf("Waiting up to %v for service account %s to be provisioned in ns %s", timeout, serviceAccountName, ns)
 	for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {
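Illustrative usage, not part of the patch: a minimal sketch of how the new knob might be exercised against an already-running cluster, assuming the usual e2e environment (provider, kubeconfig, and related variables) is configured; the Ginkgo focus pattern below is a hypothetical example.

    # Any non-empty E2E_CLEAN_START makes hack/ginkgo-e2e.sh pass --clean-start=true
    # to the e2e test binary via the ${E2E_CLEAN_START:+...} expansion added above.
    export E2E_CLEAN_START="true"
    hack/ginkgo-e2e.sh --ginkgo.focus="Namespaces"

With the flag set, TestE2E deletes every namespace except "default" and "kube-system" before any spec runs and waits up to namespaceCleanupTimeout (15 minutes) for the deletions to complete, so namespaces leaked by an interrupted soak job do not interfere with the next run.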