From 611530889fb0c29baf8593ddb1dc705576b04a2a Mon Sep 17 00:00:00 2001
From: Clayton Coleman
Date: Wed, 19 Aug 2015 11:35:00 -0400
Subject: [PATCH] Cleanup more extensively in e2e

---
 test/e2e/dns.go          |  2 +-
 test/e2e/etcd_failure.go |  2 +-
 test/e2e/framework.go    |  2 ++
 test/e2e/monitoring.go   |  3 +++
 test/e2e/pd.go           |  4 ++--
 test/e2e/rc.go           | 31 +++++--------------------
 test/e2e/resize_nodes.go | 16 +++++++++++--
 test/e2e/service.go      |  2 +-
 test/e2e/util.go         | 50 +++++++++++++++++++++++++++++++++++-----
 9 files changed, 74 insertions(+), 38 deletions(-)

diff --git a/test/e2e/dns.go b/test/e2e/dns.go
index 9a7f7c0b6b1..33fb9b55d18 100644
--- a/test/e2e/dns.go
+++ b/test/e2e/dns.go
@@ -158,7 +158,7 @@ func validateDNSResults(f *Framework, pod *api.Pod, fileNames []string) {
 	defer func() {
 		By("deleting the pod")
 		defer GinkgoRecover()
-		podClient.Delete(pod.Name, nil)
+		podClient.Delete(pod.Name, api.NewDeleteOptions(0))
 	}()
 	if _, err := podClient.Create(pod); err != nil {
 		Failf("Failed to create %s pod: %v", pod.Name, err)
 	}
diff --git a/test/e2e/etcd_failure.go b/test/e2e/etcd_failure.go
index 3095cd68766..b6ba2b90daf 100644
--- a/test/e2e/etcd_failure.go
+++ b/test/e2e/etcd_failure.go
@@ -138,7 +138,7 @@ func checkExistingRCRecovers(f Framework) {
 		pods, err := podClient.List(rcSelector, fields.Everything())
 		Expect(err).NotTo(HaveOccurred())
 		for _, pod := range pods.Items {
-			if api.IsPodReady(&pod) {
+			if pod.DeletionTimestamp == nil && api.IsPodReady(&pod) {
 				return true, nil
 			}
 		}
diff --git a/test/e2e/framework.go b/test/e2e/framework.go
index 9173be8cabf..136b42ab7ae 100644
--- a/test/e2e/framework.go
+++ b/test/e2e/framework.go
@@ -86,6 +86,8 @@ func (f *Framework) afterEach() {
 	// Note that we don't wait for any cleanup to propagate, which means
 	// that if you delete a bunch of pods right before ending your test,
 	// you may or may not see the killing/deletion/cleanup events.
+
+	dumpAllPodInfo(f.Client)
 }
 
 // Check whether all nodes are ready after the test.
diff --git a/test/e2e/monitoring.go b/test/e2e/monitoring.go
index 59f75192c23..84e33a77374 100644
--- a/test/e2e/monitoring.go
+++ b/test/e2e/monitoring.go
@@ -92,6 +92,9 @@ func verifyExpectedRcsExistAndGetExpectedPods(c *client.Client) ([]string, error
 				return nil, err
 			}
 			for _, pod := range podList.Items {
+				if pod.DeletionTimestamp != nil {
+					continue
+				}
 				expectedPods = append(expectedPods, string(pod.UID))
 			}
 		}
diff --git a/test/e2e/pd.go b/test/e2e/pd.go
index e9b2dabf54e..232430d627f 100644
--- a/test/e2e/pd.go
+++ b/test/e2e/pd.go
@@ -190,7 +190,7 @@ var _ = Describe("Pod Disks", func() {
 			By("cleaning up PD-RW test environment")
 			// Teardown pods, PD. Ignore errors.
 			// Teardown should do nothing unless test failed.
-			podClient.Delete(host0Pod.Name, nil)
+			podClient.Delete(host0Pod.Name, api.NewDeleteOptions(0))
 			detachPD(host0Name, diskName)
 			deletePD(diskName)
 		}()
@@ -221,7 +221,7 @@ var _ = Describe("Pod Disks", func() {
 			verifyPDContentsViaContainer(framework, host0Pod.Name, containerName, fileAndContentToVerify)
 
 			By("deleting host0Pod")
-			expectNoError(podClient.Delete(host0Pod.Name, nil), "Failed to delete host0Pod")
+			expectNoError(podClient.Delete(host0Pod.Name, api.NewDeleteOptions(0)), "Failed to delete host0Pod")
 		}
 
 		By(fmt.Sprintf("deleting PD %q", diskName))
diff --git a/test/e2e/rc.go b/test/e2e/rc.go
index e17c7de3da3..9d25f37c100 100644
--- a/test/e2e/rc.go
+++ b/test/e2e/rc.go
@@ -21,8 +21,6 @@ import (
 	"time"
 
 	"k8s.io/kubernetes/pkg/api"
-	"k8s.io/kubernetes/pkg/fields"
-	"k8s.io/kubernetes/pkg/kubectl"
 	"k8s.io/kubernetes/pkg/labels"
 	"k8s.io/kubernetes/pkg/types"
 	"k8s.io/kubernetes/pkg/util"
@@ -88,41 +86,24 @@ func ServeImageOrFail(f *Framework, test string, image string) {
 	// Cleanup the replication controller when we are done.
 	defer func() {
 		// Resize the replication controller to zero to get rid of pods.
-		By("Cleaning up the replication controller")
-		rcReaper, err := kubectl.ReaperFor("ReplicationController", f.Client, nil)
-		if err != nil {
+		if err := DeleteRC(f.Client, f.Namespace.Name, controller.Name); err != nil {
 			Logf("Failed to cleanup replication controller %v: %v.", controller.Name, err)
 		}
-		if _, err = rcReaper.Stop(f.Namespace.Name, controller.Name, 0, nil); err != nil {
-			Logf("Failed to stop replication controller %v: %v.", controller.Name, err)
-		}
 	}()
 
 	// List the pods, making sure we observe all the replicas.
-	listTimeout := time.Minute
 	label := labels.SelectorFromSet(labels.Set(map[string]string{"name": name}))
-	pods, err := f.Client.Pods(f.Namespace.Name).List(label, fields.Everything())
-	Expect(err).NotTo(HaveOccurred())
-	t := time.Now()
-	for {
-		Logf("Controller %s: Found %d pods out of %d", name, len(pods.Items), replicas)
-		if len(pods.Items) == replicas {
-			break
-		}
-		if time.Since(t) > listTimeout {
-			Failf("Controller %s: Gave up waiting for %d pods to come up after seeing only %d pods after %v seconds",
-				name, replicas, len(pods.Items), time.Since(t).Seconds())
-		}
-		time.Sleep(5 * time.Second)
-		pods, err = f.Client.Pods(f.Namespace.Name).List(label, fields.Everything())
-		Expect(err).NotTo(HaveOccurred())
-	}
+
+	pods, err := podsCreated(f.Client, f.Namespace.Name, name, replicas)
 
 	By("Ensuring each pod is running")
 
 	// Wait for the pods to enter the running state. Waiting loops until the pods
 	// are running so non-running pods cause a timeout for this test.
 	for _, pod := range pods.Items {
+		if pod.DeletionTimestamp != nil {
+			continue
+		}
 		err = f.WaitForPodRunning(pod.Name)
 		Expect(err).NotTo(HaveOccurred())
 	}
diff --git a/test/e2e/resize_nodes.go b/test/e2e/resize_nodes.go
index 83afcd22b54..6108bd585c2 100644
--- a/test/e2e/resize_nodes.go
+++ b/test/e2e/resize_nodes.go
@@ -255,8 +255,17 @@ func podsCreated(c *client.Client, ns, name string, replicas int) (*api.PodList,
 			return nil, err
 		}
 
-		Logf("Pod name %s: Found %d pods out of %d", name, len(pods.Items), replicas)
-		if len(pods.Items) == replicas {
+		created := []api.Pod{}
+		for _, pod := range pods.Items {
+			if pod.DeletionTimestamp != nil {
+				continue
+			}
+			created = append(created, pod)
+		}
+		Logf("Pod name %s: Found %d pods out of %d", name, len(created), replicas)
+
+		if len(created) == replicas {
+			pods.Items = created
 			return pods, nil
 		}
 	}
@@ -416,6 +425,9 @@ var _ = Describe("Nodes", func() {
 		if err := deleteNS(c, ns); err != nil {
 			Failf("Couldn't delete namespace '%s', %v", ns, err)
 		}
+		if err := deleteTestingNS(c); err != nil {
+			Failf("Couldn't delete testing namespaces '%s', %v", ns, err)
+		}
 	})
 
 	Describe("Resize", func() {
diff --git a/test/e2e/service.go b/test/e2e/service.go
index e99ec341121..d546801d35d 100644
--- a/test/e2e/service.go
+++ b/test/e2e/service.go
@@ -1372,7 +1372,7 @@ func verifyServeHostnameServiceUp(c *client.Client, host string, expectedPods []
 			passed = true
 			break
 		}
-		Logf("Expected pods: %v, got: %v", expectedPods, pods)
+		Logf("Waiting for expected pods for %s: %v, got: %v", serviceIP, expectedPods, pods)
 	}
 	if !passed {
 		return fmt.Errorf("service verification failed for:\n %s", cmd)
diff --git a/test/e2e/util.go b/test/e2e/util.go
index e4e9200746b..5e8d55d5022 100644
--- a/test/e2e/util.go
+++ b/test/e2e/util.go
@@ -1235,6 +1235,8 @@ func RunRC(config RCConfig) error {
 	for oldRunning != config.Replicas {
 		time.Sleep(interval)
 
+		terminating := 0
+
 		running := 0
 		waiting := 0
 		pending := 0
@@ -1244,10 +1246,13 @@ func RunRC(config RCConfig) error {
 		containerRestartNodes := util.NewStringSet()
 
 		pods := podStore.List()
-		if config.CreatedPods != nil {
-			*config.CreatedPods = pods
-		}
+		created := []*api.Pod{}
 		for _, p := range pods {
+			if p.DeletionTimestamp != nil {
+				terminating++
+				continue
+			}
+			created = append(created, p)
 			if p.Status.Phase == api.PodRunning {
 				running++
 				for _, v := range FailedContainers(p) {
@@ -1266,9 +1271,13 @@ func RunRC(config RCConfig) error {
 				unknown++
 			}
 		}
+		pods = created
+		if config.CreatedPods != nil {
+			*config.CreatedPods = pods
+		}
 
-		Logf("%v %v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d unknown ",
-			time.Now(), rc.Name, len(pods), config.Replicas, running, pending, waiting, inactive, unknown)
+		Logf("%v %v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown ",
+			time.Now(), rc.Name, len(pods), config.Replicas, running, pending, waiting, inactive, terminating, unknown)
 
 		promPushRunningPending(running, pending)
 
@@ -1332,6 +1341,16 @@ func dumpPodDebugInfo(c *client.Client, pods []*api.Pod) {
 	dumpNodeDebugInfo(c, badNodes.List())
 }
 
+func dumpAllPodInfo(c *client.Client) {
+	pods, err := c.Pods("").List(labels.Everything(), fields.Everything())
+	if err != nil {
+		Logf("unable to fetch pod debug info: %v", err)
+	}
+	for _, pod := range pods.Items {
+		Logf("Pod %s %s node=%s, deletionTimestamp=%s", pod.Namespace, pod.Name, pod.Spec.NodeName, pod.DeletionTimestamp)
+	}
+}
+
 func dumpNodeDebugInfo(c *client.Client, nodeNames []string) {
 	for _, n := range nodeNames {
 		Logf("\nLogging kubelet events for node %v", n)
@@ -1442,9 +1461,29 @@ func DeleteRC(c *client.Client, ns, name string) error {
 	_, err = reaper.Stop(ns, name, 0, api.NewDeleteOptions(0))
 	deleteRCTime := time.Now().Sub(startTime)
 	Logf("Deleting RC took: %v", deleteRCTime)
+	if err == nil {
+		err = waitForRCPodsGone(c, ns, name)
+	}
+	terminatePodTime := time.Now().Sub(startTime) - deleteRCTime
+	Logf("Terminating RC pods took: %v", terminatePodTime)
 	return err
 }
 
+// waitForRCPodsGone waits until there are no pods reported under an RC's selector (because the pods
+// have completed termination).
+func waitForRCPodsGone(c *client.Client, ns, name string) error {
+	rc, err := c.ReplicationControllers(ns).Get(name)
+	if err != nil {
+		return err
+	}
+	return wait.Poll(poll, singleCallTimeout, func() (bool, error) {
+		if pods, err := c.Pods(ns).List(labels.SelectorFromSet(rc.Spec.Selector), fields.Everything()); err == nil && len(pods.Items) == 0 {
+			return true, nil
+		}
+		return false, nil
+	})
+}
+
 // Convenient wrapper around listing nodes supporting retries.
 func listNodes(c *client.Client, label labels.Selector, field fields.Selector) (*api.NodeList, error) {
 	var nodes *api.NodeList
@@ -1606,7 +1645,6 @@ func getSigner(provider string) (ssh.Signer, error) {
 		return nil, fmt.Errorf("getSigner(...) not implemented for %s", provider)
 	}
 	key := filepath.Join(keydir, keyfile)
-	Logf("Using SSH key: %s", key)
 
 	return util.MakePrivateKeySignerFromFile(key)
 }
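Editor's note: the recurring pattern in this patch is to treat pods whose DeletionTimestamp is set as already terminating and to exclude them from replica counts and readiness checks. The standalone Go sketch below illustrates only that filtering idea; the Pod type and function names here are stand-ins for illustration and are not taken from the patch or the Kubernetes API.

package main

import (
	"fmt"
	"time"
)

// Pod is a stand-in for the few api.Pod fields the filtering relies on.
type Pod struct {
	Name              string
	DeletionTimestamp *time.Time // non-nil once deletion has been requested
}

// activePods drops pods that are already terminating, mirroring the
// DeletionTimestamp checks added throughout the patch.
func activePods(pods []Pod) []Pod {
	active := []Pod{}
	for _, p := range pods {
		if p.DeletionTimestamp != nil {
			continue // terminating; do not count toward the expected replicas
		}
		active = append(active, p)
	}
	return active
}

func main() {
	now := time.Now()
	pods := []Pod{
		{Name: "rc-pod-1"},
		{Name: "rc-pod-2", DeletionTimestamp: &now},
	}
	fmt.Printf("%d of %d pods are still active\n", len(activePods(pods)), len(pods))
}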