From e1147184d5bde80d38db2e1f00c42b639a982dab Mon Sep 17 00:00:00 2001 From: Vishnu Kannan Date: Mon, 23 Feb 2015 21:14:15 +0000 Subject: [PATCH] Increase the timeout on apiserver requests to fetch node stats. --- test/e2e/cadvisor.go | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/test/e2e/cadvisor.go b/test/e2e/cadvisor.go index d1f833c2ccb..709b8b40f62 100644 --- a/test/e2e/cadvisor.go +++ b/test/e2e/cadvisor.go @@ -25,6 +25,12 @@ import ( . "github.com/onsi/ginkgo" ) +const ( + timeout = 1 * time.Minute + maxRetries = 10 + sleepDuration = time.Minute +) + var _ = Describe("Cadvisor", func() { var c *client.Client @@ -44,14 +50,15 @@ func CheckCadvisorHealthOnAllNodes(c *client.Client, timeout time.Duration) { nodeList, err := c.Nodes().List() expectNoError(err) var errors []error - for start := time.Now(); time.Since(start) < timeout; time.Sleep(5 * time.Second) { + retries := maxRetries + for { errors = []error{} for _, node := range nodeList.Items { // cadvisor is not accessible directly unless its port (4194 by default) is exposed. // Here, we access '/stats/' REST endpoint on the kubelet which polls cadvisor internally. statsResource := fmt.Sprintf("api/v1beta1/proxy/minions/%s/stats/", node.Name) By(fmt.Sprintf("Querying stats from node %s using url %s", node.Name, statsResource)) - _, err = c.Get().AbsPath(statsResource).Timeout(1 * time.Second).Do().Raw() + _, err = c.Get().AbsPath(statsResource).Timeout(timeout).Do().Raw() if err != nil { errors = append(errors, err) } @@ -59,6 +66,11 @@ func CheckCadvisorHealthOnAllNodes(c *client.Client, timeout time.Duration) { if len(errors) == 0 { return } + if retries--; retries <= 0 { + break + } + Logf("failed to retrieve kubelet stats -\n %v", errors) + time.Sleep(sleepDuration) } - Failf("Timed out after %v waiting for cadvisor to be healthy on all nodes. Errors:\n%v", timeout, errors) + Failf("Failed after retrying %d times for cadvisor to be healthy on all nodes. Errors:\n%v", maxRetries, errors) }