Increase the timeout on apiserver requests to fetch node stats.

This commit is contained in:
Vishnu Kannan 2015-02-23 21:14:15 +00:00
parent e186582272
commit e1147184d5

View File

@ -25,6 +25,12 @@ import (
. "github.com/onsi/ginkgo"
)
const (
timeout = 1 * time.Minute
maxRetries = 10
sleepDuration = time.Minute
)
var _ = Describe("Cadvisor", func() {
var c *client.Client
@ -44,14 +50,15 @@ func CheckCadvisorHealthOnAllNodes(c *client.Client, timeout time.Duration) {
nodeList, err := c.Nodes().List()
expectNoError(err)
var errors []error
for start := time.Now(); time.Since(start) < timeout; time.Sleep(5 * time.Second) {
retries := maxRetries
for {
errors = []error{}
for _, node := range nodeList.Items {
// cadvisor is not accessible directly unless its port (4194 by default) is exposed.
// Here, we access '/stats/' REST endpoint on the kubelet which polls cadvisor internally.
statsResource := fmt.Sprintf("api/v1beta1/proxy/minions/%s/stats/", node.Name)
By(fmt.Sprintf("Querying stats from node %s using url %s", node.Name, statsResource))
_, err = c.Get().AbsPath(statsResource).Timeout(1 * time.Second).Do().Raw()
_, err = c.Get().AbsPath(statsResource).Timeout(timeout).Do().Raw()
if err != nil {
errors = append(errors, err)
}
@ -59,6 +66,11 @@ func CheckCadvisorHealthOnAllNodes(c *client.Client, timeout time.Duration) {
if len(errors) == 0 {
return
}
if retries--; retries <= 0 {
break
}
Logf("failed to retrieve kubelet stats -\n %v", errors)
time.Sleep(sleepDuration)
}
Failf("Timed out after %v waiting for cadvisor to be healthy on all nodes. Errors:\n%v", timeout, errors)
Failf("Failed after retrying %d times for cadvisor to be healthy on all nodes. Errors:\n%v", maxRetries, errors)
}