Files
kubernetes/test/e2e/instrumentation/monitoring/cadvisor.go
Patrick Ohly 752203d3fa e2e/instrumentation: decentralized settings
Test settings should be defined in the test source code itself
because, conceptually, the framework is a separate entity that not all
test authors can modify.

Using the new framework/config code also has several advantages:
- defaults can be set with less code
- no confusion around what's a duration
- the options can also be set via command line flags

While at it, a minor bug gets fixed:
- readConfig() returns only defaults when called while
  registering Ginkgo tests because Viperize() gets called later,
  so the scale in the logging soak test couldn't really be configured;
  now the value is read when the test runs and thus can be changed

The options get moved into the "instrumentation.logging" and
"instrumentation.monitoring" groups, respectively, to make it more
obvious where they are used. This is a breaking change, but a breaking
change was already necessary to turn the duration setting from a plain
integer into a proper time duration.
2018-10-05 14:24:35 +02:00

78 lines
2.5 KiB
Go

/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package monitoring
import (
"fmt"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/config"
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
. "github.com/onsi/ginkgo"
)
// cadvisor holds the tunable settings of the cadvisor health check in this
// file. Each field carries a `default` struct tag with the value to use when
// the option is not set explicitly.
var cadvisor struct {
// MaxRetries bounds the number of polling rounds that
// CheckCadvisorHealthOnAllNodes makes before it gives up.
MaxRetries int `default:"6"`
// SleepDuration is how long CheckCadvisorHealthOnAllNodes sleeps between
// two polling rounds.
SleepDuration time.Duration `default:"10000ms"`
}
// Register the settings under the "instrumentation.monitoring.cadvisor"
// prefix. NOTE(review): presumably this is what exposes them as config-file
// entries and command line flags — confirm against framework/config.
var _ = config.AddOptions(&cadvisor, "instrumentation.monitoring.cadvisor")
// Registers the "Cadvisor" suite: a single spec that runs the cadvisor health
// check against every node, using the client set of a framework instance
// created with the base name "cadvisor".
var _ = instrumentation.SIGDescribe("Cadvisor", func() {
	fw := framework.NewDefaultFramework("cadvisor")

	It("should be healthy on every node.", func() {
		// Per-request timeout handed to the health check.
		const requestTimeout = 5 * time.Minute
		CheckCadvisorHealthOnAllNodes(fw.ClientSet, requestTimeout)
	})
})
// CheckCadvisorHealthOnAllNodes verifies that the kubelet stats endpoint
// answers without error on every node known to the API server.
//
// The node list is fetched once through c. Each polling round then requests
// "api/v1/nodes/<name>/proxy/stats/" for every node, passing timeout as the
// per-request timeout. A round succeeds, and the function returns, when no
// request produced an error. A failed round is repeated after sleeping
// cadvisor.SleepDuration, until cadvisor.MaxRetries rounds have been made; at
// least one round is always made, even if MaxRetries is zero or negative.
// When the final round still has errors, framework.Failf is called with the
// number of rounds made and the errors of that final round.
func CheckCadvisorHealthOnAllNodes(c clientset.Interface, timeout time.Duration) {
	// It should be OK to list unschedulable Nodes here.
	By("getting list of nodes")
	nodeList, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
	framework.ExpectNoError(err)

	// Snapshot the limit once so the loop bound cannot change mid-run.
	maxAttempts := cadvisor.MaxRetries

	// errs only ever holds the failures of the most recent round.
	var errs []error
	// attempts counts completed rounds. It is tracked separately from the
	// limit so the failure message can report how often we actually tried
	// (a decremented countdown would always read zero by then).
	attempts := 0
	for {
		attempts++
		errs = nil
		for _, node := range nodeList.Items {
			// cadvisor is not accessible directly unless its port (4194 by default) is exposed.
			// Here, we access '/stats/' REST endpoint on the kubelet which polls cadvisor internally.
			statsResource := fmt.Sprintf("api/v1/nodes/%s/proxy/stats/", node.Name)
			By(fmt.Sprintf("Querying stats from node %s using url %s", node.Name, statsResource))
			if _, err := c.CoreV1().RESTClient().Get().AbsPath(statsResource).Timeout(timeout).Do().Raw(); err != nil {
				errs = append(errs, err)
			}
		}
		if len(errs) == 0 {
			return
		}
		if attempts >= maxAttempts {
			break
		}
		framework.Logf("failed to retrieve kubelet stats -\n %v", errs)
		time.Sleep(cadvisor.SleepDuration)
	}
	framework.Failf("Failed after retrying %d times for cadvisor to be healthy on all nodes. Errors:\n%v", attempts, errs)
}