Removing the old shell-based monitoring test.

Vishnu Kannan 2015-03-11 21:39:56 +00:00
parent 425dd7e3ee
commit cbb3c96f31
2 changed files with 47 additions and 163 deletions

View File

@@ -1,134 +0,0 @@
#!/bin/bash
# Copyright 2014 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Assumes a running Kubernetes test cluster; verifies that the monitoring setup
# works. Assumes that we're being called by hack/e2e-test.sh (we use some env
# vars it sets up).
set -o errexit
set -o nounset
set -o pipefail
KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
: ${KUBE_VERSION_ROOT:=${KUBE_ROOT}}
: ${KUBECTL:="${KUBE_VERSION_ROOT}/cluster/kubectl.sh"}
: ${KUBE_CONFIG_FILE:="config-test.sh"}
export KUBECTL KUBE_CONFIG_FILE
source "${KUBE_ROOT}/cluster/kube-env.sh"
source "${KUBE_VERSION_ROOT}/cluster/${KUBERNETES_PROVIDER}/util.sh"
prepare-e2e
MONITORING="${KUBE_ROOT}/cluster/addons/cluster-monitoring"
KUBECTL="${KUBE_ROOT}/cluster/kubectl.sh"
BIGRAND=$(printf "%x\n" $(( $RANDOM << 16 | $RANDOM ))) # random 2^32 in hex
MONITORING_FIREWALL_RULE="monitoring-test-${BIGRAND}"
function setup {
# This only has work to do on gce and gke
if [[ "${KUBERNETES_PROVIDER}" == "gce" ]] || [[ "${KUBERNETES_PROVIDER}" == "gke" ]]; then
detect-project
if ! "${GCLOUD}" compute firewall-rules create "${MONITORING_FIREWALL_RULE}" \
--project "${PROJECT}" \
--network "${NETWORK}" \
--quiet \
--allow tcp:80 tcp:8083 tcp:8086 tcp:9200; then
echo "Failed to set up firewall for monitoring" && false
fi
fi
"${KUBECTL}" create -f "${MONITORING}/"
}
function cleanup {
"${KUBECTL}" stop rc monitoring-influx-grafana-controller &> /dev/null || true
"${KUBECTL}" stop rc monitoring-heapster-controller &> /dev/null || true
"${KUBECTL}" delete -f "${MONITORING}/" &> /dev/null || true
# This only has work to do on gce and gke
if [[ "${KUBERNETES_PROVIDER}" == "gce" ]] || [[ "${KUBERNETES_PROVIDER}" == "gke" ]]; then
detect-project
if "${GCLOUD}" compute firewall-rules describe "${MONITORING_FIREWALL_RULE}" &> /dev/null; then
"${GCLOUD}" compute firewall-rules delete \
--project "${PROJECT}" \
--quiet \
"${MONITORING_FIREWALL_RULE}" || true
fi
fi
}
function influx-data-exists {
local max_retries=10
local retry_delay=30 #seconds
local influx_ip=$("${KUBECTL}" get pods -l name=influxGrafana -o template -t {{range.items}}{{.currentState.hostIP}}:{{end}} | sed s/://g)
local influx_url="http://$influx_ip:8086/db/k8s/series?u=root&p=root"
local ok="false"
for i in `seq 1 10`; do
if curl --retry $max_retries --retry-delay $retry_delay -G $influx_url --data-urlencode "q=select * from stats limit 1" \
&& curl --retry $max_retries --retry-delay $retry_delay -G $influx_url --data-urlencode "q=select * from machine limit 1"; then
echo "retrieved data from InfluxDB."
ok="true"
break
fi
sleep 5
done
if [[ "${ok}" != "true" ]]; then
echo "failed to retrieve stats from InfluxDB. monitoring test failed"
exit 1
fi
}
function wait-for-pods {
local running=false
for i in `seq 1 20`; do
sleep 20
if "${KUBECTL}" get pods -l name=influxGrafana -o template -t {{range.items}}{{.currentState.status}}:{{end}} | grep Running &> /dev/null \
&& "${KUBECTL}" get pods -l name=heapster -o template -t {{range.items}}{{.currentState.status}}:{{end}} | grep Running &> /dev/null; then
running=true
break
fi
done
if [[ "${running}" == "false" ]]; then
echo "giving up waiting on monitoring pods to be active. monitoring test failed"
exit 1
fi
}
trap cleanup EXIT
# Remove any pre-existing monitoring services.
cleanup
# Start monitoring pods and services.
setup
# Wait for a maximum of 5 minutes for the influx grafana pod to be running.
echo "waiting for monitoring pods to be running"
wait-for-pods
# Wait for some time to let heapster push some stats to InfluxDB.
echo "monitoring pods are running. waiting for stats to be pushed to InfluxDB"
sleep 60
# Check if stats data exists in InfluxDB
echo "checking if stats exist in InfluxDB"
influx-data-exists
echo "monitoring setup works"
exit 0
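
For reference, the reachability check that the influx-data-exists curl loop above performs can be sketched in Go, the language of the replacement test in the next file, against the same InfluxDB 0.8 HTTP endpoint. This is a minimal illustration, not code from the repository: queryInflux and the hard-coded IP in main are placeholders, while the database name, credentials, port, and queries are taken from the script (the script resolves the host from the influxGrafana pod's hostIP via kubectl).

package main

import (
	"fmt"
	"net/http"
	"net/url"
)

// queryInflux issues one of the series queries the shell test used
// ("select * from stats limit 1" / "select * from machine limit 1")
// against the InfluxDB HTTP API and reports whether it returned 200 OK.
func queryInflux(influxIP, query string) (bool, error) {
	u := url.URL{
		Scheme: "http",
		Host:   influxIP + ":8086",
		Path:   "/db/k8s/series",
		RawQuery: url.Values{
			"u": {"root"},
			"p": {"root"},
			"q": {query},
		}.Encode(),
	}
	resp, err := http.Get(u.String())
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()
	return resp.StatusCode == http.StatusOK, nil
}

func main() {
	// Placeholder IP; in the script this is the influxGrafana pod's host IP.
	ok, err := queryInflux("127.0.0.1", "select * from stats limit 1")
	fmt.Println(ok, err)
}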

View File

@@ -39,7 +39,12 @@ var _ = Describe("Monitoring", func() {
 		expectNoError(err)
 	})

-	It("pod and node resource usage metrics are available on influxdb using heapster.", func() {
+	It("verify monitoring pods and all cluster nodes are available on influxdb using heapster.", func() {
+		if testContext.provider != "gce" {
+			By(fmt.Sprintf("Skipping Monitoring test, which is only supported for provider gce (not %s)",
+				testContext.provider))
+			return
+		}
 		testMonitoringUsingHeapsterInfluxdb(c)
 	})
 })
@@ -51,8 +56,8 @@ const (
 	influxdbPW           = "root"
 	podlistQuery         = "select distinct(pod) from stats"
 	nodelistQuery        = "select distinct(hostname) from machine"
-	sleepBetweenAttempts = 30 * time.Second
-	maxAttempts          = 10 // Total sleep time of 5 minutes for this test.
+	sleepBetweenAttempts = 5 * time.Second
+	testTimeout          = 5 * time.Minute
 )

 var (
@@ -67,27 +72,40 @@ var (
 	}
 )

-func expectedRcsExist(c *client.Client) {
+func verifyExpectedRcsExistAndGetExpectedPods(c *client.Client) ([]string, error) {
 	rcList, err := c.ReplicationControllers(api.NamespaceDefault).List(labels.Everything())
-	expectNoError(err)
+	if err != nil {
+		return nil, err
+	}
+	expectedPods := []string{}
 	for _, rc := range rcList.Items {
 		if _, ok := expectedRcs[rc.Name]; ok {
 			if rc.Status.Replicas != 1 {
-				Failf("expected to find only one replica for rc %q, found %d", rc.Name, rc.Status.Replicas)
+				return nil, fmt.Errorf("expected to find only one replica for rc %q, found %d", rc.Name, rc.Status.Replicas)
 			}
 			expectedRcs[rc.Name] = true
+			podList, err := c.Pods(api.NamespaceDefault).List(labels.Set(rc.Spec.Selector).AsSelector())
+			if err != nil {
+				return nil, err
+			}
+			for _, pod := range podList.Items {
+				expectedPods = append(expectedPods, pod.Name)
+			}
 		}
 	}
 	for rc, found := range expectedRcs {
 		if !found {
-			Failf("Replication Controller %q not found.", rc)
+			return nil, fmt.Errorf("Replication Controller %q not found.", rc)
 		}
 	}
+	return expectedPods, nil
 }

-func expectedServicesExist(c *client.Client) {
+func expectedServicesExist(c *client.Client) error {
 	serviceList, err := c.Services(api.NamespaceDefault).List(labels.Everything())
-	expectNoError(err)
+	if err != nil {
+		return err
+	}
 	for _, service := range serviceList.Items {
 		if _, ok := expectedServices[service.Name]; ok {
 			expectedServices[service.Name] = true
@@ -95,29 +113,22 @@ func expectedServicesExist(c *client.Client) {
 	}
 	for service, found := range expectedServices {
 		if !found {
-			Failf("Service %q not found", service)
+			return fmt.Errorf("Service %q not found", service)
 		}
 	}
+	return nil
 }

-func getAllPodsInCluster(c *client.Client) []string {
-	podList, err := c.Pods(api.NamespaceAll).List(labels.Everything())
-	expectNoError(err)
-	result := []string{}
-	for _, pod := range podList.Items {
-		result = append(result, pod.Name)
-	}
-	return result
-}
-
-func getAllNodesInCluster(c *client.Client) []string {
+func getAllNodesInCluster(c *client.Client) ([]string, error) {
 	nodeList, err := c.Nodes().List()
-	expectNoError(err)
+	if err != nil {
+		return nil, err
+	}
 	result := []string{}
 	for _, node := range nodeList.Items {
 		result = append(result, node.Name)
 	}
-	return result
+	return result, nil
 }

 func getInfluxdbData(c *influxdb.Client, query string) (map[string]bool, error) {
@@ -133,6 +144,9 @@ func getInfluxdbData(c *influxdb.Client, query string) (map[string]bool, error)
 	}
 	result := map[string]bool{}
 	for _, point := range series[0].GetPoints() {
+		if len(point) != 2 {
+			Failf("Expected only two entries in a point for query %q. Got %v", query, point)
+		}
 		name, ok := point[1].(string)
 		if !ok {
 			Failf("expected %v to be a string, but it is %T", point[1], point[1])
@@ -143,6 +157,9 @@ func getInfluxdbData(c *influxdb.Client, query string) (map[string]bool, error)
 }

 func expectedItemsExist(expectedItems []string, actualItems map[string]bool) bool {
+	if len(actualItems) < len(expectedItems) {
+		return false
+	}
 	for _, item := range expectedItems {
 		if _, found := actualItems[item]; !found {
 			return false
@@ -182,8 +199,9 @@ func getMasterHost() string {

 func testMonitoringUsingHeapsterInfluxdb(c *client.Client) {
 	// Check if heapster pods and services are up.
-	expectedRcsExist(c)
-	expectedServicesExist(c)
+	expectedPods, err := verifyExpectedRcsExistAndGetExpectedPods(c)
+	expectNoError(err)
+	expectNoError(expectedServicesExist(c))
 	// TODO: Wait for all pods and services to be running.
 	kubeMasterHttpClient, ok := c.Client.(*http.Client)
 	if !ok {
@@ -202,14 +220,14 @@ func testMonitoringUsingHeapsterInfluxdb(c *client.Client) {
 	influxdbClient, err := influxdb.NewClient(config)
 	expectNoError(err, "failed to create influxdb client")

-	expectedPods := getAllPodsInCluster(c)
-	expectedNodes := getAllNodesInCluster(c)
-	attempt := maxAttempts
+	expectedNodes, err := getAllNodesInCluster(c)
+	expectNoError(err)
+	startTime := time.Now()
 	for {
 		if validatePodsAndNodes(influxdbClient, expectedPods, expectedNodes) {
 			return
 		}
-		if attempt--; attempt <= 0 {
+		if time.Since(startTime) >= testTimeout {
 			break
 		}
 		time.Sleep(sleepBetweenAttempts)
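
The final hunk swaps the fixed attempt counter for a wall-clock timeout. Below is a minimal, self-contained sketch of that polling pattern; pollUntil and the stand-in check in main are illustrative and not part of the test, while the constant values mirror sleepBetweenAttempts and testTimeout from the diff.

package main

import (
	"fmt"
	"time"
)

// Values mirror the constants in the diff above.
const (
	sleepBetweenAttempts = 5 * time.Second
	testTimeout          = 5 * time.Minute
)

// pollUntil retries check until it succeeds or testTimeout elapses,
// using wall-clock time instead of counting attempts.
func pollUntil(check func() bool) error {
	startTime := time.Now()
	for {
		if check() {
			return nil
		}
		if time.Since(startTime) >= testTimeout {
			return fmt.Errorf("validation did not pass within %v", testTimeout)
		}
		time.Sleep(sleepBetweenAttempts)
	}
}

func main() {
	// Stand-in for validatePodsAndNodes(influxdbClient, expectedPods, expectedNodes):
	// here it simply succeeds on the third try.
	attempts := 0
	err := pollUntil(func() bool {
		attempts++
		return attempts >= 3
	})
	fmt.Println("attempts:", attempts, "error:", err)
}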