Rewrite summary stats test to validate metrics

This commit is contained in:
Tim St. Clair 2016-09-12 12:17:21 -07:00
parent b19d71792f
commit d4aeaedba0
No known key found for this signature in database
GPG Key ID: 434D16BCEF479EAB
2 changed files with 279 additions and 325 deletions

View File

@ -18,21 +18,14 @@ package e2e_node
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"strings"
"time"
"k8s.io/kubernetes/pkg/api"
apiUnversioned "k8s.io/kubernetes/pkg/api/unversioned"
"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
"k8s.io/kubernetes/pkg/util/sets"
"k8s.io/kubernetes/pkg/util/uuid"
"k8s.io/kubernetes/test/e2e/framework"
"github.com/davecgh/go-spew/spew"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
@ -158,322 +151,4 @@ var _ = framework.KubeDescribe("Kubelet", func() {
}, time.Minute, time.Second*4).Should(Equal("sh: can't create /file: Read-only file system\n"))
})
})
// Verifies that the kubelet's /stats/summary endpoint eventually reports the
// test pods, their volumes and non-empty node/pod/container metrics.
Describe("metrics api", func() {
	Context("when querying /stats/summary", func() {
		It("it should report resource usage through the stats api", func() {
			podNamePrefix := "stats-busybox-" + string(uuid.NewUUID())
			volumeNamePrefix := "test-empty-dir"
			podNames, volumes := createSummaryTestPods(f.PodClient(), podNamePrefix, 2, volumeNamePrefix)
			By("Returning stats summary")
			Eventually(func() error {
				resp, err := http.Get(*kubeletAddress + "/stats/summary")
				if err != nil {
					return fmt.Errorf("Failed to get /stats/summary - %v", err)
				}
				// This closure runs once per poll; close the body every time so
				// connections are not leaked while waiting for the stats to appear.
				defer resp.Body.Close()
				if resp.StatusCode != http.StatusOK {
					return fmt.Errorf("Failed to get /stats/summary - unexpected status %q", resp.Status)
				}
				contentsBytes, err := ioutil.ReadAll(resp.Body)
				if err != nil {
					return fmt.Errorf("Failed to read /stats/summary - %v (response: %+v)", err, resp)
				}
				contents := string(contentsBytes)
				// Decode into a fresh struct on each attempt: decoding into a reused
				// value leaves fields that are absent from the new response untouched,
				// which could make stale data from an earlier poll pass validation.
				summary := stats.Summary{}
				decoder := json.NewDecoder(strings.NewReader(contents))
				if err := decoder.Decode(&summary); err != nil {
					return fmt.Errorf("Failed to parse /stats/summary to go struct: %v (response: %+v)", err, resp)
				}
				missingPods := podsMissingFromSummary(summary, podNames)
				if missingPods.Len() != 0 {
					return fmt.Errorf("expected pods not found. Following pods are missing - %v", missingPods)
				}
				missingVolumes := volumesMissingFromSummary(summary, volumes)
				if missingVolumes.Len() != 0 {
					return fmt.Errorf("expected volumes not found. Following volumes are missing - %v", missingVolumes)
				}
				return testSummaryMetrics(summary, podNamePrefix)
			}, 5*time.Minute, time.Second*4).Should(BeNil())
		})
	})
})
})
// containerSuffix is appended to a pod's name to form the name of its single container.
const containerSuffix = "-c"
// createSummaryTestPods submits count busybox pods through podClient. Pod i is
// named podNamePrefix followed by i; each pod runs one container (named after
// the pod plus containerSuffix) that writes to an emptyDir volume called
// volumeNamePrefix once per second.
//
// It returns the set of pod names and the set of volume names that were used.
func createSummaryTestPods(podClient *framework.PodClient, podNamePrefix string, count int, volumeNamePrefix string) (sets.String, sets.String) {
	names := sets.NewString()
	for idx := 0; idx < count; idx++ {
		names.Insert(fmt.Sprintf("%s%v", podNamePrefix, idx))
	}

	var batch []*api.Pod
	for _, name := range names.List() {
		// The single container keeps overwriting a file on the mounted volume.
		writer := api.Container{
			Image:   "gcr.io/google_containers/busybox:1.24",
			Command: []string{"sh", "-c", "while true; do echo 'hello world' | tee /test-empty-dir-mnt/file ; sleep 1; done"},
			Name:    name + containerSuffix,
			VolumeMounts: []api.VolumeMount{
				{MountPath: "/test-empty-dir-mnt", Name: volumeNamePrefix},
			},
		}
		// TODO: Test secret volumes
		// TODO: Test hostpath volumes
		scratch := api.Volume{
			Name:         volumeNamePrefix,
			VolumeSource: api.VolumeSource{EmptyDir: &api.EmptyDirVolumeSource{}},
		}
		spec := api.PodSpec{
			RestartPolicy: api.RestartPolicyNever,
			Containers:    []api.Container{writer},
			SecurityContext: &api.PodSecurityContext{
				SELinuxOptions: &api.SELinuxOptions{Level: "s0"},
			},
			Volumes: []api.Volume{scratch},
		}
		batch = append(batch, &api.Pod{
			ObjectMeta: api.ObjectMeta{Name: name},
			Spec:       spec,
		})
	}
	podClient.CreateBatch(batch)

	return names, sets.NewString(volumeNamePrefix)
}
// podsMissingFromSummary returns the names from expectedPods that do not
// appear as a PodRef.Name in s.Pods. An empty result means every expected pod
// was reported.
func podsMissingFromSummary(s stats.Summary, expectedPods sets.String) sets.String {
	// Work on the set built by StringKeySet rather than on the argument itself.
	missing := sets.StringKeySet(expectedPods)
	for i := range s.Pods {
		if name := s.Pods[i].PodRef.Name; missing.Has(name) {
			missing.Delete(name)
		}
	}
	return missing
}
// volumesMissingFromSummary looks for the first pod in s.Pods whose
// VolumeStats lack one or more of expectedVolumes and returns the names that
// pod is missing. It returns an empty set when every pod reports all of them.
//
// NOTE(review): every pod in the summary is inspected, not only the pods
// created by the calling test.
func volumesMissingFromSummary(s stats.Summary, expectedVolumes sets.String) sets.String {
	for i := range s.Pods {
		// Start from the full expectation for each pod and strike off what it reports.
		outstanding := sets.StringKeySet(expectedVolumes)
		for _, reported := range s.Pods[i].VolumeStats {
			if outstanding.Has(reported.Name) {
				outstanding.Delete(reported.Name)
			}
		}
		if outstanding.Len() > 0 {
			return outstanding
		}
	}
	return sets.NewString()
}
// testSummaryMetrics checks that the summary s carries plausible (non-nil and
// non-zero) metrics for the node, its "kubelet" and "runtime" system
// containers, and every pod whose name starts with podNamePrefix.
//
// For each matching pod it requires exactly one container named
// <pod name> + containerSuffix, a "test-empty-dir" volume, populated volume
// stats, and populated root fs and log fs stats for the container.
//
// It returns nil when everything checks out, otherwise an error describing the
// first metric that failed. Missing (nil) values are always reported as errors
// rather than dereferenced.
func testSummaryMetrics(s stats.Summary, podNamePrefix string) error {
	const (
		nonNilValue  = "expected %q to not be nil"
		nonZeroValue = "expected %q to not be zero"
	)

	// metric pairs a human readable name with the reported value.
	type metric struct {
		name  string
		value *uint64
	}
	// failure formats an error for the named metric. When obj is non-nil its
	// dump is appended as an argument (never as part of the format string), so
	// a '%' inside the dump cannot corrupt the message.
	failure := func(format, name string, obj interface{}) error {
		if obj == nil {
			return fmt.Errorf(format, name)
		}
		return fmt.Errorf(format+" - %s", name, spew.Sdump(obj))
	}
	// expectAll returns an error for the first metric that is nil or zero.
	expectAll := func(metrics []metric, obj interface{}) error {
		for _, m := range metrics {
			switch {
			case m.value == nil:
				return failure(nonNilValue, m.name, obj)
			case *m.value == 0:
				return failure(nonZeroValue, m.name, obj)
			}
		}
		return nil
	}

	// Node level stats.
	node := s.Node
	if node.NodeName != framework.TestContext.NodeName {
		return fmt.Errorf("unexpected node name - %q", node.NodeName)
	}
	if node.CPU == nil {
		return failure(nonNilValue, "node cpu", nil)
	}
	if node.Memory == nil {
		return failure(nonNilValue, "node memory", nil)
	}
	if node.Fs == nil {
		return failure(nonNilValue, "node fs", nil)
	}
	if err := expectAll([]metric{
		{"cpu instantaneous", node.CPU.UsageCoreNanoSeconds},
		{"memory", node.Memory.UsageBytes},
		{"memory working set", node.Memory.WorkingSetBytes},
		{"node Fs available", node.Fs.AvailableBytes},
		{"node fs capacity", node.Fs.CapacityBytes},
		{"node fs used", node.Fs.UsedBytes},
	}, nil); err != nil {
		return err
	}

	// Container runtime image filesystem.
	if node.Runtime == nil {
		return failure(nonNilValue, "node runtime", nil)
	}
	imageFs := node.Runtime.ImageFs
	if imageFs == nil {
		return failure(nonNilValue, "runtime image Fs", nil)
	}
	if err := expectAll([]metric{
		{"runtime image Fs available", imageFs.AvailableBytes},
		{"runtime image Fs capacity", imageFs.CapacityBytes},
		{"runtime image Fs usage", imageFs.UsedBytes},
	}, nil); err != nil {
		return err
	}

	// System containers: each reported one must have stats, and the kubelet
	// and runtime containers must be among them.
	seen := map[string]bool{}
	for i := range node.SystemContainers {
		sysContainer := &node.SystemContainers[i]
		seen[sysContainer.Name] = true
		if err := expectContainerStatsNotEmpty(sysContainer); err != nil {
			return err
		}
	}
	if !seen["kubelet"] {
		return fmt.Errorf("expected metrics for kubelet")
	}
	if !seen["runtime"] {
		return fmt.Errorf("expected metrics for runtime")
	}

	// Verify Pods Stats are present
	By("Having resources for pods")
	for _, pod := range s.Pods {
		if !strings.HasPrefix(pod.PodRef.Name, podNamePrefix) {
			// Ignore pods created outside this test
			continue
		}
		if len(pod.Containers) != 1 {
			return fmt.Errorf("expected only one container")
		}
		container := pod.Containers[0]
		if container.Name != (pod.PodRef.Name + containerSuffix) {
			return fmt.Errorf("unexpected container name - %q", container.Name)
		}
		if err := expectContainerStatsNotEmpty(&container); err != nil {
			return err
		}

		// emptydir volume
		foundExpectedVolume := false
		for _, vs := range pod.VolumeStats {
			if err := expectAll([]metric{
				{"volume capacity", vs.CapacityBytes},
				{"volume available", vs.AvailableBytes},
				{"volume used", vs.UsedBytes},
			}, nil); err != nil {
				return err
			}
			if vs.Name == "test-empty-dir" {
				foundExpectedVolume = true
			}
		}
		if !foundExpectedVolume {
			return fmt.Errorf("expected 'test-empty-dir' volume")
		}

		// fs usage (not for system containers)
		if container.Rootfs == nil {
			return failure(nonNilValue, "container root fs", container)
		}
		if err := expectAll([]metric{
			{"container root fs available", container.Rootfs.AvailableBytes},
			{"container root fs capacity", container.Rootfs.CapacityBytes},
			{"container root fs usage", container.Rootfs.UsedBytes},
		}, container); err != nil {
			return err
		}
		if container.Logs == nil {
			return failure(nonNilValue, "container logs", container)
		}
		if err := expectAll([]metric{
			{"container logs available", container.Logs.AvailableBytes},
			{"container logs capacity", container.Logs.CapacityBytes},
			{"container logs usage", container.Logs.UsedBytes},
		}, container); err != nil {
			return err
		}
	}
	return nil
}
// expectContainerStatsNotEmpty returns an error if container has no CPU or
// memory stats, or if its CPU usage, memory usage or memory working set is
// nil or zero. The error includes a dump of the container stats. It returns
// nil when all of those values are populated.
func expectContainerStatsNotEmpty(container *stats.ContainerStats) error {
	// TODO: Test Network
	cpu := container.CPU
	switch {
	case cpu == nil:
		return fmt.Errorf("expected container cpu to be not nil - %q", spew.Sdump(container))
	case cpu.UsageCoreNanoSeconds == nil:
		return fmt.Errorf("expected container cpu instantaneous usage to be not nil - %q", spew.Sdump(container))
	case *cpu.UsageCoreNanoSeconds == 0:
		return fmt.Errorf("expected container cpu instantaneous usage to be non zero - %q", spew.Sdump(container))
	}

	mem := container.Memory
	switch {
	case mem == nil:
		return fmt.Errorf("expected container memory to be not nil - %q", spew.Sdump(container))
	case mem.UsageBytes == nil:
		return fmt.Errorf("expected container memory usage to be not nil - %q", spew.Sdump(container))
	case *mem.UsageBytes == 0:
		return fmt.Errorf("expected container memory usage to be non zero - %q", spew.Sdump(container))
	case mem.WorkingSetBytes == nil:
		return fmt.Errorf("expected container memory working set to be not nil - %q", spew.Sdump(container))
	case *mem.WorkingSetBytes == 0:
		return fmt.Errorf("expected container memory working set to be non zero - %q", spew.Sdump(container))
	}
	return nil
}

View File

@ -0,0 +1,279 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2e_node
import (
"fmt"
"time"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
"k8s.io/kubernetes/test/e2e/framework"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/onsi/gomega/gstruct"
"github.com/onsi/gomega/types"
)
// TODO(timstclair): Move this test out of the flaky suite once it has demonstrated stability.
//
// Node e2e test for the summary stats: it creates two busybox pods, builds a
// tree of gstruct matchers describing the acceptable range for each reported
// field, and then checks the value returned by getNodeSummary (defined outside
// this block) against that tree.
var _ = framework.KubeDescribe("Summary API [Flaky]", func() {
f := framework.NewDefaultFramework("summary-test")
Context("when querying /stats/summary", func() {
It("should report resource usage through the stats api", func() {
const pod0 = "stats-busybox-0"
const pod1 = "stats-busybox-1"
By("Creating test pods")
createSummaryTestPods(f, pod0, pod1)
// Wait for cAdvisor to collect 2 stats points
time.Sleep(15 * time.Second)
// Setup expectations.
// Size units are decimal (powers of 1000); the two durations bound how old
// a StartTime and a stats sample Time may be.
const (
kb = 1000
mb = 1000 * kb
gb = 1000 * mb
tb = 1000 * gb
maxStartAge = time.Hour * 24 * 365 // 1 year
maxStatsAge = time.Minute
)
// Shared bounds for every filesystem capacity/available value below.
fsCapacityBounds := bounded(100*mb, 100*gb)
// Expectations for system containers.
// The same expectations are applied to both the "kubelet" and "runtime" entries.
sysContExpectations := gstruct.MatchAllFields(gstruct.Fields{
"Name": gstruct.Ignore(),
"StartTime": recent(maxStartAge),
"CPU": ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
"UsageNanoCores": bounded(100000, 2E9),
"UsageCoreNanoSeconds": bounded(10000000, 1E15),
}),
"Memory": ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
// We don't limit system container memory.
"AvailableBytes": BeNil(),
"UsageBytes": bounded(5*mb, 1*gb),
"WorkingSetBytes": bounded(5*mb, 1*gb),
"RSSBytes": bounded(5*mb, 1*gb),
"PageFaults": bounded(1000, 1E9),
"MajorPageFaults": bounded(0, 100000),
}),
// TODO(#31999): Don't report FS stats for system containers.
"Rootfs": gstruct.Ignore(),
"Logs": gstruct.Ignore(),
"UserDefinedMetrics": BeEmpty(),
})
// Expectations for pods.
// Each test pod must report exactly one container ("busybox-container") and
// exactly one volume ("test-empty-dir"); both are keyed via summaryObjectID.
podExpectations := gstruct.MatchAllFields(gstruct.Fields{
"PodRef": gstruct.Ignore(),
"StartTime": recent(maxStartAge),
"Containers": gstruct.MatchAllElements(summaryObjectID, gstruct.Elements{
"busybox-container": gstruct.MatchAllFields(gstruct.Fields{
"Name": Equal("busybox-container"),
"StartTime": recent(maxStartAge),
"CPU": ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
"UsageNanoCores": bounded(100000, 100000000),
"UsageCoreNanoSeconds": bounded(10000000, 1000000000),
}),
"Memory": ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
"AvailableBytes": bounded(1*mb, 10*mb),
"UsageBytes": bounded(10*kb, mb),
"WorkingSetBytes": bounded(10*kb, mb),
"RSSBytes": bounded(1*kb, mb),
"PageFaults": bounded(100, 100000),
"MajorPageFaults": bounded(0, 10),
}),
"Rootfs": ptrMatchAllFields(gstruct.Fields{
"AvailableBytes": fsCapacityBounds,
"CapacityBytes": fsCapacityBounds,
"UsedBytes": bounded(kb, 10*mb),
"InodesFree": bounded(1E4, 1E8),
"Inodes": bounded(1E4, 1E8),
}),
"Logs": ptrMatchAllFields(gstruct.Fields{
"AvailableBytes": fsCapacityBounds,
"CapacityBytes": fsCapacityBounds,
"UsedBytes": bounded(kb, 10*mb),
"InodesFree": bounded(1E4, 1E8),
"Inodes": bounded(1E4, 1E8),
}),
"UserDefinedMetrics": BeEmpty(),
}),
}),
"Network": ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
"RxBytes": bounded(10, 10*mb),
"RxErrors": bounded(0, 1000),
"TxBytes": bounded(10, 10*mb),
"TxErrors": bounded(0, 1000),
}),
"VolumeStats": gstruct.MatchAllElements(summaryObjectID, gstruct.Elements{
"test-empty-dir": gstruct.MatchAllFields(gstruct.Fields{
"Name": Equal("test-empty-dir"),
"FsStats": gstruct.MatchAllFields(gstruct.Fields{
"AvailableBytes": fsCapacityBounds,
"CapacityBytes": fsCapacityBounds,
"UsedBytes": bounded(kb, 1*mb),
// Inodes are not reported for Volumes.
"InodesFree": BeNil(),
"Inodes": BeNil(),
}),
}),
}),
})
// Top-level expectations for the whole summary: node stats plus the two test pods.
matchExpectations := ptrMatchAllFields(gstruct.Fields{
"Node": gstruct.MatchAllFields(gstruct.Fields{
"NodeName": Equal(framework.TestContext.NodeName),
"StartTime": recent(maxStartAge),
// Only "kubelet" and "runtime" are validated; other system containers are ignored.
"SystemContainers": gstruct.MatchElements(summaryObjectID, gstruct.IgnoreExtras, gstruct.Elements{
"kubelet": sysContExpectations,
"runtime": sysContExpectations,
}),
"CPU": ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
"UsageNanoCores": bounded(100E3, 2E9),
"UsageCoreNanoSeconds": bounded(1E9, 1E15),
}),
"Memory": ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
"AvailableBytes": bounded(100*mb, 100*gb),
"UsageBytes": bounded(10*mb, 10*gb),
"WorkingSetBytes": bounded(10*mb, 10*gb),
"RSSBytes": bounded(1*kb, 1*gb),
"PageFaults": bounded(1000, 1E9),
"MajorPageFaults": bounded(0, 100000),
}),
// TODO(#28407): Handle non-eth0 network interface names.
// Until then, a nil Network is accepted as well.
"Network": Or(BeNil(), ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
"RxBytes": bounded(1*mb, 100*gb),
"RxErrors": bounded(0, 100000),
"TxBytes": bounded(10*kb, 10*gb),
"TxErrors": bounded(0, 100000),
})),
"Fs": ptrMatchAllFields(gstruct.Fields{
"AvailableBytes": fsCapacityBounds,
"CapacityBytes": fsCapacityBounds,
"UsedBytes": bounded(kb, 10*gb),
"InodesFree": bounded(1E4, 1E8),
"Inodes": bounded(1E4, 1E8),
}),
"Runtime": ptrMatchAllFields(gstruct.Fields{
"ImageFs": ptrMatchAllFields(gstruct.Fields{
"AvailableBytes": fsCapacityBounds,
"CapacityBytes": fsCapacityBounds,
"UsedBytes": bounded(kb, 10*gb),
"InodesFree": bounded(1E4, 1E8),
"Inodes": bounded(1E4, 1E8),
}),
}),
}),
// Ignore extra pods since the tests run in parallel.
// Keys use the "<namespace>::<name>" form produced by summaryObjectID for pods.
"Pods": gstruct.MatchElements(summaryObjectID, gstruct.IgnoreExtras, gstruct.Elements{
fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): podExpectations,
fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): podExpectations,
}),
})
By("Validating /stats/summary")
// Give pods a minute to actually start up.
// getNodeSummary is polled every 15 seconds until the expectations match.
Eventually(getNodeSummary, 1*time.Minute, 15*time.Second).Should(matchExpectations)
// Then the summary should match the expectations a few more times.
Consistently(getNodeSummary, 30*time.Second, 15*time.Second).Should(matchExpectations)
})
})
})
// createSummaryTestPods creates one pod per entry in names through the
// framework's pod client. Every pod runs a single "busybox-container" with a
// 10M memory limit that pings google.com once and then writes to a file on the
// "test-empty-dir" emptyDir volume once per second.
func createSummaryTestPods(f *framework.Framework, names ...string) {
	const (
		volumeName = "test-empty-dir"
		mountPath  = "/test-empty-dir-mnt"
	)

	// newPod builds a fresh, unshared pod object for the given name.
	newPod := func(podName string) *api.Pod {
		busybox := api.Container{
			Name:    "busybox-container",
			Image:   "gcr.io/google_containers/busybox:1.24",
			Command: []string{"sh", "-c", "ping -c 1 google.com; while true; do echo 'hello world' | tee /test-empty-dir-mnt/file ; sleep 1; done"},
			Resources: api.ResourceRequirements{
				Limits: api.ResourceList{
					// Must set memory limit to get MemoryStats.AvailableBytes
					api.ResourceMemory: resource.MustParse("10M"),
				},
			},
			VolumeMounts: []api.VolumeMount{
				{MountPath: mountPath, Name: volumeName},
			},
		}
		return &api.Pod{
			ObjectMeta: api.ObjectMeta{Name: podName},
			Spec: api.PodSpec{
				RestartPolicy: api.RestartPolicyAlways,
				Containers:    []api.Container{busybox},
				SecurityContext: &api.PodSecurityContext{
					SELinuxOptions: &api.SELinuxOptions{Level: "s0"},
				},
				Volumes: []api.Volume{
					// TODO(#28393): Test secret volumes
					// TODO(#28394): Test hostpath volumes
					{Name: volumeName, VolumeSource: api.VolumeSource{EmptyDir: &api.EmptyDirVolumeSource{}}},
				},
			},
		}
	}

	batch := make([]*api.Pod, len(names))
	for i, podName := range names {
		batch[i] = newPod(podName)
	}
	f.PodClient().CreateBatch(batch)
}
// summaryObjectID is the identifier function handed to the gstruct element
// matchers. Pod stats are identified as "<namespace>::<name>"; container
// stats, volume stats and user defined metrics are identified by their Name.
// Any other element type fails the test and yields "???".
func summaryObjectID(element interface{}) string {
	if pod, ok := element.(stats.PodStats); ok {
		return pod.PodRef.Namespace + "::" + pod.PodRef.Name
	}
	if ctr, ok := element.(stats.ContainerStats); ok {
		return ctr.Name
	}
	if vol, ok := element.(stats.VolumeStats); ok {
		return vol.Name
	}
	if udm, ok := element.(stats.UserDefinedMetric); ok {
		return udm.Name
	}
	framework.Failf("Unknown type: %T", element)
	return "???"
}
// Convenience functions for common matcher combinations.
// ptrMatchAllFields wraps gstruct.MatchAllFields(fields) in gstruct.PointTo,
// for use on fields that hold a pointer to a struct.
func ptrMatchAllFields(fields gstruct.Fields) types.GomegaMatcher {
	structMatcher := gstruct.MatchAllFields(fields)
	return gstruct.PointTo(structMatcher)
}
// bounded matches a pointer whose target is numerically within the inclusive
// range [lower, upper].
func bounded(lower, upper interface{}) types.GomegaMatcher {
	atLeast := BeNumerically(">=", lower)
	atMost := BeNumerically("<=", upper)
	return gstruct.PointTo(And(atLeast, atMost))
}
// recent matches a time that is no older than d before the moment this
// matcher is built and earlier than two minutes after that moment.
func recent(d time.Duration) types.GomegaMatcher {
	notBefore := BeTemporally(">=", time.Now().Add(-d))
	// Now() is the test start time, not the match time, so permit a few extra minutes.
	notAfter := BeTemporally("<", time.Now().Add(2*time.Minute))
	return And(notBefore, notAfter)
}