diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json
index 6e79a6eb051..1a2ac69d933 100644
--- a/Godeps/Godeps.json
+++ b/Godeps/Godeps.json
@@ -1043,6 +1043,11 @@
 			"Comment": "v0.23.2-79-gc6c06d4",
 			"Rev": "c6c06d440ab2fcaae9211dda6dcbbaa1e98a054b"
 		},
+		{
+			"ImportPath": "github.com/google/cadvisor/client/v2",
+			"Comment": "v0.23.2-79-gc6c06d4",
+			"Rev": "c6c06d440ab2fcaae9211dda6dcbbaa1e98a054b"
+		},
 		{
 			"ImportPath": "github.com/google/cadvisor/collector",
 			"Comment": "v0.23.2-79-gc6c06d4",
diff --git a/Godeps/LICENSES b/Godeps/LICENSES
index af322ca35d5..0c358708663 100644
--- a/Godeps/LICENSES
+++ b/Godeps/LICENSES
@@ -33571,6 +33571,204 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ================================================================================
 
+================================================================================
+= vendor/github.com/google/cadvisor/client/v2 licensed under: =
+
+   Copyright 2014 The cAdvisor Authors
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability.
+      In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+= vendor/github.com/google/cadvisor/LICENSE e7790b946bfacb700e8a8f2baedb3205 -
+================================================================================
+
+
 ================================================================================
 = vendor/github.com/google/cadvisor/collector licensed under: =
diff --git a/test/e2e_node/density_test.go b/test/e2e_node/density_test.go
new file mode 100644
index 00000000000..a8ce0f1c7ea
--- /dev/null
+++ b/test/e2e_node/density_test.go
@@ -0,0 +1,432 @@
+// +build linux
+
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e_node
+
+import (
+	"errors"
+	"fmt"
+	"sort"
+	"strconv"
+	"sync"
+	"time"
+
+	"k8s.io/kubernetes/pkg/api"
+	apierrors "k8s.io/kubernetes/pkg/api/errors"
+	"k8s.io/kubernetes/pkg/api/unversioned"
+	"k8s.io/kubernetes/pkg/client/cache"
+	controllerframework "k8s.io/kubernetes/pkg/controller/framework"
+	"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
+	kubemetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
+	"k8s.io/kubernetes/pkg/labels"
+	"k8s.io/kubernetes/pkg/metrics"
+	"k8s.io/kubernetes/pkg/runtime"
+	"k8s.io/kubernetes/pkg/watch"
+	"k8s.io/kubernetes/test/e2e/framework"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+)
+
+const (
+	kubeletAddr = "localhost:10255"
+)
+
+var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
+	const (
+		// The data collection of the `resource collector' and of the standalone
+		// cadvisor is not synchronized,
+		// so the `resource collector' may miss data or collect duplicated data.
+		monitoringInterval    = 500 * time.Millisecond
+		sleepBeforeCreatePods = 30 * time.Second
+	)
+
+	var (
+		ns       string
+		nodeName string
+	)
+
+	f := framework.NewDefaultFramework("density-test")
+	podType := "density_test_pod"
+
+	BeforeEach(func() {
+		ns = f.Namespace.Name
+		nodeName = framework.TestContext.NodeName
+	})
+
+	AfterEach(func() {
+	})
+
+	Context("create a batch of pods", func() {
+		densityTests := []DensityTest{
+			{
+				podsNr:   10,
+				interval: 0 * time.Millisecond,
+				cpuLimits: framework.ContainersCPUSummary{
+					stats.SystemContainerKubelet: {0.50: 0.20, 0.95: 0.30},
+					stats.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
+				},
+				memLimits: framework.ResourceUsagePerContainer{
+					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
+					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
+				},
+				// percentile limits of single pod startup latency
+				podStartupLimits: framework.LatencyMetric{
+					Perc50: 10 * time.Second,
+					Perc90: 15 * time.Second,
+					Perc99: 20 * time.Second,
+				},
+				// upper bound of the startup latency of a batch of pods
+				podBatchStartupLimit: 25 * time.Second,
+			},
+		}
+
+		for _, testArg := range densityTests {
+			itArg := testArg
+			It(fmt.Sprintf("latency/resource should be within limit when creating %d pods with %v interval",
+				itArg.podsNr, itArg.interval), func() {
+				var (
+					mutex      = &sync.Mutex{}
+					watchTimes = make(map[string]unversioned.Time, 0)
+					stopCh     = make(chan struct{})
+				)
+
+				// create specifications of the test pods
+				pods := newTestPods(itArg.podsNr, ImageRegistry[pauseImage], podType)
+
+				// start a standalone cadvisor pod;
+				// it uses `createSync', so the pod is running when it returns
+				createCadvisorPod(f)
+
+				// the `resource collector' monitors fine-grained CPU/memory usage
+				// via a standalone cadvisor with a 1s housekeeping interval
+				rc := NewResourceCollector(monitoringInterval)
+
+				// the controller watches the change of pod status
+				controller := newInformerWatchPod(f, mutex, watchTimes, podType)
+				go controller.Run(stopCh)
+
+				// In tests we have seen the kubelet start while still busy with other
+				// work, so `syncLoop' does not respond to pod creation immediately;
+				// creating the first pod can be delayed by around 5s. The node status
+				// is already `ready', so waiting for the node to become ready does not
+				// help here. Instead, wait for a grace period to give `syncLoop' time
+				// to become responsive.
+				time.Sleep(sleepBeforeCreatePods)
+
+				// the density test only monitors the overhead of creating pods;
+				// alternatively, start the collector earlier and call `rc.Reset()'
+				// here to clear the buffer
+				rc.Start()
+
+				By("Creating a batch of pods")
+				// returns a map of pod name -> creation timestamp
+				createTimes := createBatchPodWithRateControl(f, pods, itArg.interval)
+
+				By("Waiting for all Pods to be observed by the watch...")
+				// checks every 10s until all pods are running;
+				// it times out after 10 minutes
+				Eventually(func() bool {
+					return len(watchTimes) == itArg.podsNr
+				}, 10*time.Minute, 10*time.Second).Should(BeTrue())
+
+				if len(watchTimes) < itArg.podsNr {
+					framework.Failf("Timeout reached waiting for all Pods to be observed by the watch.")
+				}
+
+				// stop the watching controller and the resource collector
+				close(stopCh)
+				rc.Stop()
+
+				// data analysis
+				var (
+					firstCreate unversioned.Time
+					lastRunning unversioned.Time
+					init        = true
+					e2eLags     = make([]framework.PodLatencyData, 0)
+				)
+
+				for name, create := range createTimes {
+					watch, ok := watchTimes[name]
+					Expect(ok).To(Equal(true))
+
+					e2eLags = append(e2eLags,
+						framework.PodLatencyData{Name: name, Latency: watch.Time.Sub(create.Time)})
+
+					if !init {
+						if firstCreate.Time.After(create.Time) {
+							firstCreate = create
+						}
+						if lastRunning.Time.Before(watch.Time) {
+							lastRunning = watch
+						}
+					} else {
+						init = false
+						firstCreate, lastRunning = create, watch
+					}
+				}
+
+				sort.Sort(framework.LatencySlice(e2eLags))
+
+				// verify latency
+				By("Verifying latency")
+				verifyLatency(lastRunning.Time.Sub(firstCreate.Time), e2eLags, itArg)
+
+				// verify resource
+				By("Verifying resource")
+				verifyResource(f, itArg, rc)
+			})
+		}
+	})
+
+	Context("create a sequence of pods", func() {
+		densityTests := []DensityTest{
+			{
+				podsNr:   10,
+				bgPodsNr: 10,
+				cpuLimits: framework.ContainersCPUSummary{
+					stats.SystemContainerKubelet: {0.50: 0.20, 0.95: 0.25},
+					stats.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
+				},
+				memLimits: framework.ResourceUsagePerContainer{
+					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
+					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
+				},
+				podStartupLimits: framework.LatencyMetric{
+					Perc50: 3000 * time.Millisecond,
+					Perc90: 4000 * time.Millisecond,
+					Perc99: 5000 * time.Millisecond,
+				},
+			},
+		}
+
+		for _, testArg := range densityTests {
+			itArg := testArg
+			It(fmt.Sprintf("latency/resource should be within limit when creating %d pods with %d background pods",
+				itArg.podsNr, itArg.bgPodsNr), func() {
+				bgPods := newTestPods(itArg.bgPodsNr, ImageRegistry[pauseImage], "background_pod")
+				testPods := newTestPods(itArg.podsNr, ImageRegistry[pauseImage], podType)
+
+				createCadvisorPod(f)
+				rc := NewResourceCollector(monitoringInterval)
+
+				By("Creating a batch of background pods")
+				// CreateBatch is synchronized:
+				// all pods are running when it returns
+				f.PodClient().CreateBatch(bgPods)
+
+				time.Sleep(sleepBeforeCreatePods)
+
+				// start resource monitoring
+				rc.Start()
+
+				// create pods sequentially (back-to-back)
+				batchLag, e2eLags := createBatchPodSequential(f, testPods)
+
+				rc.Stop()
+
+				// verify latency
+				By("Verifying latency")
+				verifyLatency(batchLag, e2eLags, itArg)
+
+				// verify resource
+				By("Verifying resource")
+				verifyResource(f, itArg, rc)
+			})
+		}
+	})
+})
+
+type DensityTest struct {
+	// number of pods
+	podsNr int
+	// number of background pods
+	bgPodsNr int
+	// interval between pod creations (rate control)
+	interval time.Duration
+	// resource bounds
+	cpuLimits            framework.ContainersCPUSummary
+	memLimits            framework.ResourceUsagePerContainer
+	podStartupLimits     framework.LatencyMetric
+	podBatchStartupLimit time.Duration
+}
+
+// createBatchPodWithRateControl creates a batch of pods concurrently, using one goroutine per creation;
+// between creations there is an interval for throughput control.
+func createBatchPodWithRateControl(f *framework.Framework, pods []*api.Pod, interval time.Duration) map[string]unversioned.Time {
+	createTimes := make(map[string]unversioned.Time)
+	for _, pod := range pods {
+		createTimes[pod.ObjectMeta.Name] = unversioned.Now()
+		go f.PodClient().Create(pod)
+		time.Sleep(interval)
+	}
+	return createTimes
+}
+
+func checkPodDeleted(f *framework.Framework, podName string) error {
+	ns := f.Namespace.Name
+	_, err := f.Client.Pods(ns).Get(podName)
+	if apierrors.IsNotFound(err) {
+		return nil
+	}
+	return errors.New("pod not deleted")
+}
+
+// getPodStartLatency gets the Prometheus metric `pod start latency' from the kubelet.
+func getPodStartLatency(node string) (framework.KubeletLatencyMetrics, error) {
+	latencyMetrics := framework.KubeletLatencyMetrics{}
+	ms, err := metrics.GrabKubeletMetricsWithoutProxy(node)
+	Expect(err).NotTo(HaveOccurred())
+
+	for _, samples := range ms {
+		for _, sample := range samples {
+			if sample.Metric["__name__"] == kubemetrics.KubeletSubsystem+"_"+kubemetrics.PodStartLatencyKey {
+				quantile, _ := strconv.ParseFloat(string(sample.Metric["quantile"]), 64)
+				latencyMetrics = append(latencyMetrics,
+					framework.KubeletLatencyMetric{
+						Quantile: quantile,
+						Method:   kubemetrics.PodStartLatencyKey,
+						Latency:  time.Duration(int(sample.Value)) * time.Microsecond})
+			}
+		}
+	}
+	return latencyMetrics, nil
+}
+
+// verifyPodStartupLatency verifies that the 50th, 90th, and 99th percentiles of
+// PodStartupLatency are within the thresholds.
+func verifyPodStartupLatency(expect, actual framework.LatencyMetric) error {
+	if actual.Perc50 > expect.Perc50 {
+		return fmt.Errorf("too high pod startup latency 50th percentile: %v", actual.Perc50)
+	}
+	if actual.Perc90 > expect.Perc90 {
+		return fmt.Errorf("too high pod startup latency 90th percentile: %v", actual.Perc90)
+	}
+	if actual.Perc99 > expect.Perc99 {
+		return fmt.Errorf("too high pod startup latency 99th percentile: %v", actual.Perc99)
+	}
+	return nil
+}
+
+func newInformerWatchPod(f *framework.Framework, mutex *sync.Mutex, watchTimes map[string]unversioned.Time,
+	podType string) *controllerframework.Controller {
+	ns := f.Namespace.Name
+	checkPodRunning := func(p *api.Pod) {
+		mutex.Lock()
+		defer mutex.Unlock()
+		defer GinkgoRecover()
+
+		if p.Status.Phase == api.PodRunning {
+			if _, found := watchTimes[p.Name]; !found {
+				watchTimes[p.Name] = unversioned.Now()
+			}
+		}
+	}
+
+	_, controller := controllerframework.NewInformer(
+		&cache.ListWatch{
+			ListFunc: func(options api.ListOptions) (runtime.Object, error) {
+				options.LabelSelector = labels.SelectorFromSet(labels.Set{"type": podType})
+				return f.Client.Pods(ns).List(options)
+			},
+			WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
+				options.LabelSelector = labels.SelectorFromSet(labels.Set{"type": podType})
+				return f.Client.Pods(ns).Watch(options)
+			},
+		},
+		&api.Pod{},
+		0,
+		controllerframework.ResourceEventHandlerFuncs{
+			AddFunc: func(obj interface{}) {
+				p, ok := obj.(*api.Pod)
+				Expect(ok).To(Equal(true))
+				go checkPodRunning(p)
+			},
+			UpdateFunc: func(oldObj, newObj interface{}) {
+				p, ok := newObj.(*api.Pod)
+				Expect(ok).To(Equal(true))
+				go checkPodRunning(p)
+			},
+		},
+	)
+	return controller
+}
+
+func verifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, testArg DensityTest) {
+	framework.PrintLatencies(e2eLags, "worst client e2e total latencies")
+
+	// Note: the kubelet Prometheus metrics are not reset between tests,
+	// so they are logged for reference only.
+	latencyMetrics, _ := getPodStartLatency(kubeletAddr)
+	framework.Logf("Kubelet Prometheus metrics (not reset):\n%s", framework.PrettyPrintJSON(latencyMetrics))
+
+	// check whether the e2e pod startup time is acceptable
+	podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)}
+	framework.Logf("Pod create latency: %s", framework.PrettyPrintJSON(podCreateLatency))
+	framework.ExpectNoError(verifyPodStartupLatency(testArg.podStartupLimits, podCreateLatency.Latency))
+
+	// check the batch pod creation latency
+	if testArg.podBatchStartupLimit > 0 {
+		Expect(batchLag <= testArg.podBatchStartupLimit).To(Equal(true), "Batch creation startup time %v exceeds limit %v",
+			batchLag, testArg.podBatchStartupLimit)
+	}
+
+	// calculate and log throughput
+	throughputBatch := float64(testArg.podsNr) / batchLag.Minutes()
+	framework.Logf("Batch creation throughput is %.1f pods/min", throughputBatch)
+	throughputSequential := 1.0 / e2eLags[len(e2eLags)-1].Latency.Minutes()
+	framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential)
+}
+
+func verifyResource(f *framework.Framework, testArg DensityTest, rc *ResourceCollector) {
+	nodeName := framework.TestContext.NodeName
+
+	// verify and log memory
+	usagePerContainer, err := rc.GetLatest()
+	Expect(err).NotTo(HaveOccurred())
+	framework.Logf("%s", formatResourceUsageStats(usagePerContainer))
+
+	usagePerNode := make(framework.ResourceUsagePerNode)
+	usagePerNode[nodeName] = usagePerContainer
+
+	memPerfData := framework.ResourceUsageToPerfData(usagePerNode)
+	framework.PrintPerfData(memPerfData)
+
+	verifyMemoryLimits(f.Client, testArg.memLimits, usagePerNode)
+
+	// verify and log cpu
+	cpuSummary := rc.GetCPUSummary()
+	framework.Logf("%s", formatCPUSummary(cpuSummary))
+
+	cpuSummaryPerNode := make(framework.NodesCPUSummary)
+	cpuSummaryPerNode[nodeName] = cpuSummary
+
+	cpuPerfData := framework.CPUUsageToPerfData(cpuSummaryPerNode)
+	framework.PrintPerfData(cpuPerfData)
+
+	verifyCPULimits(testArg.cpuLimits, cpuSummaryPerNode)
+}
+
+func createBatchPodSequential(f *framework.Framework, pods []*api.Pod) (time.Duration, []framework.PodLatencyData) {
+	batchStartTime := unversioned.Now()
+	e2eLags := make([]framework.PodLatencyData, 0)
+	for _, pod := range pods {
+		create := unversioned.Now()
+		f.PodClient().CreateSync(pod)
+		e2eLags = append(e2eLags,
+			framework.PodLatencyData{Name: pod.ObjectMeta.Name, Latency: unversioned.Now().Time.Sub(create.Time)})
+	}
+	batchLag := unversioned.Now().Time.Sub(batchStartTime.Time)
+	sort.Sort(framework.LatencySlice(e2eLags))
+	return batchLag, e2eLags
+}
diff --git a/test/e2e_node/resource_controller.go b/test/e2e_node/resource_controller.go
new file mode 100644
index 00000000000..9fb4d7f75eb
--- /dev/null
+++ b/test/e2e_node/resource_controller.go
@@ -0,0 +1,514 @@
+// +build linux
+
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e_node
+
+import (
+	"bytes"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+	"os/exec"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"text/tabwriter"
+	"time"
+
+	cadvisorclient "github.com/google/cadvisor/client/v2"
+	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
+	"k8s.io/kubernetes/pkg/labels"
+	"k8s.io/kubernetes/pkg/util/runtime"
+	"k8s.io/kubernetes/pkg/util/uuid"
+	"k8s.io/kubernetes/pkg/util/wait"
+	"k8s.io/kubernetes/test/e2e/framework"
+
+	. "github.com/onsi/gomega"
+)
+
+const (
+	// resource monitoring
+	cadvisorImageName = "google/cadvisor:latest"
+	cadvisorPodName   = "cadvisor"
+	cadvisorPort      = 8090
+	// housekeeping interval of cadvisor (in seconds)
+	houseKeepingInterval = 1
+)
+
+var (
+	systemContainers map[string]string
+)
+
+type ResourceCollector struct {
+	client  *cadvisorclient.Client
+	request *cadvisorapiv2.RequestOptions
+
+	pollingInterval time.Duration
+	buffers         map[string][]*framework.ContainerResourceUsage
+	lock            sync.RWMutex
+	stopCh          chan struct{}
+}
+
+func NewResourceCollector(interval time.Duration) *ResourceCollector {
+	buffers := make(map[string][]*framework.ContainerResourceUsage)
+	return &ResourceCollector{
+		pollingInterval: interval,
+		buffers:         buffers,
+	}
+}
+
+func (r *ResourceCollector) Start() {
+	// Get the cgroup container names for kubelet and docker.
+	// Check each lookup separately so a failure in the first one is not
+	// silently overwritten by the second.
+	kubeletContainer, err := getContainerNameForProcess(kubeletProcessName, "")
+	if err != nil {
+		framework.Failf("Failed to get kubelet container name in test-e2e-node resource collector: %v", err)
+	}
+	dockerContainer, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
+	if err != nil {
+		framework.Failf("Failed to get docker container name in test-e2e-node resource collector: %v", err)
+	}
+	systemContainers = map[string]string{
+		stats.SystemContainerKubelet: kubeletContainer,
+		stats.SystemContainerRuntime: dockerContainer,
+	}
+
+	wait.Poll(1*time.Second, 1*time.Minute, func() (bool, error) {
+		var err error
+		r.client, err = cadvisorclient.NewClient(fmt.Sprintf("http://localhost:%d/", cadvisorPort))
+		if err == nil {
+			return true, nil
+		}
+		return false, err
+	})
+
+	Expect(r.client).NotTo(BeNil(), "cadvisor client not ready")
+
+	r.request = &cadvisorapiv2.RequestOptions{IdType: "name", Count: 1, Recursive: false}
+	r.stopCh = make(chan struct{})
+
+	oldStatsMap := make(map[string]*cadvisorapiv2.ContainerStats)
+	go wait.Until(func() { r.collectStats(oldStatsMap) }, r.pollingInterval, r.stopCh)
+}
+
+func (r *ResourceCollector) Stop() {
+	close(r.stopCh)
+}
+
+func (r *ResourceCollector) Reset() {
+	r.lock.Lock()
+	defer r.lock.Unlock()
+	for _, name := range systemContainers {
+		r.buffers[name] = []*framework.ContainerResourceUsage{}
+	}
+}
+
+func (r *ResourceCollector) GetCPUSummary() framework.ContainersCPUSummary {
+	result := make(framework.ContainersCPUSummary)
+	for key, name := range systemContainers {
+		data := r.GetBasicCPUStats(name)
+		result[key] = data
+	}
+	return result
+}
+
+func (r *ResourceCollector) LogLatest() {
+	summary, err := r.GetLatest()
+	if err != nil {
+		framework.Logf("%v", err)
+		return
+	}
+	framework.Logf("%s", formatResourceUsageStats(summary))
+}
+
+func (r *ResourceCollector) collectStats(oldStatsMap map[string]*cadvisorapiv2.ContainerStats) {
+	for _, name := range systemContainers {
+		ret, err := r.client.Stats(name, r.request)
+		if err != nil {
+			framework.Logf("Error getting container stats, err: %v", err)
+			return
+		}
+		cStats, ok := ret[name]
+		if !ok {
+			framework.Logf("Missing info/stats for container %q", name)
+			return
+		}
+
+		if len(cStats.Stats) == 0 {
+			framework.Logf("Missing stats for container %q", name)
+			return
+		}
+		newStats := cStats.Stats[0]
+
+		// Timestamp.Before already excludes equal timestamps, so no separate
+		// equality check is needed. Guard the shared buffer with the lock,
+		// since GetLatest/GetBasicCPUStats read it concurrently.
+		if oldStats, ok := oldStatsMap[name]; ok && oldStats.Timestamp.Before(newStats.Timestamp) {
+			r.lock.Lock()
+			r.buffers[name] = append(r.buffers[name], computeContainerResourceUsage(name, oldStats, newStats))
+			r.lock.Unlock()
+		}
+		oldStatsMap[name] = newStats
+	}
+}
+
+func computeContainerResourceUsage(name string, oldStats, newStats *cadvisorapiv2.ContainerStats) *framework.ContainerResourceUsage {
+	return &framework.ContainerResourceUsage{
+		Name:                    name,
+		Timestamp:               newStats.Timestamp,
+		CPUUsageInCores:         float64(newStats.Cpu.Usage.Total-oldStats.Cpu.Usage.Total) / float64(newStats.Timestamp.Sub(oldStats.Timestamp).Nanoseconds()),
+		MemoryUsageInBytes:      newStats.Memory.Usage,
+		MemoryWorkingSetInBytes: newStats.Memory.WorkingSet,
+		MemoryRSSInBytes:        newStats.Memory.RSS,
+		CPUInterval:             newStats.Timestamp.Sub(oldStats.Timestamp),
+	}
+}
+
+func (r *ResourceCollector) GetLatest() (framework.ResourceUsagePerContainer, error) {
+	r.lock.RLock()
+	defer r.lock.RUnlock()
+	stats := make(framework.ResourceUsagePerContainer)
+	for key, name := range systemContainers {
+		contStats, ok := r.buffers[name]
+		if !ok || len(contStats) == 0 {
+			return nil, fmt.Errorf("resource usage of %s:%s is not ready yet", key, name)
+		}
+		stats[key] = contStats[len(contStats)-1]
+	}
+	return stats, nil
+}
+
+type resourceUsageByCPU []*framework.ContainerResourceUsage
+
+func (r resourceUsageByCPU) Len() int           { return len(r) }
+func (r resourceUsageByCPU) Swap(i, j int)      { r[i], r[j] = r[j], r[i] }
+func (r resourceUsageByCPU) Less(i, j int) bool { return r[i].CPUUsageInCores < r[j].CPUUsageInCores }
+
+// The percentiles to report.
+var percentiles = [...]float64{0.05, 0.20, 0.50, 0.70, 0.90, 0.95, 0.99}
+
+// GetBasicCPUStats returns the percentiles of the CPU usage in cores for
+// containerName. This method examines all data currently in the buffer.
+func (r *ResourceCollector) GetBasicCPUStats(containerName string) map[float64]float64 {
+	r.lock.RLock()
+	defer r.lock.RUnlock()
+	result := make(map[float64]float64, len(percentiles))
+	usages := r.buffers[containerName]
+	sort.Sort(resourceUsageByCPU(usages))
+	for _, q := range percentiles {
+		index := int(float64(len(usages))*q) - 1
+		if index < 0 {
+			// We don't have enough data.
+			result[q] = 0
+			continue
+		}
+		result[q] = usages[index].CPUUsageInCores
+	}
+	return result
+}
+
+func formatResourceUsageStats(containerStats framework.ResourceUsagePerContainer) string {
+	// Example output:
+	//
+	// Resource usage for node "e2e-test-foo-minion-abcde":
+	// container        cpu(cores)  memory(MB)
+	// "/"              0.363       2942.09
+	// "/docker-daemon" 0.088       521.80
+	// "/kubelet"       0.086       424.37
+	// "/system"        0.007       119.88
+	buf := &bytes.Buffer{}
+	w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0)
+	fmt.Fprintf(w, "container\tcpu(cores)\tmemory_working_set(MB)\tmemory_rss(MB)\n")
+	for name, s := range containerStats {
+		fmt.Fprintf(w, "%q\t%.3f\t%.2f\t%.2f\n", name, s.CPUUsageInCores, float64(s.MemoryWorkingSetInBytes)/(1024*1024), float64(s.MemoryRSSInBytes)/(1024*1024))
+	}
+	w.Flush()
+	return fmt.Sprintf("Resource usage:\n%s", buf.String())
+}
+
+func formatCPUSummary(summary framework.ContainersCPUSummary) string {
+	// Example output for a node (the percentiles may differ):
+	// CPU usage of containers on node "e2e-test-foo-minion-0vj7":
+	// container  5th%  50th%  90th%  95th%
+	// "/"        0.051 0.159  0.387  0.455
+	// "/runtime" 0.000 0.000  0.146  0.166
+	// "/kubelet" 0.036 0.053  0.091  0.154
+	// "/misc"    0.001 0.001  0.001  0.002
+	var summaryStrings []string
+	var header []string
+	header = append(header, "container")
+	for _, p := range percentiles {
+		header = append(header, fmt.Sprintf("%.0fth%%", p*100))
+	}
+
+	buf := &bytes.Buffer{}
+	w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0)
+	fmt.Fprintf(w, "%s\n", strings.Join(header, "\t"))
+
+	for _, containerName := range framework.TargetContainers() {
+		var s []string
+		s = append(s, fmt.Sprintf("%q", containerName))
+		data, ok := summary[containerName]
+		for _, p := range percentiles {
+			value := "N/A"
+			if ok {
+				value = fmt.Sprintf("%.3f", data[p])
+			}
+			s = append(s, value)
+		}
+		fmt.Fprintf(w, "%s\n", strings.Join(s, "\t"))
+	}
+	w.Flush()
+	summaryStrings = append(summaryStrings, fmt.Sprintf("CPU usage of containers:\n%s", buf.String()))
+
+	return strings.Join(summaryStrings, "\n")
+}
+
+func createCadvisorPod(f *framework.Framework) {
+	f.PodClient().CreateSync(&api.Pod{
+		ObjectMeta: api.ObjectMeta{
+			Name: cadvisorPodName,
+		},
+		Spec: api.PodSpec{
+			// It uses a host port for the tests to collect data.
+			// Currently we cannot use port mapping in test-e2e-node.
+			SecurityContext: &api.PodSecurityContext{
+				HostNetwork: true,
+			},
+			Containers: []api.Container{
+				{
+					Image: cadvisorImageName,
+					Name:  cadvisorPodName,
+					Ports: []api.ContainerPort{
+						{
+							Name:          "http",
+							HostPort:      cadvisorPort,
+							ContainerPort: cadvisorPort,
+							Protocol:      api.ProtocolTCP,
+						},
+					},
+					VolumeMounts: []api.VolumeMount{
+						{
+							Name:      "sys",
+							ReadOnly:  true,
+							MountPath: "/sys",
+						},
+						{
+							Name:      "var-run",
+							ReadOnly:  false,
+							MountPath: "/var/run",
+						},
+						{
+							Name:      "docker",
+							ReadOnly:  true,
+							MountPath: "/var/lib/docker/",
+						},
+						{
+							Name:      "rootfs",
+							ReadOnly:  true,
+							MountPath: "/rootfs",
+						},
+					},
+					Args: []string{
+						"--profiling",
+						fmt.Sprintf("--housekeeping_interval=%ds", houseKeepingInterval),
+						fmt.Sprintf("--port=%d", cadvisorPort),
+					},
+				},
+			},
+			Volumes: []api.Volume{
+				{
+					Name:         "rootfs",
+					VolumeSource: api.VolumeSource{HostPath: &api.HostPathVolumeSource{Path: "/"}},
+				},
+				{
+					Name:         "var-run",
+					VolumeSource: api.VolumeSource{HostPath: &api.HostPathVolumeSource{Path: "/var/run"}},
+				},
+				{
+					Name:         "sys",
+					VolumeSource: api.VolumeSource{HostPath: &api.HostPathVolumeSource{Path: "/sys"}},
+				},
+				{
+					Name:         "docker",
+					VolumeSource: api.VolumeSource{HostPath: &api.HostPathVolumeSource{Path: "/var/lib/docker"}},
+				},
+			},
+		},
+	})
+}
+
+func deleteBatchPod(f *framework.Framework, pods []*api.Pod) {
+	ns := f.Namespace.Name
+	var wg sync.WaitGroup
+	for _, pod := range pods {
+		wg.Add(1)
+		go func(pod *api.Pod) {
+			defer wg.Done()
+
+			err := f.Client.Pods(ns).Delete(pod.ObjectMeta.Name, api.NewDeleteOptions(30))
+			Expect(err).NotTo(HaveOccurred())
+
+			Expect(framework.WaitForPodToDisappear(f.Client, ns, pod.ObjectMeta.Name, labels.Everything(),
+				30*time.Second, 10*time.Minute)).
+				NotTo(HaveOccurred())
+		}(pod)
+	}
+	wg.Wait()
+}
+
+func newTestPods(numPods int, imageName, podType string) []*api.Pod {
+	var pods []*api.Pod
+	for i := 0; i < numPods; i++ {
+		podName := "test-" + string(uuid.NewUUID())
+		labels := map[string]string{
+			"type": podType,
+			"name": podName,
+		}
+		pods = append(pods,
+			&api.Pod{
+				ObjectMeta: api.ObjectMeta{
+					Name:   podName,
+					Labels: labels,
+				},
+				Spec: api.PodSpec{
+					// TODO: the restart policy defaults to `Always';
+					// check whether pods restart at the end of tests
+					Containers: []api.Container{
+						{
+							Image: imageName,
+							Name:  podName,
+						},
+					},
+				},
+			})
+	}
+	return pods
+}
+
+// code for getting the container name of docker
+const (
+	kubeletProcessName    = "kubelet"
+	dockerProcessName     = "docker"
+	dockerPidFile         = "/var/run/docker.pid"
+	containerdProcessName = "docker-containerd"
+	containerdPidFile     = "/run/docker/libcontainerd/docker-containerd.pid"
+)
+
+func getContainerNameForProcess(name, pidFile string) (string, error) {
+	pids, err := getPidsForProcess(name, pidFile)
+	if err != nil {
+		return "", fmt.Errorf("failed to detect process id for %q - %v", name, err)
+	}
+	if len(pids) == 0 {
+		return "", nil
+	}
+	cont, err := getContainer(pids[0])
+	if err != nil {
+		return "", err
+	}
+	return cont, nil
+}
+
+func getPidFromPidFile(pidFile string) (int, error) {
+	file, err := os.Open(pidFile)
+	if err != nil {
+		return 0, fmt.Errorf("error opening pid file %s: %v", pidFile, err)
+	}
+	defer file.Close()
+
+	data, err := ioutil.ReadAll(file)
+	if err != nil {
+		return 0, fmt.Errorf("error reading pid file %s: %v", pidFile, err)
+	}
+
+	pid, err := strconv.Atoi(string(data))
+	if err != nil {
+		return 0, fmt.Errorf("error parsing %s as a number: %v", string(data), err)
+	}
+
+	return pid, nil
+}
+
+func getPidsForProcess(name, pidFile string) ([]int, error) {
+	if len(pidFile) > 0 {
+		if pid, err := getPidFromPidFile(pidFile); err == nil {
+			return []int{pid}, nil
+		} else {
+			// log the error and fall back to pidof
+			runtime.HandleError(err)
+		}
+	}
+
+	out, err := exec.Command("pidof", name).Output()
+	if err != nil {
+		return []int{}, fmt.Errorf("failed to find pid of %q: %v", name, err)
+	}
+
+	// The output of pidof is a list of pids.
+	pids := []int{}
+	for _, pidStr := range strings.Split(strings.TrimSpace(string(out)), " ") {
+		pid, err := strconv.Atoi(pidStr)
+		if err != nil {
+			continue
+		}
+		pids = append(pids, pid)
+	}
+	return pids, nil
+}
+
+// getContainer returns the cgroup associated with the specified pid.
+// It enforces a unified hierarchy for memory and cpu cgroups.
+// On systemd environments, it uses the name=systemd cgroup for the specified pid.
+func getContainer(pid int) (string, error) {
+	cgs, err := cgroups.ParseCgroupFile(fmt.Sprintf("/proc/%d/cgroup", pid))
+	if err != nil {
+		return "", err
+	}
+
+	cpu, found := cgs["cpu"]
+	if !found {
+		return "", cgroups.NewNotFoundError("cpu")
+	}
+	memory, found := cgs["memory"]
+	if !found {
+		return "", cgroups.NewNotFoundError("memory")
+	}
+
+	// since we use this container for accounting, we need to ensure it's a unified hierarchy.
+	if cpu != memory {
+		return "", fmt.Errorf("cpu and memory cgroup hierarchy not unified. cpu: %s, memory: %s", cpu, memory)
+	}
+
+	// on systemd, every pid is in a unified cgroup hierarchy (name=systemd as seen in systemd-cgls).
+	// cpu and memory accounting is off by default; users may choose to enable it per unit or globally.
+	// users could enable cpu and memory accounting globally via /etc/systemd/system.conf (DefaultCPUAccounting=true DefaultMemoryAccounting=true),
+	// or per unit via CPUAccounting=true and MemoryAccounting=true.
+	// we only warn if accounting is not enabled for cpu or memory so as to not break local development flows where kubelet is launched in a terminal.
+	// for example, the cgroup for the user session will be something like /user.slice/user-X.slice/session-X.scope, but the cpu and memory
+	// cgroup will be the closest ancestor where accounting is performed (most likely /) on systems that launch docker containers.
+	// as a result, on those systems, you will not get cpu or memory accounting statistics for kubelet.
+	// in addition, you would not get memory or cpu accounting for the runtime unless accounting was enabled on its unit (or globally).
+	if systemd, found := cgs["name=systemd"]; found {
+		if systemd != cpu {
+			log.Printf("CPUAccounting not enabled for pid: %d", pid)
+		}
+		if systemd != memory {
+			log.Printf("MemoryAccounting not enabled for pid: %d", pid)
+		}
+		return systemd, nil
+	}
+
+	return cpu, nil
+}
diff --git a/test/e2e_node/resource_usage_test.go b/test/e2e_node/resource_usage_test.go
new file mode 100644
index 00000000000..f95f7cb0851
--- /dev/null
+++ b/test/e2e_node/resource_usage_test.go
@@ -0,0 +1,247 @@
+// +build linux
+
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e_node
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	client "k8s.io/kubernetes/pkg/client/unversioned"
+	"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
+	"k8s.io/kubernetes/test/e2e/framework"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+)
+
+var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
+	const (
+		// Interval to poll /stats/container on a node
+		containerStatsPollingPeriod = 10 * time.Second
+		// The monitoring time for one test.
+		monitoringTime = 10 * time.Minute
+		// The periodic reporting period.
+		reportingPeriod = 5 * time.Minute
+
+		sleepAfterCreatePods = 10 * time.Second
+	)
+
+	var (
+		ns string
+		rc *ResourceCollector
+		om *framework.RuntimeOperationMonitor
+	)
+
+	f := framework.NewDefaultFramework("resource-usage")
+
+	BeforeEach(func() {
+		ns = f.Namespace.Name
+		om = framework.NewRuntimeOperationMonitor(f.Client)
+	})
+
+	AfterEach(func() {
+		result := om.GetLatestRuntimeOperationErrorRate()
+		framework.Logf("runtime operation error metrics:\n%s", framework.FormatRuntimeOperationErrorRate(result))
+	})
+
+	// This test measures and verifies that the steady-state resource usage of
+	// the node is within limits. It collects data from a standalone cadvisor
+	// with a 1s housekeeping interval, and verifies the CPU percentiles and
+	// the latest memory usage.
+	Context("regular resource usage tracking", func() {
+		rTests := []resourceTest{
+			{
+				podsPerNode: 10,
+				cpuLimits: framework.ContainersCPUSummary{
+					stats.SystemContainerKubelet: {0.50: 0.25, 0.95: 0.30},
+					stats.SystemContainerRuntime: {0.50: 0.30, 0.95: 0.40},
+				},
+				// We set the memory limits generously because the distribution
+				// of the addon pods affects the memory usage on each node.
+				memLimits: framework.ResourceUsagePerContainer{
+					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
+					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
+				},
+			},
+		}
+
+		for _, testArg := range rTests {
+			itArg := testArg
+
+			podsPerNode := itArg.podsPerNode
+			name := fmt.Sprintf("resource tracking for %d pods per node", podsPerNode)
+
+			It(name, func() {
+				expectedCPU, expectedMemory := itArg.cpuLimits, itArg.memLimits
+
+				// The test collects resource usage from a standalone cadvisor pod.
+				// The kubelet's cadvisor has a housekeeping interval of 10s, which
+				// is too long to show resource usage spikes, but shortening its
+				// interval would increase the kubelet's overhead. Hence we use a
+				// standalone cadvisor pod instead.
+				createCadvisorPod(f)
+				rc = NewResourceCollector(containerStatsPollingPeriod)
+				rc.Start()
+
+				By("Creating a batch of Pods")
+				pods := newTestPods(podsPerNode, ImageRegistry[pauseImage], "test_pod")
+				for _, pod := range pods {
+					f.PodClient().CreateSync(pod)
+				}
+
+				// wait for a while to let the node become steady
+				time.Sleep(sleepAfterCreatePods)
+
+				// Log once and flush the stats.
+				rc.LogLatest()
+				rc.Reset()
+
+				By("Start monitoring resource usage")
+				// Periodically dump the CPU summary until the deadline is met.
+				// Note that without calling framework.ResourceMonitor.Reset(), the
+				// stats would occupy increasingly more memory. This should be fine
+				// for the current test duration, but we should reclaim the entries
+				// if we plan to monitor longer (e.g., 8 hours).
+				deadline := time.Now().Add(monitoringTime)
+				for time.Now().Before(deadline) {
+					timeLeft := deadline.Sub(time.Now())
+					framework.Logf("Still running...%v left", timeLeft)
+					if timeLeft < reportingPeriod {
+						time.Sleep(timeLeft)
+					} else {
+						time.Sleep(reportingPeriod)
+					}
+					logPodsOnNode(f.Client)
+				}
+
+				rc.Stop()
+
+				By("Reporting overall resource usage")
+				logPodsOnNode(f.Client)
+
+				usagePerContainer, err := rc.GetLatest()
+				Expect(err).NotTo(HaveOccurred())
+
+				// TODO(random-liu): Remove the original log when we migrate to new perfdash
+				nodeName := framework.TestContext.NodeName
+				framework.Logf("%s", formatResourceUsageStats(usagePerContainer))
+
+				// Log perf result
+				usagePerNode := make(framework.ResourceUsagePerNode)
+				usagePerNode[nodeName] = usagePerContainer
+
+				framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode))
+				verifyMemoryLimits(f.Client, expectedMemory, usagePerNode)
+
+				cpuSummary := rc.GetCPUSummary()
+				framework.Logf("%s", formatCPUSummary(cpuSummary))
+
+				// Log perf result
+				cpuSummaryPerNode := make(framework.NodesCPUSummary)
+				cpuSummaryPerNode[nodeName] = cpuSummary
+				framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode))
+				verifyCPULimits(expectedCPU, cpuSummaryPerNode)
+			})
+		}
+	})
+})
+
+type resourceTest struct {
+	podsPerNode int
+	cpuLimits   framework.ContainersCPUSummary
+	memLimits   framework.ResourceUsagePerContainer
+}
+
+func verifyMemoryLimits(c *client.Client, expected framework.ResourceUsagePerContainer, actual framework.ResourceUsagePerNode) {
+	if expected == nil {
+		return
+	}
+	var errList []string
+	for nodeName, nodeSummary := range actual {
+		var nodeErrs []string
+		for cName, expectedResult := range expected {
+			container, ok := nodeSummary[cName]
+			if !ok {
+				nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing", cName))
+				continue
+			}
+
+			expectedValue := expectedResult.MemoryRSSInBytes
+			actualValue := container.MemoryRSSInBytes
+			if expectedValue != 0 && actualValue > expectedValue {
+				nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: expected RSS memory (bytes) < %d; got %d",
+					cName, expectedValue, actualValue))
+			}
+		}
+		if len(nodeErrs) > 0 {
+			errList = append(errList, fmt.Sprintf("node %v:\n %s", nodeName, strings.Join(nodeErrs, ", ")))
+			heapStats, err := framework.GetKubeletHeapStats(c, nodeName)
+			if err != nil {
+				framework.Logf("Unable to get heap stats from %q", nodeName)
+			} else {
+				framework.Logf("Heap stats on %q:\n%v", nodeName, heapStats)
+			}
+		}
+	}
+	if len(errList) > 0 {
+		framework.Failf("Memory usage exceeding limits:\n %s", strings.Join(errList, "\n"))
+	}
+}
+
+func verifyCPULimits(expected framework.ContainersCPUSummary, actual framework.NodesCPUSummary) {
+	if expected == nil {
+		return
+	}
+	var errList []string
+	for nodeName, perNodeSummary := range actual {
+		var nodeErrs []string
+		for cName, expectedResult := range expected {
+			perContainerSummary, ok := perNodeSummary[cName]
+			if !ok {
+				nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing", cName))
+				continue
+			}
+			for p, expectedValue := range expectedResult {
+				actualValue, ok := perContainerSummary[p]
+				if !ok {
+					nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing percentile %v", cName, p))
+					continue
+				}
+				if actualValue > expectedValue {
+					nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: expected %.0fth%% usage < %.3f; got %.3f",
+						cName, p*100, expectedValue, actualValue))
+				}
+			}
+		}
+		if len(nodeErrs) > 0 {
+			errList = append(errList, fmt.Sprintf("node %v:\n %s", nodeName, strings.Join(nodeErrs, ", ")))
+		}
+	}
+	if len(errList) > 0 {
+		framework.Failf("CPU usage exceeding limits:\n %s", strings.Join(errList, "\n"))
+	}
+}
+
+func logPodsOnNode(c *client.Client) {
+	nodeName := framework.TestContext.NodeName
+	podList, err := framework.GetKubeletRunningPods(c, nodeName)
+	if err != nil {
+		framework.Logf("Unable to retrieve kubelet pods for node %v", nodeName)
+		return
+	}
+	framework.Logf("%d pods are running on node %v", len(podList.Items), nodeName)
+}
diff --git a/vendor/github.com/google/cadvisor/client/v2/README.md b/vendor/github.com/google/cadvisor/client/v2/README.md
new file mode 100644
index 00000000000..64cc30d5ce6
--- /dev/null
+++ b/vendor/github.com/google/cadvisor/client/v2/README.md
@@ -0,0 +1,69 @@
+# Example REST API Client
+
+This is an implementation of a cAdvisor REST API client in Go. You can use it like this:
+
+```go
+client, err := client.NewClient("http://192.168.59.103:8080/")
+```
+
+Obviously, replace the URL with the path to your actual cAdvisor REST endpoint.
+
+
+### MachineInfo
+
+```go
+client.MachineInfo()
+```
+
+There is no v2 MachineInfo API, so the v2 client exposes the [v1 MachineInfo](../../info/v1/machine.go#L131)
+
+```
+(*v1.MachineInfo)(0xc208022b10)({
+ NumCores: (int) 4,
+ MemoryCapacity: (int64) 2106028032,
+ Filesystems: ([]v1.FsInfo) (len=1 cap=4) {
+  (v1.FsInfo) {
+   Device: (string) (len=9) "/dev/sda1",
+   Capacity: (uint64) 19507089408
+  }
+ }
+})
+```
+
+You can see the full specification of the [MachineInfo struct in the source](../../info/v1/machine.go#L131)
+
+### VersionInfo
+
+```go
+client.VersionInfo()
+```
+
+This method returns the cAdvisor version.
+
+### Attributes
+
+```go
+client.Attributes()
+```
+
+This method returns a [cadvisor/info/v2/Attributes](../../info/v2/machine.go#L24) struct with all the fields filled in. Attributes includes hardware attributes (as returned by MachineInfo) as well as software attributes (e.g. software versions). Here is an example return value:
+
+```
+(*v2.Attributes)({
+ KernelVersion: (string) (len=17) "3.13.0-44-generic"
+ ContainerOsVersion: (string) (len=18) "Ubuntu 14.04.1 LTS"
+ DockerVersion: (string) (len=9) "1.5.0-rc4"
+ CadvisorVersion: (string) (len=6) "0.10.1"
+ NumCores: (int) 4,
+ MemoryCapacity: (int64) 2106028032,
+ Filesystems: ([]v2.FsInfo) (len=1 cap=4) {
+  (v2.FsInfo) {
+   Device: (string) (len=9) "/dev/sda1",
+   Capacity: (uint64) 19507089408
+  }
+ }
+})
+```
+
+You can see the full specification of the [Attributes struct in the source](../../info/v2/machine.go#L24)
+
diff --git a/vendor/github.com/google/cadvisor/client/v2/client.go b/vendor/github.com/google/cadvisor/client/v2/client.go
new file mode 100644
index 00000000000..716517c3381
--- /dev/null
+++ b/vendor/github.com/google/cadvisor/client/v2/client.go
@@ -0,0 +1,176 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Client library to programmatically access cAdvisor API.
+package v2
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"net/url"
+	"path"
+	"strconv"
+	"strings"
+
+	v1 "github.com/google/cadvisor/info/v1"
+	"github.com/google/cadvisor/info/v2"
+)
+
+// Client represents the base URL for a cAdvisor client.
+type Client struct {
+	baseUrl string
+}
+
+// NewClient returns a new client with the specified base URL.
+func NewClient(url string) (*Client, error) {
+	if !strings.HasSuffix(url, "/") {
+		url += "/"
+	}
+
+	return &Client{
+		baseUrl: fmt.Sprintf("%sapi/v2.1/", url),
+	}, nil
+}
+
+// MachineInfo returns the JSON machine information for this client.
+// A non-nil error result indicates a problem with obtaining
+// the JSON machine information data.
+func (self *Client) MachineInfo() (minfo *v1.MachineInfo, err error) {
+	u := self.machineInfoUrl()
+	ret := new(v1.MachineInfo)
+	if err = self.httpGetJsonData(ret, nil, u, "machine info"); err != nil {
+		return
+	}
+	minfo = ret
+	return
+}
+
+// MachineStats returns the JSON machine statistics for this client.
+// A non-nil error result indicates a problem with obtaining
+// the JSON machine information data.
+func (self *Client) MachineStats() ([]v2.MachineStats, error) {
+	var ret []v2.MachineStats
+	u := self.machineStatsUrl()
+	err := self.httpGetJsonData(&ret, nil, u, "machine stats")
+	return ret, err
+}
+
+// VersionInfo returns the version info for cAdvisor.
+func (self *Client) VersionInfo() (version string, err error) {
+	u := self.versionInfoUrl()
+	version, err = self.httpGetString(u, "version info")
+	return
+}
+
+// Attributes returns hardware and software attributes of the machine.
+func (self *Client) Attributes() (attr *v2.Attributes, err error) {
+	u := self.attributesUrl()
+	ret := new(v2.Attributes)
+	if err = self.httpGetJsonData(ret, nil, u, "attributes"); err != nil {
+		return
+	}
+	attr = ret
+	return
+}
+
+// Stats returns stats for the requested container.
+func (self *Client) Stats(name string, request *v2.RequestOptions) (map[string]v2.ContainerInfo, error) {
+	u := self.statsUrl(name)
+	ret := make(map[string]v2.ContainerInfo)
+	data := url.Values{
+		"type":      []string{request.IdType},
+		"count":     []string{strconv.Itoa(request.Count)},
+		"recursive": []string{strconv.FormatBool(request.Recursive)},
+	}
+
+	u = fmt.Sprintf("%s?%s", u, data.Encode())
+	if err := self.httpGetJsonData(&ret, nil, u, "stats"); err != nil {
+		return nil, err
+	}
+	return ret, nil
+}
+
+func (self *Client) machineInfoUrl() string {
+	return self.baseUrl + path.Join("machine")
+}
+
+func (self *Client) machineStatsUrl() string {
+	return self.baseUrl + path.Join("machinestats")
+}
+
+func (self *Client) versionInfoUrl() string {
+	return self.baseUrl + path.Join("version")
+}
+
+func (self *Client) attributesUrl() string {
+	return self.baseUrl + path.Join("attributes")
+}
+
+func (self *Client) statsUrl(name string) string {
+	return self.baseUrl + path.Join("stats", name)
+}
+
+func (self *Client) httpGetResponse(postData interface{}, urlPath, infoName string) ([]byte, error) {
+	var resp *http.Response
+	var err error
+
+	if postData != nil {
+		data, marshalErr := json.Marshal(postData)
+		if marshalErr != nil {
+			return nil, fmt.Errorf("unable to marshal data: %v", marshalErr)
+		}
+		resp, err = http.Post(urlPath, "application/json", bytes.NewBuffer(data))
+	} else {
+		resp, err = http.Get(urlPath)
+	}
+	if err != nil {
+		return nil, fmt.Errorf("unable to post %q to %q: %v", infoName, urlPath, err)
+	}
+	if resp == nil {
+		return nil, fmt.Errorf("received empty response for %q from %q", infoName, urlPath)
+	}
+	defer resp.Body.Close()
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		err = fmt.Errorf("unable to read all %q from %q: %v", infoName, urlPath, err)
+		return nil, err
+	}
+	if resp.StatusCode != 200 {
+		return nil, fmt.Errorf("request %q failed with error: %q", urlPath, strings.TrimSpace(string(body)))
+	}
+	return body, nil
+}
+
+func (self *Client) httpGetString(url, infoName string) (string, error) {
+	body, err := self.httpGetResponse(nil, url, infoName)
+	if err != nil {
+		return "", err
+	}
+	return string(body), nil
+}
+
+func (self *Client) httpGetJsonData(data, postData interface{}, url, infoName string) error {
+	body, err := self.httpGetResponse(postData, url, infoName)
+	if err != nil {
+		return err
+	}
+	if err = json.Unmarshal(body, data); err != nil {
+		err = fmt.Errorf("unable to unmarshal %q (Body: %q) from %q with error: %v", infoName, string(body), url, err)
+		return err
+	}
+	return nil
+}
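---

Reviewer note (not part of the diff): the data path introduced above — `createCadvisorPod` → `ResourceCollector` → vendored v2 client — can be exercised in isolation. The sketch below is a minimal, self-contained example, assuming a standalone cAdvisor is serving on `localhost:8090` exactly as `createCadvisorPod` configures it; the root-cgroup name `"/"` and the one-second sleep are illustrative choices, not new API. It polls two snapshots and derives a CPU-cores sample the same way `computeContainerResourceUsage` does, and its `RequestOptions` (`IdType: "name"`, `Count: 1`, `Recursive: false`) mirror the collector's, so only the most recent housekeeping sample is transferred per poll.

```go
package main

import (
	"fmt"
	"time"

	cadvisorclient "github.com/google/cadvisor/client/v2"
	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
)

func main() {
	// Assumes a standalone cAdvisor on localhost:8090 (see createCadvisorPod).
	c, err := cadvisorclient.NewClient("http://localhost:8090/")
	if err != nil {
		panic(err)
	}
	// Latest sample only, addressed by cgroup name — same as ResourceCollector.
	request := &cadvisorapiv2.RequestOptions{IdType: "name", Count: 1, Recursive: false}

	// sample fetches the newest ContainerStats for the root cgroup "/".
	// Error handling is elided beyond panics; a missing key or empty Stats
	// slice would indicate cAdvisor has not completed housekeeping yet.
	sample := func() *cadvisorapiv2.ContainerStats {
		ret, err := c.Stats("/", request)
		if err != nil {
			panic(err)
		}
		return ret["/"].Stats[0]
	}

	prev := sample()
	time.Sleep(time.Second)
	curr := sample()

	// CPU usage in cores over the interval, as in computeContainerResourceUsage:
	// delta of cumulative CPU time divided by the wall-clock interval.
	cores := float64(curr.Cpu.Usage.Total-prev.Cpu.Usage.Total) /
		float64(curr.Timestamp.Sub(prev.Timestamp).Nanoseconds())
	fmt.Printf("cpu: %.3f cores, rss: %d bytes\n", cores, curr.Memory.RSS)
}
```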