modify resource_collector.go to get container names of kubelet and docker dynamically
This commit is contained in:
parent 32e1db16c5
commit f3f3e965cc
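
The core of the change, in brief: rather than hardcoding the kubelet and docker cgroup names ("kubelet", "docker-daemon"), the resource collector now resolves them at runtime by locating each daemon's pid (via a pid file or pidof) and reading its cgroup from /proc/<pid>/cgroup. Below is a minimal standalone sketch of that lookup, not code from the commit: it assumes a Linux host with cgroup v1 and pidof on PATH, and the function and variable names (cgroupPathForPid, main) are purely illustrative.

// Sketch only: resolves the cpu-cgroup path of a named process, mirroring the
// idea of getContainerNameForProcess/getContainer in this commit. Assumes
// Linux, cgroup v1 (/proc/<pid>/cgroup has "id:controllers:path" lines), and
// that pidof is available; error handling is kept minimal on purpose.
package main

import (
	"fmt"
	"os"
	"os/exec"
	"strings"
)

// cgroupPathForPid reads /proc/<pid>/cgroup and returns the path of the
// hierarchy that contains the given controller, e.g. "/kubelet" or
// "/system.slice/docker.service".
func cgroupPathForPid(pid, controller string) (string, error) {
	data, err := os.ReadFile(fmt.Sprintf("/proc/%s/cgroup", pid))
	if err != nil {
		return "", err
	}
	for _, line := range strings.Split(strings.TrimSpace(string(data)), "\n") {
		// each line looks like "3:cpu,cpuacct:/kubelet"
		parts := strings.SplitN(line, ":", 3)
		if len(parts) != 3 {
			continue
		}
		for _, sub := range strings.Split(parts[1], ",") {
			if sub == controller {
				return parts[2], nil
			}
		}
	}
	return "", fmt.Errorf("controller %q not found for pid %s", controller, pid)
}

func main() {
	for _, proc := range []string{"kubelet", "docker"} {
		out, err := exec.Command("pidof", proc).Output()
		if err != nil {
			fmt.Printf("%s: not running (%v)\n", proc, err)
			continue
		}
		pid := strings.Fields(string(out))[0] // pidof may print several pids
		path, err := cgroupPathForPid(pid, "cpu")
		if err != nil {
			fmt.Printf("%s: %v\n", proc, err)
			continue
		}
		fmt.Printf("%s (pid %s) -> cgroup %s\n", proc, pid, path)
	}
}
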
@@ -1,9 +1,12 @@
/*
Copyright 2016 The Kubernetes Authors.
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -42,15 +45,13 @@ const (
kubeletAddr = "localhost:10255"
)

var _ = framework.KubeDescribe("Density", func() {
var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
const (
// the data collection time of `resource collector' and the standalone cadvisor
// is not synchronizated. Therefore `resource collector' may miss data or
// collect duplicated data
monitoringInterval = 500 * time.Millisecond
sleepBeforeEach = 30 * time.Second
sleepBeforeCreatePods = 30 * time.Second
sleepAfterDeletePods = 60 * time.Second
)

var (
@@ -67,7 +68,6 @@ var _ = framework.KubeDescribe("Density", func() {
})

AfterEach(func() {
time.Sleep(sleepAfterDeletePods)
})

Context("create a batch of pods", func() {
@@ -76,41 +76,21 @@ var _ = framework.KubeDescribe("Density", func() {
podsNr: 10,
interval: 0 * time.Millisecond,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.10, 0.95: 0.20},
stats.SystemContainerRuntime: {0.50: 0.10, 0.95: 0.50},
stats.SystemContainerKubelet: {0.50: 0.20, 0.95: 0.30},
stats.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
},
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 250 * 1024 * 1024},
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
},
// percentile limit of single pod startup latency
podStartupLimits: framework.LatencyMetric{
Perc50: 7 * time.Second,
Perc90: 10 * time.Second,
Perc99: 15 * time.Second,
Perc50: 10 * time.Second,
Perc90: 15 * time.Second,
Perc99: 20 * time.Second,
},
// upbound of startup latency of a batch of pods
podBatchStartupLimit: 20 * time.Second,
},
{
podsNr: 30,
interval: 0 * time.Millisecond,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.10, 0.95: 0.35},
stats.SystemContainerRuntime: {0.50: 0.10, 0.95: 0.70},
},
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
},
// percentile limit of single pod startup latency
podStartupLimits: framework.LatencyMetric{
Perc50: 30 * time.Second,
Perc90: 35 * time.Second,
Perc99: 40 * time.Second,
},
// upbound of startup latency of a batch of pods
podBatchStartupLimit: 90 * time.Second,
podBatchStartupLimit: 25 * time.Second,
},
}

@@ -139,7 +119,7 @@ var _ = framework.KubeDescribe("Density", func() {
controller := newInformerWatchPod(f, mutex, watchTimes, podType)
go controller.Run(stopCh)

// Zhou: In test we see kubelet starts while it is busy on sth, as a result `syncLoop'
// Zhou: In test we see kubelet starts while it is busy on something, as a result `syncLoop'
// does not response to pod creation immediately. Creating the first pod has a delay around 5s.
// The node status has been `ready' so `wait and check node being ready' does not help here.
// Now wait here for a grace period to have `syncLoop' be ready
@@ -153,14 +133,14 @@ var _ = framework.KubeDescribe("Density", func() {
// it returns a map[`pod name']`creation time' as the creation timestamps
createTimes := createBatchPodWithRateControl(f, pods, itArg.interval)

By("Waiting for all Pods begin observed by the watch...")
// checks every 10s util all pods are running. it timeouts ater 10min
By("Waiting for all Pods to be observed by the watch...")
// checks every 10s util all pods are running. it times out ater 10min
Eventually(func() bool {
return len(watchTimes) == itArg.podsNr
}, 10*time.Minute, 10*time.Second).Should(BeTrue())

if len(watchTimes) < itArg.podsNr {
framework.Failf("Timeout reached waiting for all Pods being observed by the watch.")
framework.Failf("Timeout reached waiting for all Pods to be observed by the watch.")
}

// stop the watching controller, and the resource collector
@@ -204,18 +184,6 @@ var _ = framework.KubeDescribe("Density", func() {
// verify resource
By("Verifying resource")
verifyResource(f, testArg, rm)

// delete pods
By("Deleting a batch of pods")
deleteBatchPod(f, pods)

// tear down cadvisor
Expect(f.Client.Pods(ns).Delete(cadvisorPodName, api.NewDeleteOptions(30))).
NotTo(HaveOccurred())

Eventually(func() error {
return checkPodDeleted(f, cadvisorPodName)
}, 10*time.Minute, time.Second*3).Should(BeNil())
})
}
})
@@ -226,34 +194,17 @@ var _ = framework.KubeDescribe("Density", func() {
podsNr: 10,
bgPodsNr: 10,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.10, 0.95: 0.12},
stats.SystemContainerRuntime: {0.50: 0.16, 0.95: 0.20},
stats.SystemContainerKubelet: {0.50: 0.20, 0.95: 0.25},
stats.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
},
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
},
podStartupLimits: framework.LatencyMetric{
Perc50: 1500 * time.Millisecond,
Perc90: 2500 * time.Millisecond,
Perc99: 3500 * time.Millisecond,
},
},
{
podsNr: 10,
bgPodsNr: 30,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.12, 0.95: 0.15},
stats.SystemContainerRuntime: {0.50: 0.22, 0.95: 0.27},
},
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
},
podStartupLimits: framework.LatencyMetric{
Perc50: 1500 * time.Millisecond,
Perc90: 2500 * time.Millisecond,
Perc99: 3500 * time.Millisecond,
Perc50: 3000 * time.Millisecond,
Perc90: 4000 * time.Millisecond,
Perc99: 5000 * time.Millisecond,
},
},
}
@@ -273,7 +224,7 @@ var _ = framework.KubeDescribe("Density", func() {
// all pods are running when it returns
f.PodClient().CreateBatch(bgPods)

//time.Sleep(sleepBeforeCreatePods)
time.Sleep(sleepBeforeCreatePods)

// starting resource monitoring
rm.Start()
@@ -290,18 +241,6 @@ var _ = framework.KubeDescribe("Density", func() {
// verify resource
By("Verifying resource")
verifyResource(f, testArg, rm)

// delete pods
By("Deleting a batch of pods")
deleteBatchPod(f, append(bgPods, testPods...))

// tear down cadvisor
Expect(f.Client.Pods(ns).Delete(cadvisorPodName, api.NewDeleteOptions(30))).
NotTo(HaveOccurred())

Eventually(func() error {
return checkPodDeleted(f, cadvisorPodName)
}, 10*time.Minute, time.Second*3).Should(BeNil())
})
}
})
@@ -309,7 +248,8 @@ var _ = framework.KubeDescribe("Density", func() {

type DensityTest struct {
// number of pods
podsNr int
podsNr int
// number of background pods
bgPodsNr int
// interval between creating pod (rate control)
interval time.Duration

@@ -239,6 +239,10 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
}
} else {
cmdArgs = append(cmdArgs, getKubeletServerBin())
cmdArgs = append(cmdArgs,
"--kubelet-cgroups=/kubelet",
"--runtime-cgroups=/docker-daemon",
)
}
cmdArgs = append(cmdArgs,
"--api-servers", "http://127.0.0.1:8080",
@@ -252,9 +256,6 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
"--file-check-frequency", "10s", // Check file frequently so tests won't wait too long
"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
"--pod-cidr=10.180.0.0/24", // Assign a fixed CIDR to the node because there is no node controller.
"--cgroup-root=/",
"--runtime-cgroups=/docker-daemon",
"--kubelet-cgroups=/kubelet",
)
if es.cgroupsPerQOS {
cmdArgs = append(cmdArgs,

@@ -1,13 +1,16 @@
/*
Copyright 2016 The Kubernetes Authors.
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing perissions and
See the License for the specific language governing permissions and
limitations under the License.
*/

@@ -16,7 +19,12 @@ package e2e_node
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"sort"
"strconv"
"strings"
"sync"
"text/tabwriter"
@@ -24,10 +32,12 @@ import (

cadvisorclient "github.com/google/cadvisor/client/v2"
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
"github.com/opencontainers/runc/libcontainer/cgroups"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/runtime"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/test/e2e/framework"

@@ -39,15 +49,12 @@ const (
cadvisorImageName = "google/cadvisor:latest"
cadvisorPodName = "cadvisor"
cadvisorPort = 8090
// housekeeping interval of Cadvisor (second)
houseKeepingInterval = 1
)

var (
systemContainers = map[string]string{
//"root": "/",
//stats.SystemContainerMisc: "misc"
stats.SystemContainerKubelet: "kubelet",
stats.SystemContainerRuntime: "docker-daemon",
}
systemContainers map[string]string
)

type ResourceCollector struct {
@@ -69,6 +76,18 @@ func NewResourceCollector(interval time.Duration) *ResourceCollector {
}

func (r *ResourceCollector) Start() {
// Get the cgroup containers for kubelet and docker
kubeletContainer, err := getContainerNameForProcess(kubeletProcessName, "")
dockerContainer, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
if err == nil {
systemContainers = map[string]string{
stats.SystemContainerKubelet: kubeletContainer,
stats.SystemContainerRuntime: dockerContainer,
}
} else {
framework.Failf("Failed to get docker container name in test-e2e-node resource collector.")
}

wait.Poll(1*time.Second, 1*time.Minute, func() (bool, error) {
var err error
r.client, err = cadvisorclient.NewClient(fmt.Sprintf("http://localhost:%d/", cadvisorPort))
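
A reading note on the Start() hunk above: the err returned by the kubelet lookup is overwritten by the docker lookup on the next line, so only the docker error is actually checked before systemContainers is populated. If both lookups should be validated, a stricter variant could look like the sketch below. This is illustrative only, not part of the commit; it assumes it sits in the same e2e_node test package (so getContainerNameForProcess, framework, and stats are in scope), and the helper name detectSystemContainers is hypothetical.

// Sketch of a stricter variant: fail the test if either the kubelet or the
// docker cgroup lookup fails, instead of checking only the second error.
func detectSystemContainers() map[string]string {
	kubeletContainer, err := getContainerNameForProcess(kubeletProcessName, "")
	if err != nil {
		framework.Failf("failed to get kubelet cgroup in test-e2e-node resource collector: %v", err)
	}
	dockerContainer, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
	if err != nil {
		framework.Failf("failed to get docker cgroup in test-e2e-node resource collector: %v", err)
	}
	return map[string]string{
		stats.SystemContainerKubelet: kubeletContainer,
		stats.SystemContainerRuntime: dockerContainer,
	}
}
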
@@ -123,7 +142,7 @@ func (r *ResourceCollector) collectStats(oldStatsMap map[string]*cadvisorapiv2.C
framework.Logf("Error getting container stats, err: %v", err)
return
}
cStats, ok := ret["/"+name]
cStats, ok := ret[name]
if !ok {
framework.Logf("Missing info/stats for container %q", name)
return
@@ -160,7 +179,7 @@ func (r *ResourceCollector) GetLatest() (framework.ResourceUsagePerContainer, er
for key, name := range systemContainers {
contStats, ok := r.buffers[name]
if !ok || len(contStats) == 0 {
return nil, fmt.Errorf("Resource usage is not ready yet")
return nil, fmt.Errorf("Resource usage of %s:%s is not ready yet", key, name)
}
stats[key] = contStats[len(contStats)-1]
}
@@ -257,11 +276,10 @@ func createCadvisorPod(f *framework.Framework) {
f.PodClient().CreateSync(&api.Pod{
ObjectMeta: api.ObjectMeta{
Name: cadvisorPodName,
//Labels: map[string]string{"type": cadvisorPodType, "name": cadvisorPodName},
},
Spec: api.PodSpec{
// Don't restart the Pod since it is expected to exit
RestartPolicy: api.RestartPolicyNever,
// It uses a host port for the tests to collect data.
// Currently we can not use port mapping in test-e2e-node.
SecurityContext: &api.PodSecurityContext{
HostNetwork: true,
},
@@ -301,7 +319,7 @@ func createCadvisorPod(f *framework.Framework) {
},
Args: []string{
"--profiling",
"--housekeeping_interval=1s",
fmt.Sprintf("--housekeeping_interval=%ds", houseKeepingInterval),
fmt.Sprintf("--port=%d", cadvisorPort),
},
},
@@ -336,7 +354,7 @@ func deleteBatchPod(f *framework.Framework, pods []*api.Pod) {
go func(pod *api.Pod) {
defer wg.Done()

err := f.Client.Pods(ns).Delete(pod.ObjectMeta.Name, api.NewDeleteOptions(60))
err := f.Client.Pods(ns).Delete(pod.ObjectMeta.Name, api.NewDeleteOptions(30))
Expect(err).NotTo(HaveOccurred())

Expect(framework.WaitForPodToDisappear(f.Client, ns, pod.ObjectMeta.Name, labels.Everything(),
@@ -348,9 +366,9 @@ func deleteBatchPod(f *framework.Framework, pods []*api.Pod) {
return
}

func newTestPods(podsPerNode int, imageName, podType string) []*api.Pod {
func newTestPods(numPods int, imageName, podType string) []*api.Pod {
var pods []*api.Pod
for i := 0; i < podsPerNode; i++ {
for i := 0; i < numPods; i++ {
podName := "test-" + string(util.NewUUID())
labels := map[string]string{
"type": podType,
@@ -363,7 +381,8 @@ func newTestPods(podsPerNode int, imageName, podType string) []*api.Pod {
Labels: labels,
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyNever,
// ToDo: restart policy is always
// check whether pods restart at the end of tests
Containers: []api.Container{
{
Image: imageName,
@@ -375,3 +394,119 @@ func newTestPods(podsPerNode int, imageName, podType string) []*api.Pod {
}
return pods
}

// code for getting container name of docker
const (
kubeletProcessName = "kubelet"
dockerProcessName = "docker"
dockerPidFile = "/var/run/docker.pid"
containerdProcessName = "docker-containerd"
containerdPidFile = "/run/docker/libcontainerd/docker-containerd.pid"
)

func getContainerNameForProcess(name, pidFile string) (string, error) {
pids, err := getPidsForProcess(name, pidFile)
if err != nil {
return "", fmt.Errorf("failed to detect process id for %q - %v", name, err)
}
if len(pids) == 0 {
return "", nil
}
cont, err := getContainer(pids[0])
if err != nil {
return "", err
}
return cont, nil
}

func getPidFromPidFile(pidFile string) (int, error) {
file, err := os.Open(pidFile)
if err != nil {
return 0, fmt.Errorf("error opening pid file %s: %v", pidFile, err)
}
defer file.Close()

data, err := ioutil.ReadAll(file)
if err != nil {
return 0, fmt.Errorf("error reading pid file %s: %v", pidFile, err)
}

pid, err := strconv.Atoi(string(data))
if err != nil {
return 0, fmt.Errorf("error parsing %s as a number: %v", string(data), err)
}

return pid, nil
}

func getPidsForProcess(name, pidFile string) ([]int, error) {
if len(pidFile) > 0 {
if pid, err := getPidFromPidFile(pidFile); err == nil {
return []int{pid}, nil
} else {
// log the error and fall back to pidof
runtime.HandleError(err)
}
}

out, err := exec.Command("pidof", name).Output()
if err != nil {
return []int{}, fmt.Errorf("failed to find pid of %q: %v", name, err)
}

// The output of pidof is a list of pids.
pids := []int{}
for _, pidStr := range strings.Split(strings.TrimSpace(string(out)), " ") {
pid, err := strconv.Atoi(pidStr)
if err != nil {
continue
}
pids = append(pids, pid)
}
return pids, nil
}

// getContainer returns the cgroup associated with the specified pid.
// It enforces a unified hierarchy for memory and cpu cgroups.
// On systemd environments, it uses the name=systemd cgroup for the specified pid.
func getContainer(pid int) (string, error) {
cgs, err := cgroups.ParseCgroupFile(fmt.Sprintf("/proc/%d/cgroup", pid))
if err != nil {
return "", err
}

cpu, found := cgs["cpu"]
if !found {
return "", cgroups.NewNotFoundError("cpu")
}
memory, found := cgs["memory"]
if !found {
return "", cgroups.NewNotFoundError("memory")
}

// since we use this container for accounting, we need to ensure its a unified hierarchy.
if cpu != memory {
return "", fmt.Errorf("cpu and memory cgroup hierarchy not unified. cpu: %s, memory: %s", cpu, memory)
}

// on systemd, every pid is in a unified cgroup hierarchy (name=systemd as seen in systemd-cgls)
// cpu and memory accounting is off by default, users may choose to enable it per unit or globally.
// users could enable CPU and memory accounting globally via /etc/systemd/system.conf (DefaultCPUAccounting=true DefaultMemoryAccounting=true).
// users could also enable CPU and memory accounting per unit via CPUAccounting=true and MemoryAccounting=true
// we only warn if accounting is not enabled for CPU or memory so as to not break local development flows where kubelet is launched in a terminal.
// for example, the cgroup for the user session will be something like /user.slice/user-X.slice/session-X.scope, but the cpu and memory
// cgroup will be the closest ancestor where accounting is performed (most likely /) on systems that launch docker containers.
// as a result, on those systems, you will not get cpu or memory accounting statistics for kubelet.
// in addition, you would not get memory or cpu accounting for the runtime unless accounting was enabled on its unit (or globally).
if systemd, found := cgs["name=systemd"]; found {
if systemd != cpu {
log.Printf("CPUAccounting not enabled for pid: %d", pid)
}
if systemd != memory {
log.Printf("MemoryAccounting not enabled for pid: %d", pid)
}
return systemd, nil
}

return cpu, nil
}
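
For readers unfamiliar with the file format getContainer consumes: /proc/<pid>/cgroup lists one hierarchy per line as "hierarchy-id:controllers:path", and runc's cgroups.ParseCgroupFile maps each controller (plus name=systemd) to its path. The sketch below is illustrative only; the sample file content is invented, and the inlined parser merely approximates what the runc helper does, to make the cpu/memory unified-hierarchy check above concrete.

// Standalone illustration of the data getContainer consumes. The sample
// content below is made up; on a real host it comes from /proc/<pid>/cgroup.
// The real code uses cgroups.ParseCgroupFile from runc; this sketch inlines
// an equivalent parse to show the cpu/memory/name=systemd comparison.
package main

import (
	"fmt"
	"strings"
)

const sampleCgroupFile = `11:memory:/kubelet
7:cpu,cpuacct:/kubelet
1:name=systemd:/system.slice/kubelet.service`

// parseCgroupLines maps each controller name (and "name=systemd") to its path.
func parseCgroupLines(content string) map[string]string {
	subsystems := map[string]string{}
	for _, line := range strings.Split(strings.TrimSpace(content), "\n") {
		parts := strings.SplitN(line, ":", 3) // "hierarchy-id:controllers:path"
		if len(parts) != 3 {
			continue
		}
		for _, sub := range strings.Split(parts[1], ",") {
			subsystems[sub] = parts[2]
		}
	}
	return subsystems
}

func main() {
	cgs := parseCgroupLines(sampleCgroupFile)
	cpu, memory := cgs["cpu"], cgs["memory"]
	if cpu != memory {
		fmt.Printf("cpu and memory cgroup hierarchy not unified. cpu: %s, memory: %s\n", cpu, memory)
		return
	}
	// On systemd hosts getContainer prefers the name=systemd path, warning if
	// cpu or memory accounting is not enabled on that unit.
	if systemd, ok := cgs["name=systemd"]; ok {
		fmt.Println("systemd cgroup:", systemd)
		return
	}
	fmt.Println("cpu cgroup:", cpu)
}
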
@@ -1,9 +1,12 @@
/*
Copyright 2016 The Kubernetes Authors.
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -18,36 +21,33 @@ import (
"strings"
"time"

"k8s.io/kubernetes/pkg/api"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/test/e2e/framework"

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)

var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
const (
// Interval to poll /stats/container on a node
containerStatsPollingPeriod = 10 * time.Second
// The monitoring time for one test.
monitoringTime = 6 * time.Minute
monitoringTime = 10 * time.Minute
// The periodic reporting period.
reportingPeriod = 3 * time.Minute
reportingPeriod = 5 * time.Minute

sleepAfterCreatePods = 10 * time.Second
sleepAfterDeletePods = 120 * time.Second
)

var (
ns string
rm *ResourceCollector
rc *ResourceCollector
om *framework.RuntimeOperationMonitor
)

f := framework.NewDefaultFramework("kubelet-perf")
f := framework.NewDefaultFramework("resource-usage")

BeforeEach(func() {
ns = f.Namespace.Name
@@ -59,45 +59,22 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
framework.Logf("runtime operation error metrics:\n%s", framework.FormatRuntimeOperationErrorRate(result))
})

// This test measures and verifies the steady resource usage of node is within limit
// It collects data from a standalone Cadvisor with housekeeping interval 1s.
// It verifies CPU percentiles and the lastest memory usage.
Context("regular resource usage tracking", func() {
rTests := []resourceTest{
{
podsPerNode: 0,
podsPerNode: 10,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.06, 0.95: 0.08},
stats.SystemContainerRuntime: {0.50: 0.05, 0.95: 0.06},
stats.SystemContainerKubelet: {0.50: 0.25, 0.95: 0.30},
stats.SystemContainerRuntime: {0.50: 0.30, 0.95: 0.40},
},
// We set the memory limits generously because the distribution
// of the addon pods affect the memory usage on each node.
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 70 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 85 * 1024 * 1024},
},
},
{
podsPerNode: 35,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.12, 0.95: 0.14},
stats.SystemContainerRuntime: {0.50: 0.05, 0.95: 0.07},
},
// We set the memory limits generously because the distribution
// of the addon pods affect the memory usage on each node.
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 70 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 150 * 1024 * 1024},
},
},
{
podsPerNode: 100,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.17, 0.95: 0.22},
stats.SystemContainerRuntime: {0.50: 0.06, 0.95: 0.09},
},
// We set the memory limits generously because the distribution
// of the addon pods affect the memory usage on each node.
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 80 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
},
},
}
@@ -111,9 +88,13 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
It(name, func() {
expectedCPU, expectedMemory := itArg.cpuLimits, itArg.memLimits

// The test collects resource usage from a standalone Cadvisor pod.
// The Cadvsior of Kubelet has a housekeeping interval of 10s, which is too long to
// show the resource usage spikes. But changing its interval increases the overhead
// of kubelet. Hence we use a Cadvisor pod.
createCadvisorPod(f)
rm = NewResourceCollector(containerStatsPollingPeriod)
rm.Start()
rc = NewResourceCollector(containerStatsPollingPeriod)
rc.Start()

By("Creating a batch of Pods")
pods := newTestPods(podsPerNode, ImageRegistry[pauseImage], "test_pod")
@@ -125,8 +106,8 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
time.Sleep(sleepAfterCreatePods)

// Log once and flush the stats.
rm.LogLatest()
rm.Reset()
rc.LogLatest()
rc.Reset()

By("Start monitoring resource usage")
// Periodically dump the cpu summary until the deadline is met.
@@ -143,13 +124,15 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
} else {
time.Sleep(reportingPeriod)
}
logPodsOnNodes(f.Client)
logPodsOnNode(f.Client)
}

By("Reporting overall resource usage")
logPodsOnNodes(f.Client)
rc.Stop()

usagePerContainer, err := rm.GetLatest()
By("Reporting overall resource usage")
logPodsOnNode(f.Client)

usagePerContainer, err := rc.GetLatest()
Expect(err).NotTo(HaveOccurred())

// TODO(random-liu): Remove the original log when we migrate to new perfdash
@@ -163,7 +146,7 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode))
verifyMemoryLimits(f.Client, expectedMemory, usagePerNode)

cpuSummary := rm.GetCPUSummary()
cpuSummary := rc.GetCPUSummary()
framework.Logf("%s", formatCPUSummary(cpuSummary))

// Log perf result
@@ -171,21 +154,6 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
cpuSummaryPerNode[nodeName] = cpuSummary
framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode))
verifyCPULimits(expectedCPU, cpuSummaryPerNode)

// delete pods
By("Deleting a batch of pods")
deleteBatchPod(f, pods)

rm.Stop()

// tear down cadvisor
Expect(f.Client.Pods(ns).Delete(cadvisorPodName, api.NewDeleteOptions(30))).
NotTo(HaveOccurred())
Expect(framework.WaitForPodToDisappear(f.Client, ns, cadvisorPodName, labels.Everything(),
3*time.Second, 10*time.Minute)).
NotTo(HaveOccurred())

time.Sleep(sleepAfterDeletePods)
})
}
})
@@ -267,7 +235,7 @@ func verifyCPULimits(expected framework.ContainersCPUSummary, actual framework.N
}
}

func logPodsOnNodes(c *client.Client) {
func logPodsOnNode(c *client.Client) {
nodeName := framework.TestContext.NodeName
podList, err := framework.GetKubeletRunningPods(c, nodeName)
if err != nil {