modify resource_collector.go to get container names of kubelet and docker dynamically
This commit is contained in:
parent 32e1db16c5
commit f3f3e965cc
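
The core of the change, in brief: rather than hardcoding the kubelet and docker cgroup names ("kubelet", "docker-daemon"), the resource collector now resolves them at runtime by locating each daemon's pid (via a pid file or pidof) and reading its cgroup from /proc/<pid>/cgroup. Below is a minimal standalone sketch of that lookup, not code from the commit: it assumes a Linux host with cgroup v1 and pidof on PATH, and the function and variable names (cgroupPathForPid, main) are purely illustrative.

// Sketch only: resolves the cpu-cgroup path of a named process, mirroring the
// idea of getContainerNameForProcess/getContainer in this commit. Assumes
// Linux, cgroup v1 (/proc/<pid>/cgroup has "id:controllers:path" lines), and
// that pidof is available; error handling is kept minimal on purpose.
package main

import (
	"fmt"
	"os"
	"os/exec"
	"strings"
)

// cgroupPathForPid reads /proc/<pid>/cgroup and returns the path of the
// hierarchy that contains the given controller, e.g. "/kubelet" or
// "/system.slice/docker.service".
func cgroupPathForPid(pid, controller string) (string, error) {
	data, err := os.ReadFile(fmt.Sprintf("/proc/%s/cgroup", pid))
	if err != nil {
		return "", err
	}
	for _, line := range strings.Split(strings.TrimSpace(string(data)), "\n") {
		// each line looks like "3:cpu,cpuacct:/kubelet"
		parts := strings.SplitN(line, ":", 3)
		if len(parts) != 3 {
			continue
		}
		for _, sub := range strings.Split(parts[1], ",") {
			if sub == controller {
				return parts[2], nil
			}
		}
	}
	return "", fmt.Errorf("controller %q not found for pid %s", controller, pid)
}

func main() {
	for _, proc := range []string{"kubelet", "docker"} {
		out, err := exec.Command("pidof", proc).Output()
		if err != nil {
			fmt.Printf("%s: not running (%v)\n", proc, err)
			continue
		}
		pid := strings.Fields(string(out))[0] // pidof may print several pids
		path, err := cgroupPathForPid(pid, "cpu")
		if err != nil {
			fmt.Printf("%s: %v\n", proc, err)
			continue
		}
		fmt.Printf("%s (pid %s) -> cgroup %s\n", proc, pid, path)
	}
}
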
@@ -1,9 +1,12 @@
/*
Copyright 2016 The Kubernetes Authors.
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -42,15 +45,13 @@ const (
kubeletAddr = "localhost:10255"
)

var _ = framework.KubeDescribe("Density", func() {
var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
const (
// the data collection time of `resource collector' and the standalone cadvisor
// is not synchronizated. Therefore `resource collector' may miss data or
// collect duplicated data
monitoringInterval = 500 * time.Millisecond
sleepBeforeEach = 30 * time.Second
sleepBeforeCreatePods = 30 * time.Second
sleepAfterDeletePods = 60 * time.Second
)

var (
@@ -67,7 +68,6 @@ var _ = framework.KubeDescribe("Density", func() {
})

AfterEach(func() {
time.Sleep(sleepAfterDeletePods)
})

Context("create a batch of pods", func() {
@@ -76,41 +76,21 @@ var _ = framework.KubeDescribe("Density", func() {
podsNr: 10,
interval: 0 * time.Millisecond,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.10, 0.95: 0.20},
stats.SystemContainerRuntime: {0.50: 0.10, 0.95: 0.50},
stats.SystemContainerKubelet: {0.50: 0.20, 0.95: 0.30},
stats.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
},
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 250 * 1024 * 1024},
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
},
// percentile limit of single pod startup latency
podStartupLimits: framework.LatencyMetric{
Perc50: 7 * time.Second,
Perc90: 10 * time.Second,
Perc99: 15 * time.Second,
Perc50: 10 * time.Second,
Perc90: 15 * time.Second,
Perc99: 20 * time.Second,
},
// upbound of startup latency of a batch of pods
podBatchStartupLimit: 20 * time.Second,
},
{
podsNr: 30,
interval: 0 * time.Millisecond,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.10, 0.95: 0.35},
stats.SystemContainerRuntime: {0.50: 0.10, 0.95: 0.70},
},
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
},
// percentile limit of single pod startup latency
podStartupLimits: framework.LatencyMetric{
Perc50: 30 * time.Second,
Perc90: 35 * time.Second,
Perc99: 40 * time.Second,
},
// upbound of startup latency of a batch of pods
podBatchStartupLimit: 90 * time.Second,
podBatchStartupLimit: 25 * time.Second,
},
}

@@ -139,7 +119,7 @@ var _ = framework.KubeDescribe("Density", func() {
controller := newInformerWatchPod(f, mutex, watchTimes, podType)
go controller.Run(stopCh)

// Zhou: In test we see kubelet starts while it is busy on sth, as a result `syncLoop'
// Zhou: In test we see kubelet starts while it is busy on something, as a result `syncLoop'
// does not response to pod creation immediately. Creating the first pod has a delay around 5s.
// The node status has been `ready' so `wait and check node being ready' does not help here.
// Now wait here for a grace period to have `syncLoop' be ready
@@ -153,14 +133,14 @@ var _ = framework.KubeDescribe("Density", func() {
// it returns a map[`pod name']`creation time' as the creation timestamps
createTimes := createBatchPodWithRateControl(f, pods, itArg.interval)

By("Waiting for all Pods begin observed by the watch...")
// checks every 10s util all pods are running. it timeouts ater 10min
By("Waiting for all Pods to be observed by the watch...")
// checks every 10s util all pods are running. it times out ater 10min
Eventually(func() bool {
return len(watchTimes) == itArg.podsNr
}, 10*time.Minute, 10*time.Second).Should(BeTrue())

if len(watchTimes) < itArg.podsNr {
framework.Failf("Timeout reached waiting for all Pods being observed by the watch.")
framework.Failf("Timeout reached waiting for all Pods to be observed by the watch.")
}

// stop the watching controller, and the resource collector
@@ -204,18 +184,6 @@ var _ = framework.KubeDescribe("Density", func() {
// verify resource
By("Verifying resource")
verifyResource(f, testArg, rm)

// delete pods
By("Deleting a batch of pods")
deleteBatchPod(f, pods)

// tear down cadvisor
Expect(f.Client.Pods(ns).Delete(cadvisorPodName, api.NewDeleteOptions(30))).
NotTo(HaveOccurred())

Eventually(func() error {
return checkPodDeleted(f, cadvisorPodName)
}, 10*time.Minute, time.Second*3).Should(BeNil())
})
}
})
@@ -226,34 +194,17 @@ var _ = framework.KubeDescribe("Density", func() {
podsNr: 10,
bgPodsNr: 10,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.10, 0.95: 0.12},
stats.SystemContainerRuntime: {0.50: 0.16, 0.95: 0.20},
stats.SystemContainerKubelet: {0.50: 0.20, 0.95: 0.25},
stats.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
},
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
},
podStartupLimits: framework.LatencyMetric{
Perc50: 1500 * time.Millisecond,
Perc90: 2500 * time.Millisecond,
Perc99: 3500 * time.Millisecond,
},
},
{
podsNr: 10,
bgPodsNr: 30,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.12, 0.95: 0.15},
stats.SystemContainerRuntime: {0.50: 0.22, 0.95: 0.27},
},
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
},
podStartupLimits: framework.LatencyMetric{
Perc50: 1500 * time.Millisecond,
Perc90: 2500 * time.Millisecond,
Perc99: 3500 * time.Millisecond,
Perc50: 3000 * time.Millisecond,
Perc90: 4000 * time.Millisecond,
Perc99: 5000 * time.Millisecond,
},
},
}
@@ -273,7 +224,7 @@ var _ = framework.KubeDescribe("Density", func() {
// all pods are running when it returns
f.PodClient().CreateBatch(bgPods)

//time.Sleep(sleepBeforeCreatePods)
time.Sleep(sleepBeforeCreatePods)

// starting resource monitoring
rm.Start()
@@ -290,18 +241,6 @@ var _ = framework.KubeDescribe("Density", func() {
// verify resource
By("Verifying resource")
verifyResource(f, testArg, rm)

// delete pods
By("Deleting a batch of pods")
deleteBatchPod(f, append(bgPods, testPods...))

// tear down cadvisor
Expect(f.Client.Pods(ns).Delete(cadvisorPodName, api.NewDeleteOptions(30))).
NotTo(HaveOccurred())

Eventually(func() error {
return checkPodDeleted(f, cadvisorPodName)
}, 10*time.Minute, time.Second*3).Should(BeNil())
})
}
})
@@ -309,7 +248,8 @@ var _ = framework.KubeDescribe("Density", func() {

type DensityTest struct {
// number of pods
podsNr int
podsNr int
// number of background pods
bgPodsNr int
// interval between creating pod (rate control)
interval time.Duration

@@ -239,6 +239,10 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
}
} else {
cmdArgs = append(cmdArgs, getKubeletServerBin())
cmdArgs = append(cmdArgs,
"--kubelet-cgroups=/kubelet",
"--runtime-cgroups=/docker-daemon",
)
}
cmdArgs = append(cmdArgs,
"--api-servers", "http://127.0.0.1:8080",
@@ -252,9 +256,6 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
"--file-check-frequency", "10s", // Check file frequently so tests won't wait too long
"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
"--pod-cidr=10.180.0.0/24", // Assign a fixed CIDR to the node because there is no node controller.
"--cgroup-root=/",
"--runtime-cgroups=/docker-daemon",
"--kubelet-cgroups=/kubelet",
)
if es.cgroupsPerQOS {
cmdArgs = append(cmdArgs,

@@ -1,13 +1,16 @@
/*
Copyright 2016 The Kubernetes Authors.
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing perissions and
See the License for the specific language governing permissions and
limitations under the License.
*/

@@ -16,7 +19,12 @@ package e2e_node
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"sort"
"strconv"
"strings"
"sync"
"text/tabwriter"
@@ -24,10 +32,12 @@ import (

cadvisorclient "github.com/google/cadvisor/client/v2"
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
"github.com/opencontainers/runc/libcontainer/cgroups"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/runtime"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/test/e2e/framework"

@@ -39,15 +49,12 @@ const (
cadvisorImageName = "google/cadvisor:latest"
cadvisorPodName = "cadvisor"
cadvisorPort = 8090
// housekeeping interval of Cadvisor (second)
houseKeepingInterval = 1
)

var (
systemContainers = map[string]string{
//"root": "/",
//stats.SystemContainerMisc: "misc"
stats.SystemContainerKubelet: "kubelet",
stats.SystemContainerRuntime: "docker-daemon",
}
systemContainers map[string]string
)

type ResourceCollector struct {
@@ -69,6 +76,18 @@ func NewResourceCollector(interval time.Duration) *ResourceCollector {
}

func (r *ResourceCollector) Start() {
// Get the cgroup containers for kubelet and docker
kubeletContainer, err := getContainerNameForProcess(kubeletProcessName, "")
dockerContainer, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
if err == nil {
systemContainers = map[string]string{
stats.SystemContainerKubelet: kubeletContainer,
stats.SystemContainerRuntime: dockerContainer,
}
} else {
framework.Failf("Failed to get docker container name in test-e2e-node resource collector.")
}

wait.Poll(1*time.Second, 1*time.Minute, func() (bool, error) {
var err error
r.client, err = cadvisorclient.NewClient(fmt.Sprintf("http://localhost:%d/", cadvisorPort))
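
A reading note on the Start() hunk above: the err returned by the kubelet lookup is overwritten by the docker lookup on the next line, so only the docker error is actually checked before systemContainers is populated. If both lookups should be validated, a stricter variant could look like the sketch below. This is illustrative only, not part of the commit; it assumes it sits in the same e2e_node test package (so getContainerNameForProcess, framework, and stats are in scope), and the helper name detectSystemContainers is hypothetical.

// Sketch of a stricter variant: fail the test if either the kubelet or the
// docker cgroup lookup fails, instead of checking only the second error.
func detectSystemContainers() map[string]string {
	kubeletContainer, err := getContainerNameForProcess(kubeletProcessName, "")
	if err != nil {
		framework.Failf("failed to get kubelet cgroup in test-e2e-node resource collector: %v", err)
	}
	dockerContainer, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
	if err != nil {
		framework.Failf("failed to get docker cgroup in test-e2e-node resource collector: %v", err)
	}
	return map[string]string{
		stats.SystemContainerKubelet: kubeletContainer,
		stats.SystemContainerRuntime: dockerContainer,
	}
}
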
@@ -123,7 +142,7 @@ func (r *ResourceCollector) collectStats(oldStatsMap map[string]*cadvisorapiv2.C
framework.Logf("Error getting container stats, err: %v", err)
return
}
cStats, ok := ret["/"+name]
cStats, ok := ret[name]
if !ok {
framework.Logf("Missing info/stats for container %q", name)
return
@@ -160,7 +179,7 @@ func (r *ResourceCollector) GetLatest() (framework.ResourceUsagePerContainer, er
for key, name := range systemContainers {
contStats, ok := r.buffers[name]
if !ok || len(contStats) == 0 {
return nil, fmt.Errorf("Resource usage is not ready yet")
return nil, fmt.Errorf("Resource usage of %s:%s is not ready yet", key, name)
}
stats[key] = contStats[len(contStats)-1]
}
@@ -257,11 +276,10 @@ func createCadvisorPod(f *framework.Framework) {
f.PodClient().CreateSync(&api.Pod{
ObjectMeta: api.ObjectMeta{
Name: cadvisorPodName,
//Labels: map[string]string{"type": cadvisorPodType, "name": cadvisorPodName},
},
Spec: api.PodSpec{
// Don't restart the Pod since it is expected to exit
RestartPolicy: api.RestartPolicyNever,
// It uses a host port for the tests to collect data.
// Currently we can not use port mapping in test-e2e-node.
SecurityContext: &api.PodSecurityContext{
HostNetwork: true,
},
@@ -301,7 +319,7 @@ func createCadvisorPod(f *framework.Framework) {
},
Args: []string{
"--profiling",
"--housekeeping_interval=1s",
fmt.Sprintf("--housekeeping_interval=%ds", houseKeepingInterval),
fmt.Sprintf("--port=%d", cadvisorPort),
},
},
@@ -336,7 +354,7 @@ func deleteBatchPod(f *framework.Framework, pods []*api.Pod) {
go func(pod *api.Pod) {
defer wg.Done()

err := f.Client.Pods(ns).Delete(pod.ObjectMeta.Name, api.NewDeleteOptions(60))
err := f.Client.Pods(ns).Delete(pod.ObjectMeta.Name, api.NewDeleteOptions(30))
Expect(err).NotTo(HaveOccurred())

Expect(framework.WaitForPodToDisappear(f.Client, ns, pod.ObjectMeta.Name, labels.Everything(),
@@ -348,9 +366,9 @@ func deleteBatchPod(f *framework.Framework, pods []*api.Pod) {
return
}

func newTestPods(podsPerNode int, imageName, podType string) []*api.Pod {
func newTestPods(numPods int, imageName, podType string) []*api.Pod {
var pods []*api.Pod
for i := 0; i < podsPerNode; i++ {
for i := 0; i < numPods; i++ {
podName := "test-" + string(util.NewUUID())
labels := map[string]string{
"type": podType,
@@ -363,7 +381,8 @@ func newTestPods(podsPerNode int, imageName, podType string) []*api.Pod {
Labels: labels,
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyNever,
// ToDo: restart policy is always
// check whether pods restart at the end of tests
Containers: []api.Container{
{
Image: imageName,
@@ -375,3 +394,119 @@ func newTestPods(podsPerNode int, imageName, podType string) []*api.Pod {
}
return pods
}

// code for getting container name of docker
const (
kubeletProcessName = "kubelet"
dockerProcessName = "docker"
dockerPidFile = "/var/run/docker.pid"
containerdProcessName = "docker-containerd"
containerdPidFile = "/run/docker/libcontainerd/docker-containerd.pid"
)

func getContainerNameForProcess(name, pidFile string) (string, error) {
pids, err := getPidsForProcess(name, pidFile)
if err != nil {
return "", fmt.Errorf("failed to detect process id for %q - %v", name, err)
}
if len(pids) == 0 {
return "", nil
}
cont, err := getContainer(pids[0])
if err != nil {
return "", err
}
return cont, nil
}

func getPidFromPidFile(pidFile string) (int, error) {
file, err := os.Open(pidFile)
if err != nil {
return 0, fmt.Errorf("error opening pid file %s: %v", pidFile, err)
}
defer file.Close()

data, err := ioutil.ReadAll(file)
if err != nil {
return 0, fmt.Errorf("error reading pid file %s: %v", pidFile, err)
}

pid, err := strconv.Atoi(string(data))
if err != nil {
return 0, fmt.Errorf("error parsing %s as a number: %v", string(data), err)
}

return pid, nil
}

func getPidsForProcess(name, pidFile string) ([]int, error) {
if len(pidFile) > 0 {
if pid, err := getPidFromPidFile(pidFile); err == nil {
return []int{pid}, nil
} else {
// log the error and fall back to pidof
runtime.HandleError(err)
}
}

out, err := exec.Command("pidof", name).Output()
if err != nil {
return []int{}, fmt.Errorf("failed to find pid of %q: %v", name, err)
}

// The output of pidof is a list of pids.
pids := []int{}
for _, pidStr := range strings.Split(strings.TrimSpace(string(out)), " ") {
pid, err := strconv.Atoi(pidStr)
if err != nil {
continue
}
pids = append(pids, pid)
}
return pids, nil
}

// getContainer returns the cgroup associated with the specified pid.
// It enforces a unified hierarchy for memory and cpu cgroups.
// On systemd environments, it uses the name=systemd cgroup for the specified pid.
func getContainer(pid int) (string, error) {
cgs, err := cgroups.ParseCgroupFile(fmt.Sprintf("/proc/%d/cgroup", pid))
if err != nil {
return "", err
}

cpu, found := cgs["cpu"]
if !found {
return "", cgroups.NewNotFoundError("cpu")
}
memory, found := cgs["memory"]
if !found {
return "", cgroups.NewNotFoundError("memory")
}

// since we use this container for accounting, we need to ensure its a unified hierarchy.
if cpu != memory {
return "", fmt.Errorf("cpu and memory cgroup hierarchy not unified. cpu: %s, memory: %s", cpu, memory)
}

// on systemd, every pid is in a unified cgroup hierarchy (name=systemd as seen in systemd-cgls)
// cpu and memory accounting is off by default, users may choose to enable it per unit or globally.
// users could enable CPU and memory accounting globally via /etc/systemd/system.conf (DefaultCPUAccounting=true DefaultMemoryAccounting=true).
// users could also enable CPU and memory accounting per unit via CPUAccounting=true and MemoryAccounting=true
// we only warn if accounting is not enabled for CPU or memory so as to not break local development flows where kubelet is launched in a terminal.
// for example, the cgroup for the user session will be something like /user.slice/user-X.slice/session-X.scope, but the cpu and memory
// cgroup will be the closest ancestor where accounting is performed (most likely /) on systems that launch docker containers.
// as a result, on those systems, you will not get cpu or memory accounting statistics for kubelet.
// in addition, you would not get memory or cpu accounting for the runtime unless accounting was enabled on its unit (or globally).
if systemd, found := cgs["name=systemd"]; found {
if systemd != cpu {
log.Printf("CPUAccounting not enabled for pid: %d", pid)
}
if systemd != memory {
log.Printf("MemoryAccounting not enabled for pid: %d", pid)
}
return systemd, nil
}

return cpu, nil
}
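
For readers unfamiliar with the file format getContainer consumes: /proc/<pid>/cgroup lists one hierarchy per line as "hierarchy-id:controllers:path", and runc's cgroups.ParseCgroupFile maps each controller (plus name=systemd) to its path. The sketch below is illustrative only; the sample file content is invented, and the inlined parser merely approximates what the runc helper does, to make the cpu/memory unified-hierarchy check above concrete.

// Standalone illustration of the data getContainer consumes. The sample
// content below is made up; on a real host it comes from /proc/<pid>/cgroup.
// The real code uses cgroups.ParseCgroupFile from runc; this sketch inlines
// an equivalent parse to show the cpu/memory/name=systemd comparison.
package main

import (
	"fmt"
	"strings"
)

const sampleCgroupFile = `11:memory:/kubelet
7:cpu,cpuacct:/kubelet
1:name=systemd:/system.slice/kubelet.service`

// parseCgroupLines maps each controller name (and "name=systemd") to its path.
func parseCgroupLines(content string) map[string]string {
	subsystems := map[string]string{}
	for _, line := range strings.Split(strings.TrimSpace(content), "\n") {
		parts := strings.SplitN(line, ":", 3) // "hierarchy-id:controllers:path"
		if len(parts) != 3 {
			continue
		}
		for _, sub := range strings.Split(parts[1], ",") {
			subsystems[sub] = parts[2]
		}
	}
	return subsystems
}

func main() {
	cgs := parseCgroupLines(sampleCgroupFile)
	cpu, memory := cgs["cpu"], cgs["memory"]
	if cpu != memory {
		fmt.Printf("cpu and memory cgroup hierarchy not unified. cpu: %s, memory: %s\n", cpu, memory)
		return
	}
	// On systemd hosts getContainer prefers the name=systemd path, warning if
	// cpu or memory accounting is not enabled on that unit.
	if systemd, ok := cgs["name=systemd"]; ok {
		fmt.Println("systemd cgroup:", systemd)
		return
	}
	fmt.Println("cpu cgroup:", cpu)
}
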
@@ -1,9 +1,12 @@
/*
Copyright 2016 The Kubernetes Authors.
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -18,36 +21,33 @@ import (
"strings"
"time"

"k8s.io/kubernetes/pkg/api"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/test/e2e/framework"

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)

var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
const (
// Interval to poll /stats/container on a node
containerStatsPollingPeriod = 10 * time.Second
// The monitoring time for one test.
monitoringTime = 6 * time.Minute
monitoringTime = 10 * time.Minute
// The periodic reporting period.
reportingPeriod = 3 * time.Minute
reportingPeriod = 5 * time.Minute

sleepAfterCreatePods = 10 * time.Second
sleepAfterDeletePods = 120 * time.Second
)

var (
ns string
rm *ResourceCollector
rc *ResourceCollector
om *framework.RuntimeOperationMonitor
)

f := framework.NewDefaultFramework("kubelet-perf")
f := framework.NewDefaultFramework("resource-usage")

BeforeEach(func() {
ns = f.Namespace.Name
@@ -59,45 +59,22 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
framework.Logf("runtime operation error metrics:\n%s", framework.FormatRuntimeOperationErrorRate(result))
})

// This test measures and verifies the steady resource usage of node is within limit
// It collects data from a standalone Cadvisor with housekeeping interval 1s.
// It verifies CPU percentiles and the lastest memory usage.
Context("regular resource usage tracking", func() {
rTests := []resourceTest{
{
podsPerNode: 0,
podsPerNode: 10,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.06, 0.95: 0.08},
stats.SystemContainerRuntime: {0.50: 0.05, 0.95: 0.06},
stats.SystemContainerKubelet: {0.50: 0.25, 0.95: 0.30},
stats.SystemContainerRuntime: {0.50: 0.30, 0.95: 0.40},
},
// We set the memory limits generously because the distribution
// of the addon pods affect the memory usage on each node.
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 70 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 85 * 1024 * 1024},
},
},
{
podsPerNode: 35,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.12, 0.95: 0.14},
stats.SystemContainerRuntime: {0.50: 0.05, 0.95: 0.07},
},
// We set the memory limits generously because the distribution
// of the addon pods affect the memory usage on each node.
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 70 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 150 * 1024 * 1024},
},
},
{
podsPerNode: 100,
cpuLimits: framework.ContainersCPUSummary{
stats.SystemContainerKubelet: {0.50: 0.17, 0.95: 0.22},
stats.SystemContainerRuntime: {0.50: 0.06, 0.95: 0.09},
},
// We set the memory limits generously because the distribution
// of the addon pods affect the memory usage on each node.
memLimits: framework.ResourceUsagePerContainer{
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 80 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
},
},
}
@@ -111,9 +88,13 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
It(name, func() {
expectedCPU, expectedMemory := itArg.cpuLimits, itArg.memLimits

// The test collects resource usage from a standalone Cadvisor pod.
// The Cadvsior of Kubelet has a housekeeping interval of 10s, which is too long to
// show the resource usage spikes. But changing its interval increases the overhead
// of kubelet. Hence we use a Cadvisor pod.
createCadvisorPod(f)
rm = NewResourceCollector(containerStatsPollingPeriod)
rm.Start()
rc = NewResourceCollector(containerStatsPollingPeriod)
rc.Start()

By("Creating a batch of Pods")
pods := newTestPods(podsPerNode, ImageRegistry[pauseImage], "test_pod")
@@ -125,8 +106,8 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
time.Sleep(sleepAfterCreatePods)

// Log once and flush the stats.
rm.LogLatest()
rm.Reset()
rc.LogLatest()
rc.Reset()

By("Start monitoring resource usage")
// Periodically dump the cpu summary until the deadline is met.
@@ -143,13 +124,15 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
} else {
time.Sleep(reportingPeriod)
}
logPodsOnNodes(f.Client)
logPodsOnNode(f.Client)
}

By("Reporting overall resource usage")
logPodsOnNodes(f.Client)
rc.Stop()

usagePerContainer, err := rm.GetLatest()
By("Reporting overall resource usage")
logPodsOnNode(f.Client)

usagePerContainer, err := rc.GetLatest()
Expect(err).NotTo(HaveOccurred())

// TODO(random-liu): Remove the original log when we migrate to new perfdash
@@ -163,7 +146,7 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode))
verifyMemoryLimits(f.Client, expectedMemory, usagePerNode)

cpuSummary := rm.GetCPUSummary()
cpuSummary := rc.GetCPUSummary()
framework.Logf("%s", formatCPUSummary(cpuSummary))

// Log perf result
@@ -171,21 +154,6 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
cpuSummaryPerNode[nodeName] = cpuSummary
framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode))
verifyCPULimits(expectedCPU, cpuSummaryPerNode)

// delete pods
By("Deleting a batch of pods")
deleteBatchPod(f, pods)

rm.Stop()

// tear down cadvisor
Expect(f.Client.Pods(ns).Delete(cadvisorPodName, api.NewDeleteOptions(30))).
NotTo(HaveOccurred())
Expect(framework.WaitForPodToDisappear(f.Client, ns, cadvisorPodName, labels.Everything(),
3*time.Second, 10*time.Minute)).
NotTo(HaveOccurred())

time.Sleep(sleepAfterDeletePods)
})
}
})
@@ -267,7 +235,7 @@ func verifyCPULimits(expected framework.ContainersCPUSummary, actual framework.N
}
}

func logPodsOnNodes(c *client.Client) {
func logPodsOnNode(c *client.Client) {
nodeName := framework.TestContext.NodeName
podList, err := framework.GetKubeletRunningPods(c, nodeName)
if err != nil {