mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-23 11:50:44 +00:00)
modify resource_collector.go to get container names of kubelet and docker dynamically
commit f3f3e965cc (parent 32e1db16c5)
@@ -1,9 +1,12 @@
 /*
-Copyright 2016 The Kubernetes Authors.
+Copyright 2015 The Kubernetes Authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
     http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -42,15 +45,13 @@ const (
     kubeletAddr = "localhost:10255"
 )
 
-var _ = framework.KubeDescribe("Density", func() {
+var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
     const (
         // The data collection of `resource collector' and of the standalone cadvisor
         // is not synchronized, so `resource collector' may miss data or
         // collect duplicated data.
         monitoringInterval = 500 * time.Millisecond
-        sleepBeforeEach       = 30 * time.Second
         sleepBeforeCreatePods = 30 * time.Second
-        sleepAfterDeletePods  = 60 * time.Second
     )
 
     var (
@@ -67,7 +68,6 @@ var _ = framework.KubeDescribe("Density", func() {
     })
 
     AfterEach(func() {
-        time.Sleep(sleepAfterDeletePods)
     })
 
     Context("create a batch of pods", func() {
@@ -76,41 +76,21 @@ var _ = framework.KubeDescribe("Density", func() {
             podsNr:   10,
             interval: 0 * time.Millisecond,
             cpuLimits: framework.ContainersCPUSummary{
-                stats.SystemContainerKubelet: {0.50: 0.10, 0.95: 0.20},
-                stats.SystemContainerRuntime: {0.50: 0.10, 0.95: 0.50},
+                stats.SystemContainerKubelet: {0.50: 0.20, 0.95: 0.30},
+                stats.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
             },
             memLimits: framework.ResourceUsagePerContainer{
-                stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
-                stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 250 * 1024 * 1024},
+                stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
+                stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
             },
             // percentile limit of single pod startup latency
             podStartupLimits: framework.LatencyMetric{
-                Perc50: 7 * time.Second,
-                Perc90: 10 * time.Second,
-                Perc99: 15 * time.Second,
+                Perc50: 10 * time.Second,
+                Perc90: 15 * time.Second,
+                Perc99: 20 * time.Second,
             },
             // upper bound of startup latency of a batch of pods
-            podBatchStartupLimit: 20 * time.Second,
-        },
-        {
-            podsNr:   30,
-            interval: 0 * time.Millisecond,
-            cpuLimits: framework.ContainersCPUSummary{
-                stats.SystemContainerKubelet: {0.50: 0.10, 0.95: 0.35},
-                stats.SystemContainerRuntime: {0.50: 0.10, 0.95: 0.70},
-            },
-            memLimits: framework.ResourceUsagePerContainer{
-                stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
-                stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
-            },
-            // percentile limit of single pod startup latency
-            podStartupLimits: framework.LatencyMetric{
-                Perc50: 30 * time.Second,
-                Perc90: 35 * time.Second,
-                Perc99: 40 * time.Second,
-            },
-            // upper bound of startup latency of a batch of pods
-            podBatchStartupLimit: 90 * time.Second,
+            podBatchStartupLimit: 25 * time.Second,
         },
     }
 
@@ -139,7 +119,7 @@ var _ = framework.KubeDescribe("Density", func() {
         controller := newInformerWatchPod(f, mutex, watchTimes, podType)
         go controller.Run(stopCh)
 
-        // Zhou: In test we see kubelet starts while it is busy on sth, as a result `syncLoop'
+        // Zhou: In test we see kubelet starts while it is busy on something, as a result `syncLoop'
         // does not respond to pod creation immediately. Creating the first pod has a delay of around 5s.
         // The node status has been `ready' so `wait and check node being ready' does not help here.
         // Now wait here for a grace period to let `syncLoop' become ready.
@@ -153,14 +133,14 @@ var _ = framework.KubeDescribe("Density", func() {
         // it returns a map[`pod name']`creation time' as the creation timestamps
         createTimes := createBatchPodWithRateControl(f, pods, itArg.interval)
 
-        By("Waiting for all Pods begin observed by the watch...")
+        By("Waiting for all Pods to be observed by the watch...")
-        // checks every 10s util all pods are running. it timeouts ater 10min
+        // checks every 10s until all pods are running. it times out after 10min
         Eventually(func() bool {
             return len(watchTimes) == itArg.podsNr
         }, 10*time.Minute, 10*time.Second).Should(BeTrue())
 
         if len(watchTimes) < itArg.podsNr {
-            framework.Failf("Timeout reached waiting for all Pods being observed by the watch.")
+            framework.Failf("Timeout reached waiting for all Pods to be observed by the watch.")
         }
 
         // stop the watching controller, and the resource collector
@@ -204,18 +184,6 @@ var _ = framework.KubeDescribe("Density", func() {
             // verify resource
             By("Verifying resource")
             verifyResource(f, testArg, rm)
 
-            // delete pods
-            By("Deleting a batch of pods")
-            deleteBatchPod(f, pods)
-
-            // tear down cadvisor
-            Expect(f.Client.Pods(ns).Delete(cadvisorPodName, api.NewDeleteOptions(30))).
-                NotTo(HaveOccurred())
-
-            Eventually(func() error {
-                return checkPodDeleted(f, cadvisorPodName)
-            }, 10*time.Minute, time.Second*3).Should(BeNil())
         })
     }
 })
@@ -226,34 +194,17 @@ var _ = framework.KubeDescribe("Density", func() {
             podsNr:   10,
             bgPodsNr: 10,
             cpuLimits: framework.ContainersCPUSummary{
-                stats.SystemContainerKubelet: {0.50: 0.10, 0.95: 0.12},
-                stats.SystemContainerRuntime: {0.50: 0.16, 0.95: 0.20},
+                stats.SystemContainerKubelet: {0.50: 0.20, 0.95: 0.25},
+                stats.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
             },
             memLimits: framework.ResourceUsagePerContainer{
-                stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
-                stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
+                stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
+                stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
             },
             podStartupLimits: framework.LatencyMetric{
-                Perc50: 1500 * time.Millisecond,
-                Perc90: 2500 * time.Millisecond,
-                Perc99: 3500 * time.Millisecond,
+                Perc50: 3000 * time.Millisecond,
+                Perc90: 4000 * time.Millisecond,
+                Perc99: 5000 * time.Millisecond,
-            },
-        },
-        {
-            podsNr:   10,
-            bgPodsNr: 30,
-            cpuLimits: framework.ContainersCPUSummary{
-                stats.SystemContainerKubelet: {0.50: 0.12, 0.95: 0.15},
-                stats.SystemContainerRuntime: {0.50: 0.22, 0.95: 0.27},
-            },
-            memLimits: framework.ResourceUsagePerContainer{
-                stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 40 * 1024 * 1024},
-                stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
-            },
-            podStartupLimits: framework.LatencyMetric{
-                Perc50: 1500 * time.Millisecond,
-                Perc90: 2500 * time.Millisecond,
-                Perc99: 3500 * time.Millisecond,
             },
         },
     }
@@ -273,7 +224,7 @@ var _ = framework.KubeDescribe("Density", func() {
             // all pods are running when it returns
             f.PodClient().CreateBatch(bgPods)
 
-            //time.Sleep(sleepBeforeCreatePods)
+            time.Sleep(sleepBeforeCreatePods)
 
             // starting resource monitoring
             rm.Start()
@@ -290,18 +241,6 @@ var _ = framework.KubeDescribe("Density", func() {
             // verify resource
             By("Verifying resource")
             verifyResource(f, testArg, rm)
 
-            // delete pods
-            By("Deleting a batch of pods")
-            deleteBatchPod(f, append(bgPods, testPods...))
-
-            // tear down cadvisor
-            Expect(f.Client.Pods(ns).Delete(cadvisorPodName, api.NewDeleteOptions(30))).
-                NotTo(HaveOccurred())
-
-            Eventually(func() error {
-                return checkPodDeleted(f, cadvisorPodName)
-            }, 10*time.Minute, time.Second*3).Should(BeNil())
         })
     }
 })
@@ -309,7 +248,8 @@ var _ = framework.KubeDescribe("Density", func() {
 
 type DensityTest struct {
     // number of pods
     podsNr int
+    // number of background pods
     bgPodsNr int
     // interval between creating pod (rate control)
     interval time.Duration
@@ -239,6 +239,10 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
         }
     } else {
         cmdArgs = append(cmdArgs, getKubeletServerBin())
+        cmdArgs = append(cmdArgs,
+            "--kubelet-cgroups=/kubelet",
+            "--runtime-cgroups=/docker-daemon",
+        )
     }
     cmdArgs = append(cmdArgs,
         "--api-servers", "http://127.0.0.1:8080",
@@ -252,9 +256,6 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
         "--file-check-frequency", "10s", // Check file frequently so tests won't wait too long
         "--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
         "--pod-cidr=10.180.0.0/24", // Assign a fixed CIDR to the node because there is no node controller.
-        "--cgroup-root=/",
-        "--runtime-cgroups=/docker-daemon",
-        "--kubelet-cgroups=/kubelet",
     )
     if es.cgroupsPerQOS {
        cmdArgs = append(cmdArgs,
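Taken together, these two hunks stop passing the fixed cgroup flags (`--cgroup-root=/`, `--runtime-cgroups=/docker-daemon`, `--kubelet-cgroups=/kubelet`) unconditionally: only the branch that launches a bare kubelet binary pins the well-known names, while a kubelet started under a process manager keeps whatever cgroups it is placed in, and the resource collector now discovers those names at runtime (see resource_collector.go below). A minimal sketch of the resulting wiring; the `haveSystemdRun` flag and the surrounding names are illustrative, not the exact code of this file:

```go
// Sketch only: the branch structure after this change (names assumed).
func buildKubeletArgs(haveSystemdRun bool) []string {
	var cmdArgs []string
	if haveSystemdRun {
		// The process manager owns the cgroup layout; nothing is pinned here,
		// and the collector reads the real names from /proc/<pid>/cgroup.
		cmdArgs = append(cmdArgs, "systemd-run", "kubelet")
	} else {
		// A bare kubelet gets fixed, well-known cgroup names.
		cmdArgs = append(cmdArgs, "kubelet",
			"--kubelet-cgroups=/kubelet",
			"--runtime-cgroups=/docker-daemon",
		)
	}
	return cmdArgs
}
```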
@@ -1,13 +1,16 @@
 /*
-Copyright 2016 The Kubernetes Authors.
+Copyright 2015 The Kubernetes Authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
     http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing perissions and
+See the License for the specific language governing permissions and
 limitations under the License.
 */
 
@@ -16,7 +19,12 @@ package e2e_node
 import (
     "bytes"
     "fmt"
+    "io/ioutil"
+    "log"
+    "os"
+    "os/exec"
     "sort"
+    "strconv"
     "strings"
     "sync"
     "text/tabwriter"
@@ -24,10 +32,12 @@ import (
 
     cadvisorclient "github.com/google/cadvisor/client/v2"
     cadvisorapiv2 "github.com/google/cadvisor/info/v2"
+    "github.com/opencontainers/runc/libcontainer/cgroups"
    "k8s.io/kubernetes/pkg/api"
     "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
     "k8s.io/kubernetes/pkg/labels"
     "k8s.io/kubernetes/pkg/util"
+    "k8s.io/kubernetes/pkg/util/runtime"
     "k8s.io/kubernetes/pkg/util/wait"
     "k8s.io/kubernetes/test/e2e/framework"
 
@@ -39,15 +49,12 @@ const (
     cadvisorImageName = "google/cadvisor:latest"
     cadvisorPodName   = "cadvisor"
     cadvisorPort      = 8090
+    // housekeeping interval of Cadvisor (second)
+    houseKeepingInterval = 1
 )
 
 var (
-    systemContainers = map[string]string{
-        //"root": "/",
-        //stats.SystemContainerMisc: "misc"
-        stats.SystemContainerKubelet: "kubelet",
-        stats.SystemContainerRuntime: "docker-daemon",
-    }
+    systemContainers map[string]string
 )
 
 type ResourceCollector struct {
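Note that `systemContainers` is now only declared here and stays nil until `Start()` (next hunk) assigns it. Reading a nil map in Go is safe but always misses, so a consumer that runs before `Start()`, such as `GetLatest()` ranging over `systemContainers`, would quietly report nothing:

```go
var m map[string]string // nil until assigned, like systemContainers before Start()
_, ok := m["kubelet"]   // ok == false; lookups on a nil map do not panic
for range m {
	// zero iterations: nothing would be collected or verified
}
```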
@@ -69,6 +76,18 @@ func NewResourceCollector(interval time.Duration) *ResourceCollector {
 }
 
 func (r *ResourceCollector) Start() {
+    // Get the cgroup containers for kubelet and docker
+    kubeletContainer, err := getContainerNameForProcess(kubeletProcessName, "")
+    dockerContainer, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
+    if err == nil {
+        systemContainers = map[string]string{
+            stats.SystemContainerKubelet: kubeletContainer,
+            stats.SystemContainerRuntime: dockerContainer,
+        }
+    } else {
+        framework.Failf("Failed to get docker container name in test-e2e-node resource collector.")
+    }
+
     wait.Poll(1*time.Second, 1*time.Minute, func() (bool, error) {
         var err error
         r.client, err = cadvisorclient.NewClient(fmt.Sprintf("http://localhost:%d/", cadvisorPort))
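One caveat in the hunk above: the second `:=` reuses `err`, so an error from the kubelet lookup is overwritten by the docker lookup before it is ever checked, and the failure message only mentions docker. A more defensive variant (a sketch against the same helpers, not the committed code):

```go
kubeletContainer, err := getContainerNameForProcess(kubeletProcessName, "")
if err != nil {
	framework.Failf("Failed to get kubelet container name in test-e2e-node resource collector: %v", err)
}
dockerContainer, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
if err != nil {
	framework.Failf("Failed to get docker container name in test-e2e-node resource collector: %v", err)
}
systemContainers = map[string]string{
	stats.SystemContainerKubelet: kubeletContainer,
	stats.SystemContainerRuntime: dockerContainer,
}
```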
@@ -123,7 +142,7 @@ func (r *ResourceCollector) collectStats(oldStatsMap map[string]*cadvisorapiv2.C
         framework.Logf("Error getting container stats, err: %v", err)
         return
     }
-    cStats, ok := ret["/"+name]
+    cStats, ok := ret[name]
     if !ok {
         framework.Logf("Missing info/stats for container %q", name)
         return
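The `"/"` prefix is dropped because the old static map held bare cgroup names (`"kubelet"`, `"docker-daemon"`) that had to be prefixed into absolute container names, whereas `getContainer()` (added at the end of this file) already returns a full cgroup path that matches the cadvisor key directly. Illustrative values only:

```go
// Before: name == "kubelet"  -> lookup key "/" + name == "/kubelet"
// After:  name == "/kubelet" -> lookup key is name itself (plain cgroupfs),
//         or e.g. "/system.slice/docker.service" under systemd (illustrative).
```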
@@ -160,7 +179,7 @@ func (r *ResourceCollector) GetLatest() (framework.ResourceUsagePerContainer, er
     for key, name := range systemContainers {
         contStats, ok := r.buffers[name]
         if !ok || len(contStats) == 0 {
-            return nil, fmt.Errorf("Resource usage is not ready yet")
+            return nil, fmt.Errorf("Resource usage of %s:%s is not ready yet", key, name)
         }
         stats[key] = contStats[len(contStats)-1]
     }
@@ -257,11 +276,10 @@ func createCadvisorPod(f *framework.Framework) {
     f.PodClient().CreateSync(&api.Pod{
         ObjectMeta: api.ObjectMeta{
             Name: cadvisorPodName,
-            //Labels: map[string]string{"type": cadvisorPodType, "name": cadvisorPodName},
         },
         Spec: api.PodSpec{
-            // Don't restart the Pod since it is expected to exit
-            RestartPolicy: api.RestartPolicyNever,
+            // It uses a host port for the tests to collect data.
+            // Currently we can not use port mapping in test-e2e-node.
             SecurityContext: &api.PodSecurityContext{
                 HostNetwork: true,
             },
@@ -301,7 +319,7 @@ func createCadvisorPod(f *framework.Framework) {
             },
             Args: []string{
                 "--profiling",
-                "--housekeeping_interval=1s",
+                fmt.Sprintf("--housekeeping_interval=%ds", houseKeepingInterval),
                 fmt.Sprintf("--port=%d", cadvisorPort),
             },
         },
@@ -336,7 +354,7 @@ func deleteBatchPod(f *framework.Framework, pods []*api.Pod) {
     go func(pod *api.Pod) {
         defer wg.Done()
 
-        err := f.Client.Pods(ns).Delete(pod.ObjectMeta.Name, api.NewDeleteOptions(60))
+        err := f.Client.Pods(ns).Delete(pod.ObjectMeta.Name, api.NewDeleteOptions(30))
         Expect(err).NotTo(HaveOccurred())
 
         Expect(framework.WaitForPodToDisappear(f.Client, ns, pod.ObjectMeta.Name, labels.Everything(),
@@ -348,9 +366,9 @@ func deleteBatchPod(f *framework.Framework, pods []*api.Pod) {
     return
 }
 
-func newTestPods(podsPerNode int, imageName, podType string) []*api.Pod {
+func newTestPods(numPods int, imageName, podType string) []*api.Pod {
     var pods []*api.Pod
-    for i := 0; i < podsPerNode; i++ {
+    for i := 0; i < numPods; i++ {
         podName := "test-" + string(util.NewUUID())
         labels := map[string]string{
             "type": podType,
@@ -363,7 +381,8 @@ func newTestPods(podsPerNode int, imageName, podType string) []*api.Pod {
             Labels: labels,
         },
         Spec: api.PodSpec{
-            RestartPolicy: api.RestartPolicyNever,
+            // ToDo: restart policy is always
+            // check whether pods restart at the end of tests
             Containers: []api.Container{
                 {
                     Image: imageName,
@@ -375,3 +394,119 @@ func newTestPods(podsPerNode int, imageName, podType string) []*api.Pod {
         }
     }
     return pods
 }
+
+// code for getting container name of docker
+const (
+    kubeletProcessName    = "kubelet"
+    dockerProcessName     = "docker"
+    dockerPidFile         = "/var/run/docker.pid"
+    containerdProcessName = "docker-containerd"
+    containerdPidFile     = "/run/docker/libcontainerd/docker-containerd.pid"
+)
+
+func getContainerNameForProcess(name, pidFile string) (string, error) {
+    pids, err := getPidsForProcess(name, pidFile)
+    if err != nil {
+        return "", fmt.Errorf("failed to detect process id for %q - %v", name, err)
+    }
+    if len(pids) == 0 {
+        return "", nil
+    }
+    cont, err := getContainer(pids[0])
+    if err != nil {
+        return "", err
+    }
+    return cont, nil
+}
+
+func getPidFromPidFile(pidFile string) (int, error) {
+    file, err := os.Open(pidFile)
+    if err != nil {
+        return 0, fmt.Errorf("error opening pid file %s: %v", pidFile, err)
+    }
+    defer file.Close()
+
+    data, err := ioutil.ReadAll(file)
+    if err != nil {
+        return 0, fmt.Errorf("error reading pid file %s: %v", pidFile, err)
+    }
+
+    pid, err := strconv.Atoi(string(data))
+    if err != nil {
+        return 0, fmt.Errorf("error parsing %s as a number: %v", string(data), err)
+    }
+
+    return pid, nil
+}
+
+func getPidsForProcess(name, pidFile string) ([]int, error) {
+    if len(pidFile) > 0 {
+        if pid, err := getPidFromPidFile(pidFile); err == nil {
+            return []int{pid}, nil
+        } else {
+            // log the error and fall back to pidof
+            runtime.HandleError(err)
+        }
+    }
+
+    out, err := exec.Command("pidof", name).Output()
+    if err != nil {
+        return []int{}, fmt.Errorf("failed to find pid of %q: %v", name, err)
+    }
+
+    // The output of pidof is a list of pids.
+    pids := []int{}
+    for _, pidStr := range strings.Split(strings.TrimSpace(string(out)), " ") {
+        pid, err := strconv.Atoi(pidStr)
+        if err != nil {
+            continue
+        }
+        pids = append(pids, pid)
+    }
+    return pids, nil
+}
+
+// getContainer returns the cgroup associated with the specified pid.
+// It enforces a unified hierarchy for memory and cpu cgroups.
+// On systemd environments, it uses the name=systemd cgroup for the specified pid.
+func getContainer(pid int) (string, error) {
+    cgs, err := cgroups.ParseCgroupFile(fmt.Sprintf("/proc/%d/cgroup", pid))
+    if err != nil {
+        return "", err
+    }
+
+    cpu, found := cgs["cpu"]
+    if !found {
+        return "", cgroups.NewNotFoundError("cpu")
+    }
+    memory, found := cgs["memory"]
+    if !found {
+        return "", cgroups.NewNotFoundError("memory")
+    }
+
+    // since we use this container for accounting, we need to ensure it's a unified hierarchy.
+    if cpu != memory {
+        return "", fmt.Errorf("cpu and memory cgroup hierarchy not unified. cpu: %s, memory: %s", cpu, memory)
+    }
+
+    // on systemd, every pid is in a unified cgroup hierarchy (name=systemd as seen in systemd-cgls)
+    // cpu and memory accounting is off by default, users may choose to enable it per unit or globally.
+    // users could enable CPU and memory accounting globally via /etc/systemd/system.conf (DefaultCPUAccounting=true DefaultMemoryAccounting=true).
+    // users could also enable CPU and memory accounting per unit via CPUAccounting=true and MemoryAccounting=true
+    // we only warn if accounting is not enabled for CPU or memory so as to not break local development flows where kubelet is launched in a terminal.
+    // for example, the cgroup for the user session will be something like /user.slice/user-X.slice/session-X.scope, but the cpu and memory
+    // cgroup will be the closest ancestor where accounting is performed (most likely /) on systems that launch docker containers.
+    // as a result, on those systems, you will not get cpu or memory accounting statistics for kubelet.
+    // in addition, you would not get memory or cpu accounting for the runtime unless accounting was enabled on its unit (or globally).
+    if systemd, found := cgs["name=systemd"]; found {
+        if systemd != cpu {
+            log.Printf("CPUAccounting not enabled for pid: %d", pid)
+        }
+        if systemd != memory {
+            log.Printf("MemoryAccounting not enabled for pid: %d", pid)
+        }
+        return systemd, nil
+    }
+
+    return cpu, nil
+}
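For reference, each line of the `/proc/<pid>/cgroup` file that `getContainer()` parses has the form `hierarchy-id:controller[,controller...]:path`, and `cgroups.ParseCgroupFile` maps every controller to its cgroup path. A runnable sketch of the same parsing, with illustrative file contents for a kubelet pinned via `--kubelet-cgroups=/kubelet` on a cgroup v1 host:

```go
package main

import (
	"fmt"
	"strings"
)

// parseCgroupLines mimics what cgroups.ParseCgroupFile does with the
// contents of /proc/<pid>/cgroup: every controller listed on a line is
// mapped to that line's cgroup path.
func parseCgroupLines(data string) map[string]string {
	cgs := map[string]string{}
	for _, line := range strings.Split(strings.TrimSpace(data), "\n") {
		parts := strings.SplitN(line, ":", 3)
		if len(parts) != 3 {
			continue
		}
		for _, ctrl := range strings.Split(parts[1], ",") {
			cgs[ctrl] = parts[2]
		}
	}
	return cgs
}

func main() {
	// Illustrative /proc/<pid>/cgroup contents (cgroup v1, plain cgroupfs).
	sample := "4:memory:/kubelet\n3:cpu,cpuacct:/kubelet\n1:name=systemd:/kubelet"
	cgs := parseCgroupLines(sample)
	// cpu and memory agree, so getContainer() would return "/kubelet" here.
	fmt.Println(cgs["cpu"], cgs["memory"], cgs["name=systemd"])
}
```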
@@ -1,9 +1,12 @@
 /*
-Copyright 2016 The Kubernetes Authors.
+Copyright 2015 The Kubernetes Authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
     http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -18,36 +21,33 @@ import (
     "strings"
     "time"
 
-    "k8s.io/kubernetes/pkg/api"
     client "k8s.io/kubernetes/pkg/client/unversioned"
     "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
-    "k8s.io/kubernetes/pkg/labels"
     "k8s.io/kubernetes/test/e2e/framework"
 
     . "github.com/onsi/ginkgo"
     . "github.com/onsi/gomega"
 )
 
-var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
+var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
     const (
         // Interval to poll /stats/container on a node
         containerStatsPollingPeriod = 10 * time.Second
         // The monitoring time for one test.
-        monitoringTime = 6 * time.Minute
+        monitoringTime = 10 * time.Minute
         // The periodic reporting period.
-        reportingPeriod = 3 * time.Minute
+        reportingPeriod = 5 * time.Minute
 
         sleepAfterCreatePods = 10 * time.Second
-        sleepAfterDeletePods = 120 * time.Second
     )
 
     var (
         ns string
-        rm *ResourceCollector
+        rc *ResourceCollector
         om *framework.RuntimeOperationMonitor
     )
 
-    f := framework.NewDefaultFramework("kubelet-perf")
+    f := framework.NewDefaultFramework("resource-usage")
 
     BeforeEach(func() {
         ns = f.Namespace.Name
@@ -59,45 +59,22 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
         framework.Logf("runtime operation error metrics:\n%s", framework.FormatRuntimeOperationErrorRate(result))
     })
 
+    // This test measures and verifies that the steady resource usage of the node is within limits.
+    // It collects data from a standalone Cadvisor with housekeeping interval 1s.
+    // It verifies CPU percentiles and the latest memory usage.
     Context("regular resource usage tracking", func() {
         rTests := []resourceTest{
             {
-                podsPerNode: 0,
+                podsPerNode: 10,
                 cpuLimits: framework.ContainersCPUSummary{
-                    stats.SystemContainerKubelet: {0.50: 0.06, 0.95: 0.08},
-                    stats.SystemContainerRuntime: {0.50: 0.05, 0.95: 0.06},
+                    stats.SystemContainerKubelet: {0.50: 0.25, 0.95: 0.30},
+                    stats.SystemContainerRuntime: {0.50: 0.30, 0.95: 0.40},
                 },
                 // We set the memory limits generously because the distribution
                 // of the addon pods affects the memory usage on each node.
                 memLimits: framework.ResourceUsagePerContainer{
-                    stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 70 * 1024 * 1024},
-                    stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 85 * 1024 * 1024},
+                    stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
+                    stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
-                },
-            },
-            {
-                podsPerNode: 35,
-                cpuLimits: framework.ContainersCPUSummary{
-                    stats.SystemContainerKubelet: {0.50: 0.12, 0.95: 0.14},
-                    stats.SystemContainerRuntime: {0.50: 0.05, 0.95: 0.07},
-                },
-                // We set the memory limits generously because the distribution
-                // of the addon pods affect the memory usage on each node.
-                memLimits: framework.ResourceUsagePerContainer{
-                    stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 70 * 1024 * 1024},
-                    stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 150 * 1024 * 1024},
-                },
-            },
-            {
-                podsPerNode: 100,
-                cpuLimits: framework.ContainersCPUSummary{
-                    stats.SystemContainerKubelet: {0.50: 0.17, 0.95: 0.22},
-                    stats.SystemContainerRuntime: {0.50: 0.06, 0.95: 0.09},
-                },
-                // We set the memory limits generously because the distribution
-                // of the addon pods affect the memory usage on each node.
-                memLimits: framework.ResourceUsagePerContainer{
-                    stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 80 * 1024 * 1024},
-                    stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 300 * 1024 * 1024},
                 },
             },
         }
@@ -111,9 +88,13 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
         It(name, func() {
             expectedCPU, expectedMemory := itArg.cpuLimits, itArg.memLimits
 
+            // The test collects resource usage from a standalone Cadvisor pod.
+            // The Cadvisor of Kubelet has a housekeeping interval of 10s, which is too long to
+            // show the resource usage spikes. But changing its interval increases the overhead
+            // of kubelet. Hence we use a standalone Cadvisor pod.
             createCadvisorPod(f)
-            rm = NewResourceCollector(containerStatsPollingPeriod)
-            rm.Start()
+            rc = NewResourceCollector(containerStatsPollingPeriod)
+            rc.Start()
 
             By("Creating a batch of Pods")
             pods := newTestPods(podsPerNode, ImageRegistry[pauseImage], "test_pod")
@@ -125,8 +106,8 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
             time.Sleep(sleepAfterCreatePods)
 
             // Log once and flush the stats.
-            rm.LogLatest()
-            rm.Reset()
+            rc.LogLatest()
+            rc.Reset()
 
             By("Start monitoring resource usage")
             // Periodically dump the cpu summary until the deadline is met.
@@ -143,13 +124,15 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
                 } else {
                     time.Sleep(reportingPeriod)
                 }
-                logPodsOnNodes(f.Client)
+                logPodsOnNode(f.Client)
             }
 
-            By("Reporting overall resource usage")
-            logPodsOnNodes(f.Client)
+            rc.Stop()
 
-            usagePerContainer, err := rm.GetLatest()
+            By("Reporting overall resource usage")
+            logPodsOnNode(f.Client)
+
+            usagePerContainer, err := rc.GetLatest()
             Expect(err).NotTo(HaveOccurred())
 
             // TODO(random-liu): Remove the original log when we migrate to new perfdash
@@ -163,7 +146,7 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
             framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode))
             verifyMemoryLimits(f.Client, expectedMemory, usagePerNode)
 
-            cpuSummary := rm.GetCPUSummary()
+            cpuSummary := rc.GetCPUSummary()
             framework.Logf("%s", formatCPUSummary(cpuSummary))
 
             // Log perf result
@@ -171,21 +154,6 @@ var _ = framework.KubeDescribe("Kubelet-perf [Serial] [Slow]", func() {
             cpuSummaryPerNode[nodeName] = cpuSummary
             framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode))
             verifyCPULimits(expectedCPU, cpuSummaryPerNode)
 
-            // delete pods
-            By("Deleting a batch of pods")
-            deleteBatchPod(f, pods)
-
-            rm.Stop()
-
-            // tear down cadvisor
-            Expect(f.Client.Pods(ns).Delete(cadvisorPodName, api.NewDeleteOptions(30))).
-                NotTo(HaveOccurred())
-            Expect(framework.WaitForPodToDisappear(f.Client, ns, cadvisorPodName, labels.Everything(),
-                3*time.Second, 10*time.Minute)).
-                NotTo(HaveOccurred())
-
-            time.Sleep(sleepAfterDeletePods)
         })
     }
 })
@@ -267,7 +235,7 @@ func verifyCPULimits(expected framework.ContainersCPUSummary, actual framework.N
     }
 }
 
-func logPodsOnNodes(c *client.Client) {
+func logPodsOnNode(c *client.Client) {
     nodeName := framework.TestContext.NodeName
     podList, err := framework.GetKubeletRunningPods(c, nodeName)
     if err != nil {