Merge pull request #55642 from dashpole/disable_cadvisor_disk_for_cri

Automatic merge from submit-queue (batch tested with PRs 55642, 55897, 55835, 55496, 55313). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Disable container disk metrics when using the CRI stats integration

Issue: https://github.com/kubernetes/kubernetes/issues/51798

As explained in the issue, runtimes which make use of the CRI Stats API still have the performance overhead of collecting those same stats through cAdvisor.
The CRI Stats API has metrics for CPU, Memory, and Disk.  This PR significantly reduces the added overhead due to collecting these stats in both cAdvisor and in the runtime.
This PR disables container disk metrics, which are very expensive to collect.

This PR does not disable node-level disk stats, as the "Raw" container handler does not currently respect ignoring DiskUsageMetrics.
This PR factors out the logic for determining whether or not to use the CRI stats provider into a helper function, as cAdvisor is instantiated before it is passed to the kubelet as a dependency.

cc @kubernetes/sig-node-pr-reviews @derekwaynecarr  
/kind feature
/sig node

/assign @Random-Liu @derekwaynecarr
This commit is contained in:
Kubernetes Submit Queue 2017-11-18 10:46:30 -08:00 committed by GitHub
commit ef3b27cbd4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 27 additions and 14 deletions

View File

@ -414,7 +414,7 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies) (err error) {
if kubeDeps.CAdvisorInterface == nil {
imageFsInfoProvider := cadvisor.NewImageFsInfoProvider(s.ContainerRuntime, s.RemoteRuntimeEndpoint)
kubeDeps.CAdvisorInterface, err = cadvisor.New(s.Address, uint(s.CAdvisorPort), imageFsInfoProvider, s.RootDirectory)
kubeDeps.CAdvisorInterface, err = cadvisor.New(s.Address, uint(s.CAdvisorPort), imageFsInfoProvider, s.RootDirectory, cadvisor.UsingLegacyCadvisorStats(s.ContainerRuntime, s.RemoteRuntimeEndpoint))
if err != nil {
return err
}

View File

@ -28,6 +28,7 @@ go_library(
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//vendor/github.com/google/cadvisor/events:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/info/v2:go_default_library",
@ -36,7 +37,6 @@ go_library(
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//pkg/kubelet/types:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/google/cadvisor/cache/memory:go_default_library",
"//vendor/github.com/google/cadvisor/container:go_default_library",

View File

@ -105,11 +105,16 @@ func containerLabels(c *cadvisorapi.ContainerInfo) map[string]string {
}
// New creates a cAdvisor and exports its API on the specified port if port > 0.
func New(address string, port uint, imageFsInfoProvider ImageFsInfoProvider, rootPath string) (Interface, error) {
func New(address string, port uint, imageFsInfoProvider ImageFsInfoProvider, rootPath string, usingLegacyStats bool) (Interface, error) {
sysFs := sysfs.NewRealSysFs()
ignoreMetrics := cadvisormetrics.MetricSet{cadvisormetrics.NetworkTcpUsageMetrics: struct{}{}, cadvisormetrics.NetworkUdpUsageMetrics: struct{}{}}
if !usingLegacyStats {
ignoreMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{}
}
// Create and start the cAdvisor container manager.
m, err := manager.New(memory.New(statsCacheDuration, nil), sysFs, maxHousekeepingInterval, allowDynamicHousekeeping, cadvisormetrics.MetricSet{cadvisormetrics.NetworkTcpUsageMetrics: struct{}{}, cadvisormetrics.NetworkUdpUsageMetrics: struct{}{}}, http.DefaultClient)
m, err := manager.New(memory.New(statsCacheDuration, nil), sysFs, maxHousekeepingInterval, allowDynamicHousekeeping, ignoreMetrics, http.DefaultClient)
if err != nil {
return nil, err
}

View File

@ -17,6 +17,8 @@ limitations under the License.
package cadvisor
import (
goruntime "runtime"
cadvisorapi "github.com/google/cadvisor/info/v1"
cadvisorapi2 "github.com/google/cadvisor/info/v2"
"k8s.io/api/core/v1"
@ -24,6 +26,7 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
func CapacityFromMachineInfo(info *cadvisorapi.MachineInfo) v1.ResourceList {
@ -57,3 +60,16 @@ func EphemeralStorageCapacityFromFsInfo(info cadvisorapi2.FsInfo) v1.ResourceLis
}
return c
}
// CRI integrations should get container metrics via CRI. Docker
// uses the built-in cadvisor to gather such metrics on Linux for
// historical reasons.
// cri-o relies on cadvisor as a temporary workaround. The code should
// be removed. Related issue:
// https://github.com/kubernetes/kubernetes/issues/51798
// UsingLegacyCadvisorStats returns true if container stats are provided by cadvisor instead of through the CRI
func UsingLegacyCadvisorStats(runtime, runtimeEndpoint string) bool {
return runtime == kubetypes.RktContainerRuntime ||
(runtime == kubetypes.DockerContainerRuntime && goruntime.GOOS == "linux") ||
runtimeEndpoint == "/var/run/crio.sock"
}

View File

@ -24,7 +24,6 @@ import (
"net/url"
"os"
"path"
goruntime "runtime"
"sort"
"strings"
"sync"
@ -681,14 +680,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
klet.containerRuntime = runtime
klet.runner = runtime
// CRI integrations should get container metrics via CRI. Docker
// uses the built-in cadvisor to gather such metrics on Linux for
// historical reasons.
// cri-o relies on cadvisor as a temporary workaround. The code should
// be removed. Related issue:
// https://github.com/kubernetes/kubernetes/issues/51798
if (containerRuntime == kubetypes.DockerContainerRuntime &&
goruntime.GOOS == "linux") || remoteRuntimeEndpoint == "/var/run/crio.sock" {
if cadvisor.UsingLegacyCadvisorStats(containerRuntime, remoteRuntimeEndpoint) {
klet.StatsProvider = stats.NewCadvisorStatsProvider(
klet.cadvisor,
klet.resourceAnalyzer,

View File

@ -99,7 +99,7 @@ func containerRuntime() error {
}
// Setup cadvisor to check the container environment
c, err := cadvisor.New("", 0 /*don't start the http server*/, cadvisor.NewImageFsInfoProvider("docker", ""), "/var/lib/kubelet")
c, err := cadvisor.New("", 0 /*don't start the http server*/, cadvisor.NewImageFsInfoProvider("docker", ""), "/var/lib/kubelet", false)
if err != nil {
return printError("Container Runtime Check: %s Could not start cadvisor %v", failed, err)
}