Merge pull request #60106 from dashpole/cadvisor_godep

Automatic merge from submit-queue (batch tested with PRs 60106, 59510, 60263, 60063, 59088). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Update cadvisor godeps to v0.29.0 and ignore per-cpu metrics

**What this PR does / why we need it**:
Updates the cAdvisor dependency to the cAdvisor release associated with the kubernetes 1.10 release.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #60052

**Special notes for your reviewer**:
This PR also adds per-cpu metrics to the ignoreMetrics list. This is a new metric that can be ignored in the most recent cAdvisor release. The reason for not collecting per-cpu metrics is that it can cause severe scalability issues. For example, on a 128-core machine running 100 containers, there are 12800 different streams of metrics just for per-cpu metrics, which cAdvisor needs to process and transmit. Additionally, per-cpu metrics are not used by any kubernetes components, and if a user needs these metrics, they can run cAdvisor as a daemonset.

**Release note**:
```release-note
Disable per-cpu metrics by default for scalability.
Fix inaccurate disk usage monitoring of overlayFs.
Retry docker connection on startup timeout to avoid permanent loss of metrics.
```

/assign @dchen1107
2025-10-22 15:09:39 +00:00 · 2018-02-23 02:59:38 -08:00
parent 0cb15453da 65394fe18c
commit 6af0768768
5 changed files with 110 additions and 94 deletions
--- a/vendor/github.com/google/cadvisor/container/factory.go
+++ b/vendor/github.com/google/cadvisor/container/factory.go
@@ -42,6 +42,7 @@ type MetricKind string

 const (
 	CpuUsageMetrics        MetricKind = "cpu"
+	PerCpuUsageMetrics     MetricKind = "percpu"
 	MemoryUsageMetrics     MetricKind = "memory"
 	CpuLoadMetrics         MetricKind = "cpuLoad"
 	DiskIOMetrics          MetricKind = "diskIO"
--- a/vendor/github.com/google/cadvisor/container/libcontainer/helpers.go
+++ b/vendor/github.com/google/cadvisor/container/libcontainer/helpers.go
@@ -113,7 +113,8 @@ func GetStats(cgroupManager cgroups.Manager, rootFs string, pid int, ignoreMetri
 	libcontainerStats := &libcontainer.Stats{
 		CgroupStats: cgroupStats,
 	}
-	stats := newContainerStats(libcontainerStats)
+	withPerCPU := !ignoreMetrics.Has(container.PerCpuUsageMetrics)
+	stats := newContainerStats(libcontainerStats, withPerCPU)

 	// If we know the pid then get network stats from /proc/<pid>/net/dev
 	if pid == 0 {
@@ -467,14 +468,17 @@ func minUint32(x, y uint32) uint32 {
 var numCpusFunc = getNumberOnlineCPUs

 // Convert libcontainer stats to info.ContainerStats.
-func setCpuStats(s *cgroups.Stats, ret *info.ContainerStats) {
+func setCpuStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) {
 	ret.Cpu.Usage.User = s.CpuStats.CpuUsage.UsageInUsermode
 	ret.Cpu.Usage.System = s.CpuStats.CpuUsage.UsageInKernelmode
-	ret.Cpu.Usage.Total = 0
+	ret.Cpu.Usage.Total = s.CpuStats.CpuUsage.TotalUsage
 	ret.Cpu.CFS.Periods = s.CpuStats.ThrottlingData.Periods
 	ret.Cpu.CFS.ThrottledPeriods = s.CpuStats.ThrottlingData.ThrottledPeriods
 	ret.Cpu.CFS.ThrottledTime = s.CpuStats.ThrottlingData.ThrottledTime

+	if !withPerCPU {
+		return
+	}
 	if len(s.CpuStats.CpuUsage.PercpuUsage) == 0 {
 		// libcontainer's 'GetStats' can leave 'PercpuUsage' nil if it skipped the
 		// cpuacct subsystem.
@@ -501,7 +505,6 @@ func setCpuStats(s *cgroups.Stats, ret *info.ContainerStats) {

 	for i := uint32(0); i < numActual; i++ {
 		ret.Cpu.Usage.PerCpu[i] = s.CpuStats.CpuUsage.PercpuUsage[i]
-		ret.Cpu.Usage.Total += s.CpuStats.CpuUsage.PercpuUsage[i]
 	}

 }
@@ -587,13 +590,13 @@ func setNetworkStats(libcontainerStats *libcontainer.Stats, ret *info.ContainerS
 	}
 }

-func newContainerStats(libcontainerStats *libcontainer.Stats) *info.ContainerStats {
+func newContainerStats(libcontainerStats *libcontainer.Stats, withPerCPU bool) *info.ContainerStats {
 	ret := &info.ContainerStats{
 		Timestamp: time.Now(),
 	}

 	if s := libcontainerStats.CgroupStats; s != nil {
-		setCpuStats(s, ret)
+		setCpuStats(s, ret, withPerCPU)
 		setDiskIoStats(s, ret)
 		setMemoryStats(s, ret)
 	}
--- a/vendor/github.com/google/cadvisor/metrics/prometheus.go
+++ b/vendor/github.com/google/cadvisor/metrics/prometheus.go
@@ -150,10 +150,18 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo
 				},
 			}, {
 				name:        "container_cpu_usage_seconds_total",
-				help:        "Cumulative cpu time consumed per cpu in seconds.",
+				help:        "Cumulative cpu time consumed in seconds.",
 				valueType:   prometheus.CounterValue,
 				extraLabels: []string{"cpu"},
 				getValues: func(s *info.ContainerStats) metricValues {
+					if len(s.Cpu.Usage.PerCpu) == 0 {
+						if s.Cpu.Usage.Total > 0 {
+							return metricValues{{
+								value:  float64(s.Cpu.Usage.Total) / float64(time.Second),
+								labels: []string{"total"},
+							}}
+						}
+					}
 					values := make(metricValues, 0, len(s.Cpu.Usage.PerCpu))
 					for i, value := range s.Cpu.Usage.PerCpu {
 						if value > 0 {