From d29bdab95104fbe7f5eee7445338a8708e791d97 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Mon, 24 Jun 2024 12:25:03 +0800 Subject: [PATCH] feat(kubelet/stats): match cadvisor error to lower not found stats log level This "RecentStats: unable to find data in memory cache" error is not actionable from the kubelet's perspective when the entry is not found in the memory cache. Thus, this change lowers the log level to info. Signed-off-by: Gyuho Lee --- pkg/kubelet/server/stats/summary_sys_containers.go | 9 ++++++++- pkg/kubelet/stats/cadvisor_stats_provider.go | 2 +- pkg/kubelet/stats/helper.go | 2 +- pkg/kubelet/stats/provider.go | 8 ++++++++ 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/pkg/kubelet/server/stats/summary_sys_containers.go b/pkg/kubelet/server/stats/summary_sys_containers.go index 3edbffa5836..380a6987dcb 100644 --- a/pkg/kubelet/server/stats/summary_sys_containers.go +++ b/pkg/kubelet/server/stats/summary_sys_containers.go @@ -20,8 +20,11 @@ limitations under the License. 
package stats import ( + "errors" + "k8s.io/klog/v2" + cadvisormemory "github.com/google/cadvisor/cache/memory" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" "k8s.io/kubernetes/pkg/kubelet/cm" @@ -80,7 +83,11 @@ func (sp *summaryProviderImpl) GetSystemContainersCPUAndMemoryStats(nodeConfig c } s, err := sp.provider.GetCgroupCPUAndMemoryStats(cont.name, cont.forceStatsUpdate) if err != nil { - klog.ErrorS(err, "Failed to get system container stats", "containerName", cont.name) + if errors.Is(errors.Unwrap(err), cadvisormemory.ErrDataNotFound) { + klog.InfoS("cgroup stats not found in memory cache", "containerName", cont.name) + } else { + klog.ErrorS(err, "Failed to get system container stats", "containerName", cont.name) + } continue } s.Name = sys diff --git a/pkg/kubelet/stats/cadvisor_stats_provider.go b/pkg/kubelet/stats/cadvisor_stats_provider.go index 1fdf6ddfda4..e46a073a718 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider.go @@ -499,7 +499,7 @@ func getCadvisorContainerInfo(ca cadvisor.Interface) (map[string]cadvisorapiv2.C // response. 
klog.ErrorS(err, "Partial failure issuing cadvisor.ContainerInfoV2") } else { - return nil, fmt.Errorf("failed to get root cgroup stats: %v", err) + return nil, fmt.Errorf("failed to get root cgroup stats: %w", err) } } return infos, nil diff --git a/pkg/kubelet/stats/helper.go b/pkg/kubelet/stats/helper.go index c6ca3a064e0..36e8a3030cb 100644 --- a/pkg/kubelet/stats/helper.go +++ b/pkg/kubelet/stats/helper.go @@ -318,7 +318,7 @@ func getCgroupInfo(cadvisor cadvisor.Interface, containerName string, updateStat MaxAge: maxAge, }) if err != nil { - return nil, fmt.Errorf("failed to get container info for %q: %v", containerName, err) + return nil, fmt.Errorf("failed to get container info for %q: %w", containerName, err) } if len(infoMap) != 1 { return nil, fmt.Errorf("unexpected number of containers: %v", len(infoMap)) diff --git a/pkg/kubelet/stats/provider.go b/pkg/kubelet/stats/provider.go index dc51c1b25f5..d3a63d37bf7 100644 --- a/pkg/kubelet/stats/provider.go +++ b/pkg/kubelet/stats/provider.go @@ -18,8 +18,10 @@ package stats import ( "context" + "errors" "fmt" + cadvisormemory "github.com/google/cadvisor/cache/memory" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" internalapi "k8s.io/cri-api/pkg/apis" @@ -113,6 +115,9 @@ func (p *Provider) RlimitStats() (*statsapi.RlimitStats, error) { func (p *Provider) GetCgroupStats(cgroupName string, updateStats bool) (*statsapi.ContainerStats, *statsapi.NetworkStats, error) { info, err := getCgroupInfo(p.cadvisor, cgroupName, updateStats) if err != nil { + if errors.Is(errors.Unwrap(err), cadvisormemory.ErrDataNotFound) { + return nil, nil, fmt.Errorf("cgroup stats not found for %q: %w", cgroupName, cadvisormemory.ErrDataNotFound) + } return nil, nil, fmt.Errorf("failed to get cgroup stats for %q: %v", cgroupName, err) } // Rootfs and imagefs doesn't make sense for raw cgroup. 
@@ -126,6 +131,9 @@ func (p *Provider) GetCgroupStats(cgroupName string, updateStats bool) (*statsap func (p *Provider) GetCgroupCPUAndMemoryStats(cgroupName string, updateStats bool) (*statsapi.ContainerStats, error) { info, err := getCgroupInfo(p.cadvisor, cgroupName, updateStats) if err != nil { + if errors.Is(errors.Unwrap(err), cadvisormemory.ErrDataNotFound) { + return nil, fmt.Errorf("cgroup stats not found for %q: %w", cgroupName, cadvisormemory.ErrDataNotFound) + } return nil, fmt.Errorf("failed to get cgroup stats for %q: %v", cgroupName, err) } // Rootfs and imagefs doesn't make sense for raw cgroup.