From d29bdab95104fbe7f5eee7445338a8708e791d97 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Mon, 24 Jun 2024 12:25:03 +0800 Subject: [PATCH 1/3] feat(kubelet/stats): match cadvisor error to lower not found stats log level This "RecentStats: unable to find data in memory cache" error is not actionable, in terms of kubelet, if the entry is not found in the memory cache. Thus, this proposes lowering the log level to info. Signed-off-by: Gyuho Lee --- pkg/kubelet/server/stats/summary_sys_containers.go | 9 ++++++++- pkg/kubelet/stats/cadvisor_stats_provider.go | 2 +- pkg/kubelet/stats/helper.go | 2 +- pkg/kubelet/stats/provider.go | 8 ++++++++ 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/pkg/kubelet/server/stats/summary_sys_containers.go b/pkg/kubelet/server/stats/summary_sys_containers.go index 3edbffa5836..380a6987dcb 100644 --- a/pkg/kubelet/server/stats/summary_sys_containers.go +++ b/pkg/kubelet/server/stats/summary_sys_containers.go @@ -20,8 +20,11 @@ limitations under the License. 
package stats import ( + "errors" + "k8s.io/klog/v2" + cadvisormemory "github.com/google/cadvisor/cache/memory" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" "k8s.io/kubernetes/pkg/kubelet/cm" @@ -80,7 +83,11 @@ func (sp *summaryProviderImpl) GetSystemContainersCPUAndMemoryStats(nodeConfig c } s, err := sp.provider.GetCgroupCPUAndMemoryStats(cont.name, cont.forceStatsUpdate) if err != nil { - klog.ErrorS(err, "Failed to get system container stats", "containerName", cont.name) + if errors.Is(errors.Unwrap(err), cadvisormemory.ErrDataNotFound) { + klog.InfoS("cgroup stats not found in memory cache", "containerName", cont.name) + } else { + klog.ErrorS(err, "Failed to get system container stats", "containerName", cont.name) + } continue } s.Name = sys diff --git a/pkg/kubelet/stats/cadvisor_stats_provider.go b/pkg/kubelet/stats/cadvisor_stats_provider.go index 1fdf6ddfda4..e46a073a718 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider.go @@ -499,7 +499,7 @@ func getCadvisorContainerInfo(ca cadvisor.Interface) (map[string]cadvisorapiv2.C // response. 
klog.ErrorS(err, "Partial failure issuing cadvisor.ContainerInfoV2") } else { - return nil, fmt.Errorf("failed to get root cgroup stats: %v", err) + return nil, fmt.Errorf("failed to get root cgroup stats: %w", err) } } return infos, nil diff --git a/pkg/kubelet/stats/helper.go b/pkg/kubelet/stats/helper.go index c6ca3a064e0..36e8a3030cb 100644 --- a/pkg/kubelet/stats/helper.go +++ b/pkg/kubelet/stats/helper.go @@ -318,7 +318,7 @@ func getCgroupInfo(cadvisor cadvisor.Interface, containerName string, updateStat MaxAge: maxAge, }) if err != nil { - return nil, fmt.Errorf("failed to get container info for %q: %v", containerName, err) + return nil, fmt.Errorf("failed to get container info for %q: %w", containerName, err) } if len(infoMap) != 1 { return nil, fmt.Errorf("unexpected number of containers: %v", len(infoMap)) diff --git a/pkg/kubelet/stats/provider.go b/pkg/kubelet/stats/provider.go index dc51c1b25f5..d3a63d37bf7 100644 --- a/pkg/kubelet/stats/provider.go +++ b/pkg/kubelet/stats/provider.go @@ -18,8 +18,10 @@ package stats import ( "context" + "errors" "fmt" + cadvisormemory "github.com/google/cadvisor/cache/memory" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" internalapi "k8s.io/cri-api/pkg/apis" @@ -113,6 +115,9 @@ func (p *Provider) RlimitStats() (*statsapi.RlimitStats, error) { func (p *Provider) GetCgroupStats(cgroupName string, updateStats bool) (*statsapi.ContainerStats, *statsapi.NetworkStats, error) { info, err := getCgroupInfo(p.cadvisor, cgroupName, updateStats) if err != nil { + if errors.Is(errors.Unwrap(err), cadvisormemory.ErrDataNotFound) { + return nil, nil, fmt.Errorf("cgroup stats not found for %q: %w", cgroupName, cadvisormemory.ErrDataNotFound) + } return nil, nil, fmt.Errorf("failed to get cgroup stats for %q: %v", cgroupName, err) } // Rootfs and imagefs doesn't make sense for raw cgroup. 
@@ -126,6 +131,9 @@ func (p *Provider) GetCgroupStats(cgroupName string, updateStats bool) (*statsap func (p *Provider) GetCgroupCPUAndMemoryStats(cgroupName string, updateStats bool) (*statsapi.ContainerStats, error) { info, err := getCgroupInfo(p.cadvisor, cgroupName, updateStats) if err != nil { + if errors.Is(errors.Unwrap(err), cadvisormemory.ErrDataNotFound) { + return nil, fmt.Errorf("cgroup stats not found for %q: %w", cgroupName, cadvisormemory.ErrDataNotFound) + } return nil, fmt.Errorf("failed to get cgroup stats for %q: %v", cgroupName, err) } // Rootfs and imagefs doesn't make sense for raw cgroup. From 1e3dc23e16851db126ea2ec75973a5febf599fc2 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Thu, 27 Jun 2024 00:13:45 +0800 Subject: [PATCH 2/3] v4 logging Signed-off-by: Gyuho Lee --- pkg/kubelet/server/stats/summary_sys_containers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/kubelet/server/stats/summary_sys_containers.go b/pkg/kubelet/server/stats/summary_sys_containers.go index 380a6987dcb..04f141dae00 100644 --- a/pkg/kubelet/server/stats/summary_sys_containers.go +++ b/pkg/kubelet/server/stats/summary_sys_containers.go @@ -84,7 +84,7 @@ func (sp *summaryProviderImpl) GetSystemContainersCPUAndMemoryStats(nodeConfig c s, err := sp.provider.GetCgroupCPUAndMemoryStats(cont.name, cont.forceStatsUpdate) if err != nil { if errors.Is(errors.Unwrap(err), cadvisormemory.ErrDataNotFound) { - klog.InfoS("cgroup stats not found in memory cache", "containerName", cont.name) + klog.V(4).InfoS("cgroup stats not found in memory cache", "containerName", cont.name) } else { klog.ErrorS(err, "Failed to get system container stats", "containerName", cont.name) } From ac992f9a925b16020a0da01186cc71668a93fd6e Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 3 Jul 2024 21:31:57 +0800 Subject: [PATCH 3/3] remove unnecessary errors.Unwrap calls Signed-off-by: Gyuho Lee --- pkg/kubelet/server/stats/summary_sys_containers.go | 2 +- 
pkg/kubelet/stats/provider.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/kubelet/server/stats/summary_sys_containers.go b/pkg/kubelet/server/stats/summary_sys_containers.go index 04f141dae00..35d1135c630 100644 --- a/pkg/kubelet/server/stats/summary_sys_containers.go +++ b/pkg/kubelet/server/stats/summary_sys_containers.go @@ -83,7 +83,7 @@ func (sp *summaryProviderImpl) GetSystemContainersCPUAndMemoryStats(nodeConfig c } s, err := sp.provider.GetCgroupCPUAndMemoryStats(cont.name, cont.forceStatsUpdate) if err != nil { - if errors.Is(errors.Unwrap(err), cadvisormemory.ErrDataNotFound) { + if errors.Is(err, cadvisormemory.ErrDataNotFound) { klog.V(4).InfoS("cgroup stats not found in memory cache", "containerName", cont.name) } else { klog.ErrorS(err, "Failed to get system container stats", "containerName", cont.name) diff --git a/pkg/kubelet/stats/provider.go b/pkg/kubelet/stats/provider.go index d3a63d37bf7..4b747c42a0c 100644 --- a/pkg/kubelet/stats/provider.go +++ b/pkg/kubelet/stats/provider.go @@ -115,7 +115,7 @@ func (p *Provider) RlimitStats() (*statsapi.RlimitStats, error) { func (p *Provider) GetCgroupStats(cgroupName string, updateStats bool) (*statsapi.ContainerStats, *statsapi.NetworkStats, error) { info, err := getCgroupInfo(p.cadvisor, cgroupName, updateStats) if err != nil { - if errors.Is(errors.Unwrap(err), cadvisormemory.ErrDataNotFound) { + if errors.Is(err, cadvisormemory.ErrDataNotFound) { return nil, nil, fmt.Errorf("cgroup stats not found for %q: %w", cgroupName, cadvisormemory.ErrDataNotFound) } return nil, nil, fmt.Errorf("failed to get cgroup stats for %q: %v", cgroupName, err) @@ -131,7 +131,7 @@ func (p *Provider) GetCgroupStats(cgroupName string, updateStats bool) (*statsap func (p *Provider) GetCgroupCPUAndMemoryStats(cgroupName string, updateStats bool) (*statsapi.ContainerStats, error) { info, err := getCgroupInfo(p.cadvisor, cgroupName, updateStats) if err != nil { - if errors.Is(errors.Unwrap(err), 
cadvisormemory.ErrDataNotFound) { + if errors.Is(err, cadvisormemory.ErrDataNotFound) { return nil, fmt.Errorf("cgroup stats not found for %q: %w", cgroupName, cadvisormemory.ErrDataNotFound) } return nil, fmt.Errorf("failed to get cgroup stats for %q: %v", cgroupName, err)