From 39dcad8a19ea125eda64a0425ec5d84765f26d41 Mon Sep 17 00:00:00 2001 From: Sascha Grunert Date: Mon, 16 Oct 2023 12:40:52 +0200 Subject: [PATCH] Populate CRI filesystem info error Usually we just log the error but since it's used by the GC we now populate it up the call stack. Signed-off-by: Sascha Grunert --- pkg/kubelet/stats/cri_stats_provider.go | 39 +++++++++++++------ pkg/kubelet/stats/cri_stats_provider_linux.go | 9 ++++- .../stats/cri_stats_provider_others.go | 3 +- .../stats/cri_stats_provider_windows.go | 22 ++++++++--- 4 files changed, 52 insertions(+), 21 deletions(-) diff --git a/pkg/kubelet/stats/cri_stats_provider.go b/pkg/kubelet/stats/cri_stats_provider.go index b294ba75e83..affb2092d17 100644 --- a/pkg/kubelet/stats/cri_stats_provider.go +++ b/pkg/kubelet/stats/cri_stats_provider.go @@ -204,7 +204,10 @@ func (p *criStatsProvider) listPodStatsPartiallyFromCRI(ctx context.Context, upd } // Fill available stats for full set of required pod stats - cs := p.makeContainerStats(stats, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata(), updateCPUNanoCoreUsage) + cs, err := p.makeContainerStats(stats, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata(), updateCPUNanoCoreUsage) + if err != nil { + return nil, fmt.Errorf("make container stats: %w", err) + } p.addPodNetworkStats(ps, podSandboxID, caInfos, cs, containerNetworkStats[podSandboxID]) p.addPodCPUMemoryStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) p.addSwapStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) @@ -249,7 +252,9 @@ func (p *criStatsProvider) listPodStatsStrictlyFromCRI(ctx context.Context, upda continue } ps := buildPodStats(podSandbox) - p.addCRIPodContainerStats(criSandboxStat, ps, fsIDtoInfo, containerMap, podSandbox, rootFsInfo, updateCPUNanoCoreUsage) + if err := p.addCRIPodContainerStats(criSandboxStat, ps, fsIDtoInfo, containerMap, podSandbox, rootFsInfo, updateCPUNanoCoreUsage); err != nil { + return nil, fmt.Errorf("add CRI pod container stats: %w", err) + } addCRIPodNetworkStats(ps, criSandboxStat) addCRIPodCPUStats(ps, criSandboxStat) addCRIPodMemoryStats(ps, criSandboxStat) @@ -401,7 +406,10 @@ func (p *criStatsProvider) ImageFsStats(ctx context.Context) (*statsapi.FsStats, if fs.InodesUsed != nil { s.InodesUsed = &fs.InodesUsed.Value } - imageFsInfo := p.getFsInfo(fs.GetFsId()) + imageFsInfo, err := p.getFsInfo(fs.GetFsId()) + if err != nil { + return nil, fmt.Errorf("get filesystem info: %w", err) + } if imageFsInfo != nil { // The image filesystem id is unknown to the local node or there's // an error on retrieving the stats. In these cases, we omit those @@ -423,7 +431,10 @@ func (p *criStatsProvider) ImageFsDevice(ctx context.Context) (string, error) { return "", err } for _, fs := range resp { - fsInfo := p.getFsInfo(fs.GetFsId()) + fsInfo, err := p.getFsInfo(fs.GetFsId()) + if err != nil { + return "", fmt.Errorf("get filesystem info: %w", err) + } if fsInfo != nil { return fsInfo.Device, nil } @@ -434,10 +445,10 @@ func (p *criStatsProvider) ImageFsDevice(ctx context.Context) (string, error) { // getFsInfo returns the information of the filesystem with the specified // fsID. If any error occurs, this function logs the error and returns // nil. -func (p *criStatsProvider) getFsInfo(fsID *runtimeapi.FilesystemIdentifier) *cadvisorapiv2.FsInfo { +func (p *criStatsProvider) getFsInfo(fsID *runtimeapi.FilesystemIdentifier) (*cadvisorapiv2.FsInfo, error) { if fsID == nil { klog.V(2).InfoS("Failed to get filesystem info: fsID is nil") - return nil + return nil, nil } mountpoint := fsID.GetMountpoint() fsInfo, err := p.cadvisor.GetDirFsInfo(mountpoint) @@ -449,10 +460,11 @@ func (p *criStatsProvider) getFsInfo(fsID *runtimeapi.FilesystemIdentifier) *cad klog.V(2).InfoS(msg, "mountpoint", mountpoint, "err", err) } else { klog.ErrorS(err, msg, "mountpoint", mountpoint) + return nil, fmt.Errorf("%s: %w", msg, err) } - return nil + return nil, nil } - return &fsInfo + return &fsInfo, nil } // buildPodStats returns a PodStats that identifies the Pod managing cinfo @@ -590,7 +602,7 @@ func (p *criStatsProvider) makeContainerStats( fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo, meta *runtimeapi.PodSandboxMetadata, updateCPUNanoCoreUsage bool, -) *statsapi.ContainerStats { +) (*statsapi.ContainerStats, error) { result := &statsapi.ContainerStats{ Name: stats.Attributes.Metadata.Name, // The StartTime in the summary API is the container creation time. @@ -652,10 +664,14 @@ func (p *criStatsProvider) makeContainerStats( } } fsID := stats.GetWritableLayer().GetFsId() + var err error if fsID != nil { imageFsInfo, found := fsIDtoInfo[*fsID] if !found { - imageFsInfo = p.getFsInfo(fsID) + imageFsInfo, err = p.getFsInfo(fsID) + if err != nil { + return nil, fmt.Errorf("get filesystem info: %w", err) + } fsIDtoInfo[*fsID] = imageFsInfo } if imageFsInfo != nil { @@ -672,12 +688,11 @@ func (p *criStatsProvider) makeContainerStats( // NOTE: This doesn't support the old pod log path, `/var/log/pods/UID`. For containers // using old log path, empty log stats are returned. This is fine, because we don't // officially support in-place upgrade anyway. - var err error result.Logs, err = p.hostStatsProvider.getPodContainerLogStats(meta.GetNamespace(), meta.GetName(), types.UID(meta.GetUid()), container.GetMetadata().GetName(), rootFsInfo) if err != nil { klog.ErrorS(err, "Unable to fetch container log stats", "containerName", container.GetMetadata().GetName()) } - return result + return result, nil } func (p *criStatsProvider) makeContainerCPUAndMemoryStats( diff --git a/pkg/kubelet/stats/cri_stats_provider_linux.go b/pkg/kubelet/stats/cri_stats_provider_linux.go index 2bfa0fd48f3..9d8a0e479fa 100644 --- a/pkg/kubelet/stats/cri_stats_provider_linux.go +++ b/pkg/kubelet/stats/cri_stats_provider_linux.go @@ -20,6 +20,7 @@ limitations under the License. package stats import ( + "fmt" "time" cadvisorapiv2 "github.com/google/cadvisor/info/v2" @@ -32,17 +33,21 @@ func (p *criStatsProvider) addCRIPodContainerStats(criSandboxStat *runtimeapi.Po ps *statsapi.PodStats, fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo, containerMap map[string]*runtimeapi.Container, podSandbox *runtimeapi.PodSandbox, - rootFsInfo *cadvisorapiv2.FsInfo, updateCPUNanoCoreUsage bool) { + rootFsInfo *cadvisorapiv2.FsInfo, updateCPUNanoCoreUsage bool) error { for _, criContainerStat := range criSandboxStat.Linux.Containers { container, found := containerMap[criContainerStat.Attributes.Id] if !found { continue } // Fill available stats for full set of required pod stats - cs := p.makeContainerStats(criContainerStat, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata(), + cs, err := p.makeContainerStats(criContainerStat, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata(), updateCPUNanoCoreUsage) + if err != nil { + return fmt.Errorf("make container stats: %w", err) + } ps.Containers = append(ps.Containers, *cs) } + return nil } func addCRIPodNetworkStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxStats) { diff --git a/pkg/kubelet/stats/cri_stats_provider_others.go b/pkg/kubelet/stats/cri_stats_provider_others.go index fb18e89f16c..55ed8e7a20b 100644 --- a/pkg/kubelet/stats/cri_stats_provider_others.go +++ b/pkg/kubelet/stats/cri_stats_provider_others.go @@ -35,7 +35,8 @@ func (p *criStatsProvider) addCRIPodContainerStats(criSandboxStat *runtimeapi.Po ps *statsapi.PodStats, fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo, containerMap map[string]*runtimeapi.Container, podSandbox *runtimeapi.PodSandbox, - rootFsInfo *cadvisorapiv2.FsInfo, updateCPUNanoCoreUsage bool) { + rootFsInfo *cadvisorapiv2.FsInfo, updateCPUNanoCoreUsage bool) error { + return nil } func addCRIPodNetworkStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxStats) { diff --git a/pkg/kubelet/stats/cri_stats_provider_windows.go b/pkg/kubelet/stats/cri_stats_provider_windows.go index e64da34c4db..0ea6a864103 100644 --- a/pkg/kubelet/stats/cri_stats_provider_windows.go +++ b/pkg/kubelet/stats/cri_stats_provider_windows.go @@ -20,6 +20,7 @@ limitations under the License. package stats import ( + "fmt" "time" "github.com/Microsoft/hcsshim" @@ -86,16 +87,22 @@ func (p *criStatsProvider) addCRIPodContainerStats(criSandboxStat *runtimeapi.Po containerMap map[string]*runtimeapi.Container, podSandbox *runtimeapi.PodSandbox, rootFsInfo *cadvisorapiv2.FsInfo, - updateCPUNanoCoreUsage bool) { + updateCPUNanoCoreUsage bool) error { for _, criContainerStat := range criSandboxStat.Windows.Containers { container, found := containerMap[criContainerStat.Attributes.Id] if !found { continue } // Fill available stats for full set of required pod stats - cs := p.makeWinContainerStats(criContainerStat, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata()) + cs, err := p.makeWinContainerStats(criContainerStat, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata()) + if err != nil { + return fmt.Errorf("make container stats: %w", err) + + } ps.Containers = append(ps.Containers, *cs) } + + return nil } func (p *criStatsProvider) makeWinContainerStats( @@ -103,7 +110,7 @@ func (p *criStatsProvider) makeWinContainerStats( container *runtimeapi.Container, rootFsInfo *cadvisorapiv2.FsInfo, fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo, - meta *runtimeapi.PodSandboxMetadata) *statsapi.ContainerStats { + meta *runtimeapi.PodSandboxMetadata) (*statsapi.ContainerStats, error) { result := &statsapi.ContainerStats{ Name: stats.Attributes.Metadata.Name, // The StartTime in the summary API is the container creation time. @@ -149,11 +156,15 @@ func (p *criStatsProvider) makeWinContainerStats( result.Rootfs.UsedBytes = &stats.WritableLayer.UsedBytes.Value } } + var err error fsID := stats.GetWritableLayer().GetFsId() if fsID != nil { imageFsInfo, found := fsIDtoInfo[*fsID] if !found { - imageFsInfo = p.getFsInfo(fsID) + imageFsInfo, err = p.getFsInfo(fsID) + if err != nil { + return nil, fmt.Errorf("get filesystem info: %w", err) + } fsIDtoInfo[*fsID] = imageFsInfo } if imageFsInfo != nil { @@ -168,12 +179,11 @@ func (p *criStatsProvider) makeWinContainerStats( // NOTE: This doesn't support the old pod log path, `/var/log/pods/UID`. For containers // using old log path, empty log stats are returned. This is fine, because we don't // officially support in-place upgrade anyway. - var err error result.Logs, err = p.hostStatsProvider.getPodContainerLogStats(meta.GetNamespace(), meta.GetName(), types.UID(meta.GetUid()), container.GetMetadata().GetName(), rootFsInfo) if err != nil { klog.ErrorS(err, "Unable to fetch container log stats", "containerName", container.GetMetadata().GetName()) } - return result + return result, nil } // hcsStatsToNetworkStats converts hcsshim.Statistics.Network to statsapi.NetworkStats