Propagate CRI filesystem info error

Usually we just log the error, but since the filesystem info is used by
the GC we now propagate it up the call stack.

Signed-off-by: Sascha Grunert <sgrunert@redhat.com>
This commit is contained in:
Sascha Grunert 2023-10-16 12:40:52 +02:00
parent afc302c2d2
commit 39dcad8a19
No known key found for this signature in database
GPG Key ID: 09D97D153EF94D93
4 changed files with 52 additions and 21 deletions

View File

@ -204,7 +204,10 @@ func (p *criStatsProvider) listPodStatsPartiallyFromCRI(ctx context.Context, upd
} }
// Fill available stats for full set of required pod stats // Fill available stats for full set of required pod stats
cs := p.makeContainerStats(stats, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata(), updateCPUNanoCoreUsage) cs, err := p.makeContainerStats(stats, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata(), updateCPUNanoCoreUsage)
if err != nil {
return nil, fmt.Errorf("make container stats: %w", err)
}
p.addPodNetworkStats(ps, podSandboxID, caInfos, cs, containerNetworkStats[podSandboxID]) p.addPodNetworkStats(ps, podSandboxID, caInfos, cs, containerNetworkStats[podSandboxID])
p.addPodCPUMemoryStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) p.addPodCPUMemoryStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs)
p.addSwapStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) p.addSwapStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs)
@ -249,7 +252,9 @@ func (p *criStatsProvider) listPodStatsStrictlyFromCRI(ctx context.Context, upda
continue continue
} }
ps := buildPodStats(podSandbox) ps := buildPodStats(podSandbox)
p.addCRIPodContainerStats(criSandboxStat, ps, fsIDtoInfo, containerMap, podSandbox, rootFsInfo, updateCPUNanoCoreUsage) if err := p.addCRIPodContainerStats(criSandboxStat, ps, fsIDtoInfo, containerMap, podSandbox, rootFsInfo, updateCPUNanoCoreUsage); err != nil {
return nil, fmt.Errorf("add CRI pod container stats: %w", err)
}
addCRIPodNetworkStats(ps, criSandboxStat) addCRIPodNetworkStats(ps, criSandboxStat)
addCRIPodCPUStats(ps, criSandboxStat) addCRIPodCPUStats(ps, criSandboxStat)
addCRIPodMemoryStats(ps, criSandboxStat) addCRIPodMemoryStats(ps, criSandboxStat)
@ -401,7 +406,10 @@ func (p *criStatsProvider) ImageFsStats(ctx context.Context) (*statsapi.FsStats,
if fs.InodesUsed != nil { if fs.InodesUsed != nil {
s.InodesUsed = &fs.InodesUsed.Value s.InodesUsed = &fs.InodesUsed.Value
} }
imageFsInfo := p.getFsInfo(fs.GetFsId()) imageFsInfo, err := p.getFsInfo(fs.GetFsId())
if err != nil {
return nil, fmt.Errorf("get filesystem info: %w", err)
}
if imageFsInfo != nil { if imageFsInfo != nil {
// The image filesystem id is unknown to the local node or there's // The image filesystem id is unknown to the local node or there's
// an error on retrieving the stats. In these cases, we omit those // an error on retrieving the stats. In these cases, we omit those
@ -423,7 +431,10 @@ func (p *criStatsProvider) ImageFsDevice(ctx context.Context) (string, error) {
return "", err return "", err
} }
for _, fs := range resp { for _, fs := range resp {
fsInfo := p.getFsInfo(fs.GetFsId()) fsInfo, err := p.getFsInfo(fs.GetFsId())
if err != nil {
return "", fmt.Errorf("get filesystem info: %w", err)
}
if fsInfo != nil { if fsInfo != nil {
return fsInfo.Device, nil return fsInfo.Device, nil
} }
@ -434,10 +445,10 @@ func (p *criStatsProvider) ImageFsDevice(ctx context.Context) (string, error) {
// getFsInfo returns the information of the filesystem with the specified // getFsInfo returns the information of the filesystem with the specified
// fsID. If any error occurs, this function logs the error and returns // fsID. If any error occurs, this function logs the error and returns
// nil. // nil.
func (p *criStatsProvider) getFsInfo(fsID *runtimeapi.FilesystemIdentifier) *cadvisorapiv2.FsInfo { func (p *criStatsProvider) getFsInfo(fsID *runtimeapi.FilesystemIdentifier) (*cadvisorapiv2.FsInfo, error) {
if fsID == nil { if fsID == nil {
klog.V(2).InfoS("Failed to get filesystem info: fsID is nil") klog.V(2).InfoS("Failed to get filesystem info: fsID is nil")
return nil return nil, nil
} }
mountpoint := fsID.GetMountpoint() mountpoint := fsID.GetMountpoint()
fsInfo, err := p.cadvisor.GetDirFsInfo(mountpoint) fsInfo, err := p.cadvisor.GetDirFsInfo(mountpoint)
@ -449,10 +460,11 @@ func (p *criStatsProvider) getFsInfo(fsID *runtimeapi.FilesystemIdentifier) *cad
klog.V(2).InfoS(msg, "mountpoint", mountpoint, "err", err) klog.V(2).InfoS(msg, "mountpoint", mountpoint, "err", err)
} else { } else {
klog.ErrorS(err, msg, "mountpoint", mountpoint) klog.ErrorS(err, msg, "mountpoint", mountpoint)
return nil, fmt.Errorf("%s: %w", msg, err)
} }
return nil return nil, nil
} }
return &fsInfo return &fsInfo, nil
} }
// buildPodStats returns a PodStats that identifies the Pod managing cinfo // buildPodStats returns a PodStats that identifies the Pod managing cinfo
@ -590,7 +602,7 @@ func (p *criStatsProvider) makeContainerStats(
fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo, fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo,
meta *runtimeapi.PodSandboxMetadata, meta *runtimeapi.PodSandboxMetadata,
updateCPUNanoCoreUsage bool, updateCPUNanoCoreUsage bool,
) *statsapi.ContainerStats { ) (*statsapi.ContainerStats, error) {
result := &statsapi.ContainerStats{ result := &statsapi.ContainerStats{
Name: stats.Attributes.Metadata.Name, Name: stats.Attributes.Metadata.Name,
// The StartTime in the summary API is the container creation time. // The StartTime in the summary API is the container creation time.
@ -652,10 +664,14 @@ func (p *criStatsProvider) makeContainerStats(
} }
} }
fsID := stats.GetWritableLayer().GetFsId() fsID := stats.GetWritableLayer().GetFsId()
var err error
if fsID != nil { if fsID != nil {
imageFsInfo, found := fsIDtoInfo[*fsID] imageFsInfo, found := fsIDtoInfo[*fsID]
if !found { if !found {
imageFsInfo = p.getFsInfo(fsID) imageFsInfo, err = p.getFsInfo(fsID)
if err != nil {
return nil, fmt.Errorf("get filesystem info: %w", err)
}
fsIDtoInfo[*fsID] = imageFsInfo fsIDtoInfo[*fsID] = imageFsInfo
} }
if imageFsInfo != nil { if imageFsInfo != nil {
@ -672,12 +688,11 @@ func (p *criStatsProvider) makeContainerStats(
// NOTE: This doesn't support the old pod log path, `/var/log/pods/UID`. For containers // NOTE: This doesn't support the old pod log path, `/var/log/pods/UID`. For containers
// using old log path, empty log stats are returned. This is fine, because we don't // using old log path, empty log stats are returned. This is fine, because we don't
// officially support in-place upgrade anyway. // officially support in-place upgrade anyway.
var err error
result.Logs, err = p.hostStatsProvider.getPodContainerLogStats(meta.GetNamespace(), meta.GetName(), types.UID(meta.GetUid()), container.GetMetadata().GetName(), rootFsInfo) result.Logs, err = p.hostStatsProvider.getPodContainerLogStats(meta.GetNamespace(), meta.GetName(), types.UID(meta.GetUid()), container.GetMetadata().GetName(), rootFsInfo)
if err != nil { if err != nil {
klog.ErrorS(err, "Unable to fetch container log stats", "containerName", container.GetMetadata().GetName()) klog.ErrorS(err, "Unable to fetch container log stats", "containerName", container.GetMetadata().GetName())
} }
return result return result, nil
} }
func (p *criStatsProvider) makeContainerCPUAndMemoryStats( func (p *criStatsProvider) makeContainerCPUAndMemoryStats(

View File

@ -20,6 +20,7 @@ limitations under the License.
package stats package stats
import ( import (
"fmt"
"time" "time"
cadvisorapiv2 "github.com/google/cadvisor/info/v2" cadvisorapiv2 "github.com/google/cadvisor/info/v2"
@ -32,17 +33,21 @@ func (p *criStatsProvider) addCRIPodContainerStats(criSandboxStat *runtimeapi.Po
ps *statsapi.PodStats, fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo, ps *statsapi.PodStats, fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo,
containerMap map[string]*runtimeapi.Container, containerMap map[string]*runtimeapi.Container,
podSandbox *runtimeapi.PodSandbox, podSandbox *runtimeapi.PodSandbox,
rootFsInfo *cadvisorapiv2.FsInfo, updateCPUNanoCoreUsage bool) { rootFsInfo *cadvisorapiv2.FsInfo, updateCPUNanoCoreUsage bool) error {
for _, criContainerStat := range criSandboxStat.Linux.Containers { for _, criContainerStat := range criSandboxStat.Linux.Containers {
container, found := containerMap[criContainerStat.Attributes.Id] container, found := containerMap[criContainerStat.Attributes.Id]
if !found { if !found {
continue continue
} }
// Fill available stats for full set of required pod stats // Fill available stats for full set of required pod stats
cs := p.makeContainerStats(criContainerStat, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata(), cs, err := p.makeContainerStats(criContainerStat, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata(),
updateCPUNanoCoreUsage) updateCPUNanoCoreUsage)
if err != nil {
return fmt.Errorf("make container stats: %w", err)
}
ps.Containers = append(ps.Containers, *cs) ps.Containers = append(ps.Containers, *cs)
} }
return nil
} }
func addCRIPodNetworkStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxStats) { func addCRIPodNetworkStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxStats) {

View File

@ -35,7 +35,8 @@ func (p *criStatsProvider) addCRIPodContainerStats(criSandboxStat *runtimeapi.Po
ps *statsapi.PodStats, fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo, ps *statsapi.PodStats, fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo,
containerMap map[string]*runtimeapi.Container, containerMap map[string]*runtimeapi.Container,
podSandbox *runtimeapi.PodSandbox, podSandbox *runtimeapi.PodSandbox,
rootFsInfo *cadvisorapiv2.FsInfo, updateCPUNanoCoreUsage bool) { rootFsInfo *cadvisorapiv2.FsInfo, updateCPUNanoCoreUsage bool) error {
return nil
} }
func addCRIPodNetworkStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxStats) { func addCRIPodNetworkStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxStats) {

View File

@ -20,6 +20,7 @@ limitations under the License.
package stats package stats
import ( import (
"fmt"
"time" "time"
"github.com/Microsoft/hcsshim" "github.com/Microsoft/hcsshim"
@ -86,16 +87,22 @@ func (p *criStatsProvider) addCRIPodContainerStats(criSandboxStat *runtimeapi.Po
containerMap map[string]*runtimeapi.Container, containerMap map[string]*runtimeapi.Container,
podSandbox *runtimeapi.PodSandbox, podSandbox *runtimeapi.PodSandbox,
rootFsInfo *cadvisorapiv2.FsInfo, rootFsInfo *cadvisorapiv2.FsInfo,
updateCPUNanoCoreUsage bool) { updateCPUNanoCoreUsage bool) error {
for _, criContainerStat := range criSandboxStat.Windows.Containers { for _, criContainerStat := range criSandboxStat.Windows.Containers {
container, found := containerMap[criContainerStat.Attributes.Id] container, found := containerMap[criContainerStat.Attributes.Id]
if !found { if !found {
continue continue
} }
// Fill available stats for full set of required pod stats // Fill available stats for full set of required pod stats
cs := p.makeWinContainerStats(criContainerStat, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata()) cs, err := p.makeWinContainerStats(criContainerStat, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata())
if err != nil {
return fmt.Errorf("make container stats: %w", err)
}
ps.Containers = append(ps.Containers, *cs) ps.Containers = append(ps.Containers, *cs)
} }
return nil
} }
func (p *criStatsProvider) makeWinContainerStats( func (p *criStatsProvider) makeWinContainerStats(
@ -103,7 +110,7 @@ func (p *criStatsProvider) makeWinContainerStats(
container *runtimeapi.Container, container *runtimeapi.Container,
rootFsInfo *cadvisorapiv2.FsInfo, rootFsInfo *cadvisorapiv2.FsInfo,
fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo, fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo,
meta *runtimeapi.PodSandboxMetadata) *statsapi.ContainerStats { meta *runtimeapi.PodSandboxMetadata) (*statsapi.ContainerStats, error) {
result := &statsapi.ContainerStats{ result := &statsapi.ContainerStats{
Name: stats.Attributes.Metadata.Name, Name: stats.Attributes.Metadata.Name,
// The StartTime in the summary API is the container creation time. // The StartTime in the summary API is the container creation time.
@ -149,11 +156,15 @@ func (p *criStatsProvider) makeWinContainerStats(
result.Rootfs.UsedBytes = &stats.WritableLayer.UsedBytes.Value result.Rootfs.UsedBytes = &stats.WritableLayer.UsedBytes.Value
} }
} }
var err error
fsID := stats.GetWritableLayer().GetFsId() fsID := stats.GetWritableLayer().GetFsId()
if fsID != nil { if fsID != nil {
imageFsInfo, found := fsIDtoInfo[*fsID] imageFsInfo, found := fsIDtoInfo[*fsID]
if !found { if !found {
imageFsInfo = p.getFsInfo(fsID) imageFsInfo, err = p.getFsInfo(fsID)
if err != nil {
return nil, fmt.Errorf("get filesystem info: %w", err)
}
fsIDtoInfo[*fsID] = imageFsInfo fsIDtoInfo[*fsID] = imageFsInfo
} }
if imageFsInfo != nil { if imageFsInfo != nil {
@ -168,12 +179,11 @@ func (p *criStatsProvider) makeWinContainerStats(
// NOTE: This doesn't support the old pod log path, `/var/log/pods/UID`. For containers // NOTE: This doesn't support the old pod log path, `/var/log/pods/UID`. For containers
// using old log path, empty log stats are returned. This is fine, because we don't // using old log path, empty log stats are returned. This is fine, because we don't
// officially support in-place upgrade anyway. // officially support in-place upgrade anyway.
var err error
result.Logs, err = p.hostStatsProvider.getPodContainerLogStats(meta.GetNamespace(), meta.GetName(), types.UID(meta.GetUid()), container.GetMetadata().GetName(), rootFsInfo) result.Logs, err = p.hostStatsProvider.getPodContainerLogStats(meta.GetNamespace(), meta.GetName(), types.UID(meta.GetUid()), container.GetMetadata().GetName(), rootFsInfo)
if err != nil { if err != nil {
klog.ErrorS(err, "Unable to fetch container log stats", "containerName", container.GetMetadata().GetName()) klog.ErrorS(err, "Unable to fetch container log stats", "containerName", container.GetMetadata().GetName())
} }
return result return result, nil
} }
// hcsStatsToNetworkStats converts hcsshim.Statistics.Network to statsapi.NetworkStats // hcsStatsToNetworkStats converts hcsshim.Statistics.Network to statsapi.NetworkStats