From 6bbaf8cb10459f15f61a26db035e79c7baf00437 Mon Sep 17 00:00:00 2001 From: Haowei Cai Date: Thu, 20 Mar 2025 19:28:45 +0000 Subject: [PATCH] Extend CRI stats provider to support PSI --- pkg/kubelet/stats/cri_stats_provider.go | 81 ++++++++++++++++++- pkg/kubelet/stats/cri_stats_provider_linux.go | 18 +++++ .../stats/cri_stats_provider_others.go | 3 + .../stats/cri_stats_provider_windows.go | 3 + 4 files changed, 103 insertions(+), 2 deletions(-) diff --git a/pkg/kubelet/stats/cri_stats_provider.go b/pkg/kubelet/stats/cri_stats_provider.go index bb42e8470d4..3df4c988ebc 100644 --- a/pkg/kubelet/stats/cri_stats_provider.go +++ b/pkg/kubelet/stats/cri_stats_provider.go @@ -33,11 +33,13 @@ import ( "google.golang.org/grpc/status" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + utilfeature "k8s.io/apiserver/pkg/util/feature" internalapi "k8s.io/cri-api/pkg/apis" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" "k8s.io/klog/v2" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" kubetypes "k8s.io/kubelet/pkg/types" + "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/cadvisor" "k8s.io/kubernetes/pkg/kubelet/server/stats" "k8s.io/utils/clock" @@ -211,6 +213,7 @@ func (p *criStatsProvider) listPodStatsPartiallyFromCRI(ctx context.Context, upd p.addPodNetworkStats(ps, podSandboxID, caInfos, cs, containerNetworkStats[podSandboxID]) p.addPodCPUMemoryStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) p.addSwapStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) + p.addIOStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) // If cadvisor stats is available for the container, use it to populate // container stats @@ -260,6 +263,7 @@ func (p *criStatsProvider) listPodStatsStrictlyFromCRI(ctx context.Context, upda addCRIPodCPUStats(ps, criSandboxStat) addCRIPodMemoryStats(ps, criSandboxStat) addCRIPodProcessStats(ps, criSandboxStat) + addCRIPodIOStats(ps, criSandboxStat) makePodStorageStats(ps, rootFsInfo, p.resourceAnalyzer, p.hostStatsProvider, true) summarySandboxStats = append(summarySandboxStats, *ps) } @@ -535,6 +539,7 @@ func (p *criStatsProvider) addPodCPUMemoryStats( usageNanoCores := getUint64Value(cs.CPU.UsageNanoCores) + getUint64Value(ps.CPU.UsageNanoCores) ps.CPU.UsageCoreNanoSeconds = &usageCoreNanoSeconds ps.CPU.UsageNanoCores = &usageNanoCores + // Pod level PSI stats cannot be calculated from container level } if cs.Memory != nil { @@ -555,6 +560,7 @@ func (p *criStatsProvider) addPodCPUMemoryStats( ps.Memory.RSSBytes = &rSSBytes ps.Memory.PageFaults = &pageFaults ps.Memory.MajorPageFaults = &majorPageFaults + // Pod level PSI stats cannot be calculated from container level } } @@ -564,14 +570,14 @@ func (p *criStatsProvider) addSwapStats( allInfos map[string]cadvisorapiv2.ContainerInfo, cs *statsapi.ContainerStats, ) { - // try get cpu and memory stats from cadvisor first. + // try get swap stats from cadvisor first. podCgroupInfo := getCadvisorPodInfoFromPodUID(podUID, allInfos) if podCgroupInfo != nil { ps.Swap = cadvisorInfoToSwapStats(podCgroupInfo) return } - // Sum Pod cpu and memory stats from containers stats. + // Sum Pod swap stats from containers stats. if cs.Swap != nil { if ps.Swap == nil { ps.Swap = &statsapi.SwapStats{Time: cs.Swap.Time} @@ -583,6 +589,30 @@ func (p *criStatsProvider) addSwapStats( } } +func (p *criStatsProvider) addIOStats( + ps *statsapi.PodStats, + podUID types.UID, + allInfos map[string]cadvisorapiv2.ContainerInfo, + cs *statsapi.ContainerStats, +) { + if !utilfeature.DefaultFeatureGate.Enabled(features.KubeletPSI) { + return + } + // try get IO stats from cadvisor first. + podCgroupInfo := getCadvisorPodInfoFromPodUID(podUID, allInfos) + if podCgroupInfo != nil { + ps.IO = cadvisorInfoToIOStats(podCgroupInfo) + return + } + + if cs.IO != nil { + if ps.IO == nil { + ps.IO = &statsapi.IOStats{Time: cs.IO.Time} + } + // Pod level PSI stats cannot be calculated from container level + } +} + func (p *criStatsProvider) addProcessStats( ps *statsapi.PodStats, container *cadvisorapiv2.ContainerInfo, @@ -624,6 +654,7 @@ func (p *criStatsProvider) makeContainerStats( if usageNanoCores != nil { result.CPU.UsageNanoCores = usageNanoCores } + result.CPU.PSI = makePSIStats(stats.Cpu.Psi) } else { result.CPU.Time = metav1.NewTime(time.Unix(0, time.Now().UnixNano())) result.CPU.UsageCoreNanoSeconds = uint64Ptr(0) @@ -634,6 +665,7 @@ func (p *criStatsProvider) makeContainerStats( if stats.Memory.WorkingSetBytes != nil { result.Memory.WorkingSetBytes = &stats.Memory.WorkingSetBytes.Value } + result.Memory.PSI = makePSIStats(stats.Memory.Psi) } else { result.Memory.Time = metav1.NewTime(time.Unix(0, time.Now().UnixNano())) result.Memory.WorkingSetBytes = uint64Ptr(0) @@ -651,6 +683,15 @@ func (p *criStatsProvider) makeContainerStats( result.Swap.SwapUsageBytes = uint64Ptr(0) result.Swap.SwapAvailableBytes = uint64Ptr(0) } + if utilfeature.DefaultFeatureGate.Enabled(features.KubeletPSI) { + result.IO = &statsapi.IOStats{} + if stats.Io != nil { + result.IO.Time = metav1.NewTime(time.Unix(0, stats.Io.Timestamp)) + result.IO.PSI = makePSIStats(stats.Io.Psi) + } else { + result.IO.Time = metav1.NewTime(time.Unix(0, time.Now().UnixNano())) + } + } if stats.WritableLayer != nil { result.Rootfs.Time = metav1.NewTime(time.Unix(0, stats.WritableLayer.Timestamp)) if stats.WritableLayer.UsedBytes != nil { @@ -714,6 +755,7 @@ func (p *criStatsProvider) makeContainerCPUAndMemoryStats( if usageNanoCores != nil { result.CPU.UsageNanoCores = usageNanoCores } + result.CPU.PSI = makePSIStats(stats.Cpu.Psi) } else { result.CPU.Time = metav1.NewTime(time.Unix(0, time.Now().UnixNano())) result.CPU.UsageCoreNanoSeconds = uint64Ptr(0) @@ -724,6 +766,7 @@ func (p *criStatsProvider) makeContainerCPUAndMemoryStats( if stats.Memory.WorkingSetBytes != nil { result.Memory.WorkingSetBytes = &stats.Memory.WorkingSetBytes.Value } + result.Memory.PSI = makePSIStats(stats.Memory.Psi) } else { result.Memory.Time = metav1.NewTime(time.Unix(0, time.Now().UnixNano())) result.Memory.WorkingSetBytes = uint64Ptr(0) @@ -732,6 +775,33 @@ func (p *criStatsProvider) makeContainerCPUAndMemoryStats( return result } +func makePSIStats(stats *runtimeapi.PsiStats) *statsapi.PSIStats { + if !utilfeature.DefaultFeatureGate.Enabled(features.KubeletPSI) { + return nil + } + if stats == nil { + return nil + } + result := &statsapi.PSIStats{} + if stats.Full != nil { + result.Full = statsapi.PSIData{ + Total: stats.Full.Total, + Avg10: stats.Full.Avg10, + Avg60: stats.Full.Avg60, + Avg300: stats.Full.Avg300, + } + } + if stats.Some != nil { + result.Some = statsapi.PSIData{ + Total: stats.Some.Total, + Avg10: stats.Some.Avg10, + Avg60: stats.Some.Avg60, + Avg300: stats.Some.Avg300, + } + } + return result +} + // getContainerUsageNanoCores first attempts to get the usage nano cores from the stats reported // by the CRI. If it is unable to, it gets the information from the cache instead. func (p *criStatsProvider) getContainerUsageNanoCores(stats *runtimeapi.ContainerStats) *uint64 { @@ -910,6 +980,13 @@ func (p *criStatsProvider) addCadvisorContainerStats( if swap != nil { cs.Swap = swap } + + if utilfeature.DefaultFeatureGate.Enabled(features.KubeletPSI) { + io := cadvisorInfoToIOStats(caPodStats) + if io != nil { + cs.IO = io + } + } } func (p *criStatsProvider) addCadvisorContainerCPUAndMemoryStats( diff --git a/pkg/kubelet/stats/cri_stats_provider_linux.go b/pkg/kubelet/stats/cri_stats_provider_linux.go index 9d8a0e479fa..953ce522c8d 100644 --- a/pkg/kubelet/stats/cri_stats_provider_linux.go +++ b/pkg/kubelet/stats/cri_stats_provider_linux.go @@ -25,8 +25,10 @@ import ( cadvisorapiv2 "github.com/google/cadvisor/info/v2" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilfeature "k8s.io/apiserver/pkg/util/feature" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" + "k8s.io/kubernetes/pkg/features" ) func (p *criStatsProvider) addCRIPodContainerStats(criSandboxStat *runtimeapi.PodSandboxStats, @@ -79,6 +81,7 @@ func addCRIPodMemoryStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandb RSSBytes: valueOfUInt64Value(criMemory.RssBytes), PageFaults: valueOfUInt64Value(criMemory.PageFaults), MajorPageFaults: valueOfUInt64Value(criMemory.MajorPageFaults), + PSI: makePSIStats(criMemory.Psi), } } @@ -91,6 +94,21 @@ func addCRIPodCPUStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxS Time: metav1.NewTime(time.Unix(0, criCPU.Timestamp)), UsageNanoCores: valueOfUInt64Value(criCPU.UsageNanoCores), UsageCoreNanoSeconds: valueOfUInt64Value(criCPU.UsageCoreNanoSeconds), + PSI: makePSIStats(criCPU.Psi), + } +} + +func addCRIPodIOStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxStats) { + if !utilfeature.DefaultFeatureGate.Enabled(features.KubeletPSI) { + return + } + if criPodStat == nil || criPodStat.Linux == nil || criPodStat.Linux.Io == nil { + return + } + criIO := criPodStat.Linux.Io + ps.IO = &statsapi.IOStats{ + Time: metav1.NewTime(time.Unix(0, criIO.Timestamp)), + PSI: makePSIStats(criIO.Psi), } } diff --git a/pkg/kubelet/stats/cri_stats_provider_others.go b/pkg/kubelet/stats/cri_stats_provider_others.go index 55ed8e7a20b..6d4a6ede772 100644 --- a/pkg/kubelet/stats/cri_stats_provider_others.go +++ b/pkg/kubelet/stats/cri_stats_provider_others.go @@ -50,3 +50,6 @@ func addCRIPodCPUStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxS func addCRIPodProcessStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxStats) { } + +func addCRIPodIOStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxStats) { +} diff --git a/pkg/kubelet/stats/cri_stats_provider_windows.go b/pkg/kubelet/stats/cri_stats_provider_windows.go index 16788a9092a..33c3f6ccd8b 100644 --- a/pkg/kubelet/stats/cri_stats_provider_windows.go +++ b/pkg/kubelet/stats/cri_stats_provider_windows.go @@ -236,6 +236,9 @@ func addCRIPodMemoryStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandb } } +func addCRIPodIOStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxStats) { +} + func addCRIPodProcessStats(ps *statsapi.PodStats, criPodStat *runtimeapi.PodSandboxStats) { if criPodStat == nil || criPodStat.Windows == nil || criPodStat.Windows.Process == nil { return