From 92a42e65ec2fad1ba636d6a53d682ad88c88526a Mon Sep 17 00:00:00 2001
From: Haowei Cai
Date: Thu, 20 Mar 2025 19:27:49 +0000
Subject: [PATCH] Surface cadvisor PSI metrics to summary API and prometheus
 metrics

---
 pkg/kubelet/cadvisor/cadvisor_linux.go       |  6 +++
 pkg/kubelet/server/server.go                 |  5 ++
 pkg/kubelet/server/stats/summary.go          |  5 ++
 pkg/kubelet/stats/cadvisor_stats_provider.go |  3 ++
 pkg/kubelet/stats/helper.go                  | 49 ++++++++++++++++++++
 5 files changed, 68 insertions(+)

diff --git a/pkg/kubelet/cadvisor/cadvisor_linux.go b/pkg/kubelet/cadvisor/cadvisor_linux.go
index c2756c6f123..2120adfcf9a 100644
--- a/pkg/kubelet/cadvisor/cadvisor_linux.go
+++ b/pkg/kubelet/cadvisor/cadvisor_linux.go
@@ -39,7 +39,9 @@ import (
 	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
 	"github.com/google/cadvisor/manager"
 	"github.com/google/cadvisor/utils/sysfs"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/klog/v2"
+	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/utils/ptr"
 )
 
@@ -93,6 +95,10 @@ func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots [
 		cadvisormetrics.OOMMetrics:          struct{}{},
 	}
 
+	if utilfeature.DefaultFeatureGate.Enabled(features.KubeletPSI) {
+		includedMetrics[cadvisormetrics.PressureMetrics] = struct{}{}
+	}
+
 	if usingLegacyStats || localStorageCapacityIsolation {
 		includedMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{}
 	}
diff --git a/pkg/kubelet/server/server.go b/pkg/kubelet/server/server.go
index c88e1d60d81..83d344d17b5 100644
--- a/pkg/kubelet/server/server.go
+++ b/pkg/kubelet/server/server.go
@@ -455,6 +455,11 @@ func (s *Server) InstallAuthNotRequiredHandlers() {
 		cadvisormetrics.ProcessMetrics:      struct{}{},
 		cadvisormetrics.OOMMetrics:          struct{}{},
 	}
+
+	if utilfeature.DefaultFeatureGate.Enabled(features.KubeletPSI) {
+		includedMetrics[cadvisormetrics.PressureMetrics] = struct{}{}
+	}
+
 	// cAdvisor metrics are exposed under the secured handler as well
 	r := compbasemetrics.NewKubeRegistry()
 	r.RawMustRegister(metrics.NewPrometheusMachineCollector(prometheusHostAdapter{s.host}, includedMetrics))
diff --git a/pkg/kubelet/server/stats/summary.go b/pkg/kubelet/server/stats/summary.go
index afc2f475a65..0e21717ddd0 100644
--- a/pkg/kubelet/server/stats/summary.go
+++ b/pkg/kubelet/server/stats/summary.go
@@ -24,7 +24,9 @@ import (
 	"k8s.io/klog/v2"
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
+	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/pkg/kubelet/util"
 )
 
@@ -113,6 +115,9 @@ func (sp *summaryProviderImpl) Get(ctx context.Context, updateStats bool) (*stat
 		Rlimit:           rlimit,
 		SystemContainers: sp.GetSystemContainersStats(nodeConfig, podStats, updateStats),
 	}
+	if utilfeature.DefaultFeatureGate.Enabled(features.KubeletPSI) {
+		nodeStats.IO = rootStats.IO
+	}
 	summary := statsapi.Summary{
 		Node: nodeStats,
 		Pods: podStats,
diff --git a/pkg/kubelet/stats/cadvisor_stats_provider.go b/pkg/kubelet/stats/cadvisor_stats_provider.go
index 7813efbd0bc..52ad55a8b1f 100644
--- a/pkg/kubelet/stats/cadvisor_stats_provider.go
+++ b/pkg/kubelet/stats/cadvisor_stats_provider.go
@@ -156,6 +156,9 @@ func (p *cadvisorStatsProvider) ListPodStats(ctx context.Context) ([]statsapi.Po
 			podStats.CPU = cpu
 			podStats.Memory = memory
 			podStats.Swap = cadvisorInfoToSwapStats(podInfo)
+			if utilfeature.DefaultFeatureGate.Enabled(features.KubeletPSI) {
+				podStats.IO = cadvisorInfoToIOStats(podInfo)
+			}
 			// ProcessStats were accumulated as the containers were iterated.
 		}
 
diff --git a/pkg/kubelet/stats/helper.go b/pkg/kubelet/stats/helper.go
index c51d6e4f276..8099be21e7e 100644
--- a/pkg/kubelet/stats/helper.go
+++ b/pkg/kubelet/stats/helper.go
@@ -24,8 +24,10 @@ import (
 	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/klog/v2"
 	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
+	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
 	"k8s.io/kubernetes/pkg/kubelet/server/stats"
 )
@@ -53,6 +55,9 @@ func cadvisorInfoToCPUandMemoryStats(info *cadvisorapiv2.ContainerInfo) (*statsa
 		}
 		if cstat.Cpu != nil {
 			cpuStats.UsageCoreNanoSeconds = &cstat.Cpu.Usage.Total
+			if utilfeature.DefaultFeatureGate.Enabled(features.KubeletPSI) {
+				cpuStats.PSI = cadvisorPSIToStatsPSI(&cstat.Cpu.PSI)
+			}
 		}
 	}
 	if info.Spec.HasMemory && cstat.Memory != nil {
@@ -71,6 +76,9 @@ func cadvisorInfoToCPUandMemoryStats(info *cadvisorapiv2.ContainerInfo) (*statsa
 			availableBytes := info.Spec.Memory.Limit - cstat.Memory.WorkingSet
 			memoryStats.AvailableBytes = &availableBytes
 		}
+		if utilfeature.DefaultFeatureGate.Enabled(features.KubeletPSI) {
+			memoryStats.PSI = cadvisorPSIToStatsPSI(&cstat.Memory.PSI)
+		}
 	} else {
 		memoryStats = &statsapi.MemoryStats{
 			Time:            metav1.NewTime(cstat.Timestamp),
@@ -96,6 +104,9 @@ func cadvisorInfoToContainerStats(name string, info *cadvisorapiv2.ContainerInfo
 	result.CPU = cpu
 	result.Memory = memory
 	result.Swap = cadvisorInfoToSwapStats(info)
+	if utilfeature.DefaultFeatureGate.Enabled(features.KubeletPSI) {
+		result.IO = cadvisorInfoToIOStats(info)
+	}
 
 	// NOTE: if they can be found, log stats will be overwritten
 	// by the caller, as it knows more information about the pod,
@@ -307,6 +318,24 @@ func cadvisorInfoToSwapStats(info *cadvisorapiv2.ContainerInfo) *statsapi.SwapSt
 	return swapStats
 }
 
+func cadvisorInfoToIOStats(info *cadvisorapiv2.ContainerInfo) *statsapi.IOStats {
+	cstat, found := latestContainerStats(info)
+	if !found {
+		return nil
+	}
+
+	var ioStats *statsapi.IOStats
+
+	if info.Spec.HasDiskIo && cstat.DiskIo != nil {
+		ioStats = &statsapi.IOStats{
+			Time: metav1.NewTime(cstat.Timestamp),
+			PSI:  cadvisorPSIToStatsPSI(&cstat.DiskIo.PSI),
+		}
+	}
+
+	return ioStats
+}
+
 // latestContainerStats returns the latest container stats from cadvisor, or nil if none exist
 func latestContainerStats(info *cadvisorapiv2.ContainerInfo) (*cadvisorapiv2.ContainerStats, bool) {
 	stats := info.Stats
@@ -493,3 +522,23 @@ func makePodStorageStats(s *statsapi.PodStats, rootFsInfo *cadvisorapiv2.FsInfo,
 	}
 	s.EphemeralStorage = calcEphemeralStorage(s.Containers, ephemeralStats, rootFsInfo, logStats, etcHostsStats, isCRIStatsProvider)
 }
+
+func cadvisorPSIToStatsPSI(psi *cadvisorapiv1.PSIStats) *statsapi.PSIStats {
+	if psi == nil {
+		return nil
+	}
+	return &statsapi.PSIStats{
+		Full: statsapi.PSIData{
+			Total:  psi.Full.Total,
+			Avg10:  psi.Full.Avg10,
+			Avg60:  psi.Full.Avg60,
+			Avg300: psi.Full.Avg300,
+		},
+		Some: statsapi.PSIData{
+			Total:  psi.Some.Total,
+			Avg10:  psi.Some.Avg10,
+			Avg60:  psi.Some.Avg60,
+			Avg300: psi.Some.Avg300,
+		},
+	}
+}