From f58b46cb97ff9007e4d7f5cf9994675d79060a0b Mon Sep 17 00:00:00 2001 From: David Porter Date: Fri, 13 Jan 2023 15:31:23 -0800 Subject: [PATCH 1/5] fix process stats Signed-off-by: David Porter --- pkg/kubelet/stats/cri_stats_provider.go | 17 ++++------ pkg/kubelet/stats/helper.go | 25 ++++++++++++++ pkg/kubelet/stats/helper_test.go | 43 +++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 10 deletions(-) diff --git a/pkg/kubelet/stats/cri_stats_provider.go b/pkg/kubelet/stats/cri_stats_provider.go index 243e8eeb590..0ecbc1d53c9 100644 --- a/pkg/kubelet/stats/cri_stats_provider.go +++ b/pkg/kubelet/stats/cri_stats_provider.go @@ -211,7 +211,6 @@ func (p *criStatsProvider) listPodStatsPartiallyFromCRI(ctx context.Context, upd p.addPodNetworkStats(ps, podSandboxID, caInfos, cs, containerNetworkStats[podSandboxID]) p.addPodCPUMemoryStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) p.addSwapStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) - p.addProcessStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) // If cadvisor stats is available for the container, use it to populate // container stats @@ -220,7 +219,9 @@ func (p *criStatsProvider) listPodStatsPartiallyFromCRI(ctx context.Context, upd klog.V(5).InfoS("Unable to find cadvisor stats for container", "containerID", containerID) } else { p.addCadvisorContainerStats(cs, &caStats) + p.addProcessStats(ps, &caStats) } + ps.Containers = append(ps.Containers, *cs) } // cleanup outdated caches. @@ -584,16 +585,12 @@ func (p *criStatsProvider) addSwapStats( func (p *criStatsProvider) addProcessStats( ps *statsapi.PodStats, - podUID types.UID, - allInfos map[string]cadvisorapiv2.ContainerInfo, - cs *statsapi.ContainerStats, + container *cadvisorapiv2.ContainerInfo, ) { - // try get process stats from cadvisor only. - info := getCadvisorPodInfoFromPodUID(podUID, allInfos) - if info != nil { - ps.ProcessStats = cadvisorInfoToProcessStats(info) - return - } + processStats := cadvisorInfoToProcessStats(container) + // Sum up all of the process stats for each of the containers to obtain the cumulative pod level process count + ps.ProcessStats = mergeProcessStats(ps.ProcessStats, processStats) + return } func (p *criStatsProvider) makeContainerStats( diff --git a/pkg/kubelet/stats/helper.go b/pkg/kubelet/stats/helper.go index c6ca3a064e0..27882df9359 100644 --- a/pkg/kubelet/stats/helper.go +++ b/pkg/kubelet/stats/helper.go @@ -168,6 +168,31 @@ func cadvisorInfoToProcessStats(info *cadvisorapiv2.ContainerInfo) *statsapi.Pro return &statsapi.ProcessStats{ProcessCount: uint64Ptr(num)} } +func mergeProcessStats(first *statsapi.ProcessStats, second *statsapi.ProcessStats) *statsapi.ProcessStats { + if first == nil && second == nil { + return nil + } + + if first == nil { + return second + } + if second == nil { + return first + } + + firstProcessCount := uint64(0) + if first.ProcessCount != nil { + firstProcessCount = *first.ProcessCount + } + + secondProcessCount := uint64(0) + if second.ProcessCount != nil { + secondProcessCount = *second.ProcessCount + } + + return &statsapi.ProcessStats{ProcessCount: uint64Ptr(firstProcessCount + secondProcessCount)} +} + // cadvisorInfoToNetworkStats returns the statsapi.NetworkStats converted from // the container info from cadvisor. func cadvisorInfoToNetworkStats(info *cadvisorapiv2.ContainerInfo) *statsapi.NetworkStats { diff --git a/pkg/kubelet/stats/helper_test.go b/pkg/kubelet/stats/helper_test.go index 38486bd9f85..050a8102c9c 100644 --- a/pkg/kubelet/stats/helper_test.go +++ b/pkg/kubelet/stats/helper_test.go @@ -22,10 +22,12 @@ import ( cadvisorapiv1 "github.com/google/cadvisor/info/v1" cadvisorapiv2 "github.com/google/cadvisor/info/v2" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" + "k8s.io/utils/pointer" ) func TestCustomMetrics(t *testing.T) { @@ -97,3 +99,44 @@ func TestCustomMetrics(t *testing.T) { Value: 2.1, }) } + +func TestMergeProcessStats(t *testing.T) { + for _, tc := range []struct { + desc string + first *statsapi.ProcessStats + second *statsapi.ProcessStats + expected *statsapi.ProcessStats + }{ + { + desc: "both nil", + first: nil, + second: nil, + expected: nil, + }, + { + desc: "first non-nil, second not", + first: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(100)}, + second: nil, + expected: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(100)}, + }, + { + desc: "first nil, second non-nil", + first: nil, + second: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(100)}, + expected: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(100)}, + }, + { + desc: "both non nill", + first: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(100)}, + second: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(100)}, + expected: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(200)}, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + got := mergeProcessStats(tc.first, tc.second) + if diff := cmp.Diff(tc.expected, got); diff != "" { + t.Fatalf("Unexpected diff on process stats (-want,+got):\n%s", diff) + } + }) + } +} From 6e6b2b76a30936e99a8379d24db006b984a154eb Mon Sep 17 00:00:00 2001 From: David Porter Date: Thu, 19 Jan 2023 15:15:24 -0800 Subject: [PATCH 2/5] test: Update summary test to check for process count The process count is expected to always be >= 1 for pods in the test. Let's check it's >= 1, so we can catch issues if the proecss count is not reported. Signed-off-by: David Porter Signed-off-by: Paco Xu --- pkg/kubelet/stats/cri_stats_provider.go | 1 - pkg/kubelet/stats/helper_test.go | 16 ++++++++-------- test/e2e_node/summary_test.go | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pkg/kubelet/stats/cri_stats_provider.go b/pkg/kubelet/stats/cri_stats_provider.go index 0ecbc1d53c9..9d8091e62c1 100644 --- a/pkg/kubelet/stats/cri_stats_provider.go +++ b/pkg/kubelet/stats/cri_stats_provider.go @@ -590,7 +590,6 @@ func (p *criStatsProvider) addProcessStats( processStats := cadvisorInfoToProcessStats(container) // Sum up all of the process stats for each of the containers to obtain the cumulative pod level process count ps.ProcessStats = mergeProcessStats(ps.ProcessStats, processStats) - return } func (p *criStatsProvider) makeContainerStats( diff --git a/pkg/kubelet/stats/helper_test.go b/pkg/kubelet/stats/helper_test.go index 050a8102c9c..d0bc23b69fa 100644 --- a/pkg/kubelet/stats/helper_test.go +++ b/pkg/kubelet/stats/helper_test.go @@ -27,7 +27,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" ) func TestCustomMetrics(t *testing.T) { @@ -115,21 +115,21 @@ func TestMergeProcessStats(t *testing.T) { }, { desc: "first non-nil, second not", - first: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(100)}, + first: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)}, second: nil, - expected: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(100)}, + expected: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)}, }, { desc: "first nil, second non-nil", first: nil, - second: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(100)}, - expected: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(100)}, + second: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)}, + expected: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)}, }, { desc: "both non nill", - first: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(100)}, - second: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(100)}, - expected: &statsapi.ProcessStats{ProcessCount: pointer.Uint64(200)}, + first: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)}, + second: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)}, + expected: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](200)}, }, } { t.Run(tc.desc, func(t *testing.T) { diff --git a/test/e2e_node/summary_test.go b/test/e2e_node/summary_test.go index 948d3605cf5..77c2fa394a6 100644 --- a/test/e2e_node/summary_test.go +++ b/test/e2e_node/summary_test.go @@ -259,7 +259,7 @@ var _ = SIGDescribe("Summary API", framework.WithNodeConformance(), func() { "InodesUsed": bounded(0, 1e8), }), "ProcessStats": ptrMatchAllFields(gstruct.Fields{ - "ProcessCount": bounded(0, 1e8), + "ProcessCount": bounded(1, 1e8), }), }) From 7d8ba7849b1f50a9fb5042f085b9490e374732e8 Mon Sep 17 00:00:00 2001 From: Kevin Hannon Date: Mon, 12 Feb 2024 12:03:57 -0500 Subject: [PATCH 3/5] priority pid tests should match on processes pids 0 process should not be nonzero --- pkg/kubelet/eviction/helpers.go | 1 - test/e2e_node/eviction_test.go | 7 ++++--- test/e2e_node/summary_test.go | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go index 8050613e672..8c57bc2d25d 100644 --- a/pkg/kubelet/eviction/helpers.go +++ b/pkg/kubelet/eviction/helpers.go @@ -738,7 +738,6 @@ func process(stats statsFunc) cmpFunc { p1Process := processUsage(p1Stats.ProcessStats) p2Process := processUsage(p2Stats.ProcessStats) - // prioritize evicting the pod which has the larger consumption of process return int(p2Process - p1Process) } } diff --git a/test/e2e_node/eviction_test.go b/test/e2e_node/eviction_test.go index 2f3375e3e65..c518fdb42cb 100644 --- a/test/e2e_node/eviction_test.go +++ b/test/e2e_node/eviction_test.go @@ -475,6 +475,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo highPriorityClassName := f.BaseName + "-high-priority" highPriority := int32(999999999) + processes := 30000 ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() { tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) { @@ -497,7 +498,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo specs := []podEvictSpec{ { evictionPriority: 2, - pod: pidConsumingPod("fork-bomb-container-with-low-priority", 12000), + pod: pidConsumingPod("fork-bomb-container-with-low-priority", processes), }, { evictionPriority: 0, @@ -505,7 +506,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo }, { evictionPriority: 1, - pod: pidConsumingPod("fork-bomb-container-with-high-priority", 12000), + pod: pidConsumingPod("fork-bomb-container-with-high-priority", processes), }, } specs[1].pod.Spec.PriorityClassName = highPriorityClassName @@ -524,7 +525,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo specs := []podEvictSpec{ { evictionPriority: 1, - pod: pidConsumingPod("fork-bomb-container", 30000), + pod: pidConsumingPod("fork-bomb-container", processes), wantPodDisruptionCondition: ptr.To(v1.DisruptionTarget), }, } diff --git a/test/e2e_node/summary_test.go b/test/e2e_node/summary_test.go index 77c2fa394a6..948d3605cf5 100644 --- a/test/e2e_node/summary_test.go +++ b/test/e2e_node/summary_test.go @@ -259,7 +259,7 @@ var _ = SIGDescribe("Summary API", framework.WithNodeConformance(), func() { "InodesUsed": bounded(0, 1e8), }), "ProcessStats": ptrMatchAllFields(gstruct.Fields{ - "ProcessCount": bounded(1, 1e8), + "ProcessCount": bounded(0, 1e8), }), }) From 5fd7219cf494df0f30c331b5395ff6b108f1fe73 Mon Sep 17 00:00:00 2001 From: Peter Hunt Date: Thu, 28 Mar 2024 12:41:54 -0400 Subject: [PATCH 4/5] kubelet/stats: fix pid stats for cadvisor stats provider the process stats aren't correct coming from only the pod stats. They need to be summed for all of the containers, as cadvisor is only reading per pid (per container process) Signed-off-by: Peter Hunt --- pkg/kubelet/stats/cadvisor_stats_provider.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/kubelet/stats/cadvisor_stats_provider.go b/pkg/kubelet/stats/cadvisor_stats_provider.go index 1fdf6ddfda4..6413e871d5f 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider.go @@ -139,6 +139,8 @@ func (p *cadvisorStatsProvider) ListPodStats(_ context.Context) ([]statsapi.PodS } podStats.Containers = append(podStats.Containers, *containerStat) } + // Either way, collect process stats + podStats.ProcessStats = mergeProcessStats(podStats.ProcessStats, cadvisorInfoToProcessStats(&cinfo)) } // Add each PodStats to the result. @@ -154,7 +156,7 @@ func (p *cadvisorStatsProvider) ListPodStats(_ context.Context) ([]statsapi.PodS podStats.CPU = cpu podStats.Memory = memory podStats.Swap = cadvisorInfoToSwapStats(podInfo) - podStats.ProcessStats = cadvisorInfoToProcessStats(podInfo) + // ProcessStats were accumulated as the containers were iterated. } status, found := p.statusProvider.GetPodStatus(podUID) From 0979ba9cb8c48a073f354f5ed49d29b4e803bc3a Mon Sep 17 00:00:00 2001 From: Peter Hunt Date: Thu, 28 Mar 2024 13:22:10 -0400 Subject: [PATCH 5/5] kubelet/stats: verify there is at least one process in each container 0 processes is too low a bar to be meaningfully testing that the process stats are being reported. Signed-off-by: Peter Hunt --- test/e2e_node/summary_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e_node/summary_test.go b/test/e2e_node/summary_test.go index 948d3605cf5..77c2fa394a6 100644 --- a/test/e2e_node/summary_test.go +++ b/test/e2e_node/summary_test.go @@ -259,7 +259,7 @@ var _ = SIGDescribe("Summary API", framework.WithNodeConformance(), func() { "InodesUsed": bounded(0, 1e8), }), "ProcessStats": ptrMatchAllFields(gstruct.Fields{ - "ProcessCount": bounded(0, 1e8), + "ProcessCount": bounded(1, 1e8), }), })