diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go index 8050613e672..8c57bc2d25d 100644 --- a/pkg/kubelet/eviction/helpers.go +++ b/pkg/kubelet/eviction/helpers.go @@ -738,7 +738,6 @@ func process(stats statsFunc) cmpFunc { p1Process := processUsage(p1Stats.ProcessStats) p2Process := processUsage(p2Stats.ProcessStats) - // prioritize evicting the pod which has the larger consumption of process return int(p2Process - p1Process) } } diff --git a/pkg/kubelet/stats/cadvisor_stats_provider.go b/pkg/kubelet/stats/cadvisor_stats_provider.go index b2330814de2..25f8683d1c1 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider.go @@ -139,6 +139,8 @@ func (p *cadvisorStatsProvider) ListPodStats(_ context.Context) ([]statsapi.PodS } podStats.Containers = append(podStats.Containers, *containerStat) } + // Either way, collect process stats + podStats.ProcessStats = mergeProcessStats(podStats.ProcessStats, cadvisorInfoToProcessStats(&cinfo)) } // Add each PodStats to the result. @@ -154,7 +156,7 @@ func (p *cadvisorStatsProvider) ListPodStats(_ context.Context) ([]statsapi.PodS podStats.CPU = cpu podStats.Memory = memory podStats.Swap = cadvisorInfoToSwapStats(podInfo) - podStats.ProcessStats = cadvisorInfoToProcessStats(podInfo) + // ProcessStats were accumulated as the containers were iterated. } status, found := p.statusProvider.GetPodStatus(podUID) diff --git a/pkg/kubelet/stats/cri_stats_provider.go b/pkg/kubelet/stats/cri_stats_provider.go index 243e8eeb590..9d8091e62c1 100644 --- a/pkg/kubelet/stats/cri_stats_provider.go +++ b/pkg/kubelet/stats/cri_stats_provider.go @@ -211,7 +211,6 @@ func (p *criStatsProvider) listPodStatsPartiallyFromCRI(ctx context.Context, upd p.addPodNetworkStats(ps, podSandboxID, caInfos, cs, containerNetworkStats[podSandboxID]) p.addPodCPUMemoryStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) p.addSwapStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) - p.addProcessStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs) // If cadvisor stats is available for the container, use it to populate // container stats @@ -220,7 +219,9 @@ func (p *criStatsProvider) listPodStatsPartiallyFromCRI(ctx context.Context, upd klog.V(5).InfoS("Unable to find cadvisor stats for container", "containerID", containerID) } else { p.addCadvisorContainerStats(cs, &caStats) + p.addProcessStats(ps, &caStats) } + ps.Containers = append(ps.Containers, *cs) } // cleanup outdated caches. @@ -584,16 +585,11 @@ func (p *criStatsProvider) addSwapStats( func (p *criStatsProvider) addProcessStats( ps *statsapi.PodStats, - podUID types.UID, - allInfos map[string]cadvisorapiv2.ContainerInfo, - cs *statsapi.ContainerStats, + container *cadvisorapiv2.ContainerInfo, ) { - // try get process stats from cadvisor only. - info := getCadvisorPodInfoFromPodUID(podUID, allInfos) - if info != nil { - ps.ProcessStats = cadvisorInfoToProcessStats(info) - return - } + processStats := cadvisorInfoToProcessStats(container) + // Sum up all of the process stats for each of the containers to obtain the cumulative pod level process count + ps.ProcessStats = mergeProcessStats(ps.ProcessStats, processStats) } func (p *criStatsProvider) makeContainerStats( diff --git a/pkg/kubelet/stats/helper.go b/pkg/kubelet/stats/helper.go index 36e8a3030cb..e90d96d6aec 100644 --- a/pkg/kubelet/stats/helper.go +++ b/pkg/kubelet/stats/helper.go @@ -168,6 +168,31 @@ func cadvisorInfoToProcessStats(info *cadvisorapiv2.ContainerInfo) *statsapi.Pro return &statsapi.ProcessStats{ProcessCount: uint64Ptr(num)} } +func mergeProcessStats(first *statsapi.ProcessStats, second *statsapi.ProcessStats) *statsapi.ProcessStats { + if first == nil && second == nil { + return nil + } + + if first == nil { + return second + } + if second == nil { + return first + } + + firstProcessCount := uint64(0) + if first.ProcessCount != nil { + firstProcessCount = *first.ProcessCount + } + + secondProcessCount := uint64(0) + if second.ProcessCount != nil { + secondProcessCount = *second.ProcessCount + } + + return &statsapi.ProcessStats{ProcessCount: uint64Ptr(firstProcessCount + secondProcessCount)} +} + // cadvisorInfoToNetworkStats returns the statsapi.NetworkStats converted from // the container info from cadvisor. func cadvisorInfoToNetworkStats(info *cadvisorapiv2.ContainerInfo) *statsapi.NetworkStats { diff --git a/pkg/kubelet/stats/helper_test.go b/pkg/kubelet/stats/helper_test.go index 38486bd9f85..d0bc23b69fa 100644 --- a/pkg/kubelet/stats/helper_test.go +++ b/pkg/kubelet/stats/helper_test.go @@ -22,10 +22,12 @@ import ( cadvisorapiv1 "github.com/google/cadvisor/info/v1" cadvisorapiv2 "github.com/google/cadvisor/info/v2" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" + "k8s.io/utils/ptr" ) func TestCustomMetrics(t *testing.T) { @@ -97,3 +99,44 @@ func TestCustomMetrics(t *testing.T) { Value: 2.1, }) } + +func TestMergeProcessStats(t *testing.T) { + for _, tc := range []struct { + desc string + first *statsapi.ProcessStats + second *statsapi.ProcessStats + expected *statsapi.ProcessStats + }{ + { + desc: "both nil", + first: nil, + second: nil, + expected: nil, + }, + { + desc: "first non-nil, second not", + first: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)}, + second: nil, + expected: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)}, + }, + { + desc: "first nil, second non-nil", + first: nil, + second: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)}, + expected: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)}, + }, + { + desc: "both non nill", + first: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)}, + second: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)}, + expected: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](200)}, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + got := mergeProcessStats(tc.first, tc.second) + if diff := cmp.Diff(tc.expected, got); diff != "" { + t.Fatalf("Unexpected diff on process stats (-want,+got):\n%s", diff) + } + }) + } +} diff --git a/test/e2e_node/eviction_test.go b/test/e2e_node/eviction_test.go index 2f3375e3e65..c518fdb42cb 100644 --- a/test/e2e_node/eviction_test.go +++ b/test/e2e_node/eviction_test.go @@ -475,6 +475,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo highPriorityClassName := f.BaseName + "-high-priority" highPriority := int32(999999999) + processes := 30000 ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() { tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) { @@ -497,7 +498,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo specs := []podEvictSpec{ { evictionPriority: 2, - pod: pidConsumingPod("fork-bomb-container-with-low-priority", 12000), + pod: pidConsumingPod("fork-bomb-container-with-low-priority", processes), }, { evictionPriority: 0, @@ -505,7 +506,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo }, { evictionPriority: 1, - pod: pidConsumingPod("fork-bomb-container-with-high-priority", 12000), + pod: pidConsumingPod("fork-bomb-container-with-high-priority", processes), }, } specs[1].pod.Spec.PriorityClassName = highPriorityClassName @@ -524,7 +525,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo specs := []podEvictSpec{ { evictionPriority: 1, - pod: pidConsumingPod("fork-bomb-container", 30000), + pod: pidConsumingPod("fork-bomb-container", processes), wantPodDisruptionCondition: ptr.To(v1.DisruptionTarget), }, } diff --git a/test/e2e_node/summary_test.go b/test/e2e_node/summary_test.go index 948d3605cf5..77c2fa394a6 100644 --- a/test/e2e_node/summary_test.go +++ b/test/e2e_node/summary_test.go @@ -259,7 +259,7 @@ var _ = SIGDescribe("Summary API", framework.WithNodeConformance(), func() { "InodesUsed": bounded(0, 1e8), }), "ProcessStats": ptrMatchAllFields(gstruct.Fields{ - "ProcessCount": bounded(0, 1e8), + "ProcessCount": bounded(1, 1e8), }), })