Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-24 04:06:03 +00:00)
Merge pull request #124101 from haircommander/process_stats-with-pid-fix
kubelet: fix PID based eviction
Commit: e9d9a82839
@@ -738,7 +738,6 @@ func process(stats statsFunc) cmpFunc {
 		p1Process := processUsage(p1Stats.ProcessStats)
 		p2Process := processUsage(p2Stats.ProcessStats)
 		// prioritize evicting the pod which has the larger consumption of process
 		return int(p2Process - p1Process)
 	}
 }
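The comparator above ranks pods for PID-based eviction by the value returned from processUsage, a helper that is not part of this diff. A minimal sketch of such a helper, assuming a nil ProcessStats or ProcessCount is treated as zero usage (statsapi is k8s.io/kubelet/pkg/apis/stats/v1alpha1):

	// processUsage returns the pod's current process count, treating missing
	// stats as zero usage (sketch; not taken from this commit).
	func processUsage(processStats *statsapi.ProcessStats) uint64 {
		if processStats == nil || processStats.ProcessCount == nil {
			return 0
		}
		return *processStats.ProcessCount
	}

With ProcessStats now merged across a pod's containers (see the provider hunks below), this value reflects the whole pod rather than a single cgroup, which is what makes the ranking meaningful.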
@@ -139,6 +139,8 @@ func (p *cadvisorStatsProvider) ListPodStats(_ context.Context) ([]statsapi.PodS
 			}
 			podStats.Containers = append(podStats.Containers, *containerStat)
 		}
+		// Either way, collect process stats
+		podStats.ProcessStats = mergeProcessStats(podStats.ProcessStats, cadvisorInfoToProcessStats(&cinfo))
 	}

 	// Add each PodStats to the result.
@@ -154,7 +156,7 @@ func (p *cadvisorStatsProvider) ListPodStats(_ context.Context) ([]statsapi.PodS
 			podStats.CPU = cpu
 			podStats.Memory = memory
 			podStats.Swap = cadvisorInfoToSwapStats(podInfo)
-			podStats.ProcessStats = cadvisorInfoToProcessStats(podInfo)
+			// ProcessStats were accumulated as the containers were iterated.
 		}

 		status, found := p.statusProvider.GetPodStatus(podUID)
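The net effect of the two hunks above is that the cadvisor-based provider now builds the pod-level process count up container by container instead of overwriting it from the pod cgroup's info at the end. A short sketch of that accumulation with hypothetical per-container counts (perContainer, podStats and the numbers are illustrative only):

	// Sketch (not from this commit): fold per-container process stats into a pod
	// total, relying on mergeProcessStats' nil handling.
	three, four := uint64(3), uint64(4)
	perContainer := []*statsapi.ProcessStats{
		{ProcessCount: &three},
		nil, // a container without cadvisor process stats contributes nothing
		{ProcessCount: &four},
	}
	var total *statsapi.ProcessStats
	for _, cs := range perContainer {
		total = mergeProcessStats(total, cs)
	}
	podStats.ProcessStats = total // ProcessCount == 7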
@@ -211,7 +211,6 @@ func (p *criStatsProvider) listPodStatsPartiallyFromCRI(ctx context.Context, upd
 		p.addPodNetworkStats(ps, podSandboxID, caInfos, cs, containerNetworkStats[podSandboxID])
 		p.addPodCPUMemoryStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs)
 		p.addSwapStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs)
-		p.addProcessStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs)

 		// If cadvisor stats is available for the container, use it to populate
 		// container stats
@@ -220,7 +219,9 @@ func (p *criStatsProvider) listPodStatsPartiallyFromCRI(ctx context.Context, upd
 			klog.V(5).InfoS("Unable to find cadvisor stats for container", "containerID", containerID)
 		} else {
 			p.addCadvisorContainerStats(cs, &caStats)
+			p.addProcessStats(ps, &caStats)
 		}

 		ps.Containers = append(ps.Containers, *cs)
 	}
 	// cleanup outdated caches.
@@ -584,16 +585,11 @@ func (p *criStatsProvider) addSwapStats(

 func (p *criStatsProvider) addProcessStats(
 	ps *statsapi.PodStats,
-	podUID types.UID,
-	allInfos map[string]cadvisorapiv2.ContainerInfo,
-	cs *statsapi.ContainerStats,
+	container *cadvisorapiv2.ContainerInfo,
 ) {
-	// try get process stats from cadvisor only.
-	info := getCadvisorPodInfoFromPodUID(podUID, allInfos)
-	if info != nil {
-		ps.ProcessStats = cadvisorInfoToProcessStats(info)
-		return
-	}
+	processStats := cadvisorInfoToProcessStats(container)
+	// Sum up all of the process stats for each of the containers to obtain the cumulative pod level process count
+	ps.ProcessStats = mergeProcessStats(ps.ProcessStats, processStats)
 }

 func (p *criStatsProvider) makeContainerStats(
@@ -168,6 +168,31 @@ func cadvisorInfoToProcessStats(info *cadvisorapiv2.ContainerInfo) *statsapi.Pro
 	return &statsapi.ProcessStats{ProcessCount: uint64Ptr(num)}
 }

+func mergeProcessStats(first *statsapi.ProcessStats, second *statsapi.ProcessStats) *statsapi.ProcessStats {
+	if first == nil && second == nil {
+		return nil
+	}
+
+	if first == nil {
+		return second
+	}
+	if second == nil {
+		return first
+	}
+
+	firstProcessCount := uint64(0)
+	if first.ProcessCount != nil {
+		firstProcessCount = *first.ProcessCount
+	}
+
+	secondProcessCount := uint64(0)
+	if second.ProcessCount != nil {
+		secondProcessCount = *second.ProcessCount
+	}
+
+	return &statsapi.ProcessStats{ProcessCount: uint64Ptr(firstProcessCount + secondProcessCount)}
+}
+
 // cadvisorInfoToNetworkStats returns the statsapi.NetworkStats converted from
 // the container info from cadvisor.
 func cadvisorInfoToNetworkStats(info *cadvisorapiv2.ContainerInfo) *statsapi.NetworkStats {
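uint64Ptr, used in both helpers above, is a small package-local helper that is not shown in this diff; given how it is used, it is presumably a plain pointer constructor along these lines (sketch):

	// uint64Ptr returns a pointer to the given uint64 (assumed shape of the
	// existing helper; not taken from this commit).
	func uint64Ptr(i uint64) *uint64 {
		return &i
	}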
@@ -22,10 +22,12 @@ import (

 	cadvisorapiv1 "github.com/google/cadvisor/info/v1"
 	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
+	"github.com/google/go-cmp/cmp"
 	"github.com/stretchr/testify/assert"

 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
+	"k8s.io/utils/ptr"
 )

 func TestCustomMetrics(t *testing.T) {
@@ -97,3 +99,44 @@ func TestCustomMetrics(t *testing.T) {
 		Value: 2.1,
 	})
 }
+
+func TestMergeProcessStats(t *testing.T) {
+	for _, tc := range []struct {
+		desc     string
+		first    *statsapi.ProcessStats
+		second   *statsapi.ProcessStats
+		expected *statsapi.ProcessStats
+	}{
+		{
+			desc:     "both nil",
+			first:    nil,
+			second:   nil,
+			expected: nil,
+		},
+		{
+			desc:     "first non-nil, second nil",
+			first:    &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)},
+			second:   nil,
+			expected: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)},
+		},
+		{
+			desc:     "first nil, second non-nil",
+			first:    nil,
+			second:   &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)},
+			expected: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)},
+		},
+		{
+			desc:     "both non-nil",
+			first:    &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)},
+			second:   &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](100)},
+			expected: &statsapi.ProcessStats{ProcessCount: ptr.To[uint64](200)},
+		},
+	} {
+		t.Run(tc.desc, func(t *testing.T) {
+			got := mergeProcessStats(tc.first, tc.second)
+			if diff := cmp.Diff(tc.expected, got); diff != "" {
+				t.Fatalf("Unexpected diff on process stats (-want,+got):\n%s", diff)
+			}
+		})
+	}
+}
@@ -475,6 +475,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo

 	highPriorityClassName := f.BaseName + "-high-priority"
 	highPriority := int32(999999999)
+	processes := 30000

 	ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
@@ -497,7 +498,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo
 		specs := []podEvictSpec{
 			{
 				evictionPriority: 2,
-				pod: pidConsumingPod("fork-bomb-container-with-low-priority", 12000),
+				pod: pidConsumingPod("fork-bomb-container-with-low-priority", processes),
 			},
 			{
 				evictionPriority: 0,
@@ -505,7 +506,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo
 			},
 			{
 				evictionPriority: 1,
-				pod: pidConsumingPod("fork-bomb-container-with-high-priority", 12000),
+				pod: pidConsumingPod("fork-bomb-container-with-high-priority", processes),
 			},
 		}
 		specs[1].pod.Spec.PriorityClassName = highPriorityClassName
@@ -524,7 +525,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo
 		specs := []podEvictSpec{
 			{
 				evictionPriority: 1,
-				pod: pidConsumingPod("fork-bomb-container", 30000),
+				pod: pidConsumingPod("fork-bomb-container", processes),
 				wantPodDisruptionCondition: ptr.To(v1.DisruptionTarget),
 			},
 		}
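pidConsumingPod and podEvictSpec are e2e_node test helpers that are not shown in this diff. Purely as an illustration of the workload the specs above create, a hypothetical PID-consuming pod could be built along these lines (the helper name, image and command are invented; assumes the usual fmt, k8s.io/api/core/v1 and metav1 imports):

	// sketchPidConsumingPod is a hypothetical stand-in for the real helper: it runs a
	// container that spawns roughly `processes` long-lived background processes.
	func sketchPidConsumingPod(name string, processes int) *v1.Pod {
		return &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: name + "-pod"},
			Spec: v1.PodSpec{
				RestartPolicy: v1.RestartPolicyNever,
				Containers: []v1.Container{{
					Name:  name,
					Image: "busybox",
					Command: []string{"sh", "-c", fmt.Sprintf(
						"i=0; while [ $i -lt %d ]; do sleep 1000 & i=$((i+1)); done; sleep 1000000", processes)},
				}},
			},
		}
	}

The evictionPriority values above encode the expected order: the kubelet ranks pods for PID eviction by pod priority and then by the process-usage comparator shown in the first hunk, so the low-priority fork bomb goes first, the high-priority one next, and the innocent pod is not evicted.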
@@ -259,7 +259,7 @@ var _ = SIGDescribe("Summary API", framework.WithNodeConformance(), func() {
 				"InodesUsed": bounded(0, 1e8),
 			}),
 			"ProcessStats": ptrMatchAllFields(gstruct.Fields{
-				"ProcessCount": bounded(0, 1e8),
+				"ProcessCount": bounded(1, 1e8),
 			}),
 		})