From 195a77471806e7959e956deb6c15b8d4a9913501 Mon Sep 17 00:00:00 2001 From: Hrishikesh D Kakkad Date: Sun, 29 Oct 2023 22:48:50 +0000 Subject: [PATCH 1/7] eviction_manager: add unit test for PID pressure (cherry picked from commit 696d0156565ebca494d5a67f0582bf3a2a2e8079) --- pkg/kubelet/eviction/eviction_manager_test.go | 133 ++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go index cae32993ed3..85f2f75725a 100644 --- a/pkg/kubelet/eviction/eviction_manager_test.go +++ b/pkg/kubelet/eviction/eviction_manager_test.go @@ -115,6 +115,14 @@ func makePodWithMemoryStats(name string, priority int32, requests v1.ResourceLis return pod, podStats } +func makePodWithPIDStats(name string, priority int32, processCount uint64) (*v1.Pod, statsapi.PodStats) { + pod := newPod(name, priority, []v1.Container{ + newContainer(name, nil, nil), + }, nil) + podStats := newPodProcessStats(pod, processCount) + return pod, podStats +} + func makePodWithDiskStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootFsUsed, logsUsed, perLocalVolumeUsed string) (*v1.Pod, statsapi.PodStats) { pod := newPod(name, priority, []v1.Container{ newContainer(name, requests, limits), @@ -149,6 +157,32 @@ func makePodWithLocalStorageCapacityIsolationOpen(name string, priority int32, r return pod, podStats } +func makePIDStats(nodeAvailablePIDs string, numberOfRunningProcesses string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary { + val := resource.MustParse(nodeAvailablePIDs) + availablePIDs := int64(val.Value()) + + parsed := resource.MustParse(numberOfRunningProcesses) + NumberOfRunningProcesses := int64(parsed.Value()) + result := &statsapi.Summary{ + Node: statsapi.NodeStats{ + Rlimit: &statsapi.RlimitStats{ + MaxPID: &availablePIDs, + NumOfRunningProcesses: &NumberOfRunningProcesses, + }, + SystemContainers: []statsapi.ContainerStats{ + { + Name: statsapi.SystemContainerPods, + }, + }, + }, + Pods: []statsapi.PodStats{}, + } + for _, podStat := range podStats { + result.Pods = append(result.Pods, podStat) + } + return result +} + func makeMemoryStats(nodeAvailableBytes string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary { val := resource.MustParse(nodeAvailableBytes) availableBytes := uint64(val.Value()) @@ -347,6 +381,105 @@ func TestMemoryPressure_VerifyPodStatus(t *testing.T) { } } +func TestPIDPressure_VerifyPodStatus(t *testing.T) { + testCases := map[string]struct { + wantPodStatus v1.PodStatus + }{ + "eviction due to pid pressure": { + wantPodStatus: v1.PodStatus{ + Phase: v1.PodFailed, + Reason: "Evicted", + Message: "The node was low on resource: pids. Threshold quantity: 1200, available: 500. 
", + }, + }, + } + for name, tc := range testCases { + for _, enablePodDisruptionConditions := range []bool{true, false} { + t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)() + + podMaker := makePodWithPIDStats + summaryStatsMaker := makePIDStats + podsToMake := []podToMake{ + {name: "below-requests"}, + {name: "above-requests"}, + } + pods := []*v1.Pod{} + podStats := map[*v1.Pod]statsapi.PodStats{} + for _, podToMake := range podsToMake { + pod, podStat := podMaker(podToMake.name, podToMake.priority, 2) + pods = append(pods, pod) + podStats[pod] = podStat + } + activePodsFunc := func() []*v1.Pod { + return pods + } + + fakeClock := testingclock.NewFakeClock(time.Now()) + podKiller := &mockPodKiller{} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} + diskGC := &mockDiskGC{err: nil} + nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} + + config := Config{ + PressureTransitionPeriod: time.Minute * 5, + Thresholds: []evictionapi.Threshold{ + { + Signal: evictionapi.SignalPIDAvailable, + Operator: evictionapi.OpLessThan, + Value: evictionapi.ThresholdValue{ + Quantity: quantityMustParse("1200"), + }, + }, + }, + } + summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1500", "1000", podStats)} + manager := &managerImpl{ + clock: fakeClock, + killPodFunc: podKiller.killPodNow, + imageGC: diskGC, + containerGC: diskGC, + config: config, + recorder: &record.FakeRecorder{}, + summaryProvider: summaryProvider, + nodeRef: nodeRef, + nodeConditionsLastObservedAt: nodeConditionsObservedAt{}, + thresholdsFirstObservedAt: thresholdsObservedAt{}, + } + + // synchronize to detect the memory pressure + manager.synchronize(diskInfoProvider, activePodsFunc) + + // verify pid pressure is detected + if !manager.IsUnderPIDPressure() { + t.Fatalf("Manager should have detected PID pressure") + } + + // verify a pod is selected for eviction + if podKiller.pod == nil { + t.Fatalf("Manager should have selected a pod for eviction") + } + + wantPodStatus := tc.wantPodStatus.DeepCopy() + if enablePodDisruptionConditions { + wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{ + Type: "DisruptionTarget", + Status: "True", + Reason: "TerminationByKubelet", + Message: "The node was low on resource: pids. Threshold quantity: 1200, available: 500. 
", + }) + } + + // verify the pod status after applying the status update function + podKiller.statusFn(&podKiller.pod.Status) + if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" { + t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff) + } + }) + } + } +} + func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) { testCases := map[string]struct { nodeFsStats string From b0c9a18673bab70a4386e70df101d18af1e154e4 Mon Sep 17 00:00:00 2001 From: reinka Date: Fri, 2 Feb 2024 17:09:58 +0100 Subject: [PATCH 2/7] add TestPIDPressure --- pkg/kubelet/eviction/eviction_manager_test.go | 213 +++++++++++++++++- 1 file changed, 211 insertions(+), 2 deletions(-) diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go index 85f2f75725a..cf0123a8b57 100644 --- a/pkg/kubelet/eviction/eviction_manager_test.go +++ b/pkg/kubelet/eviction/eviction_manager_test.go @@ -264,6 +264,7 @@ type podToMake struct { requests v1.ResourceList limits v1.ResourceList memoryWorkingSet string + pidUsage uint64 rootFsUsed string logsFsUsed string logsFsInodesUsed string @@ -417,7 +418,7 @@ func TestPIDPressure_VerifyPodStatus(t *testing.T) { fakeClock := testingclock.NewFakeClock(time.Now()) podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)} diskGC := &mockDiskGC{err: nil} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} @@ -448,7 +449,11 @@ func TestPIDPressure_VerifyPodStatus(t *testing.T) { } // synchronize to detect the memory pressure - manager.synchronize(diskInfoProvider, activePodsFunc) + _, err := manager.synchronize(diskInfoProvider, activePodsFunc) + + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } // verify pid pressure is detected if !manager.IsUnderPIDPressure() { @@ -918,6 +923,210 @@ func makeContainersByQOS(class v1.PodQOSClass) []v1.Container { } } +func TestPIDPressure(t *testing.T) { + podMaker := makePodWithPIDStats + summaryStatsMaker := makePIDStats + podsToMake := []podToMake{ + {name: "high-priority-high-usage", priority: highPriority, pidUsage: 900}, + {name: "default-priority-low-usage", priority: defaultPriority, pidUsage: 100}, + {name: "default-priority-medium-usage", priority: defaultPriority, pidUsage: 400}, + {name: "low-priority-high-usage", priority: lowPriority, pidUsage: 600}, + {name: "low-priority-low-usage", priority: lowPriority, pidUsage: 50}, + } + pods := []*v1.Pod{} + podStats := map[*v1.Pod]statsapi.PodStats{} + for _, podToMake := range podsToMake { + pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.pidUsage) + pods = append(pods, pod) + podStats[pod] = podStat + } + podToEvict := pods[3] + activePodsFunc := func() []*v1.Pod { + return pods + } + + fakeClock := testingclock.NewFakeClock(time.Now()) + podKiller := &mockPodKiller{} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)} + diskGC := &mockDiskGC{err: nil} + nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} + + config := Config{ + MaxPodGracePeriodSeconds: 5, + PressureTransitionPeriod: time.Minute * 5, + Thresholds: []evictionapi.Threshold{ + { + Signal: evictionapi.SignalPIDAvailable, + Operator: evictionapi.OpLessThan, + Value: evictionapi.ThresholdValue{ + 
Quantity: quantityMustParse("1200"), + }, + }, + { + Signal: evictionapi.SignalPIDAvailable, + Operator: evictionapi.OpLessThan, + Value: evictionapi.ThresholdValue{ + Quantity: quantityMustParse("1500"), + }, + GracePeriod: time.Minute * 2, + }, + }, + } + summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2000", "300", podStats)} + manager := &managerImpl{ + clock: fakeClock, + killPodFunc: podKiller.killPodNow, + imageGC: diskGC, + containerGC: diskGC, + config: config, + recorder: &record.FakeRecorder{}, + summaryProvider: summaryProvider, + nodeRef: nodeRef, + nodeConditionsLastObservedAt: nodeConditionsObservedAt{}, + thresholdsFirstObservedAt: thresholdsObservedAt{}, + } + + // synchronize + _, err := manager.synchronize(diskInfoProvider, activePodsFunc) + + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } + + // we should not have disk pressure + if manager.IsUnderDiskPressure() { + t.Fatalf("Manager should not report disk pressure") + } + + // induce soft threshold for PID pressure + fakeClock.Step(1 * time.Minute) + summaryProvider.result = summaryStatsMaker("2000", "700", podStats) + _, err = manager.synchronize(diskInfoProvider, activePodsFunc) + + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } + + // now, we should have PID pressure + if !manager.IsUnderPIDPressure() { + t.Errorf("Manager should report PID pressure since soft threshold was met") + } + + // verify no pod was yet killed because there has not yet been enough time passed + if podKiller.pod != nil { + t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name) + } + + // step forward in time past the grace period + fakeClock.Step(3 * time.Minute) + // no change in PID stats to simulate continued pressure + _, err = manager.synchronize(diskInfoProvider, activePodsFunc) + + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } + + // verify PID pressure is still reported + if !manager.IsUnderPIDPressure() { + t.Errorf("Manager should still report PID pressure") + } + + // verify the right pod was killed with the right grace period. + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) + } + if podKiller.gracePeriodOverride == nil { + t.Errorf("Manager chose to kill pod but should have had a grace period override.") + } + observedGracePeriod := *podKiller.gracePeriodOverride + if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds { + t.Errorf("Manager chose to kill pod with incorrect grace period. 
Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod) + } + + // reset state + podKiller.pod = nil + podKiller.gracePeriodOverride = nil + + // remove PID pressure by simulating increased PID availability + fakeClock.Step(20 * time.Minute) + summaryProvider.result = summaryStatsMaker("2000", "300", podStats) // Simulate increased PID availability + _, err = manager.synchronize(diskInfoProvider, activePodsFunc) + + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } + + // verify PID pressure is resolved + if manager.IsUnderPIDPressure() { + t.Errorf("Manager should not report PID pressure") + } + + // re-induce PID pressure + fakeClock.Step(1 * time.Minute) + summaryProvider.result = summaryStatsMaker("2000", "1200", podStats) + _, err = manager.synchronize(diskInfoProvider, activePodsFunc) + + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } + + // verify PID pressure is reported again + if !manager.IsUnderPIDPressure() { + t.Errorf("Manager should report PID pressure") + } + + // verify the right pod was killed with the right grace period. + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) + } + if podKiller.gracePeriodOverride == nil { + t.Errorf("Manager chose to kill pod but should have had a grace period override.") + } + observedGracePeriod = *podKiller.gracePeriodOverride + if observedGracePeriod != int64(0) { + t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod) + } + + // reduce PID pressure + fakeClock.Step(1 * time.Minute) + summaryProvider.result = summaryStatsMaker("2000", "300", podStats) + podKiller.pod = nil // reset state + _, err = manager.synchronize(diskInfoProvider, activePodsFunc) + + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } + + // we should have memory pressure (because transition period not yet met) + if !manager.IsUnderPIDPressure() { + t.Errorf("Manager should report PID pressure") + } + + // no pod should have been killed + if podKiller.pod != nil { + t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) + } + + // move the clock past the transition period + fakeClock.Step(5 * time.Minute) + summaryProvider.result = summaryStatsMaker("2000", "300", podStats) + _, err = manager.synchronize(diskInfoProvider, activePodsFunc) + + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } + + // we should not have PID pressure (because transition period met) + if manager.IsUnderPIDPressure() { + t.Errorf("Manager should not report memory pressure") + } + + // no pod should have been killed + if podKiller.pod != nil { + t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) + } +} + func TestAdmitUnderNodeConditions(t *testing.T) { manager := &managerImpl{} pods := []*v1.Pod{ From a0770386abe583831ea9fb0e312b00f041fcd1a3 Mon Sep 17 00:00:00 2001 From: reinka Date: Fri, 2 Feb 2024 20:35:19 +0100 Subject: [PATCH 3/7] fix copy paste --- pkg/kubelet/eviction/eviction_manager_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go index cf0123a8b57..bf7340c9ebc 100644 --- a/pkg/kubelet/eviction/eviction_manager_test.go +++ b/pkg/kubelet/eviction/eviction_manager_test.go @@ -448,14 
+448,14 @@ func TestPIDPressure_VerifyPodStatus(t *testing.T) { thresholdsFirstObservedAt: thresholdsObservedAt{}, } - // synchronize to detect the memory pressure + // synchronize to detect the PID pressure _, err := manager.synchronize(diskInfoProvider, activePodsFunc) if err != nil { t.Fatalf("Manager expects no error but got %v", err) } - // verify pid pressure is detected + // verify PID pressure is detected if !manager.IsUnderPIDPressure() { t.Fatalf("Manager should have detected PID pressure") } @@ -1097,7 +1097,7 @@ func TestPIDPressure(t *testing.T) { t.Fatalf("Manager expects no error but got %v", err) } - // we should have memory pressure (because transition period not yet met) + // we should have PID pressure (because transition period not yet met) if !manager.IsUnderPIDPressure() { t.Errorf("Manager should report PID pressure") } @@ -1118,7 +1118,7 @@ func TestPIDPressure(t *testing.T) { // we should not have PID pressure (because transition period met) if manager.IsUnderPIDPressure() { - t.Errorf("Manager should not report memory pressure") + t.Errorf("Manager should not report PID pressure") } // no pod should have been killed From 960d7fbf094b5885890dffe4cd68fc6a14416f16 Mon Sep 17 00:00:00 2001 From: reinka Date: Fri, 2 Feb 2024 20:53:27 +0100 Subject: [PATCH 4/7] add admission tests --- pkg/kubelet/eviction/eviction_manager_test.go | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go index bf7340c9ebc..336b8e69bde 100644 --- a/pkg/kubelet/eviction/eviction_manager_test.go +++ b/pkg/kubelet/eviction/eviction_manager_test.go @@ -986,6 +986,9 @@ func TestPIDPressure(t *testing.T) { thresholdsFirstObservedAt: thresholdsObservedAt{}, } + // create a best effort pod to test admission + podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, 50) + // synchronize _, err := manager.synchronize(diskInfoProvider, activePodsFunc) @@ -998,6 +1001,11 @@ func TestPIDPressure(t *testing.T) { t.Fatalf("Manager should not report disk pressure") } + // try to admit our pod (should succeed) + if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit { + t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit) + } + // induce soft threshold for PID pressure fakeClock.Step(1 * time.Minute) summaryProvider.result = summaryStatsMaker("2000", "700", podStats) @@ -1087,6 +1095,11 @@ func TestPIDPressure(t *testing.T) { t.Errorf("Manager chose to kill pod with incorrect grace period. 
Expected: %d, actual: %d", 0, observedGracePeriod) } + // try to admit our pod (should fail) + if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit { + t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit) + } + // reduce PID pressure fakeClock.Step(1 * time.Minute) summaryProvider.result = summaryStatsMaker("2000", "300", podStats) @@ -1107,6 +1120,11 @@ func TestPIDPressure(t *testing.T) { t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) } + // try to admit our pod (should fail) + if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit { + t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit) + } + // move the clock past the transition period fakeClock.Step(5 * time.Minute) summaryProvider.result = summaryStatsMaker("2000", "300", podStats) @@ -1125,6 +1143,11 @@ func TestPIDPressure(t *testing.T) { if podKiller.pod != nil { t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) } + + // try to admit our pod (should succeed) + if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit { + t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit) + } } func TestAdmitUnderNodeConditions(t *testing.T) { From 48b1576980d45c55ec018f21fd684cdafc625461 Mon Sep 17 00:00:00 2001 From: reinka Date: Tue, 13 Feb 2024 20:05:26 +0100 Subject: [PATCH 5/7] use table test pattern and replace hardcoded values --- pkg/kubelet/eviction/eviction_manager_test.go | 407 +++++++++--------- 1 file changed, 213 insertions(+), 194 deletions(-) diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go index 336b8e69bde..24d9213cdfb 100644 --- a/pkg/kubelet/eviction/eviction_manager_test.go +++ b/pkg/kubelet/eviction/eviction_manager_test.go @@ -169,11 +169,6 @@ func makePIDStats(nodeAvailablePIDs string, numberOfRunningProcesses string, pod MaxPID: &availablePIDs, NumOfRunningProcesses: &NumberOfRunningProcesses, }, - SystemContainers: []statsapi.ContainerStats{ - { - Name: statsapi.SystemContainerPods, - }, - }, }, Pods: []statsapi.PodStats{}, } @@ -402,8 +397,8 @@ func TestPIDPressure_VerifyPodStatus(t *testing.T) { podMaker := makePodWithPIDStats summaryStatsMaker := makePIDStats podsToMake := []podToMake{ - {name: "below-requests"}, - {name: "above-requests"}, + {}, + {}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} @@ -924,229 +919,253 @@ func makeContainersByQOS(class v1.PodQOSClass) []v1.Container { } func TestPIDPressure(t *testing.T) { - podMaker := makePodWithPIDStats - summaryStatsMaker := makePIDStats - podsToMake := []podToMake{ - {name: "high-priority-high-usage", priority: highPriority, pidUsage: 900}, - {name: "default-priority-low-usage", priority: defaultPriority, pidUsage: 100}, - {name: "default-priority-medium-usage", priority: defaultPriority, pidUsage: 400}, - {name: "low-priority-high-usage", priority: lowPriority, pidUsage: 600}, - {name: "low-priority-low-usage", priority: lowPriority, pidUsage: 50}, - } - pods := []*v1.Pod{} - podStats := map[*v1.Pod]statsapi.PodStats{} - for _, podToMake := range podsToMake { - pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.pidUsage) - pods = append(pods, pod) - podStats[pod] = podStat - } - podToEvict := pods[3] - activePodsFunc := func() []*v1.Pod { - return pods - } - fakeClock := 
testingclock.NewFakeClock(time.Now()) - podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)} - diskGC := &mockDiskGC{err: nil} - nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} - - config := Config{ - MaxPodGracePeriodSeconds: 5, - PressureTransitionPeriod: time.Minute * 5, - Thresholds: []evictionapi.Threshold{ - { - Signal: evictionapi.SignalPIDAvailable, - Operator: evictionapi.OpLessThan, - Value: evictionapi.ThresholdValue{ - Quantity: quantityMustParse("1200"), - }, - }, - { - Signal: evictionapi.SignalPIDAvailable, - Operator: evictionapi.OpLessThan, - Value: evictionapi.ThresholdValue{ - Quantity: quantityMustParse("1500"), - }, - GracePeriod: time.Minute * 2, + // Define test cases + testCases := []struct { + name string + podsToMake []podToMake + evictPodIndex int + noPressurePIDUsage string + pressurePIDUsageWithGracePeriod string + pressurePIDUsageWithoutGracePeriod string + totalPID string + }{ + { + name: "eviction due to pid pressure", + podsToMake: []podToMake{ + {name: "high-priority-high-usage", priority: highPriority, pidUsage: 900}, + {name: "default-priority-low-usage", priority: defaultPriority, pidUsage: 100}, + {name: "default-priority-medium-usage", priority: defaultPriority, pidUsage: 400}, + {name: "low-priority-high-usage", priority: lowPriority, pidUsage: 600}, + {name: "low-priority-low-usage", priority: lowPriority, pidUsage: 50}, }, + evictPodIndex: 3, // we expect the low-priority-high-usage pod to be evicted + noPressurePIDUsage: "300", + pressurePIDUsageWithGracePeriod: "700", + pressurePIDUsageWithoutGracePeriod: "1200", + totalPID: "2000", }, } - summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2000", "300", podStats)} - manager := &managerImpl{ - clock: fakeClock, - killPodFunc: podKiller.killPodNow, - imageGC: diskGC, - containerGC: diskGC, - config: config, - recorder: &record.FakeRecorder{}, - summaryProvider: summaryProvider, - nodeRef: nodeRef, - nodeConditionsLastObservedAt: nodeConditionsObservedAt{}, - thresholdsFirstObservedAt: thresholdsObservedAt{}, - } - // create a best effort pod to test admission - podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, 50) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + podMaker := makePodWithPIDStats + summaryStatsMaker := makePIDStats + pods := []*v1.Pod{} + podStats := map[*v1.Pod]statsapi.PodStats{} + for _, podToMake := range tc.podsToMake { + pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.pidUsage) + pods = append(pods, pod) + podStats[pod] = podStat + } + podToEvict := pods[tc.evictPodIndex] + activePodsFunc := func() []*v1.Pod { return pods } - // synchronize - _, err := manager.synchronize(diskInfoProvider, activePodsFunc) + fakeClock := testingclock.NewFakeClock(time.Now()) + podKiller := &mockPodKiller{} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)} + diskGC := &mockDiskGC{err: nil} + nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} - if err != nil { - t.Fatalf("Manager expects no error but got %v", err) - } + config := Config{ + MaxPodGracePeriodSeconds: 5, + PressureTransitionPeriod: time.Minute * 5, + Thresholds: []evictionapi.Threshold{ + { + Signal: evictionapi.SignalPIDAvailable, + Operator: evictionapi.OpLessThan, + Value: evictionapi.ThresholdValue{ + Quantity: quantityMustParse("1200"), + }, + }, + { + Signal: 
evictionapi.SignalPIDAvailable, + Operator: evictionapi.OpLessThan, + Value: evictionapi.ThresholdValue{ + Quantity: quantityMustParse("1500"), + }, + GracePeriod: time.Minute * 2, + }, + }, + } - // we should not have disk pressure - if manager.IsUnderDiskPressure() { - t.Fatalf("Manager should not report disk pressure") - } + summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)} + manager := &managerImpl{ + clock: fakeClock, + killPodFunc: podKiller.killPodNow, + imageGC: diskGC, + containerGC: diskGC, + config: config, + recorder: &record.FakeRecorder{}, + summaryProvider: summaryProvider, + nodeRef: nodeRef, + nodeConditionsLastObservedAt: nodeConditionsObservedAt{}, + thresholdsFirstObservedAt: thresholdsObservedAt{}, + } - // try to admit our pod (should succeed) - if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit { - t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit) - } + // create a pod to test admission + podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, 50) - // induce soft threshold for PID pressure - fakeClock.Step(1 * time.Minute) - summaryProvider.result = summaryStatsMaker("2000", "700", podStats) - _, err = manager.synchronize(diskInfoProvider, activePodsFunc) + // synchronize + _, err := manager.synchronize(diskInfoProvider, activePodsFunc) - if err != nil { - t.Fatalf("Manager expects no error but got %v", err) - } + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } - // now, we should have PID pressure - if !manager.IsUnderPIDPressure() { - t.Errorf("Manager should report PID pressure since soft threshold was met") - } + // we should not have PID pressure + if manager.IsUnderPIDPressure() { + t.Fatalf("Manager should not report PID pressure") + } - // verify no pod was yet killed because there has not yet been enough time passed - if podKiller.pod != nil { - t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name) - } + // try to admit our pod (should succeed) + if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit { + t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit) + } - // step forward in time past the grace period - fakeClock.Step(3 * time.Minute) - // no change in PID stats to simulate continued pressure - _, err = manager.synchronize(diskInfoProvider, activePodsFunc) + // induce soft threshold for PID pressure + fakeClock.Step(1 * time.Minute) + summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.pressurePIDUsageWithGracePeriod, podStats) + _, err = manager.synchronize(diskInfoProvider, activePodsFunc) - if err != nil { - t.Fatalf("Manager expects no error but got %v", err) - } + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } - // verify PID pressure is still reported - if !manager.IsUnderPIDPressure() { - t.Errorf("Manager should still report PID pressure") - } + // now, we should have PID pressure + if !manager.IsUnderPIDPressure() { + t.Errorf("Manager should report PID pressure since soft threshold was met") + } - // verify the right pod was killed with the right grace period. 
- if podKiller.pod != podToEvict { - t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) - } - if podKiller.gracePeriodOverride == nil { - t.Errorf("Manager chose to kill pod but should have had a grace period override.") - } - observedGracePeriod := *podKiller.gracePeriodOverride - if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds { - t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod) - } + // verify no pod was yet killed because there has not yet been enough time passed + if podKiller.pod != nil { + t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name) + } - // reset state - podKiller.pod = nil - podKiller.gracePeriodOverride = nil + // step forward in time past the grace period + fakeClock.Step(3 * time.Minute) + // no change in PID stats to simulate continued pressure + _, err = manager.synchronize(diskInfoProvider, activePodsFunc) - // remove PID pressure by simulating increased PID availability - fakeClock.Step(20 * time.Minute) - summaryProvider.result = summaryStatsMaker("2000", "300", podStats) // Simulate increased PID availability - _, err = manager.synchronize(diskInfoProvider, activePodsFunc) + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } - if err != nil { - t.Fatalf("Manager expects no error but got %v", err) - } + // verify PID pressure is still reported + if !manager.IsUnderPIDPressure() { + t.Errorf("Manager should still report PID pressure") + } - // verify PID pressure is resolved - if manager.IsUnderPIDPressure() { - t.Errorf("Manager should not report PID pressure") - } + // verify the right pod was killed with the right grace period. + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) + } + if podKiller.gracePeriodOverride == nil { + t.Errorf("Manager chose to kill pod but should have had a grace period override.") + } + observedGracePeriod := *podKiller.gracePeriodOverride + if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds { + t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod) + } - // re-induce PID pressure - fakeClock.Step(1 * time.Minute) - summaryProvider.result = summaryStatsMaker("2000", "1200", podStats) - _, err = manager.synchronize(diskInfoProvider, activePodsFunc) + // reset state + podKiller.pod = nil + podKiller.gracePeriodOverride = nil - if err != nil { - t.Fatalf("Manager expects no error but got %v", err) - } + // remove PID pressure by simulating increased PID availability + fakeClock.Step(20 * time.Minute) + summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats) // Simulate increased PID availability + _, err = manager.synchronize(diskInfoProvider, activePodsFunc) - // verify PID pressure is reported again - if !manager.IsUnderPIDPressure() { - t.Errorf("Manager should report PID pressure") - } + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } - // verify the right pod was killed with the right grace period. 
- if podKiller.pod != podToEvict { - t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) - } - if podKiller.gracePeriodOverride == nil { - t.Errorf("Manager chose to kill pod but should have had a grace period override.") - } - observedGracePeriod = *podKiller.gracePeriodOverride - if observedGracePeriod != int64(0) { - t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod) - } + // verify PID pressure is resolved + if manager.IsUnderPIDPressure() { + t.Errorf("Manager should not report PID pressure") + } - // try to admit our pod (should fail) - if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit { - t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit) - } + // re-induce PID pressure + fakeClock.Step(1 * time.Minute) + summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.pressurePIDUsageWithoutGracePeriod, podStats) + _, err = manager.synchronize(diskInfoProvider, activePodsFunc) - // reduce PID pressure - fakeClock.Step(1 * time.Minute) - summaryProvider.result = summaryStatsMaker("2000", "300", podStats) - podKiller.pod = nil // reset state - _, err = manager.synchronize(diskInfoProvider, activePodsFunc) + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } - if err != nil { - t.Fatalf("Manager expects no error but got %v", err) - } + // verify PID pressure is reported again + if !manager.IsUnderPIDPressure() { + t.Errorf("Manager should report PID pressure") + } - // we should have PID pressure (because transition period not yet met) - if !manager.IsUnderPIDPressure() { - t.Errorf("Manager should report PID pressure") - } + // verify the right pod was killed with the right grace period. + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) + } + if podKiller.gracePeriodOverride == nil { + t.Errorf("Manager chose to kill pod but should have had a grace period override.") + } + observedGracePeriod = *podKiller.gracePeriodOverride + if observedGracePeriod != int64(0) { + t.Errorf("Manager chose to kill pod with incorrect grace period. 
Expected: %d, actual: %d", 0, observedGracePeriod)
+			}
+
+			// try to admit our pod (should fail)
+			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
+				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
+			}
+
+			// reduce PID pressure
+			fakeClock.Step(1 * time.Minute)
+			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)
+			podKiller.pod = nil // reset state
+			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
+
+			if err != nil {
+				t.Fatalf("Manager expects no error but got %v", err)
+			}
+
+			// we should have PID pressure (because transition period not yet met)
+			if !manager.IsUnderPIDPressure() {
+				t.Errorf("Manager should report PID pressure")
+			}
+
+			// no pod should have been killed
+			if podKiller.pod != nil {
+				t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
+			}
+
+			// try to admit our pod (should fail)
+			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
+				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
+			}
+
+			// move the clock past the transition period
+			fakeClock.Step(5 * time.Minute)
+			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)
+			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
+
+			if err != nil {
+				t.Fatalf("Manager expects no error but got %v", err)
+			}
+
+			// we should not have PID pressure (because transition period met)
+			if manager.IsUnderPIDPressure() {
+				t.Errorf("Manager should not report PID pressure")
+			}
+
+			// no pod should have been killed
+			if podKiller.pod != nil {
+				t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
+			}
+
+			// try to admit our pod (should succeed)
+			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
+				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
+			}
+		})
 	}
 }

From 2fa02552ebf33db4bcf483848cfbdd9882b0e787 Mon Sep 17 00:00:00 2001
From: reinka
Date: Tue, 13 Feb 2024 20:12:00 +0100
Subject: [PATCH 6/7] remove comment

---
 pkg/kubelet/eviction/eviction_manager_test.go | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go
index 24d9213cdfb..fe7666ea2cc 100644
--- a/pkg/kubelet/eviction/eviction_manager_test.go
+++ b/pkg/kubelet/eviction/eviction_manager_test.go
@@ -919,8 +919,6 @@ func makeContainersByQOS(class v1.PodQOSClass) []v1.Container {
 }
 
 func TestPIDPressure(t *testing.T) {
-
-	// Define test cases
 	testCases := []struct {
 		name                               string
 		podsToMake                         []podToMake

From 0f083966a7a799cebd9937697b0dc929cbf4b0f3 Mon Sep 17 00:00:00 2001
From: reinka
Date: Sun, 25 Feb 2024 19:27:39 +0100
Subject: [PATCH 7/7] set actual podToMake values

---
 pkg/kubelet/eviction/eviction_manager_test.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go
index fe7666ea2cc..5601a9c31b8 100644
--- a/pkg/kubelet/eviction/eviction_manager_test.go
+++ b/pkg/kubelet/eviction/eviction_manager_test.go
@@ -397,8 +397,8 @@ func TestPIDPressure_VerifyPodStatus(t *testing.T) {
 	podMaker := makePodWithPIDStats
 	summaryStatsMaker := makePIDStats
 	podsToMake := []podToMake{
-		{},
-		{},
+		{name: "pod1", priority: lowPriority, pidUsage: 500},
+		{name: "pod2", priority: defaultPriority, pidUsage: 500},
 	}
 	pods := []*v1.Pod{}
 	podStats := map[*v1.Pod]statsapi.PodStats{}