From 780191bea653e45a322053e19b5f28a29c8b596d Mon Sep 17 00:00:00 2001 From: Michal Wozniak Date: Fri, 28 Jun 2024 16:36:45 +0200 Subject: [PATCH] review remarks for graduating PodDisruptionConditions --- pkg/controller/job/job_controller_test.go | 35 +---- .../tainteviction/taint_eviction_test.go | 2 +- pkg/kubelet/eviction/eviction_manager_test.go | 147 ++++++++---------- pkg/kubelet/status/status_manager_test.go | 7 + .../core/pod/storage/eviction_test.go | 3 - .../default_preemption_test.go | 10 +- test/e2e/nodefeature/nodefeature.go | 3 - test/e2e_node/critical_pod_test.go | 2 +- test/e2e_node/eviction_test.go | 6 +- test/e2e_node/node_shutdown_linux_test.go | 2 +- test/integration/evictions/evictions_test.go | 3 - 11 files changed, 84 insertions(+), 136 deletions(-) diff --git a/pkg/controller/job/job_controller_test.go b/pkg/controller/job/job_controller_test.go index cf96b21afbc..17bce5652c1 100644 --- a/pkg/controller/job/job_controller_test.go +++ b/pkg/controller/job/job_controller_test.go @@ -3701,40 +3701,7 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) { wantStatusFailed: 1, wantStatusSucceeded: 0, }, - "terminating Pod considered failed when PodDisruptionConditions is disabled": { - enableJobPodFailurePolicy: true, - job: batch.Job{ - TypeMeta: metav1.TypeMeta{Kind: "Job"}, - ObjectMeta: validObjectMeta, - Spec: batch.JobSpec{ - Parallelism: ptr.To[int32](1), - Selector: validSelector, - Template: validTemplate, - BackoffLimit: ptr.To[int32](0), - PodFailurePolicy: &batch.PodFailurePolicy{ - Rules: []batch.PodFailurePolicyRule{ - { - Action: batch.PodFailurePolicyActionCount, - OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{ - { - Type: v1.DisruptionTarget, - Status: v1.ConditionTrue, - }, - }, - }, - }, - }, - }, - }, - pods: []v1.Pod{ - { - ObjectMeta: metav1.ObjectMeta{ - DeletionTimestamp: &now, - }, - }, - }, - }, - "terminating Pod not considered failed when PodDisruptionConditions is enabled": { + "terminating Pod not considered failed when JobPodFailurePolicy is enabled and used": { enableJobPodFailurePolicy: true, job: batch.Job{ TypeMeta: metav1.TypeMeta{Kind: "Job"}, diff --git a/pkg/controller/tainteviction/taint_eviction_test.go b/pkg/controller/tainteviction/taint_eviction_test.go index b9a4ab289e8..6c933d5f23d 100644 --- a/pkg/controller/tainteviction/taint_eviction_test.go +++ b/pkg/controller/tainteviction/taint_eviction_test.go @@ -180,7 +180,7 @@ func TestCreatePod(t *testing.T) { expectDelete: false, }, { - description: "schedule on tainted Node with infinite ivalid toleration", + description: "schedule on tainted Node with infinite invalid toleration", pod: addToleration(testutil.NewPod("pod1", "node1"), 2, -1), taintedNodes: map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go index 0298d8b4589..c8993087246 100644 --- a/pkg/kubelet/eviction/eviction_manager_test.go +++ b/pkg/kubelet/eviction/eviction_manager_test.go @@ -268,96 +268,87 @@ type podToMake struct { } func TestMemoryPressure_VerifyPodStatus(t *testing.T) { - testCases := map[string]struct { - wantPodStatus v1.PodStatus - }{ - "eviction due to memory pressure": { - wantPodStatus: v1.PodStatus{ - Phase: v1.PodFailed, - Reason: "Evicted", - Message: "The node was low on resource: memory. Threshold quantity: 2Gi, available: 1500Mi. 
", + podMaker := makePodWithMemoryStats + summaryStatsMaker := makeMemoryStats + podsToMake := []podToMake{ + {name: "below-requests", requests: newResourceList("", "1Gi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "900Mi"}, + {name: "above-requests", requests: newResourceList("", "100Mi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "700Mi"}, + } + pods := []*v1.Pod{} + podStats := map[*v1.Pod]statsapi.PodStats{} + for _, podToMake := range podsToMake { + pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet) + pods = append(pods, pod) + podStats[pod] = podStat + } + activePodsFunc := func() []*v1.Pod { + return pods + } + + fakeClock := testingclock.NewFakeClock(time.Now()) + podKiller := &mockPodKiller{} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)} + diskGC := &mockDiskGC{err: nil} + nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} + + config := Config{ + PressureTransitionPeriod: time.Minute * 5, + Thresholds: []evictionapi.Threshold{ + { + Signal: evictionapi.SignalMemoryAvailable, + Operator: evictionapi.OpLessThan, + Value: evictionapi.ThresholdValue{ + Quantity: quantityMustParse("2Gi"), + }, }, }, } - for _, tc := range testCases { - podMaker := makePodWithMemoryStats - summaryStatsMaker := makeMemoryStats - podsToMake := []podToMake{ - {name: "below-requests", requests: newResourceList("", "1Gi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "900Mi"}, - {name: "above-requests", requests: newResourceList("", "100Mi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "700Mi"}, - } - pods := []*v1.Pod{} - podStats := map[*v1.Pod]statsapi.PodStats{} - for _, podToMake := range podsToMake { - pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet) - pods = append(pods, pod) - podStats[pod] = podStat - } - activePodsFunc := func() []*v1.Pod { - return pods - } + summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1500Mi", podStats)} + manager := &managerImpl{ + clock: fakeClock, + killPodFunc: podKiller.killPodNow, + imageGC: diskGC, + containerGC: diskGC, + config: config, + recorder: &record.FakeRecorder{}, + summaryProvider: summaryProvider, + nodeRef: nodeRef, + nodeConditionsLastObservedAt: nodeConditionsObservedAt{}, + thresholdsFirstObservedAt: thresholdsObservedAt{}, + } - fakeClock := testingclock.NewFakeClock(time.Now()) - podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)} - diskGC := &mockDiskGC{err: nil} - nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} + // synchronize to detect the memory pressure + _, err := manager.synchronize(diskInfoProvider, activePodsFunc) - config := Config{ - PressureTransitionPeriod: time.Minute * 5, - Thresholds: []evictionapi.Threshold{ - { - Signal: evictionapi.SignalMemoryAvailable, - Operator: evictionapi.OpLessThan, - Value: evictionapi.ThresholdValue{ - Quantity: quantityMustParse("2Gi"), - }, - }, - }, - } - summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1500Mi", podStats)} - manager := &managerImpl{ - clock: fakeClock, - killPodFunc: podKiller.killPodNow, - imageGC: diskGC, - containerGC: diskGC, - config: config, - recorder: &record.FakeRecorder{}, - summaryProvider: summaryProvider, - nodeRef: nodeRef, - 
nodeConditionsLastObservedAt: nodeConditionsObservedAt{}, - thresholdsFirstObservedAt: thresholdsObservedAt{}, - } + if err != nil { + t.Fatalf("Manager expects no error but got %v", err) + } + // verify memory pressure is detected + if !manager.IsUnderMemoryPressure() { + t.Fatalf("Manager should have detected memory pressure") + } - // synchronize to detect the memory pressure - _, err := manager.synchronize(diskInfoProvider, activePodsFunc) + // verify a pod is selected for eviction + if podKiller.pod == nil { + t.Fatalf("Manager should have selected a pod for eviction") + } - if err != nil { - t.Fatalf("Manager expects no error but got %v", err) - } - // verify memory pressure is detected - if !manager.IsUnderMemoryPressure() { - t.Fatalf("Manager should have detected memory pressure") - } - - // verify a pod is selected for eviction - if podKiller.pod == nil { - t.Fatalf("Manager should have selected a pod for eviction") - } - - wantPodStatus := tc.wantPodStatus.DeepCopy() - wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{ + wantPodStatus := v1.PodStatus{ + Phase: v1.PodFailed, + Reason: "Evicted", + Message: "The node was low on resource: memory. Threshold quantity: 2Gi, available: 1500Mi. ", + Conditions: []v1.PodCondition{{ Type: "DisruptionTarget", Status: "True", Reason: "TerminationByKubelet", Message: "The node was low on resource: memory. Threshold quantity: 2Gi, available: 1500Mi. ", - }) + }}, + } - // verify the pod status after applying the status update function - podKiller.statusFn(&podKiller.pod.Status) - if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" { - t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff) - } + // verify the pod status after applying the status update function + podKiller.statusFn(&podKiller.pod.Status) + if diff := cmp.Diff(wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" { + t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff) } } diff --git a/pkg/kubelet/status/status_manager_test.go b/pkg/kubelet/status/status_manager_test.go index 857e7b81549..8a8432a71cc 100644 --- a/pkg/kubelet/status/status_manager_test.go +++ b/pkg/kubelet/status/status_manager_test.go @@ -1544,6 +1544,13 @@ func TestMergePodStatus(t *testing.T) { newPodStatus func(input v1.PodStatus) v1.PodStatus expectPodStatus v1.PodStatus }{ + { + "no change", + false, + func(input v1.PodStatus) v1.PodStatus { return input }, + func(input v1.PodStatus) v1.PodStatus { return input }, + getPodStatus(), + }, { "add DisruptionTarget condition when transitioning into failed phase", false, diff --git a/pkg/registry/core/pod/storage/eviction_test.go b/pkg/registry/core/pod/storage/eviction_test.go index f402e7e8bfc..d39a84ba63e 100644 --- a/pkg/registry/core/pod/storage/eviction_test.go +++ b/pkg/registry/core/pod/storage/eviction_test.go @@ -34,13 +34,10 @@ import ( "k8s.io/apimachinery/pkg/watch" genericapirequest "k8s.io/apiserver/pkg/endpoints/request" "k8s.io/apiserver/pkg/registry/rest" - utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/kubernetes/fake" - featuregatetesting "k8s.io/component-base/featuregate/testing" podapi "k8s.io/kubernetes/pkg/api/pod" api "k8s.io/kubernetes/pkg/apis/core" "k8s.io/kubernetes/pkg/apis/policy" - "k8s.io/kubernetes/pkg/features" ) func TestEviction(t *testing.T) { diff --git 
a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go index 5545a87a7fa..efdd23a2be5 100644 --- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go +++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go @@ -1431,14 +1431,6 @@ func TestPodEligibleToPreemptOthers(t *testing.T) { nominatedNodeStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, tainttoleration.ErrReasonNotMatch), expected: true, }, - { - name: "Pod with nominated node, but without nominated node status", - pod: st.MakePod().Name("p_without_status").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(), - pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()}, - nodes: []string{"node1"}, - nominatedNodeStatus: nil, - expected: true, - }, { name: "Pod without nominated node", pod: st.MakePod().Name("p_without_nominated_node").UID("p").Priority(highPriority).Obj(), @@ -1456,7 +1448,7 @@ func TestPodEligibleToPreemptOthers(t *testing.T) { expected: false, }, { - name: "victim Pods terminating", + name: "preemption victim pod terminating, as indicated by the dedicated DisruptionTarget condition", pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(), pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating(). Condition(v1.DisruptionTarget, v1.ConditionTrue, v1.PodReasonPreemptionByScheduler).Obj()}, diff --git a/test/e2e/nodefeature/nodefeature.go b/test/e2e/nodefeature/nodefeature.go index d8337d27597..964e7aefa85 100644 --- a/test/e2e/nodefeature/nodefeature.go +++ b/test/e2e/nodefeature/nodefeature.go @@ -82,9 +82,6 @@ var ( // TODO: document the feature (owning SIG, when to use this feature for a test) OOMScoreAdj = framework.WithNodeFeature(framework.ValidNodeFeatures.Add("OOMScoreAdj")) - // TODO: document the feature (owning SIG, when to use this feature for a test) - PodDisruptionConditions = framework.WithNodeFeature(framework.ValidNodeFeatures.Add("PodDisruptionConditions")) - // TODO: document the feature (owning SIG, when to use this feature for a test) PodResources = framework.WithNodeFeature(framework.ValidNodeFeatures.Add("PodResources")) diff --git a/test/e2e_node/critical_pod_test.go b/test/e2e_node/critical_pod_test.go index 493248bef13..7c6149f2cc6 100644 --- a/test/e2e_node/critical_pod_test.go +++ b/test/e2e_node/critical_pod_test.go @@ -91,7 +91,7 @@ var _ = SIGDescribe("CriticalPod", framework.WithSerial(), framework.WithDisrupt } }) - f.It("should add DisruptionTarget condition to the preempted pod", nodefeature.PodDisruptionConditions, func(ctx context.Context) { + f.It("should add DisruptionTarget condition to the preempted pod", func(ctx context.Context) { // because adminssion Priority enable, If the priority class is not found, the Pod is rejected. 
node := getNodeName(ctx, f) nonCriticalGuaranteed := getTestPod(false, guaranteedPodName, v1.ResourceRequirements{ diff --git a/test/e2e_node/eviction_test.go b/test/e2e_node/eviction_test.go index 7cad7e3b785..2f3375e3e65 100644 --- a/test/e2e_node/eviction_test.go +++ b/test/e2e_node/eviction_test.go @@ -44,6 +44,7 @@ import ( testutils "k8s.io/kubernetes/test/utils" imageutils "k8s.io/kubernetes/test/utils/image" admissionapi "k8s.io/pod-security-admission/api" + "k8s.io/utils/ptr" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" @@ -512,7 +513,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logPidMetrics, specs) }) - f.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition)+"; baseline scenario to verify DisruptionTarget is added", nodefeature.PodDisruptionConditions, func() { + f.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition)+"; baseline scenario to verify DisruptionTarget is added", func() { tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) { pidsConsumed := int64(10000) summary := eventuallyGetSummary(ctx) @@ -520,12 +521,11 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)} initialConfig.EvictionMinimumReclaim = map[string]string{} }) - disruptionTarget := v1.DisruptionTarget specs := []podEvictSpec{ { evictionPriority: 1, pod: pidConsumingPod("fork-bomb-container", 30000), - wantPodDisruptionCondition: &disruptionTarget, + wantPodDisruptionCondition: ptr.To(v1.DisruptionTarget), }, } runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logPidMetrics, specs) diff --git a/test/e2e_node/node_shutdown_linux_test.go b/test/e2e_node/node_shutdown_linux_test.go index 762e4a65a89..60743d6c778 100644 --- a/test/e2e_node/node_shutdown_linux_test.go +++ b/test/e2e_node/node_shutdown_linux_test.go @@ -83,7 +83,7 @@ var _ = SIGDescribe("GracefulNodeShutdown", framework.WithSerial(), nodefeature. } }) - f.Context("graceful node shutdown; baseline scenario to verify DisruptionTarget is added", nodefeature.PodDisruptionConditions, func() { + f.Context("graceful node shutdown; baseline scenario to verify DisruptionTarget is added", func() { const ( pollInterval = 1 * time.Second diff --git a/test/integration/evictions/evictions_test.go b/test/integration/evictions/evictions_test.go index e691104881a..9fabc44c99f 100644 --- a/test/integration/evictions/evictions_test.go +++ b/test/integration/evictions/evictions_test.go @@ -40,7 +40,6 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/apiserver/pkg/util/feature" cacheddiscovery "k8s.io/client-go/discovery/cached/memory" "k8s.io/client-go/dynamic" "k8s.io/client-go/informers" @@ -50,12 +49,10 @@ import ( "k8s.io/client-go/restmapper" "k8s.io/client-go/scale" "k8s.io/client-go/tools/cache" - featuregatetesting "k8s.io/component-base/featuregate/testing" "k8s.io/klog/v2" kubeapiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing" podutil "k8s.io/kubernetes/pkg/api/v1/pod" "k8s.io/kubernetes/pkg/controller/disruption" - "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/test/integration/framework" "k8s.io/kubernetes/test/utils/ktesting" )
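
Note on the pattern above (illustrative, not part of the patch): with PodDisruptionConditions graduated, these tests stop toggling the feature gate and assert the DisruptionTarget pod condition directly, and the e2e eviction spec now builds the expected condition type inline with ptr.To(v1.DisruptionTarget) instead of a named local variable. Below is a minimal sketch of that check, assuming k8s.io/api and k8s.io/utils are on the module path; the package and helper names are hypothetical and not taken from the patch.

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/utils/ptr"
)

// wantCondition expresses the expected condition type inline, as the e2e
// eviction spec above does, rather than declaring a separate local variable.
var wantCondition = ptr.To(v1.DisruptionTarget)

// hasCondition reports whether the pod status carries the given condition
// type with status True, e.g. the DisruptionTarget condition the kubelet
// adds to a pod it evicts.
func hasCondition(status v1.PodStatus, condType v1.PodConditionType) bool {
	for _, c := range status.Conditions {
		if c.Type == condType && c.Status == v1.ConditionTrue {
			return true
		}
	}
	return false
}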