From 7c85784b9f5007d8c8d85f6929309cde09ecedc7 Mon Sep 17 00:00:00 2001 From: HirazawaUi <695097494plus@gmail.com> Date: Sat, 20 Jan 2024 23:33:29 +0800 Subject: [PATCH 1/3] fix the bug where pod grace period will be overwritten --- pkg/features/kube_features.go | 6 ++++++ pkg/features/versioned_kube_features.go | 3 +++ pkg/kubelet/eviction/eviction_manager.go | 4 ++++ .../test_data/versioned_feature_list.yaml | 6 ++++++ 4 files changed, 19 insertions(+) diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 11fb8d88055..439bea9b999 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -45,6 +45,12 @@ const ( // with DNS names. AllowDNSOnlyNodeCSR featuregate.Feature = "AllowDNSOnlyNodeCSR" + // owner: @HirazawaUi + // Deprecated: v1.32 + // + // Allow spec.terminationGracePeriodSeconds to be overridden by MaxPodGracePeriodSeconds in soft evictions. + AllowOverwriteTerminationGracePeriodSeconds featuregate.Feature = "AllowOverwriteTerminationGracePeriodSeconds" + // owner: @bswartz // alpha: v1.18 // beta: v1.24 diff --git a/pkg/features/versioned_kube_features.go b/pkg/features/versioned_kube_features.go index 6398233b835..7b8603fde74 100644 --- a/pkg/features/versioned_kube_features.go +++ b/pkg/features/versioned_kube_features.go @@ -33,6 +33,9 @@ import ( // // Entries are alphabetized. var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate.VersionedSpecs{ + AllowOverwriteTerminationGracePeriodSeconds: { + {Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Deprecated}, + }, AnyVolumeDataSource: { {Version: version.MustParse("1.18"), Default: false, PreRelease: featuregate.Alpha}, {Version: version.MustParse("1.24"), Default: true, PreRelease: featuregate.Beta}, diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go index 00df1a773c6..63f599e843e 100644 --- a/pkg/kubelet/eviction/eviction_manager.go +++ b/pkg/kubelet/eviction/eviction_manager.go @@ -411,7 +411,11 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act gracePeriodOverride := int64(immediateEvictionGracePeriodSeconds) if !isHardEvictionThreshold(thresholdToReclaim) { gracePeriodOverride = m.config.MaxPodGracePeriodSeconds + if pod.Spec.TerminationGracePeriodSeconds != nil && !utilfeature.DefaultFeatureGate.Enabled(features.AllowOverwriteTerminationGracePeriodSeconds) { + gracePeriodOverride = min(m.config.MaxPodGracePeriodSeconds, *pod.Spec.TerminationGracePeriodSeconds) + } } + message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc, thresholds, observations) condition := &v1.PodCondition{ Type: v1.DisruptionTarget, diff --git a/test/featuregates_linter/test_data/versioned_feature_list.yaml b/test/featuregates_linter/test_data/versioned_feature_list.yaml index e0dc157f936..00d6f8c8c7e 100644 --- a/test/featuregates_linter/test_data/versioned_feature_list.yaml +++ b/test/featuregates_linter/test_data/versioned_feature_list.yaml @@ -1,3 +1,9 @@ +- name: AllowOverwriteTerminationGracePeriodSeconds + versionedSpecs: + - default: false + lockToDefault: false + preRelease: Deprecated + version: "1.32" - name: AnonymousAuthConfigurableEndpoints versionedSpecs: - default: false From 9d4e272c161f53619d5d89386c27b8f720512514 Mon Sep 17 00:00:00 2001 From: HirazawaUi <695097494plus@gmail.com> Date: Wed, 24 Jan 2024 01:01:03 +0800 Subject: [PATCH 2/3] add e2e test for pod grace period being overridden --- test/e2e_node/eviction_test.go | 68 ++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/test/e2e_node/eviction_test.go b/test/e2e_node/eviction_test.go index 6f49572afc5..36c5095f9c6 100644 --- a/test/e2e_node/eviction_test.go +++ b/test/e2e_node/eviction_test.go @@ -248,6 +248,44 @@ var _ = SIGDescribe("LocalStorageSoftEviction", framework.WithSlow(), framework. }) }) +var _ = SIGDescribe("LocalStorageSoftEvictionNotOverwriteTerminationGracePeriodSeconds", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), nodefeature.Eviction, func() { + f := framework.NewDefaultFramework("localstorage-eviction-test") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + pressureTimeout := 10 * time.Minute + expectedNodeCondition := v1.NodeDiskPressure + expectedStarvedResource := v1.ResourceEphemeralStorage + + evictionMaxPodGracePeriod := 30 + evictionSoftGracePeriod := 30 + ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() { + tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) { + diskConsumed := resource.MustParse("4Gi") + summary := eventuallyGetSummary(ctx) + availableBytes := *(summary.Node.Fs.AvailableBytes) + if availableBytes <= uint64(diskConsumed.Value()) { + e2eskipper.Skipf("Too little disk free on the host for the LocalStorageSoftEviction test to run") + } + initialConfig.EvictionSoft = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))} + initialConfig.EvictionSoftGracePeriod = map[string]string{string(evictionapi.SignalNodeFsAvailable): "30s"} + // Defer to the pod default grace period + initialConfig.EvictionMaxPodGracePeriod = int32(evictionMaxPodGracePeriod) + initialConfig.EvictionMinimumReclaim = map[string]string{} + // Ensure that pods are not evicted because of the eviction-hard threshold + // setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty) + initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"} + }) + + runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{ + { + evictionMaxPodGracePeriod: evictionSoftGracePeriod, + evictionSoftGracePeriod: evictionMaxPodGracePeriod, + evictionPriority: 1, + pod: diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}), + }, + }) + }) +}) + // This test validates that in-memory EmptyDir's are evicted when the Kubelet does // not have Sized Memory Volumes enabled. When Sized volumes are enabled, it's // not possible to exhaust the quota. @@ -551,6 +589,9 @@ type podEvictSpec struct { evictionPriority int pod *v1.Pod wantPodDisruptionCondition *v1.PodConditionType + + evictionMaxPodGracePeriod int + evictionSoftGracePeriod int } // runEvictionTest sets up a testing environment given the provided pods, and checks a few things: @@ -589,16 +630,21 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe }, pressureTimeout, evictionPollInterval).Should(gomega.BeNil()) ginkgo.By("Waiting for evictions to occur") + nodeUnreadyTime := time.Now() + gomega.Eventually(ctx, func(ctx context.Context) error { if expectedNodeCondition != noPressure { if hasNodeCondition(ctx, f, expectedNodeCondition) { framework.Logf("Node has %s", expectedNodeCondition) } else { framework.Logf("Node does NOT have %s", expectedNodeCondition) + nodeUnreadyTime = time.Now() } } logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey) logFunc(ctx) + + verifyEvictionPeriod(ctx, f, testSpecs, nodeUnreadyTime) return verifyEvictionOrdering(ctx, f, testSpecs) }, pressureTimeout, evictionPollInterval).Should(gomega.Succeed()) @@ -770,6 +816,28 @@ func verifyEvictionOrdering(ctx context.Context, f *framework.Framework, testSpe return fmt.Errorf("pods that should be evicted are still running: %#v", pendingPods) } +func verifyEvictionPeriod(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec, nodeUnreadyTime time.Time) { + for i, spec := range testSpecs { + if spec.evictionMaxPodGracePeriod == 0 && spec.evictionSoftGracePeriod == 0 { + continue + } + softEvictionPeriod := spec.evictionMaxPodGracePeriod + spec.evictionSoftGracePeriod + + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(ctx, spec.pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "Failed to get the recent pod object for name: %q", pod.Name) + + minSoftEvictionPeriod := min(float64(softEvictionPeriod), float64(*spec.pod.Spec.TerminationGracePeriodSeconds+int64(spec.evictionSoftGracePeriod))) + if pod.Status.Phase == v1.PodFailed { + if time.Since(nodeUnreadyTime).Seconds() > minSoftEvictionPeriod+15 { + framework.Failf("pod %s should be evicted within %f seconds, but it has not been evicted for %f seconds.", pod.Name, minSoftEvictionPeriod, time.Since(nodeUnreadyTime).Seconds()) + } else { + testSpecs[i].evictionMaxPodGracePeriod = 0 + testSpecs[i].evictionSoftGracePeriod = 0 + } + } + } +} + func verifyPodConditions(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec) { for _, spec := range testSpecs { if spec.wantPodDisruptionCondition != nil { From 49058ee799bb0fb6f72461576257a5d108300359 Mon Sep 17 00:00:00 2001 From: HirazawaUi <695097494plus@gmail.com> Date: Sun, 28 Jan 2024 21:58:34 +0800 Subject: [PATCH 3/3] remove useless comments --- pkg/generated/openapi/zz_generated.openapi.go | 2 +- staging/src/k8s.io/kubelet/config/v1beta1/types.go | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index 465001ed127..b950699f1f7 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -62435,7 +62435,7 @@ func schema_k8sio_kubelet_config_v1beta1_KubeletConfiguration(ref common.Referen }, "evictionMaxPodGracePeriod": { SchemaProps: spec.SchemaProps{ - Description: "evictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met. This value effectively caps the Pod's terminationGracePeriodSeconds value during soft evictions. Note: Due to issue #64530, the behavior has a bug where this value currently just overrides the grace period during soft eviction, which can increase the grace period from what is set on the Pod. This bug will be fixed in a future release. Default: 0", + Description: "evictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met. This value effectively caps the Pod's terminationGracePeriodSeconds value during soft evictions. Default: 0", Type: []string{"integer"}, Format: "int32", }, diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/types.go b/staging/src/k8s.io/kubelet/config/v1beta1/types.go index 1f7bbc93931..8c7ad4ec61e 100644 --- a/staging/src/k8s.io/kubelet/config/v1beta1/types.go +++ b/staging/src/k8s.io/kubelet/config/v1beta1/types.go @@ -527,9 +527,6 @@ type KubeletConfiguration struct { // evictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use // when terminating pods in response to a soft eviction threshold being met. This value // effectively caps the Pod's terminationGracePeriodSeconds value during soft evictions. - // Note: Due to issue #64530, the behavior has a bug where this value currently just - // overrides the grace period during soft eviction, which can increase the grace - // period from what is set on the Pod. This bug will be fixed in a future release. // Default: 0 // +optional EvictionMaxPodGracePeriod int32 `json:"evictionMaxPodGracePeriod,omitempty"`