Promote Local storage capacity isolation feature to GA

This change promotes the local storage capacity isolation feature to GA.

At the same time, to allow rootless systems to disable this feature (they
cannot determine the root filesystem's capacity), this change introduces a
new kubelet configuration field, "localStorageCapacityIsolation". It defaults
to true; rootless systems can set it to false to disable the feature. Once it
is set to false, users cannot set ephemeral-storage requests/limits, because
the node's ephemeral-storage capacity and allocatable will not be set.
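
A minimal sketch of the intended defaulting, assuming the option is exposed as
a *bool field named LocalStorageCapacityIsolation on the kubelet configuration
type; the stand-in type and helper below are illustrative only, and the real
KubeletConfiguration wiring is not part of this excerpt:

package main

import "fmt"

// kubeletConfiguration is a stand-in for the real KubeletConfiguration type;
// only the field introduced by this change is sketched here.
type kubeletConfiguration struct {
	// LocalStorageCapacityIsolation enables local ephemeral-storage capacity
	// isolation. Left unset it defaults to true; rootless systems set it to
	// false because the root filesystem capacity cannot be determined there.
	LocalStorageCapacityIsolation *bool
}

// effectiveLocalStorageCapacityIsolation applies the documented default of
// true when the field is left unset.
func effectiveLocalStorageCapacityIsolation(cfg kubeletConfiguration) bool {
	if cfg.LocalStorageCapacityIsolation == nil {
		return true
	}
	return *cfg.LocalStorageCapacityIsolation
}

func main() {
	disabled := false
	rootless := kubeletConfiguration{LocalStorageCapacityIsolation: &disabled}
	fmt.Println(effectiveLocalStorageCapacityIsolation(kubeletConfiguration{})) // true
	fmt.Println(effectiveLocalStorageCapacityIsolation(rootless))               // false
}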

Change-Id: I48a52e737c6a09e9131454db6ad31247b56c000a
Author: jinxu
Date: 2022-07-28 08:03:20 -07:00
Parent: bc4c4930ff
Commit: 0064010cdd
42 changed files with 267 additions and 383 deletions


@@ -26,13 +26,11 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/record"
v1helper "k8s.io/component-helpers/scheduling/corev1"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
@@ -97,6 +95,8 @@ type managerImpl struct {
thresholdNotifiers []ThresholdNotifier
// thresholdsLastUpdated is the last time the thresholdNotifiers were updated.
thresholdsLastUpdated time.Time
+ // whether local storage capacity isolation can be supported
+ localStorageCapacityIsolation bool
}
// ensure it implements the required interface
@@ -113,21 +113,23 @@ func NewManager(
recorder record.EventRecorder,
nodeRef *v1.ObjectReference,
clock clock.WithTicker,
+ localStorageCapacityIsolation bool,
) (Manager, lifecycle.PodAdmitHandler) {
manager := &managerImpl{
- clock: clock,
- killPodFunc: killPodFunc,
- mirrorPodFunc: mirrorPodFunc,
- imageGC: imageGC,
- containerGC: containerGC,
- config: config,
- recorder: recorder,
- summaryProvider: summaryProvider,
- nodeRef: nodeRef,
- nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
- thresholdsFirstObservedAt: thresholdsObservedAt{},
- dedicatedImageFs: nil,
- thresholdNotifiers: []ThresholdNotifier{},
+ clock: clock,
+ killPodFunc: killPodFunc,
+ mirrorPodFunc: mirrorPodFunc,
+ imageGC: imageGC,
+ containerGC: containerGC,
+ config: config,
+ recorder: recorder,
+ summaryProvider: summaryProvider,
+ nodeRef: nodeRef,
+ nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
+ thresholdsFirstObservedAt: thresholdsObservedAt{},
+ dedicatedImageFs: nil,
+ thresholdNotifiers: []ThresholdNotifier{},
+ localStorageCapacityIsolation: localStorageCapacityIsolation,
}
return manager, manager
}
@@ -230,7 +232,7 @@ func (m *managerImpl) IsUnderPIDPressure() bool {
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) []*v1.Pod {
// if we have nothing to do, just return
thresholds := m.config.Thresholds
- if len(thresholds) == 0 && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
+ if len(thresholds) == 0 && !m.localStorageCapacityIsolation {
return nil
}
@@ -318,7 +320,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
// evict pods if there is a resource usage violation from local volume temporary storage
// If eviction happens in localStorageEviction function, skip the rest of eviction action
- if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
+ if m.localStorageCapacityIsolation {
if evictedPods := m.localStorageEviction(activePods, statsFunc); len(evictedPods) > 0 {
return evictedPods
}
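
A reduced sketch (not the real eviction manager) of the pattern the hunks
above switch to: the manager carries its own localStorageCapacityIsolation
flag, injected through NewManager, instead of consulting the global feature
gate, so a rootless kubelet that disables the feature simply skips
local-storage eviction. The manager type and synchronize body below are
illustrative stand-ins:

package main

import "fmt"

// manager stands in for managerImpl; only the fields needed to show the
// gating logic from the diff are included.
type manager struct {
	thresholds                    []string
	localStorageCapacityIsolation bool
}

// synchronize mirrors the two checks changed above: an early return when
// there is nothing to do, and a guard around local ephemeral-storage eviction.
func (m *manager) synchronize(activePods []string) []string {
	if len(m.thresholds) == 0 && !m.localStorageCapacityIsolation {
		return nil
	}
	if m.localStorageCapacityIsolation {
		// Local ephemeral-storage limit enforcement would run here; pretend
		// the first active pod exceeded its limit.
		if len(activePods) > 0 {
			return activePods[:1]
		}
	}
	return nil
}

func main() {
	rootless := &manager{localStorageCapacityIsolation: false}
	standard := &manager{localStorageCapacityIsolation: true}
	fmt.Println(rootless.synchronize([]string{"pod-a"})) // []
	fmt.Println(standard.synchronize([]string{"pod-a"})) // [pod-a]
}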


@@ -18,12 +18,13 @@ package eviction
import (
"fmt"
"k8s.io/apimachinery/pkg/util/diff"
"reflect"
"sort"
"testing"
"time"
"k8s.io/apimachinery/pkg/util/diff"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -31,6 +32,7 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/features"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
@@ -699,7 +701,6 @@ func TestOrderedByExceedsRequestMemory(t *testing.T) {
}
func TestOrderedByExceedsRequestDisk(t *testing.T) {
- defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
below := newPod("below-requests", -1, []v1.Container{
newContainer("below-requests", v1.ResourceList{v1.ResourceEphemeralStorage: resource.MustParse("200Mi")}, newResourceList("", "", "")),
}, nil)
@@ -748,7 +749,6 @@ func TestOrderedByPriority(t *testing.T) {
}
func TestOrderedbyDisk(t *testing.T) {
- defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{
newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")),
}, []v1.Volume{
@@ -813,73 +813,6 @@ func TestOrderedbyDisk(t *testing.T) {
}
}
- // Tests that we correctly ignore disk requests when the local storage feature gate is disabled.
- func TestOrderedbyDiskDisableLocalStorage(t *testing.T) {
- defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, false)()
- pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{
- newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")),
- }, []v1.Volume{
- newVolume("local-volume", v1.VolumeSource{
- EmptyDir: &v1.EmptyDirVolumeSource{},
- }),
- })
- pod2 := newPod("best-effort-low", defaultPriority, []v1.Container{
- newContainer("best-effort-low", newResourceList("", "", ""), newResourceList("", "", "")),
- }, []v1.Volume{
- newVolume("local-volume", v1.VolumeSource{
- EmptyDir: &v1.EmptyDirVolumeSource{},
- }),
- })
- pod3 := newPod("burstable-high", defaultPriority, []v1.Container{
- newContainer("burstable-high", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")),
- }, []v1.Volume{
- newVolume("local-volume", v1.VolumeSource{
- EmptyDir: &v1.EmptyDirVolumeSource{},
- }),
- })
- pod4 := newPod("burstable-low", defaultPriority, []v1.Container{
- newContainer("burstable-low", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")),
- }, []v1.Volume{
- newVolume("local-volume", v1.VolumeSource{
- EmptyDir: &v1.EmptyDirVolumeSource{},
- }),
- })
- pod5 := newPod("guaranteed-high", defaultPriority, []v1.Container{
- newContainer("guaranteed-high", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")),
- }, []v1.Volume{
- newVolume("local-volume", v1.VolumeSource{
- EmptyDir: &v1.EmptyDirVolumeSource{},
- }),
- })
- pod6 := newPod("guaranteed-low", defaultPriority, []v1.Container{
- newContainer("guaranteed-low", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")),
- }, []v1.Volume{
- newVolume("local-volume", v1.VolumeSource{
- EmptyDir: &v1.EmptyDirVolumeSource{},
- }),
- })
- stats := map[*v1.Pod]statsapi.PodStats{
- pod1: newPodDiskStats(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("150Mi")), // 300Mi
- pod2: newPodDiskStats(pod2, resource.MustParse("25Mi"), resource.MustParse("25Mi"), resource.MustParse("50Mi")), // 100Mi
- pod3: newPodDiskStats(pod3, resource.MustParse("150Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 350Mi
- pod4: newPodDiskStats(pod4, resource.MustParse("25Mi"), resource.MustParse("35Mi"), resource.MustParse("50Mi")), // 110Mi
- pod5: newPodDiskStats(pod5, resource.MustParse("225Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 375Mi
- pod6: newPodDiskStats(pod6, resource.MustParse("25Mi"), resource.MustParse("45Mi"), resource.MustParse("50Mi")), // 120Mi
- }
- statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) {
- result, found := stats[pod]
- return result, found
- }
- pods := []*v1.Pod{pod1, pod3, pod2, pod4, pod5, pod6}
- orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)).Sort(pods)
- expected := []*v1.Pod{pod5, pod3, pod1, pod6, pod4, pod2}
- for i := range expected {
- if pods[i] != expected[i] {
- t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name)
- }
- }
- }
func TestOrderedbyInodes(t *testing.T) {
- defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
low := newPod("low", defaultPriority, []v1.Container{