Promote local storage capacity isolation feature to GA

This change promotes the local storage capacity isolation feature to GA.

At the same time, to let rootless systems disable this feature (they
cannot determine root filesystem usage), this change introduces a new
kubelet config field, "localStorageCapacityIsolation". It defaults to
true; rootless systems can set it to false to disable the feature.
Once disabled, users cannot set ephemeral-storage requests/limits,
because the node's ephemeral-storage capacity and allocatable will not
be reported.
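
For example, a rootless deployment that cannot report root filesystem
usage could opt out with a kubelet configuration like the following
(an illustrative snippet; the same setting is also exposed through the
new --local-storage-capacity-isolation kubelet flag added below):

    apiVersion: kubelet.config.k8s.io/v1beta1
    kind: KubeletConfiguration
    localStorageCapacityIsolation: false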

Change-Id: I48a52e737c6a09e9131454db6ad31247b56c000a
jinxu 2022-07-28 08:03:20 -07:00
parent bc4c4930ff
commit 0064010cdd
42 changed files with 267 additions and 383 deletions

View File

@@ -492,6 +492,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
 fs.Int32Var(&c.NodeStatusMaxImages, "node-status-max-images", c.NodeStatusMaxImages, "The maximum number of images to report in Node.Status.Images. If -1 is specified, no cap will be applied.")
 fs.BoolVar(&c.KernelMemcgNotification, "kernel-memcg-notification", c.KernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
+fs.BoolVar(&c.LocalStorageCapacityIsolation, "local-storage-capacity-isolation", c.LocalStorageCapacityIsolation, "If true, local ephemeral storage isolation is enabled. Otherwise, local storage isolation feature will be disabled")
 // Flags intended for testing, not recommended used in production environments.
 fs.Int64Var(&c.MaxOpenFiles, "max-open-files", c.MaxOpenFiles, "Number of files that can be opened by Kubelet process.")

View File

@@ -646,7 +646,7 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
 if kubeDeps.CAdvisorInterface == nil {
 imageFsInfoProvider := cadvisor.NewImageFsInfoProvider(s.RemoteRuntimeEndpoint)
-kubeDeps.CAdvisorInterface, err = cadvisor.New(imageFsInfoProvider, s.RootDirectory, cgroupRoots, cadvisor.UsingLegacyCadvisorStats(s.RemoteRuntimeEndpoint))
+kubeDeps.CAdvisorInterface, err = cadvisor.New(imageFsInfoProvider, s.RootDirectory, cgroupRoots, cadvisor.UsingLegacyCadvisorStats(s.RemoteRuntimeEndpoint), s.LocalStorageCapacityIsolation)
 if err != nil {
 return err
 }

View File

@@ -237,6 +237,7 @@ CPU_CFS_QUOTA=${CPU_CFS_QUOTA:-true}
 ENABLE_HOSTPATH_PROVISIONER=${ENABLE_HOSTPATH_PROVISIONER:-"false"}
 CLAIM_BINDER_SYNC_PERIOD=${CLAIM_BINDER_SYNC_PERIOD:-"15s"} # current k8s default
 ENABLE_CONTROLLER_ATTACH_DETACH=${ENABLE_CONTROLLER_ATTACH_DETACH:-"true"} # current default
+LOCAL_STORAGE_CAPACITY_ISOLATION=${LOCAL_STORAGE_CAPACITY_ISOLATION:-"true"} # current default
 # This is the default dir and filename where the apiserver will generate a self-signed cert
 # which should be able to be used as the CA to verify itself
 CERT_DIR=${CERT_DIR:-"/var/run/kubernetes"}
@@ -754,6 +755,7 @@ cgroupRoot: "${CGROUP_ROOT}"
 cgroupsPerQOS: ${CGROUPS_PER_QOS}
 cpuCFSQuota: ${CPU_CFS_QUOTA}
 enableControllerAttachDetach: ${ENABLE_CONTROLLER_ATTACH_DETACH}
+localStorageCapacityIsolation: ${LOCAL_STORAGE_CAPACITY_ISOLATION}
 evictionPressureTransitionPeriod: "${EVICTION_PRESSURE_TRANSITION_PERIOD}"
 failSwapOn: ${FAIL_SWAP_ON}
 port: ${KUBELET_PORT}

View File

@@ -517,14 +517,6 @@ func dropDisabledFields(
 }
 }
-if !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) && !emptyDirSizeLimitInUse(oldPodSpec) {
-for i := range podSpec.Volumes {
-if podSpec.Volumes[i].EmptyDir != nil {
-podSpec.Volumes[i].EmptyDir.SizeLimit = nil
-}
-}
-}
 if !utilfeature.DefaultFeatureGate.Enabled(features.ProbeTerminationGracePeriod) && !probeGracePeriodInUse(oldPodSpec) {
 // Set pod-level terminationGracePeriodSeconds to nil if the feature is disabled and it is not used
 VisitContainers(podSpec, AllContainers, func(c *api.Container, containerType ContainerType) bool {
@@ -703,21 +695,6 @@ func appArmorInUse(podAnnotations map[string]string) bool {
 return false
 }
-// emptyDirSizeLimitInUse returns true if any pod's EmptyDir volumes use SizeLimit.
-func emptyDirSizeLimitInUse(podSpec *api.PodSpec) bool {
-if podSpec == nil {
-return false
-}
-for i := range podSpec.Volumes {
-if podSpec.Volumes[i].EmptyDir != nil {
-if podSpec.Volumes[i].EmptyDir.SizeLimit != nil {
-return true
-}
-}
-}
-return false
-}
 // probeGracePeriodInUse returns true if the pod spec is non-nil and has a probe that makes use
 // of the probe-level terminationGracePeriodSeconds feature
 func probeGracePeriodInUse(podSpec *api.PodSpec) bool {

View File

@@ -25,7 +25,6 @@ import (
 "github.com/google/go-cmp/cmp"
 v1 "k8s.io/api/core/v1"
-"k8s.io/apimachinery/pkg/api/resource"
 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 "k8s.io/apimachinery/pkg/util/sets"
 "k8s.io/apimachinery/pkg/util/validation/field"
@@ -703,116 +702,6 @@
 }
 }
-func TestDropEmptyDirSizeLimit(t *testing.T) {
-sizeLimit := resource.MustParse("1Gi")
-podWithEmptyDirSizeLimit := func() *api.Pod {
-return &api.Pod{
-Spec: api.PodSpec{
-RestartPolicy: api.RestartPolicyNever,
-Volumes: []api.Volume{
-{
-Name: "a",
-VolumeSource: api.VolumeSource{
-EmptyDir: &api.EmptyDirVolumeSource{
-Medium: "memory",
-SizeLimit: &sizeLimit,
-},
-},
-},
-},
-},
-}
-}
-podWithoutEmptyDirSizeLimit := func() *api.Pod {
-return &api.Pod{
-Spec: api.PodSpec{
-RestartPolicy: api.RestartPolicyNever,
-Volumes: []api.Volume{
-{
-Name: "a",
-VolumeSource: api.VolumeSource{
-EmptyDir: &api.EmptyDirVolumeSource{
-Medium: "memory",
-},
-},
-},
-},
-},
-}
-}
-podInfo := []struct {
-description string
-hasEmptyDirSizeLimit bool
-pod func() *api.Pod
-}{
-{
-description: "has EmptyDir Size Limit",
-hasEmptyDirSizeLimit: true,
-pod: podWithEmptyDirSizeLimit,
-},
-{
-description: "does not have EmptyDir Size Limit",
-hasEmptyDirSizeLimit: false,
-pod: podWithoutEmptyDirSizeLimit,
-},
-{
-description: "is nil",
-hasEmptyDirSizeLimit: false,
-pod: func() *api.Pod { return nil },
-},
-}
-for _, enabled := range []bool{true, false} {
-for _, oldPodInfo := range podInfo {
-for _, newPodInfo := range podInfo {
-oldPodHasEmptyDirSizeLimit, oldPod := oldPodInfo.hasEmptyDirSizeLimit, oldPodInfo.pod()
-newPodHasEmptyDirSizeLimit, newPod := newPodInfo.hasEmptyDirSizeLimit, newPodInfo.pod()
-if newPod == nil {
-continue
-}
-t.Run(fmt.Sprintf("feature enabled=%v, old pod %v, new pod %v", enabled, oldPodInfo.description, newPodInfo.description), func(t *testing.T) {
-defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, enabled)()
-var oldPodSpec *api.PodSpec
-if oldPod != nil {
-oldPodSpec = &oldPod.Spec
-}
-dropDisabledFields(&newPod.Spec, nil, oldPodSpec, nil)
-// old pod should never be changed
-if !reflect.DeepEqual(oldPod, oldPodInfo.pod()) {
-t.Errorf("old pod changed: %v", cmp.Diff(oldPod, oldPodInfo.pod()))
-}
-switch {
-case enabled || oldPodHasEmptyDirSizeLimit:
-// new pod should not be changed if the feature is enabled, or if the old pod had EmptyDir SizeLimit
-if !reflect.DeepEqual(newPod, newPodInfo.pod()) {
-t.Errorf("new pod changed: %v", cmp.Diff(newPod, newPodInfo.pod()))
-}
-case newPodHasEmptyDirSizeLimit:
-// new pod should be changed
-if reflect.DeepEqual(newPod, newPodInfo.pod()) {
-t.Errorf("new pod was not changed")
-}
-// new pod should not have EmptyDir SizeLimit
-if !reflect.DeepEqual(newPod, podWithoutEmptyDirSizeLimit()) {
-t.Errorf("new pod had EmptyDir SizeLimit: %v", cmp.Diff(newPod, podWithoutEmptyDirSizeLimit()))
-}
-default:
-// new pod should not need to be changed
-if !reflect.DeepEqual(newPod, newPodInfo.pod()) {
-t.Errorf("new pod changed: %v", cmp.Diff(newPod, newPodInfo.pod()))
-}
-}
-})
-}
-}
-}
-}
 func TestDropAppArmor(t *testing.T) {
 podWithAppArmor := func() *api.Pod {
 return &api.Pod{

View File

@@ -24,8 +24,6 @@ import (
 v1 "k8s.io/api/core/v1"
 "k8s.io/apimachinery/pkg/api/resource"
-utilfeature "k8s.io/apiserver/pkg/util/feature"
-"k8s.io/kubernetes/pkg/features"
 )
 // PodRequestsAndLimits returns a dictionary of all defined resources summed up for all
@@ -131,11 +129,6 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou
 requestQuantity = resource.Quantity{Format: resource.DecimalSI}
 }
-if resourceName == v1.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
-// if the local storage capacity isolation feature gate is disabled, pods request 0 disk
-return requestQuantity
-}
 for _, container := range pod.Spec.Containers {
 if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
 requestQuantity.Add(rQuantity)

View File

@@ -515,8 +515,9 @@ const (
 // owner: @jinxu
 // beta: v1.10
+// stable: v1.25
 //
-// New local storage types to support local storage capacity isolation
+// Support local ephemeral storage types for local storage capacity isolation feature.
 LocalStorageCapacityIsolation featuregate.Feature = "LocalStorageCapacityIsolation"
 // owner: @RobertKrawitz
@@ -988,7 +989,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
 LegacyServiceAccountTokenNoAutoGeneration: {Default: true, PreRelease: featuregate.Beta},
-LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.Beta},
+LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.27
 LocalStorageCapacityIsolationFSQuotaMonitoring: {Default: true, PreRelease: featuregate.Beta},

View File

@@ -54589,6 +54589,13 @@ func schema_k8sio_kubelet_config_v1beta1_KubeletConfiguration(ref common.Referen
 Ref: ref("k8s.io/component-base/tracing/api/v1.TracingConfiguration"),
 },
 },
+"localStorageCapacityIsolation": {
+SchemaProps: spec.SchemaProps{
+Description: "LocalStorageCapacityIsolation enables local ephemeral storage isolation feature. The default setting is true. This feature allows users to set request/limit for container's ephemeral storage and manage it in a similar way as cpu and memory. It also allows setting sizeLimit for emptyDir volume, which will trigger pod eviction if disk usage from the volume exceeds the limit. This feature depends on the capability of detecting correct root file system disk usage. For certain systems, such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be disabled. Once disabled, user should not set request/limit for container's ephemeral storage, or sizeLimit for emptyDir. Default: true",
+Type: []string{"boolean"},
+Format: "",
+},
+},
 },
 },
 },

View File

@@ -20,7 +20,7 @@ import (
 "math/rand"
 "time"
-"github.com/google/gofuzz"
+fuzz "github.com/google/gofuzz"
 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 runtimeserializer "k8s.io/apimachinery/pkg/runtime/serializer"
@@ -110,6 +110,7 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} {
 }
 obj.EnableSystemLogHandler = true
 obj.MemoryThrottlingFactor = utilpointer.Float64Ptr(rand.Float64())
+obj.LocalStorageCapacityIsolation = true
 },
 }
 }

View File

@@ -282,5 +282,6 @@ var (
 "MemoryThrottlingFactor",
 "Tracing.Endpoint",
 "Tracing.SamplingRatePerMillion",
+"LocalStorageCapacityIsolation",
 )
 )

View File

@@ -52,6 +52,7 @@ iptablesMasqueradeBit: 14
 kind: KubeletConfiguration
 kubeAPIBurst: 10
 kubeAPIQPS: 5
+localStorageCapacityIsolation: true
 logging:
 flushFrequency: 5000000000
 format: text

View File

@@ -52,6 +52,7 @@ iptablesMasqueradeBit: 14
 kind: KubeletConfiguration
 kubeAPIBurst: 10
 kubeAPIQPS: 5
+localStorageCapacityIsolation: true
 logging:
 flushFrequency: 5000000000
 format: text

View File

@@ -450,6 +450,16 @@ type KubeletConfiguration struct {
 // +featureGate=KubeletTracing
 // +optional
 Tracing *tracingapi.TracingConfiguration
+// LocalStorageCapacityIsolation enables local ephemeral storage isolation feature. The default setting is true.
+// This feature allows users to set request/limit for container's ephemeral storage and manage it in a similar way
+// as cpu and memory. It also allows setting sizeLimit for emptyDir volume, which will trigger pod eviction if disk
+// usage from the volume exceeds the limit.
+// This feature depends on the capability of detecting correct root file system disk usage. For certain systems,
+// such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be
+// disabled. Once disabled, user should not set request/limit for container's ephemeral storage, or sizeLimit for emptyDir.
+// +optional
+LocalStorageCapacityIsolation bool
 }
 // KubeletAuthorizationMode denotes the authorization mode for the kubelet

View File

@@ -264,4 +264,7 @@ func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfigura
 if obj.RegisterNode == nil {
 obj.RegisterNode = utilpointer.BoolPtr(true)
 }
+if obj.LocalStorageCapacityIsolation == nil {
+obj.LocalStorageCapacityIsolation = utilpointer.BoolPtr(true)
+}
 }

View File

@@ -115,12 +115,13 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
 Format: "text",
 FlushFrequency: 5 * time.Second,
 },
 EnableSystemLogHandler: utilpointer.BoolPtr(true),
 EnableProfilingHandler: utilpointer.BoolPtr(true),
 EnableDebugFlagsHandler: utilpointer.BoolPtr(true),
 SeccompDefault: utilpointer.BoolPtr(false),
 MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor),
 RegisterNode: utilpointer.BoolPtr(true),
+LocalStorageCapacityIsolation: utilpointer.BoolPtr(true),
 },
 },
 {
@@ -245,6 +246,7 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
 SeccompDefault: utilpointer.Bool(false),
 MemoryThrottlingFactor: utilpointer.Float64(0),
 RegisterNode: utilpointer.BoolPtr(false),
+LocalStorageCapacityIsolation: utilpointer.BoolPtr(false),
 },
 &v1beta1.KubeletConfiguration{
 EnableServer: utilpointer.BoolPtr(false),
@@ -333,13 +335,14 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
 Format: "text",
 FlushFrequency: 5 * time.Second,
 },
 EnableSystemLogHandler: utilpointer.Bool(false),
 ReservedMemory: []v1beta1.MemoryReservation{},
 EnableProfilingHandler: utilpointer.Bool(false),
 EnableDebugFlagsHandler: utilpointer.Bool(false),
 SeccompDefault: utilpointer.Bool(false),
 MemoryThrottlingFactor: utilpointer.Float64(0),
 RegisterNode: utilpointer.BoolPtr(false),
+LocalStorageCapacityIsolation: utilpointer.BoolPtr(false),
 },
 },
 {
@@ -481,11 +484,12 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
 Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
 },
 },
 EnableProfilingHandler: utilpointer.Bool(true),
 EnableDebugFlagsHandler: utilpointer.Bool(true),
 SeccompDefault: utilpointer.Bool(true),
 MemoryThrottlingFactor: utilpointer.Float64(1),
 RegisterNode: utilpointer.BoolPtr(true),
+LocalStorageCapacityIsolation: utilpointer.BoolPtr(true),
 },
 &v1beta1.KubeletConfiguration{
 EnableServer: utilpointer.BoolPtr(true),
@@ -624,11 +628,12 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
 Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
 },
 },
 EnableProfilingHandler: utilpointer.Bool(true),
 EnableDebugFlagsHandler: utilpointer.Bool(true),
 SeccompDefault: utilpointer.Bool(true),
 MemoryThrottlingFactor: utilpointer.Float64(1),
 RegisterNode: utilpointer.BoolPtr(true),
+LocalStorageCapacityIsolation: utilpointer.BoolPtr(true),
 },
 },
 {
@@ -709,12 +714,13 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
 Format: "text",
 FlushFrequency: 5 * time.Second,
 },
 EnableSystemLogHandler: utilpointer.BoolPtr(true),
 EnableProfilingHandler: utilpointer.BoolPtr(true),
 EnableDebugFlagsHandler: utilpointer.BoolPtr(true),
 SeccompDefault: utilpointer.BoolPtr(false),
 MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor),
 RegisterNode: utilpointer.BoolPtr(true),
+LocalStorageCapacityIsolation: utilpointer.BoolPtr(true),
 },
 },
 }

View File

@@ -508,6 +508,9 @@ func autoConvert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(in
 return err
 }
 out.Tracing = (*apiv1.TracingConfiguration)(unsafe.Pointer(in.Tracing))
+if err := v1.Convert_Pointer_bool_To_bool(&in.LocalStorageCapacityIsolation, &out.LocalStorageCapacityIsolation, s); err != nil {
+return err
+}
 return nil
 }
@@ -683,6 +686,9 @@ func autoConvert_config_KubeletConfiguration_To_v1beta1_KubeletConfiguration(in
 return err
 }
 out.Tracing = (*apiv1.TracingConfiguration)(unsafe.Pointer(in.Tracing))
+if err := v1.Convert_bool_To_Pointer_bool(&in.LocalStorageCapacityIsolation, &out.LocalStorageCapacityIsolation, s); err != nil {
+return err
+}
 return nil
 }

View File

@@ -80,7 +80,7 @@ func init() {
 }
 // New creates a new cAdvisor Interface for linux systems.
-func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats bool) (Interface, error) {
+func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) {
 sysFs := sysfs.NewRealSysFs()
 includedMetrics := cadvisormetrics.MetricSet{
@@ -99,7 +99,7 @@ func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots [
 includedMetrics[cadvisormetrics.AcceleratorUsageMetrics] = struct{}{}
 }
-if usingLegacyStats || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
+if usingLegacyStats || localStorageCapacityIsolation {
 includedMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{}
 }

View File

@@ -33,7 +33,7 @@ type cadvisorUnsupported struct {
 var _ Interface = new(cadvisorUnsupported)
 // New creates a new cAdvisor Interface for unsupported systems.
-func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupsRoots []string, usingLegacyStats bool) (Interface, error) {
+func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupsRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) {
 return &cadvisorUnsupported{}, nil
 }

View File

@@ -34,7 +34,7 @@ type cadvisorClient struct {
 var _ Interface = new(cadvisorClient)
 // New creates a cAdvisor and exports its API on the specified port if port > 0.
-func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats bool) (Interface, error) {
+func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) {
 client, err := winstats.NewPerfCounterClient()
 return &cadvisorClient{
 rootPath: rootPath,

View File

@@ -47,7 +47,7 @@ type ContainerManager interface {
 // Runs the container manager's housekeeping.
 // - Ensures that the Docker daemon is in a container.
 // - Creates the system container where all non-containerized processes run.
-Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService) error
+Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService, bool) error
 // SystemCgroupsLimit returns resources allocated to system cgroups in the machine.
 // These cgroups include the system and Kubernetes services.
@@ -73,7 +73,7 @@ type ContainerManager interface {
 GetNodeAllocatableReservation() v1.ResourceList
 // GetCapacity returns the amount of compute resources tracked by container manager available on the node.
-GetCapacity() v1.ResourceList
+GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList
 // GetDevicePluginResourceCapacity returns the node capacity (amount of total device plugin resources),
 // node allocatable (amount of total healthy resources reported by device plugin),

View File

@@ -554,7 +554,8 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
 activePods ActivePodsFunc,
 sourcesReady config.SourcesReady,
 podStatusProvider status.PodStatusProvider,
-runtimeService internalapi.RuntimeService) error {
+runtimeService internalapi.RuntimeService,
+localStorageCapacityIsolation bool) error {
 // Initialize CPU manager
 if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.CPUManager) {
@@ -578,7 +579,7 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
 // allocatable of the node
 cm.nodeInfo = node
-if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
+if localStorageCapacityIsolation {
 rootfs, err := cm.cadvisorInterface.RootFsInfo()
 if err != nil {
 return fmt.Errorf("failed to get rootfs info: %v", err)
@@ -915,8 +916,8 @@ func isKernelPid(pid int) bool {
 // GetCapacity returns node capacity data for "cpu", "memory", "ephemeral-storage", and "huge-pages*"
 // At present this method is only invoked when introspecting ephemeral storage
-func (cm *containerManagerImpl) GetCapacity() v1.ResourceList {
-if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
+func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
+if localStorageCapacityIsolation {
 // We store allocatable ephemeral-storage in the capacity property once we Start() the container manager
 if _, ok := cm.capacity[v1.ResourceEphemeralStorage]; !ok {
 // If we haven't yet stored the capacity for ephemeral-storage, we can try to fetch it directly from cAdvisor,

View File

@@ -28,9 +28,6 @@ import (
 gomock "github.com/golang/mock/gomock"
 cadvisorapiv2 "github.com/google/cadvisor/info/v2"
-utilfeature "k8s.io/apiserver/pkg/util/feature"
-featuregatetesting "k8s.io/component-base/featuregate/testing"
-kubefeatures "k8s.io/kubernetes/pkg/features"
 "github.com/opencontainers/runc/libcontainer/cgroups"
 "github.com/stretchr/testify/assert"
@@ -193,11 +190,11 @@ func TestGetCapacity(t *testing.T) {
 mockCadvisorError := cadvisortest.NewMockInterface(mockCtrlError)
 mockCadvisorError.EXPECT().RootFsInfo().Return(cadvisorapiv2.FsInfo{}, errors.New("Unable to get rootfs data from cAdvisor interface"))
 cases := []struct {
 name string
 cm *containerManagerImpl
 expectedResourceQuantity *resource.Quantity
 expectedNoEphemeralStorage bool
-enableLocalStorageCapacityIsolation bool
+disablelocalStorageCapacityIsolation bool
 }{
 {
 name: "capacity property has ephemeral-storage",
@@ -207,9 +204,8 @@ func TestGetCapacity(t *testing.T) {
 v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
 },
 },
 expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
 expectedNoEphemeralStorage: false,
-enableLocalStorageCapacityIsolation: true,
 },
 {
 name: "capacity property does not have ephemeral-storage",
@@ -217,9 +213,8 @@ func TestGetCapacity(t *testing.T) {
 cadvisorInterface: mockCadvisor,
 capacity: v1.ResourceList{},
 },
 expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCadvisor, resource.BinarySI),
 expectedNoEphemeralStorage: false,
-enableLocalStorageCapacityIsolation: true,
 },
 {
 name: "capacity property does not have ephemeral-storage, error from rootfs",
@@ -227,8 +222,7 @@ func TestGetCapacity(t *testing.T) {
 cadvisorInterface: mockCadvisorError,
 capacity: v1.ResourceList{},
 },
 expectedNoEphemeralStorage: true,
-enableLocalStorageCapacityIsolation: true,
 },
 {
 name: "capacity property does not have ephemeral-storage, cadvisor interface is nil",
@@ -236,26 +230,24 @@ func TestGetCapacity(t *testing.T) {
 cadvisorInterface: nil,
 capacity: v1.ResourceList{},
 },
 expectedNoEphemeralStorage: true,
-enableLocalStorageCapacityIsolation: true,
 },
 {
-name: "LocalStorageCapacityIsolation feature flag is disabled",
+name: "capacity property has ephemeral-storage, but localStorageCapacityIsolation is disabled",
 cm: &containerManagerImpl{
 cadvisorInterface: mockCadvisor,
 capacity: v1.ResourceList{
-v1.ResourceCPU: resource.MustParse("4"),
-v1.ResourceMemory: resource.MustParse("16G"),
+v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
 },
 },
-expectedNoEphemeralStorage: true,
-enableLocalStorageCapacityIsolation: false,
+expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
+expectedNoEphemeralStorage: true,
+disablelocalStorageCapacityIsolation: true,
 },
 }
 for _, c := range cases {
 t.Run(c.name, func(t *testing.T) {
-defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, kubefeatures.LocalStorageCapacityIsolation, c.enableLocalStorageCapacityIsolation)()
-ret := c.cm.GetCapacity()
+ret := c.cm.GetCapacity(!c.disablelocalStorageCapacityIsolation)
 if v, exists := ret[v1.ResourceEphemeralStorage]; !exists {
 if !c.expectedNoEphemeralStorage {
 t.Errorf("did not get any ephemeral storage data")

View File

@@ -41,7 +41,7 @@ type containerManagerStub struct {
 var _ ContainerManager = &containerManagerStub{}
-func (cm *containerManagerStub) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService) error {
+func (cm *containerManagerStub) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
 klog.V(2).InfoS("Starting stub container manager")
 return nil
 }
@@ -74,7 +74,10 @@ func (cm *containerManagerStub) GetNodeAllocatableReservation() v1.ResourceList
 return nil
 }
-func (cm *containerManagerStub) GetCapacity() v1.ResourceList {
+func (cm *containerManagerStub) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
+if !localStorageCapacityIsolation {
+return v1.ResourceList{}
+}
 c := v1.ResourceList{
 v1.ResourceEphemeralStorage: *resource.NewQuantity(
 int64(0),

View File

@@ -38,7 +38,7 @@ type unsupportedContainerManager struct {
 var _ ContainerManager = &unsupportedContainerManager{}
-func (unsupportedContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService) error {
+func (unsupportedContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
 return fmt.Errorf("Container Manager is unsupported in this build")
 }

View File

@@ -30,11 +30,9 @@ import (
 v1 "k8s.io/api/core/v1"
 "k8s.io/apimachinery/pkg/api/resource"
-utilfeature "k8s.io/apiserver/pkg/util/feature"
 "k8s.io/client-go/tools/record"
 internalapi "k8s.io/cri-api/pkg/apis"
 podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
-kubefeatures "k8s.io/kubernetes/pkg/features"
 "k8s.io/kubernetes/pkg/kubelet/cadvisor"
 "k8s.io/kubernetes/pkg/kubelet/cm/admission"
 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
@@ -72,10 +70,11 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
 activePods ActivePodsFunc,
 sourcesReady config.SourcesReady,
 podStatusProvider status.PodStatusProvider,
-runtimeService internalapi.RuntimeService) error {
+runtimeService internalapi.RuntimeService,
+localStorageCapacityIsolation bool) error {
 klog.V(2).InfoS("Starting Windows container manager")
-if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
+if localStorageCapacityIsolation {
 rootfs, err := cm.cadvisorInterface.RootFsInfo()
 if err != nil {
 return fmt.Errorf("failed to get rootfs info: %v", err)
@@ -171,7 +170,7 @@ func (cm *containerManagerImpl) GetNodeAllocatableReservation() v1.ResourceList
 return result
 }
-func (cm *containerManagerImpl) GetCapacity() v1.ResourceList {
+func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
 return cm.capacity
 }

View File

@@ -50,7 +50,7 @@ func NewFakeContainerManager() *FakeContainerManager {
 }
 }
-func (cm *FakeContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService) error {
+func (cm *FakeContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
 cm.Lock()
 defer cm.Unlock()
 cm.CalledFunctions = append(cm.CalledFunctions, "Start")
@@ -106,10 +106,13 @@ func (cm *FakeContainerManager) GetNodeAllocatableReservation() v1.ResourceList
 return nil
 }
-func (cm *FakeContainerManager) GetCapacity() v1.ResourceList {
+func (cm *FakeContainerManager) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
 cm.Lock()
 defer cm.Unlock()
 cm.CalledFunctions = append(cm.CalledFunctions, "GetCapacity")
+if !localStorageCapacityIsolation {
+return v1.ResourceList{}
+}
 c := v1.ResourceList{
 v1.ResourceEphemeralStorage: *resource.NewQuantity(
 int64(0),

View File

@@ -26,13 +26,11 @@ import (
 v1 "k8s.io/api/core/v1"
 "k8s.io/apimachinery/pkg/api/resource"
-utilfeature "k8s.io/apiserver/pkg/util/feature"
 "k8s.io/client-go/tools/record"
 v1helper "k8s.io/component-helpers/scheduling/corev1"
 statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
 apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
 v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
-"k8s.io/kubernetes/pkg/features"
 evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
 "k8s.io/kubernetes/pkg/kubelet/lifecycle"
 "k8s.io/kubernetes/pkg/kubelet/metrics"
@@ -97,6 +95,8 @@ type managerImpl struct {
 thresholdNotifiers []ThresholdNotifier
 // thresholdsLastUpdated is the last time the thresholdNotifiers were updated.
 thresholdsLastUpdated time.Time
+// whether can support local storage capacity isolation
+localStorageCapacityIsolation bool
 }
 // ensure it implements the required interface
@@ -113,21 +113,23 @@ func NewManager(
 recorder record.EventRecorder,
 nodeRef *v1.ObjectReference,
 clock clock.WithTicker,
+localStorageCapacityIsolation bool,
 ) (Manager, lifecycle.PodAdmitHandler) {
 manager := &managerImpl{
 clock: clock,
 killPodFunc: killPodFunc,
 mirrorPodFunc: mirrorPodFunc,
 imageGC: imageGC,
 containerGC: containerGC,
 config: config,
 recorder: recorder,
 summaryProvider: summaryProvider,
 nodeRef: nodeRef,
 nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
 thresholdsFirstObservedAt: thresholdsObservedAt{},
 dedicatedImageFs: nil,
 thresholdNotifiers: []ThresholdNotifier{},
+localStorageCapacityIsolation: localStorageCapacityIsolation,
 }
 return manager, manager
 }
@@ -230,7 +232,7 @@ func (m *managerImpl) IsUnderPIDPressure() bool {
 func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) []*v1.Pod {
 // if we have nothing to do, just return
 thresholds := m.config.Thresholds
-if len(thresholds) == 0 && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
+if len(thresholds) == 0 && !m.localStorageCapacityIsolation {
 return nil
 }
@@ -318,7 +320,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 // evict pods if there is a resource usage violation from local volume temporary storage
 // If eviction happens in localStorageEviction function, skip the rest of eviction action
-if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
+if m.localStorageCapacityIsolation {
 if evictedPods := m.localStorageEviction(activePods, statsFunc); len(evictedPods) > 0 {
 return evictedPods
 }

View File

@@ -18,12 +18,13 @@ package eviction
 import (
 "fmt"
-"k8s.io/apimachinery/pkg/util/diff"
 "reflect"
 "sort"
 "testing"
 "time"
+"k8s.io/apimachinery/pkg/util/diff"
 v1 "k8s.io/api/core/v1"
 "k8s.io/apimachinery/pkg/api/resource"
 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -31,6 +32,7 @@ import (
 utilfeature "k8s.io/apiserver/pkg/util/feature"
 featuregatetesting "k8s.io/component-base/featuregate/testing"
 statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
 "k8s.io/kubernetes/pkg/features"
 evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
 kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
@@ -699,7 +701,6 @@ func TestOrderedByExceedsRequestMemory(t *testing.T) {
 }
 func TestOrderedByExceedsRequestDisk(t *testing.T) {
-defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
 below := newPod("below-requests", -1, []v1.Container{
 newContainer("below-requests", v1.ResourceList{v1.ResourceEphemeralStorage: resource.MustParse("200Mi")}, newResourceList("", "", "")),
 }, nil)
@@ -748,7 +749,6 @@ func TestOrderedByPriority(t *testing.T) {
 }
 func TestOrderedbyDisk(t *testing.T) {
-defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
 pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{
 newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")),
 }, []v1.Volume{
@@ -813,73 +813,6 @@ func TestOrderedbyDisk(t *testing.T) {
 }
 }
-// Tests that we correctly ignore disk requests when the local storage feature gate is disabled.
-func TestOrderedbyDiskDisableLocalStorage(t *testing.T) {
-defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, false)()
-pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{
-newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")),
-}, []v1.Volume{
-newVolume("local-volume", v1.VolumeSource{
-EmptyDir: &v1.EmptyDirVolumeSource{},
-}),
-})
-pod2 := newPod("best-effort-low", defaultPriority, []v1.Container{
-newContainer("best-effort-low", newResourceList("", "", ""), newResourceList("", "", "")),
-}, []v1.Volume{
-newVolume("local-volume", v1.VolumeSource{
-EmptyDir: &v1.EmptyDirVolumeSource{},
-}),
-})
-pod3 := newPod("burstable-high", defaultPriority, []v1.Container{
-newContainer("burstable-high", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")),
-}, []v1.Volume{
-newVolume("local-volume", v1.VolumeSource{
-EmptyDir: &v1.EmptyDirVolumeSource{},
-}),
-})
-pod4 := newPod("burstable-low", defaultPriority, []v1.Container{
-newContainer("burstable-low", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")),
-}, []v1.Volume{
-newVolume("local-volume", v1.VolumeSource{
-EmptyDir: &v1.EmptyDirVolumeSource{},
-}),
-})
-pod5 := newPod("guaranteed-high", defaultPriority, []v1.Container{
-newContainer("guaranteed-high", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")),
-}, []v1.Volume{
-newVolume("local-volume", v1.VolumeSource{
-EmptyDir: &v1.EmptyDirVolumeSource{},
-}),
-})
-pod6 := newPod("guaranteed-low", defaultPriority, []v1.Container{
-newContainer("guaranteed-low", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")),
-}, []v1.Volume{
-newVolume("local-volume", v1.VolumeSource{
-EmptyDir: &v1.EmptyDirVolumeSource{},
-}),
-})
-stats := map[*v1.Pod]statsapi.PodStats{
-pod1: newPodDiskStats(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("150Mi")), // 300Mi
-pod2: newPodDiskStats(pod2, resource.MustParse("25Mi"), resource.MustParse("25Mi"), resource.MustParse("50Mi")), // 100Mi
-pod3: newPodDiskStats(pod3, resource.MustParse("150Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 350Mi
-pod4: newPodDiskStats(pod4, resource.MustParse("25Mi"), resource.MustParse("35Mi"), resource.MustParse("50Mi")), // 110Mi
-pod5: newPodDiskStats(pod5, resource.MustParse("225Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 375Mi
-pod6: newPodDiskStats(pod6, resource.MustParse("25Mi"), resource.MustParse("45Mi"), resource.MustParse("50Mi")), // 120Mi
-}
-statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) {
-result, found := stats[pod]
-return result, found
-}
-pods := []*v1.Pod{pod1, pod3, pod2, pod4, pod5, pod6}
-orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)).Sort(pods)
-expected := []*v1.Pod{pod5, pod3, pod1, pod6, pod4, pod2}
-for i := range expected {
-if pods[i] != expected[i] {
-t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name)
-}
-}
-}
 func TestOrderedbyInodes(t *testing.T) {
 defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
 low := newPod("low", defaultPriority, []v1.Container{

View File

@@ -773,7 +773,8 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
 klet.backOff = flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)
 // setup eviction manager
-evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.podManager.GetMirrorPodByPod, klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock)
+evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig,
+killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.podManager.GetMirrorPodByPod, klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock, kubeCfg.LocalStorageCapacityIsolation)
 klet.evictionManager = evictionManager
 klet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
@@ -1384,7 +1385,7 @@ func (kl *Kubelet) initializeRuntimeDependentModules() {
 os.Exit(1)
 }
 // containerManager must start after cAdvisor because it needs filesystem capacity information
-if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService); err != nil {
+if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService, kl.supportLocalStorageCapacityIsolation()); err != nil {
 // Fail kubelet and rely on the babysitter to retry starting kubelet.
 klog.ErrorS(err, "Failed to start ContainerManager")
 os.Exit(1)
@@ -2489,6 +2490,10 @@ func (kl *Kubelet) CheckpointContainer(
 return nil
 }
+func (kl *Kubelet) supportLocalStorageCapacityIsolation() bool {
+return kl.GetConfiguration().LocalStorageCapacityIsolation
+}
 // isSyncPodWorthy filters out events that are not worthy of pod syncing
 func isSyncPodWorthy(event *pleg.PodLifecycleEvent) bool {
 // ContainerRemoved doesn't affect pod state

View File

@@ -624,7 +624,7 @@ func (kl *Kubelet) defaultNodeStatusFuncs() []func(*v1.Node) error {
 setters = append(setters,
 nodestatus.NodeAddress(kl.nodeIPs, kl.nodeIPValidator, kl.hostname, kl.hostnameOverridden, kl.externalCloudProvider, kl.cloud, nodeAddressesFunc),
 nodestatus.MachineInfo(string(kl.nodeName), kl.maxPods, kl.podsPerCore, kl.GetCachedMachineInfo, kl.containerManager.GetCapacity,
-kl.containerManager.GetDevicePluginResourceCapacity, kl.containerManager.GetNodeAllocatableReservation, kl.recordEvent),
+kl.containerManager.GetDevicePluginResourceCapacity, kl.containerManager.GetNodeAllocatableReservation, kl.recordEvent, kl.supportLocalStorageCapacityIsolation()),
 nodestatus.VersionInfo(kl.cadvisor.VersionInfo, kl.containerRuntime.Type, kl.containerRuntime.Version),
 nodestatus.DaemonEndpoints(kl.daemonEndpoints),
 nodestatus.Images(kl.nodeStatusMaxImages, kl.imageManager.GetImageList),
@@ -637,7 +637,8 @@ func (kl *Kubelet) defaultNodeStatusFuncs() []func(*v1.Node) error {
 nodestatus.MemoryPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderMemoryPressure, kl.recordNodeStatusEvent),
 nodestatus.DiskPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderDiskPressure, kl.recordNodeStatusEvent),
 nodestatus.PIDPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderPIDPressure, kl.recordNodeStatusEvent),
-nodestatus.ReadyCondition(kl.clock.Now, kl.runtimeState.runtimeErrors, kl.runtimeState.networkErrors, kl.runtimeState.storageErrors, validateHostFunc, kl.containerManager.Status, kl.shutdownManager.ShutdownStatus, kl.recordNodeStatusEvent),
+nodestatus.ReadyCondition(kl.clock.Now, kl.runtimeState.runtimeErrors, kl.runtimeState.networkErrors, kl.runtimeState.storageErrors,
+validateHostFunc, kl.containerManager.Status, kl.shutdownManager.ShutdownStatus, kl.recordNodeStatusEvent, kl.supportLocalStorageCapacityIsolation()),
 nodestatus.VolumesInUse(kl.volumeManager.ReconcilerStatesHasBeenSynced, kl.volumeManager.GetVolumesInUse),
 // TODO(mtaufen): I decided not to move this setter for now, since all it does is send an event
 // and record state back to the Kubelet runtime object. In the future, I'd like to isolate

View File

@@ -153,7 +153,10 @@ func (lcm *localCM) GetNodeAllocatableReservation() v1.ResourceList {
 return lcm.allocatableReservation
 }
-func (lcm *localCM) GetCapacity() v1.ResourceList {
+func (lcm *localCM) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
+if !localStorageCapacityIsolation {
+delete(lcm.capacity, v1.ResourceEphemeralStorage)
+}
 return lcm.capacity
 }
@@ -182,7 +185,7 @@ func TestUpdateNewNodeStatus(t *testing.T) {
 }
 inputImageList, expectedImageList := generateTestingImageLists(numTestImages, int(tc.nodeStatusMaxImages))
 testKubelet := newTestKubeletWithImageList(
-t, inputImageList, false /* controllerAttachDetachEnabled */, true /*initFakeVolumePlugin*/)
+t, inputImageList, false /* controllerAttachDetachEnabled */, true /*initFakeVolumePlugin*/, true /* localStorageCapacityIsolation */)
 defer testKubelet.Cleanup()
 kubelet := testKubelet.kubelet
 kubelet.nodeStatusMaxImages = tc.nodeStatusMaxImages
@@ -1347,7 +1350,7 @@ func TestUpdateNewNodeStatusTooLargeReservation(t *testing.T) {
 // generate one more in inputImageList than we configure the Kubelet to report
 inputImageList, _ := generateTestingImageLists(nodeStatusMaxImages+1, nodeStatusMaxImages)
 testKubelet := newTestKubeletWithImageList(
-t, inputImageList, false /* controllerAttachDetachEnabled */, true /* initFakeVolumePlugin */)
+t, inputImageList, false /* controllerAttachDetachEnabled */, true /* initFakeVolumePlugin */, true)
 defer testKubelet.Cleanup()
 kubelet := testKubelet.kubelet
 kubelet.nodeStatusMaxImages = nodeStatusMaxImages


@@ -23,7 +23,7 @@ import (
	"testing"

	"github.com/stretchr/testify/assert"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	_ "k8s.io/kubernetes/pkg/apis/core/install"


@@ -145,14 +145,16 @@ func newTestKubelet(t *testing.T, controllerAttachDetachEnabled bool) *TestKubelet {
			Size: 456,
		},
	}
-	return newTestKubeletWithImageList(t, imageList, controllerAttachDetachEnabled, true /*initFakeVolumePlugin*/)
+	return newTestKubeletWithImageList(t, imageList, controllerAttachDetachEnabled, true /*initFakeVolumePlugin*/, true /*localStorageCapacityIsolation*/)
}

func newTestKubeletWithImageList(
	t *testing.T,
	imageList []kubecontainer.Image,
	controllerAttachDetachEnabled bool,
-	initFakeVolumePlugin bool) *TestKubelet {
+	initFakeVolumePlugin bool,
+	localStorageCapacityIsolation bool,
+) *TestKubelet {
	logger, _ := ktesting.NewTestContext(t)
	fakeRuntime := &containertest.FakeRuntime{
@@ -320,7 +322,8 @@ func newTestKubeletWithImageList(
		Namespace: "",
	}
	// setup eviction manager
-	evictionManager, evictionAdmitHandler := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.podManager.GetMirrorPodByPod, kubelet.imageManager, kubelet.containerGC, fakeRecorder, nodeRef, kubelet.clock)
+	evictionManager, evictionAdmitHandler := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{},
+		killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.podManager.GetMirrorPodByPod, kubelet.imageManager, kubelet.containerGC, fakeRecorder, nodeRef, kubelet.clock, kubelet.supportLocalStorageCapacityIsolation())
	kubelet.evictionManager = evictionManager
	kubelet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
@@ -386,6 +389,7 @@ func newTestKubeletWithImageList(
	kubelet.AddPodSyncLoopHandler(activeDeadlineHandler)
	kubelet.AddPodSyncHandler(activeDeadlineHandler)
	kubelet.lastContainerStartedTime = newTimeCache()
+	kubelet.kubeletConfiguration.LocalStorageCapacityIsolation = localStorageCapacityIsolation
	return &TestKubelet{kubelet, fakeRuntime, fakeContainerManager, fakeKubeClient, fakeMirrorClient, fakeClock, nil, plug}
}


@@ -31,13 +31,11 @@ import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/errors"
	utilnet "k8s.io/apimachinery/pkg/util/net"
-	utilfeature "k8s.io/apiserver/pkg/util/feature"
	cloudprovider "k8s.io/cloud-provider"
	cloudproviderapi "k8s.io/cloud-provider/api"
	cloudprovidernodeutil "k8s.io/cloud-provider/node/helpers"
	"k8s.io/component-base/version"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
-	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
@@ -244,10 +242,11 @@ func MachineInfo(nodeName string,
	maxPods int,
	podsPerCore int,
	machineInfoFunc func() (*cadvisorapiv1.MachineInfo, error), // typically Kubelet.GetCachedMachineInfo
-	capacityFunc func() v1.ResourceList, // typically Kubelet.containerManager.GetCapacity
+	capacityFunc func(localStorageCapacityIsolation bool) v1.ResourceList, // typically Kubelet.containerManager.GetCapacity
	devicePluginResourceCapacityFunc func() (v1.ResourceList, v1.ResourceList, []string), // typically Kubelet.containerManager.GetDevicePluginResourceCapacity
	nodeAllocatableReservationFunc func() v1.ResourceList, // typically Kubelet.containerManager.GetNodeAllocatableReservation
	recordEventFunc func(eventType, event, message string), // typically Kubelet.recordEvent
+	localStorageCapacityIsolation bool,
) Setter {
	return func(node *v1.Node) error {
		// Note: avoid blindly overwriting the capacity in case opaque
@@ -295,16 +294,15 @@
			}
		}
		node.Status.NodeInfo.BootID = info.BootID

-		if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
-			// TODO: all the node resources should use ContainerManager.GetCapacity instead of deriving the
-			// capacity for every node status request
-			initialCapacity := capacityFunc()
-			if initialCapacity != nil {
-				if v, exists := initialCapacity[v1.ResourceEphemeralStorage]; exists {
-					node.Status.Capacity[v1.ResourceEphemeralStorage] = v
-				}
-			}
-		}
+		// TODO: all the node resources should use ContainerManager.GetCapacity instead of deriving the
+		// capacity for every node status request
+		initialCapacity := capacityFunc(localStorageCapacityIsolation)
+		if initialCapacity != nil {
+			if v, exists := initialCapacity[v1.ResourceEphemeralStorage]; exists {
+				node.Status.Capacity[v1.ResourceEphemeralStorage] = v
+			}
+		}
+		//}
		devicePluginCapacity, devicePluginAllocatable, removedDevicePlugins = devicePluginResourceCapacityFunc()
		for k, v := range devicePluginCapacity {
@@ -469,6 +467,7 @@ func ReadyCondition(
	cmStatusFunc func() cm.Status, // typically Kubelet.containerManager.Status
	nodeShutdownManagerErrorsFunc func() error, // typically kubelet.shutdownManager.errors.
	recordEventFunc func(eventType, event string), // typically Kubelet.recordNodeStatusEvent
+	localStorageCapacityIsolation bool,
) Setter {
	return func(node *v1.Node) error {
		// NOTE(aaronlevy): NodeReady condition needs to be the last in the list of node conditions.
@@ -484,7 +483,7 @@
			}
		}
		errs := []error{runtimeErrorsFunc(), networkErrorsFunc(), storageErrorsFunc(), nodeShutdownManagerErrorsFunc()}
		requiredCapacities := []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods}
-		if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
+		if localStorageCapacityIsolation {
			requiredCapacities = append(requiredCapacities, v1.ResourceEphemeralStorage)
		}
		missingCapacities := []string{}
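For orientation, the lines above feed the capacity check that produces the "missing node capacity for resources: ..." message exercised in the tests further down. A self-contained sketch of that logic (hypothetical helper name; assumes fmt, strings, and the v1 core API package are imported):

	// missingCapacityError mirrors the ReadyCondition capacity check: with
	// isolation enabled, ephemeral-storage joins the required resources, which
	// is why a node that disables the feature can stay Ready without reporting it.
	func missingCapacityError(node *v1.Node, localStorageCapacityIsolation bool) error {
		required := []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods}
		if localStorageCapacityIsolation {
			required = append(required, v1.ResourceEphemeralStorage)
		}
		var missing []string
		for _, res := range required {
			if _, ok := node.Status.Capacity[res]; !ok {
				missing = append(missing, string(res))
			}
		}
		if len(missing) > 0 {
			return fmt.Errorf("missing node capacity for resources: %s", strings.Join(missing, ", "))
		}
		return nil
	}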


@@ -585,17 +585,18 @@ func TestMachineInfo(t *testing.T) {
	}
	cases := []struct {
		desc                                 string
		node                                 *v1.Node
		maxPods                              int
		podsPerCore                          int
		machineInfo                          *cadvisorapiv1.MachineInfo
		machineInfoError                     error
		capacity                             v1.ResourceList
		devicePluginResourceCapacity         dprc
		nodeAllocatableReservation           v1.ResourceList
		expectNode                           *v1.Node
		expectEvents                         []testEvent
+		disableLocalStorageCapacityIsolation bool
	}{
		{
			desc: "machine identifiers, basic capacity and allocatable",
@@ -797,6 +798,35 @@
				},
			},
		},
+		{
+			desc:    "ephemeral storage is not reflected in capacity and allocatable because localStorageCapacityIsolation is disabled",
+			node:    &v1.Node{},
+			maxPods: 110,
+			machineInfo: &cadvisorapiv1.MachineInfo{
+				NumCores:       2,
+				MemoryCapacity: 1024,
+			},
+			capacity: v1.ResourceList{
+				v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+			},
+			expectNode: &v1.Node{
+				Status: v1.NodeStatus{
+					Capacity: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(1024, resource.BinarySI),
+						v1.ResourcePods:             *resource.NewQuantity(110, resource.DecimalSI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+					},
+					Allocatable: v1.ResourceList{
+						v1.ResourceCPU:              *resource.NewMilliQuantity(2000, resource.DecimalSI),
+						v1.ResourceMemory:           *resource.NewQuantity(1024, resource.BinarySI),
+						v1.ResourcePods:             *resource.NewQuantity(110, resource.DecimalSI),
+						v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
+					},
+				},
+			},
+			disableLocalStorageCapacityIsolation: true,
+		},
		{
			desc: "device plugin resources are reflected in capacity and allocatable",
			node: &v1.Node{},
@@ -962,7 +992,7 @@
	machineInfoFunc := func() (*cadvisorapiv1.MachineInfo, error) {
		return tc.machineInfo, tc.machineInfoError
	}
-	capacityFunc := func() v1.ResourceList {
+	capacityFunc := func(localStorageCapacityIsolation bool) v1.ResourceList {
		return tc.capacity
	}
	devicePluginResourceCapacityFunc := func() (v1.ResourceList, v1.ResourceList, []string) {
@@ -983,7 +1013,7 @@
	}
	// construct setter
	setter := MachineInfo(nodeName, tc.maxPods, tc.podsPerCore, machineInfoFunc, capacityFunc,
-		devicePluginResourceCapacityFunc, nodeAllocatableReservationFunc, recordEventFunc)
+		devicePluginResourceCapacityFunc, nodeAllocatableReservationFunc, recordEventFunc, !tc.disableLocalStorageCapacityIsolation)
	// call setter on node
	if err := setter(tc.node); err != nil {
		t.Fatalf("unexpected error: %v", err)
@@ -1180,17 +1210,28 @@
			},
		},
	}
+	withoutStorageCapacity := &v1.Node{
+		Status: v1.NodeStatus{
+			Capacity: v1.ResourceList{
+				v1.ResourceCPU:    *resource.NewMilliQuantity(2000, resource.DecimalSI),
+				v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
+				v1.ResourcePods:   *resource.NewQuantity(100, resource.DecimalSI),
+			},
+		},
+	}
+
	cases := []struct {
		desc                                 string
		node                                 *v1.Node
		runtimeErrors                        error
		networkErrors                        error
		storageErrors                        error
		appArmorValidateHostFunc             func() error
		cmStatus                             cm.Status
		nodeShutdownManagerErrors            error
		expectConditions                     []v1.NodeCondition
		expectEvents                         []testEvent
+		disableLocalStorageCapacityIsolation bool
	}{
		{
			desc: "new, ready",
@@ -1245,6 +1286,12 @@
			node:             &v1.Node{},
			expectConditions: []v1.NodeCondition{*makeReadyCondition(false, "missing node capacity for resources: cpu, memory, pods, ephemeral-storage", now, now)},
		},
+		{
+			desc:                                 "new, ready: localStorageCapacityIsolation is not supported",
+			node:                                 withoutStorageCapacity.DeepCopy(),
+			disableLocalStorageCapacityIsolation: true,
+			expectConditions:                     []v1.NodeCondition{*makeReadyCondition(true, "kubelet is posting ready status", now, now)},
+		},
		// the transition tests ensure timestamps are set correctly, no need to test the entire condition matrix in this section
		{
			desc: "transition to ready",
@@ -1324,7 +1371,7 @@
			})
		}
		// construct setter
-		setter := ReadyCondition(nowFunc, runtimeErrorsFunc, networkErrorsFunc, storageErrorsFunc, tc.appArmorValidateHostFunc, cmStatusFunc, nodeShutdownErrorsFunc, recordEventFunc)
+		setter := ReadyCondition(nowFunc, runtimeErrorsFunc, networkErrorsFunc, storageErrorsFunc, tc.appArmorValidateHostFunc, cmStatusFunc, nodeShutdownErrorsFunc, recordEventFunc, !tc.disableLocalStorageCapacityIsolation)
		// call setter on node
		if err := setter(tc.node); err != nil {
			t.Fatalf("unexpected error: %v", err)


@@ -130,7 +130,7 @@ func TestRunOnce(t *testing.T) {
		return nil
	}
	fakeMirrodPodFunc := func(*v1.Pod) (*v1.Pod, bool) { return nil, false }
-	evictionManager, evictionAdmitHandler := eviction.NewManager(kb.resourceAnalyzer, eviction.Config{}, fakeKillPodFunc, fakeMirrodPodFunc, nil, nil, kb.recorder, nodeRef, kb.clock)
+	evictionManager, evictionAdmitHandler := eviction.NewManager(kb.resourceAnalyzer, eviction.Config{}, fakeKillPodFunc, fakeMirrodPodFunc, nil, nil, kb.recorder, nodeRef, kb.clock, kb.supportLocalStorageCapacityIsolation())
	kb.evictionManager = evictionManager
	kb.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)


@@ -206,6 +206,7 @@ func GetHollowKubeletConfig(opt *HollowKubletOptions) (*options.KubeletFlags, *kubeletconfig.KubeletConfiguration) {
	c.ProtectKernelDefaults = false
	c.RegisterWithTaints = opt.RegisterWithTaints
	c.RegisterNode = true
+	c.LocalStorageCapacityIsolation = true
	return f, c
}


@@ -25,9 +25,6 @@ import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/util/wait"
-	"k8s.io/apiserver/pkg/util/feature"
-	"k8s.io/component-base/featuregate"
-	featuregatetesting "k8s.io/component-base/featuregate/testing"
	"k8s.io/kubernetes/pkg/scheduler/apis/config"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	plfeature "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
@@ -575,7 +572,6 @@ func TestStorageRequests(t *testing.T) {
		pod        *v1.Pod
		nodeInfo   *framework.NodeInfo
		name       string
-		features   map[featuregate.Feature]bool
		wantStatus *framework.Status
	}{
		{
@@ -599,13 +595,10 @@
			wantStatus: framework.NewStatus(framework.Unschedulable, getErrReason(v1.ResourceEphemeralStorage)),
		},
		{
-			pod: newResourceInitPod(newResourcePod(framework.Resource{EphemeralStorage: 25}), framework.Resource{EphemeralStorage: 25}),
+			pod: newResourceInitPod(newResourcePod(framework.Resource{EphemeralStorage: 5})),
			nodeInfo: framework.NewNodeInfo(
-				newResourcePod(framework.Resource{MilliCPU: 2, Memory: 2})),
-			name: "ephemeral local storage request is ignored due to disabled feature gate",
-			features: map[featuregate.Feature]bool{
-				"LocalStorageCapacityIsolation": false,
-			},
+				newResourcePod(framework.Resource{MilliCPU: 2, Memory: 2, EphemeralStorage: 10})),
+			name: "ephemeral local storage is sufficient",
		},
		{
			pod: newResourcePod(framework.Resource{EphemeralStorage: 10}),
@@ -617,9 +610,6 @@
	for _, test := range storagePodsTests {
		t.Run(test.name, func(t *testing.T) {
-			for k, v := range test.features {
-				defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, k, v)()
-			}
			node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 5, 20, 5)}}
			test.nodeInfo.SetNode(&node)


@@ -29,9 +29,7 @@ import (
	"k8s.io/apimachinery/pkg/labels"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/apimachinery/pkg/util/sets"
-	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/klog/v2"
-	"k8s.io/kubernetes/pkg/features"
	schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
@@ -444,10 +442,7 @@ func (r *Resource) Add(rl v1.ResourceList) {
		case v1.ResourcePods:
			r.AllowedPodNumber += int(rQuant.Value())
		case v1.ResourceEphemeralStorage:
-			if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
-				// if the local storage capacity isolation feature gate is disabled, pods request 0 disk.
-				r.EphemeralStorage += rQuant.Value()
-			}
+			r.EphemeralStorage += rQuant.Value()
		default:
			if schedutil.IsScalarResourceName(rName) {
				r.AddScalar(rName, rQuant.Value())
@@ -500,9 +495,7 @@ func (r *Resource) SetMaxResource(rl v1.ResourceList) {
		case v1.ResourceCPU:
			r.MilliCPU = max(r.MilliCPU, rQuantity.MilliValue())
		case v1.ResourceEphemeralStorage:
-			if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
-				r.EphemeralStorage = max(r.EphemeralStorage, rQuantity.Value())
-			}
+			r.EphemeralStorage = max(r.EphemeralStorage, rQuantity.Value())
		default:
			if schedutil.IsScalarResourceName(rName) {
				r.SetScalar(rName, max(r.ScalarResources[rName], rQuantity.Value()))
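With the gate deleted, ephemeral-storage requests always count toward the scheduler's per-node accounting. A toy, self-contained illustration using only types visible in this hunk:

	package main

	import (
		v1 "k8s.io/api/core/v1"
		"k8s.io/apimachinery/pkg/api/resource"
		"k8s.io/kubernetes/pkg/scheduler/framework"
	)

	func main() {
		var r framework.Resource
		r.Add(v1.ResourceList{v1.ResourceEphemeralStorage: resource.MustParse("1Gi")})
		// r.EphemeralStorage now holds 1Gi in bytes; previously a disabled
		// LocalStorageCapacityIsolation gate would have left it at zero.
		_ = r
	}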


@@ -18,8 +18,6 @@ package util

import (
	v1 "k8s.io/api/core/v1"
-	utilfeature "k8s.io/apiserver/pkg/util/feature"
-	"k8s.io/kubernetes/pkg/features"
)

// For each of these resources, a pod that doesn't request the resource explicitly
@@ -65,11 +63,6 @@ func GetRequestForResource(resource v1.ResourceName, requests *v1.ResourceList,
		}
		return requests.Memory().Value()
	case v1.ResourceEphemeralStorage:
-		// if the local storage capacity isolation feature gate is disabled, pods request 0 disk.
-		if !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
-			return 0
-		}
		quantity, found := (*requests)[v1.ResourceEphemeralStorage]
		if !found {
			return 0
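Downstream effect, sketched: the declared request is now returned unconditionally. The final parameter of GetRequestForResource is cut off in the hunk header above, so the call below assumes a conventional trailing nonZero bool and is illustrative only:

	requests := v1.ResourceList{v1.ResourceEphemeralStorage: resource.MustParse("2Gi")}
	got := GetRequestForResource(v1.ResourceEphemeralStorage, &requests, true)
	// got is 2Gi expressed in bytes; before this change a disabled gate forced 0 here.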


@@ -786,6 +786,17 @@ type KubeletConfiguration struct {
	// +featureGate=KubeletTracing
	// +optional
	Tracing *tracingapi.TracingConfiguration `json:"tracing,omitempty"`
+	// LocalStorageCapacityIsolation enables the local ephemeral storage isolation feature. The default setting is true.
+	// This feature allows users to set request/limit for a container's ephemeral storage and manage it in a similar way
+	// as cpu and memory. It also allows setting sizeLimit for emptyDir volumes, which will trigger pod eviction if disk
+	// usage from the volume exceeds the limit.
+	// This feature depends on the capability of detecting correct root file system disk usage. For certain systems,
+	// such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be
+	// disabled. Once disabled, users should not set request/limit for a container's ephemeral storage, or sizeLimit for emptyDir.
+	// Default: true
+	// +optional
+	LocalStorageCapacityIsolation *bool `json:"localStorageCapacityIsolation,omitempty"`
}

type KubeletAuthorizationMode string
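For a rootless deployment, the opt-out is a one-field change in the versioned kubelet configuration. A sketch, assuming a package alias for the versioned type above (all other fields left to defaulting):

	isolation := false // rootless: root filesystem usage cannot be detected
	cfg := &kubeletconfigv1beta1.KubeletConfiguration{
		LocalStorageCapacityIsolation: &isolation,
	}
	// Leaving the pointer nil keeps the documented default of true after defaulting runs.
	_ = cfg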


@@ -452,6 +452,11 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
		*out = new(apiv1.TracingConfiguration)
		(*in).DeepCopyInto(*out)
	}
+	if in.LocalStorageCapacityIsolation != nil {
+		in, out := &in.LocalStorageCapacityIsolation, &out.LocalStorageCapacityIsolation
+		*out = new(bool)
+		**out = **in
+	}
	return
}
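The generated copy deliberately allocates a fresh bool: copying the pointer itself would alias the source, so a mutation through the copy would leak back into the original. A quick demonstration via the generated DeepCopy wrapper:

	src := KubeletConfiguration{LocalStorageCapacityIsolation: new(bool)} // points at false
	dst := src.DeepCopy()
	*dst.LocalStorageCapacityIsolation = true
	// *src.LocalStorageCapacityIsolation is still false: no shared storage.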