Promote Local storage capacity isolation feature to GA

This change promotes the local storage capacity isolation feature to GA.

At the same time, to allow rootless systems to disable this feature
(they cannot determine root filesystem usage), this change introduces
a new kubelet configuration field, "localStorageCapacityIsolation".
It defaults to true; rootless systems can set it to false to disable
the feature. Once disabled, users cannot set ephemeral-storage
requests/limits, because ephemeral-storage capacity and allocatable
will not be set.
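
For illustration, a minimal kubelet configuration sketch for a rootless
setup that opts out of the feature. Only the localStorageCapacityIsolation
field comes from this change; the rest is a standard KubeletConfiguration
stub and the value shown is just an example.

apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
# Rootless systems that cannot read root filesystem usage can opt out;
# when omitted, the field defaults to true.
localStorageCapacityIsolation: false

With this set, the node will not report ephemeral-storage capacity or
allocatable, so pods on it should not request ephemeral-storage.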

Change-Id: I48a52e737c6a09e9131454db6ad31247b56c000a
jinxu 2022-07-28 08:03:20 -07:00
parent bc4c4930ff
commit 0064010cdd
42 changed files with 267 additions and 383 deletions

View File

@ -492,6 +492,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
fs.Int32Var(&c.NodeStatusMaxImages, "node-status-max-images", c.NodeStatusMaxImages, "The maximum number of images to report in Node.Status.Images. If -1 is specified, no cap will be applied.")
fs.BoolVar(&c.KernelMemcgNotification, "kernel-memcg-notification", c.KernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
fs.BoolVar(&c.LocalStorageCapacityIsolation, "local-storage-capacity-isolation", c.LocalStorageCapacityIsolation, "If true, local ephemeral storage isolation is enabled. Otherwise, local storage isolation feature will be disabled")
// Flags intended for testing, not recommended used in production environments.
fs.Int64Var(&c.MaxOpenFiles, "max-open-files", c.MaxOpenFiles, "Number of files that can be opened by Kubelet process.")

View File

@ -646,7 +646,7 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
if kubeDeps.CAdvisorInterface == nil {
imageFsInfoProvider := cadvisor.NewImageFsInfoProvider(s.RemoteRuntimeEndpoint)
kubeDeps.CAdvisorInterface, err = cadvisor.New(imageFsInfoProvider, s.RootDirectory, cgroupRoots, cadvisor.UsingLegacyCadvisorStats(s.RemoteRuntimeEndpoint))
kubeDeps.CAdvisorInterface, err = cadvisor.New(imageFsInfoProvider, s.RootDirectory, cgroupRoots, cadvisor.UsingLegacyCadvisorStats(s.RemoteRuntimeEndpoint), s.LocalStorageCapacityIsolation)
if err != nil {
return err
}

View File

@ -237,6 +237,7 @@ CPU_CFS_QUOTA=${CPU_CFS_QUOTA:-true}
ENABLE_HOSTPATH_PROVISIONER=${ENABLE_HOSTPATH_PROVISIONER:-"false"}
CLAIM_BINDER_SYNC_PERIOD=${CLAIM_BINDER_SYNC_PERIOD:-"15s"} # current k8s default
ENABLE_CONTROLLER_ATTACH_DETACH=${ENABLE_CONTROLLER_ATTACH_DETACH:-"true"} # current default
LOCAL_STORAGE_CAPACITY_ISOLATION=${LOCAL_STORAGE_CAPACITY_ISOLATION:-"true"} # current default
# This is the default dir and filename where the apiserver will generate a self-signed cert
# which should be able to be used as the CA to verify itself
CERT_DIR=${CERT_DIR:-"/var/run/kubernetes"}
@ -754,6 +755,7 @@ cgroupRoot: "${CGROUP_ROOT}"
cgroupsPerQOS: ${CGROUPS_PER_QOS}
cpuCFSQuota: ${CPU_CFS_QUOTA}
enableControllerAttachDetach: ${ENABLE_CONTROLLER_ATTACH_DETACH}
localStorageCapacityIsolation: ${LOCAL_STORAGE_CAPACITY_ISOLATION}
evictionPressureTransitionPeriod: "${EVICTION_PRESSURE_TRANSITION_PERIOD}"
failSwapOn: ${FAIL_SWAP_ON}
port: ${KUBELET_PORT}

View File

@ -517,14 +517,6 @@ func dropDisabledFields(
}
}
if !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) && !emptyDirSizeLimitInUse(oldPodSpec) {
for i := range podSpec.Volumes {
if podSpec.Volumes[i].EmptyDir != nil {
podSpec.Volumes[i].EmptyDir.SizeLimit = nil
}
}
}
if !utilfeature.DefaultFeatureGate.Enabled(features.ProbeTerminationGracePeriod) && !probeGracePeriodInUse(oldPodSpec) {
// Set pod-level terminationGracePeriodSeconds to nil if the feature is disabled and it is not used
VisitContainers(podSpec, AllContainers, func(c *api.Container, containerType ContainerType) bool {
@ -703,21 +695,6 @@ func appArmorInUse(podAnnotations map[string]string) bool {
return false
}
// emptyDirSizeLimitInUse returns true if any pod's EmptyDir volumes use SizeLimit.
func emptyDirSizeLimitInUse(podSpec *api.PodSpec) bool {
if podSpec == nil {
return false
}
for i := range podSpec.Volumes {
if podSpec.Volumes[i].EmptyDir != nil {
if podSpec.Volumes[i].EmptyDir.SizeLimit != nil {
return true
}
}
}
return false
}
// probeGracePeriodInUse returns true if the pod spec is non-nil and has a probe that makes use
// of the probe-level terminationGracePeriodSeconds feature
func probeGracePeriodInUse(podSpec *api.PodSpec) bool {

View File

@ -25,7 +25,6 @@ import (
"github.com/google/go-cmp/cmp"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation/field"
@ -703,116 +702,6 @@ func TestDropProcMount(t *testing.T) {
}
}
func TestDropEmptyDirSizeLimit(t *testing.T) {
sizeLimit := resource.MustParse("1Gi")
podWithEmptyDirSizeLimit := func() *api.Pod {
return &api.Pod{
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyNever,
Volumes: []api.Volume{
{
Name: "a",
VolumeSource: api.VolumeSource{
EmptyDir: &api.EmptyDirVolumeSource{
Medium: "memory",
SizeLimit: &sizeLimit,
},
},
},
},
},
}
}
podWithoutEmptyDirSizeLimit := func() *api.Pod {
return &api.Pod{
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyNever,
Volumes: []api.Volume{
{
Name: "a",
VolumeSource: api.VolumeSource{
EmptyDir: &api.EmptyDirVolumeSource{
Medium: "memory",
},
},
},
},
},
}
}
podInfo := []struct {
description string
hasEmptyDirSizeLimit bool
pod func() *api.Pod
}{
{
description: "has EmptyDir Size Limit",
hasEmptyDirSizeLimit: true,
pod: podWithEmptyDirSizeLimit,
},
{
description: "does not have EmptyDir Size Limit",
hasEmptyDirSizeLimit: false,
pod: podWithoutEmptyDirSizeLimit,
},
{
description: "is nil",
hasEmptyDirSizeLimit: false,
pod: func() *api.Pod { return nil },
},
}
for _, enabled := range []bool{true, false} {
for _, oldPodInfo := range podInfo {
for _, newPodInfo := range podInfo {
oldPodHasEmptyDirSizeLimit, oldPod := oldPodInfo.hasEmptyDirSizeLimit, oldPodInfo.pod()
newPodHasEmptyDirSizeLimit, newPod := newPodInfo.hasEmptyDirSizeLimit, newPodInfo.pod()
if newPod == nil {
continue
}
t.Run(fmt.Sprintf("feature enabled=%v, old pod %v, new pod %v", enabled, oldPodInfo.description, newPodInfo.description), func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, enabled)()
var oldPodSpec *api.PodSpec
if oldPod != nil {
oldPodSpec = &oldPod.Spec
}
dropDisabledFields(&newPod.Spec, nil, oldPodSpec, nil)
// old pod should never be changed
if !reflect.DeepEqual(oldPod, oldPodInfo.pod()) {
t.Errorf("old pod changed: %v", cmp.Diff(oldPod, oldPodInfo.pod()))
}
switch {
case enabled || oldPodHasEmptyDirSizeLimit:
// new pod should not be changed if the feature is enabled, or if the old pod had EmptyDir SizeLimit
if !reflect.DeepEqual(newPod, newPodInfo.pod()) {
t.Errorf("new pod changed: %v", cmp.Diff(newPod, newPodInfo.pod()))
}
case newPodHasEmptyDirSizeLimit:
// new pod should be changed
if reflect.DeepEqual(newPod, newPodInfo.pod()) {
t.Errorf("new pod was not changed")
}
// new pod should not have EmptyDir SizeLimit
if !reflect.DeepEqual(newPod, podWithoutEmptyDirSizeLimit()) {
t.Errorf("new pod had EmptyDir SizeLimit: %v", cmp.Diff(newPod, podWithoutEmptyDirSizeLimit()))
}
default:
// new pod should not need to be changed
if !reflect.DeepEqual(newPod, newPodInfo.pod()) {
t.Errorf("new pod changed: %v", cmp.Diff(newPod, newPodInfo.pod()))
}
}
})
}
}
}
}
func TestDropAppArmor(t *testing.T) {
podWithAppArmor := func() *api.Pod {
return &api.Pod{

View File

@ -24,8 +24,6 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
)
// PodRequestsAndLimits returns a dictionary of all defined resources summed up for all
@ -131,11 +129,6 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou
requestQuantity = resource.Quantity{Format: resource.DecimalSI}
}
if resourceName == v1.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
// if the local storage capacity isolation feature gate is disabled, pods request 0 disk
return requestQuantity
}
for _, container := range pod.Spec.Containers {
if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
requestQuantity.Add(rQuantity)

View File

@ -515,8 +515,9 @@ const (
// owner: @jinxu
// beta: v1.10
// stable: v1.25
//
// New local storage types to support local storage capacity isolation
// Support local ephemeral storage types for local storage capacity isolation feature.
LocalStorageCapacityIsolation featuregate.Feature = "LocalStorageCapacityIsolation"
// owner: @RobertKrawitz
@ -988,7 +989,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
LegacyServiceAccountTokenNoAutoGeneration: {Default: true, PreRelease: featuregate.Beta},
LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.Beta},
LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.27
LocalStorageCapacityIsolationFSQuotaMonitoring: {Default: true, PreRelease: featuregate.Beta},

View File

@ -54589,6 +54589,13 @@ func schema_k8sio_kubelet_config_v1beta1_KubeletConfiguration(ref common.Referen
Ref: ref("k8s.io/component-base/tracing/api/v1.TracingConfiguration"),
},
},
"localStorageCapacityIsolation": {
SchemaProps: spec.SchemaProps{
Description: "LocalStorageCapacityIsolation enables local ephemeral storage isolation feature. The default setting is true. This feature allows users to set request/limit for container's ephemeral storage and manage it in a similar way as cpu and memory. It also allows setting sizeLimit for emptyDir volume, which will trigger pod eviction if disk usage from the volume exceeds the limit. This feature depends on the capability of detecting correct root file system disk usage. For certain systems, such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be disabled. Once disabled, user should not set request/limit for container's ephemeral storage, or sizeLimit for emptyDir. Default: true",
Type: []string{"boolean"},
Format: "",
},
},
},
},
},

View File

@ -20,7 +20,7 @@ import (
"math/rand"
"time"
"github.com/google/gofuzz"
fuzz "github.com/google/gofuzz"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtimeserializer "k8s.io/apimachinery/pkg/runtime/serializer"
@ -110,6 +110,7 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} {
}
obj.EnableSystemLogHandler = true
obj.MemoryThrottlingFactor = utilpointer.Float64Ptr(rand.Float64())
obj.LocalStorageCapacityIsolation = true
},
}
}

View File

@ -282,5 +282,6 @@ var (
"MemoryThrottlingFactor",
"Tracing.Endpoint",
"Tracing.SamplingRatePerMillion",
"LocalStorageCapacityIsolation",
)
)

View File

@ -52,6 +52,7 @@ iptablesMasqueradeBit: 14
kind: KubeletConfiguration
kubeAPIBurst: 10
kubeAPIQPS: 5
localStorageCapacityIsolation: true
logging:
flushFrequency: 5000000000
format: text

View File

@ -52,6 +52,7 @@ iptablesMasqueradeBit: 14
kind: KubeletConfiguration
kubeAPIBurst: 10
kubeAPIQPS: 5
localStorageCapacityIsolation: true
logging:
flushFrequency: 5000000000
format: text

View File

@ -450,6 +450,16 @@ type KubeletConfiguration struct {
// +featureGate=KubeletTracing
// +optional
Tracing *tracingapi.TracingConfiguration
// LocalStorageCapacityIsolation enables local ephemeral storage isolation feature. The default setting is true.
// This feature allows users to set request/limit for container's ephemeral storage and manage it in a similar way
// as cpu and memory. It also allows setting sizeLimit for emptyDir volume, which will trigger pod eviction if disk
// usage from the volume exceeds the limit.
// This feature depends on the capability of detecting correct root file system disk usage. For certain systems,
// such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be
// disabled. Once disabled, user should not set request/limit for container's ephemeral storage, or sizeLimit for emptyDir.
// +optional
LocalStorageCapacityIsolation bool
}
// KubeletAuthorizationMode denotes the authorization mode for the kubelet
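
As a concrete illustration of what the KubeletConfiguration doc comment above
governs, here is a hedged pod sketch using the pod-level knobs it mentions: an
ephemeral-storage request/limit and an emptyDir sizeLimit. Names, image, and
values are illustrative and not part of this change.

apiVersion: v1
kind: Pod
metadata:
  name: ephemeral-demo        # hypothetical name
spec:
  containers:
  - name: app
    image: registry.k8s.io/pause:3.9   # placeholder image
    resources:
      requests:
        ephemeral-storage: "1Gi"       # counted against node ephemeral-storage
      limits:
        ephemeral-storage: "2Gi"       # exceeding this can evict the pod
    volumeMounts:
    - name: scratch
      mountPath: /scratch
  volumes:
  - name: scratch
    emptyDir:
      sizeLimit: "500Mi"               # usage above this triggers eviction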

View File

@ -264,4 +264,7 @@ func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfigura
if obj.RegisterNode == nil {
obj.RegisterNode = utilpointer.BoolPtr(true)
}
if obj.LocalStorageCapacityIsolation == nil {
obj.LocalStorageCapacityIsolation = utilpointer.BoolPtr(true)
}
}

View File

@ -115,12 +115,13 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
Format: "text",
FlushFrequency: 5 * time.Second,
},
EnableSystemLogHandler: utilpointer.BoolPtr(true),
EnableProfilingHandler: utilpointer.BoolPtr(true),
EnableDebugFlagsHandler: utilpointer.BoolPtr(true),
SeccompDefault: utilpointer.BoolPtr(false),
MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor),
RegisterNode: utilpointer.BoolPtr(true),
EnableSystemLogHandler: utilpointer.BoolPtr(true),
EnableProfilingHandler: utilpointer.BoolPtr(true),
EnableDebugFlagsHandler: utilpointer.BoolPtr(true),
SeccompDefault: utilpointer.BoolPtr(false),
MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor),
RegisterNode: utilpointer.BoolPtr(true),
LocalStorageCapacityIsolation: utilpointer.BoolPtr(true),
},
},
{
@ -245,6 +246,7 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
SeccompDefault: utilpointer.Bool(false),
MemoryThrottlingFactor: utilpointer.Float64(0),
RegisterNode: utilpointer.BoolPtr(false),
LocalStorageCapacityIsolation: utilpointer.BoolPtr(false),
},
&v1beta1.KubeletConfiguration{
EnableServer: utilpointer.BoolPtr(false),
@ -333,13 +335,14 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
Format: "text",
FlushFrequency: 5 * time.Second,
},
EnableSystemLogHandler: utilpointer.Bool(false),
ReservedMemory: []v1beta1.MemoryReservation{},
EnableProfilingHandler: utilpointer.Bool(false),
EnableDebugFlagsHandler: utilpointer.Bool(false),
SeccompDefault: utilpointer.Bool(false),
MemoryThrottlingFactor: utilpointer.Float64(0),
RegisterNode: utilpointer.BoolPtr(false),
EnableSystemLogHandler: utilpointer.Bool(false),
ReservedMemory: []v1beta1.MemoryReservation{},
EnableProfilingHandler: utilpointer.Bool(false),
EnableDebugFlagsHandler: utilpointer.Bool(false),
SeccompDefault: utilpointer.Bool(false),
MemoryThrottlingFactor: utilpointer.Float64(0),
RegisterNode: utilpointer.BoolPtr(false),
LocalStorageCapacityIsolation: utilpointer.BoolPtr(false),
},
},
{
@ -481,11 +484,12 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
},
},
EnableProfilingHandler: utilpointer.Bool(true),
EnableDebugFlagsHandler: utilpointer.Bool(true),
SeccompDefault: utilpointer.Bool(true),
MemoryThrottlingFactor: utilpointer.Float64(1),
RegisterNode: utilpointer.BoolPtr(true),
EnableProfilingHandler: utilpointer.Bool(true),
EnableDebugFlagsHandler: utilpointer.Bool(true),
SeccompDefault: utilpointer.Bool(true),
MemoryThrottlingFactor: utilpointer.Float64(1),
RegisterNode: utilpointer.BoolPtr(true),
LocalStorageCapacityIsolation: utilpointer.BoolPtr(true),
},
&v1beta1.KubeletConfiguration{
EnableServer: utilpointer.BoolPtr(true),
@ -624,11 +628,12 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
},
},
EnableProfilingHandler: utilpointer.Bool(true),
EnableDebugFlagsHandler: utilpointer.Bool(true),
SeccompDefault: utilpointer.Bool(true),
MemoryThrottlingFactor: utilpointer.Float64(1),
RegisterNode: utilpointer.BoolPtr(true),
EnableProfilingHandler: utilpointer.Bool(true),
EnableDebugFlagsHandler: utilpointer.Bool(true),
SeccompDefault: utilpointer.Bool(true),
MemoryThrottlingFactor: utilpointer.Float64(1),
RegisterNode: utilpointer.BoolPtr(true),
LocalStorageCapacityIsolation: utilpointer.BoolPtr(true),
},
},
{
@ -709,12 +714,13 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
Format: "text",
FlushFrequency: 5 * time.Second,
},
EnableSystemLogHandler: utilpointer.BoolPtr(true),
EnableProfilingHandler: utilpointer.BoolPtr(true),
EnableDebugFlagsHandler: utilpointer.BoolPtr(true),
SeccompDefault: utilpointer.BoolPtr(false),
MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor),
RegisterNode: utilpointer.BoolPtr(true),
EnableSystemLogHandler: utilpointer.BoolPtr(true),
EnableProfilingHandler: utilpointer.BoolPtr(true),
EnableDebugFlagsHandler: utilpointer.BoolPtr(true),
SeccompDefault: utilpointer.BoolPtr(false),
MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor),
RegisterNode: utilpointer.BoolPtr(true),
LocalStorageCapacityIsolation: utilpointer.BoolPtr(true),
},
},
}

View File

@ -508,6 +508,9 @@ func autoConvert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(in
return err
}
out.Tracing = (*apiv1.TracingConfiguration)(unsafe.Pointer(in.Tracing))
if err := v1.Convert_Pointer_bool_To_bool(&in.LocalStorageCapacityIsolation, &out.LocalStorageCapacityIsolation, s); err != nil {
return err
}
return nil
}
@ -683,6 +686,9 @@ func autoConvert_config_KubeletConfiguration_To_v1beta1_KubeletConfiguration(in
return err
}
out.Tracing = (*apiv1.TracingConfiguration)(unsafe.Pointer(in.Tracing))
if err := v1.Convert_bool_To_Pointer_bool(&in.LocalStorageCapacityIsolation, &out.LocalStorageCapacityIsolation, s); err != nil {
return err
}
return nil
}

View File

@ -80,7 +80,7 @@ func init() {
}
// New creates a new cAdvisor Interface for linux systems.
func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats bool) (Interface, error) {
func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) {
sysFs := sysfs.NewRealSysFs()
includedMetrics := cadvisormetrics.MetricSet{
@ -99,7 +99,7 @@ func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots [
includedMetrics[cadvisormetrics.AcceleratorUsageMetrics] = struct{}{}
}
if usingLegacyStats || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
if usingLegacyStats || localStorageCapacityIsolation {
includedMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{}
}

View File

@ -33,7 +33,7 @@ type cadvisorUnsupported struct {
var _ Interface = new(cadvisorUnsupported)
// New creates a new cAdvisor Interface for unsupported systems.
func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupsRoots []string, usingLegacyStats bool) (Interface, error) {
func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupsRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) {
return &cadvisorUnsupported{}, nil
}

View File

@ -34,7 +34,7 @@ type cadvisorClient struct {
var _ Interface = new(cadvisorClient)
// New creates a cAdvisor and exports its API on the specified port if port > 0.
func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats bool) (Interface, error) {
func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) {
client, err := winstats.NewPerfCounterClient()
return &cadvisorClient{
rootPath: rootPath,

View File

@ -47,7 +47,7 @@ type ContainerManager interface {
// Runs the container manager's housekeeping.
// - Ensures that the Docker daemon is in a container.
// - Creates the system container where all non-containerized processes run.
Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService) error
Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService, bool) error
// SystemCgroupsLimit returns resources allocated to system cgroups in the machine.
// These cgroups include the system and Kubernetes services.
@ -73,7 +73,7 @@ type ContainerManager interface {
GetNodeAllocatableReservation() v1.ResourceList
// GetCapacity returns the amount of compute resources tracked by container manager available on the node.
GetCapacity() v1.ResourceList
GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList
// GetDevicePluginResourceCapacity returns the node capacity (amount of total device plugin resources),
// node allocatable (amount of total healthy resources reported by device plugin),

View File

@ -554,7 +554,8 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
activePods ActivePodsFunc,
sourcesReady config.SourcesReady,
podStatusProvider status.PodStatusProvider,
runtimeService internalapi.RuntimeService) error {
runtimeService internalapi.RuntimeService,
localStorageCapacityIsolation bool) error {
// Initialize CPU manager
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.CPUManager) {
@ -578,7 +579,7 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
// allocatable of the node
cm.nodeInfo = node
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
if localStorageCapacityIsolation {
rootfs, err := cm.cadvisorInterface.RootFsInfo()
if err != nil {
return fmt.Errorf("failed to get rootfs info: %v", err)
@ -915,8 +916,8 @@ func isKernelPid(pid int) bool {
// GetCapacity returns node capacity data for "cpu", "memory", "ephemeral-storage", and "huge-pages*"
// At present this method is only invoked when introspecting ephemeral storage
func (cm *containerManagerImpl) GetCapacity() v1.ResourceList {
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
if localStorageCapacityIsolation {
// We store allocatable ephemeral-storage in the capacity property once we Start() the container manager
if _, ok := cm.capacity[v1.ResourceEphemeralStorage]; !ok {
// If we haven't yet stored the capacity for ephemeral-storage, we can try to fetch it directly from cAdvisor,

View File

@ -28,9 +28,6 @@ import (
gomock "github.com/golang/mock/gomock"
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
kubefeatures "k8s.io/kubernetes/pkg/features"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/stretchr/testify/assert"
@ -193,11 +190,11 @@ func TestGetCapacity(t *testing.T) {
mockCadvisorError := cadvisortest.NewMockInterface(mockCtrlError)
mockCadvisorError.EXPECT().RootFsInfo().Return(cadvisorapiv2.FsInfo{}, errors.New("Unable to get rootfs data from cAdvisor interface"))
cases := []struct {
name string
cm *containerManagerImpl
expectedResourceQuantity *resource.Quantity
expectedNoEphemeralStorage bool
enableLocalStorageCapacityIsolation bool
name string
cm *containerManagerImpl
expectedResourceQuantity *resource.Quantity
expectedNoEphemeralStorage bool
disablelocalStorageCapacityIsolation bool
}{
{
name: "capacity property has ephemeral-storage",
@ -207,9 +204,8 @@ func TestGetCapacity(t *testing.T) {
v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
},
},
expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
expectedNoEphemeralStorage: false,
enableLocalStorageCapacityIsolation: true,
expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
expectedNoEphemeralStorage: false,
},
{
name: "capacity property does not have ephemeral-storage",
@ -217,9 +213,8 @@ func TestGetCapacity(t *testing.T) {
cadvisorInterface: mockCadvisor,
capacity: v1.ResourceList{},
},
expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCadvisor, resource.BinarySI),
expectedNoEphemeralStorage: false,
enableLocalStorageCapacityIsolation: true,
expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCadvisor, resource.BinarySI),
expectedNoEphemeralStorage: false,
},
{
name: "capacity property does not have ephemeral-storage, error from rootfs",
@ -227,8 +222,7 @@ func TestGetCapacity(t *testing.T) {
cadvisorInterface: mockCadvisorError,
capacity: v1.ResourceList{},
},
expectedNoEphemeralStorage: true,
enableLocalStorageCapacityIsolation: true,
expectedNoEphemeralStorage: true,
},
{
name: "capacity property does not have ephemeral-storage, cadvisor interface is nil",
@ -236,26 +230,24 @@ func TestGetCapacity(t *testing.T) {
cadvisorInterface: nil,
capacity: v1.ResourceList{},
},
expectedNoEphemeralStorage: true,
enableLocalStorageCapacityIsolation: true,
expectedNoEphemeralStorage: true,
},
{
name: "LocalStorageCapacityIsolation feature flag is disabled",
name: "capacity property has ephemeral-storage, but localStorageCapacityIsolation is disabled",
cm: &containerManagerImpl{
cadvisorInterface: mockCadvisor,
capacity: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("16G"),
v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
},
},
expectedNoEphemeralStorage: true,
enableLocalStorageCapacityIsolation: false,
expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
expectedNoEphemeralStorage: true,
disablelocalStorageCapacityIsolation: true,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, kubefeatures.LocalStorageCapacityIsolation, c.enableLocalStorageCapacityIsolation)()
ret := c.cm.GetCapacity()
ret := c.cm.GetCapacity(!c.disablelocalStorageCapacityIsolation)
if v, exists := ret[v1.ResourceEphemeralStorage]; !exists {
if !c.expectedNoEphemeralStorage {
t.Errorf("did not get any ephemeral storage data")

View File

@ -41,7 +41,7 @@ type containerManagerStub struct {
var _ ContainerManager = &containerManagerStub{}
func (cm *containerManagerStub) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService) error {
func (cm *containerManagerStub) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
klog.V(2).InfoS("Starting stub container manager")
return nil
}
@ -74,7 +74,10 @@ func (cm *containerManagerStub) GetNodeAllocatableReservation() v1.ResourceList
return nil
}
func (cm *containerManagerStub) GetCapacity() v1.ResourceList {
func (cm *containerManagerStub) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
if !localStorageCapacityIsolation {
return v1.ResourceList{}
}
c := v1.ResourceList{
v1.ResourceEphemeralStorage: *resource.NewQuantity(
int64(0),

View File

@ -38,7 +38,7 @@ type unsupportedContainerManager struct {
var _ ContainerManager = &unsupportedContainerManager{}
func (unsupportedContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService) error {
func (unsupportedContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
return fmt.Errorf("Container Manager is unsupported in this build")
}

View File

@ -30,11 +30,9 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/record"
internalapi "k8s.io/cri-api/pkg/apis"
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
kubefeatures "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/kubelet/cm/admission"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
@ -72,10 +70,11 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
activePods ActivePodsFunc,
sourcesReady config.SourcesReady,
podStatusProvider status.PodStatusProvider,
runtimeService internalapi.RuntimeService) error {
runtimeService internalapi.RuntimeService,
localStorageCapacityIsolation bool) error {
klog.V(2).InfoS("Starting Windows container manager")
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
if localStorageCapacityIsolation {
rootfs, err := cm.cadvisorInterface.RootFsInfo()
if err != nil {
return fmt.Errorf("failed to get rootfs info: %v", err)
@ -171,7 +170,7 @@ func (cm *containerManagerImpl) GetNodeAllocatableReservation() v1.ResourceList
return result
}
func (cm *containerManagerImpl) GetCapacity() v1.ResourceList {
func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
return cm.capacity
}

View File

@ -50,7 +50,7 @@ func NewFakeContainerManager() *FakeContainerManager {
}
}
func (cm *FakeContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService) error {
func (cm *FakeContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
cm.Lock()
defer cm.Unlock()
cm.CalledFunctions = append(cm.CalledFunctions, "Start")
@ -106,10 +106,13 @@ func (cm *FakeContainerManager) GetNodeAllocatableReservation() v1.ResourceList
return nil
}
func (cm *FakeContainerManager) GetCapacity() v1.ResourceList {
func (cm *FakeContainerManager) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
cm.Lock()
defer cm.Unlock()
cm.CalledFunctions = append(cm.CalledFunctions, "GetCapacity")
if !localStorageCapacityIsolation {
return v1.ResourceList{}
}
c := v1.ResourceList{
v1.ResourceEphemeralStorage: *resource.NewQuantity(
int64(0),

View File

@ -26,13 +26,11 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/record"
v1helper "k8s.io/component-helpers/scheduling/corev1"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
@ -97,6 +95,8 @@ type managerImpl struct {
thresholdNotifiers []ThresholdNotifier
// thresholdsLastUpdated is the last time the thresholdNotifiers were updated.
thresholdsLastUpdated time.Time
// whether can support local storage capacity isolation
localStorageCapacityIsolation bool
}
// ensure it implements the required interface
@ -113,21 +113,23 @@ func NewManager(
recorder record.EventRecorder,
nodeRef *v1.ObjectReference,
clock clock.WithTicker,
localStorageCapacityIsolation bool,
) (Manager, lifecycle.PodAdmitHandler) {
manager := &managerImpl{
clock: clock,
killPodFunc: killPodFunc,
mirrorPodFunc: mirrorPodFunc,
imageGC: imageGC,
containerGC: containerGC,
config: config,
recorder: recorder,
summaryProvider: summaryProvider,
nodeRef: nodeRef,
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
dedicatedImageFs: nil,
thresholdNotifiers: []ThresholdNotifier{},
clock: clock,
killPodFunc: killPodFunc,
mirrorPodFunc: mirrorPodFunc,
imageGC: imageGC,
containerGC: containerGC,
config: config,
recorder: recorder,
summaryProvider: summaryProvider,
nodeRef: nodeRef,
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
dedicatedImageFs: nil,
thresholdNotifiers: []ThresholdNotifier{},
localStorageCapacityIsolation: localStorageCapacityIsolation,
}
return manager, manager
}
@ -230,7 +232,7 @@ func (m *managerImpl) IsUnderPIDPressure() bool {
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) []*v1.Pod {
// if we have nothing to do, just return
thresholds := m.config.Thresholds
if len(thresholds) == 0 && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
if len(thresholds) == 0 && !m.localStorageCapacityIsolation {
return nil
}
@ -318,7 +320,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
// evict pods if there is a resource usage violation from local volume temporary storage
// If eviction happens in localStorageEviction function, skip the rest of eviction action
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
if m.localStorageCapacityIsolation {
if evictedPods := m.localStorageEviction(activePods, statsFunc); len(evictedPods) > 0 {
return evictedPods
}

View File

@ -18,12 +18,13 @@ package eviction
import (
"fmt"
"k8s.io/apimachinery/pkg/util/diff"
"reflect"
"sort"
"testing"
"time"
"k8s.io/apimachinery/pkg/util/diff"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -31,6 +32,7 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/features"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
@ -699,7 +701,6 @@ func TestOrderedByExceedsRequestMemory(t *testing.T) {
}
func TestOrderedByExceedsRequestDisk(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
below := newPod("below-requests", -1, []v1.Container{
newContainer("below-requests", v1.ResourceList{v1.ResourceEphemeralStorage: resource.MustParse("200Mi")}, newResourceList("", "", "")),
}, nil)
@ -748,7 +749,6 @@ func TestOrderedByPriority(t *testing.T) {
}
func TestOrderedbyDisk(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{
newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")),
}, []v1.Volume{
@ -813,73 +813,6 @@ func TestOrderedbyDisk(t *testing.T) {
}
}
// Tests that we correctly ignore disk requests when the local storage feature gate is disabled.
func TestOrderedbyDiskDisableLocalStorage(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, false)()
pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{
newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")),
}, []v1.Volume{
newVolume("local-volume", v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{},
}),
})
pod2 := newPod("best-effort-low", defaultPriority, []v1.Container{
newContainer("best-effort-low", newResourceList("", "", ""), newResourceList("", "", "")),
}, []v1.Volume{
newVolume("local-volume", v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{},
}),
})
pod3 := newPod("burstable-high", defaultPriority, []v1.Container{
newContainer("burstable-high", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")),
}, []v1.Volume{
newVolume("local-volume", v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{},
}),
})
pod4 := newPod("burstable-low", defaultPriority, []v1.Container{
newContainer("burstable-low", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")),
}, []v1.Volume{
newVolume("local-volume", v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{},
}),
})
pod5 := newPod("guaranteed-high", defaultPriority, []v1.Container{
newContainer("guaranteed-high", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")),
}, []v1.Volume{
newVolume("local-volume", v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{},
}),
})
pod6 := newPod("guaranteed-low", defaultPriority, []v1.Container{
newContainer("guaranteed-low", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")),
}, []v1.Volume{
newVolume("local-volume", v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{},
}),
})
stats := map[*v1.Pod]statsapi.PodStats{
pod1: newPodDiskStats(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("150Mi")), // 300Mi
pod2: newPodDiskStats(pod2, resource.MustParse("25Mi"), resource.MustParse("25Mi"), resource.MustParse("50Mi")), // 100Mi
pod3: newPodDiskStats(pod3, resource.MustParse("150Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 350Mi
pod4: newPodDiskStats(pod4, resource.MustParse("25Mi"), resource.MustParse("35Mi"), resource.MustParse("50Mi")), // 110Mi
pod5: newPodDiskStats(pod5, resource.MustParse("225Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 375Mi
pod6: newPodDiskStats(pod6, resource.MustParse("25Mi"), resource.MustParse("45Mi"), resource.MustParse("50Mi")), // 120Mi
}
statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) {
result, found := stats[pod]
return result, found
}
pods := []*v1.Pod{pod1, pod3, pod2, pod4, pod5, pod6}
orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)).Sort(pods)
expected := []*v1.Pod{pod5, pod3, pod1, pod6, pod4, pod2}
for i := range expected {
if pods[i] != expected[i] {
t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name)
}
}
}
func TestOrderedbyInodes(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
low := newPod("low", defaultPriority, []v1.Container{

View File

@ -773,7 +773,8 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
klet.backOff = flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)
// setup eviction manager
evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.podManager.GetMirrorPodByPod, klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock)
evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig,
killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.podManager.GetMirrorPodByPod, klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock, kubeCfg.LocalStorageCapacityIsolation)
klet.evictionManager = evictionManager
klet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
@ -1384,7 +1385,7 @@ func (kl *Kubelet) initializeRuntimeDependentModules() {
os.Exit(1)
}
// containerManager must start after cAdvisor because it needs filesystem capacity information
if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService); err != nil {
if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService, kl.supportLocalStorageCapacityIsolation()); err != nil {
// Fail kubelet and rely on the babysitter to retry starting kubelet.
klog.ErrorS(err, "Failed to start ContainerManager")
os.Exit(1)
@ -2489,6 +2490,10 @@ func (kl *Kubelet) CheckpointContainer(
return nil
}
func (kl *Kubelet) supportLocalStorageCapacityIsolation() bool {
return kl.GetConfiguration().LocalStorageCapacityIsolation
}
// isSyncPodWorthy filters out events that are not worthy of pod syncing
func isSyncPodWorthy(event *pleg.PodLifecycleEvent) bool {
// ContainerRemoved doesn't affect pod state

View File

@ -624,7 +624,7 @@ func (kl *Kubelet) defaultNodeStatusFuncs() []func(*v1.Node) error {
setters = append(setters,
nodestatus.NodeAddress(kl.nodeIPs, kl.nodeIPValidator, kl.hostname, kl.hostnameOverridden, kl.externalCloudProvider, kl.cloud, nodeAddressesFunc),
nodestatus.MachineInfo(string(kl.nodeName), kl.maxPods, kl.podsPerCore, kl.GetCachedMachineInfo, kl.containerManager.GetCapacity,
kl.containerManager.GetDevicePluginResourceCapacity, kl.containerManager.GetNodeAllocatableReservation, kl.recordEvent),
kl.containerManager.GetDevicePluginResourceCapacity, kl.containerManager.GetNodeAllocatableReservation, kl.recordEvent, kl.supportLocalStorageCapacityIsolation()),
nodestatus.VersionInfo(kl.cadvisor.VersionInfo, kl.containerRuntime.Type, kl.containerRuntime.Version),
nodestatus.DaemonEndpoints(kl.daemonEndpoints),
nodestatus.Images(kl.nodeStatusMaxImages, kl.imageManager.GetImageList),
@ -637,7 +637,8 @@ func (kl *Kubelet) defaultNodeStatusFuncs() []func(*v1.Node) error {
nodestatus.MemoryPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderMemoryPressure, kl.recordNodeStatusEvent),
nodestatus.DiskPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderDiskPressure, kl.recordNodeStatusEvent),
nodestatus.PIDPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderPIDPressure, kl.recordNodeStatusEvent),
nodestatus.ReadyCondition(kl.clock.Now, kl.runtimeState.runtimeErrors, kl.runtimeState.networkErrors, kl.runtimeState.storageErrors, validateHostFunc, kl.containerManager.Status, kl.shutdownManager.ShutdownStatus, kl.recordNodeStatusEvent),
nodestatus.ReadyCondition(kl.clock.Now, kl.runtimeState.runtimeErrors, kl.runtimeState.networkErrors, kl.runtimeState.storageErrors,
validateHostFunc, kl.containerManager.Status, kl.shutdownManager.ShutdownStatus, kl.recordNodeStatusEvent, kl.supportLocalStorageCapacityIsolation()),
nodestatus.VolumesInUse(kl.volumeManager.ReconcilerStatesHasBeenSynced, kl.volumeManager.GetVolumesInUse),
// TODO(mtaufen): I decided not to move this setter for now, since all it does is send an event
// and record state back to the Kubelet runtime object. In the future, I'd like to isolate

View File

@ -153,7 +153,10 @@ func (lcm *localCM) GetNodeAllocatableReservation() v1.ResourceList {
return lcm.allocatableReservation
}
func (lcm *localCM) GetCapacity() v1.ResourceList {
func (lcm *localCM) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
if !localStorageCapacityIsolation {
delete(lcm.capacity, v1.ResourceEphemeralStorage)
}
return lcm.capacity
}
@ -182,7 +185,7 @@ func TestUpdateNewNodeStatus(t *testing.T) {
}
inputImageList, expectedImageList := generateTestingImageLists(numTestImages, int(tc.nodeStatusMaxImages))
testKubelet := newTestKubeletWithImageList(
t, inputImageList, false /* controllerAttachDetachEnabled */, true /*initFakeVolumePlugin*/)
t, inputImageList, false /* controllerAttachDetachEnabled */, true /*initFakeVolumePlugin*/, true /* localStorageCapacityIsolation */)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.nodeStatusMaxImages = tc.nodeStatusMaxImages
@ -1347,7 +1350,7 @@ func TestUpdateNewNodeStatusTooLargeReservation(t *testing.T) {
// generate one more in inputImageList than we configure the Kubelet to report
inputImageList, _ := generateTestingImageLists(nodeStatusMaxImages+1, nodeStatusMaxImages)
testKubelet := newTestKubeletWithImageList(
t, inputImageList, false /* controllerAttachDetachEnabled */, true /* initFakeVolumePlugin */)
t, inputImageList, false /* controllerAttachDetachEnabled */, true /* initFakeVolumePlugin */, true)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.nodeStatusMaxImages = nodeStatusMaxImages

View File

@ -23,7 +23,7 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
_ "k8s.io/kubernetes/pkg/apis/core/install"

View File

@ -145,14 +145,16 @@ func newTestKubelet(t *testing.T, controllerAttachDetachEnabled bool) *TestKubel
Size: 456,
},
}
return newTestKubeletWithImageList(t, imageList, controllerAttachDetachEnabled, true /*initFakeVolumePlugin*/)
return newTestKubeletWithImageList(t, imageList, controllerAttachDetachEnabled, true /*initFakeVolumePlugin*/, true /*localStorageCapacityIsolation*/)
}
func newTestKubeletWithImageList(
t *testing.T,
imageList []kubecontainer.Image,
controllerAttachDetachEnabled bool,
initFakeVolumePlugin bool) *TestKubelet {
initFakeVolumePlugin bool,
localStorageCapacityIsolation bool,
) *TestKubelet {
logger, _ := ktesting.NewTestContext(t)
fakeRuntime := &containertest.FakeRuntime{
@ -320,7 +322,8 @@ func newTestKubeletWithImageList(
Namespace: "",
}
// setup eviction manager
evictionManager, evictionAdmitHandler := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.podManager.GetMirrorPodByPod, kubelet.imageManager, kubelet.containerGC, fakeRecorder, nodeRef, kubelet.clock)
evictionManager, evictionAdmitHandler := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{},
killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.podManager.GetMirrorPodByPod, kubelet.imageManager, kubelet.containerGC, fakeRecorder, nodeRef, kubelet.clock, kubelet.supportLocalStorageCapacityIsolation())
kubelet.evictionManager = evictionManager
kubelet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
@ -386,6 +389,7 @@ func newTestKubeletWithImageList(
kubelet.AddPodSyncLoopHandler(activeDeadlineHandler)
kubelet.AddPodSyncHandler(activeDeadlineHandler)
kubelet.lastContainerStartedTime = newTimeCache()
kubelet.kubeletConfiguration.LocalStorageCapacityIsolation = localStorageCapacityIsolation
return &TestKubelet{kubelet, fakeRuntime, fakeContainerManager, fakeKubeClient, fakeMirrorClient, fakeClock, nil, plug}
}

View File

@ -31,13 +31,11 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/errors"
utilnet "k8s.io/apimachinery/pkg/util/net"
utilfeature "k8s.io/apiserver/pkg/util/feature"
cloudprovider "k8s.io/cloud-provider"
cloudproviderapi "k8s.io/cloud-provider/api"
cloudprovidernodeutil "k8s.io/cloud-provider/node/helpers"
"k8s.io/component-base/version"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
@ -244,10 +242,11 @@ func MachineInfo(nodeName string,
maxPods int,
podsPerCore int,
machineInfoFunc func() (*cadvisorapiv1.MachineInfo, error), // typically Kubelet.GetCachedMachineInfo
capacityFunc func() v1.ResourceList, // typically Kubelet.containerManager.GetCapacity
capacityFunc func(localStorageCapacityIsolation bool) v1.ResourceList, // typically Kubelet.containerManager.GetCapacity
devicePluginResourceCapacityFunc func() (v1.ResourceList, v1.ResourceList, []string), // typically Kubelet.containerManager.GetDevicePluginResourceCapacity
nodeAllocatableReservationFunc func() v1.ResourceList, // typically Kubelet.containerManager.GetNodeAllocatableReservation
recordEventFunc func(eventType, event, message string), // typically Kubelet.recordEvent
localStorageCapacityIsolation bool,
) Setter {
return func(node *v1.Node) error {
// Note: avoid blindly overwriting the capacity in case opaque
@ -295,16 +294,15 @@ func MachineInfo(nodeName string,
}
node.Status.NodeInfo.BootID = info.BootID
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
// TODO: all the node resources should use ContainerManager.GetCapacity instead of deriving the
// capacity for every node status request
initialCapacity := capacityFunc()
if initialCapacity != nil {
if v, exists := initialCapacity[v1.ResourceEphemeralStorage]; exists {
node.Status.Capacity[v1.ResourceEphemeralStorage] = v
}
// TODO: all the node resources should use ContainerManager.GetCapacity instead of deriving the
// capacity for every node status request
initialCapacity := capacityFunc(localStorageCapacityIsolation)
if initialCapacity != nil {
if v, exists := initialCapacity[v1.ResourceEphemeralStorage]; exists {
node.Status.Capacity[v1.ResourceEphemeralStorage] = v
}
}
//}
devicePluginCapacity, devicePluginAllocatable, removedDevicePlugins = devicePluginResourceCapacityFunc()
for k, v := range devicePluginCapacity {
@ -469,6 +467,7 @@ func ReadyCondition(
cmStatusFunc func() cm.Status, // typically Kubelet.containerManager.Status
nodeShutdownManagerErrorsFunc func() error, // typically kubelet.shutdownManager.errors.
recordEventFunc func(eventType, event string), // typically Kubelet.recordNodeStatusEvent
localStorageCapacityIsolation bool,
) Setter {
return func(node *v1.Node) error {
// NOTE(aaronlevy): NodeReady condition needs to be the last in the list of node conditions.
@ -484,7 +483,7 @@ func ReadyCondition(
}
errs := []error{runtimeErrorsFunc(), networkErrorsFunc(), storageErrorsFunc(), nodeShutdownManagerErrorsFunc()}
requiredCapacities := []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods}
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
if localStorageCapacityIsolation {
requiredCapacities = append(requiredCapacities, v1.ResourceEphemeralStorage)
}
missingCapacities := []string{}

View File

@ -585,17 +585,18 @@ func TestMachineInfo(t *testing.T) {
}
cases := []struct {
desc string
node *v1.Node
maxPods int
podsPerCore int
machineInfo *cadvisorapiv1.MachineInfo
machineInfoError error
capacity v1.ResourceList
devicePluginResourceCapacity dprc
nodeAllocatableReservation v1.ResourceList
expectNode *v1.Node
expectEvents []testEvent
desc string
node *v1.Node
maxPods int
podsPerCore int
machineInfo *cadvisorapiv1.MachineInfo
machineInfoError error
capacity v1.ResourceList
devicePluginResourceCapacity dprc
nodeAllocatableReservation v1.ResourceList
expectNode *v1.Node
expectEvents []testEvent
disableLocalStorageCapacityIsolation bool
}{
{
desc: "machine identifiers, basic capacity and allocatable",
@ -797,6 +798,35 @@ func TestMachineInfo(t *testing.T) {
},
},
},
{
desc: "ephemeral storage is not reflected in capacity and allocatable because localStorageCapacityIsolation is disabled",
node: &v1.Node{},
maxPods: 110,
machineInfo: &cadvisorapiv1.MachineInfo{
NumCores: 2,
MemoryCapacity: 1024,
},
capacity: v1.ResourceList{
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
},
expectNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(1024, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(110, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(1024, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(110, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
},
},
},
disableLocalStorageCapacityIsolation: true,
},
{
desc: "device plugin resources are reflected in capacity and allocatable",
node: &v1.Node{},
@ -962,7 +992,7 @@ func TestMachineInfo(t *testing.T) {
machineInfoFunc := func() (*cadvisorapiv1.MachineInfo, error) {
return tc.machineInfo, tc.machineInfoError
}
capacityFunc := func() v1.ResourceList {
capacityFunc := func(localStorageCapacityIsolation bool) v1.ResourceList {
return tc.capacity
}
devicePluginResourceCapacityFunc := func() (v1.ResourceList, v1.ResourceList, []string) {
@ -983,7 +1013,7 @@ func TestMachineInfo(t *testing.T) {
}
// construct setter
setter := MachineInfo(nodeName, tc.maxPods, tc.podsPerCore, machineInfoFunc, capacityFunc,
devicePluginResourceCapacityFunc, nodeAllocatableReservationFunc, recordEventFunc)
devicePluginResourceCapacityFunc, nodeAllocatableReservationFunc, recordEventFunc, tc.disableLocalStorageCapacityIsolation)
// call setter on node
if err := setter(tc.node); err != nil {
t.Fatalf("unexpected error: %v", err)
@ -1180,17 +1210,28 @@ func TestReadyCondition(t *testing.T) {
},
}
withoutStorageCapacity := &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(100, resource.DecimalSI),
},
},
}
cases := []struct {
desc string
node *v1.Node
runtimeErrors error
networkErrors error
storageErrors error
appArmorValidateHostFunc func() error
cmStatus cm.Status
nodeShutdownManagerErrors error
expectConditions []v1.NodeCondition
expectEvents []testEvent
desc string
node *v1.Node
runtimeErrors error
networkErrors error
storageErrors error
appArmorValidateHostFunc func() error
cmStatus cm.Status
nodeShutdownManagerErrors error
expectConditions []v1.NodeCondition
expectEvents []testEvent
disableLocalStorageCapacityIsolation bool
}{
{
desc: "new, ready",
@ -1245,6 +1286,12 @@ func TestReadyCondition(t *testing.T) {
node: &v1.Node{},
expectConditions: []v1.NodeCondition{*makeReadyCondition(false, "missing node capacity for resources: cpu, memory, pods, ephemeral-storage", now, now)},
},
{
desc: "new, ready: localStorageCapacityIsolation is not supported",
node: withoutStorageCapacity.DeepCopy(),
disableLocalStorageCapacityIsolation: true,
expectConditions: []v1.NodeCondition{*makeReadyCondition(true, "kubelet is posting ready status", now, now)},
},
// the transition tests ensure timestamps are set correctly, no need to test the entire condition matrix in this section
{
desc: "transition to ready",
@ -1324,7 +1371,7 @@ func TestReadyCondition(t *testing.T) {
})
}
// construct setter
setter := ReadyCondition(nowFunc, runtimeErrorsFunc, networkErrorsFunc, storageErrorsFunc, tc.appArmorValidateHostFunc, cmStatusFunc, nodeShutdownErrorsFunc, recordEventFunc)
setter := ReadyCondition(nowFunc, runtimeErrorsFunc, networkErrorsFunc, storageErrorsFunc, tc.appArmorValidateHostFunc, cmStatusFunc, nodeShutdownErrorsFunc, recordEventFunc, !tc.disableLocalStorageCapacityIsolation)
// call setter on node
if err := setter(tc.node); err != nil {
t.Fatalf("unexpected error: %v", err)

View File

@ -130,7 +130,7 @@ func TestRunOnce(t *testing.T) {
return nil
}
fakeMirrodPodFunc := func(*v1.Pod) (*v1.Pod, bool) { return nil, false }
evictionManager, evictionAdmitHandler := eviction.NewManager(kb.resourceAnalyzer, eviction.Config{}, fakeKillPodFunc, fakeMirrodPodFunc, nil, nil, kb.recorder, nodeRef, kb.clock)
evictionManager, evictionAdmitHandler := eviction.NewManager(kb.resourceAnalyzer, eviction.Config{}, fakeKillPodFunc, fakeMirrodPodFunc, nil, nil, kb.recorder, nodeRef, kb.clock, kb.supportLocalStorageCapacityIsolation())
kb.evictionManager = evictionManager
kb.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)

View File

@ -206,6 +206,7 @@ func GetHollowKubeletConfig(opt *HollowKubletOptions) (*options.KubeletFlags, *k
c.ProtectKernelDefaults = false
c.RegisterWithTaints = opt.RegisterWithTaints
c.RegisterNode = true
c.LocalStorageCapacityIsolation = true
return f, c
}

View File

@ -25,9 +25,6 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/featuregate"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/framework"
plfeature "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
@ -575,7 +572,6 @@ func TestStorageRequests(t *testing.T) {
pod *v1.Pod
nodeInfo *framework.NodeInfo
name string
features map[featuregate.Feature]bool
wantStatus *framework.Status
}{
{
@ -599,13 +595,10 @@ func TestStorageRequests(t *testing.T) {
wantStatus: framework.NewStatus(framework.Unschedulable, getErrReason(v1.ResourceEphemeralStorage)),
},
{
pod: newResourceInitPod(newResourcePod(framework.Resource{EphemeralStorage: 25}), framework.Resource{EphemeralStorage: 25}),
pod: newResourceInitPod(newResourcePod(framework.Resource{EphemeralStorage: 5})),
nodeInfo: framework.NewNodeInfo(
newResourcePod(framework.Resource{MilliCPU: 2, Memory: 2})),
name: "ephemeral local storage request is ignored due to disabled feature gate",
features: map[featuregate.Feature]bool{
"LocalStorageCapacityIsolation": false,
},
newResourcePod(framework.Resource{MilliCPU: 2, Memory: 2, EphemeralStorage: 10})),
name: "ephemeral local storage is sufficient",
},
{
pod: newResourcePod(framework.Resource{EphemeralStorage: 10}),
@ -617,9 +610,6 @@ func TestStorageRequests(t *testing.T) {
for _, test := range storagePodsTests {
t.Run(test.name, func(t *testing.T) {
for k, v := range test.features {
defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, k, v)()
}
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)

View File

@ -29,9 +29,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/features"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
@ -444,10 +442,7 @@ func (r *Resource) Add(rl v1.ResourceList) {
case v1.ResourcePods:
r.AllowedPodNumber += int(rQuant.Value())
case v1.ResourceEphemeralStorage:
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
// if the local storage capacity isolation feature gate is disabled, pods request 0 disk.
r.EphemeralStorage += rQuant.Value()
}
r.EphemeralStorage += rQuant.Value()
default:
if schedutil.IsScalarResourceName(rName) {
r.AddScalar(rName, rQuant.Value())
@ -500,9 +495,7 @@ func (r *Resource) SetMaxResource(rl v1.ResourceList) {
case v1.ResourceCPU:
r.MilliCPU = max(r.MilliCPU, rQuantity.MilliValue())
case v1.ResourceEphemeralStorage:
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
r.EphemeralStorage = max(r.EphemeralStorage, rQuantity.Value())
}
r.EphemeralStorage = max(r.EphemeralStorage, rQuantity.Value())
default:
if schedutil.IsScalarResourceName(rName) {
r.SetScalar(rName, max(r.ScalarResources[rName], rQuantity.Value()))

View File

@ -18,8 +18,6 @@ package util
import (
v1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
)
// For each of these resources, a pod that doesn't request the resource explicitly
@ -65,11 +63,6 @@ func GetRequestForResource(resource v1.ResourceName, requests *v1.ResourceList,
}
return requests.Memory().Value()
case v1.ResourceEphemeralStorage:
// if the local storage capacity isolation feature gate is disabled, pods request 0 disk.
if !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
return 0
}
quantity, found := (*requests)[v1.ResourceEphemeralStorage]
if !found {
return 0

View File

@ -786,6 +786,17 @@ type KubeletConfiguration struct {
// +featureGate=KubeletTracing
// +optional
Tracing *tracingapi.TracingConfiguration `json:"tracing,omitempty"`
// LocalStorageCapacityIsolation enables local ephemeral storage isolation feature. The default setting is true.
// This feature allows users to set request/limit for container's ephemeral storage and manage it in a similar way
// as cpu and memory. It also allows setting sizeLimit for emptyDir volume, which will trigger pod eviction if disk
// usage from the volume exceeds the limit.
// This feature depends on the capability of detecting correct root file system disk usage. For certain systems,
// such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be
// disabled. Once disabled, user should not set request/limit for container's ephemeral storage, or sizeLimit for emptyDir.
// Default: true
// +optional
LocalStorageCapacityIsolation *bool `json:"localStorageCapacityIsolation,omitempty"`
}
type KubeletAuthorizationMode string

View File

@ -452,6 +452,11 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
*out = new(apiv1.TracingConfiguration)
(*in).DeepCopyInto(*out)
}
if in.LocalStorageCapacityIsolation != nil {
in, out := &in.LocalStorageCapacityIsolation, &out.LocalStorageCapacityIsolation
*out = new(bool)
**out = **in
}
return
}