Mirror of https://github.com/k3s-io/kubernetes.git
Promote Local storage capacity isolation feature to GA
This change promotes the local storage capacity isolation feature to GA. At the same time, to let rootless systems disable the feature (they are unable to read root filesystem usage), it introduces a new kubelet configuration field, "localStorageCapacityIsolation", which defaults to true. Rootless systems can set it to false to disable the feature; once disabled, users cannot set ephemeral-storage requests/limits, because ephemeral-storage capacity and allocatable will not be reported. Change-Id: I48a52e737c6a09e9131454db6ad31247b56c000a
This commit is contained in: parent bc4c4930ff, commit 0064010cdd
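For rootless setups that cannot measure root filesystem usage, the feature is switched off through the new kubelet configuration field. A minimal illustrative KubeletConfiguration sketch (only localStorageCapacityIsolation comes from this change; the apiVersion/kind lines are the usual kubelet config header):

apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
# Rootless kubelets cannot detect root filesystem usage, so disable the feature.
localStorageCapacityIsolation: false

With the field set to false, ephemeral-storage capacity and allocatable are not reported, so pods must not request or limit ephemeral-storage and emptyDir sizeLimit should be left unset.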
@@ -492,6 +492,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
fs.Int32Var(&c.NodeStatusMaxImages, "node-status-max-images", c.NodeStatusMaxImages, "The maximum number of images to report in Node.Status.Images. If -1 is specified, no cap will be applied.")
fs.BoolVar(&c.KernelMemcgNotification, "kernel-memcg-notification", c.KernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
+fs.BoolVar(&c.LocalStorageCapacityIsolation, "local-storage-capacity-isolation", c.LocalStorageCapacityIsolation, "If true, local ephemeral storage isolation is enabled. Otherwise, local storage isolation feature will be disabled")

// Flags intended for testing, not recommended used in production environments.
fs.Int64Var(&c.MaxOpenFiles, "max-open-files", c.MaxOpenFiles, "Number of files that can be opened by Kubelet process.")
@@ -646,7 +646,7 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
if kubeDeps.CAdvisorInterface == nil {
imageFsInfoProvider := cadvisor.NewImageFsInfoProvider(s.RemoteRuntimeEndpoint)
-kubeDeps.CAdvisorInterface, err = cadvisor.New(imageFsInfoProvider, s.RootDirectory, cgroupRoots, cadvisor.UsingLegacyCadvisorStats(s.RemoteRuntimeEndpoint))
+kubeDeps.CAdvisorInterface, err = cadvisor.New(imageFsInfoProvider, s.RootDirectory, cgroupRoots, cadvisor.UsingLegacyCadvisorStats(s.RemoteRuntimeEndpoint), s.LocalStorageCapacityIsolation)
if err != nil {
return err
}
@@ -237,6 +237,7 @@ CPU_CFS_QUOTA=${CPU_CFS_QUOTA:-true}
ENABLE_HOSTPATH_PROVISIONER=${ENABLE_HOSTPATH_PROVISIONER:-"false"}
CLAIM_BINDER_SYNC_PERIOD=${CLAIM_BINDER_SYNC_PERIOD:-"15s"} # current k8s default
ENABLE_CONTROLLER_ATTACH_DETACH=${ENABLE_CONTROLLER_ATTACH_DETACH:-"true"} # current default
+LOCAL_STORAGE_CAPACITY_ISOLATION=${LOCAL_STORAGE_CAPACITY_ISOLATION:-"true"} # current default
# This is the default dir and filename where the apiserver will generate a self-signed cert
# which should be able to be used as the CA to verify itself
CERT_DIR=${CERT_DIR:-"/var/run/kubernetes"}

@@ -754,6 +755,7 @@ cgroupRoot: "${CGROUP_ROOT}"
cgroupsPerQOS: ${CGROUPS_PER_QOS}
cpuCFSQuota: ${CPU_CFS_QUOTA}
enableControllerAttachDetach: ${ENABLE_CONTROLLER_ATTACH_DETACH}
+localStorageCapacityIsolation: ${LOCAL_STORAGE_CAPACITY_ISOLATION}
evictionPressureTransitionPeriod: "${EVICTION_PRESSURE_TRANSITION_PERIOD}"
failSwapOn: ${FAIL_SWAP_ON}
port: ${KUBELET_PORT}
@@ -517,14 +517,6 @@ func dropDisabledFields(
}
}

-if !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) && !emptyDirSizeLimitInUse(oldPodSpec) {
-for i := range podSpec.Volumes {
-if podSpec.Volumes[i].EmptyDir != nil {
-podSpec.Volumes[i].EmptyDir.SizeLimit = nil
-}
-}
-}
-
if !utilfeature.DefaultFeatureGate.Enabled(features.ProbeTerminationGracePeriod) && !probeGracePeriodInUse(oldPodSpec) {
// Set pod-level terminationGracePeriodSeconds to nil if the feature is disabled and it is not used
VisitContainers(podSpec, AllContainers, func(c *api.Container, containerType ContainerType) bool {

@@ -703,21 +695,6 @@ func appArmorInUse(podAnnotations map[string]string) bool {
return false
}

-// emptyDirSizeLimitInUse returns true if any pod's EmptyDir volumes use SizeLimit.
-func emptyDirSizeLimitInUse(podSpec *api.PodSpec) bool {
-if podSpec == nil {
-return false
-}
-for i := range podSpec.Volumes {
-if podSpec.Volumes[i].EmptyDir != nil {
-if podSpec.Volumes[i].EmptyDir.SizeLimit != nil {
-return true
-}
-}
-}
-return false
-}
-
// probeGracePeriodInUse returns true if the pod spec is non-nil and has a probe that makes use
// of the probe-level terminationGracePeriodSeconds feature
func probeGracePeriodInUse(podSpec *api.PodSpec) bool {
@@ -25,7 +25,6 @@ import (
"github.com/google/go-cmp/cmp"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation/field"

@@ -703,116 +702,6 @@ func TestDropProcMount(t *testing.T) {
}
}

-func TestDropEmptyDirSizeLimit(t *testing.T) {
-sizeLimit := resource.MustParse("1Gi")
-podWithEmptyDirSizeLimit := func() *api.Pod {
-return &api.Pod{
-Spec: api.PodSpec{
-RestartPolicy: api.RestartPolicyNever,
-Volumes: []api.Volume{
-{
-Name: "a",
-VolumeSource: api.VolumeSource{
-EmptyDir: &api.EmptyDirVolumeSource{
-Medium: "memory",
-SizeLimit: &sizeLimit,
-},
-},
-},
-},
-},
-}
-}
-podWithoutEmptyDirSizeLimit := func() *api.Pod {
-return &api.Pod{
-Spec: api.PodSpec{
-RestartPolicy: api.RestartPolicyNever,
-Volumes: []api.Volume{
-{
-Name: "a",
-VolumeSource: api.VolumeSource{
-EmptyDir: &api.EmptyDirVolumeSource{
-Medium: "memory",
-},
-},
-},
-},
-},
-}
-}
-
-podInfo := []struct {
-description string
-hasEmptyDirSizeLimit bool
-pod func() *api.Pod
-}{
-{
-description: "has EmptyDir Size Limit",
-hasEmptyDirSizeLimit: true,
-pod: podWithEmptyDirSizeLimit,
-},
-{
-description: "does not have EmptyDir Size Limit",
-hasEmptyDirSizeLimit: false,
-pod: podWithoutEmptyDirSizeLimit,
-},
-{
-description: "is nil",
-hasEmptyDirSizeLimit: false,
-pod: func() *api.Pod { return nil },
-},
-}
-
-for _, enabled := range []bool{true, false} {
-for _, oldPodInfo := range podInfo {
-for _, newPodInfo := range podInfo {
-oldPodHasEmptyDirSizeLimit, oldPod := oldPodInfo.hasEmptyDirSizeLimit, oldPodInfo.pod()
-newPodHasEmptyDirSizeLimit, newPod := newPodInfo.hasEmptyDirSizeLimit, newPodInfo.pod()
-if newPod == nil {
-continue
-}
-
-t.Run(fmt.Sprintf("feature enabled=%v, old pod %v, new pod %v", enabled, oldPodInfo.description, newPodInfo.description), func(t *testing.T) {
-defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, enabled)()
-
-var oldPodSpec *api.PodSpec
-if oldPod != nil {
-oldPodSpec = &oldPod.Spec
-}
-dropDisabledFields(&newPod.Spec, nil, oldPodSpec, nil)
-
-// old pod should never be changed
-if !reflect.DeepEqual(oldPod, oldPodInfo.pod()) {
-t.Errorf("old pod changed: %v", cmp.Diff(oldPod, oldPodInfo.pod()))
-}
-
-switch {
-case enabled || oldPodHasEmptyDirSizeLimit:
-// new pod should not be changed if the feature is enabled, or if the old pod had EmptyDir SizeLimit
-if !reflect.DeepEqual(newPod, newPodInfo.pod()) {
-t.Errorf("new pod changed: %v", cmp.Diff(newPod, newPodInfo.pod()))
-}
-case newPodHasEmptyDirSizeLimit:
-// new pod should be changed
-if reflect.DeepEqual(newPod, newPodInfo.pod()) {
-t.Errorf("new pod was not changed")
-}
-// new pod should not have EmptyDir SizeLimit
-if !reflect.DeepEqual(newPod, podWithoutEmptyDirSizeLimit()) {
-t.Errorf("new pod had EmptyDir SizeLimit: %v", cmp.Diff(newPod, podWithoutEmptyDirSizeLimit()))
-}
-default:
-// new pod should not need to be changed
-if !reflect.DeepEqual(newPod, newPodInfo.pod()) {
-t.Errorf("new pod changed: %v", cmp.Diff(newPod, newPodInfo.pod()))
-}
-}
-})
-}
-}
-}
-}
-
func TestDropAppArmor(t *testing.T) {
podWithAppArmor := func() *api.Pod {
return &api.Pod{
@@ -24,8 +24,6 @@ import (

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
-utilfeature "k8s.io/apiserver/pkg/util/feature"
-"k8s.io/kubernetes/pkg/features"
)

// PodRequestsAndLimits returns a dictionary of all defined resources summed up for all

@@ -131,11 +129,6 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou
requestQuantity = resource.Quantity{Format: resource.DecimalSI}
}

-if resourceName == v1.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
-// if the local storage capacity isolation feature gate is disabled, pods request 0 disk
-return requestQuantity
-}
-
for _, container := range pod.Spec.Containers {
if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
requestQuantity.Add(rQuantity)
@@ -515,8 +515,9 @@ const (

// owner: @jinxu
// beta: v1.10
+// stable: v1.25
//
-// New local storage types to support local storage capacity isolation
+// Support local ephemeral storage types for local storage capacity isolation feature.
LocalStorageCapacityIsolation featuregate.Feature = "LocalStorageCapacityIsolation"

// owner: @RobertKrawitz

@@ -988,7 +989,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS

LegacyServiceAccountTokenNoAutoGeneration: {Default: true, PreRelease: featuregate.Beta},

-LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.Beta},
+LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.27

LocalStorageCapacityIsolationFSQuotaMonitoring: {Default: true, PreRelease: featuregate.Beta},
pkg/generated/openapi/zz_generated.openapi.go (generated)
@@ -54589,6 +54589,13 @@ func schema_k8sio_kubelet_config_v1beta1_KubeletConfiguration(ref common.Referen
Ref: ref("k8s.io/component-base/tracing/api/v1.TracingConfiguration"),
},
},
+"localStorageCapacityIsolation": {
+SchemaProps: spec.SchemaProps{
+Description: "LocalStorageCapacityIsolation enables local ephemeral storage isolation feature. The default setting is true. This feature allows users to set request/limit for container's ephemeral storage and manage it in a similar way as cpu and memory. It also allows setting sizeLimit for emptyDir volume, which will trigger pod eviction if disk usage from the volume exceeds the limit. This feature depends on the capability of detecting correct root file system disk usage. For certain systems, such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be disabled. Once disabled, user should not set request/limit for container's ephemeral storage, or sizeLimit for emptyDir. Default: true",
+Type: []string{"boolean"},
+Format: "",
+},
+},
},
},
},
@@ -20,7 +20,7 @@ import (
"math/rand"
"time"

-"github.com/google/gofuzz"
+fuzz "github.com/google/gofuzz"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtimeserializer "k8s.io/apimachinery/pkg/runtime/serializer"

@@ -110,6 +110,7 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} {
}
obj.EnableSystemLogHandler = true
obj.MemoryThrottlingFactor = utilpointer.Float64Ptr(rand.Float64())
+obj.LocalStorageCapacityIsolation = true
},
}
}

@@ -282,5 +282,6 @@ var (
"MemoryThrottlingFactor",
"Tracing.Endpoint",
"Tracing.SamplingRatePerMillion",
+"LocalStorageCapacityIsolation",
)
)
@@ -52,6 +52,7 @@ iptablesMasqueradeBit: 14
kind: KubeletConfiguration
kubeAPIBurst: 10
kubeAPIQPS: 5
+localStorageCapacityIsolation: true
logging:
flushFrequency: 5000000000
format: text

@@ -52,6 +52,7 @@ iptablesMasqueradeBit: 14
kind: KubeletConfiguration
kubeAPIBurst: 10
kubeAPIQPS: 5
+localStorageCapacityIsolation: true
logging:
flushFrequency: 5000000000
format: text
@@ -450,6 +450,16 @@ type KubeletConfiguration struct {
// +featureGate=KubeletTracing
// +optional
Tracing *tracingapi.TracingConfiguration
+
+// LocalStorageCapacityIsolation enables local ephemeral storage isolation feature. The default setting is true.
+// This feature allows users to set request/limit for container's ephemeral storage and manage it in a similar way
+// as cpu and memory. It also allows setting sizeLimit for emptyDir volume, which will trigger pod eviction if disk
+// usage from the volume exceeds the limit.
+// This feature depends on the capability of detecting correct root file system disk usage. For certain systems,
+// such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be
+// disabled. Once disabled, user should not set request/limit for container's ephemeral storage, or sizeLimit for emptyDir.
+// +optional
+LocalStorageCapacityIsolation bool
}

// KubeletAuthorizationMode denotes the authorization mode for the kubelet

@@ -264,4 +264,7 @@ func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfigura
if obj.RegisterNode == nil {
obj.RegisterNode = utilpointer.BoolPtr(true)
}
+if obj.LocalStorageCapacityIsolation == nil {
+obj.LocalStorageCapacityIsolation = utilpointer.BoolPtr(true)
+}
}
@@ -115,12 +115,13 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
Format: "text",
FlushFrequency: 5 * time.Second,
},
-EnableSystemLogHandler: utilpointer.BoolPtr(true),
-EnableProfilingHandler: utilpointer.BoolPtr(true),
-EnableDebugFlagsHandler: utilpointer.BoolPtr(true),
-SeccompDefault: utilpointer.BoolPtr(false),
-MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor),
-RegisterNode: utilpointer.BoolPtr(true),
+EnableSystemLogHandler: utilpointer.BoolPtr(true),
+EnableProfilingHandler: utilpointer.BoolPtr(true),
+EnableDebugFlagsHandler: utilpointer.BoolPtr(true),
+SeccompDefault: utilpointer.BoolPtr(false),
+MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor),
+RegisterNode: utilpointer.BoolPtr(true),
+LocalStorageCapacityIsolation: utilpointer.BoolPtr(true),
},
},
{

@@ -245,6 +246,7 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
SeccompDefault: utilpointer.Bool(false),
MemoryThrottlingFactor: utilpointer.Float64(0),
RegisterNode: utilpointer.BoolPtr(false),
+LocalStorageCapacityIsolation: utilpointer.BoolPtr(false),
},
&v1beta1.KubeletConfiguration{
EnableServer: utilpointer.BoolPtr(false),

@@ -333,13 +335,14 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
Format: "text",
FlushFrequency: 5 * time.Second,
},
-EnableSystemLogHandler: utilpointer.Bool(false),
-ReservedMemory: []v1beta1.MemoryReservation{},
-EnableProfilingHandler: utilpointer.Bool(false),
-EnableDebugFlagsHandler: utilpointer.Bool(false),
-SeccompDefault: utilpointer.Bool(false),
-MemoryThrottlingFactor: utilpointer.Float64(0),
-RegisterNode: utilpointer.BoolPtr(false),
+EnableSystemLogHandler: utilpointer.Bool(false),
+ReservedMemory: []v1beta1.MemoryReservation{},
+EnableProfilingHandler: utilpointer.Bool(false),
+EnableDebugFlagsHandler: utilpointer.Bool(false),
+SeccompDefault: utilpointer.Bool(false),
+MemoryThrottlingFactor: utilpointer.Float64(0),
+RegisterNode: utilpointer.BoolPtr(false),
+LocalStorageCapacityIsolation: utilpointer.BoolPtr(false),
},
},
{

@@ -481,11 +484,12 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
},
},
-EnableProfilingHandler: utilpointer.Bool(true),
-EnableDebugFlagsHandler: utilpointer.Bool(true),
-SeccompDefault: utilpointer.Bool(true),
-MemoryThrottlingFactor: utilpointer.Float64(1),
-RegisterNode: utilpointer.BoolPtr(true),
+EnableProfilingHandler: utilpointer.Bool(true),
+EnableDebugFlagsHandler: utilpointer.Bool(true),
+SeccompDefault: utilpointer.Bool(true),
+MemoryThrottlingFactor: utilpointer.Float64(1),
+RegisterNode: utilpointer.BoolPtr(true),
+LocalStorageCapacityIsolation: utilpointer.BoolPtr(true),
},
&v1beta1.KubeletConfiguration{
EnableServer: utilpointer.BoolPtr(true),

@@ -624,11 +628,12 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")},
},
},
-EnableProfilingHandler: utilpointer.Bool(true),
-EnableDebugFlagsHandler: utilpointer.Bool(true),
-SeccompDefault: utilpointer.Bool(true),
-MemoryThrottlingFactor: utilpointer.Float64(1),
-RegisterNode: utilpointer.BoolPtr(true),
+EnableProfilingHandler: utilpointer.Bool(true),
+EnableDebugFlagsHandler: utilpointer.Bool(true),
+SeccompDefault: utilpointer.Bool(true),
+MemoryThrottlingFactor: utilpointer.Float64(1),
+RegisterNode: utilpointer.BoolPtr(true),
+LocalStorageCapacityIsolation: utilpointer.BoolPtr(true),
},
},
{

@@ -709,12 +714,13 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) {
Format: "text",
FlushFrequency: 5 * time.Second,
},
-EnableSystemLogHandler: utilpointer.BoolPtr(true),
-EnableProfilingHandler: utilpointer.BoolPtr(true),
-EnableDebugFlagsHandler: utilpointer.BoolPtr(true),
-SeccompDefault: utilpointer.BoolPtr(false),
-MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor),
-RegisterNode: utilpointer.BoolPtr(true),
+EnableSystemLogHandler: utilpointer.BoolPtr(true),
+EnableProfilingHandler: utilpointer.BoolPtr(true),
+EnableDebugFlagsHandler: utilpointer.BoolPtr(true),
+SeccompDefault: utilpointer.BoolPtr(false),
+MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor),
+RegisterNode: utilpointer.BoolPtr(true),
+LocalStorageCapacityIsolation: utilpointer.BoolPtr(true),
},
},
}
@@ -508,6 +508,9 @@ func autoConvert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(in
return err
}
out.Tracing = (*apiv1.TracingConfiguration)(unsafe.Pointer(in.Tracing))
+if err := v1.Convert_Pointer_bool_To_bool(&in.LocalStorageCapacityIsolation, &out.LocalStorageCapacityIsolation, s); err != nil {
+return err
+}
return nil
}

@@ -683,6 +686,9 @@ func autoConvert_config_KubeletConfiguration_To_v1beta1_KubeletConfiguration(in
return err
}
out.Tracing = (*apiv1.TracingConfiguration)(unsafe.Pointer(in.Tracing))
+if err := v1.Convert_bool_To_Pointer_bool(&in.LocalStorageCapacityIsolation, &out.LocalStorageCapacityIsolation, s); err != nil {
+return err
+}
return nil
}
@@ -80,7 +80,7 @@ func init() {
}

// New creates a new cAdvisor Interface for linux systems.
-func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats bool) (Interface, error) {
+func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) {
sysFs := sysfs.NewRealSysFs()

includedMetrics := cadvisormetrics.MetricSet{

@@ -99,7 +99,7 @@ func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots [
includedMetrics[cadvisormetrics.AcceleratorUsageMetrics] = struct{}{}
}

-if usingLegacyStats || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
+if usingLegacyStats || localStorageCapacityIsolation {
includedMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{}
}
@@ -33,7 +33,7 @@ type cadvisorUnsupported struct {
var _ Interface = new(cadvisorUnsupported)

// New creates a new cAdvisor Interface for unsupported systems.
-func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupsRoots []string, usingLegacyStats bool) (Interface, error) {
+func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupsRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) {
return &cadvisorUnsupported{}, nil
}
@@ -34,7 +34,7 @@ type cadvisorClient struct {
var _ Interface = new(cadvisorClient)

// New creates a cAdvisor and exports its API on the specified port if port > 0.
-func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats bool) (Interface, error) {
+func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) {
client, err := winstats.NewPerfCounterClient()
return &cadvisorClient{
rootPath: rootPath,
@@ -47,7 +47,7 @@ type ContainerManager interface {
// Runs the container manager's housekeeping.
// - Ensures that the Docker daemon is in a container.
// - Creates the system container where all non-containerized processes run.
-Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService) error
+Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService, bool) error

// SystemCgroupsLimit returns resources allocated to system cgroups in the machine.
// These cgroups include the system and Kubernetes services.

@@ -73,7 +73,7 @@ type ContainerManager interface {
GetNodeAllocatableReservation() v1.ResourceList

// GetCapacity returns the amount of compute resources tracked by container manager available on the node.
-GetCapacity() v1.ResourceList
+GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList

// GetDevicePluginResourceCapacity returns the node capacity (amount of total device plugin resources),
// node allocatable (amount of total healthy resources reported by device plugin),
@@ -554,7 +554,8 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
activePods ActivePodsFunc,
sourcesReady config.SourcesReady,
podStatusProvider status.PodStatusProvider,
-runtimeService internalapi.RuntimeService) error {
+runtimeService internalapi.RuntimeService,
+localStorageCapacityIsolation bool) error {

// Initialize CPU manager
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.CPUManager) {

@@ -578,7 +579,7 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
// allocatable of the node
cm.nodeInfo = node

-if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
+if localStorageCapacityIsolation {
rootfs, err := cm.cadvisorInterface.RootFsInfo()
if err != nil {
return fmt.Errorf("failed to get rootfs info: %v", err)

@@ -915,8 +916,8 @@ func isKernelPid(pid int) bool {

// GetCapacity returns node capacity data for "cpu", "memory", "ephemeral-storage", and "huge-pages*"
// At present this method is only invoked when introspecting ephemeral storage
-func (cm *containerManagerImpl) GetCapacity() v1.ResourceList {
-if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
+func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
+if localStorageCapacityIsolation {
// We store allocatable ephemeral-storage in the capacity property once we Start() the container manager
if _, ok := cm.capacity[v1.ResourceEphemeralStorage]; !ok {
// If we haven't yet stored the capacity for ephemeral-storage, we can try to fetch it directly from cAdvisor,
@@ -28,9 +28,6 @@ import (

gomock "github.com/golang/mock/gomock"
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
-utilfeature "k8s.io/apiserver/pkg/util/feature"
-featuregatetesting "k8s.io/component-base/featuregate/testing"
-kubefeatures "k8s.io/kubernetes/pkg/features"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/stretchr/testify/assert"

@@ -193,11 +190,11 @@ func TestGetCapacity(t *testing.T) {
mockCadvisorError := cadvisortest.NewMockInterface(mockCtrlError)
mockCadvisorError.EXPECT().RootFsInfo().Return(cadvisorapiv2.FsInfo{}, errors.New("Unable to get rootfs data from cAdvisor interface"))
cases := []struct {
-name string
-cm *containerManagerImpl
-expectedResourceQuantity *resource.Quantity
-expectedNoEphemeralStorage bool
-enableLocalStorageCapacityIsolation bool
+name string
+cm *containerManagerImpl
+expectedResourceQuantity *resource.Quantity
+expectedNoEphemeralStorage bool
+disablelocalStorageCapacityIsolation bool
}{
{
name: "capacity property has ephemeral-storage",

@@ -207,9 +204,8 @@ func TestGetCapacity(t *testing.T) {
v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
},
},
-expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
-expectedNoEphemeralStorage: false,
-enableLocalStorageCapacityIsolation: true,
+expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
+expectedNoEphemeralStorage: false,
},
{
name: "capacity property does not have ephemeral-storage",

@@ -217,9 +213,8 @@ func TestGetCapacity(t *testing.T) {
cadvisorInterface: mockCadvisor,
capacity: v1.ResourceList{},
},
-expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCadvisor, resource.BinarySI),
-expectedNoEphemeralStorage: false,
-enableLocalStorageCapacityIsolation: true,
+expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCadvisor, resource.BinarySI),
+expectedNoEphemeralStorage: false,
},
{
name: "capacity property does not have ephemeral-storage, error from rootfs",

@@ -227,8 +222,7 @@ func TestGetCapacity(t *testing.T) {
cadvisorInterface: mockCadvisorError,
capacity: v1.ResourceList{},
},
-expectedNoEphemeralStorage: true,
-enableLocalStorageCapacityIsolation: true,
+expectedNoEphemeralStorage: true,
},
{
name: "capacity property does not have ephemeral-storage, cadvisor interface is nil",

@@ -236,26 +230,24 @@ func TestGetCapacity(t *testing.T) {
cadvisorInterface: nil,
capacity: v1.ResourceList{},
},
-expectedNoEphemeralStorage: true,
-enableLocalStorageCapacityIsolation: true,
+expectedNoEphemeralStorage: true,
},
{
-name: "LocalStorageCapacityIsolation feature flag is disabled",
+name: "capacity property has ephemeral-storage, but localStorageCapacityIsolation is disabled",
cm: &containerManagerImpl{
cadvisorInterface: mockCadvisor,
capacity: v1.ResourceList{
-v1.ResourceCPU: resource.MustParse("4"),
-v1.ResourceMemory: resource.MustParse("16G"),
+v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
},
},
-expectedNoEphemeralStorage: true,
-enableLocalStorageCapacityIsolation: false,
+expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI),
+expectedNoEphemeralStorage: true,
+disablelocalStorageCapacityIsolation: true,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
-defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, kubefeatures.LocalStorageCapacityIsolation, c.enableLocalStorageCapacityIsolation)()
-ret := c.cm.GetCapacity()
+ret := c.cm.GetCapacity(!c.disablelocalStorageCapacityIsolation)
if v, exists := ret[v1.ResourceEphemeralStorage]; !exists {
if !c.expectedNoEphemeralStorage {
t.Errorf("did not get any ephemeral storage data")
@@ -41,7 +41,7 @@ type containerManagerStub struct {
var _ ContainerManager = &containerManagerStub{}

-func (cm *containerManagerStub) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService) error {
+func (cm *containerManagerStub) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
klog.V(2).InfoS("Starting stub container manager")
return nil
}

@@ -74,7 +74,10 @@ func (cm *containerManagerStub) GetNodeAllocatableReservation() v1.ResourceList
return nil
}

-func (cm *containerManagerStub) GetCapacity() v1.ResourceList {
+func (cm *containerManagerStub) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
+if !localStorageCapacityIsolation {
+return v1.ResourceList{}
+}
c := v1.ResourceList{
v1.ResourceEphemeralStorage: *resource.NewQuantity(
int64(0),
@@ -38,7 +38,7 @@ type unsupportedContainerManager struct {
var _ ContainerManager = &unsupportedContainerManager{}

-func (unsupportedContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService) error {
+func (unsupportedContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
return fmt.Errorf("Container Manager is unsupported in this build")
}
@@ -30,11 +30,9 @@ import (

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
-utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/record"
internalapi "k8s.io/cri-api/pkg/apis"
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
-kubefeatures "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/kubelet/cm/admission"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"

@@ -72,10 +70,11 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
activePods ActivePodsFunc,
sourcesReady config.SourcesReady,
podStatusProvider status.PodStatusProvider,
-runtimeService internalapi.RuntimeService) error {
+runtimeService internalapi.RuntimeService,
+localStorageCapacityIsolation bool) error {
klog.V(2).InfoS("Starting Windows container manager")

-if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
+if localStorageCapacityIsolation {
rootfs, err := cm.cadvisorInterface.RootFsInfo()
if err != nil {
return fmt.Errorf("failed to get rootfs info: %v", err)

@@ -171,7 +170,7 @@ func (cm *containerManagerImpl) GetNodeAllocatableReservation() v1.ResourceList
return result
}

-func (cm *containerManagerImpl) GetCapacity() v1.ResourceList {
+func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
return cm.capacity
}
@@ -50,7 +50,7 @@ func NewFakeContainerManager() *FakeContainerManager {
}
}

-func (cm *FakeContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService) error {
+func (cm *FakeContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error {
cm.Lock()
defer cm.Unlock()
cm.CalledFunctions = append(cm.CalledFunctions, "Start")

@@ -106,10 +106,13 @@ func (cm *FakeContainerManager) GetNodeAllocatableReservation() v1.ResourceList
return nil
}

-func (cm *FakeContainerManager) GetCapacity() v1.ResourceList {
+func (cm *FakeContainerManager) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
cm.Lock()
defer cm.Unlock()
cm.CalledFunctions = append(cm.CalledFunctions, "GetCapacity")
+if !localStorageCapacityIsolation {
+return v1.ResourceList{}
+}
c := v1.ResourceList{
v1.ResourceEphemeralStorage: *resource.NewQuantity(
int64(0),
@@ -26,13 +26,11 @@ import (

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
-utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/record"
v1helper "k8s.io/component-helpers/scheduling/corev1"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
-"k8s.io/kubernetes/pkg/features"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"

@@ -97,6 +95,8 @@ type managerImpl struct {
thresholdNotifiers []ThresholdNotifier
// thresholdsLastUpdated is the last time the thresholdNotifiers were updated.
thresholdsLastUpdated time.Time
+// whether can support local storage capacity isolation
+localStorageCapacityIsolation bool
}

// ensure it implements the required interface

@@ -113,21 +113,23 @@ func NewManager(
recorder record.EventRecorder,
nodeRef *v1.ObjectReference,
clock clock.WithTicker,
+localStorageCapacityIsolation bool,
) (Manager, lifecycle.PodAdmitHandler) {
manager := &managerImpl{
-clock: clock,
-killPodFunc: killPodFunc,
-mirrorPodFunc: mirrorPodFunc,
-imageGC: imageGC,
-containerGC: containerGC,
-config: config,
-recorder: recorder,
-summaryProvider: summaryProvider,
-nodeRef: nodeRef,
-nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
-thresholdsFirstObservedAt: thresholdsObservedAt{},
-dedicatedImageFs: nil,
-thresholdNotifiers: []ThresholdNotifier{},
+clock: clock,
+killPodFunc: killPodFunc,
+mirrorPodFunc: mirrorPodFunc,
+imageGC: imageGC,
+containerGC: containerGC,
+config: config,
+recorder: recorder,
+summaryProvider: summaryProvider,
+nodeRef: nodeRef,
+nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
+thresholdsFirstObservedAt: thresholdsObservedAt{},
+dedicatedImageFs: nil,
+thresholdNotifiers: []ThresholdNotifier{},
+localStorageCapacityIsolation: localStorageCapacityIsolation,
}
return manager, manager
}

@@ -230,7 +232,7 @@ func (m *managerImpl) IsUnderPIDPressure() bool {
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) []*v1.Pod {
// if we have nothing to do, just return
thresholds := m.config.Thresholds
-if len(thresholds) == 0 && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
+if len(thresholds) == 0 && !m.localStorageCapacityIsolation {
return nil
}

@@ -318,7 +320,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act

// evict pods if there is a resource usage violation from local volume temporary storage
// If eviction happens in localStorageEviction function, skip the rest of eviction action
-if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
+if m.localStorageCapacityIsolation {
if evictedPods := m.localStorageEviction(activePods, statsFunc); len(evictedPods) > 0 {
return evictedPods
}
@@ -18,12 +18,13 @@ package eviction

import (
"fmt"
-"k8s.io/apimachinery/pkg/util/diff"
"reflect"
"sort"
"testing"
"time"

+"k8s.io/apimachinery/pkg/util/diff"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

@@ -31,6 +32,7 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"

"k8s.io/kubernetes/pkg/features"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"

@@ -699,7 +701,6 @@ func TestOrderedByExceedsRequestMemory(t *testing.T) {
}

func TestOrderedByExceedsRequestDisk(t *testing.T) {
-defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
below := newPod("below-requests", -1, []v1.Container{
newContainer("below-requests", v1.ResourceList{v1.ResourceEphemeralStorage: resource.MustParse("200Mi")}, newResourceList("", "", "")),
}, nil)

@@ -748,7 +749,6 @@ func TestOrderedByPriority(t *testing.T) {
}

func TestOrderedbyDisk(t *testing.T) {
-defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{
newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")),
}, []v1.Volume{

@@ -813,73 +813,6 @@ func TestOrderedbyDisk(t *testing.T) {
}
}

-// Tests that we correctly ignore disk requests when the local storage feature gate is disabled.
-func TestOrderedbyDiskDisableLocalStorage(t *testing.T) {
-defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, false)()
-pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{
-newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")),
-}, []v1.Volume{
-newVolume("local-volume", v1.VolumeSource{
-EmptyDir: &v1.EmptyDirVolumeSource{},
-}),
-})
-pod2 := newPod("best-effort-low", defaultPriority, []v1.Container{
-newContainer("best-effort-low", newResourceList("", "", ""), newResourceList("", "", "")),
-}, []v1.Volume{
-newVolume("local-volume", v1.VolumeSource{
-EmptyDir: &v1.EmptyDirVolumeSource{},
-}),
-})
-pod3 := newPod("burstable-high", defaultPriority, []v1.Container{
-newContainer("burstable-high", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")),
-}, []v1.Volume{
-newVolume("local-volume", v1.VolumeSource{
-EmptyDir: &v1.EmptyDirVolumeSource{},
-}),
-})
-pod4 := newPod("burstable-low", defaultPriority, []v1.Container{
-newContainer("burstable-low", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")),
-}, []v1.Volume{
-newVolume("local-volume", v1.VolumeSource{
-EmptyDir: &v1.EmptyDirVolumeSource{},
-}),
-})
-pod5 := newPod("guaranteed-high", defaultPriority, []v1.Container{
-newContainer("guaranteed-high", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")),
-}, []v1.Volume{
-newVolume("local-volume", v1.VolumeSource{
-EmptyDir: &v1.EmptyDirVolumeSource{},
-}),
-})
-pod6 := newPod("guaranteed-low", defaultPriority, []v1.Container{
-newContainer("guaranteed-low", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")),
-}, []v1.Volume{
-newVolume("local-volume", v1.VolumeSource{
-EmptyDir: &v1.EmptyDirVolumeSource{},
-}),
-})
-stats := map[*v1.Pod]statsapi.PodStats{
-pod1: newPodDiskStats(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("150Mi")), // 300Mi
-pod2: newPodDiskStats(pod2, resource.MustParse("25Mi"), resource.MustParse("25Mi"), resource.MustParse("50Mi")), // 100Mi
-pod3: newPodDiskStats(pod3, resource.MustParse("150Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 350Mi
-pod4: newPodDiskStats(pod4, resource.MustParse("25Mi"), resource.MustParse("35Mi"), resource.MustParse("50Mi")), // 110Mi
-pod5: newPodDiskStats(pod5, resource.MustParse("225Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 375Mi
-pod6: newPodDiskStats(pod6, resource.MustParse("25Mi"), resource.MustParse("45Mi"), resource.MustParse("50Mi")), // 120Mi
-}
-statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) {
-result, found := stats[pod]
-return result, found
-}
-pods := []*v1.Pod{pod1, pod3, pod2, pod4, pod5, pod6}
-orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)).Sort(pods)
-expected := []*v1.Pod{pod5, pod3, pod1, pod6, pod4, pod2}
-for i := range expected {
-if pods[i] != expected[i] {
-t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name)
-}
-}
-}
-
func TestOrderedbyInodes(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
low := newPod("low", defaultPriority, []v1.Container{
@@ -773,7 +773,8 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
klet.backOff = flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)

// setup eviction manager
-evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.podManager.GetMirrorPodByPod, klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock)
+evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig,
+killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.podManager.GetMirrorPodByPod, klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock, kubeCfg.LocalStorageCapacityIsolation)

klet.evictionManager = evictionManager
klet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)

@@ -1384,7 +1385,7 @@ func (kl *Kubelet) initializeRuntimeDependentModules() {
os.Exit(1)
}
// containerManager must start after cAdvisor because it needs filesystem capacity information
-if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService); err != nil {
+if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService, kl.supportLocalStorageCapacityIsolation()); err != nil {
// Fail kubelet and rely on the babysitter to retry starting kubelet.
klog.ErrorS(err, "Failed to start ContainerManager")
os.Exit(1)

@@ -2489,6 +2490,10 @@ func (kl *Kubelet) CheckpointContainer(
return nil
}

+func (kl *Kubelet) supportLocalStorageCapacityIsolation() bool {
+return kl.GetConfiguration().LocalStorageCapacityIsolation
+}
+
// isSyncPodWorthy filters out events that are not worthy of pod syncing
func isSyncPodWorthy(event *pleg.PodLifecycleEvent) bool {
// ContainerRemoved doesn't affect pod state
@@ -624,7 +624,7 @@ func (kl *Kubelet) defaultNodeStatusFuncs() []func(*v1.Node) error {
setters = append(setters,
nodestatus.NodeAddress(kl.nodeIPs, kl.nodeIPValidator, kl.hostname, kl.hostnameOverridden, kl.externalCloudProvider, kl.cloud, nodeAddressesFunc),
nodestatus.MachineInfo(string(kl.nodeName), kl.maxPods, kl.podsPerCore, kl.GetCachedMachineInfo, kl.containerManager.GetCapacity,
-kl.containerManager.GetDevicePluginResourceCapacity, kl.containerManager.GetNodeAllocatableReservation, kl.recordEvent),
+kl.containerManager.GetDevicePluginResourceCapacity, kl.containerManager.GetNodeAllocatableReservation, kl.recordEvent, kl.supportLocalStorageCapacityIsolation()),
nodestatus.VersionInfo(kl.cadvisor.VersionInfo, kl.containerRuntime.Type, kl.containerRuntime.Version),
nodestatus.DaemonEndpoints(kl.daemonEndpoints),
nodestatus.Images(kl.nodeStatusMaxImages, kl.imageManager.GetImageList),

@@ -637,7 +637,8 @@ func (kl *Kubelet) defaultNodeStatusFuncs() []func(*v1.Node) error {
nodestatus.MemoryPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderMemoryPressure, kl.recordNodeStatusEvent),
nodestatus.DiskPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderDiskPressure, kl.recordNodeStatusEvent),
nodestatus.PIDPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderPIDPressure, kl.recordNodeStatusEvent),
-nodestatus.ReadyCondition(kl.clock.Now, kl.runtimeState.runtimeErrors, kl.runtimeState.networkErrors, kl.runtimeState.storageErrors, validateHostFunc, kl.containerManager.Status, kl.shutdownManager.ShutdownStatus, kl.recordNodeStatusEvent),
+nodestatus.ReadyCondition(kl.clock.Now, kl.runtimeState.runtimeErrors, kl.runtimeState.networkErrors, kl.runtimeState.storageErrors,
+validateHostFunc, kl.containerManager.Status, kl.shutdownManager.ShutdownStatus, kl.recordNodeStatusEvent, kl.supportLocalStorageCapacityIsolation()),
nodestatus.VolumesInUse(kl.volumeManager.ReconcilerStatesHasBeenSynced, kl.volumeManager.GetVolumesInUse),
// TODO(mtaufen): I decided not to move this setter for now, since all it does is send an event
// and record state back to the Kubelet runtime object. In the future, I'd like to isolate
@@ -153,7 +153,10 @@ func (lcm *localCM) GetNodeAllocatableReservation() v1.ResourceList {
return lcm.allocatableReservation
}

-func (lcm *localCM) GetCapacity() v1.ResourceList {
+func (lcm *localCM) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
+if !localStorageCapacityIsolation {
+delete(lcm.capacity, v1.ResourceEphemeralStorage)
+}
return lcm.capacity
}

@@ -182,7 +185,7 @@ func TestUpdateNewNodeStatus(t *testing.T) {
}
inputImageList, expectedImageList := generateTestingImageLists(numTestImages, int(tc.nodeStatusMaxImages))
testKubelet := newTestKubeletWithImageList(
-t, inputImageList, false /* controllerAttachDetachEnabled */, true /*initFakeVolumePlugin*/)
+t, inputImageList, false /* controllerAttachDetachEnabled */, true /*initFakeVolumePlugin*/, true /* localStorageCapacityIsolation */)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.nodeStatusMaxImages = tc.nodeStatusMaxImages

@@ -1347,7 +1350,7 @@ func TestUpdateNewNodeStatusTooLargeReservation(t *testing.T) {
// generate one more in inputImageList than we configure the Kubelet to report
inputImageList, _ := generateTestingImageLists(nodeStatusMaxImages+1, nodeStatusMaxImages)
testKubelet := newTestKubeletWithImageList(
-t, inputImageList, false /* controllerAttachDetachEnabled */, true /* initFakeVolumePlugin */)
+t, inputImageList, false /* controllerAttachDetachEnabled */, true /* initFakeVolumePlugin */, true)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.nodeStatusMaxImages = nodeStatusMaxImages
@@ -23,7 +23,7 @@ import (
"testing"

"github.com/stretchr/testify/assert"
-"k8s.io/api/core/v1"
+v1 "k8s.io/api/core/v1"

runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
_ "k8s.io/kubernetes/pkg/apis/core/install"

@@ -145,14 +145,16 @@ func newTestKubelet(t *testing.T, controllerAttachDetachEnabled bool) *TestKubel
Size: 456,
},
}
-return newTestKubeletWithImageList(t, imageList, controllerAttachDetachEnabled, true /*initFakeVolumePlugin*/)
+return newTestKubeletWithImageList(t, imageList, controllerAttachDetachEnabled, true /*initFakeVolumePlugin*/, true /*localStorageCapacityIsolation*/)
}

func newTestKubeletWithImageList(
t *testing.T,
imageList []kubecontainer.Image,
controllerAttachDetachEnabled bool,
-initFakeVolumePlugin bool) *TestKubelet {
+initFakeVolumePlugin bool,
+localStorageCapacityIsolation bool,
+) *TestKubelet {
logger, _ := ktesting.NewTestContext(t)

fakeRuntime := &containertest.FakeRuntime{

@@ -320,7 +322,8 @@ func newTestKubeletWithImageList(
Namespace: "",
}
// setup eviction manager
-evictionManager, evictionAdmitHandler := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.podManager.GetMirrorPodByPod, kubelet.imageManager, kubelet.containerGC, fakeRecorder, nodeRef, kubelet.clock)
+evictionManager, evictionAdmitHandler := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{},
+killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.podManager.GetMirrorPodByPod, kubelet.imageManager, kubelet.containerGC, fakeRecorder, nodeRef, kubelet.clock, kubelet.supportLocalStorageCapacityIsolation())

kubelet.evictionManager = evictionManager
kubelet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)

@@ -386,6 +389,7 @@ func newTestKubeletWithImageList(
kubelet.AddPodSyncLoopHandler(activeDeadlineHandler)
kubelet.AddPodSyncHandler(activeDeadlineHandler)
kubelet.lastContainerStartedTime = newTimeCache()
+kubelet.kubeletConfiguration.LocalStorageCapacityIsolation = localStorageCapacityIsolation
return &TestKubelet{kubelet, fakeRuntime, fakeContainerManager, fakeKubeClient, fakeMirrorClient, fakeClock, nil, plug}
}
@@ -31,13 +31,11 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/errors"
utilnet "k8s.io/apimachinery/pkg/util/net"
-utilfeature "k8s.io/apiserver/pkg/util/feature"
cloudprovider "k8s.io/cloud-provider"
cloudproviderapi "k8s.io/cloud-provider/api"
cloudprovidernodeutil "k8s.io/cloud-provider/node/helpers"
"k8s.io/component-base/version"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
-"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"

@@ -244,10 +242,11 @@ func MachineInfo(nodeName string,
maxPods int,
podsPerCore int,
machineInfoFunc func() (*cadvisorapiv1.MachineInfo, error), // typically Kubelet.GetCachedMachineInfo
-capacityFunc func() v1.ResourceList, // typically Kubelet.containerManager.GetCapacity
+capacityFunc func(localStorageCapacityIsolation bool) v1.ResourceList, // typically Kubelet.containerManager.GetCapacity
devicePluginResourceCapacityFunc func() (v1.ResourceList, v1.ResourceList, []string), // typically Kubelet.containerManager.GetDevicePluginResourceCapacity
nodeAllocatableReservationFunc func() v1.ResourceList, // typically Kubelet.containerManager.GetNodeAllocatableReservation
recordEventFunc func(eventType, event, message string), // typically Kubelet.recordEvent
+localStorageCapacityIsolation bool,
) Setter {
return func(node *v1.Node) error {
// Note: avoid blindly overwriting the capacity in case opaque

@@ -295,16 +294,15 @@ func MachineInfo(nodeName string,
}
node.Status.NodeInfo.BootID = info.BootID

-if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
-// TODO: all the node resources should use ContainerManager.GetCapacity instead of deriving the
-// capacity for every node status request
-initialCapacity := capacityFunc()
-if initialCapacity != nil {
-if v, exists := initialCapacity[v1.ResourceEphemeralStorage]; exists {
-node.Status.Capacity[v1.ResourceEphemeralStorage] = v
-}
+// TODO: all the node resources should use ContainerManager.GetCapacity instead of deriving the
+// capacity for every node status request
+initialCapacity := capacityFunc(localStorageCapacityIsolation)
+if initialCapacity != nil {
+if v, exists := initialCapacity[v1.ResourceEphemeralStorage]; exists {
+node.Status.Capacity[v1.ResourceEphemeralStorage] = v
+}
}
//}

devicePluginCapacity, devicePluginAllocatable, removedDevicePlugins = devicePluginResourceCapacityFunc()
for k, v := range devicePluginCapacity {

@@ -469,6 +467,7 @@ func ReadyCondition(
cmStatusFunc func() cm.Status, // typically Kubelet.containerManager.Status
nodeShutdownManagerErrorsFunc func() error, // typically kubelet.shutdownManager.errors.
recordEventFunc func(eventType, event string), // typically Kubelet.recordNodeStatusEvent
+localStorageCapacityIsolation bool,
) Setter {
return func(node *v1.Node) error {
// NOTE(aaronlevy): NodeReady condition needs to be the last in the list of node conditions.

@@ -484,7 +483,7 @@ func ReadyCondition(
}
errs := []error{runtimeErrorsFunc(), networkErrorsFunc(), storageErrorsFunc(), nodeShutdownManagerErrorsFunc()}
requiredCapacities := []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods}
-if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
+if localStorageCapacityIsolation {
requiredCapacities = append(requiredCapacities, v1.ResourceEphemeralStorage)
}
missingCapacities := []string{}
@ -585,17 +585,18 @@ func TestMachineInfo(t *testing.T) {
}

cases := []struct {
desc string
node *v1.Node
maxPods int
podsPerCore int
machineInfo *cadvisorapiv1.MachineInfo
machineInfoError error
capacity v1.ResourceList
devicePluginResourceCapacity dprc
nodeAllocatableReservation v1.ResourceList
expectNode *v1.Node
expectEvents []testEvent
desc string
node *v1.Node
maxPods int
podsPerCore int
machineInfo *cadvisorapiv1.MachineInfo
machineInfoError error
capacity v1.ResourceList
devicePluginResourceCapacity dprc
nodeAllocatableReservation v1.ResourceList
expectNode *v1.Node
expectEvents []testEvent
disableLocalStorageCapacityIsolation bool
}{
{
desc: "machine identifiers, basic capacity and allocatable",
@ -797,6 +798,35 @@ func TestMachineInfo(t *testing.T) {
},
},
},
{
desc: "ephemeral storage is not reflected in capacity and allocatable because localStorageCapacityIsolation is disabled",
node: &v1.Node{},
maxPods: 110,
machineInfo: &cadvisorapiv1.MachineInfo{
NumCores: 2,
MemoryCapacity: 1024,
},
capacity: v1.ResourceList{
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
},
expectNode: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(1024, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(110, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(1024, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(110, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
},
},
},
disableLocalStorageCapacityIsolation: true,
},
{
desc: "device plugin resources are reflected in capacity and allocatable",
node: &v1.Node{},
@ -962,7 +992,7 @@ func TestMachineInfo(t *testing.T) {
machineInfoFunc := func() (*cadvisorapiv1.MachineInfo, error) {
return tc.machineInfo, tc.machineInfoError
}
capacityFunc := func() v1.ResourceList {
capacityFunc := func(localStorageCapacityIsolation bool) v1.ResourceList {
return tc.capacity
}
devicePluginResourceCapacityFunc := func() (v1.ResourceList, v1.ResourceList, []string) {
@ -983,7 +1013,7 @@ func TestMachineInfo(t *testing.T) {
}
// construct setter
setter := MachineInfo(nodeName, tc.maxPods, tc.podsPerCore, machineInfoFunc, capacityFunc,
devicePluginResourceCapacityFunc, nodeAllocatableReservationFunc, recordEventFunc)
devicePluginResourceCapacityFunc, nodeAllocatableReservationFunc, recordEventFunc, tc.disableLocalStorageCapacityIsolation)
// call setter on node
if err := setter(tc.node); err != nil {
t.Fatalf("unexpected error: %v", err)
@ -1180,17 +1210,28 @@ func TestReadyCondition(t *testing.T) {
},
}

withoutStorageCapacity := &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(100, resource.DecimalSI),
},
},
}

cases := []struct {
desc string
node *v1.Node
runtimeErrors error
networkErrors error
storageErrors error
appArmorValidateHostFunc func() error
cmStatus cm.Status
nodeShutdownManagerErrors error
expectConditions []v1.NodeCondition
expectEvents []testEvent
desc string
node *v1.Node
runtimeErrors error
networkErrors error
storageErrors error
appArmorValidateHostFunc func() error
cmStatus cm.Status
nodeShutdownManagerErrors error
expectConditions []v1.NodeCondition
expectEvents []testEvent
disableLocalStorageCapacityIsolation bool
}{
{
desc: "new, ready",
@ -1245,6 +1286,12 @@ func TestReadyCondition(t *testing.T) {
node: &v1.Node{},
expectConditions: []v1.NodeCondition{*makeReadyCondition(false, "missing node capacity for resources: cpu, memory, pods, ephemeral-storage", now, now)},
},
{
desc: "new, ready: localStorageCapacityIsolation is not supported",
node: withoutStorageCapacity.DeepCopy(),
disableLocalStorageCapacityIsolation: true,
expectConditions: []v1.NodeCondition{*makeReadyCondition(true, "kubelet is posting ready status", now, now)},
},
// the transition tests ensure timestamps are set correctly, no need to test the entire condition matrix in this section
{
desc: "transition to ready",
@ -1324,7 +1371,7 @@ func TestReadyCondition(t *testing.T) {
})
}
// construct setter
setter := ReadyCondition(nowFunc, runtimeErrorsFunc, networkErrorsFunc, storageErrorsFunc, tc.appArmorValidateHostFunc, cmStatusFunc, nodeShutdownErrorsFunc, recordEventFunc)
setter := ReadyCondition(nowFunc, runtimeErrorsFunc, networkErrorsFunc, storageErrorsFunc, tc.appArmorValidateHostFunc, cmStatusFunc, nodeShutdownErrorsFunc, recordEventFunc, !tc.disableLocalStorageCapacityIsolation)
// call setter on node
if err := setter(tc.node); err != nil {
t.Fatalf("unexpected error: %v", err)

@ -130,7 +130,7 @@ func TestRunOnce(t *testing.T) {
return nil
}
fakeMirrodPodFunc := func(*v1.Pod) (*v1.Pod, bool) { return nil, false }
evictionManager, evictionAdmitHandler := eviction.NewManager(kb.resourceAnalyzer, eviction.Config{}, fakeKillPodFunc, fakeMirrodPodFunc, nil, nil, kb.recorder, nodeRef, kb.clock)
evictionManager, evictionAdmitHandler := eviction.NewManager(kb.resourceAnalyzer, eviction.Config{}, fakeKillPodFunc, fakeMirrodPodFunc, nil, nil, kb.recorder, nodeRef, kb.clock, kb.supportLocalStorageCapacityIsolation())

kb.evictionManager = evictionManager
kb.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)

@ -206,6 +206,7 @@ func GetHollowKubeletConfig(opt *HollowKubletOptions) (*options.KubeletFlags, *k
c.ProtectKernelDefaults = false
c.RegisterWithTaints = opt.RegisterWithTaints
c.RegisterNode = true
c.LocalStorageCapacityIsolation = true

return f, c
}

@ -25,9 +25,6 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/featuregate"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/framework"
plfeature "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
@ -575,7 +572,6 @@ func TestStorageRequests(t *testing.T) {
pod *v1.Pod
nodeInfo *framework.NodeInfo
name string
features map[featuregate.Feature]bool
wantStatus *framework.Status
}{
{
@ -599,13 +595,10 @@ func TestStorageRequests(t *testing.T) {
wantStatus: framework.NewStatus(framework.Unschedulable, getErrReason(v1.ResourceEphemeralStorage)),
},
{
pod: newResourceInitPod(newResourcePod(framework.Resource{EphemeralStorage: 25}), framework.Resource{EphemeralStorage: 25}),
pod: newResourceInitPod(newResourcePod(framework.Resource{EphemeralStorage: 5})),
nodeInfo: framework.NewNodeInfo(
newResourcePod(framework.Resource{MilliCPU: 2, Memory: 2})),
name: "ephemeral local storage request is ignored due to disabled feature gate",
features: map[featuregate.Feature]bool{
"LocalStorageCapacityIsolation": false,
},
newResourcePod(framework.Resource{MilliCPU: 2, Memory: 2, EphemeralStorage: 10})),
name: "ephemeral local storage is sufficient",
},
{
pod: newResourcePod(framework.Resource{EphemeralStorage: 10}),
@ -617,9 +610,6 @@ func TestStorageRequests(t *testing.T) {

for _, test := range storagePodsTests {
t.Run(test.name, func(t *testing.T) {
for k, v := range test.features {
defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, k, v)()
}
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)

@ -29,9 +29,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/features"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)

@ -444,10 +442,7 @@ func (r *Resource) Add(rl v1.ResourceList) {
case v1.ResourcePods:
r.AllowedPodNumber += int(rQuant.Value())
case v1.ResourceEphemeralStorage:
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
// if the local storage capacity isolation feature gate is disabled, pods request 0 disk.
r.EphemeralStorage += rQuant.Value()
}
r.EphemeralStorage += rQuant.Value()
default:
if schedutil.IsScalarResourceName(rName) {
r.AddScalar(rName, rQuant.Value())
@ -500,9 +495,7 @@ func (r *Resource) SetMaxResource(rl v1.ResourceList) {
case v1.ResourceCPU:
r.MilliCPU = max(r.MilliCPU, rQuantity.MilliValue())
case v1.ResourceEphemeralStorage:
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
r.EphemeralStorage = max(r.EphemeralStorage, rQuantity.Value())
}
r.EphemeralStorage = max(r.EphemeralStorage, rQuantity.Value())
default:
if schedutil.IsScalarResourceName(rName) {
r.SetScalar(rName, max(r.ScalarResources[rName], rQuantity.Value()))

@ -18,8 +18,6 @@ package util

import (
v1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
)

// For each of these resources, a pod that doesn't request the resource explicitly
@ -65,11 +63,6 @@ func GetRequestForResource(resource v1.ResourceName, requests *v1.ResourceList,
}
return requests.Memory().Value()
case v1.ResourceEphemeralStorage:
// if the local storage capacity isolation feature gate is disabled, pods request 0 disk.
if !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
return 0
}

quantity, found := (*requests)[v1.ResourceEphemeralStorage]
if !found {
return 0

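With the feature gate removed above, the scheduler reads ephemeral-storage requests unconditionally; the branch that forced a request of 0 disk when the gate was off is gone. A simplified, self-contained sketch of the resulting lookup follows; the function name is illustrative and not the scheduler's actual helper:

// Sketch of the post-GA behavior implied by the hunk above; hypothetical
// standalone helper, not part of the scheduler API.
package util

import (
	v1 "k8s.io/api/core/v1"
)

func ephemeralStorageRequest(requests v1.ResourceList) int64 {
	// A pod that does not request ephemeral storage still counts as zero disk.
	quantity, found := requests[v1.ResourceEphemeralStorage]
	if !found {
		return 0
	}
	return quantity.Value()
}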
@ -786,6 +786,17 @@ type KubeletConfiguration struct {
// +featureGate=KubeletTracing
// +optional
Tracing *tracingapi.TracingConfiguration `json:"tracing,omitempty"`

// LocalStorageCapacityIsolation enables the local ephemeral storage isolation feature. The default setting is true.
// This feature allows users to set request/limit for a container's ephemeral storage and manage it in a similar way
// to cpu and memory. It also allows setting a sizeLimit for emptyDir volumes, which will trigger pod eviction if disk
// usage from the volume exceeds the limit.
// This feature depends on the capability of detecting correct root file system disk usage. For certain systems,
// such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be
// disabled. Once disabled, users should not set request/limit for a container's ephemeral storage, or sizeLimit for emptyDir.
// Default: true
// +optional
LocalStorageCapacityIsolation *bool `json:"localStorageCapacityIsolation,omitempty"`
}

type KubeletAuthorizationMode string

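For reference, a minimal sketch of turning the new field off for a rootless setup. It assumes the versioned KubeletConfiguration type in k8s.io/kubelet/config/v1beta1 and the k8s.io/utils/pointer helper; both are assumptions for illustration and not shown in this diff.

// Illustrative only: disable local storage capacity isolation for a rootless
// kubelet. The default is true; rootless systems cannot measure the root
// filesystem, so the feature is switched off explicitly.
package main

import (
	"fmt"

	kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1"
	"k8s.io/utils/pointer"
)

func main() {
	cfg := kubeletconfigv1beta1.KubeletConfiguration{}
	cfg.LocalStorageCapacityIsolation = pointer.Bool(false)
	fmt.Println(*cfg.LocalStorageCapacityIsolation)
}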
@ -452,6 +452,11 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
*out = new(apiv1.TracingConfiguration)
(*in).DeepCopyInto(*out)
}
if in.LocalStorageCapacityIsolation != nil {
in, out := &in.LocalStorageCapacityIsolation, &out.LocalStorageCapacityIsolation
*out = new(bool)
**out = **in
}
return
}