diff --git a/cmd/kubelet/app/options/options_test.go b/cmd/kubelet/app/options/options_test.go index bd7a4af4953..3ec297cd172 100644 --- a/cmd/kubelet/app/options/options_test.go +++ b/cmd/kubelet/app/options/options_test.go @@ -100,8 +100,8 @@ func TestRoundTrip(t *testing.T) { } continue } - if !reflect.DeepEqual(modifiedFlags, outputFlags) { - t.Errorf("%s: flags did not round trip: %s", testCase.name, cmp.Diff(modifiedFlags, outputFlags)) + if !reflect.DeepEqual(modifiedFlags.KubeletFlags, outputFlags.KubeletFlags) { + t.Errorf("%s: flags did not round trip: %s", testCase.name, cmp.Diff(modifiedFlags.KubeletFlags, outputFlags.KubeletFlags)) continue } } diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index cfb8d3f37bc..cfcf6e7d5cd 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -367,6 +367,7 @@ func mergeKubeletConfigurations(kubeletConfig *kubeletconfiginternal.KubeletConf } // apply defaulting after decoding kubeletconfigv1beta1conversion.SetDefaults_KubeletConfiguration(versionedConfig) + // convert back to internal config if err := kubeletconfigv1beta1conversion.Convert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(versionedConfig, kubeletConfig, nil); err != nil { return fmt.Errorf("failed to convert merged config to internal kubelet configuration: %w", err) diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 5411f576031..f9838d90796 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -232,6 +232,13 @@ const ( // status from DRA drivers. 
DRAResourceClaimDeviceStatus featuregate.Feature = "DRAResourceClaimDeviceStatus" + // owner: @lauralorenz + // kep: https://kep.k8s.io/4603 + // + // Enables support for configurable per-node backoff maximums for restarting + // containers (aka containers in CrashLoopBackOff) + KubeletCrashLoopBackOffMax featuregate.Feature = "KubeletCrashLoopBackOffMax" + // owner: @harche // kep: http://kep.k8s.io/3386 // diff --git a/pkg/features/versioned_kube_features.go b/pkg/features/versioned_kube_features.go index 360304cf95d..9b8bbc6958f 100644 --- a/pkg/features/versioned_kube_features.go +++ b/pkg/features/versioned_kube_features.go @@ -187,6 +187,10 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate {Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Alpha}, }, + KubeletCrashLoopBackOffMax: { + {Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Alpha}, + }, + ElasticIndexedJob: { {Version: version.MustParse("1.27"), Default: true, PreRelease: featuregate.Beta}, {Version: version.MustParse("1.31"), Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // GA in 1.31, remove in 1.32 diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index 455da7915f8..0d2b2d3d0d7 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -1239,6 +1239,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "k8s.io/kubelet/config/v1alpha1.CredentialProvider": schema_k8sio_kubelet_config_v1alpha1_CredentialProvider(ref), "k8s.io/kubelet/config/v1alpha1.CredentialProviderConfig": schema_k8sio_kubelet_config_v1alpha1_CredentialProviderConfig(ref), "k8s.io/kubelet/config/v1alpha1.ExecEnvVar": schema_k8sio_kubelet_config_v1alpha1_ExecEnvVar(ref), + "k8s.io/kubelet/config/v1beta1.CrashLoopBackOffConfig": 
schema_k8sio_kubelet_config_v1beta1_CrashLoopBackOffConfig(ref), "k8s.io/kubelet/config/v1beta1.CredentialProvider": schema_k8sio_kubelet_config_v1beta1_CredentialProvider(ref), "k8s.io/kubelet/config/v1beta1.CredentialProviderConfig": schema_k8sio_kubelet_config_v1beta1_CredentialProviderConfig(ref), "k8s.io/kubelet/config/v1beta1.ExecEnvVar": schema_k8sio_kubelet_config_v1beta1_ExecEnvVar(ref), @@ -63468,6 +63469,26 @@ func schema_k8sio_kubelet_config_v1alpha1_ExecEnvVar(ref common.ReferenceCallbac } } +func schema_k8sio_kubelet_config_v1beta1_CrashLoopBackOffConfig(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "maxContainerRestartPeriod": { + SchemaProps: spec.SchemaProps{ + Description: "maxContainerRestartPeriod is the maximum duration the backoff delay can accrue to for container restarts, minimum 1 second, maximum 300 seconds. If not set, defaults to the internal crashloopbackoff maximum (300s).", + Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), + }, + }, + }, + }, + }, + Dependencies: []string{ + "k8s.io/apimachinery/pkg/apis/meta/v1.Duration"}, + } +} + func schema_k8sio_kubelet_config_v1beta1_CredentialProvider(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ @@ -64596,6 +64617,13 @@ func schema_k8sio_kubelet_config_v1beta1_KubeletConfiguration(ref common.Referen }, }, }, + "crashLoopBackOff": { + SchemaProps: spec.SchemaProps{ + Description: "CrashLoopBackOff contains config to modify node-level parameters for container restart behavior", + Default: map[string]interface{}{}, + Ref: ref("k8s.io/kubelet/config/v1beta1.CrashLoopBackOffConfig"), + }, + }, "reservedMemory": { SchemaProps: spec.SchemaProps{ Description: "reservedMemory specifies a comma-separated list of memory reservations for NUMA nodes. 
The parameter makes sense only in the context of the memory manager feature. The memory manager will not allocate reserved memory for container workloads. For example, if you have a NUMA0 with 10Gi of memory and the reservedMemory was specified to reserve 1Gi of memory at NUMA0, the memory manager will assume that only 9Gi is available for allocation. You can specify a different amount of NUMA node and memory types. You can omit this parameter at all, but you should be aware that the amount of reserved memory from all NUMA nodes should be equal to the amount of memory specified by the [node allocatable](https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/#node-allocatable). If at least one node allocatable parameter has a non-zero value, you will need to specify at least one NUMA node. Also, avoid specifying:\n\n1. Duplicates, the same NUMA node, and memory type, but with a different value. 2. zero limits for any memory type. 3. NUMAs nodes IDs that do not exist under the machine. 4. 
memory types except for memory and hugepages-\n\nDefault: nil", @@ -64699,7 +64727,7 @@ func schema_k8sio_kubelet_config_v1beta1_KubeletConfiguration(ref common.Referen }, }, Dependencies: []string{ - "k8s.io/api/core/v1.Taint", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/component-base/logs/api/v1.LoggingConfiguration", "k8s.io/component-base/tracing/api/v1.TracingConfiguration", "k8s.io/kubelet/config/v1beta1.KubeletAuthentication", "k8s.io/kubelet/config/v1beta1.KubeletAuthorization", "k8s.io/kubelet/config/v1beta1.MemoryReservation", "k8s.io/kubelet/config/v1beta1.MemorySwapConfiguration", "k8s.io/kubelet/config/v1beta1.ShutdownGracePeriodByPodPriority"}, + "k8s.io/api/core/v1.Taint", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/component-base/logs/api/v1.LoggingConfiguration", "k8s.io/component-base/tracing/api/v1.TracingConfiguration", "k8s.io/kubelet/config/v1beta1.CrashLoopBackOffConfig", "k8s.io/kubelet/config/v1beta1.KubeletAuthentication", "k8s.io/kubelet/config/v1beta1.KubeletAuthorization", "k8s.io/kubelet/config/v1beta1.MemoryReservation", "k8s.io/kubelet/config/v1beta1.MemorySwapConfiguration", "k8s.io/kubelet/config/v1beta1.ShutdownGracePeriodByPodPriority"}, } } diff --git a/pkg/kubelet/apis/config/fuzzer/fuzzer.go b/pkg/kubelet/apis/config/fuzzer/fuzzer.go index efa8120df3a..474d6db2ef0 100644 --- a/pkg/kubelet/apis/config/fuzzer/fuzzer.go +++ b/pkg/kubelet/apis/config/fuzzer/fuzzer.go @@ -121,6 +121,10 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} { obj.EnableSystemLogHandler = true obj.MemoryThrottlingFactor = ptr.To(rand.Float64()) obj.LocalStorageCapacityIsolation = true + obj.FeatureGates = map[string]bool{ + "AllAlpha": false, + "AllBeta": true, + } }, } } diff --git a/pkg/kubelet/apis/config/helpers_test.go b/pkg/kubelet/apis/config/helpers_test.go index c601cde5daa..3be9d8a392e 100644 --- a/pkg/kubelet/apis/config/helpers_test.go +++ b/pkg/kubelet/apis/config/helpers_test.go @@ -302,5 +302,6 @@ 
var ( "Tracing.SamplingRatePerMillion", "LocalStorageCapacityIsolation", "FailCgroupV1", + "CrashLoopBackOff.MaxContainerRestartPeriod", ) ) diff --git a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml index bd3637e9091..09b9e4ff839 100644 --- a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml +++ b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml @@ -25,6 +25,7 @@ cpuCFSQuota: true cpuCFSQuotaPeriod: 100ms cpuManagerPolicy: none cpuManagerReconcilePeriod: 10s +crashLoopBackOff: {} enableControllerAttachDetach: true enableDebugFlagsHandler: true enableDebuggingHandlers: true diff --git a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml index f733b6ef09b..99ff9671f8b 100644 --- a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml +++ b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml @@ -25,6 +25,7 @@ cpuCFSQuota: true cpuCFSQuotaPeriod: 100ms cpuManagerPolicy: none cpuManagerReconcilePeriod: 10s +crashLoopBackOff: {} enableControllerAttachDetach: true enableDebugFlagsHandler: true enableDebuggingHandlers: true diff --git a/pkg/kubelet/apis/config/types.go b/pkg/kubelet/apis/config/types.go index afc3606ef65..94ed741d05b 100644 --- a/pkg/kubelet/apis/config/types.go +++ b/pkg/kubelet/apis/config/types.go @@ -506,6 +506,12 @@ type KubeletConfiguration struct { // option is explicitly enabled. 
// +optional FailCgroupV1 bool + + // CrashLoopBackOff contains config to modify node-level parameters for + // container restart behavior + // +featureGate=KubeletCrashLoopBackOffMax + // +optional + CrashLoopBackOff CrashLoopBackOffConfig } // KubeletAuthorizationMode denotes the authorization mode for the kubelet @@ -684,3 +690,13 @@ type MemorySwapConfiguration struct { // +optional SwapBehavior string } + +// CrashLoopBackOffConfig is used for setting configuration for this kubelet's +// container restart behavior +type CrashLoopBackOffConfig struct { + // MaxContainerRestartPeriod is the maximum duration the backoff delay can accrue + // to for container restarts, minimum 1 second, maximum 300 seconds. + // +featureGate=KubeletCrashLoopBackOffMax + // +optional + MaxContainerRestartPeriod *metav1.Duration +} diff --git a/pkg/kubelet/apis/config/v1beta1/defaults.go b/pkg/kubelet/apis/config/v1beta1/defaults.go index 193bc1cec66..960b1489d30 100644 --- a/pkg/kubelet/apis/config/v1beta1/defaults.go +++ b/pkg/kubelet/apis/config/v1beta1/defaults.go @@ -17,6 +17,7 @@ limitations under the License. 
package v1beta1 import ( + "fmt" "time" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -24,8 +25,10 @@ import ( kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1" // TODO: Cut references to k8s.io/kubernetes, eventually there should be none from this package + utilfeature "k8s.io/apiserver/pkg/util/feature" logsapi "k8s.io/component-base/logs/api/v1" "k8s.io/kubernetes/pkg/cluster/ports" + "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/qos" kubetypes "k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/utils/ptr" @@ -39,6 +42,8 @@ const ( DefaultPodLogsDir = "/var/log/pods" // See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2570-memory-qos DefaultMemoryThrottlingFactor = 0.9 + // MaxContainerBackOff is the max backoff period for container restarts, exported for the e2e test + MaxContainerBackOff = 300 * time.Second ) var ( @@ -53,6 +58,19 @@ func addDefaultingFuncs(scheme *kruntime.Scheme) error { } func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfiguration) { + + // TODO(lauralorenz): Reassess conditional feature gating on defaults. Here + // we 1) copy the gates to a local var, unilaterally merge it with the gate + // config while being defaulted. Alternatively we could unilaterally set the + // default value, later check the gate and wipe it if needed, like API + // strategy does for gate-disabled fields. Meanwhile, KubeletConfiguration + // is increasingly dynamic and the configured gates may change depending on + // when this is called. See also validation.go. 
+ localFeatureGate := utilfeature.DefaultMutableFeatureGate.DeepCopy() + if err := localFeatureGate.SetFromMap(obj.FeatureGates); err != nil { + panic(fmt.Sprintf("failed to merge global and in-flight KubeletConfiguration while setting defaults, error: %v", err)) + } + if obj.EnableServer == nil { obj.EnableServer = ptr.To(true) } @@ -286,4 +304,10 @@ func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfigura if obj.PodLogsDir == "" { obj.PodLogsDir = DefaultPodLogsDir } + + if localFeatureGate.Enabled(features.KubeletCrashLoopBackOffMax) { + if obj.CrashLoopBackOff.MaxContainerRestartPeriod == nil { + obj.CrashLoopBackOff.MaxContainerRestartPeriod = &metav1.Duration{Duration: MaxContainerBackOff} + } + } } diff --git a/pkg/kubelet/apis/config/v1beta1/defaults_test.go b/pkg/kubelet/apis/config/v1beta1/defaults_test.go index 808980a41b4..c33c5c730ef 100644 --- a/pkg/kubelet/apis/config/v1beta1/defaults_test.go +++ b/pkg/kubelet/apis/config/v1beta1/defaults_test.go @@ -130,6 +130,7 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { LocalStorageCapacityIsolation: ptr.To(true), PodLogsDir: DefaultPodLogsDir, SingleProcessOOMKill: nil, + CrashLoopBackOff: v1beta1.CrashLoopBackOffConfig{}, }, }, { @@ -365,11 +366,13 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { LocalStorageCapacityIsolation: ptr.To(false), PodLogsDir: DefaultPodLogsDir, SingleProcessOOMKill: ptr.To(false), + CrashLoopBackOff: v1beta1.CrashLoopBackOffConfig{}, }, }, { "all positive", &v1beta1.KubeletConfiguration{ + FeatureGates: map[string]bool{"KubeletCrashLoopBackOffMax": true}, EnableServer: ptr.To(true), StaticPodPath: "static/pod/path", SyncFrequency: metav1.Duration{Duration: 60 * time.Second}, @@ -519,8 +522,12 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { LocalStorageCapacityIsolation: ptr.To(true), PodLogsDir: "/custom/path", SingleProcessOOMKill: ptr.To(true), + CrashLoopBackOff: v1beta1.CrashLoopBackOffConfig{ + 
MaxContainerRestartPeriod: &metav1.Duration{Duration: 55 * time.Second}, + }, }, &v1beta1.KubeletConfiguration{ + FeatureGates: map[string]bool{"KubeletCrashLoopBackOffMax": true}, EnableServer: ptr.To(true), StaticPodPath: "static/pod/path", SyncFrequency: metav1.Duration{Duration: 60 * time.Second}, @@ -670,6 +677,9 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { LocalStorageCapacityIsolation: ptr.To(true), PodLogsDir: "/custom/path", SingleProcessOOMKill: ptr.To(true), + CrashLoopBackOff: v1beta1.CrashLoopBackOffConfig{ + MaxContainerRestartPeriod: &metav1.Duration{Duration: 55 * time.Second}, + }, }, }, { @@ -764,6 +774,7 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { LocalStorageCapacityIsolation: ptr.To(true), PodLogsDir: DefaultPodLogsDir, SingleProcessOOMKill: nil, + CrashLoopBackOff: v1beta1.CrashLoopBackOffConfig{}, }, }, { @@ -858,6 +869,7 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { LocalStorageCapacityIsolation: ptr.To(true), PodLogsDir: DefaultPodLogsDir, SingleProcessOOMKill: nil, + CrashLoopBackOff: v1beta1.CrashLoopBackOffConfig{}, }, }, { @@ -951,7 +963,105 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { RegisterNode: ptr.To(true), LocalStorageCapacityIsolation: ptr.To(true), PodLogsDir: DefaultPodLogsDir, + CrashLoopBackOff: v1beta1.CrashLoopBackOffConfig{}, + }, + }, + { + "CrashLoopBackOff.MaxContainerRestartPeriod defaults to internal default when feature gate enabled", + &v1beta1.KubeletConfiguration{ + FeatureGates: map[string]bool{"KubeletCrashLoopBackOffMax": true}, + }, + &v1beta1.KubeletConfiguration{ + FeatureGates: map[string]bool{"KubeletCrashLoopBackOffMax": true}, + EnableServer: ptr.To(true), + SyncFrequency: metav1.Duration{Duration: 1 * time.Minute}, + FileCheckFrequency: metav1.Duration{Duration: 20 * time.Second}, + HTTPCheckFrequency: metav1.Duration{Duration: 20 * time.Second}, + Address: "0.0.0.0", + Port: ports.KubeletPort, + Authentication: v1beta1.KubeletAuthentication{ 
+ Anonymous: v1beta1.KubeletAnonymousAuthentication{Enabled: ptr.To(false)}, + Webhook: v1beta1.KubeletWebhookAuthentication{ + Enabled: ptr.To(true), + CacheTTL: metav1.Duration{Duration: 2 * time.Minute}, + }, + }, + Authorization: v1beta1.KubeletAuthorization{ + Mode: v1beta1.KubeletAuthorizationModeWebhook, + Webhook: v1beta1.KubeletWebhookAuthorization{ + CacheAuthorizedTTL: metav1.Duration{Duration: 5 * time.Minute}, + CacheUnauthorizedTTL: metav1.Duration{Duration: 30 * time.Second}, + }, + }, + RegistryPullQPS: ptr.To[int32](5), + RegistryBurst: 10, + EventRecordQPS: ptr.To[int32](50), + EventBurst: 100, + EnableDebuggingHandlers: ptr.To(true), + HealthzPort: ptr.To[int32](10248), + HealthzBindAddress: "127.0.0.1", + OOMScoreAdj: ptr.To(int32(qos.KubeletOOMScoreAdj)), + StreamingConnectionIdleTimeout: metav1.Duration{Duration: 4 * time.Hour}, + NodeStatusUpdateFrequency: metav1.Duration{Duration: 10 * time.Second}, + NodeStatusReportFrequency: metav1.Duration{Duration: 5 * time.Minute}, + NodeLeaseDurationSeconds: 40, + ContainerRuntimeEndpoint: "unix:///run/containerd/containerd.sock", + ImageMinimumGCAge: metav1.Duration{Duration: 2 * time.Minute}, + ImageGCHighThresholdPercent: ptr.To[int32](85), + ImageGCLowThresholdPercent: ptr.To[int32](80), + VolumeStatsAggPeriod: metav1.Duration{Duration: time.Minute}, + CgroupsPerQOS: ptr.To(true), + CgroupDriver: "cgroupfs", + CPUManagerPolicy: "none", + CPUManagerReconcilePeriod: metav1.Duration{Duration: 10 * time.Second}, + MemoryManagerPolicy: v1beta1.NoneMemoryManagerPolicy, + TopologyManagerPolicy: v1beta1.NoneTopologyManagerPolicy, + TopologyManagerScope: v1beta1.ContainerTopologyManagerScope, + RuntimeRequestTimeout: metav1.Duration{Duration: 2 * time.Minute}, + HairpinMode: v1beta1.PromiscuousBridge, + MaxPods: 110, + PodPidsLimit: ptr.To[int64](-1), + ResolverConfig: ptr.To(kubetypes.ResolvConfDefault), + CPUCFSQuota: ptr.To(true), + CPUCFSQuotaPeriod: &metav1.Duration{Duration: 100 * time.Millisecond}, 
+ NodeStatusMaxImages: ptr.To[int32](50), + MaxOpenFiles: 1000000, + ContentType: "application/vnd.kubernetes.protobuf", + KubeAPIQPS: ptr.To[int32](50), + KubeAPIBurst: 100, + SerializeImagePulls: ptr.To(true), + MaxParallelImagePulls: nil, + EvictionHard: nil, + EvictionPressureTransitionPeriod: metav1.Duration{Duration: 5 * time.Minute}, + EnableControllerAttachDetach: ptr.To(true), + MakeIPTablesUtilChains: ptr.To(true), + IPTablesMasqueradeBit: ptr.To[int32](DefaultIPTablesMasqueradeBit), + IPTablesDropBit: ptr.To[int32](DefaultIPTablesDropBit), + FailSwapOn: ptr.To(true), + ContainerLogMaxSize: "10Mi", + ContainerLogMaxFiles: ptr.To[int32](5), + ContainerLogMaxWorkers: ptr.To[int32](1), + ContainerLogMonitorInterval: &metav1.Duration{Duration: 10 * time.Second}, + ConfigMapAndSecretChangeDetectionStrategy: v1beta1.WatchChangeDetectionStrategy, + EnforceNodeAllocatable: DefaultNodeAllocatableEnforcement, + VolumePluginDir: DefaultVolumePluginDir, + Logging: logsapi.LoggingConfiguration{ + Format: "text", + FlushFrequency: logsapi.TimeOrMetaDuration{Duration: metav1.Duration{Duration: 5 * time.Second}, SerializeAsString: true}, + }, + EnableSystemLogHandler: ptr.To(true), + EnableProfilingHandler: ptr.To(true), + EnableDebugFlagsHandler: ptr.To(true), + SeccompDefault: ptr.To(false), + FailCgroupV1: ptr.To(false), + MemoryThrottlingFactor: ptr.To(DefaultMemoryThrottlingFactor), + RegisterNode: ptr.To(true), + LocalStorageCapacityIsolation: ptr.To(true), + PodLogsDir: DefaultPodLogsDir, SingleProcessOOMKill: nil, + CrashLoopBackOff: v1beta1.CrashLoopBackOffConfig{ + MaxContainerRestartPeriod: &metav1.Duration{Duration: MaxContainerBackOff}, + }, }, }, } diff --git a/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go b/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go index 2b904b093b3..347a3d54dec 100644 --- a/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go +++ b/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go @@ -40,6 
+40,16 @@ func init() { // RegisterConversions adds conversion functions to the given scheme. // Public to allow building arbitrary schemes. func RegisterConversions(s *runtime.Scheme) error { + if err := s.AddGeneratedConversionFunc((*configv1beta1.CrashLoopBackOffConfig)(nil), (*config.CrashLoopBackOffConfig)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta1_CrashLoopBackOffConfig_To_config_CrashLoopBackOffConfig(a.(*configv1beta1.CrashLoopBackOffConfig), b.(*config.CrashLoopBackOffConfig), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*config.CrashLoopBackOffConfig)(nil), (*configv1beta1.CrashLoopBackOffConfig)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_config_CrashLoopBackOffConfig_To_v1beta1_CrashLoopBackOffConfig(a.(*config.CrashLoopBackOffConfig), b.(*configv1beta1.CrashLoopBackOffConfig), scope) + }); err != nil { + return err + } if err := s.AddGeneratedConversionFunc((*configv1beta1.CredentialProvider)(nil), (*config.CredentialProvider)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta1_CredentialProvider_To_config_CredentialProvider(a.(*configv1beta1.CredentialProvider), b.(*config.CredentialProvider), scope) }); err != nil { @@ -183,6 +193,26 @@ func RegisterConversions(s *runtime.Scheme) error { return nil } +func autoConvert_v1beta1_CrashLoopBackOffConfig_To_config_CrashLoopBackOffConfig(in *configv1beta1.CrashLoopBackOffConfig, out *config.CrashLoopBackOffConfig, s conversion.Scope) error { + out.MaxContainerRestartPeriod = (*v1.Duration)(unsafe.Pointer(in.MaxContainerRestartPeriod)) + return nil +} + +// Convert_v1beta1_CrashLoopBackOffConfig_To_config_CrashLoopBackOffConfig is an autogenerated conversion function. 
+func Convert_v1beta1_CrashLoopBackOffConfig_To_config_CrashLoopBackOffConfig(in *configv1beta1.CrashLoopBackOffConfig, out *config.CrashLoopBackOffConfig, s conversion.Scope) error { + return autoConvert_v1beta1_CrashLoopBackOffConfig_To_config_CrashLoopBackOffConfig(in, out, s) +} + +func autoConvert_config_CrashLoopBackOffConfig_To_v1beta1_CrashLoopBackOffConfig(in *config.CrashLoopBackOffConfig, out *configv1beta1.CrashLoopBackOffConfig, s conversion.Scope) error { + out.MaxContainerRestartPeriod = (*v1.Duration)(unsafe.Pointer(in.MaxContainerRestartPeriod)) + return nil +} + +// Convert_config_CrashLoopBackOffConfig_To_v1beta1_CrashLoopBackOffConfig is an autogenerated conversion function. +func Convert_config_CrashLoopBackOffConfig_To_v1beta1_CrashLoopBackOffConfig(in *config.CrashLoopBackOffConfig, out *configv1beta1.CrashLoopBackOffConfig, s conversion.Scope) error { + return autoConvert_config_CrashLoopBackOffConfig_To_v1beta1_CrashLoopBackOffConfig(in, out, s) +} + func autoConvert_v1beta1_CredentialProvider_To_config_CredentialProvider(in *configv1beta1.CredentialProvider, out *config.CredentialProvider, s conversion.Scope) error { out.Name = in.Name out.MatchImages = *(*[]string)(unsafe.Pointer(&in.MatchImages)) @@ -506,6 +536,9 @@ func autoConvert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(in out.ShutdownGracePeriod = in.ShutdownGracePeriod out.ShutdownGracePeriodCriticalPods = in.ShutdownGracePeriodCriticalPods out.ShutdownGracePeriodByPodPriority = *(*[]config.ShutdownGracePeriodByPodPriority)(unsafe.Pointer(&in.ShutdownGracePeriodByPodPriority)) + if err := Convert_v1beta1_CrashLoopBackOffConfig_To_config_CrashLoopBackOffConfig(&in.CrashLoopBackOff, &out.CrashLoopBackOff, s); err != nil { + return err + } out.ReservedMemory = *(*[]config.MemoryReservation)(unsafe.Pointer(&in.ReservedMemory)) if err := v1.Convert_Pointer_bool_To_bool(&in.EnableProfilingHandler, &out.EnableProfilingHandler, s); err != nil { return err @@ -727,6 +760,9 
@@ func autoConvert_config_KubeletConfiguration_To_v1beta1_KubeletConfiguration(in if err := v1.Convert_bool_To_Pointer_bool(&in.FailCgroupV1, &out.FailCgroupV1, s); err != nil { return err } + if err := Convert_config_CrashLoopBackOffConfig_To_v1beta1_CrashLoopBackOffConfig(&in.CrashLoopBackOff, &out.CrashLoopBackOff, s); err != nil { + return err + } return nil } diff --git a/pkg/kubelet/apis/config/validation/validation.go b/pkg/kubelet/apis/config/validation/validation.go index 8c9fa35ff75..6633b4e6341 100644 --- a/pkg/kubelet/apis/config/validation/validation.go +++ b/pkg/kubelet/apis/config/validation/validation.go @@ -46,6 +46,12 @@ var ( func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration, featureGate featuregate.FeatureGate) error { allErrors := []error{} + // TODO(lauralorenz): Reassess / confirm interpretation of feature gates + // depending on where we are in merging dynamic KubeletConfiguration when + // this is called. Here we copy the gates to a local var, and unilaterally + // merge it with the current gate config to test. See also defaults.go which + // intersects with this assumption. + // Make a local copy of the feature gates and combine it with the gates set by this configuration. // This allows us to validate the config against the set of gates it will actually run against. 
localFeatureGate := featureGate.DeepCopy() @@ -206,6 +212,17 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration, featur allErrors = append(allErrors, fmt.Errorf("invalid configuration: memorySwap.swapBehavior cannot be set when NodeSwap feature flag is disabled")) } + if localFeatureGate.Enabled(features.KubeletCrashLoopBackOffMax) { + if kc.CrashLoopBackOff.MaxContainerRestartPeriod == nil { + allErrors = append(allErrors, fmt.Errorf("invalid configuration: FeatureGate KubeletCrashLoopBackOffMax is enabled, CrashLoopBackOff.MaxContainerRestartPeriod must be set")) + } + if kc.CrashLoopBackOff.MaxContainerRestartPeriod != nil && utilvalidation.IsInRange(int(kc.CrashLoopBackOff.MaxContainerRestartPeriod.Duration.Milliseconds()), 1000, 300000) != nil { + allErrors = append(allErrors, fmt.Errorf("invalid configuration: CrashLoopBackOff.MaxContainerRestartPeriod (got: %v seconds) must be set between 1s and 300s", kc.CrashLoopBackOff.MaxContainerRestartPeriod.Seconds())) + } + } else if kc.CrashLoopBackOff.MaxContainerRestartPeriod != nil { + allErrors = append(allErrors, fmt.Errorf("invalid configuration: FeatureGate KubeletCrashLoopBackOffMax not enabled, CrashLoopBackOff.MaxContainerRestartPeriod must not be set")) + } + // Check for mutually exclusive keys before the main validation loop reservedKeys := map[string]bool{ kubetypes.SystemReservedEnforcementKey: false, diff --git a/pkg/kubelet/apis/config/validation/validation_test.go b/pkg/kubelet/apis/config/validation/validation_test.go index ff17f53aa16..802c5506d37 100644 --- a/pkg/kubelet/apis/config/validation/validation_test.go +++ b/pkg/kubelet/apis/config/validation/validation_test.go @@ -69,9 +69,10 @@ var ( ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 10 * time.Second}, MemoryThrottlingFactor: ptr.To(0.9), FeatureGates: map[string]bool{ - "CustomCPUCFSQuotaPeriod": true, - "GracefulNodeShutdown": true, - "MemoryQoS": true, + "CustomCPUCFSQuotaPeriod": true, + 
"GracefulNodeShutdown": true, + "MemoryQoS": true, + "KubeletCrashLoopBackOffMax": true, }, Logging: logsapi.LoggingConfiguration{ Format: "text", @@ -80,6 +81,9 @@ var ( ContainerLogMaxWorkers: 1, ContainerLogMonitorInterval: metav1.Duration{Duration: 10 * time.Second}, SingleProcessOOMKill: ptr.To(!kubeletutil.IsCgroup2UnifiedMode()), + CrashLoopBackOff: kubeletconfig.CrashLoopBackOffConfig{ + MaxContainerRestartPeriod: &metav1.Duration{Duration: 3 * time.Second}, + }, } ) @@ -379,286 +383,345 @@ func TestValidateKubeletConfiguration(t *testing.T) { }, errMsg: "invalid configuration: memorySwap.swapBehavior cannot be set when NodeSwap feature flag is disabled", }, { - name: "specify SystemReservedEnforcementKey without specifying SystemReservedCgroup", + name: "CrashLoopBackOff.MaxContainerRestartPeriod too low", configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.EnforceNodeAllocatable = []string{kubetypes.SystemReservedEnforcementKey} - conf.SystemReservedCgroup = "" + conf.FeatureGates = map[string]bool{"KubeletCrashLoopBackOffMax": true} + conf.CrashLoopBackOff = kubeletconfig.CrashLoopBackOffConfig{ + MaxContainerRestartPeriod: &metav1.Duration{Duration: 0 * time.Second}, + } return conf }, - errMsg: "invalid configuration: systemReservedCgroup (--system-reserved-cgroup) must be specified when \"system-reserved\" or \"system-reserved-compressible\" included in enforceNodeAllocatable (--enforce-node-allocatable)", + errMsg: "invalid configuration: CrashLoopBackOff.MaxContainerRestartPeriod (got: 0 seconds) must be set between 1s and 300s", }, { - name: "specify SystemReservedCompressibleEnforcementKey without specifying SystemReservedCgroup", + name: "CrashLoopBackOff.MaxContainerRestartPeriod too high", configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.EnforceNodeAllocatable = []string{kubetypes.SystemReservedCompressibleEnforcementKey} - 
conf.SystemReservedCgroup = "" + conf.FeatureGates = map[string]bool{"KubeletCrashLoopBackOffMax": true} + conf.CrashLoopBackOff = kubeletconfig.CrashLoopBackOffConfig{ + MaxContainerRestartPeriod: &metav1.Duration{Duration: 301 * time.Second}, + } return conf }, - errMsg: "invalid configuration: systemReservedCgroup (--system-reserved-cgroup) must be specified when \"system-reserved\" or \"system-reserved-compressible\" included in enforceNodeAllocatable (--enforce-node-allocatable)", + errMsg: "invalid configuration: CrashLoopBackOff.MaxContainerRestartPeriod (got: 301 seconds) must be set between 1s and 300s", }, { - name: "specify SystemReservedCompressibleEnforcementKey with SystemReservedEnforcementKey", + name: "CrashLoopBackOff.MaxContainerRestartPeriod just a little too high", configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.EnforceNodeAllocatable = []string{kubetypes.SystemReservedCompressibleEnforcementKey, kubetypes.SystemReservedEnforcementKey} + conf.FeatureGates = map[string]bool{"KubeletCrashLoopBackOffMax": true, "CustomCPUCFSQuotaPeriod": true} + conf.CrashLoopBackOff = kubeletconfig.CrashLoopBackOffConfig{ + // 300.9 seconds + MaxContainerRestartPeriod: &metav1.Duration{Duration: 300900 * time.Millisecond}, + } return conf }, - errMsg: "invalid configuration: both \"system-reserved\" and \"system-reserved-compressible\" cannot be specified together in enforceNodeAllocatable (--enforce-node-allocatable)", - }, { - name: "specify KubeReservedCompressibleEnforcementKey without specifying KubeReservedCgroup", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.EnforceNodeAllocatable = []string{kubetypes.KubeReservedCompressibleEnforcementKey} - conf.KubeReservedCgroup = "" - return conf - }, - errMsg: "invalid configuration: kubeReservedCgroup (--kube-reserved-cgroup) must be specified when \"kube-reserved\" or \"kube-reserved-compressible\" 
included in enforceNodeAllocatable (--enforce-node-allocatable)", - }, { - name: "specify KubeReservedEnforcementKey without specifying KubeReservedCgroup", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.EnforceNodeAllocatable = []string{kubetypes.KubeReservedEnforcementKey} - conf.KubeReservedCgroup = "" - return conf - }, - errMsg: "invalid configuration: kubeReservedCgroup (--kube-reserved-cgroup) must be specified when \"kube-reserved\" or \"kube-reserved-compressible\" included in enforceNodeAllocatable (--enforce-node-allocatable)", - }, { - name: "specify KubeReservedCompressibleEnforcementKey with KubeReservedEnforcementKey", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.EnforceNodeAllocatable = []string{kubetypes.KubeReservedCompressibleEnforcementKey, kubetypes.KubeReservedEnforcementKey} - return conf - }, - errMsg: "invalid configuration: both \"kube-reserved\" and \"kube-reserved-compressible\" cannot be specified together in enforceNodeAllocatable (--enforce-node-allocatable)", - }, { - name: "specify NodeAllocatableNoneKey with additional enforcements", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.EnforceNodeAllocatable = []string{kubetypes.NodeAllocatableNoneKey, kubetypes.KubeReservedEnforcementKey} - return conf - }, - errMsg: "invalid configuration: enforceNodeAllocatable (--enforce-node-allocatable) may not contain additional enforcements when \"none\" is specified", - }, { - name: "duplicated EnforceNodeAllocatable", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.EnforceNodeAllocatable = []string{kubetypes.NodeAllocatableNoneKey, kubetypes.NodeAllocatableNoneKey} - return conf - }, - errMsg: "invalid configuration: duplicated enforcements \"none\" in enforceNodeAllocatable (--enforce-node-allocatable)", - }, { - 
name: "invalid EnforceNodeAllocatable", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.EnforceNodeAllocatable = []string{"invalid-enforce-node-allocatable"} - return conf - }, - errMsg: "invalid configuration: option \"invalid-enforce-node-allocatable\" specified for enforceNodeAllocatable (--enforce-node-allocatable). Valid options are \"pods\", \"system-reserved\", \"system-reserved-compressible\", \"kube-reserved\", \"kube-reserved-compressible\" or \"none\"", - }, { - name: "invalid HairpinMode", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.HairpinMode = "invalid-hair-pin-mode" - return conf - }, - errMsg: "invalid configuration: option \"invalid-hair-pin-mode\" specified for hairpinMode (--hairpin-mode). Valid options are \"none\", \"hairpin-veth\" or \"promiscuous-bridge\"", - }, { - name: "specify ReservedSystemCPUs with SystemReservedCgroup", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.ReservedSystemCPUs = "0-3" - conf.SystemReservedCgroup = "/system.slice" - return conf - }, - errMsg: "invalid configuration: can't use reservedSystemCPUs (--reserved-cpus) with systemReservedCgroup (--system-reserved-cgroup) or kubeReservedCgroup (--kube-reserved-cgroup)", - }, { - name: "specify ReservedSystemCPUs with KubeReservedCgroup", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.ReservedSystemCPUs = "0-3" - conf.KubeReservedCgroup = "/system.slice" - return conf - }, - errMsg: "invalid configuration: can't use reservedSystemCPUs (--reserved-cpus) with systemReservedCgroup (--system-reserved-cgroup) or kubeReservedCgroup (--kube-reserved-cgroup)", - }, { - name: "invalid ReservedSystemCPUs", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.ReservedSystemCPUs = 
"invalid-reserved-system-cpus" - return conf - }, - errMsg: "invalid configuration: unable to parse reservedSystemCPUs (--reserved-cpus) invalid-reserved-system-cpus, error:", - }, { - name: "enable MemoryQoS without specifying MemoryThrottlingFactor", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.FeatureGates = map[string]bool{"MemoryQoS": true} - conf.MemoryThrottlingFactor = nil - return conf - }, - errMsg: "invalid configuration: memoryThrottlingFactor is required when MemoryQoS feature flag is enabled", - }, { - name: "invalid MemoryThrottlingFactor", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.MemoryThrottlingFactor = ptr.To(1.1) - return conf - }, - errMsg: "invalid configuration: memoryThrottlingFactor 1.1 must be greater than 0 and less than or equal to 1.0", - }, { - name: "invalid Taint.TimeAdded", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - now := metav1.Now() - conf.RegisterWithTaints = []v1.Taint{{TimeAdded: &now}} - return conf - }, - errMsg: "invalid configuration: taint.TimeAdded is not nil", - }, { - name: "specify tracing with KubeletTracing disabled", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - samplingRate := int32(99999) - conf.FeatureGates = map[string]bool{"KubeletTracing": false} - conf.Tracing = &tracingapi.TracingConfiguration{SamplingRatePerMillion: &samplingRate} - return conf - }, - errMsg: "invalid configuration: tracing should not be configured if KubeletTracing feature flag is disabled.", - }, { - name: "specify tracing invalid sampling rate", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - samplingRate := int32(-1) - conf.FeatureGates = map[string]bool{"KubeletTracing": true} - conf.Tracing = &tracingapi.TracingConfiguration{SamplingRatePerMillion: 
&samplingRate} - return conf - }, - errMsg: "tracing.samplingRatePerMillion: Invalid value: -1: sampling rate must be positive", - }, { - name: "specify tracing invalid endpoint", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - ep := "dn%2s://localhost:4317" - conf.FeatureGates = map[string]bool{"KubeletTracing": true} - conf.Tracing = &tracingapi.TracingConfiguration{Endpoint: &ep} - return conf - }, - errMsg: "tracing.endpoint: Invalid value: \"dn%2s://localhost:4317\": parse \"dn%2s://localhost:4317\": first path segment in URL cannot contain colon", - }, { - name: "invalid GracefulNodeShutdownBasedOnPodPriority", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.FeatureGates = map[string]bool{"GracefulNodeShutdownBasedOnPodPriority": true} - conf.ShutdownGracePeriodByPodPriority = []kubeletconfig.ShutdownGracePeriodByPodPriority{{ - Priority: 0, - ShutdownGracePeriodSeconds: 0, - }} - return conf - }, - errMsg: "invalid configuration: Cannot specify both shutdownGracePeriodByPodPriority and shutdownGracePeriod at the same time", - }, { - name: "Specifying shutdownGracePeriodByPodPriority without enable GracefulNodeShutdownBasedOnPodPriority", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.FeatureGates = map[string]bool{"GracefulNodeShutdownBasedOnPodPriority": false} - conf.ShutdownGracePeriodByPodPriority = []kubeletconfig.ShutdownGracePeriodByPodPriority{{ - Priority: 0, - ShutdownGracePeriodSeconds: 0, - }} - return conf - }, - errMsg: "invalid configuration: Specifying shutdownGracePeriodByPodPriority requires feature gate GracefulNodeShutdownBasedOnPodPriority", - }, { - name: "enableSystemLogQuery is enabled without NodeLogQuery feature gate", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.EnableSystemLogQuery = true - return conf - }, 
- errMsg: "invalid configuration: NodeLogQuery feature gate is required for enableSystemLogHandler", - }, { - name: "enableSystemLogQuery is enabled without enableSystemLogHandler", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.FeatureGates = map[string]bool{"NodeLogQuery": true} - conf.EnableSystemLogHandler = false - conf.EnableSystemLogQuery = true - return conf - }, - errMsg: "invalid configuration: enableSystemLogHandler is required for enableSystemLogQuery", - }, { - name: "imageMaximumGCAge should not be specified without feature gate", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.FeatureGates = map[string]bool{"ImageMaximumGCAge": false} - conf.ImageMaximumGCAge = metav1.Duration{Duration: 1} - return conf - }, - errMsg: "invalid configuration: ImageMaximumGCAge feature gate is required for Kubelet configuration option imageMaximumGCAge", - }, { - name: "imageMaximumGCAge should not be negative", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.FeatureGates = map[string]bool{"ImageMaximumGCAge": true} - conf.ImageMaximumGCAge = metav1.Duration{Duration: -1} - return conf - }, - errMsg: "invalid configuration: imageMaximumGCAge -1ns must not be negative", - }, { - name: "imageMaximumGCAge should not be less than imageMinimumGCAge", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.FeatureGates = map[string]bool{"ImageMaximumGCAge": true} - conf.ImageMaximumGCAge = metav1.Duration{Duration: 1} - conf.ImageMinimumGCAge = metav1.Duration{Duration: 2} - return conf - }, - errMsg: "invalid configuration: imageMaximumGCAge 1ns must be greater than imageMinimumGCAge 2ns", - }, { - name: "containerLogMaxWorkers must be greater than or equal to 1", - configure: func(conf *kubeletconfig.KubeletConfiguration) 
*kubeletconfig.KubeletConfiguration { - conf.ContainerLogMaxWorkers = 0 - return conf - }, - errMsg: "invalid configuration: containerLogMaxWorkers must be greater than or equal to 1", - }, { - name: "containerLogMonitorInterval must be a positive time duration", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.ContainerLogMonitorInterval = metav1.Duration{Duration: -1 * time.Second} - return conf - }, - errMsg: "invalid configuration: containerLogMonitorInterval must be a positive time duration greater than or equal to 3s", - }, { - name: "containerLogMonitorInterval must be at least 3s or higher", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.ContainerLogMonitorInterval = metav1.Duration{Duration: 2 * time.Second} - return conf - }, - errMsg: "invalid configuration: containerLogMonitorInterval must be a positive time duration greater than or equal to 3s", - }, { - name: "pod logs path must be not empty", - configure: func(config *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - config.PodLogsDir = "" - return config - }, - errMsg: "invalid configuration: podLogsDir was not specified", - }, { - name: "pod logs path must be absolute", - configure: func(config *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - config.PodLogsDir = "./test" - return config - }, - errMsg: `invalid configuration: pod logs path "./test" must be absolute path`, - }, { - name: "pod logs path must be normalized", - configure: func(config *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - config.PodLogsDir = "/path/../" - return config - }, - errMsg: `invalid configuration: pod logs path "/path/../" must be normalized`, - }, { - name: "pod logs path is ascii only", - configure: func(config *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - config.PodLogsDir = "/🧪" - return 
config - }, - errMsg: `invalid configuration: pod logs path "/🧪" mut contains ASCII characters only`, - }, { - name: "invalid ContainerRuntimeEndpoint", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.ContainerRuntimeEndpoint = "" - return conf - }, - errMsg: "invalid configuration: the containerRuntimeEndpoint was not specified or empty", - }, { - name: "invalid Logging configuration", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.Logging.Format = "invalid" - return conf - }, - errMsg: "logging.format: Invalid value: \"invalid\": Unsupported log format", - }, { - name: "invalid FeatureGate", - configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { - conf.FeatureGates["invalid"] = true - return conf - }, - errMsg: "unrecognized feature gate: invalid", + errMsg: "invalid configuration: CrashLoopBackOff.MaxContainerRestartPeriod (got: 300.9 seconds) must be set between 1s and 300s", }, + { + name: "CrashLoopBackOff.MaxContainerRestartPeriod just a little too low", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.FeatureGates = map[string]bool{"KubeletCrashLoopBackOffMax": true, "CustomCPUCFSQuotaPeriod": true} + conf.CrashLoopBackOff = kubeletconfig.CrashLoopBackOffConfig{ + // 0.999 seconds + MaxContainerRestartPeriod: &metav1.Duration{Duration: 999 * time.Millisecond}, + } + return conf + }, + errMsg: "invalid configuration: CrashLoopBackOff.MaxContainerRestartPeriod (got: 0.999 seconds) must be set between 1s and 300s", + }, + { + name: "KubeletCrashLoopBackOffMax feature gate on, no crashLoopBackOff config, ok", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.FeatureGates = map[string]bool{"KubeletCrashLoopBackOffMax": true, "CustomCPUCFSQuotaPeriod": true} + return conf + }, + }, { + name:
"KubeletCrashLoopBackOffMax feature gate on, but no crashLoopBackOff.MaxContainerRestartPeriod config", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.FeatureGates = map[string]bool{"KubeletCrashLoopBackOffMax": true, "CustomCPUCFSQuotaPeriod": true} + conf.CrashLoopBackOff = kubeletconfig.CrashLoopBackOffConfig{} + return conf + }, + errMsg: "invalid configuration: FeatureGate KubeletCrashLoopBackOffMax is enabled, CrashLoopBackOff.MaxContainerRestartPeriod must be set", + }, + { + name: "specify SystemReservedEnforcementKey without specifying SystemReservedCgroup", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.EnforceNodeAllocatable = []string{kubetypes.SystemReservedEnforcementKey} + conf.SystemReservedCgroup = "" + return conf + }, + errMsg: "invalid configuration: systemReservedCgroup (--system-reserved-cgroup) must be specified when \"system-reserved\" or \"system-reserved-compressible\" included in enforceNodeAllocatable (--enforce-node-allocatable)", + }, { + name: "specify SystemReservedCompressibleEnforcementKey without specifying SystemReservedCgroup", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.EnforceNodeAllocatable = []string{kubetypes.SystemReservedCompressibleEnforcementKey} + conf.SystemReservedCgroup = "" + return conf + }, + errMsg: "invalid configuration: systemReservedCgroup (--system-reserved-cgroup) must be specified when \"system-reserved\" or \"system-reserved-compressible\" included in enforceNodeAllocatable (--enforce-node-allocatable)", + }, { + name: "specify SystemReservedCompressibleEnforcementKey with SystemReservedEnforcementKey", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.EnforceNodeAllocatable = []string{kubetypes.SystemReservedCompressibleEnforcementKey, kubetypes.SystemReservedEnforcementKey} 
+ return conf + }, + errMsg: "invalid configuration: both \"system-reserved\" and \"system-reserved-compressible\" cannot be specified together in enforceNodeAllocatable (--enforce-node-allocatable)", + }, { + name: "specify KubeReservedCompressibleEnforcementKey without specifying KubeReservedCgroup", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.EnforceNodeAllocatable = []string{kubetypes.KubeReservedCompressibleEnforcementKey} + conf.KubeReservedCgroup = "" + return conf + }, + errMsg: "invalid configuration: kubeReservedCgroup (--kube-reserved-cgroup) must be specified when \"kube-reserved\" or \"kube-reserved-compressible\" included in enforceNodeAllocatable (--enforce-node-allocatable)", + }, { + name: "specify KubeReservedEnforcementKey without specifying KubeReservedCgroup", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.EnforceNodeAllocatable = []string{kubetypes.KubeReservedEnforcementKey} + conf.KubeReservedCgroup = "" + return conf + }, + errMsg: "invalid configuration: kubeReservedCgroup (--kube-reserved-cgroup) must be specified when \"kube-reserved\" or \"kube-reserved-compressible\" included in enforceNodeAllocatable (--enforce-node-allocatable)", + }, { + name: "specify KubeReservedCompressibleEnforcementKey with KubeReservedEnforcementKey", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.EnforceNodeAllocatable = []string{kubetypes.KubeReservedCompressibleEnforcementKey, kubetypes.KubeReservedEnforcementKey} + return conf + }, + errMsg: "invalid configuration: both \"kube-reserved\" and \"kube-reserved-compressible\" cannot be specified together in enforceNodeAllocatable (--enforce-node-allocatable)", + }, { + name: "specify NodeAllocatableNoneKey with additional enforcements", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + 
conf.EnforceNodeAllocatable = []string{kubetypes.NodeAllocatableNoneKey, kubetypes.KubeReservedEnforcementKey} + return conf + }, + errMsg: "invalid configuration: enforceNodeAllocatable (--enforce-node-allocatable) may not contain additional enforcements when \"none\" is specified", + }, { + name: "duplicated EnforceNodeAllocatable", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.EnforceNodeAllocatable = []string{kubetypes.NodeAllocatableNoneKey, kubetypes.NodeAllocatableNoneKey} + return conf + }, + errMsg: "invalid configuration: duplicated enforcements \"none\" in enforceNodeAllocatable (--enforce-node-allocatable)", + }, { + name: "invalid EnforceNodeAllocatable", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.EnforceNodeAllocatable = []string{"invalid-enforce-node-allocatable"} + return conf + }, + errMsg: "invalid configuration: option \"invalid-enforce-node-allocatable\" specified for enforceNodeAllocatable (--enforce-node-allocatable). Valid options are \"pods\", \"system-reserved\", \"system-reserved-compressible\", \"kube-reserved\", \"kube-reserved-compressible\" or \"none\"", + }, { + name: "invalid HairpinMode", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.HairpinMode = "invalid-hair-pin-mode" + return conf + }, + errMsg: "invalid configuration: option \"invalid-hair-pin-mode\" specified for hairpinMode (--hairpin-mode). 
Valid options are \"none\", \"hairpin-veth\" or \"promiscuous-bridge\"", + }, { + name: "specify ReservedSystemCPUs with SystemReservedCgroup", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.ReservedSystemCPUs = "0-3" + conf.SystemReservedCgroup = "/system.slice" + return conf + }, + errMsg: "invalid configuration: can't use reservedSystemCPUs (--reserved-cpus) with systemReservedCgroup (--system-reserved-cgroup) or kubeReservedCgroup (--kube-reserved-cgroup)", + }, { + name: "specify ReservedSystemCPUs with KubeReservedCgroup", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.ReservedSystemCPUs = "0-3" + conf.KubeReservedCgroup = "/system.slice" + return conf + }, + errMsg: "invalid configuration: can't use reservedSystemCPUs (--reserved-cpus) with systemReservedCgroup (--system-reserved-cgroup) or kubeReservedCgroup (--kube-reserved-cgroup)", + }, { + name: "invalid ReservedSystemCPUs", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.ReservedSystemCPUs = "invalid-reserved-system-cpus" + return conf + }, + errMsg: "invalid configuration: unable to parse reservedSystemCPUs (--reserved-cpus) invalid-reserved-system-cpus, error:", + }, { + name: "enable MemoryQoS without specifying MemoryThrottlingFactor", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.FeatureGates = map[string]bool{"MemoryQoS": true} + conf.MemoryThrottlingFactor = nil + return conf + }, + errMsg: "invalid configuration: memoryThrottlingFactor is required when MemoryQoS feature flag is enabled", + }, { + name: "invalid MemoryThrottlingFactor", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.MemoryThrottlingFactor = ptr.To(1.1) + return conf + }, + errMsg: "invalid configuration: memoryThrottlingFactor 1.1 must be 
greater than 0 and less than or equal to 1.0", + }, { + name: "invalid Taint.TimeAdded", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + now := metav1.Now() + conf.RegisterWithTaints = []v1.Taint{{TimeAdded: &now}} + return conf + }, + errMsg: "invalid configuration: taint.TimeAdded is not nil", + }, { + name: "specify tracing with KubeletTracing disabled", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + samplingRate := int32(99999) + conf.FeatureGates = map[string]bool{"KubeletTracing": false} + conf.Tracing = &tracingapi.TracingConfiguration{SamplingRatePerMillion: &samplingRate} + return conf + }, + errMsg: "invalid configuration: tracing should not be configured if KubeletTracing feature flag is disabled.", + }, { + name: "specify tracing invalid sampling rate", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + samplingRate := int32(-1) + conf.FeatureGates = map[string]bool{"KubeletTracing": true} + conf.Tracing = &tracingapi.TracingConfiguration{SamplingRatePerMillion: &samplingRate} + return conf + }, + errMsg: "tracing.samplingRatePerMillion: Invalid value: -1: sampling rate must be positive", + }, { + name: "specify tracing invalid endpoint", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + ep := "dn%2s://localhost:4317" + conf.FeatureGates = map[string]bool{"KubeletTracing": true} + conf.Tracing = &tracingapi.TracingConfiguration{Endpoint: &ep} + return conf + }, + errMsg: "tracing.endpoint: Invalid value: \"dn%2s://localhost:4317\": parse \"dn%2s://localhost:4317\": first path segment in URL cannot contain colon", + }, { + name: "invalid GracefulNodeShutdownBasedOnPodPriority", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.FeatureGates = map[string]bool{"GracefulNodeShutdownBasedOnPodPriority": 
true} + conf.ShutdownGracePeriodByPodPriority = []kubeletconfig.ShutdownGracePeriodByPodPriority{{ + Priority: 0, + ShutdownGracePeriodSeconds: 0, + }} + return conf + }, + errMsg: "invalid configuration: Cannot specify both shutdownGracePeriodByPodPriority and shutdownGracePeriod at the same time", + }, { + name: "Specifying shutdownGracePeriodByPodPriority without enable GracefulNodeShutdownBasedOnPodPriority", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.FeatureGates = map[string]bool{"GracefulNodeShutdownBasedOnPodPriority": false} + conf.ShutdownGracePeriodByPodPriority = []kubeletconfig.ShutdownGracePeriodByPodPriority{{ + Priority: 0, + ShutdownGracePeriodSeconds: 0, + }} + return conf + }, + errMsg: "invalid configuration: Specifying shutdownGracePeriodByPodPriority requires feature gate GracefulNodeShutdownBasedOnPodPriority", + }, { + name: "enableSystemLogQuery is enabled without NodeLogQuery feature gate", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.EnableSystemLogQuery = true + return conf + }, + errMsg: "invalid configuration: NodeLogQuery feature gate is required for enableSystemLogHandler", + }, { + name: "enableSystemLogQuery is enabled without enableSystemLogHandler", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.FeatureGates = map[string]bool{"NodeLogQuery": true} + conf.EnableSystemLogHandler = false + conf.EnableSystemLogQuery = true + return conf + }, + errMsg: "invalid configuration: enableSystemLogHandler is required for enableSystemLogQuery", + }, { + name: "imageMaximumGCAge should not be specified without feature gate", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.FeatureGates = map[string]bool{"ImageMaximumGCAge": false} + conf.ImageMaximumGCAge = metav1.Duration{Duration: 1} + return conf + }, + 
errMsg: "invalid configuration: ImageMaximumGCAge feature gate is required for Kubelet configuration option imageMaximumGCAge", + }, { + name: "imageMaximumGCAge should not be negative", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.FeatureGates = map[string]bool{"ImageMaximumGCAge": true} + conf.ImageMaximumGCAge = metav1.Duration{Duration: -1} + return conf + }, + errMsg: "invalid configuration: imageMaximumGCAge -1ns must not be negative", + }, { + name: "imageMaximumGCAge should not be less than imageMinimumGCAge", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.FeatureGates = map[string]bool{"ImageMaximumGCAge": true} + conf.ImageMaximumGCAge = metav1.Duration{Duration: 1} + conf.ImageMinimumGCAge = metav1.Duration{Duration: 2} + return conf + }, + errMsg: "invalid configuration: imageMaximumGCAge 1ns must be greater than imageMinimumGCAge 2ns", + }, { + name: "containerLogMaxWorkers must be greater than or equal to 1", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.ContainerLogMaxWorkers = 0 + return conf + }, + errMsg: "invalid configuration: containerLogMaxWorkers must be greater than or equal to 1", + }, { + name: "containerLogMonitorInterval must be a positive time duration", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.ContainerLogMonitorInterval = metav1.Duration{Duration: -1 * time.Second} + return conf + }, + errMsg: "invalid configuration: containerLogMonitorInterval must be a positive time duration greater than or equal to 3s", + }, { + name: "containerLogMonitorInterval must be at least 3s or higher", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.ContainerLogMonitorInterval = metav1.Duration{Duration: 2 * time.Second} + return conf + }, + errMsg: "invalid 
configuration: containerLogMonitorInterval must be a positive time duration greater than or equal to 3s", + }, { + name: "pod logs path must be not empty", + configure: func(config *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + config.PodLogsDir = "" + return config + }, + errMsg: "invalid configuration: podLogsDir was not specified", + }, { + name: "pod logs path must be absolute", + configure: func(config *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + config.PodLogsDir = "./test" + return config + }, + errMsg: `invalid configuration: pod logs path "./test" must be absolute path`, + }, { + name: "pod logs path must be normalized", + configure: func(config *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + config.PodLogsDir = "/path/../" + return config + }, + errMsg: `invalid configuration: pod logs path "/path/../" must be normalized`, + }, { + name: "pod logs path is ascii only", + configure: func(config *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + config.PodLogsDir = "/🧪" + return config + }, + errMsg: `invalid configuration: pod logs path "/🧪" mut contains ASCII characters only`, + }, { + name: "invalid ContainerRuntimeEndpoint", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.ContainerRuntimeEndpoint = "" + return conf + }, + errMsg: "invalid configuration: the containerRuntimeEndpoint was not specified or empty", + }, { + name: "invalid Logging configuration", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.Logging.Format = "invalid" + return conf + }, + errMsg: "logging.format: Invalid value: \"invalid\": Unsupported log format", + }, { + name: "invalid FeatureGate", + configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration { + conf.FeatureGates["invalid"] = true + return conf + }, + errMsg: 
"unrecognized feature gate: invalid", + }, } for _, tc := range cases { diff --git a/pkg/kubelet/apis/config/zz_generated.deepcopy.go b/pkg/kubelet/apis/config/zz_generated.deepcopy.go index b4ab86f64dd..2942552e30a 100644 --- a/pkg/kubelet/apis/config/zz_generated.deepcopy.go +++ b/pkg/kubelet/apis/config/zz_generated.deepcopy.go @@ -28,6 +28,27 @@ import ( apiv1 "k8s.io/component-base/tracing/api/v1" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CrashLoopBackOffConfig) DeepCopyInto(out *CrashLoopBackOffConfig) { + *out = *in + if in.MaxContainerRestartPeriod != nil { + in, out := &in.MaxContainerRestartPeriod, &out.MaxContainerRestartPeriod + *out = new(v1.Duration) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CrashLoopBackOffConfig. +func (in *CrashLoopBackOffConfig) DeepCopy() *CrashLoopBackOffConfig { + if in == nil { + return nil + } + out := new(CrashLoopBackOffConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *CredentialProvider) DeepCopyInto(out *CredentialProvider) { *out = *in @@ -332,6 +353,7 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { *out = new(apiv1.TracingConfiguration) (*in).DeepCopyInto(*out) } + in.CrashLoopBackOff.DeepCopyInto(&out.CrashLoopBackOff) return } diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index dc1e7029eca..5f4c8e38f33 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -78,6 +78,7 @@ import ( "k8s.io/kubernetes/pkg/api/v1/resource" "k8s.io/kubernetes/pkg/features" kubeletconfiginternal "k8s.io/kubernetes/pkg/kubelet/apis/config" + "k8s.io/kubernetes/pkg/kubelet/apis/config/v1beta1" "k8s.io/kubernetes/pkg/kubelet/apis/podresources" "k8s.io/kubernetes/pkg/kubelet/cadvisor" kubeletcertificate "k8s.io/kubernetes/pkg/kubelet/certificate" @@ -150,7 +151,7 @@ const ( DefaultContainerLogsDir = "/var/log/containers" // MaxContainerBackOff is the max backoff period for container restarts, exported for the e2e test - MaxContainerBackOff = 300 * time.Second + MaxContainerBackOff = v1beta1.MaxContainerBackOff // MaxImageBackOff is the max backoff period for image pulls, exported for the e2e test MaxImageBackOff = 300 * time.Second @@ -924,7 +925,15 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration, kubeDeps.Recorder, volumepathhandler.NewBlockVolumePathHandler()) - klet.backOff = flowcontrol.NewBackOff(containerBackOffPeriod, MaxContainerBackOff) + boMax := MaxContainerBackOff + base := containerBackOffPeriod + if utilfeature.DefaultFeatureGate.Enabled(features.KubeletCrashLoopBackOffMax) { + boMax = kubeCfg.CrashLoopBackOff.MaxContainerRestartPeriod.Duration + if boMax < containerBackOffPeriod { + base = boMax + } + } + klet.backOff = flowcontrol.NewBackOff(base, boMax) klet.backOff.HasExpiredFunc = func(eventTime time.Time, lastUpdate time.Time, maxDuration time.Duration) bool { return eventTime.Sub(lastUpdate) > 600*time.Second } diff --git 
a/staging/src/k8s.io/kubelet/config/v1beta1/types.go b/staging/src/k8s.io/kubelet/config/v1beta1/types.go index d10578f2c9a..e5ac2d18f5a 100644 --- a/staging/src/k8s.io/kubelet/config/v1beta1/types.go +++ b/staging/src/k8s.io/kubelet/config/v1beta1/types.go @@ -775,6 +775,11 @@ type KubeletConfiguration struct { // +featureGate=GracefulNodeShutdownBasedOnPodPriority // +optional ShutdownGracePeriodByPodPriority []ShutdownGracePeriodByPodPriority `json:"shutdownGracePeriodByPodPriority,omitempty"` + // CrashLoopBackOff contains config to modify node-level parameters for + // container restart behavior + // +featureGate=KubeletCrashLoopBackOffMax + // +optional + CrashLoopBackOff CrashLoopBackOffConfig `json:"crashLoopBackOff,omitempty"` // reservedMemory specifies a comma-separated list of memory reservations for NUMA nodes. // The parameter makes sense only in the context of the memory manager feature. // The memory manager will not allocate reserved memory for container workloads. @@ -975,6 +980,15 @@ type MemorySwapConfiguration struct { SwapBehavior string `json:"swapBehavior,omitempty"` } +type CrashLoopBackOffConfig struct { + // maxContainerRestartPeriod is the maximum duration the backoff delay can accrue + // to for container restarts, minimum 1 second, maximum 300 seconds. If not set, + // defaults to the internal crashloopbackoff maximum (300s). 
+ // +featureGate=KubeletCrashLoopBackOffMax + // +optional + MaxContainerRestartPeriod *metav1.Duration `json:"maxContainerRestartPeriod,omitempty"` +} + // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object // CredentialProviderConfig is the configuration containing information about diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go b/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go index 0ab6259f98f..89dda0dfa17 100644 --- a/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go +++ b/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go @@ -28,6 +28,27 @@ import ( apiv1 "k8s.io/component-base/tracing/api/v1" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CrashLoopBackOffConfig) DeepCopyInto(out *CrashLoopBackOffConfig) { + *out = *in + if in.MaxContainerRestartPeriod != nil { + in, out := &in.MaxContainerRestartPeriod, &out.MaxContainerRestartPeriod + *out = new(v1.Duration) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CrashLoopBackOffConfig. +func (in *CrashLoopBackOffConfig) DeepCopy() *CrashLoopBackOffConfig { + if in == nil { + return nil + } + out := new(CrashLoopBackOffConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *CredentialProvider) DeepCopyInto(out *CredentialProvider) { *out = *in @@ -441,6 +462,7 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { *out = make([]ShutdownGracePeriodByPodPriority, len(*in)) copy(*out, *in) } + in.CrashLoopBackOff.DeepCopyInto(&out.CrashLoopBackOff) if in.ReservedMemory != nil { in, out := &in.ReservedMemory, &out.ReservedMemory *out = make([]MemoryReservation, len(*in)) diff --git a/test/featuregates_linter/test_data/versioned_feature_list.yaml b/test/featuregates_linter/test_data/versioned_feature_list.yaml index 50e80cd6782..3e355898dd7 100644 --- a/test/featuregates_linter/test_data/versioned_feature_list.yaml +++ b/test/featuregates_linter/test_data/versioned_feature_list.yaml @@ -632,6 +632,12 @@ lockToDefault: false preRelease: Beta version: "1.31" +- name: KubeletCrashLoopBackOffMax + versionedSpecs: + - default: false + lockToDefault: false + preRelease: Alpha + version: "1.32" - name: KubeletFineGrainedAuthz versionedSpecs: - default: false