mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 04:06:03 +00:00
Merge pull request #38258 from derekwaynecarr/kernel-memcg-flag
Automatic merge from submit-queue (batch tested with PRs 38318, 38258) kernel memcg notification enabled via experimental flag Kubelet integrates with kernel memcg notification API if and only if enabled via experimental flag.
This commit is contained in:
commit
ca049360e2
@ -249,6 +249,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
|
||||
fs.DurationVar(&s.EvictionPressureTransitionPeriod.Duration, "eviction-pressure-transition-period", s.EvictionPressureTransitionPeriod.Duration, "Duration for which the kubelet has to wait before transitioning out of an eviction pressure condition.")
|
||||
fs.Int32Var(&s.EvictionMaxPodGracePeriod, "eviction-max-pod-grace-period", s.EvictionMaxPodGracePeriod, "Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met. If negative, defer to pod specified value.")
|
||||
fs.StringVar(&s.EvictionMinimumReclaim, "eviction-minimum-reclaim", s.EvictionMinimumReclaim, "A set of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.")
|
||||
fs.BoolVar(&s.ExperimentalKernelMemcgNotification, "experimental-kernel-memcg-notification", s.ExperimentalKernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
|
||||
fs.Int32Var(&s.PodsPerCore, "pods-per-core", s.PodsPerCore, "Number of Pods per core that can run on this Kubelet. The total number of Pods on this Kubelet cannot exceed max-pods, so max-pods will be used if this calculation results in a larger number of Pods allowed on the Kubelet. A value of 0 disables this limit.")
|
||||
fs.BoolVar(&s.ProtectKernelDefaults, "protect-kernel-defaults", s.ProtectKernelDefaults, "Default kubelet behaviour for kernel tuning. If set, kubelet errors if any of kernel tunables is different than kubelet defaults.")
|
||||
|
||||
|
@ -202,6 +202,7 @@ experimental-nvidia-gpus
|
||||
experimental-prefix
|
||||
experimental-cri
|
||||
experimental-check-node-capabilities-before-mount
|
||||
experimental-kernel-memcg-notification
|
||||
external-etcd-cafile
|
||||
external-etcd-certfile
|
||||
external-etcd-endpoints
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -430,6 +430,9 @@ type KubeletConfiguration struct {
|
||||
// Comma-delimited list of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.
|
||||
// +optional
|
||||
EvictionMinimumReclaim string `json:"evictionMinimumReclaim,omitempty"`
|
||||
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
|
||||
// +optional
|
||||
ExperimentalKernelMemcgNotification bool `json:"experimentalKernelMemcgNotification"`
|
||||
// Maximum number of pods per core. Cannot exceed MaxPods
|
||||
PodsPerCore int32 `json:"podsPerCore"`
|
||||
// enableControllerAttachDetach enables the Attach/Detach controller to
|
||||
|
@ -374,6 +374,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
|
||||
if obj.EvictionPressureTransitionPeriod == zeroDuration {
|
||||
obj.EvictionPressureTransitionPeriod = metav1.Duration{Duration: 5 * time.Minute}
|
||||
}
|
||||
if obj.ExperimentalKernelMemcgNotification == nil {
|
||||
obj.ExperimentalKernelMemcgNotification = boolVar(false)
|
||||
}
|
||||
if obj.SystemReserved == nil {
|
||||
obj.SystemReserved = make(map[string]string)
|
||||
}
|
||||
|
@ -472,6 +472,8 @@ type KubeletConfiguration struct {
|
||||
EvictionMaxPodGracePeriod int32 `json:"evictionMaxPodGracePeriod"`
|
||||
// Comma-delimited list of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.
|
||||
EvictionMinimumReclaim string `json:"evictionMinimumReclaim"`
|
||||
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
|
||||
ExperimentalKernelMemcgNotification *bool `json:"experimentalKernelMemcgNotification"`
|
||||
// Maximum number of pods per core. Cannot exceed MaxPods
|
||||
PodsPerCore int32 `json:"podsPerCore"`
|
||||
// enableControllerAttachDetach enables the Attach/Detach controller to
|
||||
|
@ -391,6 +391,9 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
|
||||
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
|
||||
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
|
||||
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
|
||||
if err := api.Convert_Pointer_bool_To_bool(&in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.PodsPerCore = in.PodsPerCore
|
||||
if err := api.Convert_Pointer_bool_To_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
|
||||
return err
|
||||
@ -561,6 +564,9 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
|
||||
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
|
||||
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
|
||||
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
|
||||
if err := api.Convert_bool_To_Pointer_bool(&in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.PodsPerCore = in.PodsPerCore
|
||||
if err := api.Convert_bool_To_Pointer_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
|
||||
return err
|
||||
|
@ -414,6 +414,13 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
|
||||
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
|
||||
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
|
||||
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
|
||||
if in.ExperimentalKernelMemcgNotification != nil {
|
||||
in, out := &in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
} else {
|
||||
out.ExperimentalKernelMemcgNotification = nil
|
||||
}
|
||||
out.PodsPerCore = in.PodsPerCore
|
||||
if in.EnableControllerAttachDetach != nil {
|
||||
in, out := &in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach
|
||||
|
@ -369,6 +369,7 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
|
||||
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
|
||||
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
|
||||
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
|
||||
out.ExperimentalKernelMemcgNotification = in.ExperimentalKernelMemcgNotification
|
||||
out.PodsPerCore = in.PodsPerCore
|
||||
out.EnableControllerAttachDetach = in.EnableControllerAttachDetach
|
||||
if in.SystemReserved != nil {
|
||||
|
@ -2918,6 +2918,13 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"experimentalKernelMemcgNotification": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.",
|
||||
Type: []string{"boolean"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"podsPerCore": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Maximum number of pods per core. Cannot exceed MaxPods",
|
||||
@ -14806,6 +14813,13 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"experimentalKernelMemcgNotification": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.",
|
||||
Type: []string{"boolean"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"podsPerCore": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Maximum number of pods per core. Cannot exceed MaxPods",
|
||||
@ -14919,7 +14933,7 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
|
||||
},
|
||||
},
|
||||
},
|
||||
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
|
||||
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
|
||||
},
|
||||
},
|
||||
Dependencies: []string{
|
||||
|
@ -204,7 +204,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
|
||||
}
|
||||
|
||||
// attempt to create a threshold notifier to improve eviction response time
|
||||
if !m.notifiersInitialized {
|
||||
if m.config.KernelMemcgNotification && !m.notifiersInitialized {
|
||||
glog.Infof("eviction manager attempting to integrate with kernel memcg notification api")
|
||||
m.notifiersInitialized = true
|
||||
// start soft memory notification
|
||||
err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) {
|
||||
|
@ -69,6 +69,8 @@ type Config struct {
|
||||
MaxPodGracePeriodSeconds int64
|
||||
// Thresholds define the set of conditions monitored to trigger eviction.
|
||||
Thresholds []Threshold
|
||||
// KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed.
|
||||
KernelMemcgNotification bool
|
||||
}
|
||||
|
||||
// ThresholdValue is a value holder that abstracts literal versus percentage based quantity
|
||||
|
@ -354,6 +354,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
|
||||
PressureTransitionPeriod: kubeCfg.EvictionPressureTransitionPeriod.Duration,
|
||||
MaxPodGracePeriodSeconds: int64(kubeCfg.EvictionMaxPodGracePeriod),
|
||||
Thresholds: thresholds,
|
||||
KernelMemcgNotification: kubeCfg.ExperimentalKernelMemcgNotification,
|
||||
}
|
||||
|
||||
reservation, err := ParseReservation(kubeCfg.KubeReserved, kubeCfg.SystemReserved)
|
||||
|
Loading…
Reference in New Issue
Block a user