Merge pull request #38258 from derekwaynecarr/kernel-memcg-flag

Automatic merge from submit-queue (batch tested with PRs 38318, 38258)

kernel memcg notification enabled via experimental flag

Kubelet integrates with kernel memcg notification API if and only if enabled via experimental flag.
This commit is contained in:
Kubernetes Submit Queue 2016-12-07 15:00:10 -08:00 committed by GitHub
commit ca049360e2
13 changed files with 3215 additions and 3132 deletions

View File

@ -249,6 +249,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&s.EvictionPressureTransitionPeriod.Duration, "eviction-pressure-transition-period", s.EvictionPressureTransitionPeriod.Duration, "Duration for which the kubelet has to wait before transitioning out of an eviction pressure condition.") fs.DurationVar(&s.EvictionPressureTransitionPeriod.Duration, "eviction-pressure-transition-period", s.EvictionPressureTransitionPeriod.Duration, "Duration for which the kubelet has to wait before transitioning out of an eviction pressure condition.")
fs.Int32Var(&s.EvictionMaxPodGracePeriod, "eviction-max-pod-grace-period", s.EvictionMaxPodGracePeriod, "Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met. If negative, defer to pod specified value.") fs.Int32Var(&s.EvictionMaxPodGracePeriod, "eviction-max-pod-grace-period", s.EvictionMaxPodGracePeriod, "Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met. If negative, defer to pod specified value.")
fs.StringVar(&s.EvictionMinimumReclaim, "eviction-minimum-reclaim", s.EvictionMinimumReclaim, "A set of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.") fs.StringVar(&s.EvictionMinimumReclaim, "eviction-minimum-reclaim", s.EvictionMinimumReclaim, "A set of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.")
fs.BoolVar(&s.ExperimentalKernelMemcgNotification, "experimental-kernel-memcg-notification", s.ExperimentalKernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
fs.Int32Var(&s.PodsPerCore, "pods-per-core", s.PodsPerCore, "Number of Pods per core that can run on this Kubelet. The total number of Pods on this Kubelet cannot exceed max-pods, so max-pods will be used if this calculation results in a larger number of Pods allowed on the Kubelet. A value of 0 disables this limit.") fs.Int32Var(&s.PodsPerCore, "pods-per-core", s.PodsPerCore, "Number of Pods per core that can run on this Kubelet. The total number of Pods on this Kubelet cannot exceed max-pods, so max-pods will be used if this calculation results in a larger number of Pods allowed on the Kubelet. A value of 0 disables this limit.")
fs.BoolVar(&s.ProtectKernelDefaults, "protect-kernel-defaults", s.ProtectKernelDefaults, "Default kubelet behaviour for kernel tuning. If set, kubelet errors if any of kernel tunables is different than kubelet defaults.") fs.BoolVar(&s.ProtectKernelDefaults, "protect-kernel-defaults", s.ProtectKernelDefaults, "Default kubelet behaviour for kernel tuning. If set, kubelet errors if any of kernel tunables is different than kubelet defaults.")

View File

@ -202,6 +202,7 @@ experimental-nvidia-gpus
experimental-prefix experimental-prefix
experimental-cri experimental-cri
experimental-check-node-capabilities-before-mount experimental-check-node-capabilities-before-mount
experimental-kernel-memcg-notification
external-etcd-cafile external-etcd-cafile
external-etcd-certfile external-etcd-certfile
external-etcd-endpoints external-etcd-endpoints

File diff suppressed because it is too large Load Diff

View File

@ -430,6 +430,9 @@ type KubeletConfiguration struct {
// Comma-delimited list of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure. // Comma-delimited list of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.
// +optional // +optional
EvictionMinimumReclaim string `json:"evictionMinimumReclaim,omitempty"` EvictionMinimumReclaim string `json:"evictionMinimumReclaim,omitempty"`
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
// +optional
ExperimentalKernelMemcgNotification bool `json:"experimentalKernelMemcgNotification"`
// Maximum number of pods per core. Cannot exceed MaxPods // Maximum number of pods per core. Cannot exceed MaxPods
PodsPerCore int32 `json:"podsPerCore"` PodsPerCore int32 `json:"podsPerCore"`
// enableControllerAttachDetach enables the Attach/Detach controller to // enableControllerAttachDetach enables the Attach/Detach controller to

View File

@ -374,6 +374,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
if obj.EvictionPressureTransitionPeriod == zeroDuration { if obj.EvictionPressureTransitionPeriod == zeroDuration {
obj.EvictionPressureTransitionPeriod = metav1.Duration{Duration: 5 * time.Minute} obj.EvictionPressureTransitionPeriod = metav1.Duration{Duration: 5 * time.Minute}
} }
if obj.ExperimentalKernelMemcgNotification == nil {
obj.ExperimentalKernelMemcgNotification = boolVar(false)
}
if obj.SystemReserved == nil { if obj.SystemReserved == nil {
obj.SystemReserved = make(map[string]string) obj.SystemReserved = make(map[string]string)
} }

View File

@ -472,6 +472,8 @@ type KubeletConfiguration struct {
EvictionMaxPodGracePeriod int32 `json:"evictionMaxPodGracePeriod"` EvictionMaxPodGracePeriod int32 `json:"evictionMaxPodGracePeriod"`
// Comma-delimited list of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure. // Comma-delimited list of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.
EvictionMinimumReclaim string `json:"evictionMinimumReclaim"` EvictionMinimumReclaim string `json:"evictionMinimumReclaim"`
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
ExperimentalKernelMemcgNotification *bool `json:"experimentalKernelMemcgNotification"`
// Maximum number of pods per core. Cannot exceed MaxPods // Maximum number of pods per core. Cannot exceed MaxPods
PodsPerCore int32 `json:"podsPerCore"` PodsPerCore int32 `json:"podsPerCore"`
// enableControllerAttachDetach enables the Attach/Detach controller to // enableControllerAttachDetach enables the Attach/Detach controller to

View File

@ -391,6 +391,9 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
if err := api.Convert_Pointer_bool_To_bool(&in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification, s); err != nil {
return err
}
out.PodsPerCore = in.PodsPerCore out.PodsPerCore = in.PodsPerCore
if err := api.Convert_Pointer_bool_To_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil { if err := api.Convert_Pointer_bool_To_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
return err return err
@ -561,6 +564,9 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
if err := api.Convert_bool_To_Pointer_bool(&in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification, s); err != nil {
return err
}
out.PodsPerCore = in.PodsPerCore out.PodsPerCore = in.PodsPerCore
if err := api.Convert_bool_To_Pointer_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil { if err := api.Convert_bool_To_Pointer_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
return err return err

View File

@ -414,6 +414,13 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
if in.ExperimentalKernelMemcgNotification != nil {
in, out := &in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification
*out = new(bool)
**out = **in
} else {
out.ExperimentalKernelMemcgNotification = nil
}
out.PodsPerCore = in.PodsPerCore out.PodsPerCore = in.PodsPerCore
if in.EnableControllerAttachDetach != nil { if in.EnableControllerAttachDetach != nil {
in, out := &in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach in, out := &in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach

View File

@ -369,6 +369,7 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
out.ExperimentalKernelMemcgNotification = in.ExperimentalKernelMemcgNotification
out.PodsPerCore = in.PodsPerCore out.PodsPerCore = in.PodsPerCore
out.EnableControllerAttachDetach = in.EnableControllerAttachDetach out.EnableControllerAttachDetach = in.EnableControllerAttachDetach
if in.SystemReserved != nil { if in.SystemReserved != nil {

View File

@ -2918,6 +2918,13 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
Format: "", Format: "",
}, },
}, },
"experimentalKernelMemcgNotification": {
SchemaProps: spec.SchemaProps{
Description: "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.",
Type: []string{"boolean"},
Format: "",
},
},
"podsPerCore": { "podsPerCore": {
SchemaProps: spec.SchemaProps{ SchemaProps: spec.SchemaProps{
Description: "Maximum number of pods per core. Cannot exceed MaxPods", Description: "Maximum number of pods per core. Cannot exceed MaxPods",
@ -14806,6 +14813,13 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
Format: "", Format: "",
}, },
}, },
"experimentalKernelMemcgNotification": {
SchemaProps: spec.SchemaProps{
Description: "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.",
Type: []string{"boolean"},
Format: "",
},
},
"podsPerCore": { "podsPerCore": {
SchemaProps: spec.SchemaProps{ SchemaProps: spec.SchemaProps{
Description: "Maximum number of pods per core. Cannot exceed MaxPods", Description: "Maximum number of pods per core. Cannot exceed MaxPods",
@ -14919,7 +14933,7 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
}, },
}, },
}, },
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"}, Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
}, },
}, },
Dependencies: []string{ Dependencies: []string{

View File

@ -204,7 +204,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
} }
// attempt to create a threshold notifier to improve eviction response time // attempt to create a threshold notifier to improve eviction response time
if !m.notifiersInitialized { if m.config.KernelMemcgNotification && !m.notifiersInitialized {
glog.Infof("eviction manager attempting to integrate with kernel memcg notification api")
m.notifiersInitialized = true m.notifiersInitialized = true
// start soft memory notification // start soft memory notification
err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) { err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) {

View File

@ -69,6 +69,8 @@ type Config struct {
MaxPodGracePeriodSeconds int64 MaxPodGracePeriodSeconds int64
// Thresholds define the set of conditions monitored to trigger eviction. // Thresholds define the set of conditions monitored to trigger eviction.
Thresholds []Threshold Thresholds []Threshold
// KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed.
KernelMemcgNotification bool
} }
// ThresholdValue is a value holder that abstracts literal versus percentage based quantity // ThresholdValue is a value holder that abstracts literal versus percentage based quantity

View File

@ -354,6 +354,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
PressureTransitionPeriod: kubeCfg.EvictionPressureTransitionPeriod.Duration, PressureTransitionPeriod: kubeCfg.EvictionPressureTransitionPeriod.Duration,
MaxPodGracePeriodSeconds: int64(kubeCfg.EvictionMaxPodGracePeriod), MaxPodGracePeriodSeconds: int64(kubeCfg.EvictionMaxPodGracePeriod),
Thresholds: thresholds, Thresholds: thresholds,
KernelMemcgNotification: kubeCfg.ExperimentalKernelMemcgNotification,
} }
reservation, err := ParseReservation(kubeCfg.KubeReserved, kubeCfg.SystemReserved) reservation, err := ParseReservation(kubeCfg.KubeReserved, kubeCfg.SystemReserved)