Merge pull request #38258 from derekwaynecarr/kernel-memcg-flag

Automatic merge from submit-queue (batch tested with PRs 38318, 38258)

kernel memcg notification enabled via experimental flag

Kubelet integrates with kernel memcg notification API if and only if enabled via experimental flag.
This commit is contained in:
Kubernetes Submit Queue 2016-12-07 15:00:10 -08:00 committed by GitHub
commit ca049360e2
13 changed files with 3215 additions and 3132 deletions

View File

@ -249,6 +249,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&s.EvictionPressureTransitionPeriod.Duration, "eviction-pressure-transition-period", s.EvictionPressureTransitionPeriod.Duration, "Duration for which the kubelet has to wait before transitioning out of an eviction pressure condition.")
fs.Int32Var(&s.EvictionMaxPodGracePeriod, "eviction-max-pod-grace-period", s.EvictionMaxPodGracePeriod, "Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met. If negative, defer to pod specified value.")
fs.StringVar(&s.EvictionMinimumReclaim, "eviction-minimum-reclaim", s.EvictionMinimumReclaim, "A set of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.")
fs.BoolVar(&s.ExperimentalKernelMemcgNotification, "experimental-kernel-memcg-notification", s.ExperimentalKernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
fs.Int32Var(&s.PodsPerCore, "pods-per-core", s.PodsPerCore, "Number of Pods per core that can run on this Kubelet. The total number of Pods on this Kubelet cannot exceed max-pods, so max-pods will be used if this calculation results in a larger number of Pods allowed on the Kubelet. A value of 0 disables this limit.")
fs.BoolVar(&s.ProtectKernelDefaults, "protect-kernel-defaults", s.ProtectKernelDefaults, "Default kubelet behaviour for kernel tuning. If set, kubelet errors if any of kernel tunables is different than kubelet defaults.")

View File

@ -202,6 +202,7 @@ experimental-nvidia-gpus
experimental-prefix
experimental-cri
experimental-check-node-capabilities-before-mount
experimental-kernel-memcg-notification
external-etcd-cafile
external-etcd-certfile
external-etcd-endpoints

File diff suppressed because it is too large Load Diff

View File

@ -430,6 +430,9 @@ type KubeletConfiguration struct {
// Comma-delimited list of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.
// +optional
EvictionMinimumReclaim string `json:"evictionMinimumReclaim,omitempty"`
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
// +optional
ExperimentalKernelMemcgNotification bool `json:"experimentalKernelMemcgNotification"`
// Maximum number of pods per core. Cannot exceed MaxPods
PodsPerCore int32 `json:"podsPerCore"`
// enableControllerAttachDetach enables the Attach/Detach controller to

View File

@ -374,6 +374,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
if obj.EvictionPressureTransitionPeriod == zeroDuration {
obj.EvictionPressureTransitionPeriod = metav1.Duration{Duration: 5 * time.Minute}
}
if obj.ExperimentalKernelMemcgNotification == nil {
obj.ExperimentalKernelMemcgNotification = boolVar(false)
}
if obj.SystemReserved == nil {
obj.SystemReserved = make(map[string]string)
}

View File

@ -472,6 +472,8 @@ type KubeletConfiguration struct {
EvictionMaxPodGracePeriod int32 `json:"evictionMaxPodGracePeriod"`
// Comma-delimited list of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.
EvictionMinimumReclaim string `json:"evictionMinimumReclaim"`
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
ExperimentalKernelMemcgNotification *bool `json:"experimentalKernelMemcgNotification"`
// Maximum number of pods per core. Cannot exceed MaxPods
PodsPerCore int32 `json:"podsPerCore"`
// enableControllerAttachDetach enables the Attach/Detach controller to

View File

@ -391,6 +391,9 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
if err := api.Convert_Pointer_bool_To_bool(&in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification, s); err != nil {
return err
}
out.PodsPerCore = in.PodsPerCore
if err := api.Convert_Pointer_bool_To_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
return err
@ -561,6 +564,9 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
if err := api.Convert_bool_To_Pointer_bool(&in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification, s); err != nil {
return err
}
out.PodsPerCore = in.PodsPerCore
if err := api.Convert_bool_To_Pointer_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
return err

View File

@ -414,6 +414,13 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
if in.ExperimentalKernelMemcgNotification != nil {
in, out := &in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification
*out = new(bool)
**out = **in
} else {
out.ExperimentalKernelMemcgNotification = nil
}
out.PodsPerCore = in.PodsPerCore
if in.EnableControllerAttachDetach != nil {
in, out := &in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach

View File

@ -369,6 +369,7 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
out.ExperimentalKernelMemcgNotification = in.ExperimentalKernelMemcgNotification
out.PodsPerCore = in.PodsPerCore
out.EnableControllerAttachDetach = in.EnableControllerAttachDetach
if in.SystemReserved != nil {

View File

@ -2918,6 +2918,13 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
Format: "",
},
},
"experimentalKernelMemcgNotification": {
SchemaProps: spec.SchemaProps{
Description: "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.",
Type: []string{"boolean"},
Format: "",
},
},
"podsPerCore": {
SchemaProps: spec.SchemaProps{
Description: "Maximum number of pods per core. Cannot exceed MaxPods",
@ -14806,6 +14813,13 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
Format: "",
},
},
"experimentalKernelMemcgNotification": {
SchemaProps: spec.SchemaProps{
Description: "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.",
Type: []string{"boolean"},
Format: "",
},
},
"podsPerCore": {
SchemaProps: spec.SchemaProps{
Description: "Maximum number of pods per core. Cannot exceed MaxPods",
@ -14919,7 +14933,7 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
},
},
},
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
},
},
Dependencies: []string{

View File

@ -204,7 +204,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
}
// attempt to create a threshold notifier to improve eviction response time
if !m.notifiersInitialized {
if m.config.KernelMemcgNotification && !m.notifiersInitialized {
glog.Infof("eviction manager attempting to integrate with kernel memcg notification api")
m.notifiersInitialized = true
// start soft memory notification
err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) {

View File

@ -69,6 +69,8 @@ type Config struct {
MaxPodGracePeriodSeconds int64
// Thresholds define the set of conditions monitored to trigger eviction.
Thresholds []Threshold
// KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed.
KernelMemcgNotification bool
}
// ThresholdValue is a value holder that abstracts literal versus percentage based quantity

View File

@ -354,6 +354,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
PressureTransitionPeriod: kubeCfg.EvictionPressureTransitionPeriod.Duration,
MaxPodGracePeriodSeconds: int64(kubeCfg.EvictionMaxPodGracePeriod),
Thresholds: thresholds,
KernelMemcgNotification: kubeCfg.ExperimentalKernelMemcgNotification,
}
reservation, err := ParseReservation(kubeCfg.KubeReserved, kubeCfg.SystemReserved)