mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-21 19:01:49 +00:00
adding kubelet flags for node allocatable phase 2
Signed-off-by: Vishnu Kannan <vishnuk@google.com>
This commit is contained in:
parent
9a1f0574a4
commit
70e340b045
@ -225,8 +225,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
|
||||
fs.Float64Var(&s.ChaosChance, "chaos-chance", s.ChaosChance, "If > 0.0, introduce random client errors and latency. Intended for testing. [default=0.0]")
|
||||
fs.BoolVar(&s.Containerized, "containerized", s.Containerized, "Experimental support for running kubelet in a container. Intended for testing. [default=false]")
|
||||
fs.Int64Var(&s.MaxOpenFiles, "max-open-files", s.MaxOpenFiles, "Number of files that can be opened by Kubelet process. [default=1000000]")
|
||||
fs.Var(&s.SystemReserved, "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
|
||||
fs.Var(&s.KubeReserved, "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
|
||||
|
||||
fs.BoolVar(&s.RegisterSchedulable, "register-schedulable", s.RegisterSchedulable, "Register the node as schedulable. Won't have any effect if register-node is false. [default=true]")
|
||||
fs.MarkDeprecated("register-schedulable", "will be removed in a future version")
|
||||
fs.Var(utiltaints.NewTaintsVar(&s.RegisterWithTaints), "register-with-taints", "Register the node with the given list of taints (comma seperated \"<key>=<value>:<effect>\"). No-op if register-node is false.")
|
||||
@ -264,4 +263,13 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
|
||||
fs.StringVar(&s.RemoteImageEndpoint, "image-service-endpoint", s.RemoteImageEndpoint, "[Experimental] The unix socket endpoint of remote image service. If not specified, it will be the same with container-runtime-endpoint by default. The endpoint is used only when CRI integration is enabled (--enable-cri)")
|
||||
|
||||
fs.BoolVar(&s.ExperimentalCheckNodeCapabilitiesBeforeMount, "experimental-check-node-capabilities-before-mount", s.ExperimentalCheckNodeCapabilitiesBeforeMount, "[Experimental] if set true, the kubelet will check the underlying node for required componenets (binaries, etc.) before performing the mount")
|
||||
|
||||
// Node Allocatable Flags
|
||||
fs.Var(&s.SystemReserved, "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
|
||||
fs.Var(&s.KubeReserved, "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
|
||||
|
||||
fs.StringSliceVar(&s.EnforceNodeAllocatable, "enforce-node-allocatable", s.EnforceNodeAllocatable, "A comma separated list of levels of node allocatable enforcement to be enforced by kubelet. Acceptible options are 'pods', 'system-reserved' & 'kube-reserved'. If the latter two options are specified, '--system-reserved-cgroup' & '--kube-reserved-cgroup' must also be set respectively. See https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md for more details. [default='']")
|
||||
fs.StringVar(&s.SystemReservedCgroup, "system-reserved-cgroup", s.SystemReservedCgroup, "Absolute name of the top level cgroup that is used to manage non-kubernetes components for which compute resources were reserved via '--system-reserved' flag. Ex. '/system-reserved'. [default='']")
|
||||
fs.StringVar(&s.KubeReservedCgroup, "kube-reserved-cgroup", s.KubeReservedCgroup, "Absolute name of the top level cgroup that is used to manage kubernetes components for which compute resources were reserved via '--kube-reserved' flag. Ex. '/kube-reserved'. [default='']")
|
||||
fs.BoolVar(&s.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "experimental-node-allocatable-ignore-eviction-threshold", s.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "When set to 'true', Hard Eviction Thresholds will be ignored while calculating Node Allocatable. See https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md for more details. [default=false]")
|
||||
}
|
||||
|
@ -40,6 +40,7 @@ import (
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
"k8s.io/apiserver/pkg/server/healthz"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
@ -469,6 +470,14 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
|
||||
CgroupDriver: s.CgroupDriver,
|
||||
ProtectKernelDefaults: s.ProtectKernelDefaults,
|
||||
EnableCRI: s.EnableCRI,
|
||||
NodeAllocatableConfig: cm.NodeAllocatableConfig{
|
||||
KubeReservedCgroupName: s.KubeReservedCgroup,
|
||||
SystemReservedCgroupName: s.SystemReservedCgroup,
|
||||
EnforceNodeAllocatable: sets.NewString(s.EnforceNodeAllocatable...),
|
||||
KubeReserved: kubeReserved,
|
||||
SystemReserved: systemReserved,
|
||||
HardEvictionThresholds: hardEvictionThresholds,
|
||||
},
|
||||
},
|
||||
s.ExperimentalFailSwapOn)
|
||||
|
||||
|
@ -442,16 +442,6 @@ type KubeletConfiguration struct {
|
||||
// manage attachment/detachment of volumes scheduled to this node, and
|
||||
// disables kubelet from executing any attach/detach operations
|
||||
EnableControllerAttachDetach bool
|
||||
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
|
||||
// that describe resources reserved for non-kubernetes components.
|
||||
// Currently only cpu and memory are supported. [default=none]
|
||||
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
|
||||
SystemReserved ConfigurationMap
|
||||
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
|
||||
// that describe resources reserved for kubernetes system components.
|
||||
// Currently only cpu and memory are supported. [default=none]
|
||||
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
|
||||
KubeReserved ConfigurationMap
|
||||
// Default behaviour for kernel tuning
|
||||
ProtectKernelDefaults bool
|
||||
// If true, Kubelet ensures a set of iptables rules are present on host.
|
||||
@ -485,6 +475,32 @@ type KubeletConfiguration struct {
|
||||
// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node.
|
||||
// This can be useful for debugging volume related issues.
|
||||
KeepTerminatedPodVolumes bool
|
||||
|
||||
/* following flags are meant for Node Allocatable */
|
||||
|
||||
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
|
||||
// that describe resources reserved for non-kubernetes components.
|
||||
// Currently only cpu and memory are supported. [default=none]
|
||||
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
|
||||
SystemReserved ConfigurationMap
|
||||
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
|
||||
// that describe resources reserved for kubernetes system components.
|
||||
// Currently only cpu and memory are supported. [default=none]
|
||||
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
|
||||
KubeReserved ConfigurationMap
|
||||
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons.
|
||||
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
|
||||
SystemReservedCgroup string
|
||||
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons.
|
||||
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
|
||||
KubeReservedCgroup string
|
||||
// This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform.
|
||||
// This flag accepts a list of options. Acceptible options are `pods`, `system-reserved` & `kube-reserved`.
|
||||
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
|
||||
EnforceNodeAllocatable []string
|
||||
// This flag, if set, will avoid including `EvictionHard` limits while computing Node Allocatable.
|
||||
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
|
||||
ExperimentalNodeAllocatableIgnoreEvictionThreshold bool
|
||||
}
|
||||
|
||||
type KubeletAuthorizationMode string
|
||||
|
@ -48,7 +48,12 @@ const (
|
||||
defaultIPTablesDropBit = 15
|
||||
)
|
||||
|
||||
var zeroDuration = metav1.Duration{}
|
||||
var (
|
||||
zeroDuration = metav1.Duration{}
|
||||
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
|
||||
// TODO: Set the default to "pods" once cgroups per qos is turned on by default.
|
||||
defaultNodeAllocatableEnforcement = []string{}
|
||||
)
|
||||
|
||||
func addDefaultingFuncs(scheme *kruntime.Scheme) error {
|
||||
RegisterDefaults(scheme)
|
||||
@ -401,6 +406,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
|
||||
if obj.CgroupDriver == "" {
|
||||
obj.CgroupDriver = "cgroupfs"
|
||||
}
|
||||
if obj.EnforceNodeAllocatable == nil {
|
||||
obj.EnforceNodeAllocatable = defaultNodeAllocatableEnforcement
|
||||
}
|
||||
if obj.EnableCRI == nil {
|
||||
obj.EnableCRI = boolVar(true)
|
||||
}
|
||||
|
@ -478,16 +478,6 @@ type KubeletConfiguration struct {
|
||||
// manage attachment/detachment of volumes scheduled to this node, and
|
||||
// disables kubelet from executing any attach/detach operations
|
||||
EnableControllerAttachDetach *bool `json:"enableControllerAttachDetach"`
|
||||
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
|
||||
// that describe resources reserved for non-kubernetes components.
|
||||
// Currently only cpu and memory are supported. [default=none]
|
||||
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
|
||||
SystemReserved map[string]string `json:"systemReserved"`
|
||||
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
|
||||
// that describe resources reserved for kubernetes system components.
|
||||
// Currently only cpu and memory are supported. [default=none]
|
||||
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
|
||||
KubeReserved map[string]string `json:"kubeReserved"`
|
||||
// Default behaviour for kernel tuning
|
||||
ProtectKernelDefaults bool `json:"protectKernelDefaults"`
|
||||
// If true, Kubelet ensures a set of iptables rules are present on host.
|
||||
@ -522,6 +512,33 @@ type KubeletConfiguration struct {
|
||||
// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node.
|
||||
// This can be useful for debugging volume related issues.
|
||||
KeepTerminatedPodVolumes bool `json:"keepTerminatedPodVolumes,omitempty"`
|
||||
|
||||
/* following flags are meant for Node Allocatable */
|
||||
|
||||
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
|
||||
// that describe resources reserved for non-kubernetes components.
|
||||
// Currently only cpu and memory are supported. [default=none]
|
||||
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
|
||||
SystemReserved map[string]string `json:"systemReserved"`
|
||||
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
|
||||
// that describe resources reserved for kubernetes system components.
|
||||
// Currently only cpu and memory are supported. [default=none]
|
||||
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
|
||||
KubeReserved map[string]string `json:"kubeReserved"`
|
||||
|
||||
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons.
|
||||
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
|
||||
SystemReservedCgroup string `json:"systemReservedCgroup,omitempty"`
|
||||
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons.
|
||||
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
|
||||
KubeReservedCgroup string `json:"kubeReservedCgroup,omitempty"`
|
||||
// This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform.
|
||||
// This flag accepts a list of options. Acceptible options are `pods`, `system-reserved` & `kube-reserved`.
|
||||
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
|
||||
EnforceNodeAllocatable []string `json:"enforceNodeAllocatable,omitempty"`
|
||||
// This flag, if set, will avoid including `EvictionHard` limits while computing Node Allocatable.
|
||||
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
|
||||
ExperimentalNodeAllocatableIgnoreEvictionThreshold bool `json:"experimentalNodeAllocatableIgnoreEvictionThreshold,omitempty"`
|
||||
}
|
||||
|
||||
type KubeletAuthorizationMode string
|
||||
|
@ -396,8 +396,6 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
|
||||
if err := v1.Convert_Pointer_bool_To_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.SystemReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.SystemReserved))
|
||||
out.KubeReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.KubeReserved))
|
||||
out.ProtectKernelDefaults = in.ProtectKernelDefaults
|
||||
if err := v1.Convert_Pointer_bool_To_bool(&in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains, s); err != nil {
|
||||
return err
|
||||
@ -416,6 +414,12 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
|
||||
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
|
||||
out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount
|
||||
out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes
|
||||
out.SystemReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.SystemReserved))
|
||||
out.KubeReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.KubeReserved))
|
||||
out.SystemReservedCgroup = in.SystemReservedCgroup
|
||||
out.KubeReservedCgroup = in.KubeReservedCgroup
|
||||
out.EnforceNodeAllocatable = *(*[]string)(unsafe.Pointer(&in.EnforceNodeAllocatable))
|
||||
out.ExperimentalNodeAllocatableIgnoreEvictionThreshold = in.ExperimentalNodeAllocatableIgnoreEvictionThreshold
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -570,8 +574,6 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
|
||||
if err := v1.Convert_bool_To_Pointer_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.SystemReserved = *(*map[string]string)(unsafe.Pointer(&in.SystemReserved))
|
||||
out.KubeReserved = *(*map[string]string)(unsafe.Pointer(&in.KubeReserved))
|
||||
out.ProtectKernelDefaults = in.ProtectKernelDefaults
|
||||
if err := v1.Convert_bool_To_Pointer_bool(&in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains, s); err != nil {
|
||||
return err
|
||||
@ -590,6 +592,12 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
|
||||
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
|
||||
out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount
|
||||
out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes
|
||||
out.SystemReserved = *(*map[string]string)(unsafe.Pointer(&in.SystemReserved))
|
||||
out.KubeReserved = *(*map[string]string)(unsafe.Pointer(&in.KubeReserved))
|
||||
out.SystemReservedCgroup = in.SystemReservedCgroup
|
||||
out.KubeReservedCgroup = in.KubeReservedCgroup
|
||||
out.EnforceNodeAllocatable = *(*[]string)(unsafe.Pointer(&in.EnforceNodeAllocatable))
|
||||
out.ExperimentalNodeAllocatableIgnoreEvictionThreshold = in.ExperimentalNodeAllocatableIgnoreEvictionThreshold
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -266,20 +266,6 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
}
|
||||
if in.SystemReserved != nil {
|
||||
in, out := &in.SystemReserved, &out.SystemReserved
|
||||
*out = make(map[string]string)
|
||||
for key, val := range *in {
|
||||
(*out)[key] = val
|
||||
}
|
||||
}
|
||||
if in.KubeReserved != nil {
|
||||
in, out := &in.KubeReserved, &out.KubeReserved
|
||||
*out = make(map[string]string)
|
||||
for key, val := range *in {
|
||||
(*out)[key] = val
|
||||
}
|
||||
}
|
||||
if in.MakeIPTablesUtilChains != nil {
|
||||
in, out := &in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains
|
||||
*out = new(bool)
|
||||
@ -305,6 +291,25 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
}
|
||||
if in.SystemReserved != nil {
|
||||
in, out := &in.SystemReserved, &out.SystemReserved
|
||||
*out = make(map[string]string)
|
||||
for key, val := range *in {
|
||||
(*out)[key] = val
|
||||
}
|
||||
}
|
||||
if in.KubeReserved != nil {
|
||||
in, out := &in.KubeReserved, &out.KubeReserved
|
||||
*out = make(map[string]string)
|
||||
for key, val := range *in {
|
||||
(*out)[key] = val
|
||||
}
|
||||
}
|
||||
if in.EnforceNodeAllocatable != nil {
|
||||
in, out := &in.EnforceNodeAllocatable, &out.EnforceNodeAllocatable
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
@ -177,6 +177,11 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
|
||||
(*out)[key] = val
|
||||
}
|
||||
}
|
||||
if in.AllowedUnsafeSysctls != nil {
|
||||
in, out := &in.AllowedUnsafeSysctls, &out.AllowedUnsafeSysctls
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.SystemReserved != nil {
|
||||
in, out := &in.SystemReserved, &out.SystemReserved
|
||||
*out = make(ConfigurationMap)
|
||||
@ -191,8 +196,8 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
|
||||
(*out)[key] = val
|
||||
}
|
||||
}
|
||||
if in.AllowedUnsafeSysctls != nil {
|
||||
in, out := &in.AllowedUnsafeSysctls, &out.AllowedUnsafeSysctls
|
||||
if in.EnforceNodeAllocatable != nil {
|
||||
in, out := &in.EnforceNodeAllocatable, &out.EnforceNodeAllocatable
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
|
@ -13283,34 +13283,6 @@ func GetOpenAPIDefinitions(ref openapi.ReferenceCallback) map[string]openapi.Ope
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"systemReserved": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.",
|
||||
Type: []string{"object"},
|
||||
AdditionalProperties: &spec.SchemaOrBool{
|
||||
Schema: &spec.Schema{
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"kubeReserved": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.",
|
||||
Type: []string{"object"},
|
||||
AdditionalProperties: &spec.SchemaOrBool{
|
||||
Schema: &spec.Schema{
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"protectKernelDefaults": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Default behaviour for kernel tuning",
|
||||
@ -13388,8 +13360,71 @@ func GetOpenAPIDefinitions(ref openapi.ReferenceCallback) map[string]openapi.Ope
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"systemReserved": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.",
|
||||
Type: []string{"object"},
|
||||
AdditionalProperties: &spec.SchemaOrBool{
|
||||
Schema: &spec.Schema{
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
Required: []string{"podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"kubeReserved": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.",
|
||||
Type: []string{"object"},
|
||||
AdditionalProperties: &spec.SchemaOrBool{
|
||||
Schema: &spec.Schema{
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"systemReservedCgroup": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.",
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"kubeReservedCgroup": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.",
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"enforceNodeAllocatable": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform. This flag accepts a list of options. Acceptible options are `pods`, `system-reserved` & `kube-reserved`. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.",
|
||||
Type: []string{"array"},
|
||||
Items: &spec.SchemaOrArray{
|
||||
Schema: &spec.Schema{
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"experimentalNodeAllocatableIgnoreEvictionThreshold": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "This flag, if set, will avoid including `EvictionHard` limits while computing Node Allocatable. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.",
|
||||
Type: []string{"boolean"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
Required: []string{"podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "systemReserved", "kubeReserved"},
|
||||
},
|
||||
},
|
||||
Dependencies: []string{
|
||||
|
@ -16,7 +16,10 @@ limitations under the License.
|
||||
|
||||
package cm
|
||||
|
||||
import "k8s.io/kubernetes/pkg/api/v1"
|
||||
import (
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/kubernetes/pkg/api/v1"
|
||||
)
|
||||
|
||||
// Manages the containers running on a machine.
|
||||
type ContainerManager interface {
|
||||
@ -56,6 +59,16 @@ type NodeConfig struct {
|
||||
CgroupDriver string
|
||||
ProtectKernelDefaults bool
|
||||
EnableCRI bool
|
||||
NodeAllocatableConfig
|
||||
}
|
||||
|
||||
type NodeAllocatableConfig struct {
|
||||
KubeReservedCgroupName string
|
||||
SystemReservedCgroupName string
|
||||
EnforceNodeAllocatable sets.String
|
||||
KubeReserved v1.ResourceList
|
||||
SystemReserved v1.ResourceList
|
||||
HardEvictionThresholds []evictionapi.Threshold
|
||||
}
|
||||
|
||||
type Status struct {
|
||||
|
164
pkg/kubelet/cm/node_allocatable.go
Normal file
164
pkg/kubelet/cm/node_allocatable.go
Normal file
@ -0,0 +1,164 @@
|
||||
// +build linux
|
||||
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/golang/glog"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api/v1"
|
||||
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultNodeAllocatableCgroupName = "/kubepods"
|
||||
nodeAllocatableEnforcementKey = "pods"
|
||||
systemReservedEnforcementKey = "system-reserved"
|
||||
kubeReservedEnforcementKey = "kube-reserved"
|
||||
)
|
||||
|
||||
func createNodeAllocatableCgroups(nc NodeAllocatableConfig, nodeAllocatable v1.ResourceList, cgroupManager CgroupManager) error {
|
||||
cgroupConfig := &CgroupConfig{
|
||||
Name: CgroupName(defaultNodeAllocatableCgroupName),
|
||||
}
|
||||
if err := cgroupManager.Create(cgroupConfig); err != nil {
|
||||
glog.Errorf("Failed to create %q cgroup and apply limits")
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Enforce Node Allocatable Cgroup settings.
|
||||
func enforceNodeAllocatableCgroups(nc NodeAllocatableConfig, nodeAllocatable v1.ResourceList, cgroupManager CgroupManager) error {
|
||||
glog.V(4).Infof("Attempting to enforce Node Allocatable with config: %+v", nc)
|
||||
glog.V(4).Infof("Node Allocatable resources: %+v", nodeAllocatable)
|
||||
// Create top level cgroups for all pods if necessary.
|
||||
if nc.EnforceNodeAllocatable.Has(nodeAllocatableEnforcementKey) {
|
||||
cgroupConfig := &CgroupConfig{
|
||||
Name: CgroupName(defaultNodeAllocatableCgroupName),
|
||||
ResourceParameters: getCgroupConfig(nodeAllocatable),
|
||||
}
|
||||
glog.V(4).Infof("Updating Node Allocatable cgroup with %d cpu shares and %d bytes of memory", cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory)
|
||||
if err := cgroupManager.Update(cgroupConfig); err != nil {
|
||||
glog.Errorf("Failed to create %q cgroup and apply limits")
|
||||
return err
|
||||
}
|
||||
}
|
||||
// Now apply kube reserved and system reserved limits if required.
|
||||
if nc.EnforceNodeAllocatable.Has(systemReservedEnforcementKey) {
|
||||
glog.V(2).Infof("Enforcing system reserved on cgroup %q with limits: %+v", nc.SystemReservedCgroupName, nc.SystemReserved)
|
||||
if err := enforceExistingCgroup(cgroupManager, nc.SystemReservedCgroupName, nc.SystemReserved); err != nil {
|
||||
return fmt.Errorf("failed to enforce System Reserved Cgroup Limits: %v", err)
|
||||
}
|
||||
}
|
||||
if nc.EnforceNodeAllocatable.Has(kubeReservedEnforcementKey) {
|
||||
glog.V(2).Infof("Enforcing kube reserved on cgroup %q with limits: %+v", nc.KubeReservedCgroupName, nc.KubeReserved)
|
||||
if err := enforceExistingCgroup(cgroupManager, nc.KubeReservedCgroupName, nc.KubeReserved); err != nil {
|
||||
return fmt.Errorf("failed to enforce Kube Reserved Cgroup Limits: %v", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func enforceExistingCgroup(cgroupManager CgroupManager, cName string, rl v1.ResourceList) error {
|
||||
cgroupConfig := &CgroupConfig{
|
||||
Name: CgroupName(cName),
|
||||
ResourceParameters: getCgroupConfig(rl),
|
||||
}
|
||||
glog.V(4).Infof("Enforcing limits on cgroup %q with %d cpu shares and %d bytes of memory", cName, cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory)
|
||||
if !cgroupManager.Exists(cgroupConfig.Name) {
|
||||
return fmt.Errorf("%q cgroup does not exist", cgroupConfig.Name)
|
||||
}
|
||||
if err := cgroupManager.Update(cgroupConfig); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getCgroupConfig(rl v1.ResourceList) *ResourceConfig {
|
||||
// TODO(vishh): Set CPU Quota if necessary.
|
||||
if rl == nil {
|
||||
return nil
|
||||
}
|
||||
var rc ResourceConfig
|
||||
if q, exists := rl[v1.ResourceMemory]; exists {
|
||||
// Memory is defined in bytes.
|
||||
val := q.Value()
|
||||
rc.Memory = &val
|
||||
}
|
||||
if q, exists := rl[v1.ResourceCPU]; exists {
|
||||
// CPU is defined in milli-cores.
|
||||
val := MilliCPUToShares(q.MilliValue())
|
||||
rc.CpuShares = &val
|
||||
}
|
||||
return &rc
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) getNodeAllocatableInternal(includeHardEviction bool) v1.ResourceList {
|
||||
var evictionReservation v1.ResourceList
|
||||
if includeHardEviction {
|
||||
evictionReservation = hardEvictionReservation(cm.HardEvictionThresholds, cm.capacity)
|
||||
}
|
||||
result := make(v1.ResourceList)
|
||||
for k, v := range cm.capacity {
|
||||
value := *(v.Copy())
|
||||
if cm.NodeConfig.SystemReserved != nil {
|
||||
value.Sub(cm.NodeConfig.SystemReserved[k])
|
||||
}
|
||||
if cm.NodeConfig.KubeReserved != nil {
|
||||
value.Sub(cm.NodeConfig.KubeReserved[k])
|
||||
}
|
||||
if evictionReservation != nil {
|
||||
value.Sub(evictionReservation[k])
|
||||
}
|
||||
if value.Sign() < 0 {
|
||||
// Negative Allocatable resources don't make sense.
|
||||
value.Set(0)
|
||||
}
|
||||
result[k] = value
|
||||
}
|
||||
return result
|
||||
|
||||
}
|
||||
|
||||
// GetNodeAllocatable returns amount of compute resource available for pods.
|
||||
func (cm *containerManagerImpl) GetNodeAllocatable() v1.ResourceList {
|
||||
return cm.getNodeAllocatableInternal(!cm.NodeConfig.IgnoreHardEvictionThreshold)
|
||||
}
|
||||
|
||||
// hardEvictionReservation returns a resourcelist that includes reservation of resources based on hard eviction thresholds.
|
||||
func hardEvictionReservation(thresholds []evictionapi.Threshold, capacity v1.ResourceList) v1.ResourceList {
|
||||
if len(thresholds) == 0 {
|
||||
return nil
|
||||
}
|
||||
ret := v1.ResourceList{}
|
||||
for _, threshold := range thresholds {
|
||||
if threshold.Operator != evictionapi.OpLessThan {
|
||||
continue
|
||||
}
|
||||
switch threshold.Signal {
|
||||
case evictionapi.SignalMemoryAvailable:
|
||||
memoryCapacity := capacity[v1.ResourceMemory]
|
||||
value := evictionapi.GetThresholdQuantity(threshold.Value, &memoryCapacity)
|
||||
ret[v1.ResourceMemory] = *value
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
Loading…
Reference in New Issue
Block a user