adding kubelet flags for node allocatable phase 2

Signed-off-by: Vishnu Kannan <vishnuk@google.com>
This commit is contained in:
Vishnu Kannan 2017-02-09 12:13:28 -08:00 committed by Vishnu kannan
parent 9a1f0574a4
commit 70e340b045
11 changed files with 361 additions and 73 deletions

View File

@ -225,8 +225,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.Float64Var(&s.ChaosChance, "chaos-chance", s.ChaosChance, "If > 0.0, introduce random client errors and latency. Intended for testing. [default=0.0]")
fs.BoolVar(&s.Containerized, "containerized", s.Containerized, "Experimental support for running kubelet in a container. Intended for testing. [default=false]")
fs.Int64Var(&s.MaxOpenFiles, "max-open-files", s.MaxOpenFiles, "Number of files that can be opened by Kubelet process. [default=1000000]")
fs.Var(&s.SystemReserved, "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.Var(&s.KubeReserved, "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.BoolVar(&s.RegisterSchedulable, "register-schedulable", s.RegisterSchedulable, "Register the node as schedulable. Won't have any effect if register-node is false. [default=true]")
fs.MarkDeprecated("register-schedulable", "will be removed in a future version")
fs.Var(utiltaints.NewTaintsVar(&s.RegisterWithTaints), "register-with-taints", "Register the node with the given list of taints (comma seperated \"<key>=<value>:<effect>\"). No-op if register-node is false.")
@ -264,4 +263,13 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.RemoteImageEndpoint, "image-service-endpoint", s.RemoteImageEndpoint, "[Experimental] The unix socket endpoint of remote image service. If not specified, it will be the same with container-runtime-endpoint by default. The endpoint is used only when CRI integration is enabled (--enable-cri)")
fs.BoolVar(&s.ExperimentalCheckNodeCapabilitiesBeforeMount, "experimental-check-node-capabilities-before-mount", s.ExperimentalCheckNodeCapabilitiesBeforeMount, "[Experimental] if set true, the kubelet will check the underlying node for required componenets (binaries, etc.) before performing the mount")
// Node Allocatable Flags
fs.Var(&s.SystemReserved, "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.Var(&s.KubeReserved, "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.StringSliceVar(&s.EnforceNodeAllocatable, "enforce-node-allocatable", s.EnforceNodeAllocatable, "A comma separated list of levels of node allocatable enforcement to be enforced by kubelet. Acceptible options are 'pods', 'system-reserved' & 'kube-reserved'. If the latter two options are specified, '--system-reserved-cgroup' & '--kube-reserved-cgroup' must also be set respectively. See https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md for more details. [default='']")
fs.StringVar(&s.SystemReservedCgroup, "system-reserved-cgroup", s.SystemReservedCgroup, "Absolute name of the top level cgroup that is used to manage non-kubernetes components for which compute resources were reserved via '--system-reserved' flag. Ex. '/system-reserved'. [default='']")
fs.StringVar(&s.KubeReservedCgroup, "kube-reserved-cgroup", s.KubeReservedCgroup, "Absolute name of the top level cgroup that is used to manage kubernetes components for which compute resources were reserved via '--kube-reserved' flag. Ex. '/kube-reserved'. [default='']")
fs.BoolVar(&s.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "experimental-node-allocatable-ignore-eviction-threshold", s.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "When set to 'true', Hard Eviction Thresholds will be ignored while calculating Node Allocatable. See https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md for more details. [default=false]")
}

View File

@ -40,6 +40,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/server/healthz"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -469,6 +470,14 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
CgroupDriver: s.CgroupDriver,
ProtectKernelDefaults: s.ProtectKernelDefaults,
EnableCRI: s.EnableCRI,
NodeAllocatableConfig: cm.NodeAllocatableConfig{
KubeReservedCgroupName: s.KubeReservedCgroup,
SystemReservedCgroupName: s.SystemReservedCgroup,
EnforceNodeAllocatable: sets.NewString(s.EnforceNodeAllocatable...),
KubeReserved: kubeReserved,
SystemReserved: systemReserved,
HardEvictionThresholds: hardEvictionThresholds,
},
},
s.ExperimentalFailSwapOn)

View File

@ -442,16 +442,6 @@ type KubeletConfiguration struct {
// manage attachment/detachment of volumes scheduled to this node, and
// disables kubelet from executing any attach/detach operations
EnableControllerAttachDetach bool
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for non-kubernetes components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
SystemReserved ConfigurationMap
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for kubernetes system components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
KubeReserved ConfigurationMap
// Default behaviour for kernel tuning
ProtectKernelDefaults bool
// If true, Kubelet ensures a set of iptables rules are present on host.
@ -485,6 +475,32 @@ type KubeletConfiguration struct {
// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node.
// This can be useful for debugging volume related issues.
KeepTerminatedPodVolumes bool
/* following flags are meant for Node Allocatable */
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for non-kubernetes components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
SystemReserved ConfigurationMap
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for kubernetes system components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
KubeReserved ConfigurationMap
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
SystemReservedCgroup string
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
KubeReservedCgroup string
// This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform.
// This flag accepts a list of options. Acceptible options are `pods`, `system-reserved` & `kube-reserved`.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
EnforceNodeAllocatable []string
// This flag, if set, will avoid including `EvictionHard` limits while computing Node Allocatable.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
ExperimentalNodeAllocatableIgnoreEvictionThreshold bool
}
type KubeletAuthorizationMode string

View File

@ -48,7 +48,12 @@ const (
defaultIPTablesDropBit = 15
)
var zeroDuration = metav1.Duration{}
var (
zeroDuration = metav1.Duration{}
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
// TODO: Set the default to "pods" once cgroups per qos is turned on by default.
defaultNodeAllocatableEnforcement = []string{}
)
func addDefaultingFuncs(scheme *kruntime.Scheme) error {
RegisterDefaults(scheme)
@ -401,6 +406,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
if obj.CgroupDriver == "" {
obj.CgroupDriver = "cgroupfs"
}
if obj.EnforceNodeAllocatable == nil {
obj.EnforceNodeAllocatable = defaultNodeAllocatableEnforcement
}
if obj.EnableCRI == nil {
obj.EnableCRI = boolVar(true)
}

View File

@ -478,16 +478,6 @@ type KubeletConfiguration struct {
// manage attachment/detachment of volumes scheduled to this node, and
// disables kubelet from executing any attach/detach operations
EnableControllerAttachDetach *bool `json:"enableControllerAttachDetach"`
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for non-kubernetes components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
SystemReserved map[string]string `json:"systemReserved"`
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for kubernetes system components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
KubeReserved map[string]string `json:"kubeReserved"`
// Default behaviour for kernel tuning
ProtectKernelDefaults bool `json:"protectKernelDefaults"`
// If true, Kubelet ensures a set of iptables rules are present on host.
@ -522,6 +512,33 @@ type KubeletConfiguration struct {
// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node.
// This can be useful for debugging volume related issues.
KeepTerminatedPodVolumes bool `json:"keepTerminatedPodVolumes,omitempty"`
/* following flags are meant for Node Allocatable */
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for non-kubernetes components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
SystemReserved map[string]string `json:"systemReserved"`
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// that describe resources reserved for kubernetes system components.
// Currently only cpu and memory are supported. [default=none]
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
KubeReserved map[string]string `json:"kubeReserved"`
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
SystemReservedCgroup string `json:"systemReservedCgroup,omitempty"`
// This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
KubeReservedCgroup string `json:"kubeReservedCgroup,omitempty"`
// This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform.
// This flag accepts a list of options. Acceptible options are `pods`, `system-reserved` & `kube-reserved`.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
EnforceNodeAllocatable []string `json:"enforceNodeAllocatable,omitempty"`
// This flag, if set, will avoid including `EvictionHard` limits while computing Node Allocatable.
// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.
ExperimentalNodeAllocatableIgnoreEvictionThreshold bool `json:"experimentalNodeAllocatableIgnoreEvictionThreshold,omitempty"`
}
type KubeletAuthorizationMode string

View File

@ -396,8 +396,6 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
if err := v1.Convert_Pointer_bool_To_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
return err
}
out.SystemReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.SystemReserved))
out.KubeReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.KubeReserved))
out.ProtectKernelDefaults = in.ProtectKernelDefaults
if err := v1.Convert_Pointer_bool_To_bool(&in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains, s); err != nil {
return err
@ -416,6 +414,12 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount
out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes
out.SystemReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.SystemReserved))
out.KubeReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.KubeReserved))
out.SystemReservedCgroup = in.SystemReservedCgroup
out.KubeReservedCgroup = in.KubeReservedCgroup
out.EnforceNodeAllocatable = *(*[]string)(unsafe.Pointer(&in.EnforceNodeAllocatable))
out.ExperimentalNodeAllocatableIgnoreEvictionThreshold = in.ExperimentalNodeAllocatableIgnoreEvictionThreshold
return nil
}
@ -570,8 +574,6 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
if err := v1.Convert_bool_To_Pointer_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
return err
}
out.SystemReserved = *(*map[string]string)(unsafe.Pointer(&in.SystemReserved))
out.KubeReserved = *(*map[string]string)(unsafe.Pointer(&in.KubeReserved))
out.ProtectKernelDefaults = in.ProtectKernelDefaults
if err := v1.Convert_bool_To_Pointer_bool(&in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains, s); err != nil {
return err
@ -590,6 +592,12 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount
out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes
out.SystemReserved = *(*map[string]string)(unsafe.Pointer(&in.SystemReserved))
out.KubeReserved = *(*map[string]string)(unsafe.Pointer(&in.KubeReserved))
out.SystemReservedCgroup = in.SystemReservedCgroup
out.KubeReservedCgroup = in.KubeReservedCgroup
out.EnforceNodeAllocatable = *(*[]string)(unsafe.Pointer(&in.EnforceNodeAllocatable))
out.ExperimentalNodeAllocatableIgnoreEvictionThreshold = in.ExperimentalNodeAllocatableIgnoreEvictionThreshold
return nil
}

View File

@ -266,20 +266,6 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
*out = new(bool)
**out = **in
}
if in.SystemReserved != nil {
in, out := &in.SystemReserved, &out.SystemReserved
*out = make(map[string]string)
for key, val := range *in {
(*out)[key] = val
}
}
if in.KubeReserved != nil {
in, out := &in.KubeReserved, &out.KubeReserved
*out = make(map[string]string)
for key, val := range *in {
(*out)[key] = val
}
}
if in.MakeIPTablesUtilChains != nil {
in, out := &in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains
*out = new(bool)
@ -305,6 +291,25 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
*out = new(bool)
**out = **in
}
if in.SystemReserved != nil {
in, out := &in.SystemReserved, &out.SystemReserved
*out = make(map[string]string)
for key, val := range *in {
(*out)[key] = val
}
}
if in.KubeReserved != nil {
in, out := &in.KubeReserved, &out.KubeReserved
*out = make(map[string]string)
for key, val := range *in {
(*out)[key] = val
}
}
if in.EnforceNodeAllocatable != nil {
in, out := &in.EnforceNodeAllocatable, &out.EnforceNodeAllocatable
*out = make([]string, len(*in))
copy(*out, *in)
}
return nil
}
}

View File

@ -177,6 +177,11 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
(*out)[key] = val
}
}
if in.AllowedUnsafeSysctls != nil {
in, out := &in.AllowedUnsafeSysctls, &out.AllowedUnsafeSysctls
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.SystemReserved != nil {
in, out := &in.SystemReserved, &out.SystemReserved
*out = make(ConfigurationMap)
@ -191,8 +196,8 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
(*out)[key] = val
}
}
if in.AllowedUnsafeSysctls != nil {
in, out := &in.AllowedUnsafeSysctls, &out.AllowedUnsafeSysctls
if in.EnforceNodeAllocatable != nil {
in, out := &in.EnforceNodeAllocatable, &out.EnforceNodeAllocatable
*out = make([]string, len(*in))
copy(*out, *in)
}

View File

@ -13283,34 +13283,6 @@ func GetOpenAPIDefinitions(ref openapi.ReferenceCallback) map[string]openapi.Ope
Format: "",
},
},
"systemReserved": {
SchemaProps: spec.SchemaProps{
Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.",
Type: []string{"object"},
AdditionalProperties: &spec.SchemaOrBool{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Type: []string{"string"},
Format: "",
},
},
},
},
},
"kubeReserved": {
SchemaProps: spec.SchemaProps{
Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.",
Type: []string{"object"},
AdditionalProperties: &spec.SchemaOrBool{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Type: []string{"string"},
Format: "",
},
},
},
},
},
"protectKernelDefaults": {
SchemaProps: spec.SchemaProps{
Description: "Default behaviour for kernel tuning",
@ -13388,8 +13360,71 @@ func GetOpenAPIDefinitions(ref openapi.ReferenceCallback) map[string]openapi.Ope
Format: "",
},
},
"systemReserved": {
SchemaProps: spec.SchemaProps{
Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.",
Type: []string{"object"},
AdditionalProperties: &spec.SchemaOrBool{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Type: []string{"string"},
Format: "",
},
Required: []string{"podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
},
},
},
},
"kubeReserved": {
SchemaProps: spec.SchemaProps{
Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.",
Type: []string{"object"},
AdditionalProperties: &spec.SchemaOrBool{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Type: []string{"string"},
Format: "",
},
},
},
},
},
"systemReservedCgroup": {
SchemaProps: spec.SchemaProps{
Description: "This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.",
Type: []string{"string"},
Format: "",
},
},
"kubeReservedCgroup": {
SchemaProps: spec.SchemaProps{
Description: "This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.",
Type: []string{"string"},
Format: "",
},
},
"enforceNodeAllocatable": {
SchemaProps: spec.SchemaProps{
Description: "This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform. This flag accepts a list of options. Acceptible options are `pods`, `system-reserved` & `kube-reserved`. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.",
Type: []string{"array"},
Items: &spec.SchemaOrArray{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Type: []string{"string"},
Format: "",
},
},
},
},
},
"experimentalNodeAllocatableIgnoreEvictionThreshold": {
SchemaProps: spec.SchemaProps{
Description: "This flag, if set, will avoid including `EvictionHard` limits while computing Node Allocatable. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.",
Type: []string{"boolean"},
Format: "",
},
},
},
Required: []string{"podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "systemReserved", "kubeReserved"},
},
},
Dependencies: []string{

View File

@ -16,7 +16,10 @@ limitations under the License.
package cm
import "k8s.io/kubernetes/pkg/api/v1"
import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/api/v1"
)
// Manages the containers running on a machine.
type ContainerManager interface {
@ -56,6 +59,16 @@ type NodeConfig struct {
CgroupDriver string
ProtectKernelDefaults bool
EnableCRI bool
NodeAllocatableConfig
}
type NodeAllocatableConfig struct {
KubeReservedCgroupName string
SystemReservedCgroupName string
EnforceNodeAllocatable sets.String
KubeReserved v1.ResourceList
SystemReserved v1.ResourceList
HardEvictionThresholds []evictionapi.Threshold
}
type Status struct {

View File

@ -0,0 +1,164 @@
// +build linux
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cm
import (
"fmt"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
const (
defaultNodeAllocatableCgroupName = "/kubepods"
nodeAllocatableEnforcementKey = "pods"
systemReservedEnforcementKey = "system-reserved"
kubeReservedEnforcementKey = "kube-reserved"
)
func createNodeAllocatableCgroups(nc NodeAllocatableConfig, nodeAllocatable v1.ResourceList, cgroupManager CgroupManager) error {
cgroupConfig := &CgroupConfig{
Name: CgroupName(defaultNodeAllocatableCgroupName),
}
if err := cgroupManager.Create(cgroupConfig); err != nil {
glog.Errorf("Failed to create %q cgroup and apply limits")
return err
}
return nil
}
// Enforce Node Allocatable Cgroup settings.
func enforceNodeAllocatableCgroups(nc NodeAllocatableConfig, nodeAllocatable v1.ResourceList, cgroupManager CgroupManager) error {
glog.V(4).Infof("Attempting to enforce Node Allocatable with config: %+v", nc)
glog.V(4).Infof("Node Allocatable resources: %+v", nodeAllocatable)
// Create top level cgroups for all pods if necessary.
if nc.EnforceNodeAllocatable.Has(nodeAllocatableEnforcementKey) {
cgroupConfig := &CgroupConfig{
Name: CgroupName(defaultNodeAllocatableCgroupName),
ResourceParameters: getCgroupConfig(nodeAllocatable),
}
glog.V(4).Infof("Updating Node Allocatable cgroup with %d cpu shares and %d bytes of memory", cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory)
if err := cgroupManager.Update(cgroupConfig); err != nil {
glog.Errorf("Failed to create %q cgroup and apply limits")
return err
}
}
// Now apply kube reserved and system reserved limits if required.
if nc.EnforceNodeAllocatable.Has(systemReservedEnforcementKey) {
glog.V(2).Infof("Enforcing system reserved on cgroup %q with limits: %+v", nc.SystemReservedCgroupName, nc.SystemReserved)
if err := enforceExistingCgroup(cgroupManager, nc.SystemReservedCgroupName, nc.SystemReserved); err != nil {
return fmt.Errorf("failed to enforce System Reserved Cgroup Limits: %v", err)
}
}
if nc.EnforceNodeAllocatable.Has(kubeReservedEnforcementKey) {
glog.V(2).Infof("Enforcing kube reserved on cgroup %q with limits: %+v", nc.KubeReservedCgroupName, nc.KubeReserved)
if err := enforceExistingCgroup(cgroupManager, nc.KubeReservedCgroupName, nc.KubeReserved); err != nil {
return fmt.Errorf("failed to enforce Kube Reserved Cgroup Limits: %v", err)
}
}
return nil
}
func enforceExistingCgroup(cgroupManager CgroupManager, cName string, rl v1.ResourceList) error {
cgroupConfig := &CgroupConfig{
Name: CgroupName(cName),
ResourceParameters: getCgroupConfig(rl),
}
glog.V(4).Infof("Enforcing limits on cgroup %q with %d cpu shares and %d bytes of memory", cName, cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory)
if !cgroupManager.Exists(cgroupConfig.Name) {
return fmt.Errorf("%q cgroup does not exist", cgroupConfig.Name)
}
if err := cgroupManager.Update(cgroupConfig); err != nil {
return err
}
return nil
}
func getCgroupConfig(rl v1.ResourceList) *ResourceConfig {
// TODO(vishh): Set CPU Quota if necessary.
if rl == nil {
return nil
}
var rc ResourceConfig
if q, exists := rl[v1.ResourceMemory]; exists {
// Memory is defined in bytes.
val := q.Value()
rc.Memory = &val
}
if q, exists := rl[v1.ResourceCPU]; exists {
// CPU is defined in milli-cores.
val := MilliCPUToShares(q.MilliValue())
rc.CpuShares = &val
}
return &rc
}
func (cm *containerManagerImpl) getNodeAllocatableInternal(includeHardEviction bool) v1.ResourceList {
var evictionReservation v1.ResourceList
if includeHardEviction {
evictionReservation = hardEvictionReservation(cm.HardEvictionThresholds, cm.capacity)
}
result := make(v1.ResourceList)
for k, v := range cm.capacity {
value := *(v.Copy())
if cm.NodeConfig.SystemReserved != nil {
value.Sub(cm.NodeConfig.SystemReserved[k])
}
if cm.NodeConfig.KubeReserved != nil {
value.Sub(cm.NodeConfig.KubeReserved[k])
}
if evictionReservation != nil {
value.Sub(evictionReservation[k])
}
if value.Sign() < 0 {
// Negative Allocatable resources don't make sense.
value.Set(0)
}
result[k] = value
}
return result
}
// GetNodeAllocatable returns amount of compute resource available for pods.
func (cm *containerManagerImpl) GetNodeAllocatable() v1.ResourceList {
return cm.getNodeAllocatableInternal(!cm.NodeConfig.IgnoreHardEvictionThreshold)
}
// hardEvictionReservation returns a resourcelist that includes reservation of resources based on hard eviction thresholds.
func hardEvictionReservation(thresholds []evictionapi.Threshold, capacity v1.ResourceList) v1.ResourceList {
if len(thresholds) == 0 {
return nil
}
ret := v1.ResourceList{}
for _, threshold := range thresholds {
if threshold.Operator != evictionapi.OpLessThan {
continue
}
switch threshold.Signal {
case evictionapi.SignalMemoryAvailable:
memoryCapacity := capacity[v1.ResourceMemory]
value := evictionapi.GetThresholdQuantity(threshold.Value, &memoryCapacity)
ret[v1.ResourceMemory] = *value
}
}
return ret
}