mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 19:56:01 +00:00
Merge pull request #31996 from mtaufen/detect-swap-enabled
Automatic merge from submit-queue. Fail kubelet creation if swap is enabled and configured with memory eviction thresholds. /cc @vishh @derekwaynecarr **Release note**: ``` This adds an opt-in Kubelet flag (--experimental-fail-swap-on) that causes the Kubelet to fail to start if swap is enabled on the node. This is a temporary opt-in, and the Kubelet will by default fail with swap enabled starting in 1.6.0. The KubeletConfiguration equivalent to the flag is ExperimentalFailSwapOn. ```
This commit is contained in:
commit
31fbb771a2
@ -78,6 +78,9 @@ func NewKubeletServer() *KubeletServer {
|
|||||||
|
|
||||||
// AddFlags adds flags for a specific KubeletServer to the specified FlagSet
|
// AddFlags adds flags for a specific KubeletServer to the specified FlagSet
|
||||||
func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
|
func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
|
||||||
|
// TODO(#34726:1.8.0): Remove the opt-in for failing when swap is enabled.
|
||||||
|
fs.BoolVar(&s.ExperimentalFailSwapOn, "experimental-fail-swap-on", s.ExperimentalFailSwapOn, "Makes the Kubelet fail to start if swap is enabled on the node. This is a temporary opton to maintain legacy behavior, failing due to swap enabled will happen by default in v1.6.")
|
||||||
|
|
||||||
fs.Var(&s.KubeConfig, "kubeconfig", "Path to a kubeconfig file, specifying how to connect to the API server. --api-servers will be used for the location unless --require-kubeconfig is set.")
|
fs.Var(&s.KubeConfig, "kubeconfig", "Path to a kubeconfig file, specifying how to connect to the API server. --api-servers will be used for the location unless --require-kubeconfig is set.")
|
||||||
fs.BoolVar(&s.RequireKubeConfig, "require-kubeconfig", s.RequireKubeConfig, "If true the Kubelet will exit if there are configuration errors, and will ignore the value of --api-servers in favor of the server defined in the kubeconfig file.")
|
fs.BoolVar(&s.RequireKubeConfig, "require-kubeconfig", s.RequireKubeConfig, "If true the Kubelet will exit if there are configuration errors, and will ignore the value of --api-servers in favor of the server defined in the kubeconfig file.")
|
||||||
|
|
||||||
|
@ -434,17 +434,22 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
|
|||||||
if s.SystemCgroups != "" && s.CgroupRoot == "" {
|
if s.SystemCgroups != "" && s.CgroupRoot == "" {
|
||||||
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
|
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
|
||||||
}
|
}
|
||||||
kubeDeps.ContainerManager, err = cm.NewContainerManager(kubeDeps.Mounter, kubeDeps.CAdvisorInterface, cm.NodeConfig{
|
kubeDeps.ContainerManager, err = cm.NewContainerManager(
|
||||||
RuntimeCgroupsName: s.RuntimeCgroups,
|
kubeDeps.Mounter,
|
||||||
SystemCgroupsName: s.SystemCgroups,
|
kubeDeps.CAdvisorInterface,
|
||||||
KubeletCgroupsName: s.KubeletCgroups,
|
cm.NodeConfig{
|
||||||
ContainerRuntime: s.ContainerRuntime,
|
RuntimeCgroupsName: s.RuntimeCgroups,
|
||||||
CgroupsPerQOS: s.CgroupsPerQOS,
|
SystemCgroupsName: s.SystemCgroups,
|
||||||
CgroupRoot: s.CgroupRoot,
|
KubeletCgroupsName: s.KubeletCgroups,
|
||||||
CgroupDriver: s.CgroupDriver,
|
ContainerRuntime: s.ContainerRuntime,
|
||||||
ProtectKernelDefaults: s.ProtectKernelDefaults,
|
CgroupsPerQOS: s.CgroupsPerQOS,
|
||||||
EnableCRI: s.EnableCRI,
|
CgroupRoot: s.CgroupRoot,
|
||||||
})
|
CgroupDriver: s.CgroupDriver,
|
||||||
|
ProtectKernelDefaults: s.ProtectKernelDefaults,
|
||||||
|
EnableCRI: s.EnableCRI,
|
||||||
|
},
|
||||||
|
s.ExperimentalFailSwapOn)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -202,6 +202,7 @@ external-etcd-keyfile
|
|||||||
external-hostname
|
external-hostname
|
||||||
external-ip
|
external-ip
|
||||||
extra-peer-dirs
|
extra-peer-dirs
|
||||||
|
experimental-fail-swap-on
|
||||||
failover-timeout
|
failover-timeout
|
||||||
failure-domains
|
failure-domains
|
||||||
fake-clientset
|
fake-clientset
|
||||||
|
@ -465,6 +465,9 @@ type KubeletConfiguration struct {
|
|||||||
// Enable Container Runtime Interface (CRI) integration.
|
// Enable Container Runtime Interface (CRI) integration.
|
||||||
// +optional
|
// +optional
|
||||||
EnableCRI bool `json:"enableCRI,omitempty"`
|
EnableCRI bool `json:"enableCRI,omitempty"`
|
||||||
|
// TODO(#34726:1.8.0): Remove the opt-in for failing when swap is enabled.
|
||||||
|
// Tells the Kubelet to fail to start if swap is enabled on the node.
|
||||||
|
ExperimentalFailSwapOn bool `json:"experimentalFailSwapOn,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type KubeletAuthorizationMode string
|
type KubeletAuthorizationMode string
|
||||||
|
@ -504,6 +504,9 @@ type KubeletConfiguration struct {
|
|||||||
// Enable Container Runtime Interface (CRI) integration.
|
// Enable Container Runtime Interface (CRI) integration.
|
||||||
// +optional
|
// +optional
|
||||||
EnableCRI bool `json:"enableCRI,omitempty"`
|
EnableCRI bool `json:"enableCRI,omitempty"`
|
||||||
|
// TODO(#34726:1.8.0): Remove the opt-in for failing when swap is enabled.
|
||||||
|
// Tells the Kubelet to fail to start if swap is enabled on the node.
|
||||||
|
ExperimentalFailSwapOn bool `json:"experimentalFailSwapOn,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type KubeletAuthorizationMode string
|
type KubeletAuthorizationMode string
|
||||||
|
@ -407,6 +407,7 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
|
|||||||
out.AllowedUnsafeSysctls = *(*[]string)(unsafe.Pointer(&in.AllowedUnsafeSysctls))
|
out.AllowedUnsafeSysctls = *(*[]string)(unsafe.Pointer(&in.AllowedUnsafeSysctls))
|
||||||
out.FeatureGates = in.FeatureGates
|
out.FeatureGates = in.FeatureGates
|
||||||
out.EnableCRI = in.EnableCRI
|
out.EnableCRI = in.EnableCRI
|
||||||
|
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -575,6 +576,7 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
|
|||||||
out.AllowedUnsafeSysctls = *(*[]string)(unsafe.Pointer(&in.AllowedUnsafeSysctls))
|
out.AllowedUnsafeSysctls = *(*[]string)(unsafe.Pointer(&in.AllowedUnsafeSysctls))
|
||||||
out.FeatureGates = in.FeatureGates
|
out.FeatureGates = in.FeatureGates
|
||||||
out.EnableCRI = in.EnableCRI
|
out.EnableCRI = in.EnableCRI
|
||||||
|
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -461,6 +461,7 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
|
|||||||
}
|
}
|
||||||
out.FeatureGates = in.FeatureGates
|
out.FeatureGates = in.FeatureGates
|
||||||
out.EnableCRI = in.EnableCRI
|
out.EnableCRI = in.EnableCRI
|
||||||
|
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -392,6 +392,7 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
|
|||||||
}
|
}
|
||||||
out.FeatureGates = in.FeatureGates
|
out.FeatureGates = in.FeatureGates
|
||||||
out.EnableCRI = in.EnableCRI
|
out.EnableCRI = in.EnableCRI
|
||||||
|
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3003,8 +3003,15 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
|
|||||||
Format: "",
|
Format: "",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
"experimentalFailSwapOn": {
|
||||||
|
SchemaProps: spec.SchemaProps{
|
||||||
|
Description: "Tells the Kubelet to fail to start if swap is enabled on the node.",
|
||||||
|
Type: []string{"boolean"},
|
||||||
|
Format: "",
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginMTU", "networkPluginDir", "cniConfDir", "cniBinDir", "volumePluginDir", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "experimentalMounterPath", "experimentalMounterRootfsPath", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "featureGates"},
|
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginMTU", "networkPluginDir", "cniConfDir", "cniBinDir", "volumePluginDir", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "experimentalMounterPath", "experimentalMounterRootfsPath", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "featureGates", "experimentalFailSwapOn"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Dependencies: []string{
|
Dependencies: []string{
|
||||||
@ -14757,8 +14764,15 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
|
|||||||
Format: "",
|
Format: "",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
"experimentalFailSwapOn": {
|
||||||
|
SchemaProps: spec.SchemaProps{
|
||||||
|
Description: "Tells the Kubelet to fail to start if swap is enabled on the node.",
|
||||||
|
Type: []string{"boolean"},
|
||||||
|
Format: "",
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "experimentalMounterPath", "experimentalMounterRootfsPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "podsPerCore", "enableControllerAttachDetach", "systemReserved", 
"kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "featureGates"},
|
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "experimentalMounterPath", "experimentalMounterRootfsPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "podsPerCore", "enableControllerAttachDetach", "systemReserved", 
"kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "featureGates", "experimentalFailSwapOn"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Dependencies: []string{
|
Dependencies: []string{
|
||||||
|
@ -19,9 +19,11 @@ limitations under the License.
|
|||||||
package cm
|
package cm
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
"path"
|
"path"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
@ -164,12 +166,44 @@ func validateSystemRequirements(mountUtil mount.Interface) (features, error) {
|
|||||||
// TODO(vmarmol): Add limits to the system containers.
|
// TODO(vmarmol): Add limits to the system containers.
|
||||||
// Takes the absolute name of the specified containers.
|
// Takes the absolute name of the specified containers.
|
||||||
// Empty container name disables use of the specified container.
|
// Empty container name disables use of the specified container.
|
||||||
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig) (ContainerManager, error) {
|
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool) (ContainerManager, error) {
|
||||||
subsystems, err := GetCgroupSubsystems()
|
subsystems, err := GetCgroupSubsystems()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to get mounted cgroup subsystems: %v", err)
|
return nil, fmt.Errorf("failed to get mounted cgroup subsystems: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check whether swap is enabled. The Kubelet does not support running with swap enabled.
|
||||||
|
cmd := exec.Command("cat", "/proc/swaps")
|
||||||
|
stdout, err := cmd.StdoutPipe()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err := cmd.Start(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var buf []string
|
||||||
|
scanner := bufio.NewScanner(stdout)
|
||||||
|
for scanner.Scan() { // Splits on newlines by default
|
||||||
|
buf = append(buf, scanner.Text())
|
||||||
|
}
|
||||||
|
if err := cmd.Wait(); err != nil { // Clean up
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(#34726:1.8.0): Remove the opt-in for failing when swap is enabled.
|
||||||
|
// Running with swap enabled should be considered an error, but in order to maintain legacy
|
||||||
|
// behavior we have to require an opt-in to this error for a period of time.
|
||||||
|
|
||||||
|
// The first line of /proc/swaps is the table header; any additional line is a swap entry, so more than one line means swap is enabled and we should error out.
|
||||||
|
if len(buf) > 1 {
|
||||||
|
if failSwapOn {
|
||||||
|
return nil, fmt.Errorf("Running with swap on is not supported, please disable swap! /proc/swaps contained: %v", buf)
|
||||||
|
}
|
||||||
|
glog.Warningf("Running with swap on is not supported, please disable swap! " +
|
||||||
|
"This will be a fatal error by default starting in K8s v1.6! " +
|
||||||
|
"In the meantime, you can opt-in to making this a fatal error by enabling --experimental-fail-swap-on.")
|
||||||
|
}
|
||||||
|
|
||||||
// Check if Cgroup-root actually exists on the node
|
// Check if Cgroup-root actually exists on the node
|
||||||
if nodeConfig.CgroupsPerQOS {
|
if nodeConfig.CgroupsPerQOS {
|
||||||
// this does default to / when enabled, but this tests against regressions.
|
// this does default to / when enabled, but this tests against regressions.
|
||||||
|
Loading…
Reference in New Issue
Block a user