From 0c6c6224342709001bf527f03f4462d918837b04 Mon Sep 17 00:00:00 2001 From: Michael Taufen Date: Fri, 2 Sep 2016 11:13:02 -0700 Subject: [PATCH] Fail kubelet creation if swap enabled Provides an opt-in flag, --experimental-fail-swap-on (and corresponding KubeletConfiguration value, ExperimentalFailSwapOn), which is false by default. --- cmd/kubelet/app/options/options.go | 3 ++ cmd/kubelet/app/server.go | 27 ++++++++------ hack/verify-flags/known-flags.txt | 1 + pkg/apis/componentconfig/types.go | 3 ++ pkg/apis/componentconfig/v1alpha1/types.go | 3 ++ .../v1alpha1/zz_generated.conversion.go | 2 ++ .../v1alpha1/zz_generated.deepcopy.go | 1 + .../componentconfig/zz_generated.deepcopy.go | 1 + pkg/generated/openapi/zz_generated.openapi.go | 18 ++++++++-- pkg/kubelet/cm/container_manager_linux.go | 36 ++++++++++++++++++- 10 files changed, 81 insertions(+), 14 deletions(-) diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go index 7b0e178f766..1596f0486fe 100644 --- a/cmd/kubelet/app/options/options.go +++ b/cmd/kubelet/app/options/options.go @@ -78,6 +78,9 @@ func NewKubeletServer() *KubeletServer { // AddFlags adds flags for a specific KubeletServer to the specified FlagSet func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) { + // TODO(#34726:1.8.0): Remove the opt-in for failing when swap is enabled. + fs.BoolVar(&s.ExperimentalFailSwapOn, "experimental-fail-swap-on", s.ExperimentalFailSwapOn, "Makes the Kubelet fail to start if swap is enabled on the node. This is a temporary option to maintain legacy behavior, failing due to swap enabled will happen by default in v1.6.") + fs.Var(&s.KubeConfig, "kubeconfig", "Path to a kubeconfig file, specifying how to connect to the API server. 
--api-servers will be used for the location unless --require-kubeconfig is set.") fs.BoolVar(&s.RequireKubeConfig, "require-kubeconfig", s.RequireKubeConfig, "If true the Kubelet will exit if there are configuration errors, and will ignore the value of --api-servers in favor of the server defined in the kubeconfig file.") diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 21fae7645d6..43bc01686c8 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -434,17 +434,22 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) { if s.SystemCgroups != "" && s.CgroupRoot == "" { return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified") } - kubeDeps.ContainerManager, err = cm.NewContainerManager(kubeDeps.Mounter, kubeDeps.CAdvisorInterface, cm.NodeConfig{ - RuntimeCgroupsName: s.RuntimeCgroups, - SystemCgroupsName: s.SystemCgroups, - KubeletCgroupsName: s.KubeletCgroups, - ContainerRuntime: s.ContainerRuntime, - CgroupsPerQOS: s.CgroupsPerQOS, - CgroupRoot: s.CgroupRoot, - CgroupDriver: s.CgroupDriver, - ProtectKernelDefaults: s.ProtectKernelDefaults, - EnableCRI: s.EnableCRI, - }) + kubeDeps.ContainerManager, err = cm.NewContainerManager( + kubeDeps.Mounter, + kubeDeps.CAdvisorInterface, + cm.NodeConfig{ + RuntimeCgroupsName: s.RuntimeCgroups, + SystemCgroupsName: s.SystemCgroups, + KubeletCgroupsName: s.KubeletCgroups, + ContainerRuntime: s.ContainerRuntime, + CgroupsPerQOS: s.CgroupsPerQOS, + CgroupRoot: s.CgroupRoot, + CgroupDriver: s.CgroupDriver, + ProtectKernelDefaults: s.ProtectKernelDefaults, + EnableCRI: s.EnableCRI, + }, + s.ExperimentalFailSwapOn) + if err != nil { return err } diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt index be05f988058..2feaf7fda6c 100644 --- a/hack/verify-flags/known-flags.txt +++ b/hack/verify-flags/known-flags.txt @@ -202,6 +202,7 @@ external-etcd-keyfile external-hostname external-ip 
extra-peer-dirs +experimental-fail-swap-on failover-timeout failure-domains fake-clientset diff --git a/pkg/apis/componentconfig/types.go b/pkg/apis/componentconfig/types.go index 8ff5f02fc71..a7138db9082 100644 --- a/pkg/apis/componentconfig/types.go +++ b/pkg/apis/componentconfig/types.go @@ -465,6 +465,9 @@ type KubeletConfiguration struct { // Enable Container Runtime Interface (CRI) integration. // +optional EnableCRI bool `json:"enableCRI,omitempty"` + // TODO(#34726:1.8.0): Remove the opt-in for failing when swap is enabled. + // Tells the Kubelet to fail to start if swap is enabled on the node. + ExperimentalFailSwapOn bool `json:"experimentalFailSwapOn,omitempty"` } type KubeletAuthorizationMode string diff --git a/pkg/apis/componentconfig/v1alpha1/types.go b/pkg/apis/componentconfig/v1alpha1/types.go index b662a20957c..ba56ae54a3b 100644 --- a/pkg/apis/componentconfig/v1alpha1/types.go +++ b/pkg/apis/componentconfig/v1alpha1/types.go @@ -504,6 +504,9 @@ type KubeletConfiguration struct { // Enable Container Runtime Interface (CRI) integration. // +optional EnableCRI bool `json:"enableCRI,omitempty"` + // TODO(#34726:1.8.0): Remove the opt-in for failing when swap is enabled. + // Tells the Kubelet to fail to start if swap is enabled on the node. 
+ ExperimentalFailSwapOn bool `json:"experimentalFailSwapOn,omitempty"` } type KubeletAuthorizationMode string diff --git a/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go b/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go index 670b9da7888..38902344433 100644 --- a/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go +++ b/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go @@ -407,6 +407,7 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu out.AllowedUnsafeSysctls = *(*[]string)(unsafe.Pointer(&in.AllowedUnsafeSysctls)) out.FeatureGates = in.FeatureGates out.EnableCRI = in.EnableCRI + out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn return nil } @@ -575,6 +576,7 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu out.AllowedUnsafeSysctls = *(*[]string)(unsafe.Pointer(&in.AllowedUnsafeSysctls)) out.FeatureGates = in.FeatureGates out.EnableCRI = in.EnableCRI + out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn return nil } diff --git a/pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go index eb01bdceb49..a2eb4fe2ffc 100644 --- a/pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go @@ -461,6 +461,7 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c * } out.FeatureGates = in.FeatureGates out.EnableCRI = in.EnableCRI + out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn return nil } } diff --git a/pkg/apis/componentconfig/zz_generated.deepcopy.go b/pkg/apis/componentconfig/zz_generated.deepcopy.go index f4b8ad3020b..be8f3db09c4 100644 --- a/pkg/apis/componentconfig/zz_generated.deepcopy.go +++ b/pkg/apis/componentconfig/zz_generated.deepcopy.go @@ -392,6 +392,7 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface } out.FeatureGates = in.FeatureGates 
out.EnableCRI = in.EnableCRI + out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn return nil } } diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index 4f93f4e7ebb..9095ac2311c 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -3003,8 +3003,15 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{ Format: "", }, }, + "experimentalFailSwapOn": { + SchemaProps: spec.SchemaProps{ + Description: "Tells the Kubelet to fail to start if swap is enabled on the node.", + Type: []string{"boolean"}, + Format: "", + }, + }, }, - Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginMTU", "networkPluginDir", "cniConfDir", "cniBinDir", "volumePluginDir", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "experimentalMounterPath", "experimentalMounterRootfsPath", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", 
"podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "featureGates"}, + Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginMTU", "networkPluginDir", "cniConfDir", "cniBinDir", "volumePluginDir", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "experimentalMounterPath", "experimentalMounterRootfsPath", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", 
"podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "featureGates", "experimentalFailSwapOn"}, }, }, Dependencies: []string{ @@ -14757,8 +14764,15 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{ Format: "", }, }, + "experimentalFailSwapOn": { + SchemaProps: spec.SchemaProps{ + Description: "Tells the Kubelet to fail to start if swap is enabled on the node.", + Type: []string{"boolean"}, + Format: "", + }, + }, }, - Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "experimentalMounterPath", "experimentalMounterRootfsPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", 
"hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "featureGates"}, + Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", 
"experimentalMounterPath", "experimentalMounterRootfsPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "featureGates", "experimentalFailSwapOn"}, }, }, Dependencies: []string{ diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 82cfaeae7df..83216cf76ae 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -19,9 +19,11 @@ limitations under the License. package cm import ( + "bufio" "fmt" "io/ioutil" "os" + "os/exec" "path" "strconv" "sync" @@ -164,12 +166,44 @@ func validateSystemRequirements(mountUtil mount.Interface) (features, error) { // TODO(vmarmol): Add limits to the system containers. // Takes the absolute name of the specified containers. // Empty container name disables use of the specified container. 
-func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig) (ContainerManager, error) { +func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool) (ContainerManager, error) { subsystems, err := GetCgroupSubsystems() if err != nil { return nil, fmt.Errorf("failed to get mounted cgroup subsystems: %v", err) } + // Check whether swap is enabled. The Kubelet does not support running with swap enabled. + cmd := exec.Command("cat", "/proc/swaps") + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + if err := cmd.Start(); err != nil { + return nil, err + } + var buf []string + scanner := bufio.NewScanner(stdout) + for scanner.Scan() { // Splits on newlines by default + buf = append(buf, scanner.Text()) + } + if err := cmd.Wait(); err != nil { // Clean up + return nil, err + } + + // TODO(#34726:1.8.0): Remove the opt-in for failing when swap is enabled. + // Running with swap enabled should be considered an error, but in order to maintain legacy + // behavior we have to require an opt-in to this error for a period of time. + + // If there is more than one line (table headers) in /proc/swaps, swap is enabled and we should error out. + if len(buf) > 1 { + if failSwapOn { + return nil, fmt.Errorf("Running with swap on is not supported, please disable swap! /proc/swaps contained: %v", buf) + } + glog.Warningf("Running with swap on is not supported, please disable swap! " + + "This will be a fatal error by default starting in K8s v1.6! " + + "In the meantime, you can opt-in to making this a fatal error by enabling --experimental-fail-swap-on.") + } + // Check if Cgroup-root actually exists on the node if nodeConfig.CgroupsPerQOS { // this does default to / when enabled, but this tests against regressions.