From b117a928a6c3f650931bdac02a41fca6680548c4 Mon Sep 17 00:00:00 2001 From: "Lubomir I. Ivanov" Date: Wed, 7 Aug 2019 04:16:36 +0300 Subject: [PATCH] kubeadm: prevent bootstrap of nodes with known names If a Node name in the cluster is already taken and this Node is Ready, prevent TLS bootsrap on "kubeadm join" and exit early. This change requires that a new ClusterRole is granted to the "system:bootstrappers:kubeadm:default-node-token" group to be able get Nodes in the cluster. The same group already has access to obtain objects such as the KubeletConfiguration and kubeadm's ClusterConfiguration. The motivation of this change is to prevent undefined behavior and the potential control-plane breakdown if such a cluster is racing to have two nodes with the same name for long periods of time. The following values are validated in the following precedence from lower to higher: - actual hostname - NodeRegistration.Name (or "--node-name") from JoinConfiguration - "--hostname-override" passed via kubeletExtraArgs If the user decides to not let kubeadm know about a custom node name and to instead override the hostname from a kubelet systemd unit file, kubeadm will not be able to detect the problem. --- .../app/cmd/phases/init/bootstraptoken.go | 4 ++ cmd/kubeadm/app/cmd/phases/join/BUILD | 3 ++ cmd/kubeadm/app/cmd/phases/join/kubelet.go | 25 +++++++++++ .../bootstraptoken/node/tlsbootstrap.go | 41 +++++++++++++++++++ cmd/kubeadm/app/phases/kubelet/flags.go | 39 ++++++++++++------ cmd/kubeadm/app/phases/kubelet/flags_test.go | 32 +++++++-------- 6 files changed, 116 insertions(+), 28 deletions(-) diff --git a/cmd/kubeadm/app/cmd/phases/init/bootstraptoken.go b/cmd/kubeadm/app/cmd/phases/init/bootstraptoken.go index bdfcabb2b7f..b89001063ad 100644 --- a/cmd/kubeadm/app/cmd/phases/init/bootstraptoken.go +++ b/cmd/kubeadm/app/cmd/phases/init/bootstraptoken.go @@ -86,6 +86,10 @@ func runBootstrapToken(c workflow.RunData) error { if err := nodebootstraptokenphase.UpdateOrCreateTokens(client, false, data.Cfg().BootstrapTokens); err != nil { return errors.Wrap(err, "error updating or creating token") } + // Create RBAC rules that makes the bootstrap tokens able to get nodes + if err := nodebootstraptokenphase.AllowBoostrapTokensToGetNodes(client); err != nil { + return errors.Wrap(err, "error allowing bootstrap tokens to get Nodes") + } // Create RBAC rules that makes the bootstrap tokens able to post CSRs if err := nodebootstraptokenphase.AllowBootstrapTokensToPostCSRs(client); err != nil { return errors.Wrap(err, "error allowing bootstrap tokens to post CSRs") diff --git a/cmd/kubeadm/app/cmd/phases/join/BUILD b/cmd/kubeadm/app/cmd/phases/join/BUILD index 4da71bfd685..28895fcc0c6 100644 --- a/cmd/kubeadm/app/cmd/phases/join/BUILD +++ b/cmd/kubeadm/app/cmd/phases/join/BUILD @@ -30,6 +30,9 @@ go_library( "//cmd/kubeadm/app/preflight:go_default_library", "//cmd/kubeadm/app/util/apiclient:go_default_library", "//cmd/kubeadm/app/util/kubeconfig:go_default_library", + "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/version:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library", diff --git a/cmd/kubeadm/app/cmd/phases/join/kubelet.go b/cmd/kubeadm/app/cmd/phases/join/kubelet.go index add3ed4d8f8..a7e043a6849 100644 --- a/cmd/kubeadm/app/cmd/phases/join/kubelet.go +++ b/cmd/kubeadm/app/cmd/phases/join/kubelet.go @@ -22,6 +22,9 @@ import ( "github.com/lithammer/dedent" "github.com/pkg/errors" + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/version" "k8s.io/apimachinery/pkg/util/wait" clientcmdapi "k8s.io/client-go/tools/clientcmd/api" @@ -128,6 +131,28 @@ func runKubeletStartJoinPhase(c workflow.RunData) (returnErr error) { return errors.Errorf("couldn't create client from kubeconfig file %q", bootstrapKubeConfigFile) } + // Obtain the name of this Node. + nodeName, _, err := kubeletphase.GetNodeNameAndHostname(&cfg.NodeRegistration) + if err != nil { + klog.Warning(err) + } + + // Make sure to exit before TLS bootstrap if a Node with the same name exist in the cluster + // and it has the "Ready" status. + // A new Node with the same name as an existing control-plane Node can cause undefined + // behavior and ultimately control-plane failure. + klog.V(1).Infof("[kubelet-start] Checking for an existing Node in the cluster with name %q and status %q", nodeName, v1.NodeReady) + node, err := bootstrapClient.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + return errors.Wrapf(err, "cannot get Node %q", nodeName) + } + for _, cond := range node.Status.Conditions { + if cond.Type == v1.NodeReady { + return errors.Errorf("a Node with name %q and status %q already exists in the cluster. "+ + "You must delete the existing Node or change the name of this new joining Node", nodeName, v1.NodeReady) + } + } + // Configure the kubelet. In this short timeframe, kubeadm is trying to stop/restart the kubelet // Try to stop the kubelet service so no race conditions occur when configuring it klog.V(1).Infoln("[kubelet-start] Stopping the kubelet") diff --git a/cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap.go b/cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap.go index 293209a9a00..e190079a947 100644 --- a/cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap.go +++ b/cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap.go @@ -32,6 +32,8 @@ const ( NodeBootstrapperClusterRoleName = "system:node-bootstrapper" // NodeKubeletBootstrap defines the name of the ClusterRoleBinding that lets kubelets post CSRs NodeKubeletBootstrap = "kubeadm:kubelet-bootstrap" + // GetNodesClusterRoleName defines the name of the ClusterRole and ClusterRoleBinding to get nodes + GetNodesClusterRoleName = "kubeadm:get-nodes" // CSRAutoApprovalClusterRoleName defines the name of the auto-bootstrapped ClusterRole for making the csrapprover controller auto-approve the CSR // TODO: This value should be defined in an other, generic authz package instead of here @@ -67,6 +69,45 @@ func AllowBootstrapTokensToPostCSRs(client clientset.Interface) error { }) } +// AllowBoostrapTokensToGetNodes creates RBAC rules to allow Node Bootstrap Tokens to list nodes +func AllowBoostrapTokensToGetNodes(client clientset.Interface) error { + fmt.Println("[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to get nodes") + + if err := apiclient.CreateOrUpdateClusterRole(client, &rbac.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{ + Name: GetNodesClusterRoleName, + Namespace: metav1.NamespaceSystem, + }, + Rules: []rbac.PolicyRule{ + { + Verbs: []string{"get"}, + APIGroups: []string{""}, + Resources: []string{"nodes"}, + }, + }, + }); err != nil { + return err + } + + return apiclient.CreateOrUpdateClusterRoleBinding(client, &rbac.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: GetNodesClusterRoleName, + Namespace: metav1.NamespaceSystem, + }, + RoleRef: rbac.RoleRef{ + APIGroup: rbac.GroupName, + Kind: "ClusterRole", + Name: GetNodesClusterRoleName, + }, + Subjects: []rbac.Subject{ + { + Kind: rbac.GroupKind, + Name: constants.NodeBootstrapTokenAuthGroup, + }, + }, + }) +} + // AutoApproveNodeBootstrapTokens creates RBAC rules in a way that makes Node Bootstrap Tokens' CSR auto-approved by the csrapprover controller func AutoApproveNodeBootstrapTokens(client clientset.Interface) error { fmt.Println("[bootstrap-token] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token") diff --git a/cmd/kubeadm/app/phases/kubelet/flags.go b/cmd/kubeadm/app/phases/kubelet/flags.go index 14f7317be12..0c370efe2ae 100644 --- a/cmd/kubeadm/app/phases/kubelet/flags.go +++ b/cmd/kubeadm/app/phases/kubelet/flags.go @@ -41,17 +41,29 @@ type kubeletFlagsOpts struct { registerTaintsUsingFlags bool execer utilsexec.Interface isServiceActiveFunc func(string) (bool, error) - defaultHostname string +} + +// GetNodeNameAndHostname obtains the name for this Node using the following precedence +// (from lower to higher): +// - actual hostname +// - NodeRegistrationOptions.Name (same as "--node-name" passed to "kubeadm init/join") +// - "hostname-overide" flag in NodeRegistrationOptions.KubeletExtraArgs +// It also returns the hostname or an error if getting the hostname failed. +func GetNodeNameAndHostname(cfg *kubeadmapi.NodeRegistrationOptions) (string, string, error) { + hostname, err := kubeadmutil.GetHostname("") + nodeName := hostname + if cfg.Name != "" { + nodeName = cfg.Name + } + if name, ok := cfg.KubeletExtraArgs["hostname-override"]; ok { + nodeName = name + } + return nodeName, hostname, err } // WriteKubeletDynamicEnvFile writes an environment file with dynamic flags to the kubelet. // Used at "kubeadm init" and "kubeadm join" time. func WriteKubeletDynamicEnvFile(cfg *kubeadmapi.ClusterConfiguration, nodeReg *kubeadmapi.NodeRegistrationOptions, registerTaintsUsingFlags bool, kubeletDir string) error { - hostName, err := kubeadmutil.GetHostname("") - if err != nil { - return err - } - flagOpts := kubeletFlagsOpts{ nodeRegOpts: nodeReg, featureGates: cfg.FeatureGates, @@ -65,7 +77,6 @@ func WriteKubeletDynamicEnvFile(cfg *kubeadmapi.ClusterConfiguration, nodeReg *k } return initSystem.ServiceIsActive(name), nil }, - defaultHostname: hostName, } stringMap := buildKubeletArgMap(flagOpts) argList := kubeadmutil.BuildArgumentListFromMap(stringMap, nodeReg.KubeletExtraArgs) @@ -113,15 +124,19 @@ func buildKubeletArgMap(opts kubeletFlagsOpts) map[string]string { kubeletFlags["resolv-conf"] = "/run/systemd/resolve/resolv.conf" } - // Make sure the node name we're passed will work with Kubelet - if opts.nodeRegOpts.Name != "" && opts.nodeRegOpts.Name != opts.defaultHostname { - klog.V(1).Infof("setting kubelet hostname-override to %q", opts.nodeRegOpts.Name) - kubeletFlags["hostname-override"] = opts.nodeRegOpts.Name + // Pass the "--hostname-override" flag to the kubelet only if it's different from the hostname + nodeName, hostname, err := GetNodeNameAndHostname(opts.nodeRegOpts) + if err != nil { + klog.Warning(err) + } + if nodeName != hostname { + klog.V(1).Infof("setting kubelet hostname-override to %q", nodeName) + kubeletFlags["hostname-override"] = nodeName } // TODO: Conditionally set `--cgroup-driver` to either `systemd` or `cgroupfs` for CRI other than Docker - // TODO: The following code should be remvoved after dual-stack is GA. + // TODO: The following code should be removed after dual-stack is GA. // Note: The user still retains the ability to explicitly set feature-gates and that value will overwrite this base value. if enabled, present := opts.featureGates[features.IPv6DualStack]; present { kubeletFlags["feature-gates"] = fmt.Sprintf("%s=%t", features.IPv6DualStack, enabled) diff --git a/cmd/kubeadm/app/phases/kubelet/flags_test.go b/cmd/kubeadm/app/phases/kubelet/flags_test.go index d9e7190f19d..551aa5df13f 100644 --- a/cmd/kubeadm/app/phases/kubelet/flags_test.go +++ b/cmd/kubeadm/app/phases/kubelet/flags_test.go @@ -109,7 +109,6 @@ func TestBuildKubeletArgMap(t *testing.T) { opts: kubeletFlagsOpts{ nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{ CRISocket: "/var/run/dockershim.sock", - Name: "foo", Taints: []v1.Taint{ // This should be ignored as registerTaintsUsingFlags is false { Key: "foo", @@ -120,14 +119,13 @@ func TestBuildKubeletArgMap(t *testing.T) { }, execer: errCgroupExecer, isServiceActiveFunc: serviceIsNotActiveFunc, - defaultHostname: "foo", }, expected: map[string]string{ "network-plugin": "cni", }, }, { - name: "nodeRegOpts.Name != default hostname", + name: "hostname override from NodeRegistrationOptions.Name", opts: kubeletFlagsOpts{ nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{ CRISocket: "/var/run/dockershim.sock", @@ -135,7 +133,21 @@ func TestBuildKubeletArgMap(t *testing.T) { }, execer: errCgroupExecer, isServiceActiveFunc: serviceIsNotActiveFunc, - defaultHostname: "default", + }, + expected: map[string]string{ + "network-plugin": "cni", + "hostname-override": "override-name", + }, + }, + { + name: "hostname override from NodeRegistrationOptions.KubeletExtraArgs", + opts: kubeletFlagsOpts{ + nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{ + CRISocket: "/var/run/dockershim.sock", + KubeletExtraArgs: map[string]string{"hostname-override": "override-name"}, + }, + execer: errCgroupExecer, + isServiceActiveFunc: serviceIsNotActiveFunc, }, expected: map[string]string{ "network-plugin": "cni", @@ -147,11 +159,9 @@ func TestBuildKubeletArgMap(t *testing.T) { opts: kubeletFlagsOpts{ nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{ CRISocket: "/var/run/dockershim.sock", - Name: "foo", }, execer: systemdCgroupExecer, isServiceActiveFunc: serviceIsNotActiveFunc, - defaultHostname: "foo", }, expected: map[string]string{ "network-plugin": "cni", @@ -163,11 +173,9 @@ func TestBuildKubeletArgMap(t *testing.T) { opts: kubeletFlagsOpts{ nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{ CRISocket: "/var/run/dockershim.sock", - Name: "foo", }, execer: cgroupfsCgroupExecer, isServiceActiveFunc: serviceIsNotActiveFunc, - defaultHostname: "foo", }, expected: map[string]string{ "network-plugin": "cni", @@ -179,11 +187,9 @@ func TestBuildKubeletArgMap(t *testing.T) { opts: kubeletFlagsOpts{ nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{ CRISocket: "/var/run/containerd.sock", - Name: "foo", }, execer: cgroupfsCgroupExecer, isServiceActiveFunc: serviceIsNotActiveFunc, - defaultHostname: "foo", }, expected: map[string]string{ "container-runtime": "remote", @@ -195,7 +201,6 @@ func TestBuildKubeletArgMap(t *testing.T) { opts: kubeletFlagsOpts{ nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{ CRISocket: "/var/run/containerd.sock", - Name: "foo", Taints: []v1.Taint{ { Key: "foo", @@ -212,7 +217,6 @@ func TestBuildKubeletArgMap(t *testing.T) { registerTaintsUsingFlags: true, execer: cgroupfsCgroupExecer, isServiceActiveFunc: serviceIsNotActiveFunc, - defaultHostname: "foo", }, expected: map[string]string{ "container-runtime": "remote", @@ -225,11 +229,9 @@ func TestBuildKubeletArgMap(t *testing.T) { opts: kubeletFlagsOpts{ nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{ CRISocket: "/var/run/containerd.sock", - Name: "foo", }, execer: cgroupfsCgroupExecer, isServiceActiveFunc: serviceIsActiveFunc, - defaultHostname: "foo", }, expected: map[string]string{ "container-runtime": "remote", @@ -242,12 +244,10 @@ func TestBuildKubeletArgMap(t *testing.T) { opts: kubeletFlagsOpts{ nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{ CRISocket: "/var/run/dockershim.sock", - Name: "foo", }, pauseImage: "gcr.io/pause:3.1", execer: cgroupfsCgroupExecer, isServiceActiveFunc: serviceIsNotActiveFunc, - defaultHostname: "foo", }, expected: map[string]string{ "network-plugin": "cni",