Merge pull request #51369 from luxas/kubeadm_poll_kubelet

Automatic merge from submit-queue (batch tested with PRs 51682, 51546, 51369, 50924, 51827)

kubeadm: Detect kubelet readiness and error out if the kubelet is unhealthy

**What this PR does / why we need it**:

To improve the UX when the kubelet is unhealthy, stopped, or otherwise unreachable, kubeadm now polls the kubelet's API after 40 and 60 seconds, and then retries with exponential backoff for a total of 155 seconds.

If the kubelet endpoint is not returning `ok` by then, kubeadm gives up and exits.

This should mitigate at least 60% of our "[apiclient] Created API client, waiting for control plane to come up" issues in the kubeadm issue tracker 🎉, as kubeadm now tells the user what's wrong and no longer deadlocks like before.
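
For reference, the 155-second figure corresponds to five retry intervals that start at 5 seconds and double after each failure (5 + 10 + 20 + 40 + 80). A standalone sketch of that schedule, purely illustrative and not part of the PR:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Backoff parameters used by TryRunCommand in this PR:
	// first interval 5s, doubled on every failure, five attempts in total.
	interval := 5 * time.Second
	var total time.Duration
	for attempt := 1; attempt <= 5; attempt++ {
		fmt.Printf("attempt %d: next retry in %v\n", attempt, interval)
		total += interval
		interval *= 2
	}
	fmt.Println("sum of the intervals:", total) // 2m35s, i.e. 155 seconds
}
```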

Demo:
```
lucas@THEGOPHER:~/luxas/kubernetes$ sudo ./kubeadm init --skip-preflight-checks
[kubeadm] WARNING: kubeadm is in beta, please do not use it for production clusters.
[init] Using Kubernetes version: v1.7.4
[init] Using Authorization modes: [Node RBAC]
[preflight] Skipping pre-flight checks
[kubeadm] WARNING: starting in 1.8, tokens expire after 24 hours by default (if you require a non-expiring token use --token-ttl 0)
[certificates] Generated ca certificate and key.
[certificates] Generated apiserver certificate and key.
[certificates] apiserver serving cert is signed for DNS names [thegopher kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.1.115]
[certificates] Generated apiserver-kubelet-client certificate and key.
[certificates] Generated sa key and public key.
[certificates] Generated front-proxy-ca certificate and key.
[certificates] Generated front-proxy-client certificate and key.
[certificates] Valid certificates and keys now exist in "/etc/kubernetes/pki"
[kubeconfig] Wrote KubeConfig file to disk: "admin.conf"
[kubeconfig] Wrote KubeConfig file to disk: "kubelet.conf"
[kubeconfig] Wrote KubeConfig file to disk: "controller-manager.conf"
[kubeconfig] Wrote KubeConfig file to disk: "scheduler.conf"
[controlplane] Wrote Static Pod manifest for component kube-apiserver to "/etc/kubernetes/manifests/kube-apiserver.yaml"
[controlplane] Wrote Static Pod manifest for component kube-controller-manager to "/etc/kubernetes/manifests/kube-controller-manager.yaml"
[controlplane] Wrote Static Pod manifest for component kube-scheduler to "/etc/kubernetes/manifests/kube-scheduler.yaml"
[etcd] Wrote Static Pod manifest for a local etcd instance to "/etc/kubernetes/manifests/etcd.yaml"
[init] Waiting for the kubelet to boot up the control plane as Static Pods from directory "/etc/kubernetes/manifests"
[init] This often takes around a minute; or longer if the control plane images have to be pulled.
[apiclient] All control plane components are healthy after 40.502199 seconds
[markmaster] Will mark node thegopher as master by adding a label and a taint
[markmaster] Master thegopher tainted and labelled with key/value: node-role.kubernetes.io/master=""
[bootstraptoken] Using token: 5776d5.91e7ed14f9e274df
[bootstraptoken] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstraptoken] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstraptoken] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[uploadconfig] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[addons] Applied essential addon: kube-dns
[addons] Applied essential addon: kube-proxy

Your Kubernetes master has initialized successfully!

To start using your cluster, you need to run (as a regular user):

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  http://kubernetes.io/docs/admin/addons/

You can now join any number of machines by running the following on each node
as root:

  kubeadm join --token 5776d5.91e7ed14f9e274df 192.168.1.115:6443 --discovery-token-ca-cert-hash sha256:6f301ce8c3f5f6558090b2c3599d26d6fc94ffa3c3565ffac952f4f0c7a9b2a9

lucas@THEGOPHER:~/luxas/kubernetes$ sudo ./kubeadm reset
[preflight] Running pre-flight checks
[reset] Stopping the kubelet service
[reset] Unmounting mounted directories in "/var/lib/kubelet"
[reset] Removing kubernetes-managed containers
[reset] Deleting contents of stateful directories: [/var/lib/kubelet /etc/cni/net.d /var/lib/dockershim /var/run/kubernetes /var/lib/etcd]
[reset] Deleting contents of config directories: [/etc/kubernetes/manifests /etc/kubernetes/pki]
[reset] Deleting files: [/etc/kubernetes/admin.conf /etc/kubernetes/kubelet.conf /etc/kubernetes/controller-manager.conf /etc/kubernetes/scheduler.conf]
lucas@THEGOPHER:~/luxas/kubernetes$ sudo systemctl stop kubelet
lucas@THEGOPHER:~/luxas/kubernetes$ sudo ./kubeadm init --skip-preflight-checks
[kubeadm] WARNING: kubeadm is in beta, please do not use it for production clusters.
[init] Using Kubernetes version: v1.7.4
[init] Using Authorization modes: [Node RBAC]
[preflight] Skipping pre-flight checks
[kubeadm] WARNING: starting in 1.8, tokens expire after 24 hours by default (if you require a non-expiring token use --token-ttl 0)
[certificates] Generated ca certificate and key.
[certificates] Generated apiserver certificate and key.
[certificates] apiserver serving cert is signed for DNS names [thegopher kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.1.115]
[certificates] Generated apiserver-kubelet-client certificate and key.
[certificates] Generated sa key and public key.
[certificates] Generated front-proxy-ca certificate and key.
[certificates] Generated front-proxy-client certificate and key.
[certificates] Valid certificates and keys now exist in "/etc/kubernetes/pki"
[kubeconfig] Wrote KubeConfig file to disk: "admin.conf"
[kubeconfig] Wrote KubeConfig file to disk: "kubelet.conf"
[kubeconfig] Wrote KubeConfig file to disk: "controller-manager.conf"
[kubeconfig] Wrote KubeConfig file to disk: "scheduler.conf"
[controlplane] Wrote Static Pod manifest for component kube-apiserver to "/etc/kubernetes/manifests/kube-apiserver.yaml"
[controlplane] Wrote Static Pod manifest for component kube-controller-manager to "/etc/kubernetes/manifests/kube-controller-manager.yaml"
[controlplane] Wrote Static Pod manifest for component kube-scheduler to "/etc/kubernetes/manifests/kube-scheduler.yaml"
[etcd] Wrote Static Pod manifest for a local etcd instance to "/etc/kubernetes/manifests/etcd.yaml"
[init] Waiting for the kubelet to boot up the control plane as Static Pods from directory "/etc/kubernetes/manifests"
[init] This often takes around a minute; or longer if the control plane images have to be pulled.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10255/healthz' failed with error: Get http://localhost:10255/healthz: dial tcp 127.0.0.1:10255: getsockopt: connection refused.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10255/healthz' failed with error: Get http://localhost:10255/healthz: dial tcp 127.0.0.1:10255: getsockopt: connection refused.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10255/healthz' failed with error: Get http://localhost:10255/healthz: dial tcp 127.0.0.1:10255: getsockopt: connection refused.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10255/healthz/syncloop' failed with error: Get http://localhost:10255/healthz/syncloop: dial tcp 127.0.0.1:10255: getsockopt: connection refused.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10255/healthz/syncloop' failed with error: Get http://localhost:10255/healthz/syncloop: dial tcp 127.0.0.1:10255: getsockopt: connection refused.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10255/healthz/syncloop' failed with error: Get http://localhost:10255/healthz/syncloop: dial tcp 127.0.0.1:10255: getsockopt: connection refused.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10255/healthz' failed with error: Get http://localhost:10255/healthz: dial tcp 127.0.0.1:10255: getsockopt: connection refused.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10255/healthz/syncloop' failed with error: Get http://localhost:10255/healthz/syncloop: dial tcp 127.0.0.1:10255: getsockopt: connection refused.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10255/healthz' failed with error: Get http://localhost:10255/healthz: dial tcp 127.0.0.1:10255: getsockopt: connection refused.

Unfortunately, an error has occurred:
	timed out waiting for the condition

This error is likely caused by that:
	- The kubelet is not running
	- The kubelet is unhealthy due to a misconfiguration of the node in some way (required cgroups disabled)
	- There is no internet connection; so the kubelet can't pull the following control plane images:
		- gcr.io/google_containers/kube-apiserver-amd64:v1.7.4
		- gcr.io/google_containers/kube-controller-manager-amd64:v1.7.4
		- gcr.io/google_containers/kube-scheduler-amd64:v1.7.4

You can troubleshoot this for example with the following commands if you're on a systemd-powered system:
	- 'systemctl status kubelet'
	- 'journalctl -xeu kubelet'
couldn't initialize a Kubernetes cluster
```

In this demo, I first run kubeadm normally and everything works as usual.
In the second case, I explicitly stop the kubelet so it isn't running, and skip the preflight checks so that kubeadm doesn't even try to exec `systemctl start kubelet` as it usually does.
That obviously results in a non-working system, but now kubeadm tells the user what the problem is instead of waiting forever.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #

Fixes: https://github.com/kubernetes/kubeadm/issues/377

**Special notes for your reviewer**:

**Release note**:

```release-note
kubeadm: Detect kubelet readiness and error out if the kubelet is unhealthy
```
@kubernetes/sig-cluster-lifecycle-pr-reviews @pipejakob 

cc @justinsb @kris-nova @lukemarsden as well, since you wanted this feature :)
Kubernetes Submit Queue 2017-09-03 15:54:19 -07:00 committed by GitHub
commit e528a6e785
7 changed files with 111 additions and 22 deletions


@@ -28,6 +28,7 @@ go_library(
"//cmd/kubeadm/app/constants:go_default_library",
"//cmd/kubeadm/app/discovery:go_default_library",
"//cmd/kubeadm/app/features:go_default_library",
"//cmd/kubeadm/app/images:go_default_library",
"//cmd/kubeadm/app/phases/addons/dns:go_default_library",
"//cmd/kubeadm/app/phases/addons/proxy:go_default_library",
"//cmd/kubeadm/app/phases/apiconfig:go_default_library",


@@ -37,6 +37,7 @@ import (
"k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/validation"
kubeadmconstants "k8s.io/kubernetes/cmd/kubeadm/app/constants"
"k8s.io/kubernetes/cmd/kubeadm/app/features"
"k8s.io/kubernetes/cmd/kubeadm/app/images"
dnsaddonphase "k8s.io/kubernetes/cmd/kubeadm/app/phases/addons/dns"
proxyaddonphase "k8s.io/kubernetes/cmd/kubeadm/app/phases/addons/proxy"
apiconfigphase "k8s.io/kubernetes/cmd/kubeadm/app/phases/apiconfig"
@@ -81,6 +82,23 @@ var (
kubeadm join --token {{.Token}} {{.MasterHostPort}} --discovery-token-ca-cert-hash {{.CAPubKeyPin}}
`)))
kubeletFailTempl = template.Must(template.New("init").Parse(dedent.Dedent(`
Unfortunately, an error has occurred:
{{ .Error }}
This error is likely caused by that:
- The kubelet is not running
- The kubelet is unhealthy due to a misconfiguration of the node in some way (required cgroups disabled)
- There is no internet connection; so the kubelet can't pull the following control plane images:
- {{ .APIServerImage }}
- {{ .ControllerManagerImage }}
- {{ .SchedulerImage }}
You can troubleshoot this for example with the following commands if you're on a systemd-powered system:
- 'systemctl status kubelet'
- 'journalctl -xeu kubelet'
`)))
)
// NewCmdInit returns "kubeadm init" command.
@@ -325,12 +343,17 @@ func (i *Init) Run(out io.Writer) error {
// waiter holds the apiclient.Waiter implementation of choice, responsible for querying the API server in various ways and waiting for conditions to be fulfilled
waiter := getWaiter(i.dryRun, client)
fmt.Printf("[init] Waiting for the kubelet to boot up the control plane as Static Pods from directory %q\n", kubeadmconstants.GetStaticPodDirectory())
fmt.Println("[init] This process often takes about a minute to perform or longer if the control plane images have to be pulled...")
// TODO: Adjust this timeout or start polling the kubelet API
// TODO: Make this timeout more realistic when we do create some more complex logic about the interaction with the kubelet
if err := waiter.WaitForAPI(); err != nil {
return err
if err := waitForAPIAndKubelet(waiter); err != nil {
ctx := map[string]string{
"Error": fmt.Sprintf("%v", err),
"APIServerImage": images.GetCoreImage(kubeadmconstants.KubeAPIServer, i.cfg.GetControlPlaneImageRepository(), i.cfg.KubernetesVersion, i.cfg.UnifiedControlPlaneImage),
"ControllerManagerImage": images.GetCoreImage(kubeadmconstants.KubeControllerManager, i.cfg.GetControlPlaneImageRepository(), i.cfg.KubernetesVersion, i.cfg.UnifiedControlPlaneImage),
"SchedulerImage": images.GetCoreImage(kubeadmconstants.KubeScheduler, i.cfg.GetControlPlaneImageRepository(), i.cfg.KubernetesVersion, i.cfg.UnifiedControlPlaneImage),
}
kubeletFailTempl.Execute(out, ctx)
return fmt.Errorf("couldn't initialize a Kubernetes cluster")
}
// Upload currently used configuration to the cluster
@@ -472,11 +495,43 @@ func printFilesIfDryRunning(dryRun bool, manifestDir string) error {
return dryrunutil.PrintDryRunFiles(files, os.Stdout)
}
// getWaiter gets the right waiter implementation
// getWaiter gets the right waiter implementation for the right occasion
func getWaiter(dryRun bool, client clientset.Interface) apiclient.Waiter {
if dryRun {
return dryrunutil.NewWaiter()
}
// TODO: Adjust this timeout slightly?
return apiclient.NewKubeWaiter(client, 30*time.Minute, os.Stdout)
}
// waitForAPIAndKubelet waits primarily for the API server to come up. If that takes a long time and the kubelet
// /healthz and /healthz/syncloop endpoints are continuously unhealthy, kubeadm will error out after a period of
// exponential backoff
func waitForAPIAndKubelet(waiter apiclient.Waiter) error {
errorChan := make(chan error)
fmt.Printf("[init] Waiting for the kubelet to boot up the control plane as Static Pods from directory %q\n", kubeadmconstants.GetStaticPodDirectory())
fmt.Println("[init] This often takes around a minute; or longer if the control plane images have to be pulled.")
go func(errC chan error, waiter apiclient.Waiter) {
// This goroutine can only make kubeadm init fail. If this check succeeds, it won't do anything special
if err := waiter.WaitForHealthyKubelet(40*time.Second, "http://localhost:10255/healthz"); err != nil {
errC <- err
}
}(errorChan, waiter)
go func(errC chan error, waiter apiclient.Waiter) {
// This goroutine can only make kubeadm init fail. If this check succeeds, it won't do anything special
if err := waiter.WaitForHealthyKubelet(60*time.Second, "http://localhost:10255/healthz/syncloop"); err != nil {
errC <- err
}
}(errorChan, waiter)
go func(errC chan error, waiter apiclient.Waiter) {
// This main goroutine sends whatever WaitForAPI returns (error or not) to the channel
// This is in order to continue on success (nil error), or just fail if WaitForAPI returns an error
errC <- waiter.WaitForAPI()
}(errorChan, waiter)
// This call is blocking until one of the goroutines sends to errorChan
return <-errorChan
}
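
To make the flow above easier to follow, here is a minimal, self-contained sketch of the same pattern, purely illustrative and not part of the PR: several checks race on one error channel, and the first result received, nil or not, decides the outcome.

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// raceChecks mirrors the shape of waitForAPIAndKubelet: the kubelet checks
// only send on failure, while the API check always sends its result.
// The timings and messages here are made up for illustration.
func raceChecks() error {
	errC := make(chan error)

	go func() {
		// stand-in for waiter.WaitForHealthyKubelet: report only if unhealthy
		time.Sleep(50 * time.Millisecond)
		errC <- errors.New("kubelet /healthz never returned ok")
	}()

	go func() {
		// stand-in for waiter.WaitForAPI: always report, nil means success
		time.Sleep(200 * time.Millisecond)
		errC <- nil
	}()

	// Block until the first check reports back.
	return <-errC
}

func main() {
	fmt.Println("result:", raceChecks())
}
```

In the PR itself the kubelet checks only start after 40 and 60 seconds, so a dead kubelet fails `kubeadm init` long before the API waiter's 30-minute timeout would be hit.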


@@ -39,7 +39,7 @@ const (
selfHostingWaitTimeout = 2 * time.Minute
// selfHostingFailureThreshold describes how many times kubeadm will retry creating the DaemonSets
selfHostingFailureThreshold uint8 = 5
selfHostingFailureThreshold int = 5
)
// CreateSelfHostedControlPlane is responsible for turning a Static Pod-hosted control plane to a self-hosted one


@@ -43,7 +43,7 @@ const (
selfHostingWaitTimeout = 2 * time.Minute
// selfHostingFailureThreshold describes how many times kubeadm will retry creating the DaemonSets
selfHostingFailureThreshold uint8 = 10
selfHostingFailureThreshold int = 10
)
// controlPlaneComponentResources holds the relevant Pod and DaemonSet associated with a control plane component


@@ -113,6 +113,11 @@ func (w *fakeWaiter) WaitForStaticPodControlPlaneHashChange(_, _, _ string) erro
return w.errsToReturn[waitForHashChange]
}
// WaitForHealthyKubelet returns a dummy nil just to implement the interface
func (w *fakeWaiter) WaitForHealthyKubelet(_ time.Duration, _ string) error {
return nil
}
type fakeStaticPodPathManager struct {
realManifestDir string
tempManifestDir string


@@ -40,10 +40,13 @@ type Waiter interface {
WaitForPodsWithLabel(kvLabel string) error
// WaitForPodToDisappear waits for the given Pod in the kube-system namespace to be deleted
WaitForPodToDisappear(staticPodName string) error
// WaitForStaticPodControlPlaneHashes
// WaitForStaticPodControlPlaneHashes fetches sha256 hashes for the control plane static pods
WaitForStaticPodControlPlaneHashes(nodeName string) (map[string]string, error)
// WaitForStaticPodControlPlaneHashChange
// WaitForStaticPodControlPlaneHashChange waits for the given static pod component's static pod hash to get updated.
// By doing that we can be sure that the kubelet has restarted the given Static Pod
WaitForStaticPodControlPlaneHashChange(nodeName, component, previousHash string) error
// WaitForHealthyKubelet blocks until the kubelet /healthz endpoint returns 'ok'
WaitForHealthyKubelet(initalTimeout time.Duration, healthzEndpoint string) error
// SetTimeout adjusts the timeout to the specified duration
SetTimeout(timeout time.Duration)
}
@@ -123,6 +126,26 @@ func (w *KubeWaiter) WaitForPodToDisappear(podName string) error {
})
}
// WaitForHealthyKubelet blocks until the kubelet /healthz endpoint returns 'ok'
func (w *KubeWaiter) WaitForHealthyKubelet(initalTimeout time.Duration, healthzEndpoint string) error {
time.Sleep(initalTimeout)
return TryRunCommand(func() error {
resp, err := http.Get(healthzEndpoint)
if err != nil {
fmt.Printf("[kubelet-check] It seems like the kubelet isn't running or healthy.\n")
fmt.Printf("[kubelet-check] The HTTP call equal to 'curl -sSL %s' failed with error: %v.\n", healthzEndpoint, err)
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
fmt.Printf("[kubelet-check] It seems like the kubelet isn't running or healthy.")
fmt.Printf("[kubelet-check] The HTTP call equal to 'curl -sSL %s' returned HTTP code %d\n", healthzEndpoint, resp.StatusCode)
return fmt.Errorf("the kubelet healthz endpoint is unhealthy")
}
return nil
}, 5) // a failureThreshold of five means waiting for a total of 155 seconds
}
// SetTimeout adjusts the timeout to the specified duration
func (w *KubeWaiter) SetTimeout(timeout time.Duration) {
w.timeout = timeout
@@ -184,20 +207,19 @@ func getStaticPodControlPlaneHashes(client clientset.Interface, nodeName string)
}
// TryRunCommand runs a function a maximum of failureThreshold times, and retries on error. If failureThreshold is hit, the last error is returned
func TryRunCommand(f func() error, failureThreshold uint8) error {
var numFailures uint8
return wait.PollImmediate(5*time.Second, 20*time.Minute, func() (bool, error) {
func TryRunCommand(f func() error, failureThreshold int) error {
backoff := wait.Backoff{
Duration: 5 * time.Second,
Factor: 2, // double the timeout for every failure
Steps: failureThreshold,
}
return wait.ExponentialBackoff(backoff, func() (bool, error) {
err := f()
if err != nil {
numFailures++
// If we've reached the maximum amount of failures, error out
if numFailures == failureThreshold {
return false, err
}
// Retry
// Retry until the timeout
return false, nil
}
// The last f() call was a success!
// The last f() call was a success, return cleanly
return true, nil
})
}
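
For readers who haven't used `wait.ExponentialBackoff` from `k8s.io/apimachinery/pkg/util/wait` before, a minimal usage sketch follows; it is illustrative only (the import path and return-value semantics are assumed to match this Kubernetes release) and not part of the PR.

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	attempts := 0
	backoff := wait.Backoff{
		Duration: 5 * time.Second, // first retry interval
		Factor:   2,               // double the interval after each failure
		Steps:    5,               // give up after five attempts
	}
	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
		attempts++
		// (false, nil) means "not done yet, retry"; (true, nil) stops with
		// success; returning a non-nil error aborts the backoff immediately.
		return attempts == 3, nil
	})
	fmt.Printf("attempts: %d, err: %v\n", attempts, err)
}
```

If every attempt returns `(false, nil)`, `ExponentialBackoff` gives up and returns `wait.ErrWaitTimeout`, which matches the "timed out waiting for the condition" message in the demo above. With `Duration: 5 * time.Second`, `Factor: 2`, and `Steps: 5`, the retry intervals follow the 5-10-20-40-80 second schedule discussed earlier.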


@@ -97,6 +97,12 @@ func (w *Waiter) WaitForPodToDisappear(podName string) error {
return nil
}
// WaitForHealthyKubelet blocks until the kubelet /healthz endpoint returns 'ok'
func (w *Waiter) WaitForHealthyKubelet(_ time.Duration, healthzEndpoint string) error {
fmt.Printf("[dryrun] Would make sure the kubelet %q endpoint is healthy\n", healthzEndpoint)
return nil
}
// SetTimeout is a no-op; we don't wait in this implementation
func (w *Waiter) SetTimeout(_ time.Duration) {}