mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-18 08:09:58 +00:00
Merge pull request #121958 from neolit123/1.30-remove-kubelet-and-func
kubeadm: drop concurrency when waiting for kubelet /healthz
This commit is contained in:
commit
a56093db9f
@ -26,9 +26,9 @@ import (
|
|||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
|
|
||||||
clientset "k8s.io/client-go/kubernetes"
|
clientset "k8s.io/client-go/kubernetes"
|
||||||
"k8s.io/klog/v2"
|
|
||||||
|
|
||||||
"k8s.io/kubernetes/cmd/kubeadm/app/cmd/phases/workflow"
|
"k8s.io/kubernetes/cmd/kubeadm/app/cmd/phases/workflow"
|
||||||
|
kubeadmconstants "k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
||||||
"k8s.io/kubernetes/cmd/kubeadm/app/util/apiclient"
|
"k8s.io/kubernetes/cmd/kubeadm/app/util/apiclient"
|
||||||
dryrunutil "k8s.io/kubernetes/cmd/kubeadm/app/util/dryrun"
|
dryrunutil "k8s.io/kubernetes/cmd/kubeadm/app/util/dryrun"
|
||||||
)
|
)
|
||||||
@ -79,24 +79,23 @@ func runWaitControlPlanePhase(c workflow.RunData) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// waiter holds the apiclient.Waiter implementation of choice, responsible for querying the API server in various ways and waiting for conditions to be fulfilled
|
// Both Wait* calls below use a /healthz endpoint, thus a client without permissions works fine
|
||||||
klog.V(1).Infoln("[wait-control-plane] Waiting for the API server to be healthy")
|
|
||||||
|
|
||||||
// WaitForAPI uses the /healthz endpoint, thus a client without permissions works fine
|
|
||||||
client, err := data.ClientWithoutBootstrap()
|
client, err := data.ClientWithoutBootstrap()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return errors.Wrap(err, "cannot obtain client without bootstrap")
|
return errors.Wrap(err, "cannot obtain client without bootstrap")
|
||||||
}
|
}
|
||||||
|
|
||||||
timeout := data.Cfg().ClusterConfiguration.APIServer.TimeoutForControlPlane.Duration
|
waiter, err := newControlPlaneWaiter(data.DryRun(), 0, client, data.OutputWriter())
|
||||||
waiter, err := newControlPlaneWaiter(data.DryRun(), timeout, client, data.OutputWriter())
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return errors.Wrap(err, "error creating waiter")
|
return errors.Wrap(err, "error creating waiter")
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory %q. This can take up to %v\n", data.ManifestDir(), timeout)
|
controlPlaneTimeout := data.Cfg().ClusterConfiguration.APIServer.TimeoutForControlPlane.Duration
|
||||||
|
fmt.Printf("[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods"+
|
||||||
|
" from directory %q\n",
|
||||||
|
data.ManifestDir())
|
||||||
|
|
||||||
if err := waiter.WaitForKubeletAndFunc(waiter.WaitForAPI); err != nil {
|
handleError := func(err error) error {
|
||||||
context := struct {
|
context := struct {
|
||||||
Error string
|
Error string
|
||||||
Socket string
|
Socket string
|
||||||
@ -109,6 +108,16 @@ func runWaitControlPlanePhase(c workflow.RunData) error {
|
|||||||
return errors.New("couldn't initialize a Kubernetes cluster")
|
return errors.New("couldn't initialize a Kubernetes cluster")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
waiter.SetTimeout(kubeadmconstants.DefaultKubeletTimeout)
|
||||||
|
if err := waiter.WaitForKubelet(); err != nil {
|
||||||
|
return handleError(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
waiter.SetTimeout(controlPlaneTimeout)
|
||||||
|
if err := waiter.WaitForAPI(); err != nil {
|
||||||
|
return handleError(err)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -205,8 +205,14 @@ func runKubeletStartJoinPhase(c workflow.RunData) (returnErr error) {
|
|||||||
// Now the kubelet will perform the TLS Bootstrap, transforming /etc/kubernetes/bootstrap-kubelet.conf to /etc/kubernetes/kubelet.conf
|
// Now the kubelet will perform the TLS Bootstrap, transforming /etc/kubernetes/bootstrap-kubelet.conf to /etc/kubernetes/kubelet.conf
|
||||||
// Wait for the kubelet to create the /etc/kubernetes/kubelet.conf kubeconfig file. If this process
|
// Wait for the kubelet to create the /etc/kubernetes/kubelet.conf kubeconfig file. If this process
|
||||||
// times out, display a somewhat user-friendly message.
|
// times out, display a somewhat user-friendly message.
|
||||||
waiter := apiclient.NewKubeWaiter(nil, kubeadmconstants.TLSBootstrapTimeout, os.Stdout)
|
waiter := apiclient.NewKubeWaiter(nil, 0, os.Stdout)
|
||||||
if err := waiter.WaitForKubeletAndFunc(waitForTLSBootstrappedClient); err != nil {
|
waiter.SetTimeout(kubeadmconstants.DefaultKubeletTimeout)
|
||||||
|
if err := waiter.WaitForKubelet(); err != nil {
|
||||||
|
fmt.Printf(kubeadmJoinFailMsg, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := waitForTLSBootstrappedClient(); err != nil {
|
||||||
fmt.Printf(kubeadmJoinFailMsg, err)
|
fmt.Printf(kubeadmJoinFailMsg, err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -227,7 +233,7 @@ func runKubeletStartJoinPhase(c workflow.RunData) (returnErr error) {
|
|||||||
|
|
||||||
// waitForTLSBootstrappedClient waits for the /etc/kubernetes/kubelet.conf file to be available
|
// waitForTLSBootstrappedClient waits for the /etc/kubernetes/kubelet.conf file to be available
|
||||||
func waitForTLSBootstrappedClient() error {
|
func waitForTLSBootstrappedClient() error {
|
||||||
fmt.Println("[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...")
|
fmt.Println("[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap")
|
||||||
|
|
||||||
// Loop on every falsy return. Return with an error if raised. Exit successfully if true is returned.
|
// Loop on every falsy return. Return with an error if raised. Exit successfully if true is returned.
|
||||||
return wait.PollImmediate(kubeadmconstants.TLSBootstrapRetryInterval, kubeadmconstants.TLSBootstrapTimeout, func() (bool, error) {
|
return wait.PollImmediate(kubeadmconstants.TLSBootstrapRetryInterval, kubeadmconstants.TLSBootstrapTimeout, func() (bool, error) {
|
||||||
|
@ -230,6 +230,8 @@ const (
|
|||||||
|
|
||||||
// DefaultControlPlaneTimeout specifies the default control plane (actually API Server) timeout for use by kubeadm
|
// DefaultControlPlaneTimeout specifies the default control plane (actually API Server) timeout for use by kubeadm
|
||||||
DefaultControlPlaneTimeout = 4 * time.Minute
|
DefaultControlPlaneTimeout = 4 * time.Minute
|
||||||
|
// DefaultKubeletTimeout specifies the default kubelet timeout
|
||||||
|
DefaultKubeletTimeout = 4 * time.Minute
|
||||||
|
|
||||||
// MinimumAddressesInServiceSubnet defines minimum amount of nodes the Service subnet should allow.
|
// MinimumAddressesInServiceSubnet defines minimum amount of nodes the Service subnet should allow.
|
||||||
// We need at least ten, because the DNS service is always at the tenth cluster clusterIP
|
// We need at least ten, because the DNS service is always at the tenth cluster clusterIP
|
||||||
|
@ -22,10 +22,12 @@ import (
|
|||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
|
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
clientset "k8s.io/client-go/kubernetes"
|
clientset "k8s.io/client-go/kubernetes"
|
||||||
bootstraputil "k8s.io/cluster-bootstrap/token/util"
|
bootstraputil "k8s.io/cluster-bootstrap/token/util"
|
||||||
|
|
||||||
bootstraptokenv1 "k8s.io/kubernetes/cmd/kubeadm/app/apis/bootstraptoken/v1"
|
bootstraptokenv1 "k8s.io/kubernetes/cmd/kubeadm/app/apis/bootstraptoken/v1"
|
||||||
|
kubeadmconstants "k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
||||||
"k8s.io/kubernetes/cmd/kubeadm/app/util/apiclient"
|
"k8s.io/kubernetes/cmd/kubeadm/app/util/apiclient"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -46,15 +48,21 @@ func UpdateOrCreateTokens(client clientset.Interface, failIfExists bool, tokens
|
|||||||
}
|
}
|
||||||
|
|
||||||
updatedOrNewSecret := bootstraptokenv1.BootstrapTokenToSecret(&token)
|
updatedOrNewSecret := bootstraptokenv1.BootstrapTokenToSecret(&token)
|
||||||
// Try to create or update the token with an exponential backoff
|
|
||||||
err = apiclient.TryRunCommand(func() error {
|
var lastError error
|
||||||
|
err = wait.PollUntilContextTimeout(
|
||||||
|
context.Background(),
|
||||||
|
kubeadmconstants.APICallRetryInterval,
|
||||||
|
kubeadmconstants.APICallWithWriteTimeout,
|
||||||
|
true, func(_ context.Context) (bool, error) {
|
||||||
if err := apiclient.CreateOrUpdateSecret(client, updatedOrNewSecret); err != nil {
|
if err := apiclient.CreateOrUpdateSecret(client, updatedOrNewSecret); err != nil {
|
||||||
return errors.Wrapf(err, "failed to create or update bootstrap token with name %s", secretName)
|
lastError = errors.Wrapf(err, "failed to create or update bootstrap token with name %s", secretName)
|
||||||
|
return false, nil
|
||||||
}
|
}
|
||||||
return nil
|
return true, nil
|
||||||
}, 5)
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return lastError
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
@ -131,13 +131,8 @@ func (w *fakeWaiter) WaitForStaticPodHashChange(_, _, _ string) error {
|
|||||||
return w.errsToReturn[waitForHashChange]
|
return w.errsToReturn[waitForHashChange]
|
||||||
}
|
}
|
||||||
|
|
||||||
// WaitForHealthyKubelet returns a dummy nil just to implement the interface
|
// WaitForKubelet returns a dummy nil just to implement the interface
|
||||||
func (w *fakeWaiter) WaitForHealthyKubelet(_ time.Duration, _ string) error {
|
func (w *fakeWaiter) WaitForKubelet() error {
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// WaitForKubeletAndFunc is a wrapper for WaitForHealthyKubelet that also blocks for a function
|
|
||||||
func (w *fakeWaiter) WaitForKubeletAndFunc(f func() error) error {
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -50,10 +50,8 @@ type Waiter interface {
|
|||||||
WaitForStaticPodHashChange(nodeName, component, previousHash string) error
|
WaitForStaticPodHashChange(nodeName, component, previousHash string) error
|
||||||
// WaitForStaticPodControlPlaneHashes fetches sha256 hashes for the control plane static pods
|
// WaitForStaticPodControlPlaneHashes fetches sha256 hashes for the control plane static pods
|
||||||
WaitForStaticPodControlPlaneHashes(nodeName string) (map[string]string, error)
|
WaitForStaticPodControlPlaneHashes(nodeName string) (map[string]string, error)
|
||||||
// WaitForHealthyKubelet blocks until the kubelet /healthz endpoint returns 'ok'
|
// WaitForKubelet blocks until the kubelet /healthz endpoint returns 'ok'
|
||||||
WaitForHealthyKubelet(initialTimeout time.Duration, healthzEndpoint string) error
|
WaitForKubelet() error
|
||||||
// WaitForKubeletAndFunc is a wrapper for WaitForHealthyKubelet that also blocks for a function
|
|
||||||
WaitForKubeletAndFunc(f func() error) error
|
|
||||||
// SetTimeout adjusts the timeout to the specified duration
|
// SetTimeout adjusts the timeout to the specified duration
|
||||||
SetTimeout(timeout time.Duration)
|
SetTimeout(timeout time.Duration)
|
||||||
}
|
}
|
||||||
@ -76,17 +74,28 @@ func NewKubeWaiter(client clientset.Interface, timeout time.Duration, writer io.
|
|||||||
|
|
||||||
// WaitForAPI waits for the API Server's /healthz endpoint to report "ok"
|
// WaitForAPI waits for the API Server's /healthz endpoint to report "ok"
|
||||||
func (w *KubeWaiter) WaitForAPI() error {
|
func (w *KubeWaiter) WaitForAPI() error {
|
||||||
|
fmt.Printf("[api-check] Waiting for a healthy API server. This can take up to %v\n", w.timeout)
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
return wait.PollImmediate(kubeadmconstants.APICallRetryInterval, w.timeout, func() (bool, error) {
|
err := wait.PollUntilContextTimeout(
|
||||||
|
context.Background(),
|
||||||
|
kubeadmconstants.APICallRetryInterval,
|
||||||
|
w.timeout,
|
||||||
|
true, func(ctx context.Context) (bool, error) {
|
||||||
healthStatus := 0
|
healthStatus := 0
|
||||||
w.client.Discovery().RESTClient().Get().AbsPath("/healthz").Do(context.TODO()).StatusCode(&healthStatus)
|
w.client.Discovery().RESTClient().Get().AbsPath("/healthz").Do(ctx).StatusCode(&healthStatus)
|
||||||
if healthStatus != http.StatusOK {
|
if healthStatus != http.StatusOK {
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("[apiclient] All control plane components are healthy after %f seconds\n", time.Since(start).Seconds())
|
|
||||||
return true, nil
|
return true, nil
|
||||||
})
|
})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("[api-check] The API server is not healthy after %v\n", time.Since(start))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("[api-check] The API server is healthy after %v\n", time.Since(start))
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// WaitForPodsWithLabel will lookup pods with the given label and wait until they are all
|
// WaitForPodsWithLabel will lookup pods with the given label and wait until they are all
|
||||||
@ -133,47 +142,54 @@ func (w *KubeWaiter) WaitForPodToDisappear(podName string) error {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// WaitForHealthyKubelet blocks until the kubelet /healthz endpoint returns 'ok'
|
// WaitForKubelet blocks until the kubelet /healthz endpoint returns 'ok'.
|
||||||
func (w *KubeWaiter) WaitForHealthyKubelet(initialTimeout time.Duration, healthzEndpoint string) error {
|
func (w *KubeWaiter) WaitForKubelet() error {
|
||||||
time.Sleep(initialTimeout)
|
var (
|
||||||
fmt.Printf("[kubelet-check] Initial timeout of %v passed.\n", initialTimeout)
|
lastError error
|
||||||
return TryRunCommand(func() error {
|
start = time.Now()
|
||||||
|
healthzEndpoint = fmt.Sprintf("http://localhost:%d/healthz", kubeadmconstants.KubeletHealthzPort)
|
||||||
|
)
|
||||||
|
|
||||||
|
fmt.Printf("[kubelet-check] Waiting for a healthy kubelet. This can take up to %v\n", w.timeout)
|
||||||
|
|
||||||
|
formatError := func(cause string) error {
|
||||||
|
return errors.Errorf("The HTTP call equal to 'curl -sSL %s' returned %s\n",
|
||||||
|
healthzEndpoint, cause)
|
||||||
|
}
|
||||||
|
|
||||||
|
err := wait.PollUntilContextTimeout(
|
||||||
|
context.Background(),
|
||||||
|
kubeadmconstants.APICallRetryInterval,
|
||||||
|
w.timeout,
|
||||||
|
true, func(ctx context.Context) (bool, error) {
|
||||||
client := &http.Client{Transport: netutil.SetOldTransportDefaults(&http.Transport{})}
|
client := &http.Client{Transport: netutil.SetOldTransportDefaults(&http.Transport{})}
|
||||||
resp, err := client.Get(healthzEndpoint)
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthzEndpoint, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println("[kubelet-check] It seems like the kubelet isn't running or healthy.")
|
lastError = formatError(fmt.Sprintf("error: %v", err))
|
||||||
fmt.Printf("[kubelet-check] The HTTP call equal to 'curl -sSL %s' failed with error: %v.\n", healthzEndpoint, err)
|
return false, err
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
lastError = formatError(fmt.Sprintf("error: %v", err))
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
_ = resp.Body.Close()
|
||||||
|
}()
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
fmt.Println("[kubelet-check] It seems like the kubelet isn't running or healthy.")
|
lastError = formatError(fmt.Sprintf("status code: %d", resp.StatusCode))
|
||||||
fmt.Printf("[kubelet-check] The HTTP call equal to 'curl -sSL %s' returned HTTP code %d\n", healthzEndpoint, resp.StatusCode)
|
return false, nil
|
||||||
return errors.New("the kubelet healthz endpoint is unhealthy")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return true, nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("[kubelet-check] The kubelet is not healthy after %v\n", time.Since(start))
|
||||||
|
return lastError
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("[kubelet-check] The kubelet is healthy after %v\n", time.Since(start))
|
||||||
return nil
|
return nil
|
||||||
}, 5) // a failureThreshold of five means waiting for a total of 155 seconds
|
|
||||||
}
|
|
||||||
|
|
||||||
// WaitForKubeletAndFunc waits primarily for the function f to execute, even though it might take some time. If that takes a long time, and the kubelet
|
|
||||||
// /healthz continuously are unhealthy, kubeadm will error out after a period of exponential backoff
|
|
||||||
func (w *KubeWaiter) WaitForKubeletAndFunc(f func() error) error {
|
|
||||||
errorChan := make(chan error, 1)
|
|
||||||
|
|
||||||
go func(errC chan error, waiter Waiter) {
|
|
||||||
if err := waiter.WaitForHealthyKubelet(40*time.Second, fmt.Sprintf("http://localhost:%d/healthz", kubeadmconstants.KubeletHealthzPort)); err != nil {
|
|
||||||
errC <- err
|
|
||||||
}
|
|
||||||
}(errorChan, w)
|
|
||||||
|
|
||||||
go func(errC chan error) {
|
|
||||||
// This main goroutine sends whatever the f function returns (error or not) to the channel
|
|
||||||
// This in order to continue on success (nil error), or just fail if the function returns an error
|
|
||||||
errC <- f()
|
|
||||||
}(errorChan)
|
|
||||||
|
|
||||||
// This call is blocking until one of the goroutines sends to errorChan
|
|
||||||
return <-errorChan
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetTimeout adjusts the timeout to the specified duration
|
// SetTimeout adjusts the timeout to the specified duration
|
||||||
@ -264,21 +280,3 @@ func getStaticPodSingleHash(client clientset.Interface, nodeName string, compone
|
|||||||
staticPodHash := staticPod.Annotations["kubernetes.io/config.hash"]
|
staticPodHash := staticPod.Annotations["kubernetes.io/config.hash"]
|
||||||
return staticPodHash, nil
|
return staticPodHash, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// TryRunCommand runs a function a maximum of failureThreshold times, and retries on error. If failureThreshold is hit; the last error is returned
|
|
||||||
func TryRunCommand(f func() error, failureThreshold int) error {
|
|
||||||
backoff := wait.Backoff{
|
|
||||||
Duration: 5 * time.Second,
|
|
||||||
Factor: 2, // double the timeout for every failure
|
|
||||||
Steps: failureThreshold,
|
|
||||||
}
|
|
||||||
return wait.ExponentialBackoff(backoff, func() (bool, error) {
|
|
||||||
err := f()
|
|
||||||
if err != nil {
|
|
||||||
// Retry until the timeout
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
// The last f() call was a success, return cleanly
|
|
||||||
return true, nil
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
@ -106,14 +106,9 @@ func (w *Waiter) WaitForPodToDisappear(podName string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// WaitForHealthyKubelet blocks until the kubelet /healthz endpoint returns 'ok'
|
// WaitForKubelet blocks until the kubelet /healthz endpoint returns 'ok'
|
||||||
func (w *Waiter) WaitForHealthyKubelet(_ time.Duration, healthzEndpoint string) error {
|
func (w *Waiter) WaitForKubelet() error {
|
||||||
fmt.Printf("[dryrun] Would make sure the kubelet %q endpoint is healthy\n", healthzEndpoint)
|
fmt.Println("[dryrun] Would make sure the kubelet's /healthz endpoint is healthy")
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// WaitForKubeletAndFunc is a wrapper for WaitForHealthyKubelet that also blocks for a function
|
|
||||||
func (w *Waiter) WaitForKubeletAndFunc(f func() error) error {
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user